From 2c2b0d880f1b4c01f30e14242977b82fa527342d Mon Sep 17 00:00:00 2001
From: Mukul Joshi <mukul.joshi@amd.com>
Date: Thu, 23 Jul 2020 23:09:57 -0400
Subject: drm/amdkfd: Add thermal throttling SMI event

Add support for reporting thermal throttling events through SMI.
Also, add a counter to count the number of throttling interrupts
observed and report the count in the SMI event message.

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/linux/kfd_ioctl.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index f738c3b53f4e..df6c7a43aadc 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -450,7 +450,8 @@ struct kfd_ioctl_import_dmabuf_args {
  * KFD SMI(System Management Interface) events
  */
 /* Event type (defined by bitmask) */
-#define KFD_SMI_EVENT_VMFAULT     0x0000000000000001
+#define KFD_SMI_EVENT_VMFAULT			0x0000000000000001
+#define KFD_SMI_EVENT_THERMAL_THROTTLE		0x0000000000000002
 
 struct kfd_ioctl_smi_events_args {
 	__u32 gpuid;	/* to KFD */
-- 
cgit v1.2.3


From 522ec6e0eed0ab0678e7d5b5bf00487dfe83f7ce Mon Sep 17 00:00:00 2001
From: Mukul Joshi <mukul.joshi@amd.com>
Date: Thu, 30 Jul 2020 18:04:33 -0400
Subject: drm/amdkfd: Replace bitmask with event idx in SMI event msg

Event bitmask is a 64-bit mask with only 1 bit set. Sending this
event bitmask in KFD SMI event message is both wasteful of memory
and potentially limiting to only 64 events. Instead send event
index in SMI event message.
Please note this change does not break the ABI for the two event
types defined so far. The new index is identical to the mask used
before.

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Suggested-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 24 +++++++++++++-----------
 include/uapi/linux/kfd_ioctl.h              | 10 +++++++---
 2 files changed, 20 insertions(+), 14 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index 86c2c3e97944..4d4b6e3ab697 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -149,7 +149,7 @@ static int kfd_smi_ev_release(struct inode *inode, struct file *filep)
 	return 0;
 }
 
-static void add_event_to_kfifo(struct kfd_dev *dev, unsigned long long smi_event,
+static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event,
 			      char *event_msg, int len)
 {
 	struct kfd_smi_client *client;
@@ -157,14 +157,15 @@ static void add_event_to_kfifo(struct kfd_dev *dev, unsigned long long smi_event
 	rcu_read_lock();
 
 	list_for_each_entry_rcu(client, &dev->smi_clients, list) {
-		if (!(READ_ONCE(client->events) & smi_event))
+		if (!(READ_ONCE(client->events) &
+				KFD_SMI_EVENT_MASK_FROM_INDEX(smi_event)))
 			continue;
 		spin_lock(&client->lock);
 		if (kfifo_avail(&client->fifo) >= len) {
 			kfifo_in(&client->fifo, event_msg, len);
 			wake_up_all(&client->wait_queue);
 		} else {
-			pr_debug("smi_event(EventID: %llu): no space left\n",
+			pr_debug("smi_event(EventID: %u): no space left\n",
 					smi_event);
 		}
 		spin_unlock(&client->lock);
@@ -180,21 +181,21 @@ void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
 	/*
 	 * ThermalThrottle msg = throttle_bitmask(8):
 	 * 			 thermal_interrupt_count(16):
-	 * 16 bytes event + 1 byte space + 8 byte throttle_bitmask +
+	 * 1 byte event + 1 byte space + 8 byte throttle_bitmask +
 	 * 1 byte : + 16 byte thermal_interupt_counter + 1 byte \n +
-	 * 1 byte \0 = 44
+	 * 1 byte \0 = 29
 	 */
-	char fifo_in[44];
+	char fifo_in[29];
 	int len;
 
 	if (list_empty(&dev->smi_clients))
 		return;
 
-	len = snprintf(fifo_in, 44, "%x %x:%llx\n",
+	len = snprintf(fifo_in, 29, "%x %x:%llx\n",
 		       KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask,
 		       atomic64_read(&adev->smu.throttle_int_counter));
 
-	add_event_to_kfifo(dev, KFD_SMI_EVENT_THERMAL_THROTTLE, fifo_in, len);
+	add_event_to_kfifo(dev, KFD_SMI_EVENT_THERMAL_THROTTLE,	fifo_in, len);
 }
 
 void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
@@ -202,9 +203,10 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
 	struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;
 	struct amdgpu_task_info task_info;
 	/* VmFault msg = (hex)uint32_pid(8) + :(1) + task name(16) = 25 */
-	/* 16 bytes event + 1 byte space + 25 bytes msg + 1 byte \n = 43
+	/* 1 byte event + 1 byte space + 25 bytes msg + 1 byte \n +
+	 * 1 byte \0 = 29
 	 */
-	char fifo_in[43];
+	char fifo_in[29];
 	int len;
 
 	if (list_empty(&dev->smi_clients))
@@ -216,7 +218,7 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
 	if (!task_info.pid)
 		return;
 
-	len = snprintf(fifo_in, 43, "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT,
+	len = snprintf(fifo_in, 29, "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT,
 		task_info.pid, task_info.task_name);
 
 	add_event_to_kfifo(dev, KFD_SMI_EVENT_VMFAULT, fifo_in, len);
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index df6c7a43aadc..cb1f963a84e0 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -449,9 +449,13 @@ struct kfd_ioctl_import_dmabuf_args {
 /*
  * KFD SMI(System Management Interface) events
  */
-/* Event type (defined by bitmask) */
-#define KFD_SMI_EVENT_VMFAULT			0x0000000000000001
-#define KFD_SMI_EVENT_THERMAL_THROTTLE		0x0000000000000002
+enum kfd_smi_event {
+        KFD_SMI_EVENT_NONE = 0, /* not used */
+        KFD_SMI_EVENT_VMFAULT = 1, /* event start counting at 1 */
+        KFD_SMI_EVENT_THERMAL_THROTTLE = 2,
+};
+
+#define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1))
 
 struct kfd_ioctl_smi_events_args {
 	__u32 gpuid;	/* to KFD */
-- 
cgit v1.2.3


From 16c642ec3fe9a144fbe1e97dc56f13a6308f1381 Mon Sep 17 00:00:00 2001
From: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Date: Thu, 30 Jul 2020 15:54:59 +0200
Subject: drm/amdgpu: new ids flag for tmz (v2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Allows UMD to know if TMZ is supported and enabled.

This commit also bumps KMS_DRIVER_MINOR because, if we don't,
UMD can't tell if "ids_flags & AMDGPU_IDS_FLAGS_TMZ == 0" means
"tmz is not enabled" or "tmz may be enabled but the kernel doesn't
report it".

v2: use amdgpu_is_tmz() and reworded commit message.

Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 ++
 include/uapi/drm/amdgpu_drm.h           | 1 +
 3 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 5156c67ec67b..92d0368217a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -88,9 +88,10 @@
  * - 3.37.0 - L2 is invalidated before SDMA IBs, needed for correctness
  * - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC
  * - 3.39.0 - DMABUF implicit sync does a full pipeline sync
+ * - 3.40.0 - Add AMDGPU_IDS_FLAGS_TMZ
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	39
+#define KMS_DRIVER_MINOR	40
 #define KMS_DRIVER_PATCHLEVEL	0
 
 int amdgpu_vram_limit = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index e99ad031efd4..58580a48b648 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -737,6 +737,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 			dev_info.ids_flags |= AMDGPU_IDS_FLAGS_FUSION;
 		if (amdgpu_mcbp || amdgpu_sriov_vf(adev))
 			dev_info.ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION;
+		if (amdgpu_is_tmz(adev))
+			dev_info.ids_flags |= AMDGPU_IDS_FLAGS_TMZ;
 
 		vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
 		vm_size -= AMDGPU_VA_RESERVED_SIZE;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 3218576e109d..c5ff2b275fcd 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -673,6 +673,7 @@ struct drm_amdgpu_cs_chunk_data {
  */
 #define AMDGPU_IDS_FLAGS_FUSION         0x1
 #define AMDGPU_IDS_FLAGS_PREEMPTION     0x2
+#define AMDGPU_IDS_FLAGS_TMZ            0x4
 
 /* indicate if acceleration can be working */
 #define AMDGPU_INFO_ACCEL_WORKING		0x00
-- 
cgit v1.2.3


From e5b92773287c3eb3108a44785986a6c997866df8 Mon Sep 17 00:00:00 2001
From: Oleg Vasilev <oleg.vasilev@intel.com>
Date: Fri, 24 Apr 2020 18:20:51 +0530
Subject: drm: report dp downstream port type as a subconnector property
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, downstream port type is only reported in debugfs. This
information should be considered important since it reflects the actual
physical connector type. Some userspace (e.g. window compositors)
may want to show this info to a user.

The 'subconnector' property is already utilized for DVI-I and TV-out for
reporting connector subtype.

The initial motivation for this feature came from i2c test [1].
It is supposed to be skipped on VGA connectors, but it cannot
detect VGA over DP and fails instead.

v2:
 - Ville: utilized drm_dp_is_branch()
 - Ville: implement DP 1.0 downstream type info
 - Replaced create_dp_properties with add_dp_subconnector_property
 - Added dp_set_subconnector_property helper

v4:
 - Ville: add DP1.0 best assumption about subconnector
 - Ville: assume DVI is DVI-D
 - Ville: reuse Writeback enum value for Virtual subconnector
 - Renamed #defines: HDMI -> HDMIA, DP -> DisplayPort

v5: rebase

v6:
 - Jani Nikula: renamed a function name
 - Jani Nikula: addressed the issues with documentation

[1]: https://bugs.freedesktop.org/show_bug.cgi?id=104097

Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: intel-gfx@lists.freedesktop.org
Signed-off-by: Jeevan B <jeevan.b@intel.com>
Signed-off-by: Oleg Vasilev <oleg.vasilev@intel.com>
Reviewed-by: Emil Velikov <emil.velikov@collabora.com>
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1587732655-17544-1-git-send-email-jeevan.b@intel.com
---
 drivers/gpu/drm/drm_connector.c | 49 ++++++++++++++++++++++++++--
 drivers/gpu/drm/drm_dp_helper.c | 71 +++++++++++++++++++++++++++++++++++++++++
 include/drm/drm_connector.h     |  3 ++
 include/drm/drm_dp_helper.h     |  8 +++++
 include/drm/drm_mode_config.h   |  6 ++++
 include/uapi/drm/drm_mode.h     | 21 +++++++-----
 6 files changed, 148 insertions(+), 10 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index 00e40a26a800..3d48ad1c3682 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -850,7 +850,7 @@ static const struct drm_prop_enum_list drm_dvi_i_select_enum_list[] = {
 DRM_ENUM_NAME_FN(drm_get_dvi_i_select_name, drm_dvi_i_select_enum_list)
 
 static const struct drm_prop_enum_list drm_dvi_i_subconnector_enum_list[] = {
-	{ DRM_MODE_SUBCONNECTOR_Unknown,   "Unknown"   }, /* DVI-I and TV-out */
+	{ DRM_MODE_SUBCONNECTOR_Unknown,   "Unknown"   }, /* DVI-I, TV-out and DP */
 	{ DRM_MODE_SUBCONNECTOR_DVID,      "DVI-D"     }, /* DVI-I  */
 	{ DRM_MODE_SUBCONNECTOR_DVIA,      "DVI-A"     }, /* DVI-I  */
 };
@@ -867,7 +867,7 @@ static const struct drm_prop_enum_list drm_tv_select_enum_list[] = {
 DRM_ENUM_NAME_FN(drm_get_tv_select_name, drm_tv_select_enum_list)
 
 static const struct drm_prop_enum_list drm_tv_subconnector_enum_list[] = {
-	{ DRM_MODE_SUBCONNECTOR_Unknown,   "Unknown"   }, /* DVI-I and TV-out */
+	{ DRM_MODE_SUBCONNECTOR_Unknown,   "Unknown"   }, /* DVI-I, TV-out and DP */
 	{ DRM_MODE_SUBCONNECTOR_Composite, "Composite" }, /* TV-out */
 	{ DRM_MODE_SUBCONNECTOR_SVIDEO,    "SVIDEO"    }, /* TV-out */
 	{ DRM_MODE_SUBCONNECTOR_Component, "Component" }, /* TV-out */
@@ -876,6 +876,19 @@ static const struct drm_prop_enum_list drm_tv_subconnector_enum_list[] = {
 DRM_ENUM_NAME_FN(drm_get_tv_subconnector_name,
 		 drm_tv_subconnector_enum_list)
 
+static const struct drm_prop_enum_list drm_dp_subconnector_enum_list[] = {
+	{ DRM_MODE_SUBCONNECTOR_Unknown,     "Unknown"   }, /* DVI-I, TV-out and DP */
+	{ DRM_MODE_SUBCONNECTOR_VGA,	     "VGA"       }, /* DP */
+	{ DRM_MODE_SUBCONNECTOR_DVID,	     "DVI-D"     }, /* DP */
+	{ DRM_MODE_SUBCONNECTOR_HDMIA,	     "HDMI"      }, /* DP */
+	{ DRM_MODE_SUBCONNECTOR_DisplayPort, "DP"        }, /* DP */
+	{ DRM_MODE_SUBCONNECTOR_Wireless,    "Wireless"  }, /* DP */
+	{ DRM_MODE_SUBCONNECTOR_Native,	     "Native"    }, /* DP */
+};
+
+DRM_ENUM_NAME_FN(drm_get_dp_subconnector_name,
+		 drm_dp_subconnector_enum_list)
+
 static const struct drm_prop_enum_list hdmi_colorspaces[] = {
 	/* For Default case, driver will set the colorspace */
 	{ DRM_MODE_COLORIMETRY_DEFAULT, "Default" },
@@ -1217,6 +1230,14 @@ static const struct drm_prop_enum_list dp_colorspaces[] = {
  *	can also expose this property to external outputs, in which case they
  *	must support "None", which should be the default (since external screens
  *	have a built-in scaler).
+ *
+ * subconnector:
+ *	This property is used by DVI-I, TVout and DisplayPort to indicate different
+ *	connector subtypes. Enum values more or less match with those from main
+ *	connector types.
+ *	For DVI-I and TVout there is also a matching property "select subconnector"
+ *	allowing to switch between signal types.
+ *	DP subconnector corresponds to a downstream port.
  */
 
 int drm_connector_create_standard_properties(struct drm_device *dev)
@@ -1305,6 +1326,30 @@ int drm_mode_create_dvi_i_properties(struct drm_device *dev)
 }
 EXPORT_SYMBOL(drm_mode_create_dvi_i_properties);
 
+/**
+ * drm_connector_attach_dp_subconnector_property - create subconnector property for DP
+ * @connector: drm_connector to attach property
+ *
+ * Called by a driver when DP connector is created.
+ */
+void drm_connector_attach_dp_subconnector_property(struct drm_connector *connector)
+{
+	struct drm_mode_config *mode_config = &connector->dev->mode_config;
+
+	if (!mode_config->dp_subconnector_property)
+		mode_config->dp_subconnector_property =
+			drm_property_create_enum(connector->dev,
+				DRM_MODE_PROP_IMMUTABLE,
+				"subconnector",
+				drm_dp_subconnector_enum_list,
+				ARRAY_SIZE(drm_dp_subconnector_enum_list));
+
+	drm_object_attach_property(&connector->base,
+				   mode_config->dp_subconnector_property,
+				   DRM_MODE_SUBCONNECTOR_Unknown);
+}
+EXPORT_SYMBOL(drm_connector_attach_dp_subconnector_property);
+
 /**
  * DOC: HDMI connector properties
  *
diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c
index a3c82e726057..4c21cf69dad5 100644
--- a/drivers/gpu/drm/drm_dp_helper.c
+++ b/drivers/gpu/drm/drm_dp_helper.c
@@ -597,6 +597,77 @@ void drm_dp_downstream_debug(struct seq_file *m,
 }
 EXPORT_SYMBOL(drm_dp_downstream_debug);
 
+/**
+ * drm_dp_subconnector_type() - get DP branch device type
+ *
+ */
+enum drm_mode_subconnector
+drm_dp_subconnector_type(const u8 dpcd[DP_RECEIVER_CAP_SIZE],
+			 const u8 port_cap[4])
+{
+	int type;
+	if (!drm_dp_is_branch(dpcd))
+		return DRM_MODE_SUBCONNECTOR_Native;
+	/* DP 1.0 approach */
+	if (dpcd[DP_DPCD_REV] == DP_DPCD_REV_10) {
+		type = dpcd[DP_DOWNSTREAMPORT_PRESENT] &
+		       DP_DWN_STRM_PORT_TYPE_MASK;
+
+		switch (type) {
+		case DP_DWN_STRM_PORT_TYPE_TMDS:
+			/* Can be HDMI or DVI-D, DVI-D is a safer option */
+			return DRM_MODE_SUBCONNECTOR_DVID;
+		case DP_DWN_STRM_PORT_TYPE_ANALOG:
+			/* Can be VGA or DVI-A, VGA is more popular */
+			return DRM_MODE_SUBCONNECTOR_VGA;
+		case DP_DWN_STRM_PORT_TYPE_DP:
+			return DRM_MODE_SUBCONNECTOR_DisplayPort;
+		case DP_DWN_STRM_PORT_TYPE_OTHER:
+		default:
+			return DRM_MODE_SUBCONNECTOR_Unknown;
+		}
+	}
+	type = port_cap[0] & DP_DS_PORT_TYPE_MASK;
+
+	switch (type) {
+	case DP_DS_PORT_TYPE_DP:
+	case DP_DS_PORT_TYPE_DP_DUALMODE:
+		return DRM_MODE_SUBCONNECTOR_DisplayPort;
+	case DP_DS_PORT_TYPE_VGA:
+		return DRM_MODE_SUBCONNECTOR_VGA;
+	case DP_DS_PORT_TYPE_DVI:
+		return DRM_MODE_SUBCONNECTOR_DVID;
+	case DP_DS_PORT_TYPE_HDMI:
+		return DRM_MODE_SUBCONNECTOR_HDMIA;
+	case DP_DS_PORT_TYPE_WIRELESS:
+		return DRM_MODE_SUBCONNECTOR_Wireless;
+	case DP_DS_PORT_TYPE_NON_EDID:
+	default:
+		return DRM_MODE_SUBCONNECTOR_Unknown;
+	}
+}
+EXPORT_SYMBOL(drm_dp_subconnector_type);
+
+/**
+ * drm_dp_set_subconnector_property - set subconnector for DP connector
+ *
+ * Called by a driver on every detect event.
+ */
+void drm_dp_set_subconnector_property(struct drm_connector *connector,
+				      enum drm_connector_status status,
+				      const u8 *dpcd,
+				      const u8 port_cap[4])
+{
+	enum drm_mode_subconnector subconnector = DRM_MODE_SUBCONNECTOR_Unknown;
+
+	if (status == connector_status_connected)
+		subconnector = drm_dp_subconnector_type(dpcd, port_cap);
+	drm_object_property_set_value(&connector->base,
+			connector->dev->mode_config.dp_subconnector_property,
+			subconnector);
+}
+EXPORT_SYMBOL(drm_dp_set_subconnector_property);
+
 /*
  * I2C-over-AUX implementation
  */
diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h
index af145608b5ed..928136556174 100644
--- a/include/drm/drm_connector.h
+++ b/include/drm/drm_connector.h
@@ -1604,10 +1604,13 @@ const char *drm_get_dvi_i_subconnector_name(int val);
 const char *drm_get_dvi_i_select_name(int val);
 const char *drm_get_tv_subconnector_name(int val);
 const char *drm_get_tv_select_name(int val);
+const char *drm_get_dp_subconnector_name(int val);
 const char *drm_get_content_protection_name(int val);
 const char *drm_get_hdcp_content_type_name(int val);
 
 int drm_mode_create_dvi_i_properties(struct drm_device *dev);
+void drm_connector_attach_dp_subconnector_property(struct drm_connector *connector);
+
 int drm_mode_create_tv_margin_properties(struct drm_device *dev);
 int drm_mode_create_tv_properties(struct drm_device *dev,
 				  unsigned int num_modes,
diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h
index e47dc22ebf50..5c2819924862 100644
--- a/include/drm/drm_dp_helper.h
+++ b/include/drm/drm_dp_helper.h
@@ -26,6 +26,7 @@
 #include <linux/delay.h>
 #include <linux/i2c.h>
 #include <linux/types.h>
+#include <drm/drm_connector.h>
 
 /*
  * Unless otherwise noted, all values are from the DP 1.1a spec.  Note that
@@ -1619,6 +1620,13 @@ int drm_dp_downstream_max_bpc(const u8 dpcd[DP_RECEIVER_CAP_SIZE],
 int drm_dp_downstream_id(struct drm_dp_aux *aux, char id[6]);
 void drm_dp_downstream_debug(struct seq_file *m, const u8 dpcd[DP_RECEIVER_CAP_SIZE],
 			     const u8 port_cap[4], struct drm_dp_aux *aux);
+enum drm_mode_subconnector
+drm_dp_subconnector_type(const u8 dpcd[DP_RECEIVER_CAP_SIZE],
+			 const u8 port_cap[4]);
+void drm_dp_set_subconnector_property(struct drm_connector *connector,
+				      enum drm_connector_status status,
+				      const u8 *dpcd,
+				      const u8 port_cap[4]);
 
 void drm_dp_remote_aux_init(struct drm_dp_aux *aux);
 void drm_dp_aux_init(struct drm_dp_aux *aux);
diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h
index ffb9852a0638..f768c7cf7de3 100644
--- a/include/drm/drm_mode_config.h
+++ b/include/drm/drm_mode_config.h
@@ -680,6 +680,12 @@ struct drm_mode_config {
 	 */
 	struct drm_property *dvi_i_select_subconnector_property;
 
+	/**
+	 * @dp_subconnector_property: Optional DP property to differentiate
+	 * between different DP downstream port types.
+	 */
+	struct drm_property *dp_subconnector_property;
+
 	/**
 	 * @tv_subconnector_property: Optional TV property to differentiate
 	 * between different TV connector types.
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index deea447e5f22..863eda048265 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -332,14 +332,19 @@ struct drm_mode_get_encoder {
 /* This is for connectors with multiple signal types. */
 /* Try to match DRM_MODE_CONNECTOR_X as closely as possible. */
 enum drm_mode_subconnector {
-	DRM_MODE_SUBCONNECTOR_Automatic = 0,
-	DRM_MODE_SUBCONNECTOR_Unknown = 0,
-	DRM_MODE_SUBCONNECTOR_DVID = 3,
-	DRM_MODE_SUBCONNECTOR_DVIA = 4,
-	DRM_MODE_SUBCONNECTOR_Composite = 5,
-	DRM_MODE_SUBCONNECTOR_SVIDEO = 6,
-	DRM_MODE_SUBCONNECTOR_Component = 8,
-	DRM_MODE_SUBCONNECTOR_SCART = 9,
+	DRM_MODE_SUBCONNECTOR_Automatic   = 0,  /* DVI-I, TV     */
+	DRM_MODE_SUBCONNECTOR_Unknown     = 0,  /* DVI-I, TV, DP */
+	DRM_MODE_SUBCONNECTOR_VGA	  = 1,  /*            DP */
+	DRM_MODE_SUBCONNECTOR_DVID	  = 3,  /* DVI-I      DP */
+	DRM_MODE_SUBCONNECTOR_DVIA	  = 4,  /* DVI-I         */
+	DRM_MODE_SUBCONNECTOR_Composite   = 5,  /*        TV     */
+	DRM_MODE_SUBCONNECTOR_SVIDEO	  = 6,  /*        TV     */
+	DRM_MODE_SUBCONNECTOR_Component   = 8,  /*        TV     */
+	DRM_MODE_SUBCONNECTOR_SCART	  = 9,  /*        TV     */
+	DRM_MODE_SUBCONNECTOR_DisplayPort = 10, /*            DP */
+	DRM_MODE_SUBCONNECTOR_HDMIA       = 11, /*            DP */
+	DRM_MODE_SUBCONNECTOR_Native      = 15, /*            DP */
+	DRM_MODE_SUBCONNECTOR_Wireless    = 18, /*            DP */
 };
 
 #define DRM_MODE_CONNECTOR_Unknown	0
-- 
cgit v1.2.3


From cda9edd02425d7902714c60a6f6e31881d2f2741 Mon Sep 17 00:00:00 2001
From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Date: Tue, 4 Aug 2020 11:59:53 +0300
Subject: drm/i915: introduce a mechanism to extend execbuf2

We're planning to use this for a couple of new features where we need
to provide additional parameters to execbuf.

v2: Check for invalid flags in execbuffer2 (Lionel)

v3: Rename I915_EXEC_EXT -> I915_EXEC_USE_EXTENSIONS (Chris)

v4: Rebase
    Move array fence parsing in i915_gem_do_execbuffer()

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20200804085954.350343-2-lionel.g.landwerlin@intel.com
Link: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2901
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 131 +++++++++++++++----------
 include/uapi/drm/i915_drm.h                    |  25 ++++-
 2 files changed, 102 insertions(+), 54 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 6b4ec66cb558..09d2f955b11e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -26,6 +26,7 @@
 #include "i915_gem_ioctls.h"
 #include "i915_sw_fence_work.h"
 #include "i915_trace.h"
+#include "i915_user_extensions.h"
 
 struct eb_vma {
 	struct i915_vma *vma;
@@ -281,6 +282,13 @@ struct i915_execbuffer {
 	int lut_size;
 	struct hlist_head *buckets; /** ht for relocation handles */
 	struct eb_vma_array *array;
+
+	struct i915_eb_fence {
+		struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
+	} *fences;
+	u32 n_fences;
+
+	u64 extension_flags; /** Available extensions parameters */
 };
 
 static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
@@ -1622,7 +1630,8 @@ static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
 		return -EINVAL;
 
 	/* Kernel clipping was a DRI1 misfeature */
-	if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) {
+	if (!(exec->flags & (I915_EXEC_FENCE_ARRAY |
+			     I915_EXEC_USE_EXTENSIONS))) {
 		if (exec->num_cliprects || exec->cliprects_ptr)
 			return -EINVAL;
 	}
@@ -2201,41 +2210,41 @@ eb_pin_engine(struct i915_execbuffer *eb,
 }
 
 static void
-__free_fence_array(struct drm_syncobj **fences, unsigned int n)
+__free_fence_array(struct i915_eb_fence *fences, unsigned int n)
 {
 	while (n--)
-		drm_syncobj_put(ptr_mask_bits(fences[n], 2));
+		drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2));
 	kvfree(fences);
 }
 
-static struct drm_syncobj **
+static int
 get_fence_array(struct drm_i915_gem_execbuffer2 *args,
-		struct drm_file *file)
+		struct i915_execbuffer *eb)
 {
 	const unsigned long nfences = args->num_cliprects;
 	struct drm_i915_gem_exec_fence __user *user;
-	struct drm_syncobj **fences;
+	struct i915_eb_fence *fences;
 	unsigned long n;
 	int err;
 
 	if (!(args->flags & I915_EXEC_FENCE_ARRAY))
-		return NULL;
+		return 0;
 
 	/* Check multiplication overflow for access_ok() and kvmalloc_array() */
 	BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
 	if (nfences > min_t(unsigned long,
 			    ULONG_MAX / sizeof(*user),
 			    SIZE_MAX / sizeof(*fences)))
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	user = u64_to_user_ptr(args->cliprects_ptr);
 	if (!access_ok(user, nfences * sizeof(*user)))
-		return ERR_PTR(-EFAULT);
+		return -EFAULT;
 
 	fences = kvmalloc_array(nfences, sizeof(*fences),
 				__GFP_NOWARN | GFP_KERNEL);
 	if (!fences)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 
 	for (n = 0; n < nfences; n++) {
 		struct drm_i915_gem_exec_fence fence;
@@ -2251,7 +2260,7 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args,
 			goto err;
 		}
 
-		syncobj = drm_syncobj_find(file, fence.handle);
+		syncobj = drm_syncobj_find(eb->file, fence.handle);
 		if (!syncobj) {
 			DRM_DEBUG("Invalid syncobj handle provided\n");
 			err = -ENOENT;
@@ -2261,38 +2270,31 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args,
 		BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
 			     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
 
-		fences[n] = ptr_pack_bits(syncobj, fence.flags, 2);
+		fences[n].syncobj = ptr_pack_bits(syncobj, fence.flags, 2);
 	}
 
-	return fences;
+	eb->fences = fences;
+	eb->n_fences = nfences;
+
+	return 0;
 
 err:
 	__free_fence_array(fences, n);
-	return ERR_PTR(err);
-}
-
-static void
-put_fence_array(struct drm_i915_gem_execbuffer2 *args,
-		struct drm_syncobj **fences)
-{
-	if (fences)
-		__free_fence_array(fences, args->num_cliprects);
+	return err;
 }
 
 static int
-await_fence_array(struct i915_execbuffer *eb,
-		  struct drm_syncobj **fences)
+await_fence_array(struct i915_execbuffer *eb)
 {
-	const unsigned int nfences = eb->args->num_cliprects;
 	unsigned int n;
 	int err;
 
-	for (n = 0; n < nfences; n++) {
+	for (n = 0; n < eb->n_fences; n++) {
 		struct drm_syncobj *syncobj;
 		struct dma_fence *fence;
 		unsigned int flags;
 
-		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
+		syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2);
 		if (!(flags & I915_EXEC_FENCE_WAIT))
 			continue;
 
@@ -2310,18 +2312,16 @@ await_fence_array(struct i915_execbuffer *eb,
 }
 
 static void
-signal_fence_array(struct i915_execbuffer *eb,
-		   struct drm_syncobj **fences)
+signal_fence_array(struct i915_execbuffer *eb)
 {
-	const unsigned int nfences = eb->args->num_cliprects;
 	struct dma_fence * const fence = &eb->request->fence;
 	unsigned int n;
 
-	for (n = 0; n < nfences; n++) {
+	for (n = 0; n < eb->n_fences; n++) {
 		struct drm_syncobj *syncobj;
 		unsigned int flags;
 
-		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
+		syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2);
 		if (!(flags & I915_EXEC_FENCE_SIGNAL))
 			continue;
 
@@ -2370,12 +2370,38 @@ static void eb_request_add(struct i915_execbuffer *eb)
 	mutex_unlock(&tl->mutex);
 }
 
+static const i915_user_extension_fn execbuf_extensions[] = {
+};
+
+static int
+parse_execbuf2_extensions(struct drm_i915_gem_execbuffer2 *args,
+			  struct i915_execbuffer *eb)
+{
+	eb->extension_flags = 0;
+
+	if (!(args->flags & I915_EXEC_USE_EXTENSIONS))
+		return 0;
+
+	/* The execbuf2 extension mechanism reuses cliprects_ptr. So we cannot
+	 * have another flag also using it at the same time.
+	 */
+	if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
+		return -EINVAL;
+
+	if (args->num_cliprects != 0)
+		return -EINVAL;
+
+	return i915_user_extensions(u64_to_user_ptr(args->cliprects_ptr),
+				    execbuf_extensions,
+				    ARRAY_SIZE(execbuf_extensions),
+				    eb);
+}
+
 static int
 i915_gem_do_execbuffer(struct drm_device *dev,
 		       struct drm_file *file,
 		       struct drm_i915_gem_execbuffer2 *args,
-		       struct drm_i915_gem_exec_object2 *exec,
-		       struct drm_syncobj **fences)
+		       struct drm_i915_gem_exec_object2 *exec)
 {
 	struct drm_i915_private *i915 = to_i915(dev);
 	struct i915_execbuffer eb;
@@ -2405,6 +2431,9 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	eb.batch_len = args->batch_len;
 	eb.trampoline = NULL;
 
+	eb.fences = NULL;
+	eb.n_fences = 0;
+
 	eb.batch_flags = 0;
 	if (args->flags & I915_EXEC_SECURE) {
 		if (INTEL_GEN(i915) >= 11)
@@ -2441,10 +2470,18 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		}
 	}
 
-	err = eb_create(&eb);
+	err = parse_execbuf2_extensions(args, &eb);
 	if (err)
 		goto err_out_fence;
 
+	err = get_fence_array(args, &eb);
+	if (err)
+		goto err_arr_fence;
+
+	err = eb_create(&eb);
+	if (err)
+		goto err_arr_fence;
+
 	GEM_BUG_ON(!eb.lut_size);
 
 	err = eb_select_context(&eb);
@@ -2539,8 +2576,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 			goto err_request;
 	}
 
-	if (fences) {
-		err = await_fence_array(&eb, fences);
+	if (eb.n_fences) {
+		err = await_fence_array(&eb);
 		if (err)
 			goto err_request;
 	}
@@ -2571,8 +2608,8 @@ err_request:
 	i915_request_get(eb.request);
 	eb_request_add(&eb);
 
-	if (fences)
-		signal_fence_array(&eb, fences);
+	if (eb.n_fences)
+		signal_fence_array(&eb);
 
 	if (out_fence) {
 		if (err == 0) {
@@ -2600,6 +2637,8 @@ err_context:
 	i915_gem_context_put(eb.gem_context);
 err_destroy:
 	eb_destroy(&eb);
+err_arr_fence:
+	__free_fence_array(eb.fences, eb.n_fences);
 err_out_fence:
 	if (out_fence_fd != -1)
 		put_unused_fd(out_fence_fd);
@@ -2699,7 +2738,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
 			exec2_list[i].flags = 0;
 	}
 
-	err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL);
+	err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list);
 	if (exec2.flags & __EXEC_HAS_RELOC) {
 		struct drm_i915_gem_exec_object __user *user_exec_list =
 			u64_to_user_ptr(args->buffers_ptr);
@@ -2731,7 +2770,6 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
 	struct drm_i915_private *i915 = to_i915(dev);
 	struct drm_i915_gem_execbuffer2 *args = data;
 	struct drm_i915_gem_exec_object2 *exec2_list;
-	struct drm_syncobj **fences = NULL;
 	const size_t count = args->buffer_count;
 	int err;
 
@@ -2759,15 +2797,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
 		return -EFAULT;
 	}
 
-	if (args->flags & I915_EXEC_FENCE_ARRAY) {
-		fences = get_fence_array(args, file);
-		if (IS_ERR(fences)) {
-			kvfree(exec2_list);
-			return PTR_ERR(fences);
-		}
-	}
-
-	err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences);
+	err = i915_gem_do_execbuffer(dev, file, args, exec2_list);
 
 	/*
 	 * Now that we have begun execution of the batchbuffer, we ignore
@@ -2808,7 +2838,6 @@ end:;
 	}
 
 	args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
-	put_fence_array(args, fences);
 	kvfree(exec2_list);
 	return err;
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 00546062e023..dcada8c3a693 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1046,6 +1046,10 @@ struct drm_i915_gem_exec_fence {
 	__u32 flags;
 };
 
+enum drm_i915_gem_execbuffer_ext {
+	DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
+};
+
 struct drm_i915_gem_execbuffer2 {
 	/**
 	 * List of gem_exec_object2 structs
@@ -1062,8 +1066,14 @@ struct drm_i915_gem_execbuffer2 {
 	__u32 num_cliprects;
 	/**
 	 * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY
-	 * is not set.  If I915_EXEC_FENCE_ARRAY is set, then this is a
-	 * struct drm_i915_gem_exec_fence *fences.
+	 * & I915_EXEC_USE_EXTENSIONS are not set.
+	 *
+	 * If I915_EXEC_FENCE_ARRAY is set, then this is a pointer to an array
+	 * of struct drm_i915_gem_exec_fence and num_cliprects is the length
+	 * of the array.
+	 *
+	 * If I915_EXEC_USE_EXTENSIONS is set, then this is a pointer to a
+	 * single struct i915_user_extension and num_cliprects is 0.
 	 */
 	__u64 cliprects_ptr;
 #define I915_EXEC_RING_MASK              (0x3f)
@@ -1181,7 +1191,16 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_FENCE_SUBMIT		(1 << 20)
 
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1))
+/*
+ * Setting I915_EXEC_USE_EXTENSIONS implies that
+ * drm_i915_gem_execbuffer2.cliprects_ptr is treated as a pointer to a linked
+ * list of i915_user_extension. Each i915_user_extension node is the base of a
+ * larger structure. The supported structures are listed in the
+ * drm_i915_gem_execbuffer_ext enum.
+ */
+#define I915_EXEC_USE_EXTENSIONS	(1 << 21)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_USE_EXTENSIONS << 1))
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
cgit v1.2.3


From 13149e8bafc4657254831ba6c16ed8780aa64a06 Mon Sep 17 00:00:00 2001
From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Date: Tue, 4 Aug 2020 11:59:54 +0300
Subject: drm/i915: add syncobj timeline support

Introduces new parameters to execbuf so that we can specify syncobj
handles as well as timeline points.

v2: Reuse i915_user_extension_fn

v3: Check that the chained extension is only present once (Chris)

v4: Check that dma_fence_chain_find_seqno returns a non NULL fence (Lionel)

v5: Use BIT_ULL (Chris)

v6: Fix issue with already signaled timeline points,
    dma_fence_chain_find_seqno() setting fence to NULL (Chris)

v7: Report ENOENT with invalid syncobj handle (Lionel)

v8: Check for out of order timeline point insertion (Chris)

v9: After explanations on
    https://lists.freedesktop.org/archives/dri-devel/2019-August/229287.html
    drop the ordering check from v8 (Lionel)

v10: Set first extension enum item to 1 (Jason)

v11: Rebase

v12: Allow multiple extension nodes of timeline syncobj (Chris)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Co-authored-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> (v11)
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20200804085954.350343-3-lionel.g.landwerlin@intel.com
Link: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2901
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 312 +++++++++++++++++++------
 drivers/gpu/drm/i915/i915_drv.c                |   3 +-
 drivers/gpu/drm/i915/i915_getparam.c           |   1 +
 include/uapi/drm/i915_drm.h                    |  38 ++-
 4 files changed, 279 insertions(+), 75 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 09d2f955b11e..02b1630f513e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -223,6 +223,13 @@ struct eb_vma_array {
  * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
  */
 
+struct eb_fence {
+	struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
+	struct dma_fence *dma_fence;
+	u64 value;
+	struct dma_fence_chain *chain_fence;
+};
+
 struct i915_execbuffer {
 	struct drm_i915_private *i915; /** i915 backpointer */
 	struct drm_file *file; /** per-file lookup tables and limits */
@@ -283,12 +290,8 @@ struct i915_execbuffer {
 	struct hlist_head *buckets; /** ht for relocation handles */
 	struct eb_vma_array *array;
 
-	struct i915_eb_fence {
-		struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
-	} *fences;
-	u32 n_fences;
-
-	u64 extension_flags; /** Available extensions parameters */
+	struct eb_fence *fences;
+	unsigned long num_fences;
 };
 
 static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
@@ -2210,77 +2213,222 @@ eb_pin_engine(struct i915_execbuffer *eb,
 }
 
 static void
-__free_fence_array(struct i915_eb_fence *fences, unsigned int n)
+__free_fence_array(struct eb_fence *fences, unsigned int n)
 {
-	while (n--)
+	while (n--) {
 		drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2));
+		dma_fence_put(fences[n].dma_fence);
+		kfree(fences[n].chain_fence);
+	}
 	kvfree(fences);
 }
 
 static int
-get_fence_array(struct drm_i915_gem_execbuffer2 *args,
-		struct i915_execbuffer *eb)
+add_timeline_fence_array(struct i915_execbuffer *eb,
+			 const struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences)
 {
-	const unsigned long nfences = args->num_cliprects;
-	struct drm_i915_gem_exec_fence __user *user;
-	struct i915_eb_fence *fences;
-	unsigned long n;
-	int err;
+	struct drm_i915_gem_exec_fence __user *user_fences;
+	u64 __user *user_values;
+	struct eb_fence *f;
+	u64 nfences;
+	int err = 0;
 
-	if (!(args->flags & I915_EXEC_FENCE_ARRAY))
+	nfences = timeline_fences->fence_count;
+	if (!nfences)
 		return 0;
 
 	/* Check multiplication overflow for access_ok() and kvmalloc_array() */
 	BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
 	if (nfences > min_t(unsigned long,
-			    ULONG_MAX / sizeof(*user),
-			    SIZE_MAX / sizeof(*fences)))
+			    ULONG_MAX / sizeof(*user_fences),
+			    SIZE_MAX / sizeof(*f)) - eb->num_fences)
 		return -EINVAL;
 
-	user = u64_to_user_ptr(args->cliprects_ptr);
-	if (!access_ok(user, nfences * sizeof(*user)))
+	user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
+	if (!access_ok(user_fences, nfences * sizeof(*user_fences)))
+		return -EFAULT;
+
+	user_values = u64_to_user_ptr(timeline_fences->values_ptr);
+	if (!access_ok(user_values, nfences * sizeof(*user_values)))
 		return -EFAULT;
 
-	fences = kvmalloc_array(nfences, sizeof(*fences),
-				__GFP_NOWARN | GFP_KERNEL);
-	if (!fences)
+	f = krealloc(eb->fences,
+		     (eb->num_fences + nfences) * sizeof(*f),
+		     __GFP_NOWARN | GFP_KERNEL);
+	if (!f)
 		return -ENOMEM;
 
-	for (n = 0; n < nfences; n++) {
-		struct drm_i915_gem_exec_fence fence;
+	eb->fences = f;
+	f += eb->num_fences;
+
+	BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
+		     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
+
+	while (nfences--) {
+		struct drm_i915_gem_exec_fence user_fence;
 		struct drm_syncobj *syncobj;
+		struct dma_fence *fence = NULL;
+		u64 point;
+
+		if (__copy_from_user(&user_fence,
+				     user_fences++,
+				     sizeof(user_fence)))
+			return -EFAULT;
+
+		if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS)
+			return -EINVAL;
+
+		if (__get_user(point, user_values++))
+			return -EFAULT;
+
+		syncobj = drm_syncobj_find(eb->file, user_fence.handle);
+		if (!syncobj) {
+			DRM_DEBUG("Invalid syncobj handle provided\n");
+			return -ENOENT;
+		}
+
+		fence = drm_syncobj_fence_get(syncobj);
 
-		if (__copy_from_user(&fence, user++, sizeof(fence))) {
-			err = -EFAULT;
-			goto err;
+		if (!fence && user_fence.flags &&
+		    !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
+			DRM_DEBUG("Syncobj handle has no fence\n");
+			drm_syncobj_put(syncobj);
+			return -EINVAL;
 		}
 
-		if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
-			err = -EINVAL;
-			goto err;
+		if (fence)
+			err = dma_fence_chain_find_seqno(&fence, point);
+
+		if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
+			DRM_DEBUG("Syncobj handle missing requested point %llu\n", point);
+			drm_syncobj_put(syncobj);
+			return err;
+		}
+
+		/*
+		 * A point might have been signaled already and
+		 * garbage collected from the timeline. In this case
+		 * just ignore the point and carry on.
+		 */
+		if (!fence && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
+			drm_syncobj_put(syncobj);
+			continue;
+		}
+
+		/*
+		 * For timeline syncobjs we need to preallocate chains for
+		 * later signaling.
+		 */
+		if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
+			/*
+			 * Waiting and signaling the same point (when point !=
+			 * 0) would break the timeline.
+			 */
+			if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
+				DRM_DEBUG("Trying to wait & signal the same timeline point.\n");
+				dma_fence_put(fence);
+				drm_syncobj_put(syncobj);
+				return -EINVAL;
+			}
+
+			f->chain_fence =
+				kmalloc(sizeof(*f->chain_fence),
+					GFP_KERNEL);
+			if (!f->chain_fence) {
+				drm_syncobj_put(syncobj);
+				dma_fence_put(fence);
+				return -ENOMEM;
+			}
+		} else {
+			f->chain_fence = NULL;
 		}
 
-		syncobj = drm_syncobj_find(eb->file, fence.handle);
+		f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
+		f->dma_fence = fence;
+		f->value = point;
+		f++;
+		eb->num_fences++;
+	}
+
+	return 0;
+}
+
+static int add_fence_array(struct i915_execbuffer *eb)
+{
+	struct drm_i915_gem_execbuffer2 *args = eb->args;
+	struct drm_i915_gem_exec_fence __user *user;
+	unsigned long num_fences = args->num_cliprects;
+	struct eb_fence *f;
+
+	if (!(args->flags & I915_EXEC_FENCE_ARRAY))
+		return 0;
+
+	if (!num_fences)
+		return 0;
+
+	/* Check multiplication overflow for access_ok() and krealloc() */
+	BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
+	if (num_fences > min_t(unsigned long,
+			       ULONG_MAX / sizeof(*user),
+			       SIZE_MAX / sizeof(*f) - eb->num_fences))
+		return -EINVAL;
+
+	user = u64_to_user_ptr(args->cliprects_ptr);
+	if (!access_ok(user, num_fences * sizeof(*user)))
+		return -EFAULT;
+
+	f = krealloc(eb->fences,
+		     (eb->num_fences + num_fences) * sizeof(*f),
+		     __GFP_NOWARN | GFP_KERNEL);
+	if (!f)
+		return -ENOMEM;
+
+	eb->fences = f;
+	f += eb->num_fences;
+	while (num_fences--) {
+		struct drm_i915_gem_exec_fence user_fence;
+		struct drm_syncobj *syncobj;
+		struct dma_fence *fence = NULL;
+
+		if (__copy_from_user(&user_fence, user++, sizeof(user_fence)))
+			return -EFAULT;
+
+		if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS)
+			return -EINVAL;
+
+		syncobj = drm_syncobj_find(eb->file, user_fence.handle);
 		if (!syncobj) {
 			DRM_DEBUG("Invalid syncobj handle provided\n");
-			err = -ENOENT;
-			goto err;
+			return -ENOENT;
+		}
+
+		if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
+			fence = drm_syncobj_fence_get(syncobj);
+			if (!fence) {
+				DRM_DEBUG("Syncobj handle has no fence\n");
+				drm_syncobj_put(syncobj);
+				return -EINVAL;
+			}
 		}
 
 		BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
 			     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
 
-		fences[n].syncobj = ptr_pack_bits(syncobj, fence.flags, 2);
+		f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
+		f->dma_fence = fence;
+		f->value = 0;
+		f->chain_fence = NULL;
+		f++;
+		eb->num_fences++;
 	}
 
-	eb->fences = fences;
-	eb->n_fences = nfences;
-
 	return 0;
+}
 
-err:
-	__free_fence_array(fences, n);
-	return err;
+static void put_fence_array(struct eb_fence *fences, int num_fences)
+{
+	if (fences)
+		__free_fence_array(fences, num_fences);
 }
 
 static int
@@ -2289,21 +2437,17 @@ await_fence_array(struct i915_execbuffer *eb)
 	unsigned int n;
 	int err;
 
-	for (n = 0; n < eb->n_fences; n++) {
+	for (n = 0; n < eb->num_fences; n++) {
 		struct drm_syncobj *syncobj;
-		struct dma_fence *fence;
 		unsigned int flags;
 
 		syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2);
-		if (!(flags & I915_EXEC_FENCE_WAIT))
-			continue;
 
-		fence = drm_syncobj_fence_get(syncobj);
-		if (!fence)
-			return -EINVAL;
+		if (!eb->fences[n].dma_fence)
+			continue;
 
-		err = i915_request_await_dma_fence(eb->request, fence);
-		dma_fence_put(fence);
+		err = i915_request_await_dma_fence(eb->request,
+						   eb->fences[n].dma_fence);
 		if (err < 0)
 			return err;
 	}
@@ -2311,13 +2455,12 @@ await_fence_array(struct i915_execbuffer *eb)
 	return 0;
 }
 
-static void
-signal_fence_array(struct i915_execbuffer *eb)
+static void signal_fence_array(const struct i915_execbuffer *eb)
 {
 	struct dma_fence * const fence = &eb->request->fence;
 	unsigned int n;
 
-	for (n = 0; n < eb->n_fences; n++) {
+	for (n = 0; n < eb->num_fences; n++) {
 		struct drm_syncobj *syncobj;
 		unsigned int flags;
 
@@ -2325,10 +2468,34 @@ signal_fence_array(struct i915_execbuffer *eb)
 		if (!(flags & I915_EXEC_FENCE_SIGNAL))
 			continue;
 
-		drm_syncobj_replace_fence(syncobj, fence);
+		if (eb->fences[n].chain_fence) {
+			drm_syncobj_add_point(syncobj,
+					      eb->fences[n].chain_fence,
+					      fence,
+					      eb->fences[n].value);
+			/*
+			 * The chain's ownership is transferred to the
+			 * timeline.
+			 */
+			eb->fences[n].chain_fence = NULL;
+		} else {
+			drm_syncobj_replace_fence(syncobj, fence);
+		}
 	}
 }
 
+static int
+parse_timeline_fences(struct i915_user_extension __user *ext, void *data)
+{
+	struct i915_execbuffer *eb = data;
+	struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
+
+	if (copy_from_user(&timeline_fences, ext, sizeof(timeline_fences)))
+		return -EFAULT;
+
+	return add_timeline_fence_array(eb, &timeline_fences);
+}
+
 static void retire_requests(struct intel_timeline *tl, struct i915_request *end)
 {
 	struct i915_request *rq, *rn;
@@ -2371,14 +2538,13 @@ static void eb_request_add(struct i915_execbuffer *eb)
 }
 
 static const i915_user_extension_fn execbuf_extensions[] = {
+	[DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
 };
 
 static int
 parse_execbuf2_extensions(struct drm_i915_gem_execbuffer2 *args,
 			  struct i915_execbuffer *eb)
 {
-	eb->extension_flags = 0;
-
 	if (!(args->flags & I915_EXEC_USE_EXTENSIONS))
 		return 0;
 
@@ -2432,7 +2598,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	eb.trampoline = NULL;
 
 	eb.fences = NULL;
-	eb.n_fences = 0;
+	eb.num_fences = 0;
 
 	eb.batch_flags = 0;
 	if (args->flags & I915_EXEC_SECURE) {
@@ -2451,14 +2617,24 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (args->flags & I915_EXEC_IS_PINNED)
 		eb.batch_flags |= I915_DISPATCH_PINNED;
 
+	err = parse_execbuf2_extensions(args, &eb);
+	if (err)
+		goto err_ext;
+
+	err = add_fence_array(&eb);
+	if (err)
+		goto err_ext;
+
 #define IN_FENCES (I915_EXEC_FENCE_IN | I915_EXEC_FENCE_SUBMIT)
 	if (args->flags & IN_FENCES) {
 		if ((args->flags & IN_FENCES) == IN_FENCES)
 			return -EINVAL;
 
 		in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
-		if (!in_fence)
-			return -EINVAL;
+		if (!in_fence) {
+			err = -EINVAL;
+			goto err_ext;
+		}
 	}
 #undef IN_FENCES
 
@@ -2470,17 +2646,9 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		}
 	}
 
-	err = parse_execbuf2_extensions(args, &eb);
-	if (err)
-		goto err_out_fence;
-
-	err = get_fence_array(args, &eb);
-	if (err)
-		goto err_arr_fence;
-
 	err = eb_create(&eb);
 	if (err)
-		goto err_arr_fence;
+		goto err_out_fence;
 
 	GEM_BUG_ON(!eb.lut_size);
 
@@ -2576,7 +2744,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 			goto err_request;
 	}
 
-	if (eb.n_fences) {
+	if (eb.fences) {
 		err = await_fence_array(&eb);
 		if (err)
 			goto err_request;
@@ -2608,7 +2776,7 @@ err_request:
 	i915_request_get(eb.request);
 	eb_request_add(&eb);
 
-	if (eb.n_fences)
+	if (eb.fences)
 		signal_fence_array(&eb);
 
 	if (out_fence) {
@@ -2637,13 +2805,13 @@ err_context:
 	i915_gem_context_put(eb.gem_context);
 err_destroy:
 	eb_destroy(&eb);
-err_arr_fence:
-	__free_fence_array(eb.fences, eb.n_fences);
 err_out_fence:
 	if (out_fence_fd != -1)
 		put_unused_fd(out_fence_fd);
 err_in_fence:
 	dma_fence_put(in_fence);
+err_ext:
+	put_fence_array(eb.fences, eb.num_fences);
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 5fd5af4bc855..2d10f6a2c042 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1846,7 +1846,8 @@ static struct drm_driver driver = {
 	 */
 	.driver_features =
 	    DRIVER_GEM |
-	    DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ,
+	    DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ |
+	    DRIVER_SYNCOBJ_TIMELINE,
 	.release = i915_driver_release,
 	.open = i915_driver_open,
 	.lastclose = i915_driver_lastclose,
diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
index 421613219ae9..f96032c60a12 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -132,6 +132,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_EXEC_BATCH_FIRST:
 	case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
 	case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
+	case I915_PARAM_HAS_EXEC_TIMELINE_FENCES:
 		/* For the time being all of these are always true;
 		 * if some supported hardware does not have one of these
 		 * features this value needs to be provided from
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index dcada8c3a693..fa1f3d62f9a6 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -619,6 +619,12 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_PERF_REVISION	54
 
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
+ * timeline syncobj through drm_i915_gem_execbuffer_ext_timeline_fences. See
+ * I915_EXEC_USE_EXTENSIONS.
+ */
+#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
+
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -1046,8 +1052,36 @@ struct drm_i915_gem_exec_fence {
 	__u32 flags;
 };
 
-enum drm_i915_gem_execbuffer_ext {
-	DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
+/**
+ * See drm_i915_gem_execbuffer_ext_timeline_fences.
+ */
+#define DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES 0
+
+/**
+ * This structure describes an array of drm_syncobj and associated points for
+ * timeline variants of drm_syncobj. It is invalid to append this structure to
+ * the execbuf if I915_EXEC_FENCE_ARRAY is set.
+ */
+struct drm_i915_gem_execbuffer_ext_timeline_fences {
+	struct i915_user_extension base;
+
+	/**
+	 * Number of elements in the handles_ptr & values_ptr arrays.
+	 */
+	__u64 fence_count;
+
+	/**
+	 * Pointer to an array of struct drm_i915_gem_exec_fence of length
+	 * fence_count.
+	 */
+	__u64 handles_ptr;
+
+	/**
+	 * Pointer to an array of u64 values of length fence_count. Values
+	 * must be 0 for a binary drm_syncobj. A value of 0 for a timeline
+	 * drm_syncobj is invalid as it turns a drm_syncobj into a binary one.
+	 */
+	__u64 values_ptr;
 };
 
 struct drm_i915_gem_execbuffer2 {
-- 
cgit v1.2.3


From 592d9fba33c275b72cb4dae99c187444daafcd33 Mon Sep 17 00:00:00 2001
From: David Stevens <stevensd@chromium.org>
Date: Tue, 18 Aug 2020 16:13:42 +0900
Subject: virtio-gpu: add VIRTIO_GPU_F_RESOURCE_UUID feature

This feature allows the guest to request a UUID from the host for a
particular virtio_gpu resource. The UUID can then be shared with other
virtio devices, to allow the other host devices to access the
virtio_gpu's corresponding host resource.

Signed-off-by: David Stevens <stevensd@chromium.org>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20200818071343.3461203-3-stevensd@chromium.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 include/uapi/linux/virtio_gpu.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h
index 0c85914d9369..9721d58b4d58 100644
--- a/include/uapi/linux/virtio_gpu.h
+++ b/include/uapi/linux/virtio_gpu.h
@@ -50,6 +50,10 @@
  * VIRTIO_GPU_CMD_GET_EDID
  */
 #define VIRTIO_GPU_F_EDID                1
+/*
+ * VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID
+ */
+#define VIRTIO_GPU_F_RESOURCE_UUID       2
 
 enum virtio_gpu_ctrl_type {
 	VIRTIO_GPU_UNDEFINED = 0,
@@ -66,6 +70,7 @@ enum virtio_gpu_ctrl_type {
 	VIRTIO_GPU_CMD_GET_CAPSET_INFO,
 	VIRTIO_GPU_CMD_GET_CAPSET,
 	VIRTIO_GPU_CMD_GET_EDID,
+	VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID,
 
 	/* 3d commands */
 	VIRTIO_GPU_CMD_CTX_CREATE = 0x0200,
@@ -87,6 +92,7 @@ enum virtio_gpu_ctrl_type {
 	VIRTIO_GPU_RESP_OK_CAPSET_INFO,
 	VIRTIO_GPU_RESP_OK_CAPSET,
 	VIRTIO_GPU_RESP_OK_EDID,
+	VIRTIO_GPU_RESP_OK_RESOURCE_UUID,
 
 	/* error responses */
 	VIRTIO_GPU_RESP_ERR_UNSPEC = 0x1200,
@@ -340,4 +346,17 @@ enum virtio_gpu_formats {
 	VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM  = 134,
 };
 
+/* VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID */
+struct virtio_gpu_resource_assign_uuid {
+	struct virtio_gpu_ctrl_hdr hdr;
+	__le32 resource_id;
+	__le32 padding;
+};
+
+/* VIRTIO_GPU_RESP_OK_RESOURCE_UUID */
+struct virtio_gpu_resp_resource_uuid {
+	struct virtio_gpu_ctrl_hdr hdr;
+	__u8 uuid[16];
+};
+
 #endif
-- 
cgit v1.2.3


From 0032ce0f85a269a006e91277be5fdbc05fad8426 Mon Sep 17 00:00:00 2001
From: Peilin Ye <yepeilin.cs@gmail.com>
Date: Sat, 1 Aug 2020 11:20:44 -0400
Subject: ptrace: Prevent kernel-infoleak in ptrace_get_syscall_info()

ptrace_get_syscall_info() is potentially copying uninitialized stack
memory to userspace, since the compiler may leave a 3-byte hole near the
beginning of `info`. Fix it by adding a padding field to `struct
ptrace_syscall_info`.

Fixes: 201766a20e30 ("ptrace: add PTRACE_GET_SYSCALL_INFO request")
Suggested-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Peilin Ye <yepeilin.cs@gmail.com>
Reviewed-by: Dmitry V. Levin <ldv@altlinux.org>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20200801152044.230416-1-yepeilin.cs@gmail.com
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 include/uapi/linux/ptrace.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
index a71b6e3b03eb..83ee45fa634b 100644
--- a/include/uapi/linux/ptrace.h
+++ b/include/uapi/linux/ptrace.h
@@ -81,7 +81,8 @@ struct seccomp_metadata {
 
 struct ptrace_syscall_info {
 	__u8 op;	/* PTRACE_SYSCALL_INFO_* */
-	__u32 arch __attribute__((__aligned__(sizeof(__u32))));
+	__u8 pad[3];
+	__u32 arch;
 	__u64 instruction_pointer;
 	__u64 stack_pointer;
 	union {
-- 
cgit v1.2.3


From aa207a05f95abc3530b7415232f0f73278336bd3 Mon Sep 17 00:00:00 2001
From: Alexander Usyskin <alexander.usyskin@intel.com>
Date: Tue, 18 Aug 2020 14:51:45 +0300
Subject: mei: add connect with vtag ioctl

This IOCTL is used to associate the current file descriptor
with a FW Client (given by UUID), and virtual tag (vtag).
The IOCTL opens a communication channel between a host client
and a FW client on a tagged channel. From this point on,
every reader  and write will communicate with the associated
FW client on the tagged channel. Upon close() the communication
is terminated.

The IOCTL argument is a struct with a union that contains
the input parameter and the output parameter for this IOCTL.

The input parameters are the UUID of the FW Client and a vtag [0,255].
The output parameter is the properties of the FW client.

Clients that do not support tagged connection
will respond with -EOPNOTSUPP

Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Link: https://lore.kernel.org/r/20200818115147.2567012-12-tomas.winkler@intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/mei/main.c  | 210 +++++++++++++++++++++++++++++++++++++++++++----
 include/uapi/linux/mei.h |  49 +++++++++++
 2 files changed, 243 insertions(+), 16 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c
index 401bf8743689..9f6682033ed7 100644
--- a/drivers/misc/mei/main.c
+++ b/drivers/misc/mei/main.c
@@ -395,17 +395,18 @@ out:
  * mei_ioctl_connect_client - the connect to fw client IOCTL function
  *
  * @file: private data of the file object
- * @data: IOCTL connect data, input and output parameters
+ * @in_client_uuid: requested UUID for connection
+ * @client: IOCTL connect data, output parameters
  *
  * Locking: called under "dev->device_lock" lock
  *
  * Return: 0 on success, <0 on failure.
  */
 static int mei_ioctl_connect_client(struct file *file,
-			struct mei_connect_client_data *data)
+				    const uuid_le *in_client_uuid,
+				    struct mei_client *client)
 {
 	struct mei_device *dev;
-	struct mei_client *client;
 	struct mei_me_client *me_cl;
 	struct mei_cl *cl;
 	int rets;
@@ -413,18 +414,15 @@ static int mei_ioctl_connect_client(struct file *file,
 	cl = file->private_data;
 	dev = cl->dev;
 
-	if (dev->dev_state != MEI_DEV_ENABLED)
-		return -ENODEV;
-
 	if (cl->state != MEI_FILE_INITIALIZING &&
 	    cl->state != MEI_FILE_DISCONNECTED)
 		return  -EBUSY;
 
 	/* find ME client we're trying to connect to */
-	me_cl = mei_me_cl_by_uuid(dev, &data->in_client_uuid);
+	me_cl = mei_me_cl_by_uuid(dev, in_client_uuid);
 	if (!me_cl) {
 		dev_dbg(dev->dev, "Cannot connect to FW Client UUID = %pUl\n",
-			&data->in_client_uuid);
+			in_client_uuid);
 		rets = -ENOTTY;
 		goto end;
 	}
@@ -434,7 +432,7 @@ static int mei_ioctl_connect_client(struct file *file,
 			 !dev->allow_fixed_address : !dev->hbm_f_fa_supported;
 		if (forbidden) {
 			dev_dbg(dev->dev, "Connection forbidden to FW Client UUID = %pUl\n",
-				&data->in_client_uuid);
+				in_client_uuid);
 			rets = -ENOTTY;
 			goto end;
 		}
@@ -448,7 +446,6 @@ static int mei_ioctl_connect_client(struct file *file,
 			me_cl->props.max_msg_length);
 
 	/* prepare the output buffer */
-	client = &data->out_client_properties;
 	client->max_msg_length = me_cl->props.max_msg_length;
 	client->protocol_version = me_cl->props.protocol_version;
 	dev_dbg(dev->dev, "Can connect?\n");
@@ -460,6 +457,135 @@ end:
 	return rets;
 }
 
+/**
+ * mei_vt_support_check - check if client supports vtags
+ *
+ * Locking: called under "dev->device_lock" lock
+ *
+ * @dev: mei_device
+ * @uuid: client UUID
+ *
+ * Return:
+ *	0 - supported
+ *	-ENOTTY - no such client
+ *	-EOPNOTSUPP - vtags are not supported by client
+ */
+static int mei_vt_support_check(struct mei_device *dev, const uuid_le *uuid)
+{
+	struct mei_me_client *me_cl;
+	int ret;
+
+	if (!dev->hbm_f_vt_supported)
+		return -EOPNOTSUPP;
+
+	me_cl = mei_me_cl_by_uuid(dev, uuid);
+	if (!me_cl) {
+		dev_dbg(dev->dev, "Cannot connect to FW Client UUID = %pUl\n",
+			uuid);
+		return -ENOTTY;
+	}
+	ret = me_cl->props.vt_supported ? 0 : -EOPNOTSUPP;
+	mei_me_cl_put(me_cl);
+
+	return ret;
+}
+
+/**
+ * mei_ioctl_connect_vtag - connect to fw client with vtag IOCTL function
+ *
+ * @file: private data of the file object
+ * @in_client_uuid: requested UUID for connection
+ * @client: IOCTL connect data, output parameters
+ * @vtag: vm tag
+ *
+ * Locking: called under "dev->device_lock" lock
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+static int mei_ioctl_connect_vtag(struct file *file,
+				  const uuid_le *in_client_uuid,
+				  struct mei_client *client,
+				  u8 vtag)
+{
+	struct mei_device *dev;
+	struct mei_cl *cl;
+	struct mei_cl *pos;
+	struct mei_cl_vtag *cl_vtag;
+
+	cl = file->private_data;
+	dev = cl->dev;
+
+	dev_dbg(dev->dev, "FW Client %pUl vtag %d\n", in_client_uuid, vtag);
+
+	switch (cl->state) {
+	case MEI_FILE_DISCONNECTED:
+		if (mei_cl_vtag_by_fp(cl, file) != vtag) {
+			dev_err(dev->dev, "reconnect with different vtag\n");
+			return -EINVAL;
+		}
+		break;
+	case MEI_FILE_INITIALIZING:
+		/* malicious connect from another thread may push vtag */
+		if (!IS_ERR(mei_cl_fp_by_vtag(cl, vtag))) {
+			dev_err(dev->dev, "vtag already filled\n");
+			return -EINVAL;
+		}
+
+		list_for_each_entry(pos, &dev->file_list, link) {
+			if (pos == cl)
+				continue;
+			if (!pos->me_cl)
+				continue;
+
+			/* only search for same UUID */
+			if (uuid_le_cmp(*mei_cl_uuid(pos), *in_client_uuid))
+				continue;
+
+			/* if tag already exist try another fp */
+			if (!IS_ERR(mei_cl_fp_by_vtag(pos, vtag)))
+				continue;
+
+			/* replace cl with acquired one */
+			dev_dbg(dev->dev, "replacing with existing cl\n");
+			mei_cl_unlink(cl);
+			kfree(cl);
+			file->private_data = pos;
+			cl = pos;
+			break;
+		}
+
+		cl_vtag = mei_cl_vtag_alloc(file, vtag);
+		if (IS_ERR(cl_vtag))
+			return -ENOMEM;
+
+		list_add_tail(&cl_vtag->list, &cl->vtag_map);
+		break;
+	default:
+		return -EBUSY;
+	}
+
+	while (cl->state != MEI_FILE_INITIALIZING &&
+	       cl->state != MEI_FILE_DISCONNECTED &&
+	       cl->state != MEI_FILE_CONNECTED) {
+		mutex_unlock(&dev->device_lock);
+		wait_event_timeout(cl->wait,
+				   (cl->state == MEI_FILE_CONNECTED ||
+				    cl->state == MEI_FILE_DISCONNECTED ||
+				    cl->state == MEI_FILE_DISCONNECT_REQUIRED ||
+				    cl->state == MEI_FILE_DISCONNECT_REPLY),
+				   mei_secs_to_jiffies(MEI_CL_CONNECT_TIMEOUT));
+		mutex_lock(&dev->device_lock);
+	}
+
+	if (!mei_cl_is_connected(cl))
+		return mei_ioctl_connect_client(file, in_client_uuid, client);
+
+	client->max_msg_length = cl->me_cl->props.max_msg_length;
+	client->protocol_version = cl->me_cl->props.protocol_version;
+
+	return 0;
+}
+
 /**
  * mei_ioctl_client_notify_request -
  *     propagate event notification request to client
@@ -516,7 +642,11 @@ static long mei_ioctl(struct file *file, unsigned int cmd, unsigned long data)
 {
 	struct mei_device *dev;
 	struct mei_cl *cl = file->private_data;
-	struct mei_connect_client_data connect_data;
+	struct mei_connect_client_data conn;
+	struct mei_connect_client_data_vtag conn_vtag;
+	const uuid_le *cl_uuid;
+	struct mei_client *props;
+	u8 vtag;
 	u32 notify_get, notify_req;
 	int rets;
 
@@ -537,20 +667,68 @@ static long mei_ioctl(struct file *file, unsigned int cmd, unsigned long data)
 	switch (cmd) {
 	case IOCTL_MEI_CONNECT_CLIENT:
 		dev_dbg(dev->dev, ": IOCTL_MEI_CONNECT_CLIENT.\n");
-		if (copy_from_user(&connect_data, (char __user *)data,
-				   sizeof(connect_data))) {
+		if (copy_from_user(&conn, (char __user *)data, sizeof(conn))) {
+			dev_dbg(dev->dev, "failed to copy data from userland\n");
+			rets = -EFAULT;
+			goto out;
+		}
+		cl_uuid = &conn.in_client_uuid;
+		props = &conn.out_client_properties;
+		vtag = 0;
+
+		rets = mei_vt_support_check(dev, cl_uuid);
+		if (rets == -ENOTTY)
+			goto out;
+		if (!rets)
+			rets = mei_ioctl_connect_vtag(file, cl_uuid, props,
+						      vtag);
+		else
+			rets = mei_ioctl_connect_client(file, cl_uuid, props);
+		if (rets)
+			goto out;
+
+		/* if all is ok, copying the data back to user. */
+		if (copy_to_user((char __user *)data, &conn, sizeof(conn))) {
+			dev_dbg(dev->dev, "failed to copy data to userland\n");
+			rets = -EFAULT;
+			goto out;
+		}
+
+		break;
+
+	case IOCTL_MEI_CONNECT_CLIENT_VTAG:
+		dev_dbg(dev->dev, "IOCTL_MEI_CONNECT_CLIENT_VTAG\n");
+		if (copy_from_user(&conn_vtag, (char __user *)data,
+				   sizeof(conn_vtag))) {
 			dev_dbg(dev->dev, "failed to copy data from userland\n");
 			rets = -EFAULT;
 			goto out;
 		}
 
-		rets = mei_ioctl_connect_client(file, &connect_data);
+		cl_uuid = &conn_vtag.connect.in_client_uuid;
+		props = &conn_vtag.out_client_properties;
+		vtag = conn_vtag.connect.vtag;
+
+		rets = mei_vt_support_check(dev, cl_uuid);
+		if (rets == -EOPNOTSUPP)
+			dev_dbg(dev->dev, "FW Client %pUl does not support vtags\n",
+				cl_uuid);
+		if (rets)
+			goto out;
+
+		if (!vtag) {
+			dev_dbg(dev->dev, "vtag can't be zero\n");
+			rets = -EINVAL;
+			goto out;
+		}
+
+		rets = mei_ioctl_connect_vtag(file, cl_uuid, props, vtag);
 		if (rets)
 			goto out;
 
 		/* if all is ok, copying the data back to user. */
-		if (copy_to_user((char __user *)data, &connect_data,
-				 sizeof(connect_data))) {
+		if (copy_to_user((char __user *)data, &conn_vtag,
+				 sizeof(conn_vtag))) {
 			dev_dbg(dev->dev, "failed to copy data to userland\n");
 			rets = -EFAULT;
 			goto out;
diff --git a/include/uapi/linux/mei.h b/include/uapi/linux/mei.h
index c6aec86cc5de..4f3638489d01 100644
--- a/include/uapi/linux/mei.h
+++ b/include/uapi/linux/mei.h
@@ -66,4 +66,53 @@ struct mei_connect_client_data {
  */
 #define IOCTL_MEI_NOTIFY_GET _IOR('H', 0x03, __u32)
 
+/**
+ * struct mei_connect_client_vtag - mei client information struct with vtag
+ *
+ * @in_client_uuid: UUID of client to connect
+ * @vtag: virtual tag
+ * @reserved: reserved for future use
+ */
+struct mei_connect_client_vtag {
+	uuid_le in_client_uuid;
+	__u8 vtag;
+	__u8 reserved[3];
+};
+
+/**
+ * struct mei_connect_client_data_vtag - IOCTL connect data union
+ *
+ * @connect: input connect data
+ * @out_client_properties: output client data
+ */
+struct mei_connect_client_data_vtag {
+	union {
+		struct mei_connect_client_vtag connect;
+		struct mei_client out_client_properties;
+	};
+};
+
+/**
+ * DOC:
+ * This IOCTL is used to associate the current file descriptor with a
+ * FW Client (given by UUID), and virtual tag (vtag).
+ * The IOCTL opens a communication channel between a host client and
+ * a FW client on a tagged channel. From this point on, every read
+ * and write will communicate with the associated FW client
+ * on the tagged channel.
+ * Upon close() the communication is terminated.
+ *
+ * The IOCTL argument is a struct with a union that contains
+ * the input parameter and the output parameter for this IOCTL.
+ *
+ * The input parameters are the UUID of the FW Client and a vtag [0,255].
+ * The output parameter is the properties of the FW client
+ * (FW protocol version and max message size).
+ *
+ * Clients that do not support tagged connection
+ * will respond with -EOPNOTSUPP.
+ */
+#define IOCTL_MEI_CONNECT_CLIENT_VTAG \
+	_IOWR('H', 0x04, struct mei_connect_client_data_vtag)
+
 #endif /* _LINUX_MEI_H  */
-- 
cgit v1.2.3


From a4e6a1dd57469d6ecee084db1507d3e37908d1e2 Mon Sep 17 00:00:00 2001
From: Gal Pressman <galpress@amazon.com>
Date: Fri, 31 Jul 2020 09:04:20 +0300
Subject: RDMA/efa: Introduce SRD RNR retry

This patch introduces the ability to configure SRD QPs with the RNR retry
parameter when issuing a modify QP command.

In addition, a capability bit was added to report support to the userspace
library.

Link: https://lore.kernel.org/r/20200731060420.17053-5-galpress@amazon.com
Reviewed-by: Firas JahJah <firasj@amazon.com>
Reviewed-by: Yossi Leybovich <sleybo@amazon.com>
Signed-off-by: Gal Pressman <galpress@amazon.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/efa/efa_admin_cmds_defs.h | 17 +++++++++++------
 drivers/infiniband/hw/efa/efa_com_cmd.c         |  2 ++
 drivers/infiniband/hw/efa/efa_com_cmd.h         |  2 ++
 drivers/infiniband/hw/efa/efa_verbs.c           | 19 ++++++++++++++++---
 include/uapi/rdma/efa-abi.h                     |  1 +
 5 files changed, 32 insertions(+), 9 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
index b0734d596f3b..d9676ca0b958 100644
--- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
+++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
@@ -197,7 +197,8 @@ struct efa_admin_modify_qp_cmd {
 	 * 2 : qkey
 	 * 3 : sq_psn
 	 * 4 : sq_drained_async_notify
-	 * 31:5 : reserved
+	 * 5 : rnr_retry
+	 * 31:6 : reserved
 	 */
 	u32 modify_mask;
 
@@ -219,8 +220,8 @@ struct efa_admin_modify_qp_cmd {
 	/* Enable async notification when SQ is drained */
 	u8 sq_drained_async_notify;
 
-	/* MBZ */
-	u8 reserved1;
+	/* Number of RNR retries (valid only for SRD QPs) */
+	u8 rnr_retry;
 
 	/* MBZ */
 	u16 reserved2;
@@ -255,8 +256,8 @@ struct efa_admin_query_qp_resp {
 	/* Indicates that draining is in progress */
 	u8 sq_draining;
 
-	/* MBZ */
-	u8 reserved1;
+	/* Number of RNR retries (valid only for SRD QPs) */
+	u8 rnr_retry;
 
 	/* MBZ */
 	u16 reserved2;
@@ -573,7 +574,9 @@ struct efa_admin_feature_device_attr_desc {
 	/*
 	 * 0 : rdma_read - If set, RDMA Read is supported on
 	 *    TX queues
-	 * 31:1 : reserved - MBZ
+	 * 1 : rnr_retry - If set, RNR retry is supported on
+	 *    modify QP command
+	 * 31:2 : reserved - MBZ
 	 */
 	u32 device_caps;
 
@@ -865,6 +868,7 @@ struct efa_admin_host_info {
 #define EFA_ADMIN_MODIFY_QP_CMD_QKEY_MASK                   BIT(2)
 #define EFA_ADMIN_MODIFY_QP_CMD_SQ_PSN_MASK                 BIT(3)
 #define EFA_ADMIN_MODIFY_QP_CMD_SQ_DRAINED_ASYNC_NOTIFY_MASK BIT(4)
+#define EFA_ADMIN_MODIFY_QP_CMD_RNR_RETRY_MASK              BIT(5)
 
 /* reg_mr_cmd */
 #define EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK      GENMASK(4, 0)
@@ -882,6 +886,7 @@ struct efa_admin_host_info {
 
 /* feature_device_attr_desc */
 #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK   BIT(0)
+#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RNR_RETRY_MASK   BIT(1)
 
 /* host_info */
 #define EFA_ADMIN_HOST_INFO_DRIVER_MODULE_TYPE_MASK         GENMASK(7, 0)
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c
index 6ac23627f65a..f24634cce1cb 100644
--- a/drivers/infiniband/hw/efa/efa_com_cmd.c
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.c
@@ -76,6 +76,7 @@ int efa_com_modify_qp(struct efa_com_dev *edev,
 	cmd.qkey = params->qkey;
 	cmd.sq_psn = params->sq_psn;
 	cmd.sq_drained_async_notify = params->sq_drained_async_notify;
+	cmd.rnr_retry = params->rnr_retry;
 
 	err = efa_com_cmd_exec(aq,
 			       (struct efa_admin_aq_entry *)&cmd,
@@ -121,6 +122,7 @@ int efa_com_query_qp(struct efa_com_dev *edev,
 	result->qkey = resp.qkey;
 	result->sq_draining = resp.sq_draining;
 	result->sq_psn = resp.sq_psn;
+	result->rnr_retry = resp.rnr_retry;
 
 	return 0;
 }
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h
index 190bac23f585..9ebee129f477 100644
--- a/drivers/infiniband/hw/efa/efa_com_cmd.h
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.h
@@ -47,6 +47,7 @@ struct efa_com_modify_qp_params {
 	u32 qkey;
 	u32 sq_psn;
 	u8 sq_drained_async_notify;
+	u8 rnr_retry;
 };
 
 struct efa_com_query_qp_params {
@@ -58,6 +59,7 @@ struct efa_com_query_qp_result {
 	u32 qkey;
 	u32 sq_draining;
 	u32 sq_psn;
+	u8 rnr_retry;
 };
 
 struct efa_com_destroy_qp_params {
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index 289ecdeb9a7b..3f7f19b9f463 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -223,6 +223,9 @@ int efa_query_device(struct ib_device *ibdev,
 		if (EFA_DEV_CAP(dev, RDMA_READ))
 			resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ;
 
+		if (EFA_DEV_CAP(dev, RNR_RETRY))
+			resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RNR_RETRY;
+
 		err = ib_copy_to_udata(udata, &resp,
 				       min(sizeof(resp), udata->outlen));
 		if (err) {
@@ -268,7 +271,7 @@ int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 
 #define EFA_QUERY_QP_SUPP_MASK \
 	(IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | \
-	 IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP)
+	 IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP | IB_QP_RNR_RETRY)
 
 	if (qp_attr_mask & ~EFA_QUERY_QP_SUPP_MASK) {
 		ibdev_dbg(&dev->ibdev,
@@ -290,6 +293,7 @@ int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 	qp_attr->sq_psn = result.sq_psn;
 	qp_attr->sq_draining = result.sq_draining;
 	qp_attr->port_num = 1;
+	qp_attr->rnr_retry = result.rnr_retry;
 
 	qp_attr->cap.max_send_wr = qp->max_send_wr;
 	qp_attr->cap.max_recv_wr = qp->max_recv_wr;
@@ -776,7 +780,9 @@ static const struct {
 			.valid = 1,
 			.req_param = IB_QP_SQ_PSN,
 			.opt_param = IB_QP_CUR_STATE |
-				     IB_QP_QKEY,
+				     IB_QP_QKEY |
+				     IB_QP_RNR_RETRY,
+
 		}
 	},
 	[IB_QPS_RTS] = {
@@ -856,7 +862,8 @@ static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp,
 
 #define EFA_MODIFY_QP_SUPP_MASK \
 	(IB_QP_STATE | IB_QP_CUR_STATE | IB_QP_EN_SQD_ASYNC_NOTIFY | \
-	 IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN)
+	 IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN | \
+	 IB_QP_RNR_RETRY)
 
 	if (qp_attr_mask & ~EFA_MODIFY_QP_SUPP_MASK) {
 		ibdev_dbg(&dev->ibdev,
@@ -943,6 +950,12 @@ int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 		params.sq_psn = qp_attr->sq_psn;
 	}
 
+	if (qp_attr_mask & IB_QP_RNR_RETRY) {
+		EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_RNR_RETRY,
+			1);
+		params.rnr_retry = qp_attr->rnr_retry;
+	}
+
 	err = efa_com_modify_qp(&dev->edev, &params);
 	if (err)
 		return err;
diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h
index 507a2862bedb..f89fbb5b1e8d 100644
--- a/include/uapi/rdma/efa-abi.h
+++ b/include/uapi/rdma/efa-abi.h
@@ -105,6 +105,7 @@ struct efa_ibv_create_ah_resp {
 
 enum {
 	EFA_QUERY_DEVICE_CAPS_RDMA_READ = 1 << 0,
+	EFA_QUERY_DEVICE_CAPS_RNR_RETRY = 1 << 1,
 };
 
 struct efa_ibv_ex_query_device_resp {
-- 
cgit v1.2.3


From ba171d3f0850003216fd1a85190d17b1feddb961 Mon Sep 17 00:00:00 2001
From: Cedric Neveux <cedric.neveux@nxp.com>
Date: Mon, 4 Mar 2019 08:54:23 +0100
Subject: driver: tee: Handle NULL pointer indication from client

TEE Client introduces a new capability "TEE_GEN_CAP_MEMREF_NULL"
to handle the support of the shared memory buffer with a NULL pointer.

This capability depends on TEE Capabilities and driver support.
Driver and TEE exchange capabilities at driver initialization.

Signed-off-by: Michael Whitfield <michael.whitfield@nxp.com>
Signed-off-by: Cedric Neveux <cedric.neveux@nxp.com>
Reviewed-by: Joakim Bech <joakim.bech@linaro.org>
Tested-by: Joakim Bech <joakim.bech@linaro.org> (QEMU)
Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
---
 drivers/tee/optee/core.c      |  7 +++++++
 drivers/tee/optee/optee_smc.h |  3 +++
 drivers/tee/tee_core.c        | 49 +++++++++++++++++++++++++++----------------
 include/linux/tee_drv.h       |  3 +++
 include/uapi/linux/tee.h      | 13 ++++++++++++
 5 files changed, 57 insertions(+), 18 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c
index b373b1b08b6d..cf4718c6d35d 100644
--- a/drivers/tee/optee/core.c
+++ b/drivers/tee/optee/core.c
@@ -216,6 +216,8 @@ static void optee_get_version(struct tee_device *teedev,
 
 	if (optee->sec_caps & OPTEE_SMC_SEC_CAP_DYNAMIC_SHM)
 		v.gen_caps |= TEE_GEN_CAP_REG_MEM;
+	if (optee->sec_caps & OPTEE_SMC_SEC_CAP_MEMREF_NULL)
+		v.gen_caps |= TEE_GEN_CAP_MEMREF_NULL;
 	*vers = v;
 }
 
@@ -262,6 +264,11 @@ static int optee_open(struct tee_context *ctx)
 	mutex_init(&ctxdata->mutex);
 	INIT_LIST_HEAD(&ctxdata->sess_list);
 
+	if (optee->sec_caps & OPTEE_SMC_SEC_CAP_MEMREF_NULL)
+		ctx->cap_memref_null  = true;
+	else
+		ctx->cap_memref_null = false;
+
 	ctx->data = ctxdata;
 	return 0;
 }
diff --git a/drivers/tee/optee/optee_smc.h b/drivers/tee/optee/optee_smc.h
index c72122d9c997..777ad54d4c2c 100644
--- a/drivers/tee/optee/optee_smc.h
+++ b/drivers/tee/optee/optee_smc.h
@@ -215,6 +215,9 @@ struct optee_smc_get_shm_config_result {
  */
 #define OPTEE_SMC_SEC_CAP_DYNAMIC_SHM		BIT(2)
 
+/* Secure world supports Shared Memory with a NULL buffer reference */
+#define OPTEE_SMC_SEC_CAP_MEMREF_NULL		BIT(4)
+
 #define OPTEE_SMC_FUNCID_EXCHANGE_CAPABILITIES	9
 #define OPTEE_SMC_EXCHANGE_CAPABILITIES \
 	OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_EXCHANGE_CAPABILITIES)
diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c
index 64637e09a095..ce0f0309b6ac 100644
--- a/drivers/tee/tee_core.c
+++ b/drivers/tee/tee_core.c
@@ -383,25 +383,38 @@ static int params_from_user(struct tee_context *ctx, struct tee_param *params,
 		case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT:
 		case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT:
 			/*
-			 * If we fail to get a pointer to a shared memory
-			 * object (and increase the ref count) from an
-			 * identifier we return an error. All pointers that
-			 * has been added in params have an increased ref
-			 * count. It's the callers responibility to do
-			 * tee_shm_put() on all resolved pointers.
+			 * If a NULL pointer is passed to a TA in the TEE,
+			 * the ip.c IOCTL parameter is set to TEE_MEMREF_NULL
+			 * indicating a NULL memory reference.
 			 */
-			shm = tee_shm_get_from_id(ctx, ip.c);
-			if (IS_ERR(shm))
-				return PTR_ERR(shm);
-
-			/*
-			 * Ensure offset + size does not overflow offset
-			 * and does not overflow the size of the referred
-			 * shared memory object.
-			 */
-			if ((ip.a + ip.b) < ip.a ||
-			    (ip.a + ip.b) > shm->size) {
-				tee_shm_put(shm);
+			if (ip.c != TEE_MEMREF_NULL) {
+				/*
+				 * If we fail to get a pointer to a shared
+				 * memory object (and increase the ref count)
+				 * from an identifier we return an error. All
+				 * pointers that have been added in params have
+				 * an increased ref count. It's the caller's
+				 * responsibility to do tee_shm_put() on all
+				 * resolved pointers.
+				 */
+				shm = tee_shm_get_from_id(ctx, ip.c);
+				if (IS_ERR(shm))
+					return PTR_ERR(shm);
+
+				/*
+				 * Ensure offset + size does not overflow
+				 * offset and does not overflow the size of
+				 * the referred shared memory object.
+				 */
+				if ((ip.a + ip.b) < ip.a ||
+				    (ip.a + ip.b) > shm->size) {
+					tee_shm_put(shm);
+					return -EINVAL;
+				}
+			} else if (ctx->cap_memref_null) {
+				/* Pass NULL pointer to OP-TEE */
+				shm = NULL;
+			} else {
 				return -EINVAL;
 			}
 
diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h
index d074302989dd..cdd049a724b1 100644
--- a/include/linux/tee_drv.h
+++ b/include/linux/tee_drv.h
@@ -47,6 +47,8 @@ struct tee_shm_pool;
  *              and just return with an error code. It is needed for requests
  *              that arises from TEE based kernel drivers that should be
  *              non-blocking in nature.
+ * @cap_memref_null: flag indicating if the TEE Client supports shared
+ *                   memory buffer with a NULL pointer.
  */
 struct tee_context {
 	struct tee_device *teedev;
@@ -54,6 +56,7 @@ struct tee_context {
 	struct kref refcount;
 	bool releasing;
 	bool supp_nowait;
+	bool cap_memref_null;
 };
 
 struct tee_param_memref {
diff --git a/include/uapi/linux/tee.h b/include/uapi/linux/tee.h
index b619f37ee03e..d67cadf221fc 100644
--- a/include/uapi/linux/tee.h
+++ b/include/uapi/linux/tee.h
@@ -51,6 +51,9 @@
 #define TEE_GEN_CAP_GP		(1 << 0)/* GlobalPlatform compliant TEE */
 #define TEE_GEN_CAP_PRIVILEGED	(1 << 1)/* Privileged device (for supplicant) */
 #define TEE_GEN_CAP_REG_MEM	(1 << 2)/* Supports registering shared memory */
+#define TEE_GEN_CAP_MEMREF_NULL	(1 << 3)/* NULL MemRef support */
+
+#define TEE_MEMREF_NULL		(__u64)(-1) /* NULL MemRef Buffer */
 
 /*
  * TEE Implementation ID
@@ -200,6 +203,16 @@ struct tee_ioctl_buf_data {
  * a part of a shared memory by specifying an offset (@a) and size (@b) of
  * the object. To supply the entire shared memory object set the offset
  * (@a) to 0 and size (@b) to the previously returned size of the object.
+ *
+ * A client may need to present a NULL pointer in the argument
+ * passed to a trusted application in the TEE.
+ * This is also a requirement in GlobalPlatform Client API v1.0c
+ * (section 3.2.5 memory references), which can be found at
+ * http://www.globalplatform.org/specificationsdevice.asp
+ *
+ * If a NULL pointer is passed to a TA in the TEE, the (@c)
+ * IOCTL parameter value must be set to TEE_MEMREF_NULL indicating a NULL
+ * memory reference.
  */
 struct tee_ioctl_param {
 	__u64 attr;
-- 
cgit v1.2.3


From 6b0a249a301e2af9adda84adbced3a2988248b95 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Fri, 21 Aug 2020 11:44:18 -0700
Subject: bpf: Implement link_query for bpf iterators

This patch implements bpf_link callback functions
show_fdinfo and fill_link_info to support link_query
interface.

The general interface for show_fdinfo and fill_link_info
will print/fill the target_name. Each target can
register show_fdinfo and fill_link_info callbacks
to print/fill more target specific information.

For example, the below is a fdinfo result for a bpf
task iterator.
  $ cat /proc/1749/fdinfo/7
  pos:    0
  flags:  02000000
  mnt_id: 14
  link_type:      iter
  link_id:        11
  prog_tag:       990e1f8152f7e54f
  prog_id:        59
  target_name:    task

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200821184418.574122-1-yhs@fb.com
---
 include/linux/bpf.h            |  6 +++++
 include/uapi/linux/bpf.h       |  7 +++++
 kernel/bpf/bpf_iter.c          | 58 ++++++++++++++++++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h |  7 +++++
 4 files changed, 78 insertions(+)

(limited to 'include/uapi')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a9b7185a6b37..529e9b183eeb 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1218,12 +1218,18 @@ typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog,
 					union bpf_iter_link_info *linfo,
 					struct bpf_iter_aux_info *aux);
 typedef void (*bpf_iter_detach_target_t)(struct bpf_iter_aux_info *aux);
+typedef void (*bpf_iter_show_fdinfo_t) (const struct bpf_iter_aux_info *aux,
+					struct seq_file *seq);
+typedef int (*bpf_iter_fill_link_info_t)(const struct bpf_iter_aux_info *aux,
+					 struct bpf_link_info *info);
 
 #define BPF_ITER_CTX_ARG_MAX 2
 struct bpf_iter_reg {
 	const char *target;
 	bpf_iter_attach_target_t attach_target;
 	bpf_iter_detach_target_t detach_target;
+	bpf_iter_show_fdinfo_t show_fdinfo;
+	bpf_iter_fill_link_info_t fill_link_info;
 	u32 ctx_arg_info_size;
 	struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
 	const struct bpf_iter_seq_info *seq_info;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0480f893facd..a1bbaff7a0af 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4071,6 +4071,13 @@ struct bpf_link_info {
 			__u64 cgroup_id;
 			__u32 attach_type;
 		} cgroup;
+		struct {
+			__aligned_u64 target_name; /* in/out: target_name buffer ptr */
+			__u32 target_name_len;	   /* in/out: target_name buffer len */
+			union {
+				__u32 map_id;
+			} map;
+		} iter;
 		struct  {
 			__u32 netns_ino;
 			__u32 attach_type;
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index b6715964b685..aeec7e174188 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -377,10 +377,68 @@ out_unlock:
 	return ret;
 }
 
+static void bpf_iter_link_show_fdinfo(const struct bpf_link *link,
+				      struct seq_file *seq)
+{
+	struct bpf_iter_link *iter_link =
+		container_of(link, struct bpf_iter_link, link);
+	bpf_iter_show_fdinfo_t show_fdinfo;
+
+	seq_printf(seq,
+		   "target_name:\t%s\n",
+		   iter_link->tinfo->reg_info->target);
+
+	show_fdinfo = iter_link->tinfo->reg_info->show_fdinfo;
+	if (show_fdinfo)
+		show_fdinfo(&iter_link->aux, seq);
+}
+
+static int bpf_iter_link_fill_link_info(const struct bpf_link *link,
+					struct bpf_link_info *info)
+{
+	struct bpf_iter_link *iter_link =
+		container_of(link, struct bpf_iter_link, link);
+	char __user *ubuf = u64_to_user_ptr(info->iter.target_name);
+	bpf_iter_fill_link_info_t fill_link_info;
+	u32 ulen = info->iter.target_name_len;
+	const char *target_name;
+	u32 target_len;
+
+	if (!ulen ^ !ubuf)
+		return -EINVAL;
+
+	target_name = iter_link->tinfo->reg_info->target;
+	target_len =  strlen(target_name);
+	info->iter.target_name_len = target_len + 1;
+
+	if (ubuf) {
+		if (ulen >= target_len + 1) {
+			if (copy_to_user(ubuf, target_name, target_len + 1))
+				return -EFAULT;
+		} else {
+			char zero = '\0';
+
+			if (copy_to_user(ubuf, target_name, ulen - 1))
+				return -EFAULT;
+			if (put_user(zero, ubuf + ulen - 1))
+				return -EFAULT;
+			return -ENOSPC;
+		}
+	}
+
+	fill_link_info = iter_link->tinfo->reg_info->fill_link_info;
+	if (fill_link_info)
+		return fill_link_info(&iter_link->aux, info);
+
+	return 0;
+}
+
 static const struct bpf_link_ops bpf_iter_link_lops = {
 	.release = bpf_iter_link_release,
 	.dealloc = bpf_iter_link_dealloc,
 	.update_prog = bpf_iter_link_replace,
+	.show_fdinfo = bpf_iter_link_show_fdinfo,
+	.fill_link_info = bpf_iter_link_fill_link_info,
 };
 
 bool bpf_link_is_iter(struct bpf_link *link)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 0480f893facd..a1bbaff7a0af 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4071,6 +4071,13 @@ struct bpf_link_info {
 			__u64 cgroup_id;
 			__u32 attach_type;
 		} cgroup;
+		struct {
+			__aligned_u64 target_name; /* in/out: target_name buffer ptr */
+			__u32 target_name_len;	   /* in/out: target_name buffer len */
+			union {
+				__u32 map_id;
+			} map;
+		} iter;
 		struct  {
 			__u32 netns_ino;
 			__u32 attach_type;
-- 
cgit v1.2.3


From 4ffa22fd22a7cbde1a1394b2707ea73593dc0fda Mon Sep 17 00:00:00 2001
From: Matt Ranostay <matt.ranostay@konsulko.com>
Date: Thu, 23 Jul 2020 09:29:43 +0300
Subject: iio: add IIO_MOD_O2 modifier

Add modifier IIO_MOD_O2 for O2 concentration reporting

Signed-off-by: Matt Ranostay <matt.ranostay@konsulko.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 Documentation/ABI/testing/sysfs-bus-iio | 2 ++
 drivers/iio/industrialio-core.c         | 1 +
 include/uapi/linux/iio/types.h          | 1 +
 tools/iio/iio_event_monitor.c           | 2 ++
 4 files changed, 6 insertions(+)

(limited to 'include/uapi')

diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio
index 5c62bfb0f3f5..405181fde40a 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio
+++ b/Documentation/ABI/testing/sysfs-bus-iio
@@ -1564,6 +1564,8 @@ What:		/sys/bus/iio/devices/iio:deviceX/in_concentration_ethanol_raw
 What:		/sys/bus/iio/devices/iio:deviceX/in_concentrationX_ethanol_raw
 What:		/sys/bus/iio/devices/iio:deviceX/in_concentration_h2_raw
 What:		/sys/bus/iio/devices/iio:deviceX/in_concentrationX_h2_raw
+What:		/sys/bus/iio/devices/iio:deviceX/in_concentration_o2_raw
+What:		/sys/bus/iio/devices/iio:deviceX/in_concentrationX_o2_raw
 What:		/sys/bus/iio/devices/iio:deviceX/in_concentration_voc_raw
 What:		/sys/bus/iio/devices/iio:deviceX/in_concentrationX_voc_raw
 KernelVersion:	4.3
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 606d5e61c575..59003dc44e60 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -133,6 +133,7 @@ static const char * const iio_modifier_names[] = {
 	[IIO_MOD_PM10] = "pm10",
 	[IIO_MOD_ETHANOL] = "ethanol",
 	[IIO_MOD_H2] = "h2",
+	[IIO_MOD_O2] = "o2",
 };
 
 /* relies on pairs of these shared then separate */
diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h
index fdd81affca4b..48c13147c0a8 100644
--- a/include/uapi/linux/iio/types.h
+++ b/include/uapi/linux/iio/types.h
@@ -94,6 +94,7 @@ enum iio_modifier {
 	IIO_MOD_PM10,
 	IIO_MOD_ETHANOL,
 	IIO_MOD_H2,
+	IIO_MOD_O2,
 };
 
 enum iio_event_type {
diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c
index f115d166c985..bb03859db89d 100644
--- a/tools/iio/iio_event_monitor.c
+++ b/tools/iio/iio_event_monitor.c
@@ -119,6 +119,7 @@ static const char * const iio_modifier_names[] = {
 	[IIO_MOD_PM2P5] = "pm2p5",
 	[IIO_MOD_PM4] = "pm4",
 	[IIO_MOD_PM10] = "pm10",
+	[IIO_MOD_O2] = "o2",
 };
 
 static bool event_is_known(struct iio_event_data *event)
@@ -211,6 +212,7 @@ static bool event_is_known(struct iio_event_data *event)
 	case IIO_MOD_PM2P5:
 	case IIO_MOD_PM4:
 	case IIO_MOD_PM10:
+	case IIO_MOD_O2:
 		break;
 	default:
 		return false;
-- 
cgit v1.2.3


From eee049c0ef5b5b433f36841801e34c21c9f82a23 Mon Sep 17 00:00:00 2001
From: Tom Parkin <tparkin@katalix.com>
Date: Sat, 22 Aug 2020 15:59:08 +0100
Subject: l2tp: remove tunnel and session debug flags field

The l2tp subsystem now uses standard kernel logging APIs for
informational and warning messages, and tracepoints for debug
information.

Now that the tunnel and session debug flags are unused, remove the field
from the core structures.

Various system calls (in the case of l2tp_ppp) and netlink messages
handle the getting and setting of debug flags.  To avoid userspace
breakage don't modify the API of these calls; simply ignore set
requests, and send dummy data for get requests.

Signed-off-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_pppol2tp.h |  2 +-
 include/uapi/linux/l2tp.h        |  6 ++++--
 net/l2tp/l2tp_core.c             |  8 --------
 net/l2tp/l2tp_core.h             |  4 ----
 net/l2tp/l2tp_debugfs.c          |  4 ++--
 net/l2tp/l2tp_netlink.c          | 16 ++--------------
 net/l2tp/l2tp_ppp.c              | 15 ++++++++-------
 7 files changed, 17 insertions(+), 38 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/if_pppol2tp.h b/include/uapi/linux/if_pppol2tp.h
index 060b4d1f3129..a91044328bc9 100644
--- a/include/uapi/linux/if_pppol2tp.h
+++ b/include/uapi/linux/if_pppol2tp.h
@@ -75,7 +75,7 @@ struct pppol2tpv3in6_addr {
 };
 
 /* Socket options:
- * DEBUG	- bitmask of debug message categories
+ * DEBUG	- bitmask of debug message categories (not used)
  * SENDSEQ	- 0 => don't send packets with sequence numbers
  *		  1 => send packets with sequence numbers
  * RECVSEQ	- 0 => receive packet sequence numbers are optional
diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h
index 61158f5a1a5b..88a0d32b8c07 100644
--- a/include/uapi/linux/l2tp.h
+++ b/include/uapi/linux/l2tp.h
@@ -108,7 +108,7 @@ enum {
 	L2TP_ATTR_VLAN_ID,		/* u16 (not used) */
 	L2TP_ATTR_COOKIE,		/* 0, 4 or 8 bytes */
 	L2TP_ATTR_PEER_COOKIE,		/* 0, 4 or 8 bytes */
-	L2TP_ATTR_DEBUG,		/* u32, enum l2tp_debug_flags */
+	L2TP_ATTR_DEBUG,		/* u32, enum l2tp_debug_flags (not used) */
 	L2TP_ATTR_RECV_SEQ,		/* u8 */
 	L2TP_ATTR_SEND_SEQ,		/* u8 */
 	L2TP_ATTR_LNS_MODE,		/* u8 */
@@ -177,7 +177,9 @@ enum l2tp_seqmode {
 };
 
 /**
- * enum l2tp_debug_flags - debug message categories for L2TP tunnels/sessions
+ * enum l2tp_debug_flags - debug message categories for L2TP tunnels/sessions.
+ *
+ * Unused.
  *
  * @L2TP_MSG_DEBUG: verbose debug (if compiled in)
  * @L2TP_MSG_CONTROL: userspace - kernel interface
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index d8435b6f6fee..560c687f5457 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1401,16 +1401,12 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 	tunnel->version = version;
 	tunnel->tunnel_id = tunnel_id;
 	tunnel->peer_tunnel_id = peer_tunnel_id;
-	tunnel->debug = L2TP_DEFAULT_DEBUG_FLAGS;
 
 	tunnel->magic = L2TP_TUNNEL_MAGIC;
 	sprintf(&tunnel->name[0], "tunl %u", tunnel_id);
 	rwlock_init(&tunnel->hlist_lock);
 	tunnel->acpt_newsess = true;
 
-	if (cfg)
-		tunnel->debug = cfg->debug;
-
 	tunnel->encap = encap;
 
 	refcount_set(&tunnel->ref_count, 1);
@@ -1608,12 +1604,8 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
 		INIT_HLIST_NODE(&session->hlist);
 		INIT_HLIST_NODE(&session->global_hlist);
 
-		/* Inherit debug options from tunnel */
-		session->debug = tunnel->debug;
-
 		if (cfg) {
 			session->pwtype = cfg->pw_type;
-			session->debug = cfg->debug;
 			session->send_seq = cfg->send_seq;
 			session->recv_seq = cfg->recv_seq;
 			session->lns_mode = cfg->lns_mode;
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 7a06ac135a9b..07249c5f22ef 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -51,7 +51,6 @@ struct l2tp_session_cfg {
 	unsigned int		lns_mode:1;	/* behave as LNS?
 						 * LAC enables sequence numbers under LNS control.
 						 */
-	int			debug;		/* bitmask of debug message categories */
 	u16			l2specific_type; /* Layer 2 specific type */
 	u8			cookie[8];	/* optional cookie */
 	int			cookie_len;	/* 0, 4 or 8 bytes */
@@ -98,7 +97,6 @@ struct l2tp_session {
 	unsigned int		lns_mode:1;	/* behave as LNS?
 						 * LAC enables sequence numbers under LNS control.
 						 */
-	int			debug;		/* bitmask of debug message categories */
 	int			reorder_timeout; /* configured reorder timeout (in jiffies) */
 	int			reorder_skip;	/* set if skip to next nr */
 	enum l2tp_pwtype	pwtype;
@@ -132,7 +130,6 @@ struct l2tp_session {
 
 /* L2TP tunnel configuration */
 struct l2tp_tunnel_cfg {
-	int			debug;		/* bitmask of debug message categories */
 	enum l2tp_encap_type	encap;
 
 	/* Used only for kernel-created sockets */
@@ -173,7 +170,6 @@ struct l2tp_tunnel {
 	int			version;	/* 2=>L2TPv2, 3=>L2TPv3 */
 
 	char			name[L2TP_TUNNEL_NAME_MAX]; /* for logging */
-	int			debug;		/* bitmask of debug message categories */
 	enum l2tp_encap_type	encap;
 	struct l2tp_stats	stats;
 
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 96cb9601c21b..bca75bef8282 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -167,7 +167,7 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v)
 		   tunnel->sock ? refcount_read(&tunnel->sock->sk_refcnt) : 0,
 		   refcount_read(&tunnel->ref_count));
 	seq_printf(m, " %08x rx %ld/%ld/%ld rx %ld/%ld/%ld\n",
-		   tunnel->debug,
+		   0,
 		   atomic_long_read(&tunnel->stats.tx_packets),
 		   atomic_long_read(&tunnel->stats.tx_bytes),
 		   atomic_long_read(&tunnel->stats.tx_errors),
@@ -192,7 +192,7 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v)
 		   session->recv_seq ? 'R' : '-',
 		   session->send_seq ? 'S' : '-',
 		   session->lns_mode ? "LNS" : "LAC",
-		   session->debug,
+		   0,
 		   jiffies_to_msecs(session->reorder_timeout));
 	seq_printf(m, "   offset 0 l2specific %hu/%hu\n",
 		   session->l2specific_type, l2tp_get_l2specific_len(session));
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index def78eebca4c..31a1e27eab20 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -229,9 +229,6 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info
 			goto out;
 	}
 
-	if (attrs[L2TP_ATTR_DEBUG])
-		cfg.debug = nla_get_u32(attrs[L2TP_ATTR_DEBUG]);
-
 	ret = -EINVAL;
 	switch (cfg.encap) {
 	case L2TP_ENCAPTYPE_UDP:
@@ -307,9 +304,6 @@ static int l2tp_nl_cmd_tunnel_modify(struct sk_buff *skb, struct genl_info *info
 		goto out;
 	}
 
-	if (info->attrs[L2TP_ATTR_DEBUG])
-		tunnel->debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
-
 	ret = l2tp_tunnel_notify(&l2tp_nl_family, info,
 				 tunnel, L2TP_CMD_TUNNEL_MODIFY);
 
@@ -400,7 +394,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla
 	if (nla_put_u8(skb, L2TP_ATTR_PROTO_VERSION, tunnel->version) ||
 	    nla_put_u32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id) ||
 	    nla_put_u32(skb, L2TP_ATTR_PEER_CONN_ID, tunnel->peer_tunnel_id) ||
-	    nla_put_u32(skb, L2TP_ATTR_DEBUG, tunnel->debug) ||
+	    nla_put_u32(skb, L2TP_ATTR_DEBUG, 0) ||
 	    nla_put_u16(skb, L2TP_ATTR_ENCAP_TYPE, tunnel->encap))
 		goto nla_put_failure;
 
@@ -605,9 +599,6 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
 			cfg.ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]);
 	}
 
-	if (info->attrs[L2TP_ATTR_DEBUG])
-		cfg.debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
-
 	if (info->attrs[L2TP_ATTR_RECV_SEQ])
 		cfg.recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]);
 
@@ -689,9 +680,6 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
 		goto out;
 	}
 
-	if (info->attrs[L2TP_ATTR_DEBUG])
-		session->debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
-
 	if (info->attrs[L2TP_ATTR_RECV_SEQ])
 		session->recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]);
 
@@ -730,7 +718,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl
 	    nla_put_u32(skb, L2TP_ATTR_SESSION_ID, session->session_id) ||
 	    nla_put_u32(skb, L2TP_ATTR_PEER_CONN_ID, tunnel->peer_tunnel_id) ||
 	    nla_put_u32(skb, L2TP_ATTR_PEER_SESSION_ID, session->peer_session_id) ||
-	    nla_put_u32(skb, L2TP_ATTR_DEBUG, session->debug) ||
+	    nla_put_u32(skb, L2TP_ATTR_DEBUG, 0) ||
 	    nla_put_u16(skb, L2TP_ATTR_PW_TYPE, session->pwtype))
 		goto nla_put_failure;
 
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index bd6bb17dfadb..450637ffa557 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -702,7 +702,6 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 		if (!tunnel) {
 			struct l2tp_tunnel_cfg tcfg = {
 				.encap = L2TP_ENCAPTYPE_UDP,
-				.debug = 0,
 			};
 
 			/* Prevent l2tp_tunnel_register() from trying to set up
@@ -1147,7 +1146,7 @@ static int pppol2tp_tunnel_setsockopt(struct sock *sk,
 
 	switch (optname) {
 	case PPPOL2TP_SO_DEBUG:
-		tunnel->debug = val;
+		/* Tunnel debug flags option is deprecated */
 		break;
 
 	default:
@@ -1199,7 +1198,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
 		break;
 
 	case PPPOL2TP_SO_DEBUG:
-		session->debug = val;
+		/* Session debug flags option is deprecated */
 		break;
 
 	case PPPOL2TP_SO_REORDERTO:
@@ -1271,7 +1270,8 @@ static int pppol2tp_tunnel_getsockopt(struct sock *sk,
 
 	switch (optname) {
 	case PPPOL2TP_SO_DEBUG:
-		*val = tunnel->debug;
+		/* Tunnel debug flags option is deprecated */
+		*val = 0;
 		break;
 
 	default:
@@ -1304,7 +1304,8 @@ static int pppol2tp_session_getsockopt(struct sock *sk,
 		break;
 
 	case PPPOL2TP_SO_DEBUG:
-		*val = session->debug;
+		/* Session debug flags option is deprecated */
+		*val = 0;
 		break;
 
 	case PPPOL2TP_SO_REORDERTO:
@@ -1496,7 +1497,7 @@ static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v)
 		   (tunnel == tunnel->sock->sk_user_data) ? 'Y' : 'N',
 		   refcount_read(&tunnel->ref_count) - 1);
 	seq_printf(m, " %08x %ld/%ld/%ld %ld/%ld/%ld\n",
-		   tunnel->debug,
+		   0,
 		   atomic_long_read(&tunnel->stats.tx_packets),
 		   atomic_long_read(&tunnel->stats.tx_bytes),
 		   atomic_long_read(&tunnel->stats.tx_errors),
@@ -1542,7 +1543,7 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
 		   session->recv_seq ? 'R' : '-',
 		   session->send_seq ? 'S' : '-',
 		   session->lns_mode ? "LNS" : "LAC",
-		   session->debug,
+		   0,
 		   jiffies_to_msecs(session->reorder_timeout));
 	seq_printf(m, "   %hu/%hu %ld/%ld/%ld %ld/%ld/%ld\n",
 		   session->nr, session->ns,
-- 
cgit v1.2.3


From 2b8ee4f05d4f6a6c427ad30dd6c1bb49eb2efd3b Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 20 Aug 2020 12:00:21 -0700
Subject: tcp: bpf: Add TCP_BPF_DELACK_MAX setsockopt

This change is mostly from an internal patch and adapts it from sysctl
config to the bpf_setsockopt setup.

The bpf_prog can set the max delay ack by using
bpf_setsockopt(TCP_BPF_DELACK_MAX).  This max delay ack can be communicated
to its peer through bpf header option.  The receiving peer can then use
this max delay ack and set a potentially lower rto by using
bpf_setsockopt(TCP_BPF_RTO_MIN) which will be introduced
in the next patch.

Another later selftest patch will also use it like the above to show
how to write and parse bpf tcp header option.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200820190021.2884000-1-kafai@fb.com
---
 include/net/inet_connection_sock.h | 1 +
 include/uapi/linux/bpf.h           | 1 +
 net/core/filter.c                  | 8 ++++++++
 net/ipv4/tcp.c                     | 2 ++
 net/ipv4/tcp_output.c              | 2 ++
 tools/include/uapi/linux/bpf.h     | 1 +
 6 files changed, 15 insertions(+)

(limited to 'include/uapi')

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index aa8893c68c50..da7264a1ebfc 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -86,6 +86,7 @@ struct inet_connection_sock {
  	struct timer_list	  icsk_retransmit_timer;
  	struct timer_list	  icsk_delack_timer;
 	__u32			  icsk_rto;
+	__u32                     icsk_delack_max;
 	__u32			  icsk_pmtu_cookie;
 	const struct tcp_congestion_ops *icsk_ca_ops;
 	const struct inet_connection_sock_af_ops *icsk_af_ops;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a1bbaff7a0af..7b905cb0213e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4257,6 +4257,7 @@ enum {
 enum {
 	TCP_BPF_IW		= 1001,	/* Set TCP initial congestion window */
 	TCP_BPF_SNDCWND_CLAMP	= 1002,	/* Set sndcwnd_clamp */
+	TCP_BPF_DELACK_MAX	= 1003, /* Max delay ack in usecs */
 };
 
 struct bpf_perf_event_value {
diff --git a/net/core/filter.c b/net/core/filter.c
index c847b1285acd..80fe7420f609 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4459,6 +4459,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
 		} else {
 			struct inet_connection_sock *icsk = inet_csk(sk);
 			struct tcp_sock *tp = tcp_sk(sk);
+			unsigned long timeout;
 
 			if (optlen != sizeof(int))
 				return -EINVAL;
@@ -4480,6 +4481,13 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
 					tp->snd_ssthresh = val;
 				}
 				break;
+			case TCP_BPF_DELACK_MAX:
+				timeout = usecs_to_jiffies(val);
+				if (timeout > TCP_DELACK_MAX ||
+				    timeout < TCP_TIMEOUT_MIN)
+					return -EINVAL;
+				inet_csk(sk)->icsk_delack_max = timeout;
+				break;
 			case TCP_SAVE_SYN:
 				if (val < 0 || val > 1)
 					ret = -EINVAL;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 87d3036d8bd8..44c353a39ad4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -418,6 +418,7 @@ void tcp_init_sock(struct sock *sk)
 	INIT_LIST_HEAD(&tp->tsorted_sent_queue);
 
 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
+	icsk->icsk_delack_max = TCP_DELACK_MAX;
 	tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
 	minmax_reset(&tp->rtt_min, tcp_jiffies32, ~0U);
 
@@ -2685,6 +2686,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	icsk->icsk_backoff = 0;
 	icsk->icsk_probes_out = 0;
 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
+	icsk->icsk_delack_max = TCP_DELACK_MAX;
 	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
 	tp->snd_cwnd = TCP_INIT_CWND;
 	tp->snd_cwnd_cnt = 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 85ff417bda7f..44ffa4891beb 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3741,6 +3741,8 @@ void tcp_send_delayed_ack(struct sock *sk)
 		ato = min(ato, max_ato);
 	}
 
+	ato = min_t(u32, ato, inet_csk(sk)->icsk_delack_max);
+
 	/* Stay within the limit we were given */
 	timeout = jiffies + ato;
 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index a1bbaff7a0af..7b905cb0213e 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4257,6 +4257,7 @@ enum {
 enum {
 	TCP_BPF_IW		= 1001,	/* Set TCP initial congestion window */
 	TCP_BPF_SNDCWND_CLAMP	= 1002,	/* Set sndcwnd_clamp */
+	TCP_BPF_DELACK_MAX	= 1003, /* Max delay ack in usecs */
 };
 
 struct bpf_perf_event_value {
-- 
cgit v1.2.3


From ca584ba070864c606f3a54faaafe774726d5b4a1 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 20 Aug 2020 12:00:27 -0700
Subject: tcp: bpf: Add TCP_BPF_RTO_MIN for bpf_setsockopt

This patch adds bpf_setsockopt(TCP_BPF_RTO_MIN) to allow bpf prog
to set the min rto of a connection.  It could be used together
with the earlier patch which has added bpf_setsockopt(TCP_BPF_DELACK_MAX).

A later selftest patch will communicate the max delay ack in a
bpf tcp header option and then the receiving side can use
bpf_setsockopt(TCP_BPF_RTO_MIN) to set a shorter rto.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200820190027.2884170-1-kafai@fb.com
---
 include/net/inet_connection_sock.h | 1 +
 include/net/tcp.h                  | 2 +-
 include/uapi/linux/bpf.h           | 1 +
 net/core/filter.c                  | 7 +++++++
 net/ipv4/tcp.c                     | 2 ++
 tools/include/uapi/linux/bpf.h     | 1 +
 6 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index da7264a1ebfc..c738abeb3265 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -86,6 +86,7 @@ struct inet_connection_sock {
  	struct timer_list	  icsk_retransmit_timer;
  	struct timer_list	  icsk_delack_timer;
 	__u32			  icsk_rto;
+	__u32                     icsk_rto_min;
 	__u32                     icsk_delack_max;
 	__u32			  icsk_pmtu_cookie;
 	const struct tcp_congestion_ops *icsk_ca_ops;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index eab6c7510b5b..dda778c782fe 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -699,7 +699,7 @@ static inline void tcp_fast_path_check(struct sock *sk)
 static inline u32 tcp_rto_min(struct sock *sk)
 {
 	const struct dst_entry *dst = __sk_dst_get(sk);
-	u32 rto_min = TCP_RTO_MIN;
+	u32 rto_min = inet_csk(sk)->icsk_rto_min;
 
 	if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
 		rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 7b905cb0213e..1ae20058b574 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4258,6 +4258,7 @@ enum {
 	TCP_BPF_IW		= 1001,	/* Set TCP initial congestion window */
 	TCP_BPF_SNDCWND_CLAMP	= 1002,	/* Set sndcwnd_clamp */
 	TCP_BPF_DELACK_MAX	= 1003, /* Max delay ack in usecs */
+	TCP_BPF_RTO_MIN		= 1004, /* Min delay ack in usecs */
 };
 
 struct bpf_perf_event_value {
diff --git a/net/core/filter.c b/net/core/filter.c
index 80fe7420f609..075ab71b985c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4488,6 +4488,13 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
 					return -EINVAL;
 				inet_csk(sk)->icsk_delack_max = timeout;
 				break;
+			case TCP_BPF_RTO_MIN:
+				timeout = usecs_to_jiffies(val);
+				if (timeout > TCP_RTO_MIN ||
+				    timeout < TCP_TIMEOUT_MIN)
+					return -EINVAL;
+				inet_csk(sk)->icsk_rto_min = timeout;
+				break;
 			case TCP_SAVE_SYN:
 				if (val < 0 || val > 1)
 					ret = -EINVAL;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 44c353a39ad4..6075cb091a20 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -418,6 +418,7 @@ void tcp_init_sock(struct sock *sk)
 	INIT_LIST_HEAD(&tp->tsorted_sent_queue);
 
 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
+	icsk->icsk_rto_min = TCP_RTO_MIN;
 	icsk->icsk_delack_max = TCP_DELACK_MAX;
 	tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
 	minmax_reset(&tp->rtt_min, tcp_jiffies32, ~0U);
@@ -2686,6 +2687,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	icsk->icsk_backoff = 0;
 	icsk->icsk_probes_out = 0;
 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
+	icsk->icsk_rto_min = TCP_RTO_MIN;
 	icsk->icsk_delack_max = TCP_DELACK_MAX;
 	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
 	tp->snd_cwnd = TCP_INIT_CWND;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 7b905cb0213e..1ae20058b574 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4258,6 +4258,7 @@ enum {
 	TCP_BPF_IW		= 1001,	/* Set TCP initial congestion window */
 	TCP_BPF_SNDCWND_CLAMP	= 1002,	/* Set sndcwnd_clamp */
 	TCP_BPF_DELACK_MAX	= 1003, /* Max delay ack in usecs */
+	TCP_BPF_RTO_MIN		= 1004, /* Min delay ack in usecs */
 };
 
 struct bpf_perf_event_value {
-- 
cgit v1.2.3


From 00d211a4ea6f48e8e3b758813fe23ad28193d3bf Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 20 Aug 2020 12:00:46 -0700
Subject: bpf: tcp: Add bpf_skops_parse_hdr()

The patch adds a function bpf_skops_parse_hdr().
It will call the bpf prog to parse the TCP header received at
a tcp_sock that has at least reached the ESTABLISHED state.

For the packets received during the 3WHS (SYN, SYNACK and ACK),
the received skb will be available to the bpf prog during the callback
in bpf_skops_established() introduced in the previous patch and
in the bpf_skops_write_hdr_opt() that will be added in the
next patch.

Calling bpf prog to parse header is controlled by two new flags in
tp->bpf_sock_ops_cb_flags:
BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG and
BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG.

When BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG is set,
the bpf prog will only be called when there is an unknown
option in the TCP header.

When BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG is set,
the bpf prog will be called on all received TCP headers.

This function is half implemented to highlight the changes in
the TCP stack.  The actual code preparing the bpf running context and
invoking the bpf prog will be added in a later patch with other
necessary bpf pieces.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/bpf/20200820190046.2885054-1-kafai@fb.com
---
 include/uapi/linux/bpf.h       |  4 +++-
 net/ipv4/tcp_input.c           | 36 ++++++++++++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h |  4 +++-
 3 files changed, 42 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1ae20058b574..010ed2abcb66 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4173,8 +4173,10 @@ enum {
 	BPF_SOCK_OPS_RETRANS_CB_FLAG	= (1<<1),
 	BPF_SOCK_OPS_STATE_CB_FLAG	= (1<<2),
 	BPF_SOCK_OPS_RTT_CB_FLAG	= (1<<3),
+	BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG  = (1<<4),
+	BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG = (1<<5),
 /* Mask of all currently supported cb flags */
-	BPF_SOCK_OPS_ALL_CB_FLAGS       = 0xF,
+	BPF_SOCK_OPS_ALL_CB_FLAGS       = 0x3F,
 };
 
 /* List of known BPF sock_ops operators.
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 7b0faa2bfe32..b520450170d1 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -139,6 +139,36 @@ EXPORT_SYMBOL_GPL(clean_acked_data_flush);
 #endif
 
 #ifdef CONFIG_CGROUP_BPF
+static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb)
+{
+	bool unknown_opt = tcp_sk(sk)->rx_opt.saw_unknown &&
+		BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk),
+				       BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG);
+	bool parse_all_opt = BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk),
+						    BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG);
+
+	if (likely(!unknown_opt && !parse_all_opt))
+		return;
+
+	/* The skb will be handled in the
+	 * bpf_skops_established() or
+	 * bpf_skops_write_hdr_opt().
+	 */
+	switch (sk->sk_state) {
+	case TCP_SYN_RECV:
+	case TCP_SYN_SENT:
+	case TCP_LISTEN:
+		return;
+	}
+
+	/* BPF prog will have access to the sk and skb.
+	 *
+	 * The bpf running context preparation and the actual bpf prog
+	 * calling will be implemented in a later PATCH together with
+	 * other bpf pieces.
+	 */
+}
+
 static void bpf_skops_established(struct sock *sk, int bpf_op,
 				  struct sk_buff *skb)
 {
@@ -155,6 +185,10 @@ static void bpf_skops_established(struct sock *sk, int bpf_op,
 	BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
 }
 #else
+static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb)
+{
+}
+
 static void bpf_skops_established(struct sock *sk, int bpf_op,
 				  struct sk_buff *skb)
 {
@@ -5623,6 +5657,8 @@ syn_challenge:
 		goto discard;
 	}
 
+	bpf_skops_parse_hdr(sk, skb);
+
 	return true;
 
 discard:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 1ae20058b574..010ed2abcb66 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4173,8 +4173,10 @@ enum {
 	BPF_SOCK_OPS_RETRANS_CB_FLAG	= (1<<1),
 	BPF_SOCK_OPS_STATE_CB_FLAG	= (1<<2),
 	BPF_SOCK_OPS_RTT_CB_FLAG	= (1<<3),
+	BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG  = (1<<4),
+	BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG = (1<<5),
 /* Mask of all currently supported cb flags */
-	BPF_SOCK_OPS_ALL_CB_FLAGS       = 0xF,
+	BPF_SOCK_OPS_ALL_CB_FLAGS       = 0x3F,
 };
 
 /* List of known BPF sock_ops operators.
-- 
cgit v1.2.3


From 331fca4315efa3bbd258fbdf8209d59d253c0480 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 20 Aug 2020 12:00:52 -0700
Subject: bpf: tcp: Add bpf_skops_hdr_opt_len() and bpf_skops_write_hdr_opt()

The bpf prog needs to parse the SYN header to learn what options have
been sent by the peer's bpf-prog before writing its options into SYNACK.
This patch adds a "syn_skb" arg to tcp_make_synack() and send_synack().
This syn_skb will eventually be made available (as read-only) to the
bpf prog.  This will be the only SYN packet available to the bpf
prog during syncookie.  For other regular cases, the bpf prog can
also use the saved_syn.

When writing options, the bpf prog will first be called to tell the
kernel its required number of bytes.  It is done by the new
bpf_skops_hdr_opt_len().  The bpf prog will only be called when the new
BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG is set in tp->bpf_sock_ops_cb_flags.
When the bpf prog returns, the kernel will know how many bytes are needed
and then update the "*remaining" arg accordingly.  4 byte alignment will
be included in the "*remaining" before this function returns.  The 4 byte
aligned number of bytes will also be stored into the opts->bpf_opt_len.
"bpf_opt_len" is a newly added member to the struct tcp_out_options.

Then the new bpf_skops_write_hdr_opt() will call the bpf prog to write the
header options.  The bpf prog is only called if it has reserved space
before (opts->bpf_opt_len > 0).

The bpf prog is the last one getting a chance to reserve header space
and write the header option.

These two functions are half implemented to highlight the changes in
the TCP stack.  The actual code preparing the bpf running context and
invoking the bpf prog will be added in a later patch with other
necessary bpf pieces.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/bpf/20200820190052.2885316-1-kafai@fb.com
---
 include/net/tcp.h              |   6 ++-
 include/uapi/linux/bpf.h       |   3 +-
 net/ipv4/tcp_input.c           |   5 +-
 net/ipv4/tcp_ipv4.c            |   5 +-
 net/ipv4/tcp_output.c          | 105 ++++++++++++++++++++++++++++++++++++-----
 net/ipv6/tcp_ipv6.c            |   5 +-
 tools/include/uapi/linux/bpf.h |   3 +-
 7 files changed, 109 insertions(+), 23 deletions(-)

(limited to 'include/uapi')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index c186dbf731e1..3e768a6b8264 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -455,7 +455,8 @@ enum tcp_synack_type {
 struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
 				struct request_sock *req,
 				struct tcp_fastopen_cookie *foc,
-				enum tcp_synack_type synack_type);
+				enum tcp_synack_type synack_type,
+				struct sk_buff *syn_skb);
 int tcp_disconnect(struct sock *sk, int flags);
 
 void tcp_finish_connect(struct sock *sk, struct sk_buff *skb);
@@ -2035,7 +2036,8 @@ struct tcp_request_sock_ops {
 	int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
 			   struct flowi *fl, struct request_sock *req,
 			   struct tcp_fastopen_cookie *foc,
-			   enum tcp_synack_type synack_type);
+			   enum tcp_synack_type synack_type,
+			   struct sk_buff *syn_skb);
 };
 
 extern const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 010ed2abcb66..18d0e128bc3c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4175,8 +4175,9 @@ enum {
 	BPF_SOCK_OPS_RTT_CB_FLAG	= (1<<3),
 	BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG  = (1<<4),
 	BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG = (1<<5),
+	BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6),
 /* Mask of all currently supported cb flags */
-	BPF_SOCK_OPS_ALL_CB_FLAGS       = 0x3F,
+	BPF_SOCK_OPS_ALL_CB_FLAGS       = 0x7F,
 };
 
 /* List of known BPF sock_ops operators.
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b520450170d1..8c9da4b65dae 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6824,7 +6824,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	}
 	if (fastopen_sk) {
 		af_ops->send_synack(fastopen_sk, dst, &fl, req,
-				    &foc, TCP_SYNACK_FASTOPEN);
+				    &foc, TCP_SYNACK_FASTOPEN, skb);
 		/* Add the child socket directly into the accept queue */
 		if (!inet_csk_reqsk_queue_add(sk, req, fastopen_sk)) {
 			reqsk_fastopen_remove(fastopen_sk, req, false);
@@ -6842,7 +6842,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 				tcp_timeout_init((struct sock *)req));
 		af_ops->send_synack(sk, dst, &fl, req, &foc,
 				    !want_cookie ? TCP_SYNACK_NORMAL :
-						   TCP_SYNACK_COOKIE);
+						   TCP_SYNACK_COOKIE,
+				    skb);
 		if (want_cookie) {
 			reqsk_free(req);
 			return 0;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5084333b5ab6..631a5ee0dd4e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -965,7 +965,8 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
 			      struct flowi *fl,
 			      struct request_sock *req,
 			      struct tcp_fastopen_cookie *foc,
-			      enum tcp_synack_type synack_type)
+			      enum tcp_synack_type synack_type,
+			      struct sk_buff *syn_skb)
 {
 	const struct inet_request_sock *ireq = inet_rsk(req);
 	struct flowi4 fl4;
@@ -976,7 +977,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
 	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
 		return -1;
 
-	skb = tcp_make_synack(sk, dst, req, foc, synack_type);
+	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
 
 	if (skb) {
 		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 44ffa4891beb..673db6879e46 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -438,6 +438,7 @@ struct tcp_out_options {
 	u8 ws;			/* window scale, 0 to disable */
 	u8 num_sack_blocks;	/* number of SACK blocks to include */
 	u8 hash_size;		/* bytes in hash_location */
+	u8 bpf_opt_len;		/* length of BPF hdr option */
 	__u8 *hash_location;	/* temporary pointer, overloaded */
 	__u32 tsval, tsecr;	/* need to include OPTION_TS */
 	struct tcp_fastopen_cookie *fastopen_cookie;	/* Fast open cookie */
@@ -452,6 +453,59 @@ static void mptcp_options_write(__be32 *ptr, struct tcp_out_options *opts)
 #endif
 }
 
+#ifdef CONFIG_CGROUP_BPF
+/* req, syn_skb and synack_type are used when writing synack */
+static void bpf_skops_hdr_opt_len(struct sock *sk, struct sk_buff *skb,
+				  struct request_sock *req,
+				  struct sk_buff *syn_skb,
+				  enum tcp_synack_type synack_type,
+				  struct tcp_out_options *opts,
+				  unsigned int *remaining)
+{
+	if (likely(!BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk),
+					   BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG)) ||
+	    !*remaining)
+		return;
+
+	/* The bpf running context preparation and the actual bpf prog
+	 * calling will be implemented in a later PATCH together with
+	 * other bpf pieces.
+	 */
+}
+
+static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb,
+				    struct request_sock *req,
+				    struct sk_buff *syn_skb,
+				    enum tcp_synack_type synack_type,
+				    struct tcp_out_options *opts)
+{
+	if (likely(!opts->bpf_opt_len))
+		return;
+
+	/* The bpf running context preparation and the actual bpf prog
+	 * calling will be implemented in a later PATCH together with
+	 * other bpf pieces.
+	 */
+}
+#else
+static void bpf_skops_hdr_opt_len(struct sock *sk, struct sk_buff *skb,
+				  struct request_sock *req,
+				  struct sk_buff *syn_skb,
+				  enum tcp_synack_type synack_type,
+				  struct tcp_out_options *opts,
+				  unsigned int *remaining)
+{
+}
+
+static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb,
+				    struct request_sock *req,
+				    struct sk_buff *syn_skb,
+				    enum tcp_synack_type synack_type,
+				    struct tcp_out_options *opts)
+{
+}
+#endif
+
 /* Write previously computed TCP options to the packet.
  *
  * Beware: Something in the Internet is very sensitive to the ordering of
@@ -691,6 +745,8 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 		}
 	}
 
+	bpf_skops_hdr_opt_len(sk, skb, NULL, NULL, 0, opts, &remaining);
+
 	return MAX_TCP_OPTION_SPACE - remaining;
 }
 
@@ -701,7 +757,8 @@ static unsigned int tcp_synack_options(const struct sock *sk,
 				       struct tcp_out_options *opts,
 				       const struct tcp_md5sig_key *md5,
 				       struct tcp_fastopen_cookie *foc,
-				       enum tcp_synack_type synack_type)
+				       enum tcp_synack_type synack_type,
+				       struct sk_buff *syn_skb)
 {
 	struct inet_request_sock *ireq = inet_rsk(req);
 	unsigned int remaining = MAX_TCP_OPTION_SPACE;
@@ -758,6 +815,9 @@ static unsigned int tcp_synack_options(const struct sock *sk,
 
 	smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
 
+	bpf_skops_hdr_opt_len((struct sock *)sk, skb, req, syn_skb,
+			      synack_type, opts, &remaining);
+
 	return MAX_TCP_OPTION_SPACE - remaining;
 }
 
@@ -826,6 +886,15 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
 			opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
 	}
 
+	if (unlikely(BPF_SOCK_OPS_TEST_FLAG(tp,
+					    BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG))) {
+		unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
+
+		bpf_skops_hdr_opt_len(sk, skb, NULL, NULL, 0, opts, &remaining);
+
+		size = MAX_TCP_OPTION_SPACE - remaining;
+	}
+
 	return size;
 }
 
@@ -1213,6 +1282,9 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
 	}
 #endif
 
+	/* BPF prog is the last one writing header option */
+	bpf_skops_write_hdr_opt(sk, skb, NULL, NULL, 0, &opts);
+
 	INDIRECT_CALL_INET(icsk->icsk_af_ops->send_check,
 			   tcp_v6_send_check, tcp_v4_send_check,
 			   sk, skb);
@@ -3336,20 +3408,20 @@ int tcp_send_synack(struct sock *sk)
 }
 
 /**
- * tcp_make_synack - Prepare a SYN-ACK.
- * sk: listener socket
- * dst: dst entry attached to the SYNACK
- * req: request_sock pointer
- * foc: cookie for tcp fast open
- * synack_type: Type of synback to prepare
- *
- * Allocate one skb and build a SYNACK packet.
- * @dst is consumed : Caller should not use it again.
+ * tcp_make_synack - Allocate one skb and build a SYNACK packet.
+ * @sk: listener socket
+ * @dst: dst entry attached to the SYNACK. It is consumed and caller
+ *       should not use it again.
+ * @req: request_sock pointer
+ * @foc: cookie for tcp fast open
+ * @synack_type: Type of synack to prepare
+ * @syn_skb: SYN packet just received.  It could be NULL for rtx case.
  */
 struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
 				struct request_sock *req,
 				struct tcp_fastopen_cookie *foc,
-				enum tcp_synack_type synack_type)
+				enum tcp_synack_type synack_type,
+				struct sk_buff *syn_skb)
 {
 	struct inet_request_sock *ireq = inet_rsk(req);
 	const struct tcp_sock *tp = tcp_sk(sk);
@@ -3408,8 +3480,11 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
 	md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
 #endif
 	skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
+	/* bpf program will be interested in the tcp_flags */
+	TCP_SKB_CB(skb)->tcp_flags = TCPHDR_SYN | TCPHDR_ACK;
 	tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
-					     foc, synack_type) + sizeof(*th);
+					     foc, synack_type,
+					     syn_skb) + sizeof(*th);
 
 	skb_push(skb, tcp_header_size);
 	skb_reset_transport_header(skb);
@@ -3441,6 +3516,9 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
 	rcu_read_unlock();
 #endif
 
+	bpf_skops_write_hdr_opt((struct sock *)sk, skb, req, syn_skb,
+				synack_type, &opts);
+
 	skb->skb_mstamp_ns = now;
 	tcp_add_tx_delay(skb, tp);
 
@@ -3936,7 +4014,8 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
 	int res;
 
 	tcp_rsk(req)->txhash = net_tx_rndhash();
-	res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL);
+	res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL,
+				  NULL);
 	if (!res) {
 		__TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
 		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 305870a72352..87a633e1fbef 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -501,7 +501,8 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
 			      struct flowi *fl,
 			      struct request_sock *req,
 			      struct tcp_fastopen_cookie *foc,
-			      enum tcp_synack_type synack_type)
+			      enum tcp_synack_type synack_type,
+			      struct sk_buff *syn_skb)
 {
 	struct inet_request_sock *ireq = inet_rsk(req);
 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
@@ -515,7 +516,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
 					       IPPROTO_TCP)) == NULL)
 		goto done;
 
-	skb = tcp_make_synack(sk, dst, req, foc, synack_type);
+	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
 
 	if (skb) {
 		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 010ed2abcb66..18d0e128bc3c 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4175,8 +4175,9 @@ enum {
 	BPF_SOCK_OPS_RTT_CB_FLAG	= (1<<3),
 	BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG  = (1<<4),
 	BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG = (1<<5),
+	BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6),
 /* Mask of all currently supported cb flags */
-	BPF_SOCK_OPS_ALL_CB_FLAGS       = 0x3F,
+	BPF_SOCK_OPS_ALL_CB_FLAGS       = 0x7F,
 };
 
 /* List of known BPF sock_ops operators.
-- 
cgit v1.2.3


From 0813a841566f0962a5551be7749b43c45f0022a0 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 20 Aug 2020 12:01:04 -0700
Subject: bpf: tcp: Allow bpf prog to write and parse TCP header option
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Note: The TCP changes here are mainly to implement the bpf
  pieces into the bpf_skops_*() functions introduced
  in the earlier patches. ]

The earlier effort in BPF-TCP-CC allows the TCP Congestion Control
algorithm to be written in BPF.  It opens up opportunities to allow
a faster turnaround time in testing/releasing new congestion control
ideas to production environment.

The same flexibility can be extended to writing TCP header option.
It is not uncommon that people want to test new TCP header option
to improve the TCP performance.  Another use case is for data-center
that has a more controlled environment and has more flexibility in
putting header options for internal only use.

For example, we want to test the idea in putting maximum delay
ACK in TCP header option which is similar to a draft RFC proposal [1].

This patch introduces the necessary BPF API and uses it in the
TCP stack to allow a BPF_PROG_TYPE_SOCK_OPS program to parse
and write TCP header options.  It currently supports most
TCP packets except RST.

Supported TCP header option:
───────────────────────────
This patch allows the bpf-prog to write any option kind.
Different bpf-progs can write their own options by calling the new helper
bpf_store_hdr_opt().  The helper will ensure there is no duplicated
option in the header.

By allowing a bpf-prog to write any option kind, this gives a lot of
flexibility to the bpf-prog.  Different bpf-progs can write their
own option kinds.  It could also allow the bpf-prog to support a
recently standardized option on an older kernel.

Sockops Callback Flags:
──────────────────────
The bpf program will only be called to parse/write tcp header option
if the following newly added callback flags are enabled
in tp->bpf_sock_ops_cb_flags:
BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG
BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG
BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG

A few words on the PARSE CB flags.  When the above PARSE CB flags are
turned on, the bpf-prog will be called on packets received
at a sk that has at least reached the ESTABLISHED state.
The parsing of the SYN-SYNACK-ACK will be discussed in the
"3 Way HandShake" section.

The default is off for all of the above new CB flags, i.e. the bpf prog
will not be called to parse or write bpf hdr option.  There are
detailed comments on these new cb flags in the UAPI bpf.h.

sock_ops->skb_data and bpf_load_hdr_opt()
─────────────────────────────────────────
sock_ops->skb_data and sock_ops->skb_data_end covers the whole
TCP header and its options.  They are read only.

The new bpf_load_hdr_opt() helps to read a particular option "kind"
from the skb_data.

Please refer to the comment in UAPI bpf.h.  It has details
on what skb_data contains under different sock_ops->op.

3 Way HandShake
───────────────
The bpf-prog can learn if it is sending SYN or SYNACK by reading the
sock_ops->skb_tcp_flags.

* Passive side

When writing SYNACK (i.e. sock_ops->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB),
the received SYN skb will be available to the bpf prog.  The bpf prog can
use the SYN skb (which may carry the header option sent from the remote bpf
prog) to decide what bpf header option should be written to the outgoing
SYNACK skb.  The SYN packet can be obtained by getsockopt(TCP_BPF_SYN*).
More on this later.  Also, the bpf prog can learn if it is in syncookie
mode (by checking sock_ops->args[0] == BPF_WRITE_HDR_TCP_SYNACK_COOKIE).

The bpf prog can store the received SYN pkt by using the existing
bpf_setsockopt(TCP_SAVE_SYN).  The example in a later patch does it.
[ Note that the fullsock here is a listen sk, bpf_sk_storage
  is not very useful here since the listen sk will be shared
  by many concurrent connection requests.

  Extending bpf_sk_storage support to request_sock will add weight
  to the minisock and it is not necessarily better than storing the
  whole ~100 bytes SYN pkt. ]

When the connection is established, the bpf prog will be called
in the existing PASSIVE_ESTABLISHED_CB callback.  At that time,
the bpf prog can get the header option from the saved syn and
then apply the needed operation to the newly established socket.
The later patch will use the max delay ack specified in the SYN
header and set the RTO of this newly established connection
as an example.

The received ACK (that concludes the 3WHS) will also be available to
the bpf prog during PASSIVE_ESTABLISHED_CB through the sock_ops->skb_data.
It could be useful in syncookie scenario.  More on this later.

There is an existing getsockopt "TCP_SAVED_SYN" to return the whole
saved syn pkt which includes the IP[46] header and the TCP header.
A few "TCP_BPF_SYN*" getsockopts have been added to allow specifying where to
start getting from, e.g. starting from TCP header, or from IP[46] header.

The new getsockopt(TCP_BPF_SYN*) will also know where it can get
the SYN's packet from:
  - (a) the just received syn (available when the bpf prog is writing SYNACK)
        and it is the only way to get SYN during syncookie mode.
  or
  - (b) the saved syn (available in PASSIVE_ESTABLISHED_CB and also other
        existing CB).

The bpf prog does not need to know where the SYN pkt is coming from.
The getsockopt(TCP_BPF_SYN*) will hide these details.

Similarly, a flag "BPF_LOAD_HDR_OPT_TCP_SYN" is also added to
bpf_load_hdr_opt() to read a particular header option from the SYN packet.

* Fastopen

Fastopen should work the same as the regular non fastopen case.
There is a test for this in a later patch.

* Syncookie

For syncookie, the later example patch asks the active
side's bpf prog to resend the header options in ACK.  The server
can use bpf_load_hdr_opt() to look at the options in this
received ACK during PASSIVE_ESTABLISHED_CB.

* Active side

The bpf prog will get a chance to write the bpf header option
in the SYN packet during WRITE_HDR_OPT_CB.  The received SYNACK
pkt will also be available to the bpf prog during the existing
ACTIVE_ESTABLISHED_CB callback through the sock_ops->skb_data
and bpf_load_hdr_opt().

* Turn off header CB flags after 3WHS

If the bpf prog does not need to write/parse header options
beyond the 3WHS, the bpf prog can clear the bpf_sock_ops_cb_flags
to avoid being called for header options.
Or the bpf-prog can choose to leave the UNKNOWN_HDR_OPT_CB_FLAG on
so that the kernel will only call it when there is an option that
the kernel cannot handle.

[1]: draft-wang-tcpm-low-latency-opt-00
     https://tools.ietf.org/html/draft-wang-tcpm-low-latency-opt-00

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200820190104.2885895-1-kafai@fb.com
---
 include/linux/bpf-cgroup.h     |  25 +++
 include/linux/filter.h         |   4 +
 include/net/tcp.h              |  49 ++++++
 include/uapi/linux/bpf.h       | 300 ++++++++++++++++++++++++++++++++-
 net/core/filter.c              | 365 +++++++++++++++++++++++++++++++++++++++++
 net/ipv4/tcp_input.c           |  20 ++-
 net/ipv4/tcp_minisocks.c       |   1 +
 net/ipv4/tcp_output.c          | 104 +++++++++++-
 tools/include/uapi/linux/bpf.h | 300 ++++++++++++++++++++++++++++++++-
 9 files changed, 1150 insertions(+), 18 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 64f367044e25..2f98d2fce62e 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -279,6 +279,31 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr)			\
 	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_RECVMSG, NULL)
 
+/* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a
+ * fullsock and its parent fullsock cannot be traced by
+ * sk_to_full_sk().
+ *
+ * e.g. sock_ops->sk is a request_sock and it is under syncookie mode.
+ * Its listener-sk is not attached to the rsk_listener.
+ * In this case, the caller holds the listener-sk (unlocked),
+ * sets its sock_ops->sk to req_sk, and calls this SOCK_OPS"_SK" with
+ * the listener-sk such that the cgroup-bpf-progs of the
+ * listener-sk will be run.
+ *
+ * Regardless of syncookie mode or not,
+ * calling bpf_setsockopt on listener-sk will not make sense anyway,
+ * so passing 'sock_ops->sk == req_sk' to the bpf prog is appropriate here.
+ */
+#define BPF_CGROUP_RUN_PROG_SOCK_OPS_SK(sock_ops, sk)			\
+({									\
+	int __ret = 0;							\
+	if (cgroup_bpf_enabled)						\
+		__ret = __cgroup_bpf_run_filter_sock_ops(sk,		\
+							 sock_ops,	\
+							 BPF_CGROUP_SOCK_OPS); \
+	__ret;								\
+})
+
 #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops)				       \
 ({									       \
 	int __ret = 0;							       \
diff --git a/include/linux/filter.h b/include/linux/filter.h
index c427dfa5f908..995625950cc1 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1241,8 +1241,12 @@ struct bpf_sock_ops_kern {
 		u32 reply;
 		u32 replylong[4];
 	};
+	struct sk_buff	*syn_skb;
+	struct sk_buff	*skb;
+	void	*skb_data_end;
 	u8	op;
 	u8	is_fullsock;
+	u8	remaining_opt_len;
 	u64	temp;			/* temp and everything after is not
 					 * initialized to 0 before calling
 					 * the BPF program. New fields that
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 3e768a6b8264..1f967b4e22f6 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2235,6 +2235,55 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
 		      struct msghdr *msg, int len, int flags);
 #endif /* CONFIG_NET_SOCK_MSG */
 
+#ifdef CONFIG_CGROUP_BPF
+/* Copy the listen sk's HDR_OPT_CB flags to its child.
+ *
+ * During 3-Way-HandShake, the synack is usually sent from
+ * the listen sk with the HDR_OPT_CB flags set so that
+ * bpf-prog will be called to write the BPF hdr option.
+ *
+ * In fastopen, the child sk is used to send synack instead
+ * of the listen sk.  Thus, inheriting the HDR_OPT_CB flags
+ * from the listen sk gives the bpf-prog a chance to write
+ * BPF hdr option in the synack pkt during fastopen.
+ *
+ * Both fastopen and non-fastopen child will inherit the
+ * HDR_OPT_CB flags to keep the bpf-prog having a consistent
+ * behavior when deciding to clear these cb flags (or not)
+ * during the PASSIVE_ESTABLISHED_CB.
+ *
+ * In the future, other cb flags could be inherited here also.
+ */
+static inline void bpf_skops_init_child(const struct sock *sk,
+					struct sock *child)
+{
+	tcp_sk(child)->bpf_sock_ops_cb_flags =
+		tcp_sk(sk)->bpf_sock_ops_cb_flags &
+		(BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG |
+		 BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG |
+		 BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG);
+}
+
+static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
+				      struct sk_buff *skb,
+				      unsigned int end_offset)
+{
+	skops->skb = skb;
+	skops->skb_data_end = skb->data + end_offset;
+}
+#else
+static inline void bpf_skops_init_child(const struct sock *sk,
+					struct sock *child)
+{
+}
+
+static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
+				      struct sk_buff *skb,
+				      unsigned int end_offset)
+{
+}
+#endif
+
 /* Call BPF_SOCK_OPS program that returns an int. If the return value
  * is < 0, then the BPF op failed (for example if the loaded BPF
  * program does not support the chosen operation or there is no BPF
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 18d0e128bc3c..f67ec5d9e57d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3395,6 +3395,120 @@ union bpf_attr {
  *		A non-negative value equal to or less than *size* on success,
  *		or a negative error in case of failure.
  *
+ * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags)
+ *	Description
+ *		Load header option.  Support reading a particular TCP header
+ *		option for bpf program (BPF_PROG_TYPE_SOCK_OPS).
+ *
+ *		If *flags* is 0, it will search the option from the
+ *		sock_ops->skb_data.  The comment in "struct bpf_sock_ops"
+ *		has details on what skb_data contains under different
+ *		sock_ops->op.
+ *
+ *		The first byte of the *searchby_res* specifies the
+ *		kind that it wants to search.
+ *
+ *		If the searching kind is an experimental kind
+ *		(i.e. 253 or 254 according to RFC6994), it also
+ *		needs to specify the "magic" which is either
+ *		2 bytes or 4 bytes.  It then also needs to
+ *		specify the size of the magic by using
+ *		the 2nd byte which is "kind-length" of a TCP
+ *		header option and the "kind-length" also
+ *		includes the first 2 bytes "kind" and "kind-length"
+ *		itself as a normal TCP header option also does.
+ *
+ *		For example, to search experimental kind 254 with
+ *		2 byte magic 0xeB9F, the searchby_res should be
+ *		[ 254, 4, 0xeB, 0x9F, 0, 0, .... 0 ].
+ *
+ *		To search for the standard window scale option (3),
+ *		the searchby_res should be [ 3, 0, 0, .... 0 ].
+ *		Note, kind-length must be 0 for regular option.
+ *
+ *		Searching for No-Op (1) and End-of-Option-List (0) is
+ *		not supported.
+ *
+ *		*len* must be at least 2 bytes which is the minimal size
+ *		of a header option.
+ *
+ *		Supported flags:
+ *		* **BPF_LOAD_HDR_OPT_TCP_SYN** to search from the
+ *		  saved_syn packet or the just-received syn packet.
+ *
+ *	Return
+ *		>0 when found, the header option is copied to *searchby_res*.
+ *		The return value is the total length copied.
+ *
+ *		**-EINVAL** If param is invalid
+ *
+ *		**-ENOMSG** The option is not found
+ *
+ *		**-ENOENT** No syn packet available when
+ *			    **BPF_LOAD_HDR_OPT_TCP_SYN** is used
+ *
+ *		**-ENOSPC** Not enough space.  Only *len* number of
+ *			    bytes are copied.
+ *
+ *		**-EFAULT** Cannot parse the header options in the packet
+ *
+ *		**-EPERM** This helper cannot be used under the
+ *			   current sock_ops->op.
+ *
+ * long bpf_store_hdr_opt(struct bpf_sock_ops *skops, const void *from, u32 len, u64 flags)
+ *	Description
+ *		Store header option.  The data will be copied
+ *		from buffer *from* with length *len* to the TCP header.
+ *
+ *		The buffer *from* should have the whole option that
+ *		includes the kind, kind-length, and the actual
+ *		option data.  The *len* must be at least kind-length
+ *		long.  The kind-length does not have to be 4 byte
+ *		aligned.  The kernel will take care of the padding
+ *		and setting the 4 bytes aligned value to th->doff.
+ *
+ *		This helper will check for duplicated option
+ *		by searching the same option in the outgoing skb.
+ *
+ *		This helper can only be called during
+ *		BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ *
+ *	Return
+ *		0 on success, or negative error in case of failure:
+ *
+ *		**-EINVAL** If param is invalid
+ *
+ *		**-ENOSPC** Not enough space in the header.
+ *			    Nothing has been written
+ *
+ *		**-EEXIST** The option has already existed
+ *
+ *		**-EFAULT** Cannot parse the existing header options
+ *
+ *		**-EPERM** This helper cannot be used under the
+ *			   current sock_ops->op.
+ *
+ * long bpf_reserve_hdr_opt(struct bpf_sock_ops *skops, u32 len, u64 flags)
+ *	Description
+ *		Reserve *len* bytes for the bpf header option.  The
+ *		space will be used by bpf_store_hdr_opt() later in
+ *		BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ *
+ *		If bpf_reserve_hdr_opt() is called multiple times,
+ *		the total number of bytes will be reserved.
+ *
+ *		This helper can only be called during
+ *		BPF_SOCK_OPS_HDR_OPT_LEN_CB.
+ *
+ *	Return
+ *		0 on success, or negative error in case of failure:
+ *
+ *		**-EINVAL** if param is invalid
+ *
+ *		**-ENOSPC** Not enough space in the header.
+ *
+ *		**-EPERM** This helper cannot be used under the
+ *			   current sock_ops->op.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3539,6 +3653,9 @@ union bpf_attr {
 	FN(skc_to_tcp_request_sock),	\
 	FN(skc_to_udp6_sock),		\
 	FN(get_task_stack),		\
+	FN(load_hdr_opt),		\
+	FN(store_hdr_opt),		\
+	FN(reserve_hdr_opt),
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -4165,6 +4282,36 @@ struct bpf_sock_ops {
 	__u64 bytes_received;
 	__u64 bytes_acked;
 	__bpf_md_ptr(struct bpf_sock *, sk);
+	/* [skb_data, skb_data_end) covers the whole TCP header.
+	 *
+	 * BPF_SOCK_OPS_PARSE_HDR_OPT_CB: The packet received
+	 * BPF_SOCK_OPS_HDR_OPT_LEN_CB:   Not useful because the
+	 *                                header has not been written.
+	 * BPF_SOCK_OPS_WRITE_HDR_OPT_CB: The header and options have
+	 *				  been written so far.
+	 * BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:  The SYNACK that concludes
+	 *					the 3WHS.
+	 * BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: The ACK that concludes
+	 *					the 3WHS.
+	 *
+	 * bpf_load_hdr_opt() can also be used to read a particular option.
+	 */
+	__bpf_md_ptr(void *, skb_data);
+	__bpf_md_ptr(void *, skb_data_end);
+	__u32 skb_len;		/* The total length of a packet.
+				 * It includes the header, options,
+				 * and payload.
+				 */
+	__u32 skb_tcp_flags;	/* tcp_flags of the header.  It provides
+				 * an easy way to check for tcp_flags
+				 * without parsing skb_data.
+				 *
+				 * In particular, the skb_tcp_flags
+				 * will still be available in
+				 * BPF_SOCK_OPS_HDR_OPT_LEN_CB even though
+				 * the outgoing header has not
+				 * been written yet.
+				 */
 };
 
 /* Definitions for bpf_sock_ops_cb_flags */
@@ -4173,8 +4320,48 @@ enum {
 	BPF_SOCK_OPS_RETRANS_CB_FLAG	= (1<<1),
 	BPF_SOCK_OPS_STATE_CB_FLAG	= (1<<2),
 	BPF_SOCK_OPS_RTT_CB_FLAG	= (1<<3),
-	BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG  = (1<<4),
+	/* Call bpf for all received TCP headers.  The bpf prog will be
+	 * called under sock_ops->op == BPF_SOCK_OPS_PARSE_HDR_OPT_CB
+	 *
+	 * Please refer to the comment in BPF_SOCK_OPS_PARSE_HDR_OPT_CB
+	 * for the header option related helpers that will be useful
+	 * to the bpf programs.
+	 *
+	 * It could be used at the client/active side (i.e. connect() side)
+	 * when the server told it that the server was in syncookie
+	 * mode and required the active side to resend the bpf-written
+	 * options.  The active side can keep writing the bpf-options until
+	 * it received a valid packet from the server side to confirm
+	 * the earlier packet (and options) has been received.  The later
+	 * example patch is using it like this at the active side when the
+	 * server is in syncookie mode.
+	 *
+	 * The bpf prog will usually turn this off in the common cases.
+	 */
+	BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG	= (1<<4),
+	/* Call bpf when kernel has received a header option that
+	 * the kernel cannot handle.  The bpf prog will be called under
+	 * sock_ops->op == BPF_SOCK_OPS_PARSE_HDR_OPT_CB.
+	 *
+	 * Please refer to the comment in BPF_SOCK_OPS_PARSE_HDR_OPT_CB
+	 * for the header option related helpers that will be useful
+	 * to the bpf programs.
+	 */
 	BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG = (1<<5),
+	/* Call bpf when the kernel is writing header options for the
+	 * outgoing packet.  The bpf prog will first be called
+	 * to reserve space in a skb under
+	 * sock_ops->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB.  Then
+	 * the bpf prog will be called to write the header option(s)
+	 * under sock_ops->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+	 *
+	 * Please refer to the comment in BPF_SOCK_OPS_HDR_OPT_LEN_CB
+	 * and BPF_SOCK_OPS_WRITE_HDR_OPT_CB for the header option
+	 * related helpers that will be useful to the bpf programs.
+	 *
+	 * The kernel gets its chance to reserve space and write
+	 * options first before the BPF program does.
+	 */
 	BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6),
 /* Mask of all currently supported cb flags */
 	BPF_SOCK_OPS_ALL_CB_FLAGS       = 0x7F,
@@ -4233,6 +4420,63 @@ enum {
 					 */
 	BPF_SOCK_OPS_RTT_CB,		/* Called on every RTT.
 					 */
+	BPF_SOCK_OPS_PARSE_HDR_OPT_CB,	/* Parse the header option.
+					 * It will be called to handle
+					 * the packets received at
+					 * an already established
+					 * connection.
+					 *
+					 * sock_ops->skb_data:
+					 * Referring to the received skb.
+					 * It covers the TCP header only.
+					 *
+					 * bpf_load_hdr_opt() can also
+					 * be used to search for a
+					 * particular option.
+					 */
+	BPF_SOCK_OPS_HDR_OPT_LEN_CB,	/* Reserve space for writing the
+					 * header option later in
+					 * BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+					 * Arg1: bool want_cookie. (in
+					 *       writing SYNACK only)
+					 *
+					 * sock_ops->skb_data:
+					 * Not available because no header has
+					 * been written yet.
+					 *
+					 * sock_ops->skb_tcp_flags:
+					 * The tcp_flags of the
+					 * outgoing skb. (e.g. SYN, ACK, FIN).
+					 *
+					 * bpf_reserve_hdr_opt() should
+					 * be used to reserve space.
+					 */
+	BPF_SOCK_OPS_WRITE_HDR_OPT_CB,	/* Write the header options
+					 * Arg1: bool want_cookie. (in
+					 *       writing SYNACK only)
+					 *
+					 * sock_ops->skb_data:
+					 * Referring to the outgoing skb.
+					 * It covers the TCP header
+					 * that has already been written
+					 * by the kernel and the
+					 * earlier bpf-progs.
+					 *
+					 * sock_ops->skb_tcp_flags:
+					 * The tcp_flags of the outgoing
+					 * skb. (e.g. SYN, ACK, FIN).
+					 *
+					 * bpf_store_hdr_opt() should
+					 * be used to write the
+					 * option.
+					 *
+					 * bpf_load_hdr_opt() can also
+					 * be used to search for a
+					 * particular option that
+					 * has already been written
+					 * by the kernel or the
+					 * earlier bpf-progs.
+					 */
 };
 
 /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
@@ -4262,6 +4506,60 @@ enum {
 	TCP_BPF_SNDCWND_CLAMP	= 1002,	/* Set sndcwnd_clamp */
 	TCP_BPF_DELACK_MAX	= 1003, /* Max delay ack in usecs */
 	TCP_BPF_RTO_MIN		= 1004, /* Min delay ack in usecs */
+	/* Copy the SYN pkt to optval
+	 *
+	 * BPF_PROG_TYPE_SOCK_OPS only.  It is similar to the
+	 * bpf_getsockopt(TCP_SAVED_SYN) but it does not limit
+	 * to only getting from the saved_syn.  It can either get the
+	 * syn packet from:
+	 *
+	 * 1. the just-received SYN packet (only available when writing the
+	 *    SYNACK).  It will be useful when it is not necessary to
+	 *    save the SYN packet for later use.  It is also the only way
+	 *    to get the SYN during syncookie mode because the syn
+	 *    packet cannot be saved during syncookie.
+	 *
+	 * OR
+	 *
+	 * 2. the earlier saved syn which was done by
+	 *    bpf_setsockopt(TCP_SAVE_SYN).
+	 *
+	 * The bpf_getsockopt(TCP_BPF_SYN*) option will hide where the
+	 * SYN packet is obtained.
+	 *
+	 * If the bpf-prog does not need the IP[46] header,  the
+	 * bpf-prog can avoid parsing the IP header by using
+	 * TCP_BPF_SYN.  Otherwise, the bpf-prog can get both
+	 * IP[46] and TCP header by using TCP_BPF_SYN_IP.
+	 *
+	 *      >0: Total number of bytes copied
+	 * -ENOSPC: Not enough space in optval. Only optlen number of
+	 *          bytes is copied.
+	 * -ENOENT: The SYN skb is not available now and the earlier SYN pkt
+	 *	    is not saved by setsockopt(TCP_SAVE_SYN).
+	 */
+	TCP_BPF_SYN		= 1005, /* Copy the TCP header */
+	TCP_BPF_SYN_IP		= 1006, /* Copy the IP[46] and TCP header */
+};
+
+enum {
+	BPF_LOAD_HDR_OPT_TCP_SYN = (1ULL << 0),
+};
+
+/* args[0] value during BPF_SOCK_OPS_HDR_OPT_LEN_CB and
+ * BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ */
+enum {
+	BPF_WRITE_HDR_TCP_CURRENT_MSS = 1,	/* Kernel is finding the
+						 * total option spaces
+						 * required for an established
+						 * sk in order to calculate the
+						 * MSS.  No skb is actually
+						 * sent.
+						 */
+	BPF_WRITE_HDR_TCP_SYNACK_COOKIE = 2,	/* Kernel is in syncookie mode
+						 * when sending a SYNACK.
+						 */
 };
 
 struct bpf_perf_event_value {
diff --git a/net/core/filter.c b/net/core/filter.c
index 1608f4b3987f..ab5603d5b62a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4669,9 +4669,82 @@ static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = {
 	.arg5_type	= ARG_CONST_SIZE,
 };
 
+static int bpf_sock_ops_get_syn(struct bpf_sock_ops_kern *bpf_sock,
+				int optname, const u8 **start)
+{
+	struct sk_buff *syn_skb = bpf_sock->syn_skb;
+	const u8 *hdr_start;
+	int ret;
+
+	if (syn_skb) {
+		/* sk is a request_sock here */
+
+		if (optname == TCP_BPF_SYN) {
+			hdr_start = syn_skb->data;
+			ret = tcp_hdrlen(syn_skb);
+		} else {
+			/* optname == TCP_BPF_SYN_IP */
+			hdr_start = skb_network_header(syn_skb);
+			ret = skb_network_header_len(syn_skb) +
+				tcp_hdrlen(syn_skb);
+		}
+	} else {
+		struct sock *sk = bpf_sock->sk;
+		struct saved_syn *saved_syn;
+
+		if (sk->sk_state == TCP_NEW_SYN_RECV)
+			/* synack retransmit. bpf_sock->syn_skb will
+			 * not be available.  It has to resort to
+			 * saved_syn (if it is saved).
+			 */
+			saved_syn = inet_reqsk(sk)->saved_syn;
+		else
+			saved_syn = tcp_sk(sk)->saved_syn;
+
+		if (!saved_syn)
+			return -ENOENT;
+
+		if (optname == TCP_BPF_SYN) {
+			hdr_start = saved_syn->data +
+				saved_syn->network_hdrlen;
+			ret = saved_syn->tcp_hdrlen;
+		} else {
+			/* optname == TCP_BPF_SYN_IP */
+			hdr_start = saved_syn->data;
+			ret = saved_syn->network_hdrlen +
+				saved_syn->tcp_hdrlen;
+		}
+	}
+
+	*start = hdr_start;
+	return ret;
+}
+
 BPF_CALL_5(bpf_sock_ops_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
 	   int, level, int, optname, char *, optval, int, optlen)
 {
+	if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP &&
+	    optname >= TCP_BPF_SYN && optname <= TCP_BPF_SYN_IP) {
+		int ret, copy_len = 0;
+		const u8 *start;
+
+		ret = bpf_sock_ops_get_syn(bpf_sock, optname, &start);
+		if (ret > 0) {
+			copy_len = ret;
+			if (optlen < copy_len) {
+				copy_len = optlen;
+				ret = -ENOSPC;
+			}
+
+			memcpy(optval, start, copy_len);
+		}
+
+		/* Zero out unused buffer at the end */
+		memset(optval + copy_len, 0, optlen - copy_len);
+
+		return ret;
+	}
+
 	return _bpf_getsockopt(bpf_sock->sk, level, optname, optval, optlen);
 }
 
@@ -6165,6 +6238,232 @@ static const struct bpf_func_proto bpf_sk_assign_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
+static const u8 *bpf_search_tcp_opt(const u8 *op, const u8 *opend,
+				    u8 search_kind, const u8 *magic,
+				    u8 magic_len, bool *eol)
+{
+	u8 kind, kind_len;
+
+	*eol = false;
+
+	while (op < opend) {
+		kind = op[0];
+
+		if (kind == TCPOPT_EOL) {
+			*eol = true;
+			return ERR_PTR(-ENOMSG);
+		} else if (kind == TCPOPT_NOP) {
+			op++;
+			continue;
+		}
+
+		if (opend - op < 2 || opend - op < op[1] || op[1] < 2)
+			/* Something is wrong in the received header.
+			 * Follow the TCP stack's tcp_parse_options()
+			 * and just bail here.
+			 */
+			return ERR_PTR(-EFAULT);
+
+		kind_len = op[1];
+		if (search_kind == kind) {
+			if (!magic_len)
+				return op;
+
+			if (magic_len > kind_len - 2)
+				return ERR_PTR(-ENOMSG);
+
+			if (!memcmp(&op[2], magic, magic_len))
+				return op;
+		}
+
+		op += kind_len;
+	}
+
+	return ERR_PTR(-ENOMSG);
+}
+
+BPF_CALL_4(bpf_sock_ops_load_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
+	   void *, search_res, u32, len, u64, flags)
+{
+	bool eol, load_syn = flags & BPF_LOAD_HDR_OPT_TCP_SYN;
+	const u8 *op, *opend, *magic, *search = search_res;
+	u8 search_kind, search_len, copy_len, magic_len;
+	int ret;
+
+	/* 2 bytes is the minimal option len except for TCPOPT_NOP and
+	 * TCPOPT_EOL, which are useless for the bpf prog to learn;
+	 * this helper disallows loading them also.
+	 */
+	if (len < 2 || flags & ~BPF_LOAD_HDR_OPT_TCP_SYN)
+		return -EINVAL;
+
+	search_kind = search[0];
+	search_len = search[1];
+
+	if (search_len > len || search_kind == TCPOPT_NOP ||
+	    search_kind == TCPOPT_EOL)
+		return -EINVAL;
+
+	if (search_kind == TCPOPT_EXP || search_kind == 253) {
+		/* 16 or 32 bit magic.  +2 for kind and kind length */
+		if (search_len != 4 && search_len != 6)
+			return -EINVAL;
+		magic = &search[2];
+		magic_len = search_len - 2;
+	} else {
+		if (search_len)
+			return -EINVAL;
+		magic = NULL;
+		magic_len = 0;
+	}
+
+	if (load_syn) {
+		ret = bpf_sock_ops_get_syn(bpf_sock, TCP_BPF_SYN, &op);
+		if (ret < 0)
+			return ret;
+
+		opend = op + ret;
+		op += sizeof(struct tcphdr);
+	} else {
+		if (!bpf_sock->skb ||
+		    bpf_sock->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB)
+			/* This bpf_sock->op cannot call this helper */
+			return -EPERM;
+
+		opend = bpf_sock->skb_data_end;
+		op = bpf_sock->skb->data + sizeof(struct tcphdr);
+	}
+
+	op = bpf_search_tcp_opt(op, opend, search_kind, magic, magic_len,
+				&eol);
+	if (IS_ERR(op))
+		return PTR_ERR(op);
+
+	copy_len = op[1];
+	ret = copy_len;
+	if (copy_len > len) {
+		ret = -ENOSPC;
+		copy_len = len;
+	}
+
+	memcpy(search_res, op, copy_len);
+	return ret;
+}
+
+static const struct bpf_func_proto bpf_sock_ops_load_hdr_opt_proto = {
+	.func		= bpf_sock_ops_load_hdr_opt,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+	.arg4_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_4(bpf_sock_ops_store_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
+	   const void *, from, u32, len, u64, flags)
+{
+	u8 new_kind, new_kind_len, magic_len = 0, *opend;
+	const u8 *op, *new_op, *magic = NULL;
+	struct sk_buff *skb;
+	bool eol;
+
+	if (bpf_sock->op != BPF_SOCK_OPS_WRITE_HDR_OPT_CB)
+		return -EPERM;
+
+	if (len < 2 || flags)
+		return -EINVAL;
+
+	new_op = from;
+	new_kind = new_op[0];
+	new_kind_len = new_op[1];
+
+	if (new_kind_len > len || new_kind == TCPOPT_NOP ||
+	    new_kind == TCPOPT_EOL)
+		return -EINVAL;
+
+	if (new_kind_len > bpf_sock->remaining_opt_len)
+		return -ENOSPC;
+
+	/* 253 is another experimental kind */
+	if (new_kind == TCPOPT_EXP || new_kind == 253)  {
+		if (new_kind_len < 4)
+			return -EINVAL;
+		/* Match for the 2 byte magic also.
+		 * RFC 6994: the magic could be 2 or 4 bytes.
+		 * Hence, matching by 2 byte only is on the
+		 * conservative side but it is the right
+		 * thing to do for the 'search-for-duplication'
+		 * purpose.
+		 */
+		magic = &new_op[2];
+		magic_len = 2;
+	}
+
+	/* Check for duplication */
+	skb = bpf_sock->skb;
+	op = skb->data + sizeof(struct tcphdr);
+	opend = bpf_sock->skb_data_end;
+
+	op = bpf_search_tcp_opt(op, opend, new_kind, magic, magic_len,
+				&eol);
+	if (!IS_ERR(op))
+		return -EEXIST;
+
+	if (PTR_ERR(op) != -ENOMSG)
+		return PTR_ERR(op);
+
+	if (eol)
+		/* The option has been ended.  Treat it as no more
+		 * header option can be written.
+		 */
+		return -ENOSPC;
+
+	/* No duplication found.  Store the header option. */
+	memcpy(opend, from, new_kind_len);
+
+	bpf_sock->remaining_opt_len -= new_kind_len;
+	bpf_sock->skb_data_end += new_kind_len;
+
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_sock_ops_store_hdr_opt_proto = {
+	.func		= bpf_sock_ops_store_hdr_opt,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+	.arg4_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_3(bpf_sock_ops_reserve_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
+	   u32, len, u64, flags)
+{
+	if (bpf_sock->op != BPF_SOCK_OPS_HDR_OPT_LEN_CB)
+		return -EPERM;
+
+	if (flags || len < 2)
+		return -EINVAL;
+
+	if (len > bpf_sock->remaining_opt_len)
+		return -ENOSPC;
+
+	bpf_sock->remaining_opt_len -= len;
+
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_sock_ops_reserve_hdr_opt_proto = {
+	.func		= bpf_sock_ops_reserve_hdr_opt,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+};
+
 #endif /* CONFIG_INET */
 
 bool bpf_helper_changes_pkt_data(void *func)
@@ -6193,6 +6492,9 @@ bool bpf_helper_changes_pkt_data(void *func)
 	    func == bpf_lwt_seg6_store_bytes ||
 	    func == bpf_lwt_seg6_adjust_srh ||
 	    func == bpf_lwt_seg6_action ||
+#endif
+#ifdef CONFIG_INET
+	    func == bpf_sock_ops_store_hdr_opt ||
 #endif
 	    func == bpf_lwt_in_push_encap ||
 	    func == bpf_lwt_xmit_push_encap)
@@ -6565,6 +6867,12 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_sk_storage_delete:
 		return &bpf_sk_storage_delete_proto;
 #ifdef CONFIG_INET
+	case BPF_FUNC_load_hdr_opt:
+		return &bpf_sock_ops_load_hdr_opt_proto;
+	case BPF_FUNC_store_hdr_opt:
+		return &bpf_sock_ops_store_hdr_opt_proto;
+	case BPF_FUNC_reserve_hdr_opt:
+		return &bpf_sock_ops_reserve_hdr_opt_proto;
 	case BPF_FUNC_tcp_sock:
 		return &bpf_tcp_sock_proto;
 #endif /* CONFIG_INET */
@@ -7364,6 +7672,20 @@ static bool sock_ops_is_valid_access(int off, int size,
 				return false;
 			info->reg_type = PTR_TO_SOCKET_OR_NULL;
 			break;
+		case offsetof(struct bpf_sock_ops, skb_data):
+			if (size != sizeof(__u64))
+				return false;
+			info->reg_type = PTR_TO_PACKET;
+			break;
+		case offsetof(struct bpf_sock_ops, skb_data_end):
+			if (size != sizeof(__u64))
+				return false;
+			info->reg_type = PTR_TO_PACKET_END;
+			break;
+		case offsetof(struct bpf_sock_ops, skb_tcp_flags):
+			bpf_ctx_record_field_size(info, size_default);
+			return bpf_ctx_narrow_access_ok(off, size,
+							size_default);
 		default:
 			if (size != size_default)
 				return false;
@@ -8701,6 +9023,49 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
 	case offsetof(struct bpf_sock_ops, sk):
 		SOCK_OPS_GET_SK();
 		break;
+	case offsetof(struct bpf_sock_ops, skb_data_end):
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
+						       skb_data_end),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct bpf_sock_ops_kern,
+					       skb_data_end));
+		break;
+	case offsetof(struct bpf_sock_ops, skb_data):
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
+						       skb),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct bpf_sock_ops_kern,
+					       skb));
+		*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
+				      si->dst_reg, si->dst_reg,
+				      offsetof(struct sk_buff, data));
+		break;
+	case offsetof(struct bpf_sock_ops, skb_len):
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
+						       skb),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct bpf_sock_ops_kern,
+					       skb));
+		*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len),
+				      si->dst_reg, si->dst_reg,
+				      offsetof(struct sk_buff, len));
+		break;
+	case offsetof(struct bpf_sock_ops, skb_tcp_flags):
+		off = offsetof(struct sk_buff, cb);
+		off += offsetof(struct tcp_skb_cb, tcp_flags);
+		*target_size = sizeof_field(struct tcp_skb_cb, tcp_flags);
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
+						       skb),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct bpf_sock_ops_kern,
+					       skb));
+		*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_skb_cb,
+						       tcp_flags),
+				      si->dst_reg, si->dst_reg, off);
+		break;
 	}
 	return insn - insn_buf;
 }
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8c9da4b65dae..319cc7fd5117 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -146,6 +146,7 @@ static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb)
 				       BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG);
 	bool parse_all_opt = BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk),
 						    BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG);
+	struct bpf_sock_ops_kern sock_ops;
 
 	if (likely(!unknown_opt && !parse_all_opt))
 		return;
@@ -161,12 +162,15 @@ static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb)
 		return;
 	}
 
-	/* BPF prog will have access to the sk and skb.
-	 *
-	 * The bpf running context preparation and the actual bpf prog
-	 * calling will be implemented in a later PATCH together with
-	 * other bpf pieces.
-	 */
+	sock_owned_by_me(sk);
+
+	memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
+	sock_ops.op = BPF_SOCK_OPS_PARSE_HDR_OPT_CB;
+	sock_ops.is_fullsock = 1;
+	sock_ops.sk = sk;
+	bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb));
+
+	BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
 }
 
 static void bpf_skops_established(struct sock *sk, int bpf_op,
@@ -180,7 +184,9 @@ static void bpf_skops_established(struct sock *sk, int bpf_op,
 	sock_ops.op = bpf_op;
 	sock_ops.is_fullsock = 1;
 	sock_ops.sk = sk;
-	/* skb will be passed to the bpf prog in a later patch. */
+	/* sk with TCP_REPAIR_ON does not have skb in tcp_finish_connect */
+	if (skb)
+		bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb));
 
 	BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
 }
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 495dda2449fe..56c306e3cd2f 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -548,6 +548,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 	newtp->fastopen_req = NULL;
 	RCU_INIT_POINTER(newtp->fastopen_rsk, NULL);
 
+	bpf_skops_init_child(sk, newsk);
 	tcp_bpf_clone(sk, newsk);
 
 	__TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 673db6879e46..ab79d36ed07f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -454,6 +454,18 @@ static void mptcp_options_write(__be32 *ptr, struct tcp_out_options *opts)
 }
 
 #ifdef CONFIG_CGROUP_BPF
+static int bpf_skops_write_hdr_opt_arg0(struct sk_buff *skb,
+					enum tcp_synack_type synack_type)
+{
+	if (unlikely(!skb))
+		return BPF_WRITE_HDR_TCP_CURRENT_MSS;
+
+	if (unlikely(synack_type == TCP_SYNACK_COOKIE))
+		return BPF_WRITE_HDR_TCP_SYNACK_COOKIE;
+
+	return 0;
+}
+
 /* req, syn_skb and synack_type are used when writing synack */
 static void bpf_skops_hdr_opt_len(struct sock *sk, struct sk_buff *skb,
 				  struct request_sock *req,
@@ -462,15 +474,60 @@ static void bpf_skops_hdr_opt_len(struct sock *sk, struct sk_buff *skb,
 				  struct tcp_out_options *opts,
 				  unsigned int *remaining)
 {
+	struct bpf_sock_ops_kern sock_ops;
+	int err;
+
 	if (likely(!BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk),
 					   BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG)) ||
 	    !*remaining)
 		return;
 
-	/* The bpf running context preparation and the actual bpf prog
-	 * calling will be implemented in a later PATCH together with
-	 * other bpf pieces.
-	 */
+	/* *remaining has already been aligned to 4 bytes, so *remaining >= 4 */
+
+	/* init sock_ops */
+	memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
+
+	sock_ops.op = BPF_SOCK_OPS_HDR_OPT_LEN_CB;
+
+	if (req) {
+		/* The listen "sk" cannot be passed here because
+		 * it is not locked.  It would not make too much
+		 * sense to do bpf_setsockopt(listen_sk) based
+		 * on individual connection request also.
+		 *
+		 * Thus, "req" is passed here and the cgroup-bpf-progs
+		 * of the listen "sk" will be run.
+		 *
+		 * "req" is also used here for fastopen even though the "sk"
+		 * here is a fullsock "child" sk.  It is to keep the behavior
+		 * consistent between fastopen and non-fastopen on
+		 * the bpf programming side.
+		 */
+		sock_ops.sk = (struct sock *)req;
+		sock_ops.syn_skb = syn_skb;
+	} else {
+		sock_owned_by_me(sk);
+
+		sock_ops.is_fullsock = 1;
+		sock_ops.sk = sk;
+	}
+
+	sock_ops.args[0] = bpf_skops_write_hdr_opt_arg0(skb, synack_type);
+	sock_ops.remaining_opt_len = *remaining;
+	/* tcp_current_mss() does not pass a skb */
+	if (skb)
+		bpf_skops_init_skb(&sock_ops, skb, 0);
+
+	err = BPF_CGROUP_RUN_PROG_SOCK_OPS_SK(&sock_ops, sk);
+
+	if (err || sock_ops.remaining_opt_len == *remaining)
+		return;
+
+	opts->bpf_opt_len = *remaining - sock_ops.remaining_opt_len;
+	/* round up to 4 bytes */
+	opts->bpf_opt_len = (opts->bpf_opt_len + 3) & ~3;
+
+	*remaining -= opts->bpf_opt_len;
 }
 
 static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb,
@@ -479,13 +536,42 @@ static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb,
 				    enum tcp_synack_type synack_type,
 				    struct tcp_out_options *opts)
 {
-	if (likely(!opts->bpf_opt_len))
+	u8 first_opt_off, nr_written, max_opt_len = opts->bpf_opt_len;
+	struct bpf_sock_ops_kern sock_ops;
+	int err;
+
+	if (likely(!max_opt_len))
 		return;
 
-	/* The bpf running context preparation and the actual bpf prog
-	 * calling will be implemented in a later PATCH together with
-	 * other bpf pieces.
-	 */
+	memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
+
+	sock_ops.op = BPF_SOCK_OPS_WRITE_HDR_OPT_CB;
+
+	if (req) {
+		sock_ops.sk = (struct sock *)req;
+		sock_ops.syn_skb = syn_skb;
+	} else {
+		sock_owned_by_me(sk);
+
+		sock_ops.is_fullsock = 1;
+		sock_ops.sk = sk;
+	}
+
+	sock_ops.args[0] = bpf_skops_write_hdr_opt_arg0(skb, synack_type);
+	sock_ops.remaining_opt_len = max_opt_len;
+	first_opt_off = tcp_hdrlen(skb) - max_opt_len;
+	bpf_skops_init_skb(&sock_ops, skb, first_opt_off);
+
+	err = BPF_CGROUP_RUN_PROG_SOCK_OPS_SK(&sock_ops, sk);
+
+	if (err)
+		nr_written = 0;
+	else
+		nr_written = max_opt_len - sock_ops.remaining_opt_len;
+
+	if (nr_written < max_opt_len)
+		memset(skb->data + first_opt_off + nr_written, TCPOPT_NOP,
+		       max_opt_len - nr_written);
 }
 #else
 static void bpf_skops_hdr_opt_len(struct sock *sk, struct sk_buff *skb,
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 18d0e128bc3c..f67ec5d9e57d 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3395,6 +3395,120 @@ union bpf_attr {
  *		A non-negative value equal to or less than *size* on success,
  *		or a negative error in case of failure.
  *
+ * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags)
+ *	Description
+ *		Load header option.  Support reading a particular TCP header
+ *		option for bpf program (BPF_PROG_TYPE_SOCK_OPS).
+ *
+ *		If *flags* is 0, it will search the option from the
+ *		sock_ops->skb_data.  The comment in "struct bpf_sock_ops"
+ *		has details on what skb_data contains under different
+ *		sock_ops->op.
+ *
+ *		The first byte of the *searchby_res* specifies the
+ *		kind that it wants to search.
+ *
+ *		If the searching kind is an experimental kind
+ *		(i.e. 253 or 254 according to RFC6994), it also
+ *		needs to specify the "magic", which is either
+ *		2 bytes or 4 bytes.  It then also needs to
+ *		specify the size of the magic by using
+ *		the 2nd byte, which is the "kind-length" of a TCP
+ *		header option.  As in a normal TCP header option,
+ *		the "kind-length" also includes the first 2 bytes,
+ *		"kind" and "kind-length" itself.
+ *
+ *		For example, to search experimental kind 254 with
+ *		2 byte magic 0xeB9F, the searchby_res should be
+ *		[ 254, 4, 0xeB, 0x9F, 0, 0, .... 0 ].
+ *
+ *		To search for the standard window scale option (3),
+ *		the searchby_res should be [ 3, 0, 0, .... 0 ].
+ *		Note, kind-length must be 0 for regular option.
+ *
+ *		Searching for No-Op (1) and End-of-Option-List (0) is
+ *		not supported.
+ *
+ *		*len* must be at least 2 bytes which is the minimal size
+ *		of a header option.
+ *
+ *		Supported flags:
+ *		* **BPF_LOAD_HDR_OPT_TCP_SYN** to search from the
+ *		  saved_syn packet or the just-received syn packet.
+ *
+ *	Return
+ *		>0 when found, the header option is copied to *searchby_res*.
+ *		The return value is the total length copied.
+ *
+ *		**-EINVAL** If param is invalid
+ *
+ *		**-ENOMSG** The option is not found
+ *
+ *		**-ENOENT** No syn packet available when
+ *			    **BPF_LOAD_HDR_OPT_TCP_SYN** is used
+ *
+ *		**-ENOSPC** Not enough space.  Only *len* number of
+ *			    bytes are copied.
+ *
+ *		**-EFAULT** Cannot parse the header options in the packet
+ *
+ *		**-EPERM** This helper cannot be used under the
+ *			   current sock_ops->op.
+ *
+ * long bpf_store_hdr_opt(struct bpf_sock_ops *skops, const void *from, u32 len, u64 flags)
+ *	Description
+ *		Store header option.  The data will be copied
+ *		from buffer *from* with length *len* to the TCP header.
+ *
+ *		The buffer *from* should have the whole option that
+ *		includes the kind, kind-length, and the actual
+ *		option data.  The *len* must be at least kind-length
+ *		long.  The kind-length does not have to be 4 byte
+ *		aligned.  The kernel will take care of the padding
+ *		and setting the 4 bytes aligned value to th->doff.
+ *
+ *		This helper will check for duplicated option
+ *		by searching the same option in the outgoing skb.
+ *
+ *		This helper can only be called during
+ *		BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ *
+ *	Return
+ *		0 on success, or negative error in case of failure:
+ *
+ *		**-EINVAL** If param is invalid
+ *
+ *		**-ENOSPC** Not enough space in the header.
+ *			    Nothing has been written
+ *
+ *		**-EEXIST** The option already exists
+ *
+ *		**-EFAULT** Cannot parse the existing header options
+ *
+ *		**-EPERM** This helper cannot be used under the
+ *			   current sock_ops->op.
+ *
+ * long bpf_reserve_hdr_opt(struct bpf_sock_ops *skops, u32 len, u64 flags)
+ *	Description
+ *		Reserve *len* bytes for the bpf header option.  The
+ *		space will be used by bpf_store_hdr_opt() later in
+ *		BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ *
+ *		If bpf_reserve_hdr_opt() is called multiple times,
+ *		the total number of bytes will be reserved.
+ *
+ *		This helper can only be called during
+ *		BPF_SOCK_OPS_HDR_OPT_LEN_CB.
+ *
+ *	Return
+ *		0 on success, or negative error in case of failure:
+ *
+ *		**-EINVAL** if param is invalid
+ *
+ *		**-ENOSPC** Not enough space in the header.
+ *
+ *		**-EPERM** This helper cannot be used under the
+ *			   current sock_ops->op.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3539,6 +3653,9 @@ union bpf_attr {
 	FN(skc_to_tcp_request_sock),	\
 	FN(skc_to_udp6_sock),		\
 	FN(get_task_stack),		\
+	FN(load_hdr_opt),		\
+	FN(store_hdr_opt),		\
+	FN(reserve_hdr_opt),
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -4165,6 +4282,36 @@ struct bpf_sock_ops {
 	__u64 bytes_received;
 	__u64 bytes_acked;
 	__bpf_md_ptr(struct bpf_sock *, sk);
+	/* [skb_data, skb_data_end) covers the whole TCP header.
+	 *
+	 * BPF_SOCK_OPS_PARSE_HDR_OPT_CB: The packet received
+	 * BPF_SOCK_OPS_HDR_OPT_LEN_CB:   Not useful because the
+	 *                                header has not been written.
+	 * BPF_SOCK_OPS_WRITE_HDR_OPT_CB: The header and options have
+	 *				  been written so far.
+	 * BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:  The SYNACK that concludes
+	 *					the 3WHS.
+	 * BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: The ACK that concludes
+	 *					the 3WHS.
+	 *
+	 * bpf_load_hdr_opt() can also be used to read a particular option.
+	 */
+	__bpf_md_ptr(void *, skb_data);
+	__bpf_md_ptr(void *, skb_data_end);
+	__u32 skb_len;		/* The total length of a packet.
+				 * It includes the header, options,
+				 * and payload.
+				 */
+	__u32 skb_tcp_flags;	/* tcp_flags of the header.  It provides
+				 * an easy way to check for tcp_flags
+				 * without parsing skb_data.
+				 *
+				 * In particular, the skb_tcp_flags
+				 * will still be available in
+				 * BPF_SOCK_OPS_HDR_OPT_LEN_CB even though
+				 * the outgoing header has not
+				 * been written yet.
+				 */
 };
 
 /* Definitions for bpf_sock_ops_cb_flags */
@@ -4173,8 +4320,48 @@ enum {
 	BPF_SOCK_OPS_RETRANS_CB_FLAG	= (1<<1),
 	BPF_SOCK_OPS_STATE_CB_FLAG	= (1<<2),
 	BPF_SOCK_OPS_RTT_CB_FLAG	= (1<<3),
-	BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG  = (1<<4),
+	/* Call bpf for all received TCP headers.  The bpf prog will be
+	 * called under sock_ops->op == BPF_SOCK_OPS_PARSE_HDR_OPT_CB
+	 *
+	 * Please refer to the comment in BPF_SOCK_OPS_PARSE_HDR_OPT_CB
+	 * for the header option related helpers that will be useful
+	 * to the bpf programs.
+	 *
+	 * It could be used at the client/active side (i.e. connect() side)
+	 * when the server told it that the server was in syncookie
+	 * mode and required the active side to resend the bpf-written
+	 * options.  The active side can keep writing the bpf-options until
+	 * it received a valid packet from the server side to confirm
+	 * the earlier packet (and options) has been received.  The later
+	 * example patch is using it like this at the active side when the
+	 * server is in syncookie mode.
+	 *
+	 * The bpf prog will usually turn this off in the common cases.
+	 */
+	BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG	= (1<<4),
+	/* Call bpf when kernel has received a header option that
+	 * the kernel cannot handle.  The bpf prog will be called under
+	 * sock_ops->op == BPF_SOCK_OPS_PARSE_HDR_OPT_CB.
+	 *
+	 * Please refer to the comment in BPF_SOCK_OPS_PARSE_HDR_OPT_CB
+	 * for the header option related helpers that will be useful
+	 * to the bpf programs.
+	 */
 	BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG = (1<<5),
+	/* Call bpf when the kernel is writing header options for the
+	 * outgoing packet.  The bpf prog will first be called
+	 * to reserve space in a skb under
+	 * sock_ops->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB.  Then
+	 * the bpf prog will be called to write the header option(s)
+	 * under sock_ops->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+	 *
+	 * Please refer to the comment in BPF_SOCK_OPS_HDR_OPT_LEN_CB
+	 * and BPF_SOCK_OPS_WRITE_HDR_OPT_CB for the header option
+	 * related helpers that will be useful to the bpf programs.
+	 *
+	 * The kernel gets its chance to reserve space and write
+	 * options first before the BPF program does.
+	 */
 	BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6),
 /* Mask of all currently supported cb flags */
 	BPF_SOCK_OPS_ALL_CB_FLAGS       = 0x7F,
@@ -4233,6 +4420,63 @@ enum {
 					 */
 	BPF_SOCK_OPS_RTT_CB,		/* Called on every RTT.
 					 */
+	BPF_SOCK_OPS_PARSE_HDR_OPT_CB,	/* Parse the header option.
+					 * It will be called to handle
+					 * the packets received at
+					 * an already established
+					 * connection.
+					 *
+					 * sock_ops->skb_data:
+					 * Referring to the received skb.
+					 * It covers the TCP header only.
+					 *
+					 * bpf_load_hdr_opt() can also
+					 * be used to search for a
+					 * particular option.
+					 */
+	BPF_SOCK_OPS_HDR_OPT_LEN_CB,	/* Reserve space for writing the
+					 * header option later in
+					 * BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+					 * Arg1: 0 or one of the
+					 *       BPF_WRITE_HDR_TCP_* values.
+					 *
+					 * sock_ops->skb_data:
+					 * Not available because no header has
+					 * been written yet.
+					 *
+					 * sock_ops->skb_tcp_flags:
+					 * The tcp_flags of the
+					 * outgoing skb. (e.g. SYN, ACK, FIN).
+					 *
+					 * bpf_reserve_hdr_opt() should
+					 * be used to reserve space.
+					 */
+	BPF_SOCK_OPS_WRITE_HDR_OPT_CB,	/* Write the header options
+					 * Arg1: 0 or one of the
+					 *       BPF_WRITE_HDR_TCP_* values.
+					 *
+					 * sock_ops->skb_data:
+					 * Referring to the outgoing skb.
+					 * It covers the TCP header
+					 * that has already been written
+					 * by the kernel and the
+					 * earlier bpf-progs.
+					 *
+					 * sock_ops->skb_tcp_flags:
+					 * The tcp_flags of the outgoing
+					 * skb. (e.g. SYN, ACK, FIN).
+					 *
+					 * bpf_store_hdr_opt() should
+					 * be used to write the
+					 * option.
+					 *
+					 * bpf_load_hdr_opt() can also
+					 * be used to search for a
+					 * particular option that
+					 * has already been written
+					 * by the kernel or the
+					 * earlier bpf-progs.
+					 */
 };
 
 /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
@@ -4262,6 +4506,60 @@ enum {
 	TCP_BPF_SNDCWND_CLAMP	= 1002,	/* Set sndcwnd_clamp */
 	TCP_BPF_DELACK_MAX	= 1003, /* Max delay ack in usecs */
 	TCP_BPF_RTO_MIN		= 1004, /* Min delay ack in usecs */
+	/* Copy the SYN pkt to optval
+	 *
+	 * BPF_PROG_TYPE_SOCK_OPS only.  It is similar to the
+	 * bpf_getsockopt(TCP_SAVED_SYN) but it does not limit
+	 * to only getting from the saved_syn.  It can either get the
+	 * syn packet from:
+	 *
+	 * 1. the just-received SYN packet (only available when writing the
+	 *    SYNACK).  It will be useful when it is not necessary to
+	 *    save the SYN packet for later use.  It is also the only way
+	 *    to get the SYN during syncookie mode because the syn
+	 *    packet cannot be saved during syncookie.
+	 *
+	 * OR
+	 *
+	 * 2. the earlier saved syn which was done by
+	 *    bpf_setsockopt(TCP_SAVE_SYN).
+	 *
+	 * The bpf_getsockopt(TCP_BPF_SYN*) option will hide where the
+	 * SYN packet is obtained.
+	 *
+	 * If the bpf-prog does not need the IP[46] header,  the
+	 * bpf-prog can avoid parsing the IP header by using
+	 * TCP_BPF_SYN.  Otherwise, the bpf-prog can get both
+	 * IP[46] and TCP header by using TCP_BPF_SYN_IP.
+	 *
+	 *      >0: Total number of bytes copied
+	 * -ENOSPC: Not enough space in optval. Only optlen number of
+	 *          bytes is copied.
+	 * -ENOENT: The SYN skb is not available now and the earlier SYN pkt
+	 *	    is not saved by setsockopt(TCP_SAVE_SYN).
+	 */
+	TCP_BPF_SYN		= 1005, /* Copy the TCP header */
+	TCP_BPF_SYN_IP		= 1006, /* Copy the IP[46] and TCP header */
+};
+
+enum {
+	BPF_LOAD_HDR_OPT_TCP_SYN = (1ULL << 0),
+};
+
+/* args[0] value during BPF_SOCK_OPS_HDR_OPT_LEN_CB and
+ * BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ */
+enum {
+	BPF_WRITE_HDR_TCP_CURRENT_MSS = 1,	/* Kernel is finding the
+						 * total option spaces
+						 * required for an established
+						 * sk in order to calculate the
+						 * MSS.  No skb is actually
+						 * sent.
+						 */
+	BPF_WRITE_HDR_TCP_SYNACK_COOKIE = 2,	/* Kernel is in syncookie mode
+						 * when sending a SYNACK.
+						 */
 };
 
 struct bpf_perf_event_value {
-- 
cgit v1.2.3


From 267cf9fa43d1c9d525d5d818a8651f2900e3aa9e Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 20 Aug 2020 12:01:23 -0700
Subject: tcp: bpf: Optionally store mac header in TCP_SAVE_SYN

This patch is adapted from Eric's patch in an earlier discussion [1].

The TCP_SAVE_SYN currently only stores the network header and
tcp header.  This patch allows it to optionally store
the mac header also if the setsockopt's optval is 2.

It requires one more bit for the "save_syn" bit field in tcp_sock.
This patch achieves this by moving the syn_smc bit next to the is_mptcp.
The syn_smc is currently used with the TCP experimental option.  Since
syn_smc is only used when CONFIG_SMC is enabled, this patch also puts
the "IS_ENABLED(CONFIG_SMC)" around it like the is_mptcp did
with "IS_ENABLED(CONFIG_MPTCP)".

The mac_hdrlen is also stored in the "struct saved_syn"
to allow a quick offset from the bpf prog if it chooses to start
getting from the network header or the tcp header.

[1]: https://lore.kernel.org/netdev/CANn89iLJNWh6bkH7DNhy_kmcAexuUCccqERqe7z2QsvPhGrYPQ@mail.gmail.com/

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/bpf/20200820190123.2886935-1-kafai@fb.com
---
 include/linux/tcp.h            | 13 ++++++++-----
 include/net/request_sock.h     |  1 +
 include/uapi/linux/bpf.h       |  1 +
 net/core/filter.c              | 27 ++++++++++++++++++++++-----
 net/ipv4/tcp.c                 |  3 ++-
 net/ipv4/tcp_input.c           | 14 +++++++++++++-
 tools/include/uapi/linux/bpf.h |  1 +
 7 files changed, 48 insertions(+), 12 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 29d166263ae7..56ff2952edaf 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -239,14 +239,13 @@ struct tcp_sock {
 		repair      : 1,
 		frto        : 1;/* F-RTO (RFC5682) activated in CA_Loss */
 	u8	repair_queue;
-	u8	syn_data:1,	/* SYN includes data */
+	u8	save_syn:2,	/* Save headers of SYN packet */
+		syn_data:1,	/* SYN includes data */
 		syn_fastopen:1,	/* SYN includes Fast Open option */
 		syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
 		syn_fastopen_ch:1, /* Active TFO re-enabling probe */
 		syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
-		save_syn:1,	/* Save headers of SYN packet */
-		is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
-		syn_smc:1;	/* SYN includes SMC */
+		is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */
 	u32	tlp_high_seq;	/* snd_nxt at the time of TLP */
 
 	u32	tcp_tx_delay;	/* delay (in usec) added to TX packets */
@@ -393,6 +392,9 @@ struct tcp_sock {
 #if IS_ENABLED(CONFIG_MPTCP)
 	bool	is_mptcp;
 #endif
+#if IS_ENABLED(CONFIG_SMC)
+	bool	syn_smc;	/* SYN includes SMC */
+#endif
 
 #ifdef CONFIG_TCP_MD5SIG
 /* TCP AF-Specific parts; only used by MD5 Signature support so far */
@@ -488,7 +490,8 @@ static inline void tcp_saved_syn_free(struct tcp_sock *tp)
 
 static inline u32 tcp_saved_syn_len(const struct saved_syn *saved_syn)
 {
-	return saved_syn->network_hdrlen + saved_syn->tcp_hdrlen;
+	return saved_syn->mac_hdrlen + saved_syn->network_hdrlen +
+		saved_syn->tcp_hdrlen;
 }
 
 struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 7d9ed99a77bd..29e41ff3ec93 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -42,6 +42,7 @@ struct request_sock_ops {
 int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req);
 
 struct saved_syn {
+	u32 mac_hdrlen;
 	u32 network_hdrlen;
 	u32 tcp_hdrlen;
 	u8 data[];
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f67ec5d9e57d..544b89a64918 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4540,6 +4540,7 @@ enum {
 	 */
 	TCP_BPF_SYN		= 1005, /* Copy the TCP header */
 	TCP_BPF_SYN_IP		= 1006, /* Copy the IP[46] and TCP header */
+	TCP_BPF_SYN_MAC         = 1007, /* Copy the MAC, IP[46], and TCP header */
 };
 
 enum {
diff --git a/net/core/filter.c b/net/core/filter.c
index ab5603d5b62a..47eef9a0be6a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4682,11 +4682,16 @@ static int bpf_sock_ops_get_syn(struct bpf_sock_ops_kern *bpf_sock,
 		if (optname == TCP_BPF_SYN) {
 			hdr_start = syn_skb->data;
 			ret = tcp_hdrlen(syn_skb);
-		} else {
-			/* optname == TCP_BPF_SYN_IP */
+		} else if (optname == TCP_BPF_SYN_IP) {
 			hdr_start = skb_network_header(syn_skb);
 			ret = skb_network_header_len(syn_skb) +
 				tcp_hdrlen(syn_skb);
+		} else {
+			/* optname == TCP_BPF_SYN_MAC */
+			hdr_start = skb_mac_header(syn_skb);
+			ret = skb_mac_header_len(syn_skb) +
+				skb_network_header_len(syn_skb) +
+				tcp_hdrlen(syn_skb);
 		}
 	} else {
 		struct sock *sk = bpf_sock->sk;
@@ -4706,12 +4711,24 @@ static int bpf_sock_ops_get_syn(struct bpf_sock_ops_kern *bpf_sock,
 
 		if (optname == TCP_BPF_SYN) {
 			hdr_start = saved_syn->data +
+				saved_syn->mac_hdrlen +
 				saved_syn->network_hdrlen;
 			ret = saved_syn->tcp_hdrlen;
+		} else if (optname == TCP_BPF_SYN_IP) {
+			hdr_start = saved_syn->data +
+				saved_syn->mac_hdrlen;
+			ret = saved_syn->network_hdrlen +
+				saved_syn->tcp_hdrlen;
 		} else {
-			/* optname == TCP_BPF_SYN_IP */
+			/* optname == TCP_BPF_SYN_MAC */
+
+			/* TCP_SAVE_SYN may not have saved the mac hdr */
+			if (!saved_syn->mac_hdrlen)
+				return -ENOENT;
+
 			hdr_start = saved_syn->data;
-			ret = saved_syn->network_hdrlen +
+			ret = saved_syn->mac_hdrlen +
+				saved_syn->network_hdrlen +
 				saved_syn->tcp_hdrlen;
 		}
 	}
@@ -4724,7 +4741,7 @@ BPF_CALL_5(bpf_sock_ops_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
 	   int, level, int, optname, char *, optval, int, optlen)
 {
 	if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP &&
-	    optname >= TCP_BPF_SYN && optname <= TCP_BPF_SYN_IP) {
+	    optname >= TCP_BPF_SYN && optname <= TCP_BPF_SYN_MAC) {
 		int ret, copy_len = 0;
 		const u8 *start;
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6075cb091a20..57a568875539 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3211,7 +3211,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
 		break;
 
 	case TCP_SAVE_SYN:
-		if (val < 0 || val > 1)
+		/* 0: disable, 1: enable, 2: start from ether_header */
+		if (val < 0 || val > 2)
 			err = -EINVAL;
 		else
 			tp->save_syn = val;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 319cc7fd5117..4337841faeff 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6676,13 +6676,25 @@ static void tcp_reqsk_record_syn(const struct sock *sk,
 	if (tcp_sk(sk)->save_syn) {
 		u32 len = skb_network_header_len(skb) + tcp_hdrlen(skb);
 		struct saved_syn *saved_syn;
+		u32 mac_hdrlen;
+		void *base;
+
+		if (tcp_sk(sk)->save_syn == 2) {  /* Save full header. */
+			base = skb_mac_header(skb);
+			mac_hdrlen = skb_mac_header_len(skb);
+			len += mac_hdrlen;
+		} else {
+			base = skb_network_header(skb);
+			mac_hdrlen = 0;
+		}
 
 		saved_syn = kmalloc(struct_size(saved_syn, data, len),
 				    GFP_ATOMIC);
 		if (saved_syn) {
+			saved_syn->mac_hdrlen = mac_hdrlen;
 			saved_syn->network_hdrlen = skb_network_header_len(skb);
 			saved_syn->tcp_hdrlen = tcp_hdrlen(skb);
-			memcpy(saved_syn->data, skb_network_header(skb), len);
+			memcpy(saved_syn->data, base, len);
 			req->saved_syn = saved_syn;
 		}
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f67ec5d9e57d..544b89a64918 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4540,6 +4540,7 @@ enum {
 	 */
 	TCP_BPF_SYN		= 1005, /* Copy the TCP header */
 	TCP_BPF_SYN_IP		= 1006, /* Copy the IP[46] and TCP header */
+	TCP_BPF_SYN_MAC         = 1007, /* Copy the MAC, IP[46], and TCP header */
 };
 
 enum {
-- 
cgit v1.2.3


From f836a56e84ffc9f1a1cd73f77e10404ca46a4616 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Tue, 25 Aug 2020 20:29:15 +0200
Subject: bpf: Generalize bpf_sk_storage

Refactor the functionality in bpf_sk_storage.c so that the concept of
storage linked to kernel objects can be extended to other objects like
inode, task_struct, etc.

Each new local storage will still be a separate map and provide its own
set of helpers. This allows for future object specific extensions and
still share a lot of the underlying implementation.

This includes the changes suggested by Martin in:

  https://lore.kernel.org/bpf/20200725013047.4006241-1-kafai@fb.com/

adding new map operations to support bpf_local_storage maps:

* storages for different kernel objects to optionally have different
  memory charging strategy (map_local_storage_charge,
  map_local_storage_uncharge)
* Functionality to extract the storage pointer from a pointer to the
  owning object (map_owner_storage_ptr)

Co-developed-by: Martin KaFai Lau <kafai@fb.com>

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200825182919.1118197-4-kpsingh@chromium.org
---
 include/linux/bpf.h            |   8 ++
 include/net/bpf_sk_storage.h   |  52 +++++++++
 include/uapi/linux/bpf.h       |   8 +-
 net/core/bpf_sk_storage.c      | 238 +++++++++++++++++++++++++++--------------
 tools/include/uapi/linux/bpf.h |   8 +-
 5 files changed, 228 insertions(+), 86 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 81f38e2fda78..8c443b93ac11 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -34,6 +34,8 @@ struct btf_type;
 struct exception_table_entry;
 struct seq_operations;
 struct bpf_iter_aux_info;
+struct bpf_local_storage;
+struct bpf_local_storage_map;
 
 extern struct idr btf_idr;
 extern spinlock_t btf_idr_lock;
@@ -104,6 +106,12 @@ struct bpf_map_ops {
 	__poll_t (*map_poll)(struct bpf_map *map, struct file *filp,
 			     struct poll_table_struct *pts);
 
+	/* Functions called by bpf_local_storage maps */
+	int (*map_local_storage_charge)(struct bpf_local_storage_map *smap,
+					void *owner, u32 size);
+	void (*map_local_storage_uncharge)(struct bpf_local_storage_map *smap,
+					   void *owner, u32 size);
+	struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner);
 	/* BTF name and id of struct allocated by map_alloc */
 	const char * const map_btf_name;
 	int *map_btf_id;
diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h
index 950c5aaba15e..9e631b5466e3 100644
--- a/include/net/bpf_sk_storage.h
+++ b/include/net/bpf_sk_storage.h
@@ -3,8 +3,15 @@
 #ifndef _BPF_SK_STORAGE_H
 #define _BPF_SK_STORAGE_H
 
+#include <linux/rculist.h>
+#include <linux/list.h>
+#include <linux/hash.h>
 #include <linux/types.h>
 #include <linux/spinlock.h>
+#include <linux/bpf.h>
+#include <net/sock.h>
+#include <uapi/linux/sock_diag.h>
+#include <uapi/linux/btf.h>
 
 struct sock;
 
@@ -13,6 +20,7 @@ void bpf_sk_storage_free(struct sock *sk);
 extern const struct bpf_func_proto bpf_sk_storage_get_proto;
 extern const struct bpf_func_proto bpf_sk_storage_delete_proto;
 
+struct bpf_local_storage_elem;
 struct bpf_sk_storage_diag;
 struct sk_buff;
 struct nlattr;
@@ -34,6 +42,50 @@ u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache);
 void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
 				      u16 idx);
 
+/* Helper functions for bpf_local_storage */
+int bpf_local_storage_map_alloc_check(union bpf_attr *attr);
+
+struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr);
+
+struct bpf_local_storage_data *
+bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
+			 struct bpf_local_storage_map *smap,
+			 bool cacheit_lockit);
+
+void bpf_local_storage_map_free(struct bpf_local_storage_map *smap);
+
+int bpf_local_storage_map_check_btf(const struct bpf_map *map,
+				    const struct btf *btf,
+				    const struct btf_type *key_type,
+				    const struct btf_type *value_type);
+
+void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
+				   struct bpf_local_storage_elem *selem);
+
+bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
+				     struct bpf_local_storage_elem *selem,
+				     bool uncharge_omem);
+
+void bpf_selem_unlink(struct bpf_local_storage_elem *selem);
+
+void bpf_selem_link_map(struct bpf_local_storage_map *smap,
+			struct bpf_local_storage_elem *selem);
+
+void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem);
+
+struct bpf_local_storage_elem *
+bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value,
+		bool charge_mem);
+
+int
+bpf_local_storage_alloc(void *owner,
+			struct bpf_local_storage_map *smap,
+			struct bpf_local_storage_elem *first_selem);
+
+struct bpf_local_storage_data *
+bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
+			 void *value, u64 map_flags);
+
 #ifdef CONFIG_BPF_SYSCALL
 int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk);
 struct bpf_sk_storage_diag *
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 544b89a64918..2cbd137eed86 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3765,9 +3765,13 @@ enum {
 	BPF_F_SYSCTL_BASE_NAME		= (1ULL << 0),
 };
 
-/* BPF_FUNC_sk_storage_get flags */
+/* BPF_FUNC_<kernel_obj>_storage_get flags */
 enum {
-	BPF_SK_STORAGE_GET_F_CREATE	= (1ULL << 0),
+	BPF_LOCAL_STORAGE_GET_F_CREATE	= (1ULL << 0),
+	/* BPF_SK_STORAGE_GET_F_CREATE is only kept for backward compatibility
+	 * and BPF_LOCAL_STORAGE_GET_F_CREATE must be used instead.
+	 */
+	BPF_SK_STORAGE_GET_F_CREATE  = BPF_LOCAL_STORAGE_GET_F_CREATE,
 };
 
 /* BPF_FUNC_read_branch_records flags. */
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index ec61ee7c7ee4..cd8b7017913b 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -84,7 +84,7 @@ struct bpf_local_storage_elem {
 struct bpf_local_storage {
 	struct bpf_local_storage_data __rcu *cache[BPF_LOCAL_STORAGE_CACHE_SIZE];
 	struct hlist_head list; /* List of bpf_local_storage_elem */
-	struct sock *owner;	/* The object that owns the above "list" of
+	void *owner;		/* The object that owns the above "list" of
 				 * bpf_local_storage_elem.
 				 */
 	struct rcu_head rcu;
@@ -110,6 +110,33 @@ static int omem_charge(struct sock *sk, unsigned int size)
 	return -ENOMEM;
 }
 
+static int mem_charge(struct bpf_local_storage_map *smap, void *owner, u32 size)
+{
+	struct bpf_map *map = &smap->map;
+
+	if (!map->ops->map_local_storage_charge)
+		return 0;
+
+	return map->ops->map_local_storage_charge(smap, owner, size);
+}
+
+static void mem_uncharge(struct bpf_local_storage_map *smap, void *owner,
+			 u32 size)
+{
+	struct bpf_map *map = &smap->map;
+
+	if (map->ops->map_local_storage_uncharge)
+		map->ops->map_local_storage_uncharge(smap, owner, size);
+}
+
+static struct bpf_local_storage __rcu **
+owner_storage(struct bpf_local_storage_map *smap, void *owner)
+{
+	struct bpf_map *map = &smap->map;
+
+	return map->ops->map_owner_storage_ptr(owner);
+}
+
 static bool selem_linked_to_storage(const struct bpf_local_storage_elem *selem)
 {
 	return !hlist_unhashed(&selem->snode);
@@ -120,13 +147,13 @@ static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
 	return !hlist_unhashed(&selem->map_node);
 }
 
-static struct bpf_local_storage_elem *
-bpf_selem_alloc(struct bpf_local_storage_map *smap, struct sock *sk,
-		void *value, bool charge_omem)
+struct bpf_local_storage_elem *
+bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
+		void *value, bool charge_mem)
 {
 	struct bpf_local_storage_elem *selem;
 
-	if (charge_omem && omem_charge(sk, smap->elem_size))
+	if (charge_mem && mem_charge(smap, owner, smap->elem_size))
 		return NULL;
 
 	selem = kzalloc(smap->elem_size, GFP_ATOMIC | __GFP_NOWARN);
@@ -136,8 +163,8 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, struct sock *sk,
 		return selem;
 	}
 
-	if (charge_omem)
-		atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
+	if (charge_mem)
+		mem_uncharge(smap, owner, smap->elem_size);
 
 	return NULL;
 }
@@ -146,32 +173,32 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, struct sock *sk,
  * The caller must ensure selem->smap is still valid to be
  * dereferenced for its smap->elem_size and smap->cache_idx.
  */
-static bool
-bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
-				struct bpf_local_storage_elem *selem,
-				bool uncharge_omem)
+bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
+				     struct bpf_local_storage_elem *selem,
+				     bool uncharge_mem)
 {
 	struct bpf_local_storage_map *smap;
 	bool free_local_storage;
-	struct sock *sk;
+	void *owner;
 
 	smap = rcu_dereference(SDATA(selem)->smap);
-	sk = local_storage->owner;
+	owner = local_storage->owner;
 
 	/* All uncharging on the owner must be done first.
 	 * The owner may be freed once the last selem is unlinked
 	 * from local_storage.
 	 */
-	if (uncharge_omem)
-		atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
+	if (uncharge_mem)
+		mem_uncharge(smap, owner, smap->elem_size);
 
 	free_local_storage = hlist_is_singular_node(&selem->snode,
 						    &local_storage->list);
 	if (free_local_storage) {
-		atomic_sub(sizeof(struct bpf_local_storage), &sk->sk_omem_alloc);
+		mem_uncharge(smap, owner, sizeof(struct bpf_local_storage));
 		local_storage->owner = NULL;
-		/* After this RCU_INIT, sk may be freed and cannot be used */
-		RCU_INIT_POINTER(sk->sk_bpf_storage, NULL);
+
+		/* After this RCU_INIT, owner may be freed and cannot be used */
+		RCU_INIT_POINTER(*owner_storage(smap, owner), NULL);
 
 		/* local_storage is not freed now.  local_storage->lock is
 		 * still held and raw_spin_unlock_bh(&local_storage->lock)
@@ -209,23 +236,22 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
 	local_storage = rcu_dereference(selem->local_storage);
 	raw_spin_lock_bh(&local_storage->lock);
 	if (likely(selem_linked_to_storage(selem)))
-		free_local_storage =
-			bpf_selem_unlink_storage_nolock(local_storage, selem, true);
+		free_local_storage = bpf_selem_unlink_storage_nolock(
+			local_storage, selem, true);
 	raw_spin_unlock_bh(&local_storage->lock);
 
 	if (free_local_storage)
 		kfree_rcu(local_storage, rcu);
 }
 
-static void
-bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
-			      struct bpf_local_storage_elem *selem)
+void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
+				   struct bpf_local_storage_elem *selem)
 {
 	RCU_INIT_POINTER(selem->local_storage, local_storage);
 	hlist_add_head(&selem->snode, &local_storage->list);
 }
 
-static void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
+void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
 {
 	struct bpf_local_storage_map *smap;
 	struct bpf_local_storage_map_bucket *b;
@@ -242,8 +268,8 @@ static void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
 	raw_spin_unlock_bh(&b->lock);
 }
 
-static void bpf_selem_link_map(struct bpf_local_storage_map *smap,
-			       struct bpf_local_storage_elem *selem)
+void bpf_selem_link_map(struct bpf_local_storage_map *smap,
+			struct bpf_local_storage_elem *selem)
 {
 	struct bpf_local_storage_map_bucket *b = select_bucket(smap, selem);
 
@@ -253,7 +279,7 @@ static void bpf_selem_link_map(struct bpf_local_storage_map *smap,
 	raw_spin_unlock_bh(&b->lock);
 }
 
-static void bpf_selem_unlink(struct bpf_local_storage_elem *selem)
+void bpf_selem_unlink(struct bpf_local_storage_elem *selem)
 {
 	/* Always unlink from map before unlinking from local_storage
 	 * because selem will be freed after successfully unlinked from
@@ -263,7 +289,7 @@ static void bpf_selem_unlink(struct bpf_local_storage_elem *selem)
 	__bpf_selem_unlink_storage(selem);
 }
 
-static struct bpf_local_storage_data *
+struct bpf_local_storage_data *
 bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
 			 struct bpf_local_storage_map *smap,
 			 bool cacheit_lockit)
@@ -329,40 +355,45 @@ static int check_flags(const struct bpf_local_storage_data *old_sdata,
 	return 0;
 }
 
-static int sk_storage_alloc(struct sock *sk,
+int bpf_local_storage_alloc(void *owner,
 			    struct bpf_local_storage_map *smap,
 			    struct bpf_local_storage_elem *first_selem)
 {
-	struct bpf_local_storage *prev_sk_storage, *sk_storage;
+	struct bpf_local_storage *prev_storage, *storage;
+	struct bpf_local_storage **owner_storage_ptr;
 	int err;
 
-	err = omem_charge(sk, sizeof(*sk_storage));
+	err = mem_charge(smap, owner, sizeof(*storage));
 	if (err)
 		return err;
 
-	sk_storage = kzalloc(sizeof(*sk_storage), GFP_ATOMIC | __GFP_NOWARN);
-	if (!sk_storage) {
+	storage = kzalloc(sizeof(*storage), GFP_ATOMIC | __GFP_NOWARN);
+	if (!storage) {
 		err = -ENOMEM;
 		goto uncharge;
 	}
-	INIT_HLIST_HEAD(&sk_storage->list);
-	raw_spin_lock_init(&sk_storage->lock);
-	sk_storage->owner = sk;
 
-	bpf_selem_link_storage_nolock(sk_storage, first_selem);
+	INIT_HLIST_HEAD(&storage->list);
+	raw_spin_lock_init(&storage->lock);
+	storage->owner = owner;
+
+	bpf_selem_link_storage_nolock(storage, first_selem);
 	bpf_selem_link_map(smap, first_selem);
-	/* Publish sk_storage to sk.  sk->sk_lock cannot be acquired.
-	 * Hence, atomic ops is used to set sk->sk_bpf_storage
-	 * from NULL to the newly allocated sk_storage ptr.
+
+	owner_storage_ptr =
+		(struct bpf_local_storage **)owner_storage(smap, owner);
+	/* Publish storage to the owner.
+	 * Instead of using any lock of the kernel object (i.e. owner),
+	 * cmpxchg will work with any kernel object regardless what
+	 * the running context is, bh, irq...etc.
 	 *
-	 * From now on, the sk->sk_bpf_storage pointer is protected
-	 * by the sk_storage->lock.  Hence,  when freeing
-	 * the sk->sk_bpf_storage, the sk_storage->lock must
-	 * be held before setting sk->sk_bpf_storage to NULL.
+	 * From now on, the owner->storage pointer (e.g. sk->sk_bpf_storage)
+	 * is protected by the storage->lock.  Hence, when freeing
+	 * the owner->storage, the storage->lock must be held before
+	 * setting owner->storage ptr to NULL.
 	 */
-	prev_sk_storage = cmpxchg((struct bpf_local_storage **)&sk->sk_bpf_storage,
-				  NULL, sk_storage);
-	if (unlikely(prev_sk_storage)) {
+	prev_storage = cmpxchg(owner_storage_ptr, NULL, storage);
+	if (unlikely(prev_storage)) {
 		bpf_selem_unlink_map(first_selem);
 		err = -EAGAIN;
 		goto uncharge;
@@ -380,8 +411,8 @@ static int sk_storage_alloc(struct sock *sk,
 	return 0;
 
 uncharge:
-	kfree(sk_storage);
-	atomic_sub(sizeof(*sk_storage), &sk->sk_omem_alloc);
+	kfree(storage);
+	mem_uncharge(smap, owner, sizeof(*storage));
 	return err;
 }
 
@@ -390,38 +421,37 @@ uncharge:
  * Otherwise, it will become a leak (and other memory issues
  * during map destruction).
  */
-static struct bpf_local_storage_data *
-bpf_local_storage_update(struct sock *sk, struct bpf_map *map, void *value,
-			 u64 map_flags)
+struct bpf_local_storage_data *
+bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
+			 void *value, u64 map_flags)
 {
 	struct bpf_local_storage_data *old_sdata = NULL;
 	struct bpf_local_storage_elem *selem;
 	struct bpf_local_storage *local_storage;
-	struct bpf_local_storage_map *smap;
 	int err;
 
 	/* BPF_EXIST and BPF_NOEXIST cannot be both set */
 	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST) ||
 	    /* BPF_F_LOCK can only be used in a value with spin_lock */
-	    unlikely((map_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)))
+	    unlikely((map_flags & BPF_F_LOCK) &&
+		     !map_value_has_spin_lock(&smap->map)))
 		return ERR_PTR(-EINVAL);
 
-	smap = (struct bpf_local_storage_map *)map;
-	local_storage = rcu_dereference(sk->sk_bpf_storage);
+	local_storage = rcu_dereference(*owner_storage(smap, owner));
 	if (!local_storage || hlist_empty(&local_storage->list)) {
 		/* Very first elem for the owner */
 		err = check_flags(NULL, map_flags);
 		if (err)
 			return ERR_PTR(err);
 
-		selem = bpf_selem_alloc(smap, sk, value, true);
+		selem = bpf_selem_alloc(smap, owner, value, true);
 		if (!selem)
 			return ERR_PTR(-ENOMEM);
 
-		err = sk_storage_alloc(sk, smap, selem);
+		err = bpf_local_storage_alloc(owner, smap, selem);
 		if (err) {
 			kfree(selem);
-			atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
+			mem_uncharge(smap, owner, smap->elem_size);
 			return ERR_PTR(err);
 		}
 
@@ -439,7 +469,7 @@ bpf_local_storage_update(struct sock *sk, struct bpf_map *map, void *value,
 		if (err)
 			return ERR_PTR(err);
 		if (old_sdata && selem_linked_to_storage(SELEM(old_sdata))) {
-			copy_map_value_locked(map, old_sdata->data,
+			copy_map_value_locked(&smap->map, old_sdata->data,
 					      value, false);
 			return old_sdata;
 		}
@@ -464,7 +494,8 @@ bpf_local_storage_update(struct sock *sk, struct bpf_map *map, void *value,
 		goto unlock_err;
 
 	if (old_sdata && (map_flags & BPF_F_LOCK)) {
-		copy_map_value_locked(map, old_sdata->data, value, false);
+		copy_map_value_locked(&smap->map, old_sdata->data, value,
+				      false);
 		selem = SELEM(old_sdata);
 		goto unlock;
 	}
@@ -478,7 +509,7 @@ bpf_local_storage_update(struct sock *sk, struct bpf_map *map, void *value,
 	 * old_sdata will not be uncharged later during
 	 * bpf_selem_unlink_storage_nolock().
 	 */
-	selem = bpf_selem_alloc(smap, sk, value, !old_sdata);
+	selem = bpf_selem_alloc(smap, owner, value, !old_sdata);
 	if (!selem) {
 		err = -ENOMEM;
 		goto unlock_err;
@@ -591,17 +622,12 @@ void bpf_sk_storage_free(struct sock *sk)
 		kfree_rcu(sk_storage, rcu);
 }
 
-static void bpf_local_storage_map_free(struct bpf_map *map)
+void bpf_local_storage_map_free(struct bpf_local_storage_map *smap)
 {
 	struct bpf_local_storage_elem *selem;
-	struct bpf_local_storage_map *smap;
 	struct bpf_local_storage_map_bucket *b;
 	unsigned int i;
 
-	smap = (struct bpf_local_storage_map *)map;
-
-	bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx);
-
 	/* Note that this map might be concurrently cloned from
 	 * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
 	 * RCU read section to finish before proceeding. New RCU
@@ -646,7 +672,16 @@ static void bpf_local_storage_map_free(struct bpf_map *map)
 	synchronize_rcu();
 
 	kvfree(smap->buckets);
-	kfree(map);
+	kfree(smap);
+}
+
+static void sk_storage_map_free(struct bpf_map *map)
+{
+	struct bpf_local_storage_map *smap;
+
+	smap = (struct bpf_local_storage_map *)map;
+	bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx);
+	bpf_local_storage_map_free(smap);
 }
 
 /* U16_MAX is much more than enough for sk local storage
@@ -658,7 +693,7 @@ static void bpf_local_storage_map_free(struct bpf_map *map)
 	       sizeof(struct bpf_local_storage_elem)),			\
 	      (U16_MAX - sizeof(struct bpf_local_storage_elem)))
 
-static int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
+int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
 {
 	if (attr->map_flags & ~BPF_LOCAL_STORAGE_CREATE_FLAG_MASK ||
 	    !(attr->map_flags & BPF_F_NO_PREALLOC) ||
@@ -677,7 +712,7 @@ static int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
 	return 0;
 }
 
-static struct bpf_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
+struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
 {
 	struct bpf_local_storage_map *smap;
 	unsigned int i;
@@ -717,8 +752,19 @@ static struct bpf_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
 
 	smap->elem_size =
 		sizeof(struct bpf_local_storage_elem) + attr->value_size;
-	smap->cache_idx = bpf_local_storage_cache_idx_get(&sk_cache);
 
+	return smap;
+}
+
+static struct bpf_map *sk_storage_map_alloc(union bpf_attr *attr)
+{
+	struct bpf_local_storage_map *smap;
+
+	smap = bpf_local_storage_map_alloc(attr);
+	if (IS_ERR(smap))
+		return ERR_CAST(smap);
+
+	smap->cache_idx = bpf_local_storage_cache_idx_get(&sk_cache);
 	return &smap->map;
 }
 
@@ -728,10 +774,10 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key,
 	return -ENOTSUPP;
 }
 
-static int bpf_local_storage_map_check_btf(const struct bpf_map *map,
-					   const struct btf *btf,
-					   const struct btf_type *key_type,
-					   const struct btf_type *value_type)
+int bpf_local_storage_map_check_btf(const struct bpf_map *map,
+				    const struct btf *btf,
+				    const struct btf_type *key_type,
+				    const struct btf_type *value_type)
 {
 	u32 int_data;
 
@@ -772,8 +818,9 @@ static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
 	fd = *(int *)key;
 	sock = sockfd_lookup(fd, &err);
 	if (sock) {
-		sdata = bpf_local_storage_update(sock->sk, map, value,
-						 map_flags);
+		sdata = bpf_local_storage_update(
+			sock->sk, (struct bpf_local_storage_map *)map, value,
+			map_flags);
 		sockfd_put(sock);
 		return PTR_ERR_OR_ZERO(sdata);
 	}
@@ -862,7 +909,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
 			bpf_selem_link_map(smap, copy_selem);
 			bpf_selem_link_storage_nolock(new_sk_storage, copy_selem);
 		} else {
-			ret = sk_storage_alloc(newsk, smap, copy_selem);
+			ret = bpf_local_storage_alloc(newsk, smap, copy_selem);
 			if (ret) {
 				kfree(copy_selem);
 				atomic_sub(smap->elem_size,
@@ -906,7 +953,9 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
 	     *  destruction).
 	     */
 	    refcount_inc_not_zero(&sk->sk_refcnt)) {
-		sdata = bpf_local_storage_update(sk, map, value, BPF_NOEXIST);
+		sdata = bpf_local_storage_update(
+			sk, (struct bpf_local_storage_map *)map, value,
+			BPF_NOEXIST);
 		/* sk must be a fullsock (guaranteed by verifier),
 		 * so sock_gen_put() is unnecessary.
 		 */
@@ -931,11 +980,33 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
 	return -ENOENT;
 }
 
+static int sk_storage_charge(struct bpf_local_storage_map *smap,
+			     void *owner, u32 size)
+{
+	return omem_charge(owner, size);
+}
+
+static void sk_storage_uncharge(struct bpf_local_storage_map *smap,
+				void *owner, u32 size)
+{
+	struct sock *sk = owner;
+
+	atomic_sub(size, &sk->sk_omem_alloc);
+}
+
+static struct bpf_local_storage __rcu **
+sk_storage_ptr(void *owner)
+{
+	struct sock *sk = owner;
+
+	return &sk->sk_bpf_storage;
+}
+
 static int sk_storage_map_btf_id;
 const struct bpf_map_ops sk_storage_map_ops = {
 	.map_alloc_check = bpf_local_storage_map_alloc_check,
-	.map_alloc = bpf_local_storage_map_alloc,
-	.map_free = bpf_local_storage_map_free,
+	.map_alloc = sk_storage_map_alloc,
+	.map_free = sk_storage_map_free,
 	.map_get_next_key = notsupp_get_next_key,
 	.map_lookup_elem = bpf_fd_sk_storage_lookup_elem,
 	.map_update_elem = bpf_fd_sk_storage_update_elem,
@@ -943,6 +1014,9 @@ const struct bpf_map_ops sk_storage_map_ops = {
 	.map_check_btf = bpf_local_storage_map_check_btf,
 	.map_btf_name = "bpf_local_storage_map",
 	.map_btf_id = &sk_storage_map_btf_id,
+	.map_local_storage_charge = sk_storage_charge,
+	.map_local_storage_uncharge = sk_storage_uncharge,
+	.map_owner_storage_ptr = sk_storage_ptr,
 };
 
 const struct bpf_func_proto bpf_sk_storage_get_proto = {
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 544b89a64918..2cbd137eed86 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3765,9 +3765,13 @@ enum {
 	BPF_F_SYSCTL_BASE_NAME		= (1ULL << 0),
 };
 
-/* BPF_FUNC_sk_storage_get flags */
+/* BPF_FUNC_<kernel_obj>_storage_get flags */
 enum {
-	BPF_SK_STORAGE_GET_F_CREATE	= (1ULL << 0),
+	BPF_LOCAL_STORAGE_GET_F_CREATE	= (1ULL << 0),
+	/* BPF_SK_STORAGE_GET_F_CREATE is only kept for backward compatibility
+	 * and BPF_LOCAL_STORAGE_GET_F_CREATE must be used instead.
+	 */
+	BPF_SK_STORAGE_GET_F_CREATE  = BPF_LOCAL_STORAGE_GET_F_CREATE,
 };
 
 /* BPF_FUNC_read_branch_records flags. */
-- 
cgit v1.2.3


From 8ea636848aca35b9f97c5b5dee30225cf2dd0fe6 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Tue, 25 Aug 2020 20:29:17 +0200
Subject: bpf: Implement bpf_local_storage for inodes

Similar to bpf_local_storage for sockets, add local storage for inodes.
The life-cycle of storage is managed with the life-cycle of the inode.
i.e. the storage is destroyed along with the owning inode.

The BPF LSM allocates an __rcu pointer to the bpf_local_storage in the
security blob which are now stackable and can co-exist with other LSMs.

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200825182919.1118197-6-kpsingh@chromium.org
---
 include/linux/bpf_lsm.h                         |  29 +++
 include/linux/bpf_types.h                       |   3 +
 include/uapi/linux/bpf.h                        |  40 +++-
 kernel/bpf/Makefile                             |   1 +
 kernel/bpf/bpf_inode_storage.c                  | 273 ++++++++++++++++++++++++
 kernel/bpf/syscall.c                            |   3 +-
 kernel/bpf/verifier.c                           |  10 +
 security/bpf/hooks.c                            |   6 +
 tools/bpf/bpftool/Documentation/bpftool-map.rst |   2 +-
 tools/bpf/bpftool/bash-completion/bpftool       |   3 +-
 tools/bpf/bpftool/map.c                         |   3 +-
 tools/include/uapi/linux/bpf.h                  |  40 +++-
 tools/lib/bpf/libbpf_probes.c                   |   5 +-
 13 files changed, 410 insertions(+), 8 deletions(-)
 create mode 100644 kernel/bpf/bpf_inode_storage.c

(limited to 'include/uapi')

diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
index af74712af585..aaacb6aafc87 100644
--- a/include/linux/bpf_lsm.h
+++ b/include/linux/bpf_lsm.h
@@ -17,9 +17,28 @@
 #include <linux/lsm_hook_defs.h>
 #undef LSM_HOOK
 
+struct bpf_storage_blob {
+	struct bpf_local_storage __rcu *storage;
+};
+
+extern struct lsm_blob_sizes bpf_lsm_blob_sizes;
+
 int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
 			const struct bpf_prog *prog);
 
+static inline struct bpf_storage_blob *bpf_inode(
+	const struct inode *inode)
+{
+	if (unlikely(!inode->i_security))
+		return NULL;
+
+	return inode->i_security + bpf_lsm_blob_sizes.lbs_inode;
+}
+
+extern const struct bpf_func_proto bpf_inode_storage_get_proto;
+extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
+void bpf_inode_storage_free(struct inode *inode);
+
 #else /* !CONFIG_BPF_LSM */
 
 static inline int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
@@ -28,6 +47,16 @@ static inline int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
 	return -EOPNOTSUPP;
 }
 
+static inline struct bpf_storage_blob *bpf_inode(
+	const struct inode *inode)
+{
+	return NULL;
+}
+
+static inline void bpf_inode_storage_free(struct inode *inode)
+{
+}
+
 #endif /* CONFIG_BPF_LSM */
 
 #endif /* _LINUX_BPF_LSM_H */
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index a52a5688418e..2e6f568377f1 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -107,6 +107,9 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
 #endif
+#ifdef CONFIG_BPF_LSM
+BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops)
+#endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
 #if defined(CONFIG_XDP_SOCKETS)
 BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2cbd137eed86..b6bfcd085a76 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -155,6 +155,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_DEVMAP_HASH,
 	BPF_MAP_TYPE_STRUCT_OPS,
 	BPF_MAP_TYPE_RINGBUF,
+	BPF_MAP_TYPE_INODE_STORAGE,
 };
 
 /* Note that tracing related programs such as
@@ -3509,6 +3510,41 @@ union bpf_attr {
  *
  *		**-EPERM** This helper cannot be used under the
  *			   current sock_ops->op.
+ * void *bpf_inode_storage_get(struct bpf_map *map, void *inode, void *value, u64 flags)
+ *	Description
+ *		Get a bpf_local_storage from an *inode*.
+ *
+ *		Logically, it could be thought of as getting the value from
+ *		a *map* with *inode* as the **key**.  From this
+ *		perspective,  the usage is not much different from
+ *		**bpf_map_lookup_elem**\ (*map*, **&**\ *inode*) except this
+ *		helper enforces the key must be an inode and the map must also
+ *		be a **BPF_MAP_TYPE_INODE_STORAGE**.
+ *
+ *		Underneath, the value is stored locally at *inode* instead of
+ *		the *map*.  The *map* is used as the bpf-local-storage
+ *		"type". The bpf-local-storage "type" (i.e. the *map*) is
+ *		searched against all bpf_local_storage residing at *inode*.
+ *
+ *		An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be
+ *		used such that a new bpf_local_storage will be
+ *		created if one does not exist.  *value* can be used
+ *		together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify
+ *		the initial value of a bpf_local_storage.  If *value* is
+ *		**NULL**, the new bpf_local_storage will be zero initialized.
+ *	Return
+ *		A bpf_local_storage pointer is returned on success.
+ *
+ *		**NULL** if not found or there was an error in adding
+ *		a new bpf_local_storage.
+ *
+ * int bpf_inode_storage_delete(struct bpf_map *map, void *inode)
+ *	Description
+ *		Delete a bpf_local_storage from an *inode*.
+ *	Return
+ *		0 on success.
+ *
+ *		**-ENOENT** if the bpf_local_storage cannot be found.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3655,7 +3691,9 @@ union bpf_attr {
 	FN(get_task_stack),		\
 	FN(load_hdr_opt),		\
 	FN(store_hdr_opt),		\
-	FN(reserve_hdr_opt),
+	FN(reserve_hdr_opt),		\
+	FN(inode_storage_get),		\
+	FN(inode_storage_delete),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 6961ff400cba..bdc8cd1b6767 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -5,6 +5,7 @@ CFLAGS_core.o += $(call cc-disable-warning, override-init)
 obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
 obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
+obj-${CONFIG_BPF_LSM}	  += bpf_inode_storage.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o
 obj-$(CONFIG_BPF_JIT) += trampoline.o
 obj-$(CONFIG_BPF_SYSCALL) += btf.o
diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
new file mode 100644
index 000000000000..f3a44e929447
--- /dev/null
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -0,0 +1,273 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 Facebook
+ * Copyright 2020 Google LLC.
+ */
+
+#include <linux/rculist.h>
+#include <linux/list.h>
+#include <linux/hash.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/bpf.h>
+#include <linux/bpf_local_storage.h>
+#include <net/sock.h>
+#include <uapi/linux/sock_diag.h>
+#include <uapi/linux/btf.h>
+#include <linux/bpf_lsm.h>
+#include <linux/btf_ids.h>
+#include <linux/fdtable.h>
+
+DEFINE_BPF_STORAGE_CACHE(inode_cache);
+
+static struct bpf_local_storage __rcu **
+inode_storage_ptr(void *owner)
+{
+	struct inode *inode = owner;
+	struct bpf_storage_blob *bsb;
+
+	bsb = bpf_inode(inode);
+	if (!bsb)
+		return NULL;
+	return &bsb->storage;
+}
+
+static struct bpf_local_storage_data *inode_storage_lookup(struct inode *inode,
+							   struct bpf_map *map,
+							   bool cacheit_lockit)
+{
+	struct bpf_local_storage *inode_storage;
+	struct bpf_local_storage_map *smap;
+	struct bpf_storage_blob *bsb;
+
+	bsb = bpf_inode(inode);
+	if (!bsb)
+		return NULL;
+
+	inode_storage = rcu_dereference(bsb->storage);
+	if (!inode_storage)
+		return NULL;
+
+	smap = (struct bpf_local_storage_map *)map;
+	return bpf_local_storage_lookup(inode_storage, smap, cacheit_lockit);
+}
+
+void bpf_inode_storage_free(struct inode *inode)
+{
+	struct bpf_local_storage_elem *selem;
+	struct bpf_local_storage *local_storage;
+	bool free_inode_storage = false;
+	struct bpf_storage_blob *bsb;
+	struct hlist_node *n;
+
+	bsb = bpf_inode(inode);
+	if (!bsb)
+		return;
+
+	rcu_read_lock();
+
+	local_storage = rcu_dereference(bsb->storage);
+	if (!local_storage) {
+		rcu_read_unlock();
+		return;
+	}
+
+	/* Neither the bpf_prog nor the bpf-map's syscall
+	 * could be modifying the local_storage->list now.
+	 * Thus, no elem can be added-to or deleted-from the
+	 * local_storage->list by the bpf_prog or by the bpf-map's syscall.
+	 *
+	 * It is racing with bpf_local_storage_map_free() alone
+	 * when unlinking elem from the local_storage->list and
+	 * the map's bucket->list.
+	 */
+	raw_spin_lock_bh(&local_storage->lock);
+	hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
+		/* Always unlink from map before unlinking from
+		 * local_storage.
+		 */
+		bpf_selem_unlink_map(selem);
+		free_inode_storage = bpf_selem_unlink_storage_nolock(
+			local_storage, selem, false);
+	}
+	raw_spin_unlock_bh(&local_storage->lock);
+	rcu_read_unlock();
+
+	/* free_inode_storage should always be true as long as
+	 * local_storage->list was non-empty.
+	 */
+	if (free_inode_storage)
+		kfree_rcu(local_storage, rcu);
+}
+
+static void *bpf_fd_inode_storage_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_local_storage_data *sdata;
+	struct file *f;
+	int fd;
+
+	fd = *(int *)key;
+	f = fget_raw(fd);
+	if (!f)
+		return NULL;
+
+	sdata = inode_storage_lookup(f->f_inode, map, true);
+	fput(f);
+	return sdata ? sdata->data : NULL;
+}
+
+/* Syscall-side update; *key is an fd. Returns 0 or a negative errno. */
+static int bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key,
+					 void *value, u64 map_flags)
+{
+	struct bpf_local_storage_data *sdata = ERR_PTR(-EBADF);
+	struct file *f;
+
+	f = fget_raw(*(int *)key);
+	if (!f)
+		return -EBADF;
+	/* No blob means no storage slot: fail, but still drop the file ref. */
+	if (inode_storage_ptr(f->f_inode))
+		sdata = bpf_local_storage_update(
+			f->f_inode, (struct bpf_local_storage_map *)map,
+			value, map_flags);
+	fput(f);
+	return PTR_ERR_OR_ZERO(sdata);
+}
+
+static int inode_storage_delete(struct inode *inode, struct bpf_map *map)
+{
+	struct bpf_local_storage_data *sdata;
+
+	sdata = inode_storage_lookup(inode, map, false);
+	if (!sdata)
+		return -ENOENT;
+
+	bpf_selem_unlink(SELEM(sdata));
+
+	return 0;
+}
+
+static int bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key)
+{
+	struct file *f;
+	int fd, err;
+
+	fd = *(int *)key;
+	f = fget_raw(fd);
+	if (!f)
+		return -EBADF;
+
+	err = inode_storage_delete(f->f_inode, map);
+	fput(f);
+	return err;
+}
+
+BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
+	   void *, value, u64, flags)
+{
+	struct bpf_local_storage_data *sdata;
+
+	if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
+		return (unsigned long)NULL;
+
+	/* explicitly check that the inode_storage_ptr is not
+	 * NULL as inode_storage_lookup returns NULL in this case and
+	 * bpf_local_storage_update expects the owner to have a
+	 * valid storage pointer.
+	 */
+	if (!inode_storage_ptr(inode))
+		return (unsigned long)NULL;
+
+	sdata = inode_storage_lookup(inode, map, true);
+	if (sdata)
+		return (unsigned long)sdata->data;
+
+	/* This helper must only be called from where the inode is guaranteed
+	 * to have a refcount and cannot be freed.
+	 */
+	if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
+		sdata = bpf_local_storage_update(
+			inode, (struct bpf_local_storage_map *)map, value,
+			BPF_NOEXIST);
+		return IS_ERR(sdata) ? (unsigned long)NULL :
+					     (unsigned long)sdata->data;
+	}
+
+	return (unsigned long)NULL;
+}
+
+BPF_CALL_2(bpf_inode_storage_delete,
+	   struct bpf_map *, map, struct inode *, inode)
+{
+	/* This helper must only be called from where the inode is guaranteed
+	 * to have a refcount and cannot be freed.
+	 */
+	return inode_storage_delete(inode, map);
+}
+
+static int notsupp_get_next_key(struct bpf_map *map, void *key,
+				void *next_key)
+{
+	return -ENOTSUPP;
+}
+
+static struct bpf_map *inode_storage_map_alloc(union bpf_attr *attr)
+{
+	struct bpf_local_storage_map *smap;
+
+	smap = bpf_local_storage_map_alloc(attr);
+	if (IS_ERR(smap))
+		return ERR_CAST(smap);
+
+	smap->cache_idx = bpf_local_storage_cache_idx_get(&inode_cache);
+	return &smap->map;
+}
+
+static void inode_storage_map_free(struct bpf_map *map)
+{
+	struct bpf_local_storage_map *smap;
+
+	smap = (struct bpf_local_storage_map *)map;
+	bpf_local_storage_cache_idx_free(&inode_cache, smap->cache_idx);
+	bpf_local_storage_map_free(smap);
+}
+
+static int inode_storage_map_btf_id;
+const struct bpf_map_ops inode_storage_map_ops = {
+	.map_alloc_check = bpf_local_storage_map_alloc_check,
+	.map_alloc = inode_storage_map_alloc,
+	.map_free = inode_storage_map_free,
+	.map_get_next_key = notsupp_get_next_key,
+	.map_lookup_elem = bpf_fd_inode_storage_lookup_elem,
+	.map_update_elem = bpf_fd_inode_storage_update_elem,
+	.map_delete_elem = bpf_fd_inode_storage_delete_elem,
+	.map_check_btf = bpf_local_storage_map_check_btf,
+	.map_btf_name = "bpf_local_storage_map",
+	.map_btf_id = &inode_storage_map_btf_id,
+	.map_owner_storage_ptr = inode_storage_ptr,
+};
+
+BTF_ID_LIST(bpf_inode_storage_btf_ids)
+BTF_ID_UNUSED
+BTF_ID(struct, inode)
+
+const struct bpf_func_proto bpf_inode_storage_get_proto = {
+	.func		= bpf_inode_storage_get,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_PTR_TO_BTF_ID,
+	.arg3_type	= ARG_PTR_TO_MAP_VALUE_OR_NULL,
+	.arg4_type	= ARG_ANYTHING,
+	.btf_id		= bpf_inode_storage_btf_ids,
+};
+
+const struct bpf_func_proto bpf_inode_storage_delete_proto = {
+	.func		= bpf_inode_storage_delete,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_PTR_TO_BTF_ID,
+	.btf_id		= bpf_inode_storage_btf_ids,
+};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b46e973faee9..5443cea86cef 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -769,7 +769,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 		if (map->map_type != BPF_MAP_TYPE_HASH &&
 		    map->map_type != BPF_MAP_TYPE_ARRAY &&
 		    map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
-		    map->map_type != BPF_MAP_TYPE_SK_STORAGE)
+		    map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
+		    map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
 			return -ENOTSUPP;
 		if (map->spin_lock_off + sizeof(struct bpf_spin_lock) >
 		    map->value_size) {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index dd24503ab3d3..38748794518e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4311,6 +4311,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		    func_id != BPF_FUNC_sk_storage_delete)
 			goto error;
 		break;
+	case BPF_MAP_TYPE_INODE_STORAGE:
+		if (func_id != BPF_FUNC_inode_storage_get &&
+		    func_id != BPF_FUNC_inode_storage_delete)
+			goto error;
+		break;
 	default:
 		break;
 	}
@@ -4384,6 +4389,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
 			goto error;
 		break;
+	case BPF_FUNC_inode_storage_get:
+	case BPF_FUNC_inode_storage_delete:
+		if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
+			goto error;
+		break;
 	default:
 		break;
 	}
diff --git a/security/bpf/hooks.c b/security/bpf/hooks.c
index 32d32d485451..788667d582ae 100644
--- a/security/bpf/hooks.c
+++ b/security/bpf/hooks.c
@@ -11,6 +11,7 @@ static struct security_hook_list bpf_lsm_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(NAME, bpf_lsm_##NAME),
 	#include <linux/lsm_hook_defs.h>
 	#undef LSM_HOOK
+	LSM_HOOK_INIT(inode_free_security, bpf_inode_storage_free),
 };
 
 static int __init bpf_lsm_init(void)
@@ -20,7 +21,12 @@ static int __init bpf_lsm_init(void)
 	return 0;
 }
 
+struct lsm_blob_sizes bpf_lsm_blob_sizes __lsm_ro_after_init = {
+	.lbs_inode = sizeof(struct bpf_storage_blob),
+};
+
 DEFINE_LSM(bpf) = {
 	.name = "bpf",
 	.init = bpf_lsm_init,
+	.blobs = &bpf_lsm_blob_sizes
 };
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 41e2a74252d0..083db6c2fc67 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -49,7 +49,7 @@ MAP COMMANDS
 |		| **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps**
 |		| **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash**
 |		| **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage**
-|		| **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** }
+|		| **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage** }
 
 DESCRIPTION
 ===========
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index f53ed2f1a4aa..7b68e3c0a5fb 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -704,7 +704,8 @@ _bpftool()
                                 lru_percpu_hash lpm_trie array_of_maps \
                                 hash_of_maps devmap devmap_hash sockmap cpumap \
                                 xskmap sockhash cgroup_storage reuseport_sockarray \
-                                percpu_cgroup_storage queue stack' -- \
+                                percpu_cgroup_storage queue stack sk_storage \
+                                struct_ops inode_storage' -- \
                                                    "$cur" ) )
                             return 0
                             ;;
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 3a27d31a1856..bc0071228f88 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -50,6 +50,7 @@ const char * const map_type_name[] = {
 	[BPF_MAP_TYPE_SK_STORAGE]		= "sk_storage",
 	[BPF_MAP_TYPE_STRUCT_OPS]		= "struct_ops",
 	[BPF_MAP_TYPE_RINGBUF]			= "ringbuf",
+	[BPF_MAP_TYPE_INODE_STORAGE]		= "inode_storage",
 };
 
 const size_t map_type_name_size = ARRAY_SIZE(map_type_name);
@@ -1442,7 +1443,7 @@ static int do_help(int argc, char **argv)
 		"                 lru_percpu_hash | lpm_trie | array_of_maps | hash_of_maps |\n"
 		"                 devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
 		"                 cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n"
-		"                 queue | stack | sk_storage | struct_ops | ringbuf }\n"
+		"                 queue | stack | sk_storage | struct_ops | ringbuf | inode_storage }\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
 		bin_name, argv[-2]);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 2cbd137eed86..b6bfcd085a76 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -155,6 +155,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_DEVMAP_HASH,
 	BPF_MAP_TYPE_STRUCT_OPS,
 	BPF_MAP_TYPE_RINGBUF,
+	BPF_MAP_TYPE_INODE_STORAGE,
 };
 
 /* Note that tracing related programs such as
@@ -3509,6 +3510,41 @@ union bpf_attr {
  *
  *		**-EPERM** This helper cannot be used under the
  *			   current sock_ops->op.
+ * void *bpf_inode_storage_get(struct bpf_map *map, void *inode, void *value, u64 flags)
+ *	Description
+ *		Get a bpf_local_storage from an *inode*.
+ *
+ *		Logically, it could be thought of as getting the value from
+ *		a *map* with *inode* as the **key**.  From this
+ *		perspective,  the usage is not much different from
+ *		**bpf_map_lookup_elem**\ (*map*, **&**\ *inode*) except this
+ *		helper enforces the key must be an inode and the map must also
+ *		be a **BPF_MAP_TYPE_INODE_STORAGE**.
+ *
+ *		Underneath, the value is stored locally at *inode* instead of
+ *		the *map*.  The *map* is used as the bpf-local-storage
+ *		"type". The bpf-local-storage "type" (i.e. the *map*) is
+ *		searched against all bpf_local_storage residing at *inode*.
+ *
+ *		An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be
+ *		used such that a new bpf_local_storage will be
+ *		created if one does not exist.  *value* can be used
+ *		together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify
+ *		the initial value of a bpf_local_storage.  If *value* is
+ *		**NULL**, the new bpf_local_storage will be zero initialized.
+ *	Return
+ *		A bpf_local_storage pointer is returned on success.
+ *
+ *		**NULL** if not found or there was an error in adding
+ *		a new bpf_local_storage.
+ *
+ * int bpf_inode_storage_delete(struct bpf_map *map, void *inode)
+ *	Description
+ *		Delete a bpf_local_storage from an *inode*.
+ *	Return
+ *		0 on success.
+ *
+ *		**-ENOENT** if the bpf_local_storage cannot be found.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3655,7 +3691,9 @@ union bpf_attr {
 	FN(get_task_stack),		\
 	FN(load_hdr_opt),		\
 	FN(store_hdr_opt),		\
-	FN(reserve_hdr_opt),
+	FN(reserve_hdr_opt),		\
+	FN(inode_storage_get),		\
+	FN(inode_storage_delete),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 010c9a76fd2b..5482a9b7ae2d 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -170,7 +170,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
 	return btf_fd;
 }
 
-static int load_sk_storage_btf(void)
+static int load_local_storage_btf(void)
 {
 	const char strs[] = "\0bpf_spin_lock\0val\0cnt\0l";
 	/* struct bpf_spin_lock {
@@ -229,12 +229,13 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
 		key_size	= 0;
 		break;
 	case BPF_MAP_TYPE_SK_STORAGE:
+	case BPF_MAP_TYPE_INODE_STORAGE:
 		btf_key_type_id = 1;
 		btf_value_type_id = 3;
 		value_size = 8;
 		max_entries = 0;
 		map_flags = BPF_F_NO_PREALLOC;
-		btf_fd = load_sk_storage_btf();
+		btf_fd = load_local_storage_btf();
 		if (btf_fd < 0)
 			return false;
 		break;
-- 
cgit v1.2.3


From 30897832d8b97e93833fb52c0a02951db3692ed2 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Tue, 25 Aug 2020 20:29:18 +0200
Subject: bpf: Allow local storage to be used from LSM programs

Adds support for both bpf_{sk, inode}_storage_{get, delete} to be used
in LSM programs. These helpers are not used for tracing programs
(currently) as their usage is tied to the life-cycle of the object and
should only be used where the owning object won't be freed (when the
owning object is passed as an argument to the LSM hook). Thus, they
are safer to use in LSM hooks than tracing. Usage of local storage in
tracing programs will probably follow a per function based whitelist
approach.

Since the UAPI helper signature for bpf_sk_storage expects a bpf_sock,
which leads to a compilation warning for LSM programs, it's also updated
to accept a void * pointer instead.

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200825182919.1118197-7-kpsingh@chromium.org
---
 include/net/bpf_sk_storage.h   |  2 ++
 include/uapi/linux/bpf.h       |  7 +++++--
 kernel/bpf/bpf_lsm.c           | 21 ++++++++++++++++++++-
 net/core/bpf_sk_storage.c      | 25 +++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h |  7 +++++--
 5 files changed, 57 insertions(+), 5 deletions(-)

(limited to 'include/uapi')

diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h
index 3c516dd07caf..119f4c9c3a9c 100644
--- a/include/net/bpf_sk_storage.h
+++ b/include/net/bpf_sk_storage.h
@@ -20,6 +20,8 @@ void bpf_sk_storage_free(struct sock *sk);
 
 extern const struct bpf_func_proto bpf_sk_storage_get_proto;
 extern const struct bpf_func_proto bpf_sk_storage_delete_proto;
+extern const struct bpf_func_proto sk_storage_get_btf_proto;
+extern const struct bpf_func_proto sk_storage_delete_btf_proto;
 
 struct bpf_local_storage_elem;
 struct bpf_sk_storage_diag;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b6bfcd085a76..0e1cdf806fe1 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2808,7 +2808,7 @@ union bpf_attr {
  *
  *		**-ERANGE** if resulting value was out of range.
  *
- * void *bpf_sk_storage_get(struct bpf_map *map, struct bpf_sock *sk, void *value, u64 flags)
+ * void *bpf_sk_storage_get(struct bpf_map *map, void *sk, void *value, u64 flags)
  *	Description
  *		Get a bpf-local-storage from a *sk*.
  *
@@ -2824,6 +2824,9 @@ union bpf_attr {
  *		"type". The bpf-local-storage "type" (i.e. the *map*) is
  *		searched against all bpf-local-storages residing at *sk*.
  *
+ *		*sk* is a kernel **struct sock** pointer for LSM program.
+ *		*sk* is a **struct bpf_sock** pointer for other program types.
+ *
  *		An optional *flags* (**BPF_SK_STORAGE_GET_F_CREATE**) can be
  *		used such that a new bpf-local-storage will be
  *		created if one does not exist.  *value* can be used
@@ -2836,7 +2839,7 @@ union bpf_attr {
  *		**NULL** if not found or there was an error in adding
  *		a new bpf-local-storage.
  *
- * long bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk)
+ * long bpf_sk_storage_delete(struct bpf_map *map, void *sk)
  *	Description
  *		Delete a bpf-local-storage from a *sk*.
  *	Return
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index fb278144e9fd..9cd1428c7199 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -11,6 +11,8 @@
 #include <linux/bpf_lsm.h>
 #include <linux/kallsyms.h>
 #include <linux/bpf_verifier.h>
+#include <net/bpf_sk_storage.h>
+#include <linux/bpf_local_storage.h>
 
 /* For every LSM hook that allows attachment of BPF programs, declare a nop
  * function where a BPF program can be attached.
@@ -45,10 +47,27 @@ int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
 	return 0;
 }
 
+static const struct bpf_func_proto *
+bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	switch (func_id) {
+	case BPF_FUNC_inode_storage_get:
+		return &bpf_inode_storage_get_proto;
+	case BPF_FUNC_inode_storage_delete:
+		return &bpf_inode_storage_delete_proto;
+	case BPF_FUNC_sk_storage_get:
+		return &sk_storage_get_btf_proto;
+	case BPF_FUNC_sk_storage_delete:
+		return &sk_storage_delete_btf_proto;
+	default:
+		return tracing_prog_func_proto(func_id, prog);
+	}
+}
+
 const struct bpf_prog_ops lsm_prog_ops = {
 };
 
 const struct bpf_verifier_ops lsm_verifier_ops = {
-	.get_func_proto = tracing_prog_func_proto,
+	.get_func_proto = bpf_lsm_func_proto,
 	.is_valid_access = btf_ctx_access,
 };
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index f29d9a9b4ea4..55fae03b4cc3 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -12,6 +12,7 @@
 #include <net/sock.h>
 #include <uapi/linux/sock_diag.h>
 #include <uapi/linux/btf.h>
+#include <linux/btf_ids.h>
 
 DEFINE_BPF_STORAGE_CACHE(sk_cache);
 
@@ -377,6 +378,30 @@ const struct bpf_func_proto bpf_sk_storage_delete_proto = {
 	.arg2_type	= ARG_PTR_TO_SOCKET,
 };
 
+BTF_ID_LIST(sk_storage_btf_ids)
+BTF_ID_UNUSED
+BTF_ID(struct, sock)
+
+const struct bpf_func_proto sk_storage_get_btf_proto = {
+	.func		= bpf_sk_storage_get,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_PTR_TO_BTF_ID,
+	.arg3_type	= ARG_PTR_TO_MAP_VALUE_OR_NULL,
+	.arg4_type	= ARG_ANYTHING,
+	.btf_id		= sk_storage_btf_ids,
+};
+
+const struct bpf_func_proto sk_storage_delete_btf_proto = {
+	.func		= bpf_sk_storage_delete,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_PTR_TO_BTF_ID,
+	.btf_id		= sk_storage_btf_ids,
+};
+
 struct bpf_sk_storage_diag {
 	u32 nr_maps;
 	struct bpf_map *maps[];
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b6bfcd085a76..0e1cdf806fe1 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2808,7 +2808,7 @@ union bpf_attr {
  *
  *		**-ERANGE** if resulting value was out of range.
  *
- * void *bpf_sk_storage_get(struct bpf_map *map, struct bpf_sock *sk, void *value, u64 flags)
+ * void *bpf_sk_storage_get(struct bpf_map *map, void *sk, void *value, u64 flags)
  *	Description
  *		Get a bpf-local-storage from a *sk*.
  *
@@ -2824,6 +2824,9 @@ union bpf_attr {
  *		"type". The bpf-local-storage "type" (i.e. the *map*) is
  *		searched against all bpf-local-storages residing at *sk*.
  *
+ *		*sk* is a kernel **struct sock** pointer for LSM program.
+ *		*sk* is a **struct bpf_sock** pointer for other program types.
+ *
  *		An optional *flags* (**BPF_SK_STORAGE_GET_F_CREATE**) can be
  *		used such that a new bpf-local-storage will be
  *		created if one does not exist.  *value* can be used
@@ -2836,7 +2839,7 @@ union bpf_attr {
  *		**NULL** if not found or there was an error in adding
  *		a new bpf-local-storage.
  *
- * long bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk)
+ * long bpf_sk_storage_delete(struct bpf_map *map, void *sk)
  *	Description
  *		Delete a bpf-local-storage from a *sk*.
  *	Return
-- 
cgit v1.2.3


From 6e22ab9da79343532cd3cde39df25e5a5478c692 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 25 Aug 2020 21:21:20 +0200
Subject: bpf: Add d_path helper

Adding d_path helper function that returns full path for
given 'struct path' object, which needs to be the kernel
BTF 'path' object. The path is returned in the provided buffer
'buf' of size 'sz' and is zero terminated.

  bpf_d_path(&file->f_path, buf, size);

The helper calls the d_path function directly, so there's only
a limited set of functions it can be called from. Adding just
a very modest set for a start.

Updating also bpf.h tools uapi header and adding 'path' to
bpf_helpers_doc.py script.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: KP Singh <kpsingh@google.com>
Link: https://lore.kernel.org/bpf/20200825192124.710397-11-jolsa@kernel.org
---
 include/uapi/linux/bpf.h       | 14 ++++++++++++
 kernel/trace/bpf_trace.c       | 48 ++++++++++++++++++++++++++++++++++++++++++
 scripts/bpf_helpers_doc.py     |  2 ++
 tools/include/uapi/linux/bpf.h | 14 ++++++++++++
 4 files changed, 78 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0e1cdf806fe1..0388bc0200b0 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3513,6 +3513,7 @@ union bpf_attr {
  *
  *		**-EPERM** This helper cannot be used under the
  *			   current sock_ops->op.
+ *
  * void *bpf_inode_storage_get(struct bpf_map *map, void *inode, void *value, u64 flags)
  *	Description
  *		Get a bpf_local_storage from an *inode*.
@@ -3548,6 +3549,18 @@ union bpf_attr {
  *		0 on success.
  *
  *		**-ENOENT** if the bpf_local_storage cannot be found.
+ *
+ * long bpf_d_path(struct path *path, char *buf, u32 sz)
+ *	Description
+ *		Return full path for given 'struct path' object, which
+ *		needs to be the kernel BTF 'path' object. The path is
+ *		returned in the provided buffer 'buf' of size 'sz' and
+ *		is zero terminated.
+ *
+ *	Return
+ *		On success, the strictly positive length of the string,
+ *		including the trailing NUL character. On error, a negative
+ *		value.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3697,6 +3710,7 @@ union bpf_attr {
 	FN(reserve_hdr_opt),		\
 	FN(inode_storage_get),		\
 	FN(inode_storage_delete),	\
+	FN(d_path),			\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index a8d4f253ed77..d973d891f2e2 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1098,6 +1098,52 @@ static const struct bpf_func_proto bpf_send_signal_thread_proto = {
 	.arg1_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_3(bpf_d_path, struct path *, path, char *, buf, u32, sz)
+{
+	long len;
+	char *p;
+
+	if (!sz)
+		return 0;
+
+	p = d_path(path, buf, sz);
+	if (IS_ERR(p)) {
+		len = PTR_ERR(p);
+	} else {
+		len = buf + sz - p;
+		memmove(buf, p, len);
+	}
+
+	return len;
+}
+
+BTF_SET_START(btf_allowlist_d_path)
+BTF_ID(func, vfs_truncate)
+BTF_ID(func, vfs_fallocate)
+BTF_ID(func, dentry_open)
+BTF_ID(func, vfs_getattr)
+BTF_ID(func, filp_close)
+BTF_SET_END(btf_allowlist_d_path)
+
+static bool bpf_d_path_allowed(const struct bpf_prog *prog)
+{
+	return btf_id_set_contains(&btf_allowlist_d_path, prog->aux->attach_btf_id);
+}
+
+BTF_ID_LIST(bpf_d_path_btf_ids)
+BTF_ID(struct, path)
+
+static const struct bpf_func_proto bpf_d_path_proto = {
+	.func		= bpf_d_path,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_BTF_ID,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
+	.btf_id		= bpf_d_path_btf_ids,
+	.allowed	= bpf_d_path_allowed,
+};
+
 const struct bpf_func_proto *
 bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -1579,6 +1625,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return prog->expected_attach_type == BPF_TRACE_ITER ?
 		       &bpf_seq_write_proto :
 		       NULL;
+	case BPF_FUNC_d_path:
+		return &bpf_d_path_proto;
 	default:
 		return raw_tp_prog_func_proto(func_id, prog);
 	}
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
index 5bfa448b4704..08388173973f 100755
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
@@ -432,6 +432,7 @@ class PrinterHelpers(Printer):
             'struct __sk_buff',
             'struct sk_msg_md',
             'struct xdp_md',
+            'struct path',
     ]
     known_types = {
             '...',
@@ -472,6 +473,7 @@ class PrinterHelpers(Printer):
             'struct tcp_request_sock',
             'struct udp6_sock',
             'struct task_struct',
+            'struct path',
     }
     mapped_types = {
             'u8': '__u8',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 0e1cdf806fe1..0388bc0200b0 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3513,6 +3513,7 @@ union bpf_attr {
  *
  *		**-EPERM** This helper cannot be used under the
  *			   current sock_ops->op.
+ *
  * void *bpf_inode_storage_get(struct bpf_map *map, void *inode, void *value, u64 flags)
  *	Description
  *		Get a bpf_local_storage from an *inode*.
@@ -3548,6 +3549,18 @@ union bpf_attr {
  *		0 on success.
  *
  *		**-ENOENT** if the bpf_local_storage cannot be found.
+ *
+ * long bpf_d_path(struct path *path, char *buf, u32 sz)
+ *	Description
+ *		Return full path for given 'struct path' object, which
+ *		needs to be the kernel BTF 'path' object. The path is
+ *		returned in the provided buffer 'buf' of size 'sz' and
+ *		is zero terminated.
+ *
+ *	Return
+ *		On success, the strictly positive length of the string,
+ *		including the trailing NUL character. On error, a negative
+ *		value.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3697,6 +3710,7 @@ union bpf_attr {
 	FN(reserve_hdr_opt),		\
 	FN(inode_storage_get),		\
 	FN(inode_storage_delete),	\
+	FN(d_path),			\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 76ab546cd8f0c64d4603b2faad4558c5b670561e Mon Sep 17 00:00:00 2001
From: Karol Trzcinski <karolx.trzcinski@linux.intel.com>
Date: Tue, 25 Aug 2020 16:58:51 -0700
Subject: ASoC: SOF: IPC: make sof_ipc_window monosized

This step is needed to make it possible to pack sof_ipc_window inside
another structure in the FW build tools - for example in the extended manifest.
Structure reusability leads to easy parsing function reuse, so source
code is shorter and easier to maintain.

Using structures with constant size is less tricky and is properly
supported by each toolchain, in contrast to variable size elements.

This is minor ABI change - backward compatibility is kept.

Signed-off-by: Karol Trzcinski <karolx.trzcinski@linux.intel.com>
Reviewed-by: Guennadi Liakhovetski <guennadi.liakhovetski@linux.intel.com>
Reviewed-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Link: https://lore.kernel.org/r/20200825235854.1588034-2-ranjani.sridharan@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/sof/info.h     | 2 +-
 include/uapi/sound/sof/abi.h | 2 +-
 sound/soc/sof/loader.c       | 5 ++---
 3 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/uapi')

diff --git a/include/sound/sof/info.h b/include/sound/sof/info.h
index 5a55ba8b7e56..313e3e70c630 100644
--- a/include/sound/sof/info.h
+++ b/include/sound/sof/info.h
@@ -99,7 +99,7 @@ struct sof_ipc_window_elem {
 struct sof_ipc_window {
 	struct sof_ipc_ext_data_hdr ext_hdr;
 	uint32_t num_windows;
-	struct sof_ipc_window_elem window[];
+	struct sof_ipc_window_elem window[SOF_IPC_MAX_ELEMS];
 }  __packed;
 
 struct sof_ipc_cc_version {
diff --git a/include/uapi/sound/sof/abi.h b/include/uapi/sound/sof/abi.h
index d54be303090f..6af32f82fb99 100644
--- a/include/uapi/sound/sof/abi.h
+++ b/include/uapi/sound/sof/abi.h
@@ -26,7 +26,7 @@
 
 /* SOF ABI version major, minor and patch numbers */
 #define SOF_ABI_MAJOR 3
-#define SOF_ABI_MINOR 16
+#define SOF_ABI_MINOR 17
 #define SOF_ABI_PATCH 0
 
 /* SOF ABI version number. Format within 32bit word is MMmmmppp */
diff --git a/sound/soc/sof/loader.c b/sound/soc/sof/loader.c
index b94fa5f5d480..25dc28ebafb7 100644
--- a/sound/soc/sof/loader.c
+++ b/sound/soc/sof/loader.c
@@ -20,13 +20,12 @@ static int get_ext_windows(struct snd_sof_dev *sdev,
 {
 	const struct sof_ipc_window *w =
 		container_of(ext_hdr, struct sof_ipc_window, ext_hdr);
-	size_t w_size = struct_size(w, window, w->num_windows);
 
 	if (w->num_windows == 0 || w->num_windows > SOF_IPC_MAX_ELEMS)
 		return -EINVAL;
 
 	if (sdev->info_window) {
-		if (memcmp(sdev->info_window, w, w_size)) {
+		if (memcmp(sdev->info_window, w, ext_hdr->hdr.size)) {
 			dev_err(sdev->dev, "error: mismatch between window descriptor from extended manifest and mailbox");
 			return -EINVAL;
 		}
@@ -34,7 +33,7 @@ static int get_ext_windows(struct snd_sof_dev *sdev,
 	}
 
 	/* keep a local copy of the data */
-	sdev->info_window = kmemdup(w, w_size, GFP_KERNEL);
+	sdev->info_window = kmemdup(w, ext_hdr->hdr.size, GFP_KERNEL);
 	if (!sdev->info_window)
 		return -ENOMEM;
 
-- 
cgit v1.2.3


From b305dfe2e93434b12d438434461b709641f62af4 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Thu, 20 Aug 2020 12:47:16 +0200
Subject: media: videodev2.h: RGB BT2020 and HSV are always full range

The default RGB quantization range for BT.2020 is full range (just as for
all the other RGB pixel encodings), not limited range.

Update the V4L2_MAP_QUANTIZATION_DEFAULT macro and documentation
accordingly.

Also mention that HSV is always full range and cannot be limited range.

When RGB BT2020 was introduced in V4L2 it was not clear whether it should
be limited or full range, but full range is the right (and consistent)
choice.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../userspace-api/media/v4l/colorspaces-defs.rst        |  9 ++++-----
 .../userspace-api/media/v4l/colorspaces-details.rst     |  5 ++---
 include/uapi/linux/videodev2.h                          | 17 ++++++++---------
 3 files changed, 14 insertions(+), 17 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/userspace-api/media/v4l/colorspaces-defs.rst b/Documentation/userspace-api/media/v4l/colorspaces-defs.rst
index 01404e1f609a..4089f426258d 100644
--- a/Documentation/userspace-api/media/v4l/colorspaces-defs.rst
+++ b/Documentation/userspace-api/media/v4l/colorspaces-defs.rst
@@ -36,8 +36,7 @@ whole range, 0-255, dividing the angular value by 1.41. The enum
 :c:type:`v4l2_hsv_encoding` specifies which encoding is used.
 
 .. note:: The default R'G'B' quantization is full range for all
-   colorspaces except for BT.2020 which uses limited range R'G'B'
-   quantization.
+   colorspaces. HSV formats are always full range.
 
 .. tabularcolumns:: |p{6.7cm}|p{10.8cm}|
 
@@ -169,8 +168,8 @@ whole range, 0-255, dividing the angular value by 1.41. The enum
       - Details
     * - ``V4L2_QUANTIZATION_DEFAULT``
       - Use the default quantization encoding as defined by the
-	colorspace. This is always full range for R'G'B' (except for the
-	BT.2020 colorspace) and HSV. It is usually limited range for Y'CbCr.
+	colorspace. This is always full range for R'G'B' and HSV.
+	It is usually limited range for Y'CbCr.
     * - ``V4L2_QUANTIZATION_FULL_RANGE``
       - Use the full range quantization encoding. I.e. the range [0…1] is
 	mapped to [0…255] (with possible clipping to [1…254] to avoid the
@@ -180,4 +179,4 @@ whole range, 0-255, dividing the angular value by 1.41. The enum
     * - ``V4L2_QUANTIZATION_LIM_RANGE``
       - Use the limited range quantization encoding. I.e. the range [0…1]
 	is mapped to [16…235]. Cb and Cr are mapped from [-0.5…0.5] to
-	[16…240].
+	[16…240]. Limited Range cannot be used with HSV.
diff --git a/Documentation/userspace-api/media/v4l/colorspaces-details.rst b/Documentation/userspace-api/media/v4l/colorspaces-details.rst
index 300c5d2e7d0f..cf1b825ec34a 100644
--- a/Documentation/userspace-api/media/v4l/colorspaces-details.rst
+++ b/Documentation/userspace-api/media/v4l/colorspaces-details.rst
@@ -377,9 +377,8 @@ Colorspace BT.2020 (V4L2_COLORSPACE_BT2020)
 The :ref:`itu2020` standard defines the colorspace used by Ultra-high
 definition television (UHDTV). The default transfer function is
 ``V4L2_XFER_FUNC_709``. The default Y'CbCr encoding is
-``V4L2_YCBCR_ENC_BT2020``. The default R'G'B' quantization is limited
-range (!), and so is the default Y'CbCr quantization. The chromaticities
-of the primary colors and the white reference are:
+``V4L2_YCBCR_ENC_BT2020``. The default Y'CbCr quantization is limited range.
+The chromaticities of the primary colors and the white reference are:
 
 
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index c7b70ff53bc1..4769628790da 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -375,9 +375,9 @@ enum v4l2_hsv_encoding {
 
 enum v4l2_quantization {
 	/*
-	 * The default for R'G'B' quantization is always full range, except
-	 * for the BT2020 colorspace. For Y'CbCr the quantization is always
-	 * limited range, except for COLORSPACE_JPEG: this is full range.
+	 * The default for R'G'B' quantization is always full range.
+	 * For Y'CbCr the quantization is always limited range, except
+	 * for COLORSPACE_JPEG: this is full range.
 	 */
 	V4L2_QUANTIZATION_DEFAULT     = 0,
 	V4L2_QUANTIZATION_FULL_RANGE  = 1,
@@ -386,14 +386,13 @@ enum v4l2_quantization {
 
 /*
  * Determine how QUANTIZATION_DEFAULT should map to a proper quantization.
- * This depends on whether the image is RGB or not, the colorspace and the
- * Y'CbCr encoding.
+ * This depends on whether the image is RGB or not, and on the colorspace.
+ * The Y'CbCr encoding is not used anymore, but is still there for backwards
+ * compatibility.
  */
 #define V4L2_MAP_QUANTIZATION_DEFAULT(is_rgb_or_hsv, colsp, ycbcr_enc) \
-	(((is_rgb_or_hsv) && (colsp) == V4L2_COLORSPACE_BT2020) ? \
-	 V4L2_QUANTIZATION_LIM_RANGE : \
-	 (((is_rgb_or_hsv) || (colsp) == V4L2_COLORSPACE_JPEG) ? \
-	 V4L2_QUANTIZATION_FULL_RANGE : V4L2_QUANTIZATION_LIM_RANGE))
+	(((is_rgb_or_hsv) || (colsp) == V4L2_COLORSPACE_JPEG) ? \
+	 V4L2_QUANTIZATION_FULL_RANGE : V4L2_QUANTIZATION_LIM_RANGE)
 
 /*
  * Deprecated names for opRGB colorspace (IEC 61966-2-5)
-- 
cgit v1.2.3


From 493a0ebd804c986e6bd207603c5e1ca748470d3d Mon Sep 17 00:00:00 2001
From: James Prestwood <prestwoj@gmail.com>
Date: Mon, 13 Apr 2020 09:20:53 -0700
Subject: nl80211: fix PORT_AUTHORIZED wording to reflect behavior

The CMD_PORT_AUTHORIZED event was described as an event which indicated
a successfully completed 4-way handshake. But the behavior was
not as advertised. The only driver which uses this is brcmfmac, and
this driver only sends the event after a successful 802.1X-FT roam.

This prevents userspace applications from knowing if the 4-way completed
on:

1. Normal 802.1X connects
2. Normal PSK connections
3. FT-PSK roams

wpa_supplicant handles this incorrect behavior by just completing
the connection after association, before the 4-way has completed.
If the 4-way ends up failing it disconnects at that point.

Since this behavior appears to be expected (wpa_s handles it this
way) I have changed the wording in the API description to reflect the
actual behavior.

Signed-off-by: James Prestwood <prestwoj@gmail.com>
Link: https://lore.kernel.org/r/20200413162053.3711-1-prestwoj@gmail.com
[fix spelling of 802.1X throughout ...]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 631f3a997b3c..8cc2b825e4e4 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -647,13 +647,9 @@
  *	authentication/association or not receiving a response from the AP.
  *	Non-zero %NL80211_ATTR_STATUS_CODE value is indicated in that case as
  *	well to remain backwards compatible.
- *	When establishing a security association, drivers that support 4 way
- *	handshake offload should send %NL80211_CMD_PORT_AUTHORIZED event when
- *	the 4 way handshake is completed successfully.
  * @NL80211_CMD_ROAM: Notification indicating the card/driver roamed by itself.
- *	When a security association was established with the new AP (e.g. if
- *	the FT protocol was used for roaming or the driver completed the 4 way
- *	handshake), this event should be followed by an
+ *	When a security association was established on an 802.1X network using
+ *	fast transition, this event should be followed by an
  *	%NL80211_CMD_PORT_AUTHORIZED event.
  * @NL80211_CMD_DISCONNECT: drop a given connection; also used to notify
  *	userspace that a connection was dropped by the AP or due to other
@@ -1067,13 +1063,11 @@
  * @NL80211_CMD_DEL_PMK: For offloaded 4-Way handshake, delete the previously
  *	configured PMK for the authenticator address identified by
  *	%NL80211_ATTR_MAC.
- * @NL80211_CMD_PORT_AUTHORIZED: An event that indicates that the 4 way
- *	handshake was completed successfully by the driver. The BSSID is
- *	specified with %NL80211_ATTR_MAC. Drivers that support 4 way handshake
- *	offload should send this event after indicating 802.11 association with
- *	%NL80211_CMD_CONNECT or %NL80211_CMD_ROAM. If the 4 way handshake failed
- *	%NL80211_CMD_DISCONNECT should be indicated instead.
- *
+ * @NL80211_CMD_PORT_AUTHORIZED: An event that indicates an 802.1X FT roam was
+ *	completed successfully. Drivers that support 4 way handshake offload
+ *	should send this event after indicating 802.1X FT association with
+ *	%NL80211_CMD_ROAM. If the 4 way handshake failed %NL80211_CMD_DISCONNECT
+ *	should be indicated instead.
  * @NL80211_CMD_CONTROL_PORT_FRAME: Control Port (e.g. PAE) frame TX request
  *	and RX notification.  This command is used both as a request to transmit
  *	a control port frame and as a notification that a control port frame
-- 
cgit v1.2.3


From eb89a6a6b7a1af2d9c8d83ee44fa67700d6337e7 Mon Sep 17 00:00:00 2001
From: Miles Hu <milehu@codeaurora.org>
Date: Tue, 4 Aug 2020 10:16:29 +0200
Subject: nl80211: add support for setting fixed HE rate/gi/ltf

This patch adds the nl80211 structs, definitions, policies and parsing
code required to pass fixed HE rate, GI and LTF settings.

Signed-off-by: Miles Hu <milehu@codeaurora.org>
Signed-off-by: John Crispin <john@phrozen.org>
Link: https://lore.kernel.org/r/20200804081630.2013619-1-john@phrozen.org
[fix comment]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |   3 +
 include/uapi/linux/nl80211.h |  28 +++++++++
 net/wireless/nl80211.c       | 137 ++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 160 insertions(+), 8 deletions(-)

(limited to 'include/uapi')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index d9e6b9fbd95b..c9bce9bba511 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -678,7 +678,10 @@ struct cfg80211_bitrate_mask {
 		u32 legacy;
 		u8 ht_mcs[IEEE80211_HT_MCS_MASK_LEN];
 		u16 vht_mcs[NL80211_VHT_NSS_MAX];
+		u16 he_mcs[NL80211_HE_NSS_MAX];
 		enum nl80211_txrate_gi gi;
+		enum nl80211_he_gi he_gi;
+		enum nl80211_he_ltf he_ltf;
 	} control[NUM_NL80211_BANDS];
 };
 
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 8cc2b825e4e4..1a4b922f489f 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3180,6 +3180,18 @@ enum nl80211_he_gi {
 	NL80211_RATE_INFO_HE_GI_3_2,
 };
 
+/**
+ * enum nl80211_he_ltf - HE long training field
+ * @NL80211_RATE_INFO_HE_1XLTF: 3.2 usec
+ * @NL80211_RATE_INFO_HE_2XLTF: 6.4 usec
+ * @NL80211_RATE_INFO_HE_4XLTF: 12.8 usec
+ */
+enum nl80211_he_ltf {
+	NL80211_RATE_INFO_HE_1XLTF,
+	NL80211_RATE_INFO_HE_2XLTF,
+	NL80211_RATE_INFO_HE_4XLTF,
+};
+
 /**
  * enum nl80211_he_ru_alloc - HE RU allocation values
  * @NL80211_RATE_INFO_HE_RU_ALLOC_26: 26-tone RU allocation
@@ -4735,6 +4747,10 @@ enum nl80211_key_attributes {
  * @NL80211_TXRATE_VHT: VHT rates allowed for TX rate selection,
  *	see &struct nl80211_txrate_vht
  * @NL80211_TXRATE_GI: configure GI, see &enum nl80211_txrate_gi
+ * @NL80211_TXRATE_HE: HE rates allowed for TX rate selection,
+ *	see &struct nl80211_txrate_he
+ * @NL80211_TXRATE_HE_GI: configure HE GI, 0.8us, 1.6us and 3.2us.
+ * @NL80211_TXRATE_HE_LTF: configure HE LTF, 1XLTF, 2XLTF and 4XLTF.
  * @__NL80211_TXRATE_AFTER_LAST: internal
  * @NL80211_TXRATE_MAX: highest TX rate attribute
  */
@@ -4744,6 +4760,9 @@ enum nl80211_tx_rate_attributes {
 	NL80211_TXRATE_HT,
 	NL80211_TXRATE_VHT,
 	NL80211_TXRATE_GI,
+	NL80211_TXRATE_HE,
+	NL80211_TXRATE_HE_GI,
+	NL80211_TXRATE_HE_LTF,
 
 	/* keep last */
 	__NL80211_TXRATE_AFTER_LAST,
@@ -4761,6 +4780,15 @@ struct nl80211_txrate_vht {
 	__u16 mcs[NL80211_VHT_NSS_MAX];
 };
 
+#define NL80211_HE_NSS_MAX		8
+/**
+ * struct nl80211_txrate_he - HE MCS/NSS txrate bitmap
+ * @mcs: MCS bitmap table for each NSS (array index 0 for 1 stream, etc.)
+ */
+struct nl80211_txrate_he {
+	__u16 mcs[NL80211_HE_NSS_MAX];
+};
+
 enum nl80211_txrate_gi {
 	NL80211_TXRATE_DEFAULT_GI,
 	NL80211_TXRATE_FORCE_SGI,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 6ee3bc48d776..da0f33c2d2d8 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -336,6 +336,13 @@ static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = {
 				.len = NL80211_MAX_SUPP_HT_RATES },
 	[NL80211_TXRATE_VHT] = NLA_POLICY_EXACT_LEN_WARN(sizeof(struct nl80211_txrate_vht)),
 	[NL80211_TXRATE_GI] = { .type = NLA_U8 },
+	[NL80211_TXRATE_HE] = NLA_POLICY_EXACT_LEN(sizeof(struct nl80211_txrate_he)),
+	[NL80211_TXRATE_HE_GI] =  NLA_POLICY_RANGE(NLA_U8,
+						   NL80211_RATE_INFO_HE_GI_0_8,
+						   NL80211_RATE_INFO_HE_GI_3_2),
+	[NL80211_TXRATE_HE_LTF] = NLA_POLICY_RANGE(NLA_U8,
+						   NL80211_RATE_INFO_HE_1XLTF,
+						   NL80211_RATE_INFO_HE_4XLTF),
 };
 
 static const struct nla_policy
@@ -4430,21 +4437,106 @@ static bool vht_set_mcs_mask(struct ieee80211_supported_band *sband,
 	return true;
 }
 
+static u16 he_mcs_map_to_mcs_mask(u8 he_mcs_map)
+{
+	switch (he_mcs_map) {
+	case IEEE80211_HE_MCS_NOT_SUPPORTED:
+		return 0;
+	case IEEE80211_HE_MCS_SUPPORT_0_7:
+		return 0x00FF;
+	case IEEE80211_HE_MCS_SUPPORT_0_9:
+		return 0x03FF;
+	case IEEE80211_HE_MCS_SUPPORT_0_11:
+		return 0xFFF;
+	default:
+		break;
+	}
+	return 0;
+}
+
+static void he_build_mcs_mask(u16 he_mcs_map,
+			      u16 he_mcs_mask[NL80211_HE_NSS_MAX])
+{
+	u8 nss;
+
+	for (nss = 0; nss < NL80211_HE_NSS_MAX; nss++) {
+		he_mcs_mask[nss] = he_mcs_map_to_mcs_mask(he_mcs_map & 0x03);
+		he_mcs_map >>= 2;
+	}
+}
+
+static u16 he_get_txmcsmap(struct genl_info *info,
+			   const struct ieee80211_sta_he_cap *he_cap)
+{
+	struct net_device *dev = info->user_ptr[1];
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	__le16	tx_mcs;
+
+	switch (wdev->chandef.width) {
+	case NL80211_CHAN_WIDTH_80P80:
+		tx_mcs = he_cap->he_mcs_nss_supp.tx_mcs_80p80;
+		break;
+	case NL80211_CHAN_WIDTH_160:
+		tx_mcs = he_cap->he_mcs_nss_supp.tx_mcs_160;
+		break;
+	default:
+		tx_mcs = he_cap->he_mcs_nss_supp.tx_mcs_80;
+		break;
+	}
+	return le16_to_cpu(tx_mcs);
+}
+
+static bool he_set_mcs_mask(struct genl_info *info,
+			    struct wireless_dev *wdev,
+			    struct ieee80211_supported_band *sband,
+			    struct nl80211_txrate_he *txrate,
+			    u16 mcs[NL80211_HE_NSS_MAX])
+{
+	const struct ieee80211_sta_he_cap *he_cap;
+	u16 tx_mcs_mask[NL80211_HE_NSS_MAX] = {};
+	u16 tx_mcs_map = 0;
+	u8 i;
+
+	he_cap = ieee80211_get_he_iftype_cap(sband, wdev->iftype);
+	if (!he_cap)
+		return false;
+
+	memset(mcs, 0, sizeof(u16) * NL80211_HE_NSS_MAX);
+
+	tx_mcs_map = he_get_txmcsmap(info, he_cap);
+
+	/* Build he_mcs_mask from HE capabilities */
+	he_build_mcs_mask(tx_mcs_map, tx_mcs_mask);
+
+	for (i = 0; i < NL80211_HE_NSS_MAX; i++) {
+		if ((tx_mcs_mask[i] & txrate->mcs[i]) == txrate->mcs[i])
+			mcs[i] = txrate->mcs[i];
+		else
+			return false;
+	}
+
+	return true;
+}
+
 static int nl80211_parse_tx_bitrate_mask(struct genl_info *info,
 					 struct nlattr *attrs[],
 					 enum nl80211_attrs attr,
-					 struct cfg80211_bitrate_mask *mask)
+					 struct cfg80211_bitrate_mask *mask,
+					 struct net_device *dev)
 {
 	struct nlattr *tb[NL80211_TXRATE_MAX + 1];
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	int rem, i;
 	struct nlattr *tx_rates;
 	struct ieee80211_supported_band *sband;
-	u16 vht_tx_mcs_map;
+	u16 vht_tx_mcs_map, he_tx_mcs_map;
 
 	memset(mask, 0, sizeof(*mask));
 	/* Default to all rates enabled */
 	for (i = 0; i < NUM_NL80211_BANDS; i++) {
+		const struct ieee80211_sta_he_cap *he_cap;
+
 		sband = rdev->wiphy.bands[i];
 
 		if (!sband)
@@ -4460,6 +4552,16 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info,
 
 		vht_tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map);
 		vht_build_mcs_mask(vht_tx_mcs_map, mask->control[i].vht_mcs);
+
+		he_cap = ieee80211_get_he_iftype_cap(sband, wdev->iftype);
+		if (!he_cap)
+			continue;
+
+		he_tx_mcs_map = he_get_txmcsmap(info, he_cap);
+		he_build_mcs_mask(he_tx_mcs_map, mask->control[i].he_mcs);
+
+		mask->control[i].he_gi = 0xFF;
+		mask->control[i].he_ltf = 0xFF;
 	}
 
 	/* if no rates are given set it back to the defaults */
@@ -4515,13 +4617,25 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info,
 			if (mask->control[band].gi > NL80211_TXRATE_FORCE_LGI)
 				return -EINVAL;
 		}
+		if (tb[NL80211_TXRATE_HE] &&
+		    !he_set_mcs_mask(info, wdev, sband,
+				     nla_data(tb[NL80211_TXRATE_HE]),
+				     mask->control[band].he_mcs))
+			return -EINVAL;
+		if (tb[NL80211_TXRATE_HE_GI])
+			mask->control[band].he_gi =
+				nla_get_u8(tb[NL80211_TXRATE_HE_GI]);
+		if (tb[NL80211_TXRATE_HE_LTF])
+			mask->control[band].he_ltf =
+				nla_get_u8(tb[NL80211_TXRATE_HE_LTF]);
 
 		if (mask->control[band].legacy == 0) {
-			/* don't allow empty legacy rates if HT or VHT
+			/* don't allow empty legacy rates if HT, VHT or HE
 			 * are not even supported.
 			 */
 			if (!(rdev->wiphy.bands[band]->ht_cap.ht_supported ||
-			      rdev->wiphy.bands[band]->vht_cap.vht_supported))
+			      rdev->wiphy.bands[band]->vht_cap.vht_supported ||
+			      ieee80211_get_he_iftype_cap(sband, wdev->iftype)))
 				return -EINVAL;
 
 			for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++)
@@ -4532,6 +4646,10 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info,
 				if (mask->control[band].vht_mcs[i])
 					goto out;
 
+			for (i = 0; i < NL80211_HE_NSS_MAX; i++)
+				if (mask->control[band].he_mcs[i])
+					goto out;
+
 			/* legacy and mcs rates may not be both empty */
 			return -EINVAL;
 		}
@@ -4976,7 +5094,8 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
 	if (info->attrs[NL80211_ATTR_TX_RATES]) {
 		err = nl80211_parse_tx_bitrate_mask(info, info->attrs,
 						    NL80211_ATTR_TX_RATES,
-						    &params.beacon_rate);
+						    &params.beacon_rate,
+						    dev);
 		if (err)
 			return err;
 
@@ -10780,7 +10899,8 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb,
 		return -EOPNOTSUPP;
 
 	err = nl80211_parse_tx_bitrate_mask(info, info->attrs,
-					    NL80211_ATTR_TX_RATES, &mask);
+					    NL80211_ATTR_TX_RATES, &mask,
+					    dev);
 	if (err)
 		return err;
 
@@ -11388,7 +11508,8 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
 	if (info->attrs[NL80211_ATTR_TX_RATES]) {
 		err = nl80211_parse_tx_bitrate_mask(info, info->attrs,
 						    NL80211_ATTR_TX_RATES,
-						    &setup.beacon_rate);
+						    &setup.beacon_rate,
+						    dev);
 		if (err)
 			return err;
 
@@ -14168,7 +14289,7 @@ static int parse_tid_conf(struct cfg80211_registered_device *rdev,
 		if (tid_conf->txrate_type != NL80211_TX_RATE_AUTOMATIC) {
 			attr = NL80211_TID_CONFIG_ATTR_TX_RATE;
 			err = nl80211_parse_tx_bitrate_mask(info, attrs, attr,
-						    &tid_conf->txrate_mask);
+						    &tid_conf->txrate_mask, dev);
 			if (err)
 				return err;
 
-- 
cgit v1.2.3


From 00c207edfb2bff9cf03a8f21e57c9c752a1d9f16 Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Tue, 11 Aug 2020 10:01:03 +0200
Subject: nl80211: rename csa counter attributes to countdown counters

We want to reuse the attributes for other counters such as BSS color
change. Rename them to more generic names.

Signed-off-by: John Crispin <john@phrozen.org>
Link: https://lore.kernel.org/r/20200811080107.3615705-1-john@phrozen.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 14 ++++++++------
 net/wireless/nl80211.c       | 16 ++++++++--------
 2 files changed, 16 insertions(+), 14 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 1a4b922f489f..ec96d5fe0e05 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2076,10 +2076,10 @@ enum nl80211_commands {
  *	operation).
  * @NL80211_ATTR_CSA_IES: Nested set of attributes containing the IE information
  *	for the time while performing a channel switch.
- * @NL80211_ATTR_CSA_C_OFF_BEACON: An array of offsets (u16) to the channel
- *	switch counters in the beacons tail (%NL80211_ATTR_BEACON_TAIL).
- * @NL80211_ATTR_CSA_C_OFF_PRESP: An array of offsets (u16) to the channel
- *	switch counters in the probe response (%NL80211_ATTR_PROBE_RESP).
+ * @NL80211_ATTR_CNTDWN_OFFS_BEACON: An array of offsets (u16) to the channel
+ *	switch or color change counters in the beacons tail (%NL80211_ATTR_BEACON_TAIL).
+ * @NL80211_ATTR_CNTDWN_OFFS_PRESP: An array of offsets (u16) to the channel
+ *	switch or color change counters in the probe response (%NL80211_ATTR_PROBE_RESP).
  *
  * @NL80211_ATTR_RXMGMT_FLAGS: flags for nl80211_send_mgmt(), u32.
  *	As specified in the &enum nl80211_rxmgmt_flags.
@@ -2815,8 +2815,8 @@ enum nl80211_attrs {
 	NL80211_ATTR_CH_SWITCH_COUNT,
 	NL80211_ATTR_CH_SWITCH_BLOCK_TX,
 	NL80211_ATTR_CSA_IES,
-	NL80211_ATTR_CSA_C_OFF_BEACON,
-	NL80211_ATTR_CSA_C_OFF_PRESP,
+	NL80211_ATTR_CNTDWN_OFFS_BEACON,
+	NL80211_ATTR_CNTDWN_OFFS_PRESP,
 
 	NL80211_ATTR_RXMGMT_FLAGS,
 
@@ -3003,6 +3003,8 @@ enum nl80211_attrs {
 #define	NL80211_ATTR_MESH_PARAMS NL80211_ATTR_MESH_CONFIG
 #define NL80211_ATTR_IFACE_SOCKET_OWNER NL80211_ATTR_SOCKET_OWNER
 #define NL80211_ATTR_SAE_DATA NL80211_ATTR_AUTH_DATA
+#define NL80211_ATTR_CSA_C_OFF_BEACON NL80211_ATTR_CNTDWN_OFFS_BEACON
+#define NL80211_ATTR_CSA_C_OFF_PRESP NL80211_ATTR_CNTDWN_OFFS_PRESP
 
 /*
  * Allow user space programs to use #ifdef on new attributes by defining them
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index da0f33c2d2d8..e640e65f3255 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -578,8 +578,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_CH_SWITCH_COUNT] = { .type = NLA_U32 },
 	[NL80211_ATTR_CH_SWITCH_BLOCK_TX] = { .type = NLA_FLAG },
 	[NL80211_ATTR_CSA_IES] = { .type = NLA_NESTED },
-	[NL80211_ATTR_CSA_C_OFF_BEACON] = { .type = NLA_BINARY },
-	[NL80211_ATTR_CSA_C_OFF_PRESP] = { .type = NLA_BINARY },
+	[NL80211_ATTR_CNTDWN_OFFS_BEACON] = { .type = NLA_BINARY },
+	[NL80211_ATTR_CNTDWN_OFFS_PRESP] = { .type = NLA_BINARY },
 	[NL80211_ATTR_STA_SUPPORTED_CHANNELS] = NLA_POLICY_MIN_LEN(2),
 	/*
 	 * The value of the Length field of the Supported Operating
@@ -8891,10 +8891,10 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
 	if (err)
 		return err;
 
-	if (!csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON])
+	if (!csa_attrs[NL80211_ATTR_CNTDWN_OFFS_BEACON])
 		return -EINVAL;
 
-	len = nla_len(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]);
+	len = nla_len(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_BEACON]);
 	if (!len || (len % sizeof(u16)))
 		return -EINVAL;
 
@@ -8905,7 +8905,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
 		return -EINVAL;
 
 	params.counter_offsets_beacon =
-		nla_data(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]);
+		nla_data(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_BEACON]);
 
 	/* sanity checks - counters should fit and be the same */
 	for (i = 0; i < params.n_counter_offsets_beacon; i++) {
@@ -8918,8 +8918,8 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
 			return -EINVAL;
 	}
 
-	if (csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]) {
-		len = nla_len(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]);
+	if (csa_attrs[NL80211_ATTR_CNTDWN_OFFS_PRESP]) {
+		len = nla_len(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_PRESP]);
 		if (!len || (len % sizeof(u16)))
 			return -EINVAL;
 
@@ -8930,7 +8930,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
 			return -EINVAL;
 
 		params.counter_offsets_presp =
-			nla_data(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]);
+			nla_data(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_PRESP]);
 
 		/* sanity checks - counters should fit and be the same */
 		for (i = 0; i < params.n_counter_offsets_presp; i++) {
-- 
cgit v1.2.3


From 5f9e2822d12fe5050da5db0e65924d5ddc86bf29 Mon Sep 17 00:00:00 2001
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Thu, 20 Aug 2020 17:46:23 -0500
Subject: RDMA/rxe: Fix style warnings

Fixed several minor checkpatch warnings in existing rxe source.

Link: https://lore.kernel.org/r/20200820224638.3212-3-rpearson@hpe.com
Signed-off-by: Bob Pearson <rpearson@hpe.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_comp.c | 3 +--
 drivers/infiniband/sw/rxe/rxe_net.c  | 2 +-
 drivers/infiniband/sw/rxe/rxe_qp.c   | 3 +--
 drivers/infiniband/sw/rxe/rxe_task.h | 2 +-
 include/uapi/rdma/rdma_user_rxe.h    | 6 +++---
 5 files changed, 7 insertions(+), 9 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 4bc88708b355..8e28ebb42fce 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -690,9 +690,8 @@ int rxe_completer(void *arg)
 			 */
 
 			/* there is nothing to retry in this case */
-			if (!wqe || (wqe->state == wqe_state_posted)) {
+			if (!wqe || (wqe->state == wqe_state_posted))
 				goto exit;
-			}
 
 			/* if we've started a retry, don't start another
 			 * retry sequence, unless this is a timeout.
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 0c3808611f95..80abd417f2b9 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -120,7 +120,7 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev,
 	ndst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(recv_sockets.sk6->sk),
 					       recv_sockets.sk6->sk, &fl6,
 					       NULL);
-	if (unlikely(IS_ERR(ndst))) {
+	if (IS_ERR(ndst)) {
 		pr_err_ratelimited("no route to %pI6\n", daddr);
 		return NULL;
 	}
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index 6c11c3aeeca6..3562b3876101 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -628,9 +628,8 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
 	if (mask & IB_QP_QKEY)
 		qp->attr.qkey = attr->qkey;
 
-	if (mask & IB_QP_AV) {
+	if (mask & IB_QP_AV)
 		rxe_init_av(&attr->ah_attr, &qp->pri_av);
-	}
 
 	if (mask & IB_QP_ALT_PATH) {
 		rxe_init_av(&attr->alt_ah_attr, &qp->alt_av);
diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h
index 08ff42d451c6..66af2f92358b 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.h
+++ b/drivers/infiniband/sw/rxe/rxe_task.h
@@ -60,7 +60,7 @@ struct rxe_task {
 /*
  * init rxe_task structure
  *	arg  => parameter to pass to fcn
- *	fcn  => function to call until it returns != 0
+ *	func => function to call until it returns != 0
  */
 int rxe_init_task(void *obj, struct rxe_task *task,
 		  void *arg, int (*func)(void *), char *name);
diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h
index aae2e696bb38..d8f2e0e46dab 100644
--- a/include/uapi/rdma/rdma_user_rxe.h
+++ b/include/uapi/rdma/rdma_user_rxe.h
@@ -99,8 +99,8 @@ struct rxe_send_wr {
 				struct ib_mr *mr;
 				__aligned_u64 reserved;
 			};
-			__u32        key;
-			__u32        access;
+			__u32	     key;
+			__u32	     access;
 		} reg;
 	} wr;
 };
@@ -112,7 +112,7 @@ struct rxe_sge {
 };
 
 struct mminfo {
-	__aligned_u64  		offset;
+	__aligned_u64		offset;
 	__u32			size;
 	__u32			pad;
 };
-- 
cgit v1.2.3


From 2831a631022eed6e3f800f08892132c6edde652c Mon Sep 17 00:00:00 2001
From: Chung-Hsien Hsu <stanley.hsu@cypress.com>
Date: Mon, 17 Aug 2020 02:33:15 -0500
Subject: nl80211: support SAE authentication offload in AP mode

Let drivers advertise support for AP-mode SAE authentication offload
with a new NL80211_EXT_FEATURE_SAE_OFFLOAD_AP flag.

Signed-off-by: Chung-Hsien Hsu <stanley.hsu@cypress.com>
Signed-off-by: Chi-Hsien Lin <chi-hsien.lin@cypress.com>
Link: https://lore.kernel.org/r/20200817073316.33402-4-stanley.hsu@cypress.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 14 +++++++++++---
 net/wireless/nl80211.c       |  9 ++++++---
 2 files changed, 17 insertions(+), 6 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index ec96d5fe0e05..0584e0d349f0 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -252,9 +252,13 @@
  * DOC: SAE authentication offload
  *
  * By setting @NL80211_EXT_FEATURE_SAE_OFFLOAD flag drivers can indicate they
- * support offloading SAE authentication for WPA3-Personal networks. In
- * %NL80211_CMD_CONNECT the password for SAE should be specified using
- * %NL80211_ATTR_SAE_PASSWORD.
+ * support offloading SAE authentication for WPA3-Personal networks in station
+ * mode. Similarly @NL80211_EXT_FEATURE_SAE_OFFLOAD_AP flag can be set by
+ * drivers indicating the offload support in AP mode.
+ *
+ * The password for SAE should be specified using %NL80211_ATTR_SAE_PASSWORD in
+ * %NL80211_CMD_CONNECT and %NL80211_CMD_START_AP for station and AP mode
+ * respectively.
  */
 
 /**
@@ -5845,6 +5849,9 @@ enum nl80211_feature_flags {
  *	handshake with PSK in AP mode (PSK is passed as part of the start AP
  *	command).
  *
+ * @NL80211_EXT_FEATURE_SAE_OFFLOAD_AP: Device wants to do SAE authentication
+ *	in AP mode (SAE password is passed as part of the start AP command).
+ *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
  */
@@ -5902,6 +5909,7 @@ enum nl80211_ext_feature_index {
 	NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_TX_STATUS,
 	NL80211_EXT_FEATURE_OPERATING_CHANNEL_VALIDATION,
 	NL80211_EXT_FEATURE_4WAY_HANDSHAKE_AP_PSK,
+	NL80211_EXT_FEATURE_SAE_OFFLOAD_AP,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index e640e65f3255..201d029687cc 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4960,8 +4960,9 @@ static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev,
 			return false;
 		return true;
 	case NL80211_CMD_START_AP:
-		/* SAE not supported yet */
-		if (auth_type == NL80211_AUTHTYPE_SAE)
+		if (!wiphy_ext_feature_isset(&rdev->wiphy,
+					     NL80211_EXT_FEATURE_SAE_OFFLOAD_AP) &&
+		    auth_type == NL80211_AUTHTYPE_SAE)
 			return false;
 		/* FILS not supported yet */
 		if (auth_type == NL80211_AUTHTYPE_FILS_SK ||
@@ -9552,7 +9553,9 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
 
 	if (info->attrs[NL80211_ATTR_SAE_PASSWORD]) {
 		if (!wiphy_ext_feature_isset(&rdev->wiphy,
-					     NL80211_EXT_FEATURE_SAE_OFFLOAD))
+					     NL80211_EXT_FEATURE_SAE_OFFLOAD) &&
+		    !wiphy_ext_feature_isset(&rdev->wiphy,
+					     NL80211_EXT_FEATURE_SAE_OFFLOAD_AP))
 			return -EINVAL;
 		settings->sae_pwd =
 			nla_data(info->attrs[NL80211_ATTR_SAE_PASSWORD]);
-- 
cgit v1.2.3


From 50aba46c234ea6ab3134cebb5ab27885f33a3e5d Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Thu, 27 Aug 2020 14:19:23 +0200
Subject: gtp: add notification mechanism

Like all other network functions, let's notify gtp context on creation and
deletion.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Tested-by: Gabriel Ganne <gabriel.ganne@6wind.com>
Acked-by: Harald Welte <laforge@gnumonks.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/gtp.c        | 58 ++++++++++++++++++++++++++++++++++++++++--------
 include/uapi/linux/gtp.h |  2 ++
 2 files changed, 51 insertions(+), 9 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index 21640a035d7d..c84a10569388 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -928,8 +928,8 @@ static void ipv4_pdp_fill(struct pdp_ctx *pctx, struct genl_info *info)
 	}
 }
 
-static int gtp_pdp_add(struct gtp_dev *gtp, struct sock *sk,
-		       struct genl_info *info)
+static struct pdp_ctx *gtp_pdp_add(struct gtp_dev *gtp, struct sock *sk,
+				   struct genl_info *info)
 {
 	struct pdp_ctx *pctx, *pctx_tid = NULL;
 	struct net_device *dev = gtp->dev;
@@ -956,12 +956,12 @@ static int gtp_pdp_add(struct gtp_dev *gtp, struct sock *sk,
 
 	if (found) {
 		if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
-			return -EEXIST;
+			return ERR_PTR(-EEXIST);
 		if (info->nlhdr->nlmsg_flags & NLM_F_REPLACE)
-			return -EOPNOTSUPP;
+			return ERR_PTR(-EOPNOTSUPP);
 
 		if (pctx && pctx_tid)
-			return -EEXIST;
+			return ERR_PTR(-EEXIST);
 		if (!pctx)
 			pctx = pctx_tid;
 
@@ -974,13 +974,13 @@ static int gtp_pdp_add(struct gtp_dev *gtp, struct sock *sk,
 			netdev_dbg(dev, "GTPv1-U: update tunnel id = %x/%x (pdp %p)\n",
 				   pctx->u.v1.i_tei, pctx->u.v1.o_tei, pctx);
 
-		return 0;
+		return pctx;
 
 	}
 
 	pctx = kmalloc(sizeof(*pctx), GFP_ATOMIC);
 	if (pctx == NULL)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	sock_hold(sk);
 	pctx->sk = sk;
@@ -1018,7 +1018,7 @@ static int gtp_pdp_add(struct gtp_dev *gtp, struct sock *sk,
 		break;
 	}
 
-	return 0;
+	return pctx;
 }
 
 static void pdp_context_free(struct rcu_head *head)
@@ -1036,9 +1036,12 @@ static void pdp_context_delete(struct pdp_ctx *pctx)
 	call_rcu(&pctx->rcu_head, pdp_context_free);
 }
 
+static int gtp_tunnel_notify(struct pdp_ctx *pctx, u8 cmd);
+
 static int gtp_genl_new_pdp(struct sk_buff *skb, struct genl_info *info)
 {
 	unsigned int version;
+	struct pdp_ctx *pctx;
 	struct gtp_dev *gtp;
 	struct sock *sk;
 	int err;
@@ -1088,7 +1091,13 @@ static int gtp_genl_new_pdp(struct sk_buff *skb, struct genl_info *info)
 		goto out_unlock;
 	}
 
-	err = gtp_pdp_add(gtp, sk, info);
+	pctx = gtp_pdp_add(gtp, sk, info);
+	if (IS_ERR(pctx)) {
+		err = PTR_ERR(pctx);
+	} else {
+		gtp_tunnel_notify(pctx, GTP_CMD_NEWPDP);
+		err = 0;
+	}
 
 out_unlock:
 	rcu_read_unlock();
@@ -1159,6 +1168,7 @@ static int gtp_genl_del_pdp(struct sk_buff *skb, struct genl_info *info)
 		netdev_dbg(pctx->dev, "GTPv1-U: deleting tunnel id = %x/%x (pdp %p)\n",
 			   pctx->u.v1.i_tei, pctx->u.v1.o_tei, pctx);
 
+	gtp_tunnel_notify(pctx, GTP_CMD_DELPDP);
 	pdp_context_delete(pctx);
 
 out_unlock:
@@ -1168,6 +1178,14 @@ out_unlock:
 
 static struct genl_family gtp_genl_family;
 
+enum gtp_multicast_groups {
+	GTP_GENL_MCGRP,
+};
+
+static const struct genl_multicast_group gtp_genl_mcgrps[] = {
+	[GTP_GENL_MCGRP] = { .name = GTP_GENL_MCGRP_NAME },
+};
+
 static int gtp_genl_fill_info(struct sk_buff *skb, u32 snd_portid, u32 snd_seq,
 			      int flags, u32 type, struct pdp_ctx *pctx)
 {
@@ -1204,6 +1222,26 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
+static int gtp_tunnel_notify(struct pdp_ctx *pctx, u8 cmd)
+{
+	struct sk_buff *msg;
+	int ret;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+	if (!msg)
+		return -ENOMEM;
+
+	ret = gtp_genl_fill_info(msg, 0, 0, 0, cmd, pctx);
+	if (ret < 0) {
+		nlmsg_free(msg);
+		return ret;
+	}
+
+	ret = genlmsg_multicast_netns(&gtp_genl_family, dev_net(pctx->dev), msg,
+				      0, GTP_GENL_MCGRP, GFP_ATOMIC);
+	return ret;
+}
+
 static int gtp_genl_get_pdp(struct sk_buff *skb, struct genl_info *info)
 {
 	struct pdp_ctx *pctx = NULL;
@@ -1334,6 +1372,8 @@ static struct genl_family gtp_genl_family __ro_after_init = {
 	.module		= THIS_MODULE,
 	.ops		= gtp_genl_ops,
 	.n_ops		= ARRAY_SIZE(gtp_genl_ops),
+	.mcgrps		= gtp_genl_mcgrps,
+	.n_mcgrps	= ARRAY_SIZE(gtp_genl_mcgrps),
 };
 
 static int __net_init gtp_net_init(struct net *net)
diff --git a/include/uapi/linux/gtp.h b/include/uapi/linux/gtp.h
index c7d66755d212..79f9191bbb24 100644
--- a/include/uapi/linux/gtp.h
+++ b/include/uapi/linux/gtp.h
@@ -2,6 +2,8 @@
 #ifndef _UAPI_LINUX_GTP_H_
 #define _UAPI_LINUX_GTP_H_
 
+#define GTP_GENL_MCGRP_NAME	"gtp"
+
 enum gtp_genl_cmds {
 	GTP_CMD_NEWPDP,
 	GTP_CMD_DELPDP,
-- 
cgit v1.2.3


From dab741e0e02bd3c4f5e2e97be74b39df2523fc6e Mon Sep 17 00:00:00 2001
From: Mattias Nissler <mnissler@chromium.org>
Date: Thu, 27 Aug 2020 11:09:46 -0600
Subject: Add a "nosymfollow" mount option.

For mounts that have the new "nosymfollow" option, don't follow symlinks
when resolving paths. The new option is similar in spirit to the
existing "nodev", "noexec", and "nosuid" options, as well as to the
LOOKUP_NO_SYMLINKS resolve flag in the openat2(2) syscall. Various BSD
variants have been supporting the "nosymfollow" mount option for a long
time with equivalent implementations.

Note that symlinks may still be created on file systems mounted with
the "nosymfollow" option present. readlink() remains functional, so
user space code that is aware of symlinks can still choose to follow
them explicitly.

Setting the "nosymfollow" mount option helps prevent privileged
writers from modifying files unintentionally in case there is an
unexpected link along the accessed path. The "nosymfollow" option is
thus useful as a defensive measure for systems that need to deal with
untrusted file systems in privileged contexts.

More information on the history and motivation for this patch can be
found here:

https://sites.google.com/a/chromium.org/dev/chromium-os/chromiumos-design-docs/hardening-against-malicious-stateful-data#TOC-Restricting-symlink-traversal

Signed-off-by: Mattias Nissler <mnissler@chromium.org>
Signed-off-by: Ross Zwisler <zwisler@google.com>
Reviewed-by: Aleksa Sarai <cyphar@cyphar.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c                 | 3 ++-
 fs/namespace.c             | 2 ++
 fs/proc_namespace.c        | 1 +
 fs/statfs.c                | 2 ++
 include/linux/mount.h      | 3 ++-
 include/linux/statfs.h     | 1 +
 include/uapi/linux/mount.h | 1 +
 7 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/fs/namei.c b/fs/namei.c
index e99e2a9da0f7..33e8c79bc761 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1626,7 +1626,8 @@ static const char *pick_link(struct nameidata *nd, struct path *link,
 			return ERR_PTR(error);
 	}
 
-	if (unlikely(nd->flags & LOOKUP_NO_SYMLINKS))
+	if (unlikely(nd->flags & LOOKUP_NO_SYMLINKS) ||
+			unlikely(link->mnt->mnt_flags & MNT_NOSYMFOLLOW))
 		return ERR_PTR(-ELOOP);
 
 	if (!(nd->flags & LOOKUP_RCU)) {
diff --git a/fs/namespace.c b/fs/namespace.c
index bae0e95b3713..6408788a649e 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3160,6 +3160,8 @@ int path_mount(const char *dev_name, struct path *path,
 		mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
 	if (flags & MS_RDONLY)
 		mnt_flags |= MNT_READONLY;
+	if (flags & MS_NOSYMFOLLOW)
+		mnt_flags |= MNT_NOSYMFOLLOW;
 
 	/* The default atime for remount is preservation */
 	if ((flags & MS_REMOUNT) &&
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 3059a9394c2d..e59d4bb3a89e 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -70,6 +70,7 @@ static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
 		{ MNT_NOATIME, ",noatime" },
 		{ MNT_NODIRATIME, ",nodiratime" },
 		{ MNT_RELATIME, ",relatime" },
+		{ MNT_NOSYMFOLLOW, ",nosymfollow" },
 		{ 0, NULL }
 	};
 	const struct proc_fs_opts *fs_infop;
diff --git a/fs/statfs.c b/fs/statfs.c
index 2616424012ea..59f33752c131 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -29,6 +29,8 @@ static int flags_by_mnt(int mnt_flags)
 		flags |= ST_NODIRATIME;
 	if (mnt_flags & MNT_RELATIME)
 		flags |= ST_RELATIME;
+	if (mnt_flags & MNT_NOSYMFOLLOW)
+		flags |= ST_NOSYMFOLLOW;
 	return flags;
 }
 
diff --git a/include/linux/mount.h b/include/linux/mount.h
index de657bd211fa..aaf343b38671 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -30,6 +30,7 @@ struct fs_context;
 #define MNT_NODIRATIME	0x10
 #define MNT_RELATIME	0x20
 #define MNT_READONLY	0x40	/* does the user want this to be r/o? */
+#define MNT_NOSYMFOLLOW	0x80
 
 #define MNT_SHRINKABLE	0x100
 #define MNT_WRITE_HOLD	0x200
@@ -46,7 +47,7 @@ struct fs_context;
 #define MNT_SHARED_MASK	(MNT_UNBINDABLE)
 #define MNT_USER_SETTABLE_MASK  (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC \
 				 | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \
-				 | MNT_READONLY)
+				 | MNT_READONLY | MNT_NOSYMFOLLOW)
 #define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME )
 
 #define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \
diff --git a/include/linux/statfs.h b/include/linux/statfs.h
index 9bc69edb8f18..fac4356ea1bf 100644
--- a/include/linux/statfs.h
+++ b/include/linux/statfs.h
@@ -40,6 +40,7 @@ struct kstatfs {
 #define ST_NOATIME	0x0400	/* do not update access times */
 #define ST_NODIRATIME	0x0800	/* do not update directory access times */
 #define ST_RELATIME	0x1000	/* update atime relative to mtime/ctime */
+#define ST_NOSYMFOLLOW	0x2000	/* do not follow symlinks */
 
 struct dentry;
 extern int vfs_get_fsid(struct dentry *dentry, __kernel_fsid_t *fsid);
diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
index 96a0240f23fe..dd8306ea336c 100644
--- a/include/uapi/linux/mount.h
+++ b/include/uapi/linux/mount.h
@@ -16,6 +16,7 @@
 #define MS_REMOUNT	32	/* Alter flags of a mounted FS */
 #define MS_MANDLOCK	64	/* Allow mandatory locks on an FS */
 #define MS_DIRSYNC	128	/* Directory modifications are synchronous */
+#define MS_NOSYMFOLLOW	256	/* Do not follow symlinks */
 #define MS_NOATIME	1024	/* Do not update access times. */
 #define MS_NODIRATIME	2048	/* Do not update directory access times */
 #define MS_BIND		4096
-- 
cgit v1.2.3


From b0c9eb37817943840a1a82dbc998c491609a0afd Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 27 Aug 2020 22:19:22 -0700
Subject: bpf: Make bpf_link_info.iter similar to bpf_iter_link_info

bpf_link_info.iter is used by link_query to return bpf_iter_link_info
to user space. Fields may be different, e.g., map_fd vs. map_id, so
we cannot reuse the exact structure. But make them similar, e.g.,

  struct bpf_link_info {
     /* common fields */
     union {
	struct { ... } raw_tracepoint;
	struct { ... } tracing;
	...
	struct {
	    /* common fields for iter */
	    union {
		struct {
		    __u32 map_id;
		} map;
		/* other structs for other targets */
	    };
	} iter;
     };
  };

so the structure is extensible the same way as bpf_iter_link_info.

Fixes: 6b0a249a301e ("bpf: Implement link_query for bpf iterators")
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200828051922.758950-1-yhs@fb.com
---
 include/uapi/linux/bpf.h       | 6 ++++--
 tools/include/uapi/linux/bpf.h | 6 ++++--
 2 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0388bc0200b0..ef7af384f5ee 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4251,8 +4251,10 @@ struct bpf_link_info {
 			__aligned_u64 target_name; /* in/out: target_name buffer ptr */
 			__u32 target_name_len;	   /* in/out: target_name buffer len */
 			union {
-				__u32 map_id;
-			} map;
+				struct {
+					__u32 map_id;
+				} map;
+			};
 		} iter;
 		struct  {
 			__u32 netns_ino;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 0388bc0200b0..ef7af384f5ee 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4251,8 +4251,10 @@ struct bpf_link_info {
 			__aligned_u64 target_name; /* in/out: target_name buffer ptr */
 			__u32 target_name_len;	   /* in/out: target_name buffer len */
 			union {
-				__u32 map_id;
-			} map;
+				struct {
+					__u32 map_id;
+				} map;
+			};
 		} iter;
 		struct  {
 			__u32 netns_ino;
-- 
cgit v1.2.3


From 7a81575b806e5dab214025e6757362c62d946405 Mon Sep 17 00:00:00 2001
From: "Jose M. Guisado Gomez" <guigom@riseup.net>
Date: Thu, 20 Aug 2020 10:19:01 +0200
Subject: netfilter: nf_tables: add userdata attributes to nft_table

Enables storing userdata for nft_table. Field udata points to the user
data and udlen stores its length.

Adds the new attribute NFTA_TABLE_USERDATA.

Signed-off-by: Jose M. Guisado Gomez <guigom@riseup.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h        |  2 ++
 include/uapi/linux/netfilter/nf_tables.h |  2 ++
 net/netfilter/nf_tables_api.c            | 22 +++++++++++++++++++++-
 3 files changed, 25 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index bf9491b77d16..97a7e147a59a 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1080,6 +1080,8 @@ struct nft_table {
 					flags:8,
 					genmask:2;
 	char				*name;
+	u16				udlen;
+	u8				*udata;
 };
 
 void nft_register_chain_type(const struct nft_chain_type *);
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 42f351c1f5c5..aeb88cbd303e 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -172,6 +172,7 @@ enum nft_table_flags {
  * @NFTA_TABLE_NAME: name of the table (NLA_STRING)
  * @NFTA_TABLE_FLAGS: bitmask of enum nft_table_flags (NLA_U32)
  * @NFTA_TABLE_USE: number of chains in this table (NLA_U32)
+ * @NFTA_TABLE_USERDATA: user data (NLA_BINARY)
  */
 enum nft_table_attributes {
 	NFTA_TABLE_UNSPEC,
@@ -180,6 +181,7 @@ enum nft_table_attributes {
 	NFTA_TABLE_USE,
 	NFTA_TABLE_HANDLE,
 	NFTA_TABLE_PAD,
+	NFTA_TABLE_USERDATA,
 	__NFTA_TABLE_MAX
 };
 #define NFTA_TABLE_MAX		(__NFTA_TABLE_MAX - 1)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index fd814e514f94..6ccce2a2e715 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -650,6 +650,8 @@ static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
 				    .len = NFT_TABLE_MAXNAMELEN - 1 },
 	[NFTA_TABLE_FLAGS]	= { .type = NLA_U32 },
 	[NFTA_TABLE_HANDLE]	= { .type = NLA_U64 },
+	[NFTA_TABLE_USERDATA]	= { .type = NLA_BINARY,
+				    .len = NFT_USERDATA_MAXLEN }
 };
 
 static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
@@ -676,6 +678,11 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
 			 NFTA_TABLE_PAD))
 		goto nla_put_failure;
 
+	if (table->udata) {
+		if (nla_put(skb, NFTA_TABLE_USERDATA, table->udlen, table->udata))
+			goto nla_put_failure;
+	}
+
 	nlmsg_end(skb, nlh);
 	return 0;
 
@@ -977,8 +984,9 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
 	int family = nfmsg->nfgen_family;
 	const struct nlattr *attr;
 	struct nft_table *table;
-	u32 flags = 0;
 	struct nft_ctx ctx;
+	u32 flags = 0;
+	u16 udlen = 0;
 	int err;
 
 	lockdep_assert_held(&net->nft.commit_mutex);
@@ -1014,6 +1022,16 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
 	if (table->name == NULL)
 		goto err_strdup;
 
+	if (nla[NFTA_TABLE_USERDATA]) {
+		udlen = nla_len(nla[NFTA_TABLE_USERDATA]);
+		table->udata = kzalloc(udlen, GFP_KERNEL);
+		if (table->udata == NULL)
+			goto err_table_udata;
+
+		nla_memcpy(table->udata, nla[NFTA_TABLE_USERDATA], udlen);
+		table->udlen = udlen;
+	}
+
 	err = rhltable_init(&table->chains_ht, &nft_chain_ht_params);
 	if (err)
 		goto err_chain_ht;
@@ -1036,6 +1054,8 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
 err_trans:
 	rhltable_destroy(&table->chains_ht);
 err_chain_ht:
+	kfree(table->udata);
+err_table_udata:
 	kfree(table->name);
 err_strdup:
 	kfree(table);
-- 
cgit v1.2.3


From 4afc41dfa5a716e9e7a90c22972583f337c0bcbf Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 26 Aug 2020 00:52:43 +0200
Subject: netfilter: conntrack: remove ignore stats

This counter increments when nf_conntrack_in sees a packet that already
has a conntrack attached or when the packet is marked as UNTRACKED.
Neither is an error.

The former is normal for loopback traffic.  The second happens for
certain ICMPv6 packets or when nftables/ip(6)tables rules are in place.

In case someone needs to count UNTRACKED packets, or packets that are
marked as untracked before conntrack_in, this can be done with both
nftables and ip(6)tables rules.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_common.h      | 1 -
 include/uapi/linux/netfilter/nfnetlink_conntrack.h | 2 +-
 net/netfilter/nf_conntrack_core.c                  | 4 +---
 net/netfilter/nf_conntrack_netlink.c               | 1 -
 net/netfilter/nf_conntrack_standalone.c            | 2 +-
 5 files changed, 3 insertions(+), 7 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index 1db83c931d9c..96b90d7e361f 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -8,7 +8,6 @@
 struct ip_conntrack_stat {
 	unsigned int found;
 	unsigned int invalid;
-	unsigned int ignore;
 	unsigned int insert;
 	unsigned int insert_failed;
 	unsigned int drop;
diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
index 262881792671..3e471558da82 100644
--- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
@@ -247,7 +247,7 @@ enum ctattr_stats_cpu {
 	CTA_STATS_FOUND,
 	CTA_STATS_NEW,		/* no longer used */
 	CTA_STATS_INVALID,
-	CTA_STATS_IGNORE,
+	CTA_STATS_IGNORE,	/* no longer used */
 	CTA_STATS_DELETE,	/* no longer used */
 	CTA_STATS_DELETE_LIST,	/* no longer used */
 	CTA_STATS_INSERT,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 3cfbafdff941..a111bcf1b93c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1800,10 +1800,8 @@ nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state)
 	if (tmpl || ctinfo == IP_CT_UNTRACKED) {
 		/* Previously seen (loopback or untracked)?  Ignore. */
 		if ((tmpl && !nf_ct_is_template(tmpl)) ||
-		     ctinfo == IP_CT_UNTRACKED) {
-			NF_CT_STAT_INC_ATOMIC(state->net, ignore);
+		     ctinfo == IP_CT_UNTRACKED)
 			return NF_ACCEPT;
-		}
 		skb->_nfct = 0;
 	}
 
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 832eabecfbdd..c64f23a8f373 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2509,7 +2509,6 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
 
 	if (nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) ||
 	    nla_put_be32(skb, CTA_STATS_INVALID, htonl(st->invalid)) ||
-	    nla_put_be32(skb, CTA_STATS_IGNORE, htonl(st->ignore)) ||
 	    nla_put_be32(skb, CTA_STATS_INSERT, htonl(st->insert)) ||
 	    nla_put_be32(skb, CTA_STATS_INSERT_FAILED,
 				htonl(st->insert_failed)) ||
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index a604f43e3e6b..b673a03624d2 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -439,7 +439,7 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
 		   st->found,
 		   0,
 		   st->invalid,
-		   st->ignore,
+		   0,
 		   0,
 		   0,
 		   st->insert,
-- 
cgit v1.2.3


From bc92470413f3af152db0d8f90ef3eb13f8cc417a Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 26 Aug 2020 00:52:44 +0200
Subject: netfilter: conntrack: add clash resolution stat counter

There is a misconception about what "insert_failed" means.

We increment this even when a clash got resolved, so it might not indicate
a problem.

Add a dedicated counter for clash resolution and only increment
insert_failed if a clash cannot be resolved.

For the old /proc interface, export this in place of an older stat
that got removed a while back.
For ctnetlink, export this with a new attribute.

Also correct an outdated comment that implies we add a duplicate tuple --
we only add the (unique) reply direction.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_common.h      | 1 +
 include/uapi/linux/netfilter/nfnetlink_conntrack.h | 1 +
 net/netfilter/nf_conntrack_core.c                  | 9 +++++----
 net/netfilter/nf_conntrack_netlink.c               | 4 +++-
 net/netfilter/nf_conntrack_standalone.c            | 2 +-
 5 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index 96b90d7e361f..0c7d8d1e945d 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -10,6 +10,7 @@ struct ip_conntrack_stat {
 	unsigned int invalid;
 	unsigned int insert;
 	unsigned int insert_failed;
+	unsigned int clash_resolve;
 	unsigned int drop;
 	unsigned int early_drop;
 	unsigned int error;
diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
index 3e471558da82..d8484be72fdc 100644
--- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
@@ -256,6 +256,7 @@ enum ctattr_stats_cpu {
 	CTA_STATS_EARLY_DROP,
 	CTA_STATS_ERROR,
 	CTA_STATS_SEARCH_RESTART,
+	CTA_STATS_CLASH_RESOLVE,
 	__CTA_STATS_MAX,
 };
 #define CTA_STATS_MAX (__CTA_STATS_MAX - 1)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index a111bcf1b93c..93e77ca0efad 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -859,7 +859,6 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
 
 out:
 	nf_conntrack_double_unlock(hash, reply_hash);
-	NF_CT_STAT_INC(net, insert_failed);
 	local_bh_enable();
 	return -EEXIST;
 }
@@ -934,7 +933,7 @@ static int __nf_ct_resolve_clash(struct sk_buff *skb,
 		nf_conntrack_put(&loser_ct->ct_general);
 		nf_ct_set(skb, ct, ctinfo);
 
-		NF_CT_STAT_INC(net, insert_failed);
+		NF_CT_STAT_INC(net, clash_resolve);
 		return NF_ACCEPT;
 	}
 
@@ -998,6 +997,8 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx)
 
 	hlist_nulls_add_head_rcu(&loser_ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
 				 &nf_conntrack_hash[repl_idx]);
+
+	NF_CT_STAT_INC(net, clash_resolve);
 	return NF_ACCEPT;
 }
 
@@ -1027,10 +1028,10 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx)
  *
  * Failing that, the new, unconfirmed conntrack is still added to the table
  * provided that the collision only occurs in the ORIGINAL direction.
- * The new entry will be added after the existing one in the hash list,
+ * The new entry will be added only in the non-clashing REPLY direction,
  * so packets in the ORIGINAL direction will continue to match the existing
  * entry.  The new entry will also have a fixed timeout so it expires --
- * due to the collision, it will not see bidirectional traffic.
+ * due to the collision, it will only see reply traffic.
  *
  * Returns NF_DROP if the clash could not be resolved.
  */
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index c64f23a8f373..89d99f6dfd0a 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2516,7 +2516,9 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
 	    nla_put_be32(skb, CTA_STATS_EARLY_DROP, htonl(st->early_drop)) ||
 	    nla_put_be32(skb, CTA_STATS_ERROR, htonl(st->error)) ||
 	    nla_put_be32(skb, CTA_STATS_SEARCH_RESTART,
-				htonl(st->search_restart)))
+				htonl(st->search_restart)) ||
+	    nla_put_be32(skb, CTA_STATS_CLASH_RESOLVE,
+				htonl(st->clash_resolve)))
 		goto nla_put_failure;
 
 	nlmsg_end(skb, nlh);
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index b673a03624d2..0ff39740797d 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -435,7 +435,7 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
 	seq_printf(seq, "%08x  %08x %08x %08x %08x %08x %08x %08x "
 			"%08x %08x %08x %08x %08x  %08x %08x %08x %08x\n",
 		   nr_conntracks,
-		   0,
+		   st->clash_resolve, /* was: searched */
 		   st->found,
 		   0,
 		   st->invalid,
-- 
cgit v1.2.3


From 1e6c62a8821557720a9b2ea9617359b264f2f67c Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 27 Aug 2020 15:01:11 -0700
Subject: bpf: Introduce sleepable BPF programs

Introduce sleepable BPF programs that can request this property for themselves
via the BPF_F_SLEEPABLE flag at program load time. In that case they will be
able to use helpers like bpf_copy_from_user() that might sleep. At present only
fentry/fexit/fmod_ret and lsm programs can request to be sleepable and only
when they are attached to kernel functions that are known to allow sleeping.

The non-sleepable programs are relying on implicit rcu_read_lock() and
migrate_disable() to protect life time of programs, maps that they use and
per-cpu kernel structures used to pass info between bpf programs and the
kernel. The sleepable programs cannot be enclosed in rcu_read_lock().
migrate_disable() maps to preempt_disable() in non-RT kernels, so the progs
should not be enclosed in migrate_disable() either. Therefore
rcu_read_lock_trace is used to protect the life time of sleepable progs.

There are many networking and tracing program types. In many cases the
'struct bpf_prog *' pointer itself is rcu protected within some other kernel
data structure and the kernel code is using rcu_dereference() to load that
program pointer and call BPF_PROG_RUN() on it. All these cases are not touched.
Instead sleepable bpf programs are allowed with bpf trampoline only. The
program pointers are hard-coded into generated assembly of bpf trampoline and
synchronize_rcu_tasks_trace() is used to protect the life time of the program.
The same trampoline can hold both sleepable and non-sleepable progs.

When rcu_read_lock_trace is held it means that some sleepable bpf program is
running from bpf trampoline. Those programs can use bpf arrays and preallocated
hash/lru maps. These map types wait for programs to complete via
synchronize_rcu_tasks_trace().

Updates to the trampoline now have to do synchronize_rcu_tasks_trace() and
synchronize_rcu_tasks() to wait for sleepable progs to finish and for
trampoline assembly to finish.

This is the first step of introducing sleepable progs. Eventually dynamically
allocated hash maps can be allowed and networking program types can become
sleepable too.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: KP Singh <kpsingh@google.com>
Link: https://lore.kernel.org/bpf/20200827220114.69225-3-alexei.starovoitov@gmail.com
---
 arch/x86/net/bpf_jit_comp.c    | 32 +++++++++++------
 include/linux/bpf.h            |  3 ++
 include/uapi/linux/bpf.h       |  8 +++++
 init/Kconfig                   |  1 +
 kernel/bpf/arraymap.c          |  1 +
 kernel/bpf/hashtab.c           | 12 +++----
 kernel/bpf/syscall.c           | 13 +++++--
 kernel/bpf/trampoline.c        | 28 +++++++++++++--
 kernel/bpf/verifier.c          | 81 ++++++++++++++++++++++++++++++++++++++++--
 tools/include/uapi/linux/bpf.h |  8 +++++
 10 files changed, 162 insertions(+), 25 deletions(-)

(limited to 'include/uapi')

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 42b6709e6dc7..7d9ea7b41c71 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1379,10 +1379,15 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 	u8 *prog = *pprog;
 	int cnt = 0;
 
-	if (emit_call(&prog, __bpf_prog_enter, prog))
-		return -EINVAL;
-	/* remember prog start time returned by __bpf_prog_enter */
-	emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
+	if (p->aux->sleepable) {
+		if (emit_call(&prog, __bpf_prog_enter_sleepable, prog))
+			return -EINVAL;
+	} else {
+		if (emit_call(&prog, __bpf_prog_enter, prog))
+			return -EINVAL;
+		/* remember prog start time returned by __bpf_prog_enter */
+		emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
+	}
 
 	/* arg1: lea rdi, [rbp - stack_size] */
 	EMIT4(0x48, 0x8D, 0x7D, -stack_size);
@@ -1402,13 +1407,18 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 	if (mod_ret)
 		emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
 
-	/* arg1: mov rdi, progs[i] */
-	emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32,
-		       (u32) (long) p);
-	/* arg2: mov rsi, rbx <- start time in nsec */
-	emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
-	if (emit_call(&prog, __bpf_prog_exit, prog))
-		return -EINVAL;
+	if (p->aux->sleepable) {
+		if (emit_call(&prog, __bpf_prog_exit_sleepable, prog))
+			return -EINVAL;
+	} else {
+		/* arg1: mov rdi, progs[i] */
+		emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32,
+			       (u32) (long) p);
+		/* arg2: mov rsi, rbx <- start time in nsec */
+		emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
+		if (emit_call(&prog, __bpf_prog_exit, prog))
+			return -EINVAL;
+	}
 
 	*pprog = prog;
 	return 0;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index dbba82a80087..4dd7e927621d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -539,6 +539,8 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
 /* these two functions are called from generated trampoline */
 u64 notrace __bpf_prog_enter(void);
 void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
+void notrace __bpf_prog_enter_sleepable(void);
+void notrace __bpf_prog_exit_sleepable(void);
 
 struct bpf_ksym {
 	unsigned long		 start;
@@ -734,6 +736,7 @@ struct bpf_prog_aux {
 	bool offload_requested;
 	bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
 	bool func_proto_unreliable;
+	bool sleepable;
 	enum bpf_tramp_prog_type trampoline_prog_type;
 	struct bpf_trampoline *trampoline;
 	struct hlist_node tramp_hlist;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index ef7af384f5ee..6e8b706aeb05 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -346,6 +346,14 @@ enum bpf_link_type {
 /* The verifier internal test flag. Behavior is undefined */
 #define BPF_F_TEST_STATE_FREQ	(1U << 3)
 
+/* If BPF_F_SLEEPABLE is used in BPF_PROG_LOAD command, the verifier will
+ * restrict map and helper usage for such programs. Sleepable BPF programs can
+ * only be attached to hooks where kernel execution context allows sleeping.
+ * Such programs are allowed to use helpers that may sleep like
+ * bpf_copy_from_user().
+ */
+#define BPF_F_SLEEPABLE		(1U << 4)
+
 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
  * two extensions:
  *
diff --git a/init/Kconfig b/init/Kconfig
index fc10f7ede5f6..6ecc00e130ff 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1691,6 +1691,7 @@ config BPF_SYSCALL
 	bool "Enable bpf() system call"
 	select BPF
 	select IRQ_WORK
+	select TASKS_TRACE_RCU
 	default n
 	help
 	  Enable the bpf() system call that allows to manipulate eBPF
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index d851ebbcf302..e046fb7d17cd 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -10,6 +10,7 @@
 #include <linux/filter.h>
 #include <linux/perf_event.h>
 #include <uapi/linux/btf.h>
+#include <linux/rcupdate_trace.h>
 
 #include "map_in_map.h"
 
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index ad80f45774e7..fe0e06284d33 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -9,6 +9,7 @@
 #include <linux/rculist_nulls.h>
 #include <linux/random.h>
 #include <uapi/linux/btf.h>
+#include <linux/rcupdate_trace.h>
 #include "percpu_freelist.h"
 #include "bpf_lru_list.h"
 #include "map_in_map.h"
@@ -577,8 +578,7 @@ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
 	struct htab_elem *l;
 	u32 hash, key_size;
 
-	/* Must be called with rcu_read_lock. */
-	WARN_ON_ONCE(!rcu_read_lock_held());
+	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
 
 	key_size = map->key_size;
 
@@ -941,7 +941,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 		/* unknown flags */
 		return -EINVAL;
 
-	WARN_ON_ONCE(!rcu_read_lock_held());
+	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
 
 	key_size = map->key_size;
 
@@ -1032,7 +1032,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
 		/* unknown flags */
 		return -EINVAL;
 
-	WARN_ON_ONCE(!rcu_read_lock_held());
+	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
 
 	key_size = map->key_size;
 
@@ -1220,7 +1220,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
 	u32 hash, key_size;
 	int ret = -ENOENT;
 
-	WARN_ON_ONCE(!rcu_read_lock_held());
+	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
 
 	key_size = map->key_size;
 
@@ -1252,7 +1252,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
 	u32 hash, key_size;
 	int ret = -ENOENT;
 
-	WARN_ON_ONCE(!rcu_read_lock_held());
+	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
 
 	key_size = map->key_size;
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b86b1155b748..4108ef3b828b 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -29,6 +29,7 @@
 #include <linux/bpf_lsm.h>
 #include <linux/poll.h>
 #include <linux/bpf-netns.h>
+#include <linux/rcupdate_trace.h>
 
 #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
 			  (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
@@ -1731,10 +1732,14 @@ static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
 	btf_put(prog->aux->btf);
 	bpf_prog_free_linfo(prog);
 
-	if (deferred)
-		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
-	else
+	if (deferred) {
+		if (prog->aux->sleepable)
+			call_rcu_tasks_trace(&prog->aux->rcu, __bpf_prog_put_rcu);
+		else
+			call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
+	} else {
 		__bpf_prog_put_rcu(&prog->aux->rcu);
+	}
 }
 
 static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
@@ -2104,6 +2109,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
 	if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT |
 				 BPF_F_ANY_ALIGNMENT |
 				 BPF_F_TEST_STATE_FREQ |
+				 BPF_F_SLEEPABLE |
 				 BPF_F_TEST_RND_HI32))
 		return -EINVAL;
 
@@ -2159,6 +2165,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
 	}
 
 	prog->aux->offload_requested = !!attr->prog_ifindex;
+	prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE;
 
 	err = security_bpf_prog_alloc(prog->aux);
 	if (err)
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 9be85aa4ec5f..c2b76545153c 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -7,6 +7,8 @@
 #include <linux/rbtree_latch.h>
 #include <linux/perf_event.h>
 #include <linux/btf.h>
+#include <linux/rcupdate_trace.h>
+#include <linux/rcupdate_wait.h>
 
 /* dummy _ops. The verifier will operate on target program's ops. */
 const struct bpf_verifier_ops bpf_extension_verifier_ops = {
@@ -210,9 +212,12 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
 	 * updates to trampoline would change the code from underneath the
 	 * preempted task. Hence wait for tasks to voluntarily schedule or go
 	 * to userspace.
+	 * The same trampoline can hold both sleepable and non-sleepable progs.
+	 * synchronize_rcu_tasks_trace() is needed to make sure all sleepable
+	 * programs finish executing.
+	 * Wait for these two grace periods together.
 	 */
-
-	synchronize_rcu_tasks();
+	synchronize_rcu_mult(call_rcu_tasks, call_rcu_tasks_trace);
 
 	err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2,
 					  &tr->func.model, flags, tprogs,
@@ -344,7 +349,14 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
 	if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
 		goto out;
 	bpf_image_ksym_del(&tr->ksym);
-	/* wait for tasks to get out of trampoline before freeing it */
+	/* This code will be executed when all bpf progs (both sleepable and
+	 * non-sleepable) went through
+	 * bpf_prog_put()->call_rcu[_tasks_trace]()->bpf_prog_free_deferred().
+	 * Hence no need for another synchronize_rcu_tasks_trace() here,
+	 * but synchronize_rcu_tasks() is still needed, since trampoline
+	 * may not have had any sleepable programs and we need to wait
+	 * for tasks to get out of trampoline code before freeing it.
+	 */
 	synchronize_rcu_tasks();
 	bpf_jit_free_exec(tr->image);
 	hlist_del(&tr->hlist);
@@ -394,6 +406,16 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
 	rcu_read_unlock();
 }
 
+void notrace __bpf_prog_enter_sleepable(void)
+{
+	rcu_read_lock_trace();
+}
+
+void notrace __bpf_prog_exit_sleepable(void)
+{
+	rcu_read_unlock_trace();
+}
+
 int __weak
 arch_prepare_bpf_trampoline(void *image, void *image_end,
 			    const struct btf_func_model *m, u32 flags,
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 6f5a9f51cc03..3ebfdb7bd427 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -21,6 +21,7 @@
 #include <linux/ctype.h>
 #include <linux/error-injection.h>
 #include <linux/bpf_lsm.h>
+#include <linux/btf_ids.h>
 
 #include "disasm.h"
 
@@ -9367,6 +9368,23 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
 		return -EINVAL;
 	}
 
+	if (prog->aux->sleepable)
+		switch (map->map_type) {
+		case BPF_MAP_TYPE_HASH:
+		case BPF_MAP_TYPE_LRU_HASH:
+		case BPF_MAP_TYPE_ARRAY:
+			if (!is_preallocated_map(map)) {
+				verbose(env,
+					"Sleepable programs can only use preallocated hash maps\n");
+				return -EINVAL;
+			}
+			break;
+		default:
+			verbose(env,
+				"Sleepable programs can only use array and hash maps\n");
+			return -EINVAL;
+		}
+
 	return 0;
 }
 
@@ -10985,6 +11003,36 @@ static int check_attach_modify_return(struct bpf_prog *prog, unsigned long addr)
 	return -EINVAL;
 }
 
+/* non exhaustive list of sleepable bpf_lsm_*() functions */
+BTF_SET_START(btf_sleepable_lsm_hooks)
+#ifdef CONFIG_BPF_LSM
+BTF_ID(func, bpf_lsm_file_mprotect)
+BTF_ID(func, bpf_lsm_bprm_committed_creds)
+#endif
+BTF_SET_END(btf_sleepable_lsm_hooks)
+
+static int check_sleepable_lsm_hook(u32 btf_id)
+{
+	return btf_id_set_contains(&btf_sleepable_lsm_hooks, btf_id);
+}
+
+/* list of non-sleepable functions that are otherwise on
+ * ALLOW_ERROR_INJECTION list
+ */
+BTF_SET_START(btf_non_sleepable_error_inject)
+/* Three functions below can be called from sleepable and non-sleepable context.
+ * Assume non-sleepable from bpf safety point of view.
+ */
+BTF_ID(func, __add_to_page_cache_locked)
+BTF_ID(func, should_fail_alloc_page)
+BTF_ID(func, should_failslab)
+BTF_SET_END(btf_non_sleepable_error_inject)
+
+static int check_non_sleepable_error_inject(u32 btf_id)
+{
+	return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
+}
+
 static int check_attach_btf_id(struct bpf_verifier_env *env)
 {
 	struct bpf_prog *prog = env->prog;
@@ -11002,6 +11050,12 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
 	long addr;
 	u64 key;
 
+	if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING &&
+	    prog->type != BPF_PROG_TYPE_LSM) {
+		verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n");
+		return -EINVAL;
+	}
+
 	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
 		return check_struct_ops_btf_id(env);
 
@@ -11210,13 +11264,36 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
 			}
 		}
 
-		if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
+		if (prog->aux->sleepable) {
+			ret = -EINVAL;
+			switch (prog->type) {
+			case BPF_PROG_TYPE_TRACING:
+				/* fentry/fexit/fmod_ret progs can be sleepable only if they are
+				 * attached to ALLOW_ERROR_INJECTION and are not in denylist.
+				 */
+				if (!check_non_sleepable_error_inject(btf_id) &&
+				    within_error_injection_list(addr))
+					ret = 0;
+				break;
+			case BPF_PROG_TYPE_LSM:
+				/* LSM progs check that they are attached to bpf_lsm_*() funcs.
+				 * Only some of them are sleepable.
+				 */
+				if (check_sleepable_lsm_hook(btf_id))
+					ret = 0;
+				break;
+			default:
+				break;
+			}
+			if (ret)
+				verbose(env, "%s is not sleepable\n",
+					prog->aux->attach_func_name);
+		} else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
 			ret = check_attach_modify_return(prog, addr);
 			if (ret)
 				verbose(env, "%s() is not modifiable\n",
 					prog->aux->attach_func_name);
 		}
-
 		if (ret)
 			goto out;
 		tr->func.addr = (void *)addr;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index ef7af384f5ee..6e8b706aeb05 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -346,6 +346,14 @@ enum bpf_link_type {
 /* The verifier internal test flag. Behavior is undefined */
 #define BPF_F_TEST_STATE_FREQ	(1U << 3)
 
+/* If BPF_F_SLEEPABLE is used in BPF_PROG_LOAD command, the verifier will
+ * restrict map and helper usage for such programs. Sleepable BPF programs can
+ * only be attached to hooks where kernel execution context allows sleeping.
+ * Such programs are allowed to use helpers that may sleep like
+ * bpf_copy_from_user().
+ */
+#define BPF_F_SLEEPABLE		(1U << 4)
+
 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
  * two extensions:
  *
-- 
cgit v1.2.3


From 07be4c4a3e7a0db148e44b16c5190e753d1c8569 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 27 Aug 2020 15:01:12 -0700
Subject: bpf: Add bpf_copy_from_user() helper.

Sleepable BPF programs can now use copy_from_user() to access user memory.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: KP Singh <kpsingh@google.com>
Link: https://lore.kernel.org/bpf/20200827220114.69225-4-alexei.starovoitov@gmail.com
---
 include/linux/bpf.h            |  1 +
 include/uapi/linux/bpf.h       |  8 ++++++++
 kernel/bpf/helpers.c           | 22 ++++++++++++++++++++++
 kernel/trace/bpf_trace.c       |  2 ++
 tools/include/uapi/linux/bpf.h |  8 ++++++++
 5 files changed, 41 insertions(+)

(limited to 'include/uapi')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 4dd7e927621d..c6d9f2c444f4 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1784,6 +1784,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
+extern const struct bpf_func_proto bpf_copy_from_user_proto;
 
 const struct bpf_func_proto *bpf_tracing_func_proto(
 	enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6e8b706aeb05..a613750d5515 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3569,6 +3569,13 @@ union bpf_attr {
  *		On success, the strictly positive length of the string,
  *		including the trailing NUL character. On error, a negative
  *		value.
+ *
+ * long bpf_copy_from_user(void *dst, u32 size, const void *user_ptr)
+ * 	Description
+ * 		Read *size* bytes from user space address *user_ptr* and store
+ * 		the data in *dst*. This is a wrapper of copy_from_user().
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3719,6 +3726,7 @@ union bpf_attr {
 	FN(inode_storage_get),		\
 	FN(inode_storage_delete),	\
 	FN(d_path),			\
+	FN(copy_from_user),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index be43ab3e619f..5cc7425ee476 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -601,6 +601,28 @@ const struct bpf_func_proto bpf_event_output_data_proto =  {
 	.arg5_type      = ARG_CONST_SIZE_OR_ZERO,
 };
 
+BPF_CALL_3(bpf_copy_from_user, void *, dst, u32, size,
+	   const void __user *, user_ptr)
+{
+	int ret = copy_from_user(dst, user_ptr, size);
+
+	if (unlikely(ret)) {
+		memset(dst, 0, size);
+		ret = -EFAULT;
+	}
+
+	return ret;
+}
+
+const struct bpf_func_proto bpf_copy_from_user_proto = {
+	.func		= bpf_copy_from_user,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg3_type	= ARG_ANYTHING,
+};
+
 const struct bpf_func_proto bpf_get_current_task_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index d973d891f2e2..b2a5380eb187 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1228,6 +1228,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_jiffies64_proto;
 	case BPF_FUNC_get_task_stack:
 		return &bpf_get_task_stack_proto;
+	case BPF_FUNC_copy_from_user:
+		return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
 	default:
 		return NULL;
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 6e8b706aeb05..a613750d5515 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3569,6 +3569,13 @@ union bpf_attr {
  *		On success, the strictly positive length of the string,
  *		including the trailing NUL character. On error, a negative
  *		value.
+ *
+ * long bpf_copy_from_user(void *dst, u32 size, const void *user_ptr)
+ * 	Description
+ * 		Read *size* bytes from user space address *user_ptr* and store
+ * 		the data in *dst*. This is a wrapper of copy_from_user().
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3719,6 +3726,7 @@ union bpf_attr {
 	FN(inode_storage_get),		\
 	FN(inode_storage_delete),	\
 	FN(d_path),			\
+	FN(copy_from_user),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 67407a406db337acdaabecd3747d160d89a929e4 Mon Sep 17 00:00:00 2001
From: Balazs Scheidler <bazsi77@gmail.com>
Date: Sat, 29 Aug 2020 08:19:15 +0200
Subject: netfilter: nft_socket: add wildcard support

Add NFT_SOCKET_WILDCARD to match wildcard socket listeners.

Signed-off-by: Balazs Scheidler <bazsi77@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |  2 ++
 net/netfilter/nft_socket.c               | 27 +++++++++++++++++++++++++++
 2 files changed, 29 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index aeb88cbd303e..543dc697b796 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1010,10 +1010,12 @@ enum nft_socket_attributes {
  *
  * @NFT_SOCKET_TRANSPARENT: Value of the IP(V6)_TRANSPARENT socket option
  * @NFT_SOCKET_MARK: Value of the socket mark
+ * @NFT_SOCKET_WILDCARD: Whether the socket is zero-bound (e.g. 0.0.0.0 or ::0)
  */
 enum nft_socket_keys {
 	NFT_SOCKET_TRANSPARENT,
 	NFT_SOCKET_MARK,
+	NFT_SOCKET_WILDCARD,
 	__NFT_SOCKET_MAX
 };
 #define NFT_SOCKET_MAX	(__NFT_SOCKET_MAX - 1)
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index 637ce3e8c575..a28aca5124ce 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -14,6 +14,25 @@ struct nft_socket {
 	};
 };
 
+static void nft_socket_wildcard(const struct nft_pktinfo *pkt,
+				struct nft_regs *regs, struct sock *sk,
+				u32 *dest)
+{
+	switch (nft_pf(pkt)) {
+	case NFPROTO_IPV4:
+		nft_reg_store8(dest, inet_sk(sk)->inet_rcv_saddr == 0);
+		break;
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+	case NFPROTO_IPV6:
+		nft_reg_store8(dest, ipv6_addr_any(&sk->sk_v6_rcv_saddr));
+		break;
+#endif
+	default:
+		regs->verdict.code = NFT_BREAK;
+		return;
+	}
+}
+
 static void nft_socket_eval(const struct nft_expr *expr,
 			    struct nft_regs *regs,
 			    const struct nft_pktinfo *pkt)
@@ -59,6 +78,13 @@ static void nft_socket_eval(const struct nft_expr *expr,
 			return;
 		}
 		break;
+	case NFT_SOCKET_WILDCARD:
+		if (!sk_fullsock(sk)) {
+			regs->verdict.code = NFT_BREAK;
+			return;
+		}
+		nft_socket_wildcard(pkt, regs, sk, dest);
+		break;
 	default:
 		WARN_ON(1);
 		regs->verdict.code = NFT_BREAK;
@@ -97,6 +123,7 @@ static int nft_socket_init(const struct nft_ctx *ctx,
 	priv->key = ntohl(nla_get_u32(tb[NFTA_SOCKET_KEY]));
 	switch(priv->key) {
 	case NFT_SOCKET_TRANSPARENT:
+	case NFT_SOCKET_WILDCARD:
 		len = sizeof(u8);
 		break;
 	case NFT_SOCKET_MARK:
-- 
cgit v1.2.3


From 55977744f9d862512a524fea93fc5226b09e76a9 Mon Sep 17 00:00:00 2001
From: Mukul Joshi <mukul.joshi@amd.com>
Date: Fri, 28 Aug 2020 18:50:42 -0400
Subject: drm/amdkfd: Add GPU reset SMI event

Add support for reporting GPU reset events through SMI. KFD
would report both pre and post GPU reset events.

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c     |  4 ++++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h       |  2 ++
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 34 +++++++++++++++++++++++++++--
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h |  1 +
 include/uapi/linux/kfd_ioctl.h              |  2 ++
 5 files changed, 41 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index e1cd6599529f..0e71a0543f98 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -812,6 +812,8 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd)
 	if (!kfd->init_complete)
 		return 0;
 
+	kfd_smi_event_update_gpu_reset(kfd, false);
+
 	kfd->dqm->ops.pre_reset(kfd->dqm);
 
 	kgd2kfd_suspend(kfd, false);
@@ -840,6 +842,8 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)
 
 	atomic_set(&kfd->sram_ecc_flag, 0);
 
+	kfd_smi_event_update_gpu_reset(kfd, true);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index f14beb93acb4..023629f28495 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -312,6 +312,8 @@ struct kfd_dev {
 	/* Clients watching SMI events */
 	struct list_head smi_clients;
 	spinlock_t smi_lock;
+
+	uint32_t reset_seq_num;
 };
 
 enum kfd_mempool {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index 4d4b6e3ab697..17d1736367ea 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -174,6 +174,36 @@ static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event,
 	rcu_read_unlock();
 }
 
+void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
+{
+	/*
+	 * GpuReset msg = Reset seq number (incremented for
+	 * every reset message sent before GPU reset).
+	 * 1 byte event + 1 byte space + 8 bytes seq num +
+	 * 1 byte \n + 1 byte \0 = 12
+	 */
+	char fifo_in[12];
+	int len;
+	unsigned int event;
+
+	if (list_empty(&dev->smi_clients))
+		return;
+
+	memset(fifo_in, 0x0, sizeof(fifo_in));
+
+	if (post_reset) {
+		event = KFD_SMI_EVENT_GPU_POST_RESET;
+	} else {
+		event = KFD_SMI_EVENT_GPU_PRE_RESET;
+		++(dev->reset_seq_num);
+	}
+
+	len = snprintf(fifo_in, sizeof(fifo_in), "%x %x\n", event,
+						dev->reset_seq_num);
+
+	add_event_to_kfifo(dev, event, fifo_in, len);
+}
+
 void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
 					     uint32_t throttle_bitmask)
 {
@@ -191,7 +221,7 @@ void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
 	if (list_empty(&dev->smi_clients))
 		return;
 
-	len = snprintf(fifo_in, 29, "%x %x:%llx\n",
+	len = snprintf(fifo_in, sizeof(fifo_in), "%x %x:%llx\n",
 		       KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask,
 		       atomic64_read(&adev->smu.throttle_int_counter));
 
@@ -218,7 +248,7 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
 	if (!task_info.pid)
 		return;
 
-	len = snprintf(fifo_in, 29, "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT,
+	len = snprintf(fifo_in, sizeof(fifo_in), "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT,
 		task_info.pid, task_info.task_name);
 
 	add_event_to_kfifo(dev, KFD_SMI_EVENT_VMFAULT, fifo_in, len);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
index 15537b2cccb5..b9b0438202e2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
@@ -27,5 +27,6 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd);
 void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid);
 void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
 					     uint32_t throttle_bitmask);
+void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset);
 
 #endif
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index cb1f963a84e0..8b7368bfbd84 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -453,6 +453,8 @@ enum kfd_smi_event {
         KFD_SMI_EVENT_NONE = 0, /* not used */
         KFD_SMI_EVENT_VMFAULT = 1, /* event start counting at 1 */
         KFD_SMI_EVENT_THERMAL_THROTTLE = 2,
+	KFD_SMI_EVENT_GPU_PRE_RESET = 3,
+	KFD_SMI_EVENT_GPU_POST_RESET = 4,
 };
 
 #define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1))
-- 
cgit v1.2.3


From 5dc1a0bcb758c343b873e8330ee986417f5a1727 Mon Sep 17 00:00:00 2001
From: Mukul Joshi <mukul.joshi@amd.com>
Date: Fri, 28 Aug 2020 19:53:08 -0400
Subject: include/uapi/linux: Fix indentation in kfd_smi_event enum

Replace spaces with Tabs to fix indentation in kfd_smi_event
enum.

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/linux/kfd_ioctl.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 8b7368bfbd84..695b606da4b1 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -450,9 +450,9 @@ struct kfd_ioctl_import_dmabuf_args {
  * KFD SMI(System Management Interface) events
  */
 enum kfd_smi_event {
-        KFD_SMI_EVENT_NONE = 0, /* not used */
-        KFD_SMI_EVENT_VMFAULT = 1, /* event start counting at 1 */
-        KFD_SMI_EVENT_THERMAL_THROTTLE = 2,
+	KFD_SMI_EVENT_NONE = 0, /* not used */
+	KFD_SMI_EVENT_VMFAULT = 1, /* event start counting at 1 */
+	KFD_SMI_EVENT_THERMAL_THROTTLE = 2,
 	KFD_SMI_EVENT_GPU_PRE_RESET = 3,
 	KFD_SMI_EVENT_GPU_POST_RESET = 4,
 };
-- 
cgit v1.2.3


From 4ad1b0d410c88c7c8e8fd1298c9d2293b651e35c Mon Sep 17 00:00:00 2001
From: Maheshwar Ajja <majja@codeaurora.org>
Date: Sat, 23 May 2020 03:05:26 +0200
Subject: media: v4l2-ctrls: Add encoder constant quality control

When V4L2_CID_MPEG_VIDEO_BITRATE_MODE value is
V4L2_MPEG_VIDEO_BITRATE_MODE_CQ, encoder will produce
constant quality output indicated by
V4L2_CID_MPEG_VIDEO_CONSTANT_QUALITY control value.
Encoder will choose appropriate quantization parameter
and bitrate to produce requested frame quality level.

Signed-off-by: Maheshwar Ajja <majja@codeaurora.org>
Reviewed-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Stanimir Varbanov <stanimir.varbanov@linaro.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst | 10 ++++++++++
 drivers/media/v4l2-core/v4l2-ctrls.c                      |  2 ++
 include/uapi/linux/v4l2-controls.h                        |  2 ++
 3 files changed, 14 insertions(+)

(limited to 'include/uapi')

diff --git a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
index e2b94b1d0ab0..ebd367dcf8b9 100644
--- a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
+++ b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
@@ -581,6 +581,8 @@ enum v4l2_mpeg_video_bitrate_mode -
       - Variable bitrate
     * - ``V4L2_MPEG_VIDEO_BITRATE_MODE_CBR``
       - Constant bitrate
+    * - ``V4L2_MPEG_VIDEO_BITRATE_MODE_CQ``
+      - Constant quality
 
 
@@ -592,6 +594,14 @@ enum v4l2_mpeg_video_bitrate_mode -
     the average video bitrate. It is ignored if the video bitrate mode
     is set to constant bitrate.
 
+``V4L2_CID_MPEG_VIDEO_CONSTANT_QUALITY (integer)``
+    Constant quality level control. This control is applicable when
+    ``V4L2_CID_MPEG_VIDEO_BITRATE_MODE`` value is
+    ``V4L2_MPEG_VIDEO_BITRATE_MODE_CQ``. Valid range is 1 to 100
+    where 1 indicates lowest quality and 100 indicates highest quality.
+    Encoder will decide the appropriate quantization parameter and
+    bitrate to produce requested frame quality.
+
 ``V4L2_CID_MPEG_VIDEO_TEMPORAL_DECIMATION (integer)``
     For every captured frame, skip this many subsequent frames (default
     0).
diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index b846f5b089c9..83372789eec3 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -200,6 +200,7 @@ const char * const *v4l2_ctrl_get_menu(u32 id)
 	static const char * const mpeg_video_bitrate_mode[] = {
 		"Variable Bitrate",
 		"Constant Bitrate",
+		"Constant Quality",
 		NULL
 	};
 	static const char * const mpeg_stream_type[] = {
@@ -832,6 +833,7 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_MPEG_VIDEO_GOP_CLOSURE:	return "Video GOP Closure";
 	case V4L2_CID_MPEG_VIDEO_PULLDOWN:	return "Video Pulldown";
 	case V4L2_CID_MPEG_VIDEO_BITRATE_MODE:	return "Video Bitrate Mode";
+	case V4L2_CID_MPEG_VIDEO_CONSTANT_QUALITY:	return "Constant Quality";
 	case V4L2_CID_MPEG_VIDEO_BITRATE:	return "Video Bitrate";
 	case V4L2_CID_MPEG_VIDEO_BITRATE_PEAK:	return "Video Peak Bitrate";
 	case V4L2_CID_MPEG_VIDEO_TEMPORAL_DECIMATION: return "Video Temporal Decimation";
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 62271418c1be..0f7e4388dcce 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -375,6 +375,7 @@ enum v4l2_mpeg_video_aspect {
 enum v4l2_mpeg_video_bitrate_mode {
 	V4L2_MPEG_VIDEO_BITRATE_MODE_VBR = 0,
 	V4L2_MPEG_VIDEO_BITRATE_MODE_CBR = 1,
+	V4L2_MPEG_VIDEO_BITRATE_MODE_CQ  = 2,
 };
 #define V4L2_CID_MPEG_VIDEO_BITRATE		(V4L2_CID_MPEG_BASE+207)
 #define V4L2_CID_MPEG_VIDEO_BITRATE_PEAK	(V4L2_CID_MPEG_BASE+208)
@@ -742,6 +743,7 @@ enum v4l2_cid_mpeg_video_hevc_size_of_length_field {
 #define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L6_BR	(V4L2_CID_MPEG_BASE + 642)
 #define V4L2_CID_MPEG_VIDEO_REF_NUMBER_FOR_PFRAMES	(V4L2_CID_MPEG_BASE + 643)
 #define V4L2_CID_MPEG_VIDEO_PREPEND_SPSPPS_TO_IDR	(V4L2_CID_MPEG_BASE + 644)
+#define V4L2_CID_MPEG_VIDEO_CONSTANT_QUALITY		(V4L2_CID_MPEG_BASE + 645)
 
 /*  MPEG-class control IDs specific to the CX2341x driver as defined by V4L2 */
 #define V4L2_CID_MPEG_CX2341X_BASE				(V4L2_CTRL_CLASS_MPEG | 0x1000)
-- 
cgit v1.2.3


From 44f5b2fffc3213c919f53adddadb1a05519bdc0e Mon Sep 17 00:00:00 2001
From: Stanimir Varbanov <stanimir.varbanov@linaro.org>
Date: Sun, 5 Jul 2020 01:41:00 +0200
Subject: media: v4l2-ctrl: Add frame-skip std encoder control

Add a standard v4l2 encoder control for frame-skip. The control
is a copy of a custom encoder control so that other v4l2 encoder
drivers can use it.

Signed-off-by: Stanimir Varbanov <stanimir.varbanov@linaro.org>
Reviewed-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../userspace-api/media/v4l/ext-ctrls-codec.rst    | 38 ++++++++++++++++++++++
 drivers/media/v4l2-core/v4l2-ctrls.c               | 10 ++++++
 include/uapi/linux/v4l2-controls.h                 |  6 ++++
 3 files changed, 54 insertions(+)

(limited to 'include/uapi')

diff --git a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
index ebd367dcf8b9..750a6d4fadaf 100644
--- a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
+++ b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
@@ -602,6 +602,40 @@ enum v4l2_mpeg_video_bitrate_mode -
     Encoder will decide the appropriate quantization parameter and
     bitrate to produce requested frame quality.
 
+
+``V4L2_CID_MPEG_VIDEO_FRAME_SKIP_MODE (enum)``
+
+enum v4l2_mpeg_video_frame_skip_mode -
+    Indicates in what conditions the encoder should skip frames. If
+    encoding a frame would cause the encoded stream to be larger than a
+    chosen data limit then the frame will be skipped. Possible values
+    are:
+
+
+.. tabularcolumns:: |p{9.2cm}|p{8.3cm}|
+
+.. raw:: latex
+
+    \small
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    * - ``V4L2_MPEG_VIDEO_FRAME_SKIP_MODE_DISABLED``
+      - Frame skip mode is disabled.
+    * - ``V4L2_MPEG_VIDEO_FRAME_SKIP_MODE_LEVEL_LIMIT``
+      - Frame skip mode enabled and buffer limit is set by the chosen
+        level and is defined by the standard.
+    * - ``V4L2_MPEG_VIDEO_FRAME_SKIP_MODE_BUF_LIMIT``
+      - Frame skip mode enabled and buffer limit is set by the
+        :ref:`VBV (MPEG1/2/4) <v4l2-mpeg-video-vbv-size>` or
+        :ref:`CPB (H264) buffer size <v4l2-mpeg-video-h264-cpb-size>` control.
+
+.. raw:: latex
+
+    \normalsize
+
 ``V4L2_CID_MPEG_VIDEO_TEMPORAL_DECIMATION (integer)``
     For every captured frame, skip this many subsequent frames (default
     0).
@@ -1173,6 +1207,8 @@ enum v4l2_mpeg_video_h264_entropy_mode -
     Quantization parameter for an B frame for MPEG4. Valid range: from 1
     to 31.
 
+.. _v4l2-mpeg-video-vbv-size:
+
 ``V4L2_CID_MPEG_VIDEO_VBV_SIZE (integer)``
     The Video Buffer Verifier size in kilobytes, it is used as a
     limitation of frame skip. The VBV is defined in the standard as a
@@ -1210,6 +1246,8 @@ enum v4l2_mpeg_video_h264_entropy_mode -
     Force a key frame for the next queued buffer. Applicable to
     encoders. This is a general, codec-agnostic keyframe control.
 
+.. _v4l2-mpeg-video-h264-cpb-size:
+
 ``V4L2_CID_MPEG_VIDEO_H264_CPB_SIZE (integer)``
     The Coded Picture Buffer size in kilobytes, it is used as a
     limitation of frame skip. The CPB is defined in the H264 standard as
diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index 83372789eec3..c138914f507b 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -591,6 +591,12 @@ const char * const *v4l2_ctrl_get_menu(u32 id)
 		"External",
 		NULL,
 	};
+	static const char * const mpeg_video_frame_skip[] = {
+		"Disabled",
+		"Level Limit",
+		"VBV/CPB Limit",
+		NULL,
+	};
 
 	switch (id) {
 	case V4L2_CID_MPEG_AUDIO_SAMPLING_FREQ:
@@ -652,6 +658,8 @@ const char * const *v4l2_ctrl_get_menu(u32 id)
 		return flash_strobe_source;
 	case V4L2_CID_MPEG_VIDEO_HEADER_MODE:
 		return header_mode;
+	case V4L2_CID_MPEG_VIDEO_FRAME_SKIP_MODE:
+		return mpeg_video_frame_skip;
 	case V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MODE:
 		return multi_slice;
 	case V4L2_CID_MPEG_VIDEO_H264_ENTROPY_MODE:
@@ -846,6 +854,7 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_MPEG_VIDEO_MB_RC_ENABLE:			return "H264 MB Level Rate Control";
 	case V4L2_CID_MPEG_VIDEO_HEADER_MODE:			return "Sequence Header Mode";
 	case V4L2_CID_MPEG_VIDEO_MAX_REF_PIC:			return "Max Number of Reference Pics";
+	case V4L2_CID_MPEG_VIDEO_FRAME_SKIP_MODE:		return "Frame Skip Mode";
 	case V4L2_CID_MPEG_VIDEO_H263_I_FRAME_QP:		return "H263 I-Frame QP Value";
 	case V4L2_CID_MPEG_VIDEO_H263_P_FRAME_QP:		return "H263 P-Frame QP Value";
 	case V4L2_CID_MPEG_VIDEO_H263_B_FRAME_QP:		return "H263 B-Frame QP Value";
@@ -1268,6 +1277,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_FLASH_LED_MODE:
 	case V4L2_CID_FLASH_STROBE_SOURCE:
 	case V4L2_CID_MPEG_VIDEO_HEADER_MODE:
+	case V4L2_CID_MPEG_VIDEO_FRAME_SKIP_MODE:
 	case V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MODE:
 	case V4L2_CID_MPEG_VIDEO_H264_ENTROPY_MODE:
 	case V4L2_CID_MPEG_VIDEO_H264_LEVEL:
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 0f7e4388dcce..053827cda8e6 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -744,6 +744,12 @@ enum v4l2_cid_mpeg_video_hevc_size_of_length_field {
 #define V4L2_CID_MPEG_VIDEO_REF_NUMBER_FOR_PFRAMES	(V4L2_CID_MPEG_BASE + 643)
 #define V4L2_CID_MPEG_VIDEO_PREPEND_SPSPPS_TO_IDR	(V4L2_CID_MPEG_BASE + 644)
 #define V4L2_CID_MPEG_VIDEO_CONSTANT_QUALITY		(V4L2_CID_MPEG_BASE + 645)
+#define V4L2_CID_MPEG_VIDEO_FRAME_SKIP_MODE		(V4L2_CID_MPEG_BASE + 646)
+enum v4l2_mpeg_video_frame_skip_mode {
+	V4L2_MPEG_VIDEO_FRAME_SKIP_MODE_DISABLED	= 0,
+	V4L2_MPEG_VIDEO_FRAME_SKIP_MODE_LEVEL_LIMIT	= 1,
+	V4L2_MPEG_VIDEO_FRAME_SKIP_MODE_BUF_LIMIT	= 2,
+};
 
 /*  MPEG-class control IDs specific to the CX2341x driver as defined by V4L2 */
 #define V4L2_CID_MPEG_CX2341X_BASE				(V4L2_CTRL_CLASS_MPEG | 0x1000)
-- 
cgit v1.2.3


From 9d3a39a5f1e45827b008fff1ee9cf3cac3409665 Mon Sep 17 00:00:00 2001
From: Khazhismel Kumykov <khazhy@google.com>
Date: Mon, 24 Aug 2020 15:10:34 -0700
Subject: block: grant IOPRIO_CLASS_RT to CAP_SYS_NICE

CAP_SYS_ADMIN is too broad, and ionice fits into CAP_SYS_NICE's grouping.

Retain CAP_SYS_ADMIN permission for backwards compatibility.

Signed-off-by: Khazhismel Kumykov <khazhy@google.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Acked-by: Serge Hallyn <serge@hallyn.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/ioprio.c                  | 2 +-
 include/uapi/linux/capability.h | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/block/ioprio.c b/block/ioprio.c
index 04ebd37966f1..364d2294ba90 100644
--- a/block/ioprio.c
+++ b/block/ioprio.c
@@ -69,7 +69,7 @@ int ioprio_check_cap(int ioprio)
 
 	switch (class) {
 		case IOPRIO_CLASS_RT:
-			if (!capable(CAP_SYS_ADMIN))
+			if (!capable(CAP_SYS_NICE) && !capable(CAP_SYS_ADMIN))
 				return -EPERM;
 			fallthrough;
 			/* rt has prio field too */
diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h
index 395dd0df8d08..c6ca33034147 100644
--- a/include/uapi/linux/capability.h
+++ b/include/uapi/linux/capability.h
@@ -288,6 +288,8 @@ struct vfs_ns_cap_data {
    processes and setting the scheduling algorithm used by another
    process. */
 /* Allow setting cpu affinity on other processes */
+/* Allow setting realtime ioprio class */
+/* Allow setting ioprio class on other processes */
 
 #define CAP_SYS_NICE         23
 
-- 
cgit v1.2.3


From c1077616142907bb6ee987ecd136d6857ffd8787 Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@google.com>
Date: Tue, 1 Sep 2020 15:10:08 -0700
Subject: ip: expose inet sockopts through inet_diag

Expose all existing inet sockopt bits through inet_diag for debugging purposes.
Corresponding changes in iproute2 ss will be submitted to output all
these values.

Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Mahesh Bandewar <maheshb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h      |  2 ++
 include/uapi/linux/inet_diag.h | 18 ++++++++++++++++++
 net/ipv4/inet_diag.c           | 17 +++++++++++++++++
 3 files changed, 37 insertions(+)

(limited to 'include/uapi')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 0ef2d800fda7..84abb30a3fbb 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -75,6 +75,8 @@ static inline size_t inet_diag_msg_attrs_size(void)
 #ifdef CONFIG_SOCK_CGROUP_DATA
 		+ nla_total_size_64bit(sizeof(u64))  /* INET_DIAG_CGROUP_ID */
 #endif
+		+ nla_total_size(sizeof(struct inet_diag_sockopt))
+						     /* INET_DIAG_SOCKOPT */
 		;
 }
 int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
index 5ba122c1949a..20ee93f0f876 100644
--- a/include/uapi/linux/inet_diag.h
+++ b/include/uapi/linux/inet_diag.h
@@ -160,6 +160,7 @@ enum {
 	INET_DIAG_ULP_INFO,
 	INET_DIAG_SK_BPF_STORAGES,
 	INET_DIAG_CGROUP_ID,
+	INET_DIAG_SOCKOPT,
 	__INET_DIAG_MAX,
 };
 
@@ -183,6 +184,23 @@ struct inet_diag_meminfo {
 	__u32	idiag_tmem;
 };
 
+/* INET_DIAG_SOCKOPT */
+
+struct inet_diag_sockopt {
+	__u8	recverr:1,
+		is_icsk:1,
+		freebind:1,
+		hdrincl:1,
+		mc_loop:1,
+		transparent:1,
+		mc_all:1,
+		nodefrag:1;
+	__u8	bind_address_no_port:1,
+		recverr_rfc4884:1,
+		defer_connect:1,
+		unused:5;
+};
+
 /* INET_DIAG_VEGASINFO */
 
 struct tcpvegas_info {
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 4a98dd736270..93816d47e55a 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -125,6 +125,7 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
 			     bool net_admin)
 {
 	const struct inet_sock *inet = inet_sk(sk);
+	struct inet_diag_sockopt inet_sockopt;
 
 	if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown))
 		goto errout;
@@ -180,6 +181,22 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
 	r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
 	r->idiag_inode = sock_i_ino(sk);
 
+	memset(&inet_sockopt, 0, sizeof(inet_sockopt));
+	inet_sockopt.recverr	= inet->recverr;
+	inet_sockopt.is_icsk	= inet->is_icsk;
+	inet_sockopt.freebind	= inet->freebind;
+	inet_sockopt.hdrincl	= inet->hdrincl;
+	inet_sockopt.mc_loop	= inet->mc_loop;
+	inet_sockopt.transparent = inet->transparent;
+	inet_sockopt.mc_all	= inet->mc_all;
+	inet_sockopt.nodefrag	= inet->nodefrag;
+	inet_sockopt.bind_address_no_port = inet->bind_address_no_port;
+	inet_sockopt.recverr_rfc4884 = inet->recverr_rfc4884;
+	inet_sockopt.defer_connect = inet->defer_connect;
+	if (nla_put(skb, INET_DIAG_SOCKOPT, sizeof(inet_sockopt),
+		    &inet_sockopt))
+		goto errout;
+
 	return 0;
 errout:
 	return 1;
-- 
cgit v1.2.3


From d1c6c4a9fd3da5c735386b0cdb44d79667f10a1b Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <guennadi.liakhovetski@linux.intel.com>
Date: Wed, 2 Sep 2020 17:07:55 +0300
Subject: ASoC: SOF: support topology components on secondary cores

Currently SOF supports running pipelines on secondary DSP cores in a
limited way. This patch represents the next step in SOF multi-core DSP
support, it adds checks for core ID to individual topology components.
It takes care to power up all the requested cores. More advanced DSP
core power management should be added in the future.

Signed-off-by: Pan Xiuli <xiuli.pan@linux.intel.com>
Signed-off-by: Guennadi Liakhovetski <guennadi.liakhovetski@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Signed-off-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
Link: https://lore.kernel.org/r/20200902140756.1427005-3-kai.vehmanen@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/uapi/sound/sof/tokens.h |   1 +
 sound/soc/sof/pm.c              |   1 +
 sound/soc/sof/sof-audio.c       |  25 ++++++++
 sound/soc/sof/sof-audio.h       |   5 ++
 sound/soc/sof/topology.c        | 128 +++++++++++++++++++++++++++++-----------
 5 files changed, 126 insertions(+), 34 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/sound/sof/tokens.h b/include/uapi/sound/sof/tokens.h
index 5941e2eb1588..37f5aaa09c2b 100644
--- a/include/uapi/sound/sof/tokens.h
+++ b/include/uapi/sound/sof/tokens.h
@@ -73,6 +73,7 @@
 /* Token retired with ABI 3.2, do not use for new capabilities
  * #define SOF_TKN_COMP_PRELOAD_COUNT		403
  */
+#define SOF_TKN_COMP_CORE_ID			404
 
 /* SSP */
 #define SOF_TKN_INTEL_SSP_CLKS_CONTROL		500
diff --git a/sound/soc/sof/pm.c b/sound/soc/sof/pm.c
index 92e5f9b15f3a..a5f7c7f024a1 100644
--- a/sound/soc/sof/pm.c
+++ b/sound/soc/sof/pm.c
@@ -256,6 +256,7 @@ suspend:
 
 	/* reset FW state */
 	sdev->fw_state = SOF_FW_BOOT_NOT_STARTED;
+	sdev->enabled_cores_mask = 0;
 
 	return ret;
 }
diff --git a/sound/soc/sof/sof-audio.c b/sound/soc/sof/sof-audio.c
index 33d84405cf9c..d05f99cd7919 100644
--- a/sound/soc/sof/sof-audio.c
+++ b/sound/soc/sof/sof-audio.c
@@ -142,6 +142,22 @@ static int sof_restore_kcontrols(struct device *dev)
 	return 0;
 }
 
+const struct sof_ipc_pipe_new *snd_sof_pipeline_find(struct snd_sof_dev *sdev,
+						     int pipeline_id)
+{
+	const struct snd_sof_widget *swidget;
+
+	list_for_each_entry(swidget, &sdev->widget_list, list)
+		if (swidget->id == snd_soc_dapm_scheduler) {
+			const struct sof_ipc_pipe_new *pipeline =
+				swidget->private;
+			if (pipeline->pipeline_id == pipeline_id)
+				return pipeline;
+		}
+
+	return NULL;
+}
+
 int sof_restore_pipelines(struct device *dev)
 {
 	struct snd_sof_dev *sdev = dev_get_drvdata(dev);
@@ -161,6 +177,15 @@ int sof_restore_pipelines(struct device *dev)
 		if (!swidget->private)
 			continue;
 
+		ret = sof_pipeline_core_enable(sdev, swidget);
+		if (ret < 0) {
+			dev_err(dev,
+				"error: failed to enable target core: %d\n",
+				ret);
+
+			return ret;
+		}
+
 		switch (swidget->id) {
 		case snd_soc_dapm_dai_in:
 		case snd_soc_dapm_dai_out:
diff --git a/sound/soc/sof/sof-audio.h b/sound/soc/sof/sof-audio.h
index 9629994fe463..7f8be8817e69 100644
--- a/sound/soc/sof/sof-audio.h
+++ b/sound/soc/sof/sof-audio.h
@@ -83,6 +83,7 @@ struct snd_sof_widget {
 	int comp_id;
 	int pipeline_id;
 	int complete;
+	int core;
 	int id;
 
 	struct snd_soc_dapm_widget *widget;
@@ -151,6 +152,8 @@ int snd_sof_complete_pipeline(struct device *dev,
 int sof_load_pipeline_ipc(struct device *dev,
 			  struct sof_ipc_pipe_new *pipeline,
 			  struct sof_ipc_comp_reply *r);
+int sof_pipeline_core_enable(struct snd_sof_dev *sdev,
+			     const struct snd_sof_widget *swidget);
 
 /*
  * Stream IPC
@@ -190,6 +193,8 @@ struct snd_sof_pcm *snd_sof_find_spcm_comp(struct snd_soc_component *scomp,
 					   int *direction);
 struct snd_sof_pcm *snd_sof_find_spcm_pcm_id(struct snd_soc_component *scomp,
 					     unsigned int pcm_id);
+const struct sof_ipc_pipe_new *snd_sof_pipeline_find(struct snd_sof_dev *sdev,
+						     int pipeline_id);
 void snd_sof_pcm_period_elapsed(struct snd_pcm_substream *substream);
 void snd_sof_pcm_period_elapsed_work(struct work_struct *work);
 
diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c
index d41df9337328..46468fb7b6d1 100644
--- a/sound/soc/sof/topology.c
+++ b/sound/soc/sof/topology.c
@@ -8,6 +8,9 @@
 // Author: Liam Girdwood <liam.r.girdwood@linux.intel.com>
 //
 
+#include <linux/bits.h>
+#include <linux/device.h>
+#include <linux/errno.h>
 #include <linux/firmware.h>
 #include <linux/workqueue.h>
 #include <sound/tlv.h>
@@ -715,6 +718,13 @@ static const struct sof_topology_token sai_tokens[] = {
 		offsetof(struct sof_ipc_dai_sai_params, mclk_id), 0},
 };
 
+/* Core tokens */
+static const struct sof_topology_token core_tokens[] = {
+	{SOF_TKN_COMP_CORE_ID,
+		SND_SOC_TPLG_TUPLE_TYPE_WORD, get_token_u32,
+		offsetof(struct sof_ipc_comp, core), 0},
+};
+
 /*
  * DMIC PDM Tokens
  * SOF_TKN_INTEL_DMIC_PDM_CTRL_ID should be the first token
@@ -1278,6 +1288,65 @@ static int sof_control_unload(struct snd_soc_component *scomp,
  * DAI Topology
  */
 
+/* Static DSP core power management so far, should be extended in the future */
+static int sof_core_enable(struct snd_sof_dev *sdev, int core)
+{
+	struct sof_ipc_pm_core_config pm_core_config = {
+		.hdr = {
+			.cmd = SOF_IPC_GLB_PM_MSG | SOF_IPC_PM_CORE_ENABLE,
+			.size = sizeof(pm_core_config),
+		},
+		.enable_mask = sdev->enabled_cores_mask | BIT(core),
+	};
+	int ret;
+
+	if (sdev->enabled_cores_mask & BIT(core))
+		return 0;
+
+	/* power up the core */
+	ret = snd_sof_dsp_core_power_up(sdev, BIT(core));
+	if (ret < 0) {
+		dev_err(sdev->dev, "error: %d powering up core %d\n",
+			ret, core);
+		return ret;
+	}
+
+	/* update enabled cores mask */
+	sdev->enabled_cores_mask |= BIT(core);
+
+	/* Now notify DSP that the core has been powered up */
+	ret = sof_ipc_tx_message(sdev->ipc, pm_core_config.hdr.cmd,
+				 &pm_core_config, sizeof(pm_core_config),
+				 &pm_core_config, sizeof(pm_core_config));
+	if (ret < 0)
+		dev_err(sdev->dev, "error: core %d enable ipc failure %d\n",
+			core, ret);
+
+	return ret;
+}
+
+int sof_pipeline_core_enable(struct snd_sof_dev *sdev,
+			     const struct snd_sof_widget *swidget)
+{
+	const struct sof_ipc_pipe_new *pipeline;
+	int ret;
+
+	if (swidget->id == snd_soc_dapm_scheduler) {
+		pipeline = swidget->private;
+	} else {
+		pipeline = snd_sof_pipeline_find(sdev, swidget->pipeline_id);
+		if (!pipeline)
+			return -ENOENT;
+	}
+
+	/* First enable the pipeline core */
+	ret = sof_core_enable(sdev, pipeline->core);
+	if (ret < 0)
+		return ret;
+
+	return sof_core_enable(sdev, swidget->core);
+}
+
 static int sof_connect_dai_widget(struct snd_soc_component *scomp,
 				  struct snd_soc_dapm_widget *w,
 				  struct snd_soc_tplg_dapm_widget *tw,
@@ -1553,44 +1622,15 @@ int sof_load_pipeline_ipc(struct device *dev,
 			  struct sof_ipc_comp_reply *r)
 {
 	struct snd_sof_dev *sdev = dev_get_drvdata(dev);
-	struct sof_ipc_pm_core_config pm_core_config;
-	int ret;
+	int ret = sof_core_enable(sdev, pipeline->core);
 
-	ret = sof_ipc_tx_message(sdev->ipc, pipeline->hdr.cmd, pipeline,
-				 sizeof(*pipeline), r, sizeof(*r));
-	if (ret < 0) {
-		dev_err(dev, "error: load pipeline ipc failure\n");
-		return ret;
-	}
-
-	/* power up the core that this pipeline is scheduled on */
-	ret = snd_sof_dsp_core_power_up(sdev, 1 << pipeline->core);
-	if (ret < 0) {
-		dev_err(dev, "error: powering up pipeline schedule core %d\n",
-			pipeline->core);
+	if (ret < 0)
 		return ret;
-	}
 
-	/* update enabled cores mask */
-	sdev->enabled_cores_mask |= 1 << pipeline->core;
-
-	/*
-	 * Now notify DSP that the core that this pipeline is scheduled on
-	 * has been powered up
-	 */
-	memset(&pm_core_config, 0, sizeof(pm_core_config));
-	pm_core_config.enable_mask = sdev->enabled_cores_mask;
-
-	/* configure CORE_ENABLE ipc message */
-	pm_core_config.hdr.size = sizeof(pm_core_config);
-	pm_core_config.hdr.cmd = SOF_IPC_GLB_PM_MSG | SOF_IPC_PM_CORE_ENABLE;
-
-	/* send ipc */
-	ret = sof_ipc_tx_message(sdev->ipc, pm_core_config.hdr.cmd,
-				 &pm_core_config, sizeof(pm_core_config),
-				 &pm_core_config, sizeof(pm_core_config));
+	ret = sof_ipc_tx_message(sdev->ipc, pipeline->hdr.cmd, pipeline,
+				 sizeof(*pipeline), r, sizeof(*r));
 	if (ret < 0)
-		dev_err(dev, "error: core enable ipc failure\n");
+		dev_err(dev, "error: load pipeline ipc failure\n");
 
 	return ret;
 }
@@ -2316,6 +2356,26 @@ static int sof_widget_ready(struct snd_soc_component *scomp, int index,
 		strnlen(tw->sname, SNDRV_CTL_ELEM_ID_NAME_MAXLEN) > 0
 			? tw->sname : "none");
 
+	ret = sof_parse_tokens(scomp, &comp, core_tokens,
+			       ARRAY_SIZE(core_tokens), tw->priv.array,
+			       le32_to_cpu(tw->priv.size));
+	if (ret != 0) {
+		dev_err(scomp->dev, "error: parsing core tokens failed %d\n",
+			ret);
+		kfree(swidget);
+		return ret;
+	}
+
+	swidget->core = comp.core;
+
+	/* default is primary core, safe to call for already enabled cores */
+	ret = sof_core_enable(sdev, comp.core);
+	if (ret < 0) {
+		dev_err(scomp->dev, "error: enable core: %d\n", ret);
+		kfree(swidget);
+		return ret;
+	}
+
 	/* handle any special case widgets */
 	switch (w->id) {
 	case snd_soc_dapm_dai_in:
-- 
cgit v1.2.3


From 6da73d15258a1e5e86d03d4ffba8776d17a8a287 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner@ubuntu.com>
Date: Wed, 2 Sep 2020 12:21:27 +0200
Subject: pidfd: support PIDFD_NONBLOCK in pidfd_open()

Introduce PIDFD_NONBLOCK to support non-blocking pidfd file descriptors.

Ever since the introduction of pidfds and more advanced async io various
programming languages such as Rust have grown support for async event
libraries. These libraries are created to help build epoll-based event loops
around file descriptors. A common pattern is to automatically set
O_NONBLOCK on all file descriptors they manage.

For such libraries the EAGAIN error code is treated specially. When a function
is called that returns EAGAIN the function isn't called again until the event
loop indicates the file descriptor is ready. Supporting EAGAIN when
waiting on pidfds makes such libraries just work with little effort. In the
following patch we will extend waitid() internally to support non-blocking
pidfds.

This introduces a new flag PIDFD_NONBLOCK that is equivalent to O_NONBLOCK.
This follows the same patterns we have for other (anon inode) file descriptors
such as EFD_NONBLOCK, IN_NONBLOCK, SFD_NONBLOCK, TFD_NONBLOCK and the same for
close-on-exec flags.

Suggested-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Sargun Dhillon <sargun@sargun.me>
Cc: Oleg Nesterov <oleg@redhat.com>
Link: https://lore.kernel.org/lkml/20200811181236.GA18763@localhost/
Link: https://github.com/joshtriplett/async-pidfd
Link: https://lore.kernel.org/r/20200902102130.147672-2-christian.brauner@ubuntu.com
---
 include/uapi/linux/pidfd.h | 12 ++++++++++++
 kernel/pid.c               | 12 +++++++-----
 2 files changed, 19 insertions(+), 5 deletions(-)
 create mode 100644 include/uapi/linux/pidfd.h

(limited to 'include/uapi')

diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h
new file mode 100644
index 000000000000..5406fbc13074
--- /dev/null
+++ b/include/uapi/linux/pidfd.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+
+#ifndef _UAPI_LINUX_PIDFD_H
+#define _UAPI_LINUX_PIDFD_H
+
+#include <linux/types.h>
+#include <linux/fcntl.h>
+
+/* Flags for pidfd_open().  */
+#define PIDFD_NONBLOCK O_NONBLOCK
+
+#endif /* _UAPI_LINUX_PIDFD_H */
diff --git a/kernel/pid.c b/kernel/pid.c
index b2562a7ce525..74ddbff1a6ba 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -43,6 +43,7 @@
 #include <linux/sched/task.h>
 #include <linux/idr.h>
 #include <net/sock.h>
+#include <uapi/linux/pidfd.h>
 
 struct pid init_struct_pid = {
 	.count		= REFCOUNT_INIT(1),
@@ -522,7 +523,8 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
 /**
  * pidfd_create() - Create a new pid file descriptor.
  *
- * @pid:  struct pid that the pidfd will reference
+ * @pid:   struct pid that the pidfd will reference
+ * @flags: flags to pass
  *
  * This creates a new pid file descriptor with the O_CLOEXEC flag set.
  *
@@ -532,12 +534,12 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
  * Return: On success, a cloexec pidfd is returned.
  *         On error, a negative errno number will be returned.
  */
-static int pidfd_create(struct pid *pid)
+static int pidfd_create(struct pid *pid, unsigned int flags)
 {
 	int fd;
 
 	fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid),
-			      O_RDWR | O_CLOEXEC);
+			      flags | O_RDWR | O_CLOEXEC);
 	if (fd < 0)
 		put_pid(pid);
 
@@ -565,7 +567,7 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags)
 	int fd;
 	struct pid *p;
 
-	if (flags)
+	if (flags & ~PIDFD_NONBLOCK)
 		return -EINVAL;
 
 	if (pid <= 0)
@@ -576,7 +578,7 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags)
 		return -ESRCH;
 
 	if (pid_has_task(p, PIDTYPE_TGID))
-		fd = pidfd_create(p);
+		fd = pidfd_create(p, flags);
 	else
 		fd = -EINVAL;
 
-- 
cgit v1.2.3


From 74f1082487feb90bbf880af14beb8e29c3030c9f Mon Sep 17 00:00:00 2001
From: Vincenzo Frascino <vincenzo.frascino@arm.com>
Date: Wed, 7 Aug 2019 12:21:05 +0100
Subject: arm64: mte: Add specific SIGSEGV codes

Add MTE-specific SIGSEGV codes to siginfo.h and update the x86
BUILD_BUG_ON(NSIGSEGV != 7) compile check.

Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
[catalin.marinas@arm.com: renamed precise/imprecise to sync/async]
[catalin.marinas@arm.com: dropped #ifdef __aarch64__, renumbered]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Will Deacon <will@kernel.org>
---
 arch/x86/kernel/signal_compat.c    | 2 +-
 include/uapi/asm-generic/siginfo.h | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
index 9ccbf0576cd0..a7f3e12cfbdb 100644
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -27,7 +27,7 @@ static inline void signal_compat_build_tests(void)
 	 */
 	BUILD_BUG_ON(NSIGILL  != 11);
 	BUILD_BUG_ON(NSIGFPE  != 15);
-	BUILD_BUG_ON(NSIGSEGV != 7);
+	BUILD_BUG_ON(NSIGSEGV != 9);
 	BUILD_BUG_ON(NSIGBUS  != 5);
 	BUILD_BUG_ON(NSIGTRAP != 5);
 	BUILD_BUG_ON(NSIGCHLD != 6);
diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h
index cb3d6c267181..7aacf9389010 100644
--- a/include/uapi/asm-generic/siginfo.h
+++ b/include/uapi/asm-generic/siginfo.h
@@ -229,7 +229,9 @@ typedef struct siginfo {
 #define SEGV_ACCADI	5	/* ADI not enabled for mapped object */
 #define SEGV_ADIDERR	6	/* Disrupting MCD error */
 #define SEGV_ADIPERR	7	/* Precise MCD exception */
-#define NSIGSEGV	7
+#define SEGV_MTEAERR	8	/* Asynchronous ARM MTE error */
+#define SEGV_MTESERR	9	/* Synchronous ARM MTE exception */
+#define NSIGSEGV	9
 
 /*
  * SIGBUS si_codes
-- 
cgit v1.2.3


From 1c101da8b971a36695319dce7a24711dc567a0dd Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Wed, 27 Nov 2019 10:30:15 +0000
Subject: arm64: mte: Allow user control of the tag check mode via prctl()

By default, even if PROT_MTE is set on a memory range, there is no tag
check fault reporting (SIGSEGV). Introduce a set of options to the
existing prctl(PR_SET_TAGGED_ADDR_CTRL) to allow user control of the tag
check fault mode:

  PR_MTE_TCF_NONE  - no reporting (default)
  PR_MTE_TCF_SYNC  - synchronous tag check fault reporting
  PR_MTE_TCF_ASYNC - asynchronous tag check fault reporting

These options translate into the corresponding SCTLR_EL1.TCF0 bitfield,
context-switched by the kernel. Note that the kernel accesses to the
user address space (e.g. read() system call) are not checked if the user
thread tag checking mode is PR_MTE_TCF_NONE or PR_MTE_TCF_ASYNC. If the
tag checking mode is PR_MTE_TCF_SYNC, the kernel makes a best effort to
check its user address accesses, however it cannot always guarantee it.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/mte.h       | 14 +++++++
 arch/arm64/include/asm/processor.h |  3 ++
 arch/arm64/kernel/mte.c            | 77 ++++++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/process.c        | 26 +++++++++++--
 include/uapi/linux/prctl.h         |  6 +++
 5 files changed, 123 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index b2577eee62c2..df2efbc9f8f1 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -21,6 +21,9 @@ void mte_clear_page_tags(void *addr);
 void mte_sync_tags(pte_t *ptep, pte_t pte);
 void mte_copy_page_tags(void *kto, const void *kfrom);
 void flush_mte_state(void);
+void mte_thread_switch(struct task_struct *next);
+long set_mte_ctrl(unsigned long arg);
+long get_mte_ctrl(void);
 
 #else
 
@@ -36,6 +39,17 @@ static inline void mte_copy_page_tags(void *kto, const void *kfrom)
 static inline void flush_mte_state(void)
 {
 }
+static inline void mte_thread_switch(struct task_struct *next)
+{
+}
+static inline long set_mte_ctrl(unsigned long arg)
+{
+	return 0;
+}
+static inline long get_mte_ctrl(void)
+{
+	return 0;
+}
 
 #endif
 
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 240fe5e5b720..80e7f0573309 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -151,6 +151,9 @@ struct thread_struct {
 	struct ptrauth_keys_user	keys_user;
 	struct ptrauth_keys_kernel	keys_kernel;
 #endif
+#ifdef CONFIG_ARM64_MTE
+	u64			sctlr_tcf0;
+#endif
 };
 
 static inline void arch_thread_struct_whitelist(unsigned long *offset,
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 5f54fd140610..375483a1f573 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -5,6 +5,8 @@
 
 #include <linux/bitops.h>
 #include <linux/mm.h>
+#include <linux/prctl.h>
+#include <linux/sched.h>
 #include <linux/string.h>
 #include <linux/thread_info.h>
 
@@ -49,6 +51,26 @@ int memcmp_pages(struct page *page1, struct page *page2)
 	return ret;
 }
 
+static void update_sctlr_el1_tcf0(u64 tcf0)
+{
+	/* ISB required for the kernel uaccess routines */
+	sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF0_MASK, tcf0);
+	isb();
+}
+
+static void set_sctlr_el1_tcf0(u64 tcf0)
+{
+	/*
+	 * mte_thread_switch() checks current->thread.sctlr_tcf0 as an
+	 * optimisation. Disable preemption so that it does not see
+	 * the variable update before the SCTLR_EL1.TCF0 one.
+	 */
+	preempt_disable();
+	current->thread.sctlr_tcf0 = tcf0;
+	update_sctlr_el1_tcf0(tcf0);
+	preempt_enable();
+}
+
 void flush_mte_state(void)
 {
 	if (!system_supports_mte())
@@ -58,4 +80,59 @@ void flush_mte_state(void)
 	dsb(ish);
 	write_sysreg_s(0, SYS_TFSRE0_EL1);
 	clear_thread_flag(TIF_MTE_ASYNC_FAULT);
+	/* disable tag checking */
+	set_sctlr_el1_tcf0(SCTLR_EL1_TCF0_NONE);
+}
+
+void mte_thread_switch(struct task_struct *next)
+{
+	if (!system_supports_mte())
+		return;
+
+	/* avoid expensive SCTLR_EL1 accesses if no change */
+	if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0)
+		update_sctlr_el1_tcf0(next->thread.sctlr_tcf0);
+}
+
+long set_mte_ctrl(unsigned long arg)
+{
+	u64 tcf0;
+
+	if (!system_supports_mte())
+		return 0;
+
+	switch (arg & PR_MTE_TCF_MASK) {
+	case PR_MTE_TCF_NONE:
+		tcf0 = SCTLR_EL1_TCF0_NONE;
+		break;
+	case PR_MTE_TCF_SYNC:
+		tcf0 = SCTLR_EL1_TCF0_SYNC;
+		break;
+	case PR_MTE_TCF_ASYNC:
+		tcf0 = SCTLR_EL1_TCF0_ASYNC;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	set_sctlr_el1_tcf0(tcf0);
+
+	return 0;
+}
+
+long get_mte_ctrl(void)
+{
+	if (!system_supports_mte())
+		return 0;
+
+	switch (current->thread.sctlr_tcf0) {
+	case SCTLR_EL1_TCF0_NONE:
+		return PR_MTE_TCF_NONE;
+	case SCTLR_EL1_TCF0_SYNC:
+		return PR_MTE_TCF_SYNC;
+	case SCTLR_EL1_TCF0_ASYNC:
+		return PR_MTE_TCF_ASYNC;
+	}
+
+	return 0;
 }
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index a49028efab68..bb759b88d44a 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -577,6 +577,13 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
 	 */
 	dsb(ish);
 
+	/*
+	 * MTE thread switching must happen after the DSB above to ensure that
+	 * any asynchronous tag check faults have been logged in the TFSR*_EL1
+	 * registers.
+	 */
+	mte_thread_switch(next);
+
 	/* the actual thread switch */
 	last = cpu_switch_to(prev, next);
 
@@ -636,9 +643,15 @@ static unsigned int tagged_addr_disabled;
 
 long set_tagged_addr_ctrl(unsigned long arg)
 {
+	unsigned long valid_mask = PR_TAGGED_ADDR_ENABLE;
+
 	if (is_compat_task())
 		return -EINVAL;
-	if (arg & ~PR_TAGGED_ADDR_ENABLE)
+
+	if (system_supports_mte())
+		valid_mask |= PR_MTE_TCF_MASK;
+
+	if (arg & ~valid_mask)
 		return -EINVAL;
 
 	/*
@@ -648,6 +661,9 @@ long set_tagged_addr_ctrl(unsigned long arg)
 	if (arg & PR_TAGGED_ADDR_ENABLE && tagged_addr_disabled)
 		return -EINVAL;
 
+	if (set_mte_ctrl(arg) != 0)
+		return -EINVAL;
+
 	update_thread_flag(TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE);
 
 	return 0;
@@ -655,13 +671,17 @@ long set_tagged_addr_ctrl(unsigned long arg)
 
 long get_tagged_addr_ctrl(void)
 {
+	long ret = 0;
+
 	if (is_compat_task())
 		return -EINVAL;
 
 	if (test_thread_flag(TIF_TAGGED_ADDR))
-		return PR_TAGGED_ADDR_ENABLE;
+		ret = PR_TAGGED_ADDR_ENABLE;
 
-	return 0;
+	ret |= get_mte_ctrl();
+
+	return ret;
 }
 
 /*
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 07b4f8131e36..2390ab324afa 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -233,6 +233,12 @@ struct prctl_mm_map {
 #define PR_SET_TAGGED_ADDR_CTRL		55
 #define PR_GET_TAGGED_ADDR_CTRL		56
 # define PR_TAGGED_ADDR_ENABLE		(1UL << 0)
+/* MTE tag check fault modes */
+# define PR_MTE_TCF_SHIFT		1
+# define PR_MTE_TCF_NONE		(0UL << PR_MTE_TCF_SHIFT)
+# define PR_MTE_TCF_SYNC		(1UL << PR_MTE_TCF_SHIFT)
+# define PR_MTE_TCF_ASYNC		(2UL << PR_MTE_TCF_SHIFT)
+# define PR_MTE_TCF_MASK		(3UL << PR_MTE_TCF_SHIFT)
 
 /* Control reclaim behavior when allocating memory */
 #define PR_SET_IO_FLUSHER		57
-- 
cgit v1.2.3


From af5ce95282dc99d08a27a407a02c763dde1c5558 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Tue, 10 Dec 2019 11:19:15 +0000
Subject: arm64: mte: Allow user control of the generated random tags via
 prctl()

The IRG, ADDG and SUBG instructions insert a random tag in the resulting
address. Certain tags can be excluded via the GCR_EL1.Exclude bitmap
when, for example, the user wants a certain colour for freed buffers.
Since the GCR_EL1 register is not accessible at EL0, extend the
prctl(PR_SET_TAGGED_ADDR_CTRL) interface to include a 16-bit field in
the first argument for controlling which tags can be generated by the
above instructions (an include rather than exclude mask). Note that by
default all non-zero tags are excluded. This setting is per-thread.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/processor.h |  1 +
 arch/arm64/include/asm/sysreg.h    |  7 +++++++
 arch/arm64/kernel/mte.c            | 35 ++++++++++++++++++++++++++++++++---
 arch/arm64/kernel/process.c        |  2 +-
 include/uapi/linux/prctl.h         |  3 +++
 5 files changed, 44 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 80e7f0573309..e1b1c2a6086e 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -153,6 +153,7 @@ struct thread_struct {
 #endif
 #ifdef CONFIG_ARM64_MTE
 	u64			sctlr_tcf0;
+	u64			gcr_user_incl;
 #endif
 };
 
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index daf030a05de0..52eefe2f7d95 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -1078,6 +1078,13 @@
 		write_sysreg(__scs_new, sysreg);			\
 } while (0)
 
+#define sysreg_clear_set_s(sysreg, clear, set) do {			\
+	u64 __scs_val = read_sysreg_s(sysreg);				\
+	u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set);		\
+	if (__scs_new != __scs_val)					\
+		write_sysreg_s(__scs_new, sysreg);			\
+} while (0)
+
 #endif
 
 #endif	/* __ASM_SYSREG_H */
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 375483a1f573..07798b8d5039 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -71,6 +71,25 @@ static void set_sctlr_el1_tcf0(u64 tcf0)
 	preempt_enable();
 }
 
+static void update_gcr_el1_excl(u64 incl)
+{
+	u64 excl = ~incl & SYS_GCR_EL1_EXCL_MASK;
+
+	/*
+	 * Note that 'incl' is an include mask (controlled by the user via
+	 * prctl()) while GCR_EL1 accepts an exclude mask.
+	 * No need for ISB since this only affects EL0 currently, implicit
+	 * with ERET.
+	 */
+	sysreg_clear_set_s(SYS_GCR_EL1, SYS_GCR_EL1_EXCL_MASK, excl);
+}
+
+static void set_gcr_el1_excl(u64 incl)
+{
+	current->thread.gcr_user_incl = incl;
+	update_gcr_el1_excl(incl);
+}
+
 void flush_mte_state(void)
 {
 	if (!system_supports_mte())
@@ -82,6 +101,8 @@ void flush_mte_state(void)
 	clear_thread_flag(TIF_MTE_ASYNC_FAULT);
 	/* disable tag checking */
 	set_sctlr_el1_tcf0(SCTLR_EL1_TCF0_NONE);
+	/* reset tag generation mask */
+	set_gcr_el1_excl(0);
 }
 
 void mte_thread_switch(struct task_struct *next)
@@ -92,6 +113,7 @@ void mte_thread_switch(struct task_struct *next)
 	/* avoid expensive SCTLR_EL1 accesses if no change */
 	if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0)
 		update_sctlr_el1_tcf0(next->thread.sctlr_tcf0);
+	update_gcr_el1_excl(next->thread.gcr_user_incl);
 }
 
 long set_mte_ctrl(unsigned long arg)
@@ -116,23 +138,30 @@ long set_mte_ctrl(unsigned long arg)
 	}
 
 	set_sctlr_el1_tcf0(tcf0);
+	set_gcr_el1_excl((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT);
 
 	return 0;
 }
 
 long get_mte_ctrl(void)
 {
+	unsigned long ret;
+
 	if (!system_supports_mte())
 		return 0;
 
+	ret = current->thread.gcr_user_incl << PR_MTE_TAG_SHIFT;
+
 	switch (current->thread.sctlr_tcf0) {
 	case SCTLR_EL1_TCF0_NONE:
 		return PR_MTE_TCF_NONE;
 	case SCTLR_EL1_TCF0_SYNC:
-		return PR_MTE_TCF_SYNC;
+		ret |= PR_MTE_TCF_SYNC;
+		break;
 	case SCTLR_EL1_TCF0_ASYNC:
-		return PR_MTE_TCF_ASYNC;
+		ret |= PR_MTE_TCF_ASYNC;
+		break;
 	}
 
-	return 0;
+	return ret;
 }
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index bb759b88d44a..c80383f30d6a 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -649,7 +649,7 @@ long set_tagged_addr_ctrl(unsigned long arg)
 		return -EINVAL;
 
 	if (system_supports_mte())
-		valid_mask |= PR_MTE_TCF_MASK;
+		valid_mask |= PR_MTE_TCF_MASK | PR_MTE_TAG_MASK;
 
 	if (arg & ~valid_mask)
 		return -EINVAL;
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 2390ab324afa..7f0827705c9a 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -239,6 +239,9 @@ struct prctl_mm_map {
 # define PR_MTE_TCF_SYNC		(1UL << PR_MTE_TCF_SHIFT)
 # define PR_MTE_TCF_ASYNC		(2UL << PR_MTE_TCF_SHIFT)
 # define PR_MTE_TCF_MASK		(3UL << PR_MTE_TCF_SHIFT)
+/* MTE tag inclusion mask */
+# define PR_MTE_TAG_SHIFT		3
+# define PR_MTE_TAG_MASK		(0xffffUL << PR_MTE_TAG_SHIFT)
 
 /* Control reclaim behavior when allocating memory */
 #define PR_SET_IO_FLUSHER		57
-- 
cgit v1.2.3


From 2200aa7154cb7ef76bac93e98326883ba64bfa2e Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Fri, 3 Jul 2020 15:12:57 +0100
Subject: arm64: mte: ptrace: Add NT_ARM_TAGGED_ADDR_CTRL regset

This regset allows read/write access to a ptraced process
prctl(PR_SET_TAGGED_ADDR_CTRL) setting.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Alan Hayward <Alan.Hayward@arm.com>
Cc: Luis Machado <luis.machado@linaro.org>
Cc: Omair Javaid <omair.javaid@linaro.org>
---
 arch/arm64/kernel/ptrace.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/elf.h   |  1 +
 2 files changed, 43 insertions(+)

(limited to 'include/uapi')

diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 101040a37d40..f49b349e16a3 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -1033,6 +1033,35 @@ static int pac_generic_keys_set(struct task_struct *target,
 #endif /* CONFIG_CHECKPOINT_RESTORE */
 #endif /* CONFIG_ARM64_PTR_AUTH */
 
+#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
+static int tagged_addr_ctrl_get(struct task_struct *target,
+				const struct user_regset *regset,
+				struct membuf to)
+{
+	long ctrl = get_tagged_addr_ctrl(target);
+
+	if (IS_ERR_VALUE(ctrl))
+		return ctrl;
+
+	return membuf_write(&to, &ctrl, sizeof(ctrl));
+}
+
+static int tagged_addr_ctrl_set(struct task_struct *target, const struct
+				user_regset *regset, unsigned int pos,
+				unsigned int count, const void *kbuf, const
+				void __user *ubuf)
+{
+	int ret;
+	long ctrl;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1);
+	if (ret)
+		return ret;
+
+	return set_tagged_addr_ctrl(target, ctrl);
+}
+#endif
+
 enum aarch64_regset {
 	REGSET_GPR,
 	REGSET_FPR,
@@ -1052,6 +1081,9 @@ enum aarch64_regset {
 	REGSET_PACG_KEYS,
 #endif
 #endif
+#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
+	REGSET_TAGGED_ADDR_CTRL,
+#endif
 };
 
 static const struct user_regset aarch64_regsets[] = {
@@ -1149,6 +1181,16 @@ static const struct user_regset aarch64_regsets[] = {
 	},
 #endif
 #endif
+#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
+	[REGSET_TAGGED_ADDR_CTRL] = {
+		.core_note_type = NT_ARM_TAGGED_ADDR_CTRL,
+		.n = 1,
+		.size = sizeof(long),
+		.align = sizeof(long),
+		.regset_get = tagged_addr_ctrl_get,
+		.set = tagged_addr_ctrl_set,
+	},
+#endif
 };
 
 static const struct user_regset_view user_aarch64_view = {
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index 22220945a5fd..30f68b42eeb5 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -425,6 +425,7 @@ typedef struct elf64_shdr {
 #define NT_ARM_PAC_MASK		0x406	/* ARM pointer authentication code masks */
 #define NT_ARM_PACA_KEYS	0x407	/* ARM pointer authentication address keys */
 #define NT_ARM_PACG_KEYS	0x408	/* ARM pointer authentication generic key */
+#define NT_ARM_TAGGED_ADDR_CTRL	0x409	/* arm64 tagged address control (prctl()) */
 #define NT_ARC_V2	0x600		/* ARCv2 accumulator/extra registers */
 #define NT_VMCOREDD	0x700		/* Vmcore Device Dump Note */
 #define NT_MIPS_DSP	0x800		/* MIPS DSP ASE registers */
-- 
cgit v1.2.3


From 16270a92355722e387e9ca19627c5a4d7bae1354 Mon Sep 17 00:00:00 2001
From: Hou Zhiqiang <Zhiqiang.Hou@nxp.com>
Date: Tue, 18 Aug 2020 17:27:46 +0800
Subject: PCI: designware-ep: Fix the Header Type check

The current check causes multi-function devices to fail to
initialize. Fix the check by masking out the multi-function
bit of the Header Type register.

Link: https://lore.kernel.org/r/20200818092746.24366-1-Zhiqiang.Hou@nxp.com
Fixes: 0b24134f7888 ("PCI: dwc: Add validation that PCIe core is set to correct mode")
Signed-off-by: Hou Zhiqiang <Zhiqiang.Hou@nxp.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reviewed-by: Rob Herring <robh@kernel.org>
---
 drivers/pci/controller/dwc/pcie-designware-ep.c | 3 ++-
 include/uapi/linux/pci_regs.h                   | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c
index 305bfec2424d..29f5c616c3bc 100644
--- a/drivers/pci/controller/dwc/pcie-designware-ep.c
+++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
@@ -505,7 +505,8 @@ int dw_pcie_ep_init_complete(struct dw_pcie_ep *ep)
 	u32 reg;
 	int i;
 
-	hdr_type = dw_pcie_readb_dbi(pci, PCI_HEADER_TYPE);
+	hdr_type = dw_pcie_readb_dbi(pci, PCI_HEADER_TYPE) &
+		   PCI_HEADER_TYPE_MASK;
 	if (hdr_type != PCI_HEADER_TYPE_NORMAL) {
 		dev_err(pci->dev,
 			"PCIe controller is not set to EP mode (hdr_type:0x%x)!\n",
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index f9701410d3b5..57a222014cd2 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -76,6 +76,7 @@
 #define PCI_CACHE_LINE_SIZE	0x0c	/* 8 bits */
 #define PCI_LATENCY_TIMER	0x0d	/* 8 bits */
 #define PCI_HEADER_TYPE		0x0e	/* 8 bits */
+#define  PCI_HEADER_TYPE_MASK		0x7f
 #define  PCI_HEADER_TYPE_NORMAL		0
 #define  PCI_HEADER_TYPE_BRIDGE		1
 #define  PCI_HEADER_TYPE_CARDBUS	2
-- 
cgit v1.2.3


From 43fbb0860c682859780907d00bdb4abbb1b6359e Mon Sep 17 00:00:00 2001
From: Keyon Jie <yang.jie@linux.intel.com>
Date: Fri, 4 Sep 2020 16:27:29 +0300
Subject: ASoC: SOF: tokens: add token for component UUID

Add the definition SOF_TKN_COMP_UUID for the component UUID token, this
shall be used for all types of component in the future.

Signed-off-by: Keyon Jie <yang.jie@linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Signed-off-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
Link: https://lore.kernel.org/r/20200904132744.1699575-2-kai.vehmanen@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/uapi/sound/sof/tokens.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/sound/sof/tokens.h b/include/uapi/sound/sof/tokens.h
index 37f5aaa09c2b..d3aae4ad8959 100644
--- a/include/uapi/sound/sof/tokens.h
+++ b/include/uapi/sound/sof/tokens.h
@@ -74,6 +74,7 @@
  * #define SOF_TKN_COMP_PRELOAD_COUNT		403
  */
 #define SOF_TKN_COMP_CORE_ID			404
+#define SOF_TKN_COMP_UUID                       405
 
 /* SSP */
 #define SOF_TKN_INTEL_SSP_CLKS_CONTROL		500
-- 
cgit v1.2.3


From 938c3efd9e650ca343d04e70d11a17c64119e17c Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin@isovalent.com>
Date: Fri, 4 Sep 2020 17:14:53 +0100
Subject: bpf: Fix formatting in documentation for BPF helpers

Fix a formatting error in the description of bpf_load_hdr_opt() (rst2man
complains about a wrong indentation, but what is missing is actually a
blank line before the bullet list).

Fix and harmonise the formatting for other helpers.

Signed-off-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200904161454.31135-3-quentin@isovalent.com
---
 include/uapi/linux/bpf.h | 87 +++++++++++++++++++++++++-----------------------
 1 file changed, 45 insertions(+), 42 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 8dda13880957..90359cab501d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3349,38 +3349,38 @@ union bpf_attr {
  *	Description
  *		Dynamically cast a *sk* pointer to a *tcp6_sock* pointer.
  *	Return
- *		*sk* if casting is valid, or NULL otherwise.
+ *		*sk* if casting is valid, or **NULL** otherwise.
  *
  * struct tcp_sock *bpf_skc_to_tcp_sock(void *sk)
  *	Description
  *		Dynamically cast a *sk* pointer to a *tcp_sock* pointer.
  *	Return
- *		*sk* if casting is valid, or NULL otherwise.
+ *		*sk* if casting is valid, or **NULL** otherwise.
  *
  * struct tcp_timewait_sock *bpf_skc_to_tcp_timewait_sock(void *sk)
  * 	Description
  *		Dynamically cast a *sk* pointer to a *tcp_timewait_sock* pointer.
  *	Return
- *		*sk* if casting is valid, or NULL otherwise.
+ *		*sk* if casting is valid, or **NULL** otherwise.
  *
  * struct tcp_request_sock *bpf_skc_to_tcp_request_sock(void *sk)
  * 	Description
  *		Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer.
  *	Return
- *		*sk* if casting is valid, or NULL otherwise.
+ *		*sk* if casting is valid, or **NULL** otherwise.
  *
  * struct udp6_sock *bpf_skc_to_udp6_sock(void *sk)
  * 	Description
  *		Dynamically cast a *sk* pointer to a *udp6_sock* pointer.
  *	Return
- *		*sk* if casting is valid, or NULL otherwise.
+ *		*sk* if casting is valid, or **NULL** otherwise.
  *
  * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags)
  *	Description
  *		Return a user or a kernel stack in bpf program provided buffer.
  *		To achieve this, the helper needs *task*, which is a valid
- *		pointer to struct task_struct. To store the stacktrace, the
- *		bpf program provides *buf* with	a nonnegative *size*.
+ *		pointer to **struct task_struct**. To store the stacktrace, the
+ *		bpf program provides *buf* with a nonnegative *size*.
  *
  *		The last argument, *flags*, holds the number of stack frames to
  *		skip (from 0 to 255), masked with
@@ -3410,12 +3410,12 @@ union bpf_attr {
  * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags)
  *	Description
  *		Load header option.  Support reading a particular TCP header
- *		option for bpf program (BPF_PROG_TYPE_SOCK_OPS).
+ *		option for bpf program (**BPF_PROG_TYPE_SOCK_OPS**).
  *
  *		If *flags* is 0, it will search the option from the
- *		sock_ops->skb_data.  The comment in "struct bpf_sock_ops"
+ *		*skops*\ **->skb_data**.  The comment in **struct bpf_sock_ops**
  *		has details on what skb_data contains under different
- *		sock_ops->op.
+ *		*skops*\ **->op**.
  *
  *		The first byte of the *searchby_res* specifies the
  *		kind that it wants to search.
@@ -3435,7 +3435,7 @@ union bpf_attr {
  *		[ 254, 4, 0xeB, 0x9F, 0, 0, .... 0 ].
  *
  *		To search for the standard window scale option (3),
- *		the searchby_res should be [ 3, 0, 0, .... 0 ].
+ *		the *searchby_res* should be [ 3, 0, 0, .... 0 ].
  *		Note, kind-length must be 0 for regular option.
  *
  *		Searching for No-Op (0) and End-of-Option-List (1) are
@@ -3445,27 +3445,30 @@ union bpf_attr {
  *		of a header option.
  *
  *		Supported flags:
+ *
  *		* **BPF_LOAD_HDR_OPT_TCP_SYN** to search from the
  *		  saved_syn packet or the just-received syn packet.
  *
  *	Return
- *		>0 when found, the header option is copied to *searchby_res*.
- *		The return value is the total length copied.
+ *		> 0 when found, the header option is copied to *searchby_res*.
+ *		The return value is the total length copied. On failure, a
+ *		negative error code is returned:
  *
- *		**-EINVAL** If param is invalid
+ *		**-EINVAL** if a parameter is invalid.
  *
- *		**-ENOMSG** The option is not found
+ *		**-ENOMSG** if the option is not found.
  *
- *		**-ENOENT** No syn packet available when
- *			    **BPF_LOAD_HDR_OPT_TCP_SYN** is used
+ *		**-ENOENT** if no syn packet is available when
+ *		**BPF_LOAD_HDR_OPT_TCP_SYN** is used.
  *
- *		**-ENOSPC** Not enough space.  Only *len* number of
- *			    bytes are copied.
+ *		**-ENOSPC** if there is not enough space.  Only *len* number of
+ *		bytes are copied.
  *
- *		**-EFAULT** Cannot parse the header options in the packet
+ *		**-EFAULT** on failure to parse the header options in the
+ *		packet.
  *
- *		**-EPERM** This helper cannot be used under the
- *			   current sock_ops->op.
+ *		**-EPERM** if the helper cannot be used under the current
+ *		*skops*\ **->op**.
  *
  * long bpf_store_hdr_opt(struct bpf_sock_ops *skops, const void *from, u32 len, u64 flags)
  *	Description
@@ -3483,44 +3486,44 @@ union bpf_attr {
  *		by searching the same option in the outgoing skb.
  *
  *		This helper can only be called during
- *		BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ *		**BPF_SOCK_OPS_WRITE_HDR_OPT_CB**.
  *
  *	Return
  *		0 on success, or negative error in case of failure:
  *
- *		**-EINVAL** If param is invalid
+ *		**-EINVAL** if a parameter is invalid.
  *
- *		**-ENOSPC** Not enough space in the header.
- *			    Nothing has been written
+ *		**-ENOSPC** if there is not enough space in the header.
+ *		Nothing has been written.
  *
- *		**-EEXIST** The option has already existed
+ *		**-EEXIST** if the option already exists.
  *
- *		**-EFAULT** Cannot parse the existing header options
+ *		**-EFAULT** on failure to parse the existing header options.
  *
- *		**-EPERM** This helper cannot be used under the
- *			   current sock_ops->op.
+ *		**-EPERM** if the helper cannot be used under the current
+ *		*skops*\ **->op**.
  *
  * long bpf_reserve_hdr_opt(struct bpf_sock_ops *skops, u32 len, u64 flags)
  *	Description
  *		Reserve *len* bytes for the bpf header option.  The
- *		space will be used by bpf_store_hdr_opt() later in
- *		BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ *		space will be used by **bpf_store_hdr_opt**\ () later in
+ *		**BPF_SOCK_OPS_WRITE_HDR_OPT_CB**.
  *
- *		If bpf_reserve_hdr_opt() is called multiple times,
+ *		If **bpf_reserve_hdr_opt**\ () is called multiple times,
  *		the total number of bytes will be reserved.
  *
  *		This helper can only be called during
- *		BPF_SOCK_OPS_HDR_OPT_LEN_CB.
+ *		**BPF_SOCK_OPS_HDR_OPT_LEN_CB**.
  *
  *	Return
  *		0 on success, or negative error in case of failure:
  *
- *		**-EINVAL** if param is invalid
+ *		**-EINVAL** if a parameter is invalid.
  *
- *		**-ENOSPC** Not enough space in the header.
+ *		**-ENOSPC** if there is not enough space in the header.
  *
- *		**-EPERM** This helper cannot be used under the
- *			   current sock_ops->op.
+ *		**-EPERM** if the helper cannot be used under the current
+ *		*skops*\ **->op**.
  *
  * void *bpf_inode_storage_get(struct bpf_map *map, void *inode, void *value, u64 flags)
  *	Description
@@ -3560,9 +3563,9 @@ union bpf_attr {
  *
  * long bpf_d_path(struct path *path, char *buf, u32 sz)
  *	Description
- *		Return full path for given 'struct path' object, which
- *		needs to be the kernel BTF 'path' object. The path is
- *		returned in the provided buffer 'buf' of size 'sz' and
+ *		Return full path for given **struct path** object, which
+ *		needs to be the kernel BTF *path* object. The path is
+ *		returned in the provided buffer *buf* of size *sz* and
  *		is zero terminated.
  *
  *	Return
@@ -3573,7 +3576,7 @@ union bpf_attr {
  * long bpf_copy_from_user(void *dst, u32 size, const void *user_ptr)
  * 	Description
  * 		Read *size* bytes from user space address *user_ptr* and store
- * 		the data in *dst*. This is a wrapper of copy_from_user().
+ * 		the data in *dst*. This is a wrapper of **copy_from_user**\ ().
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  */
-- 
cgit v1.2.3


From 5205e919c9f0c5b48678f2c787871c96f665ca1b Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Mon, 7 Sep 2020 12:56:08 +0300
Subject: net: bridge: mcast: add support for src list and filter mode dumping

Support per port group src list (address and timer) and filter mode
dumping. Protected by either multicast_lock or rcu.

v3: add IPv6 support
v2: require RCU or multicast_lock to traverse src groups

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/if_bridge.h | 21 +++++++++++
 net/bridge/br_mdb.c            | 85 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 104 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index c1227aecd38f..75a2ac479247 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -455,10 +455,31 @@ enum {
 enum {
 	MDBA_MDB_EATTR_UNSPEC,
 	MDBA_MDB_EATTR_TIMER,
+	MDBA_MDB_EATTR_SRC_LIST,
+	MDBA_MDB_EATTR_GROUP_MODE,
 	__MDBA_MDB_EATTR_MAX
 };
 #define MDBA_MDB_EATTR_MAX (__MDBA_MDB_EATTR_MAX - 1)
 
+/* per mdb entry source */
+enum {
+	MDBA_MDB_SRCLIST_UNSPEC,
+	MDBA_MDB_SRCLIST_ENTRY,
+	__MDBA_MDB_SRCLIST_MAX
+};
+#define MDBA_MDB_SRCLIST_MAX (__MDBA_MDB_SRCLIST_MAX - 1)
+
+/* per mdb entry per source attributes
+ * these are embedded in MDBA_MDB_SRCLIST_ENTRY
+ */
+enum {
+	MDBA_MDB_SRCATTR_UNSPEC,
+	MDBA_MDB_SRCATTR_ADDRESS,
+	MDBA_MDB_SRCATTR_TIMER,
+	__MDBA_MDB_SRCATTR_MAX
+};
+#define MDBA_MDB_SRCATTR_MAX (__MDBA_MDB_SRCATTR_MAX - 1)
+
 /* multicast router types */
 enum {
 	MDB_RTR_TYPE_DISABLED,
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 559bdc256a1e..9dc12ce61018 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -77,10 +77,67 @@ static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip)
 #endif
 }
 
+static int __mdb_fill_srcs(struct sk_buff *skb,
+			   struct net_bridge_port_group *p)
+{
+	struct net_bridge_group_src *ent;
+	struct nlattr *nest, *nest_ent;
+
+	if (hlist_empty(&p->src_list))
+		return 0;
+
+	nest = nla_nest_start(skb, MDBA_MDB_EATTR_SRC_LIST);
+	if (!nest)
+		return -EMSGSIZE;
+
+	hlist_for_each_entry_rcu(ent, &p->src_list, node,
+				 lockdep_is_held(&p->port->br->multicast_lock)) {
+		nest_ent = nla_nest_start(skb, MDBA_MDB_SRCLIST_ENTRY);
+		if (!nest_ent)
+			goto out_cancel_err;
+		switch (ent->addr.proto) {
+		case htons(ETH_P_IP):
+			if (nla_put_in_addr(skb, MDBA_MDB_SRCATTR_ADDRESS,
+					    ent->addr.u.ip4)) {
+				nla_nest_cancel(skb, nest_ent);
+				goto out_cancel_err;
+			}
+			break;
+#if IS_ENABLED(CONFIG_IPV6)
+		case htons(ETH_P_IPV6):
+			if (nla_put_in6_addr(skb, MDBA_MDB_SRCATTR_ADDRESS,
+					     &ent->addr.u.ip6)) {
+				nla_nest_cancel(skb, nest_ent);
+				goto out_cancel_err;
+			}
+			break;
+#endif
+		default:
+			nla_nest_cancel(skb, nest_ent);
+			continue;
+		}
+		if (nla_put_u32(skb, MDBA_MDB_SRCATTR_TIMER,
+				br_timer_value(&ent->timer))) {
+			nla_nest_cancel(skb, nest_ent);
+			goto out_cancel_err;
+		}
+		nla_nest_end(skb, nest_ent);
+	}
+
+	nla_nest_end(skb, nest);
+
+	return 0;
+
+out_cancel_err:
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
+}
+
 static int __mdb_fill_info(struct sk_buff *skb,
 			   struct net_bridge_mdb_entry *mp,
 			   struct net_bridge_port_group *p)
 {
+	bool dump_srcs_mode = false;
 	struct timer_list *mtimer;
 	struct nlattr *nest_ent;
 	struct br_mdb_entry e;
@@ -119,6 +176,23 @@ static int __mdb_fill_info(struct sk_buff *skb,
 		nla_nest_cancel(skb, nest_ent);
 		return -EMSGSIZE;
 	}
+	switch (mp->addr.proto) {
+	case htons(ETH_P_IP):
+		dump_srcs_mode = !!(p && mp->br->multicast_igmp_version == 3);
+		break;
+#if IS_ENABLED(CONFIG_IPV6)
+	case htons(ETH_P_IPV6):
+		dump_srcs_mode = !!(p && mp->br->multicast_mld_version == 2);
+		break;
+#endif
+	}
+	if (dump_srcs_mode &&
+	    (__mdb_fill_srcs(skb, p) ||
+	     nla_put_u8(skb, MDBA_MDB_EATTR_GROUP_MODE, p->filter_mode))) {
+		nla_nest_cancel(skb, nest_ent);
+		return -EMSGSIZE;
+	}
+
 	nla_nest_end(skb, nest_ent);
 
 	return 0;
@@ -127,7 +201,7 @@ static int __mdb_fill_info(struct sk_buff *skb,
 static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
 			    struct net_device *dev)
 {
-	int idx = 0, s_idx = cb->args[1], err = 0;
+	int idx = 0, s_idx = cb->args[1], err = 0, pidx = 0, s_pidx = cb->args[2];
 	struct net_bridge *br = netdev_priv(dev);
 	struct net_bridge_mdb_entry *mp;
 	struct nlattr *nest, *nest2;
@@ -152,7 +226,7 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
 			break;
 		}
 
-		if (mp->host_joined) {
+		if (!s_pidx && mp->host_joined) {
 			err = __mdb_fill_info(skb, mp, NULL);
 			if (err) {
 				nla_nest_cancel(skb, nest2);
@@ -164,13 +238,19 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
 		      pp = &p->next) {
 			if (!p->port)
 				continue;
+			if (pidx < s_pidx)
+				goto skip_pg;
 
 			err = __mdb_fill_info(skb, mp, p);
 			if (err) {
 				nla_nest_cancel(skb, nest2);
 				goto out;
 			}
+skip_pg:
+			pidx++;
 		}
+		pidx = 0;
+		s_pidx = 0;
 		nla_nest_end(skb, nest2);
 skip:
 		idx++;
@@ -178,6 +258,7 @@ skip:
 
 out:
 	cb->args[1] = idx;
+	cb->args[2] = pidx;
 	nla_nest_end(skb, nest);
 	return err;
 }
-- 
cgit v1.2.3


From 0db0c34cfbc9838c1a14cb04dd880602abd699a7 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 3 Sep 2020 16:14:31 -0700
Subject: net: tighten the definition of interface statistics

This patch is born out of an investigation into which IEEE statistics
correspond to which struct rtnl_link_stats64 members. Turns out that
there seems to be reasonable consensus on the matter, among many drivers.
To save others the time (and it took more time than I'm comfortable
admitting) I'm adding comments referring to IEEE attributes to
struct rtnl_link_stats64.

Up until now we had two forms of documentation for stats - in
Documentation/ABI/testing/sysfs-class-net-statistics and the comments
on struct rtnl_link_stats64 itself. While the former is very cautious
in defining the expected behavior, the latter feel quite dated and
may not be easy to understand for a modern-day driver author
(e.g. rx_over_errors). At the same time modern systems are far more
complex and once obvious definitions lost their clarity. For example
- does rx_packet count at the MAC layer (aFramesReceivedOK)?
packets processed correctly by hardware? received by the driver?
or maybe received by the stack?

I tried to clarify the expectations, further clarifications from
others are very welcome.

The part hardest to untangle is rx_over_errors vs rx_fifo_errors
vs rx_missed_errors. After much deliberation I concluded that for
modern HW only two of the counters will make sense. The distinction
between internal FIFO overflow and packets dropped due to back-pressure
from the host is likely too implementation (driver and device) specific
to expose in the standard stats.

Now - which two of those counters we select to use is anyone's pick:

sysfs documentation suggests rx_over_errors counts packets which
did not fit into buffers due to MTU being too small, which I reused.
There don't seem to be many modern drivers using it (well, CAN drivers
seem to love this statistic).

Of the remaining two I picked rx_missed_errors to report device drops.
bnxt reports it and it's folded into "drop"s in procfs (while
rx_fifo_errors is an error, and modern devices usually receive the frame
OK, they just can't admit it into the pipeline).

Of the drivers I looked at only AMD Lance-like and NS8390-like use all
three of these counters. rx_missed_errors counts missed frames,
rx_over_errors counts overflow events, and rx_fifo_errors counts frames
which were truncated because they didn't fit into buffers. This suggests
that rx_fifo_errors may be the correct stat for truncated packets, but
I'd think a FIFO stat counting truncated packets would be very confusing
to a modern reader.

v2:
 - add driver developer notes about ethtool stat count and reset
 - replace Ethernet with IEEE 802.3 to better indicate source of attrs
 - mention byte counters don't count FCS
 - clarify RX counter is from device to host
 - drop "sightly" from sysfs paragraph
 - add examples of ethtool stats
 - s/incoming/received/ s/incoming/transmitted/

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/networking/index.rst      |   1 +
 Documentation/networking/statistics.rst | 132 +++++++++++++++++++++
 include/uapi/linux/if_link.h            | 204 +++++++++++++++++++++++++++++---
 3 files changed, 320 insertions(+), 17 deletions(-)
 create mode 100644 Documentation/networking/statistics.rst

(limited to 'include/uapi')

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index c29496fff81c..4167acc5c076 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -93,6 +93,7 @@ Contents:
    sctp
    secid
    seg6-sysctl
+   statistics
    strparser
    switchdev
    tc-actions-env-rules
diff --git a/Documentation/networking/statistics.rst b/Documentation/networking/statistics.rst
new file mode 100644
index 000000000000..d490b535cd14
--- /dev/null
+++ b/Documentation/networking/statistics.rst
@@ -0,0 +1,132 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================
+Interface statistics
+====================
+
+This document is a guide to Linux network interface statistics.
+
+There are two main sources of interface statistics in Linux:
+
+ - standard interface statistics based on
+   :c:type:`struct rtnl_link_stats64 <rtnl_link_stats64>`; and
+ - driver-defined statistics available via ethtool.
+
+There are multiple interfaces to reach the former. Most commonly used
+is the `ip` command from `iproute2`::
+
+  $ ip -s -s link show dev ens4u1u1
+  6: ens4u1u1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP mode DEFAULT group default qlen 1000
+    link/ether 48:2a:e3:4c:b1:d1 brd ff:ff:ff:ff:ff:ff
+    RX: bytes  packets  errors  dropped overrun mcast
+    74327665117 69016965 0       0       0       0
+    RX errors: length   crc     frame   fifo    missed
+               0        0       0       0       0
+    TX: bytes  packets  errors  dropped carrier collsns
+    21405556176 44608960 0       0       0       0
+    TX errors: aborted  fifo   window heartbeat transns
+               0        0       0       0       128
+    altname enp58s0u1u1
+
+Note that `-s` has been specified twice to see all members of
+:c:type:`struct rtnl_link_stats64 <rtnl_link_stats64>`.
+If `-s` is specified once the detailed errors won't be shown.
+
+`ip` supports JSON formatting via the `-j` option.
+
+Ethtool statistics can be dumped using `ethtool -S $ifc`, e.g.::
+
+  $ ethtool -S ens4u1u1
+  NIC statistics:
+     tx_single_collisions: 0
+     tx_multi_collisions: 0
+
+uAPIs
+=====
+
+procfs
+------
+
+The historical `/proc/net/dev` text interface gives access to the list
+of interfaces as well as their statistics.
+
+Note that even though this interface is using
+:c:type:`struct rtnl_link_stats64 <rtnl_link_stats64>`
+internally it combines some of the fields.
+
+sysfs
+-----
+
+Each device directory in sysfs contains a `statistics` directory (e.g.
+`/sys/class/net/lo/statistics/`) with files corresponding to
+members of :c:type:`struct rtnl_link_stats64 <rtnl_link_stats64>`.
+
+This simple interface is convenient especially in constrained/embedded
+environments without access to tools. However, it's inefficient when
+reading multiple stats as it internally performs a full dump of
+:c:type:`struct rtnl_link_stats64 <rtnl_link_stats64>`
+and reports only the stat corresponding to the accessed file.
+
+Sysfs files are documented in
+`Documentation/ABI/testing/sysfs-class-net-statistics`.
+
+
+netlink
+-------
+
+`rtnetlink` (`NETLINK_ROUTE`) is the preferred method of accessing
+:c:type:`struct rtnl_link_stats64 <rtnl_link_stats64>` stats.
+
+Statistics are reported both in the responses to link information
+requests (`RTM_GETLINK`) and statistic requests (`RTM_GETSTATS`,
+when `IFLA_STATS_LINK_64` bit is set in the `.filter_mask` of the request).
+
+ethtool
+-------
+
+Ethtool IOCTL interface allows drivers to report implementation
+specific statistics. Historically it has also been used to report
+statistics for which other APIs did not exist, like per-device-queue
+statistics, or standard-based statistics (e.g. RFC 2863).
+
+Statistics and their string identifiers are retrieved separately.
+Identifiers via `ETHTOOL_GSTRINGS` with `string_set` set to `ETH_SS_STATS`,
+and values via `ETHTOOL_GSTATS`. User space should use `ETHTOOL_GDRVINFO`
+to retrieve the number of statistics (`.n_stats`).
+
+debugfs
+-------
+
+Some drivers expose extra statistics via `debugfs`.
+
+struct rtnl_link_stats64
+========================
+
+.. kernel-doc:: include/uapi/linux/if_link.h
+    :identifiers: rtnl_link_stats64
+
+Notes for driver authors
+========================
+
+Drivers should report all statistics which have a matching member in
+:c:type:`struct rtnl_link_stats64 <rtnl_link_stats64>` exclusively
+via `.ndo_get_stats64`. Reporting such standard stats via ethtool
+or debugfs will not be accepted.
+
+Drivers must ensure best possible compliance with
+:c:type:`struct rtnl_link_stats64 <rtnl_link_stats64>`.
+Please note for example that detailed error statistics must be
+added into the general `rx_error` / `tx_error` counters.
+
+The `.ndo_get_stats64` callback cannot sleep because of accesses
+via `/proc/net/dev`. If the driver may sleep when retrieving the statistics
+from the device it should do so periodically asynchronously and only return
+a recent copy from `.ndo_get_stats64`. Ethtool interrupt coalescing interface
+allows setting the frequency of refreshing statistics, if needed.
+
+Retrieving ethtool statistics is a multi-syscall process, drivers are advised
+to keep the number of statistics constant to avoid race conditions with
+user space trying to read them.
+
+Statistics must persist across routine operations like bringing the interface
+down and up.
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 7fba4de511de..bf4667403cab 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -40,26 +40,197 @@ struct rtnl_link_stats {
 	__u32	rx_nohandler;		/* dropped, no handler found	*/
 };
 
-/* The main device statistics structure */
+/**
+ * struct rtnl_link_stats64 - The main device statistics structure.
+ *
+ * @rx_packets: Number of good packets received by the interface.
+ *   For hardware interfaces counts all good packets received from the device
+ *   by the host, including packets which host had to drop at various stages
+ *   of processing (even in the driver).
+ *
+ * @tx_packets: Number of packets successfully transmitted.
+ *   For hardware interfaces counts packets which host was able to successfully
+ *   hand over to the device, which does not necessarily mean that packets
+ *   had been successfully transmitted out of the device, only that device
+ *   acknowledged it copied them out of host memory.
+ *
+ * @rx_bytes: Number of good received bytes, corresponding to @rx_packets.
+ *
+ *   For IEEE 802.3 devices should count the length of Ethernet Frames
+ *   excluding the FCS.
+ *
+ * @tx_bytes: Number of good transmitted bytes, corresponding to @tx_packets.
+ *
+ *   For IEEE 802.3 devices should count the length of Ethernet Frames
+ *   excluding the FCS.
+ *
+ * @rx_errors: Total number of bad packets received on this network device.
+ *   This counter must include events counted by @rx_length_errors,
+ *   @rx_crc_errors, @rx_frame_errors and other errors not otherwise
+ *   counted.
+ *
+ * @tx_errors: Total number of transmit problems.
+ *   This counter must include events counted by @tx_aborted_errors,
+ *   @tx_carrier_errors, @tx_fifo_errors, @tx_heartbeat_errors,
+ *   @tx_window_errors and other errors not otherwise counted.
+ *
+ * @rx_dropped: Number of packets received but not processed,
+ *   e.g. due to lack of resources or unsupported protocol.
+ *   For hardware interfaces this counter should not include packets
+ *   dropped by the device which are counted separately in
+ *   @rx_missed_errors (since procfs folds those two counters together).
+ *
+ * @tx_dropped: Number of packets dropped on their way to transmission,
+ *   e.g. due to lack of resources.
+ *
+ * @multicast: Multicast packets received.
+ *   For hardware interfaces this statistic is commonly calculated
+ *   at the device level (unlike @rx_packets) and therefore may include
+ *   packets which did not reach the host.
+ *
+ *   For IEEE 802.3 devices this counter may be equivalent to:
+ *
+ *    - 30.3.1.1.21 aMulticastFramesReceivedOK
+ *
+ * @collisions: Number of collisions during packet transmissions.
+ *
+ * @rx_length_errors: Number of packets dropped due to invalid length.
+ *   Part of aggregate "frame" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices this counter should be equivalent to a sum
+ *   of the following attributes:
+ *
+ *    - 30.3.1.1.23 aInRangeLengthErrors
+ *    - 30.3.1.1.24 aOutOfRangeLengthField
+ *    - 30.3.1.1.25 aFrameTooLongErrors
+ *
+ * @rx_over_errors: Receiver FIFO overflow event counter.
+ *
+ *   Historically the count of overflow events. Such events may be
+ *   reported in the receive descriptors or via interrupts, and may
+ *   not correspond one-to-one with dropped packets.
+ *
+ *   The recommended interpretation for high speed interfaces is -
+ *   number of packets dropped because they did not fit into buffers
+ *   provided by the host, e.g. packets larger than MTU or next buffer
+ *   in the ring was not available for a scatter transfer.
+ *
+ *   Part of aggregate "frame" errors in `/proc/net/dev`.
+ *
+ *   This statistic was historically used interchangeably with
+ *   @rx_fifo_errors.
+ *
+ *   This statistic corresponds to hardware events and is not commonly used
+ *   on software devices.
+ *
+ * @rx_crc_errors: Number of packets received with a CRC error.
+ *   Part of aggregate "frame" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices this counter must be equivalent to:
+ *
+ *    - 30.3.1.1.6 aFrameCheckSequenceErrors
+ *
+ * @rx_frame_errors: Receiver frame alignment errors.
+ *   Part of aggregate "frame" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices this counter should be equivalent to:
+ *
+ *    - 30.3.1.1.7 aAlignmentErrors
+ *
+ * @rx_fifo_errors: Receiver FIFO error counter.
+ *
+ *   Historically the count of overflow events. Those events may be
+ *   reported in the receive descriptors or via interrupts, and may
+ *   not correspond one-to-one with dropped packets.
+ *
+ *   This statistic was used interchangeably with @rx_over_errors.
+ *   Not recommended for use in drivers for high speed interfaces.
+ *
+ *   This statistic is used on software devices, e.g. to count software
+ *   packet queue overflow (can) or sequencing errors (GRE).
+ *
+ * @rx_missed_errors: Count of packets missed by the host.
+ *   Folded into the "drop" counter in `/proc/net/dev`.
+ *
+ *   Counts number of packets dropped by the device due to lack
+ *   of buffer space. This usually indicates that the host interface
+ *   is slower than the network interface, or host is not keeping up
+ *   with the receive packet rate.
+ *
+ *   This statistic corresponds to hardware events and is not used
+ *   on software devices.
+ *
+ * @tx_aborted_errors:
+ *   Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *   For IEEE 802.3 devices capable of half-duplex operation this counter
+ *   must be equivalent to:
+ *
+ *    - 30.3.1.1.11 aFramesAbortedDueToXSColls
+ *
+ *   High speed interfaces may use this counter as a general device
+ *   discard counter.
+ *
+ * @tx_carrier_errors: Number of frame transmission errors due to loss
+ *   of carrier during transmission.
+ *   Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices this counter must be equivalent to:
+ *
+ *    - 30.3.1.1.13 aCarrierSenseErrors
+ *
+ * @tx_fifo_errors: Number of frame transmission errors due to device
+ *   FIFO underrun / underflow. This condition occurs when the device
+ *   begins transmission of a frame but is unable to deliver the
+ *   entire frame to the transmitter in time for transmission.
+ *   Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *
+ * @tx_heartbeat_errors: Number of Heartbeat / SQE Test errors for
+ *   old half-duplex Ethernet.
+ *   Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices possibly equivalent to:
+ *
+ *    - 30.3.2.1.4 aSQETestErrors
+ *
+ * @tx_window_errors: Number of frame transmission errors due
+ *   to late collisions (for Ethernet - after the first 64B of transmission).
+ *   Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices this counter must be equivalent to:
+ *
+ *    - 30.3.1.1.10 aLateCollisions
+ *
+ * @rx_compressed: Number of correctly received compressed packets.
+ *   This counter is only meaningful for interfaces which support
+ *   packet compression (e.g. CSLIP, PPP).
+ *
+ * @tx_compressed: Number of transmitted compressed packets.
+ *   This counter is only meaningful for interfaces which support
+ *   packet compression (e.g. CSLIP, PPP).
+ *
+ * @rx_nohandler: Number of packets received on the interface
+ *   but dropped by the networking stack because the device is
+ *   not designated to receive packets (e.g. backup link in a bond).
+ */
 struct rtnl_link_stats64 {
-	__u64	rx_packets;		/* total packets received	*/
-	__u64	tx_packets;		/* total packets transmitted	*/
-	__u64	rx_bytes;		/* total bytes received 	*/
-	__u64	tx_bytes;		/* total bytes transmitted	*/
-	__u64	rx_errors;		/* bad packets received		*/
-	__u64	tx_errors;		/* packet transmit problems	*/
-	__u64	rx_dropped;		/* no space in linux buffers	*/
-	__u64	tx_dropped;		/* no space available in linux	*/
-	__u64	multicast;		/* multicast packets received	*/
+	__u64	rx_packets;
+	__u64	tx_packets;
+	__u64	rx_bytes;
+	__u64	tx_bytes;
+	__u64	rx_errors;
+	__u64	tx_errors;
+	__u64	rx_dropped;
+	__u64	tx_dropped;
+	__u64	multicast;
 	__u64	collisions;
 
 	/* detailed rx_errors: */
 	__u64	rx_length_errors;
-	__u64	rx_over_errors;		/* receiver ring buff overflow	*/
-	__u64	rx_crc_errors;		/* recved pkt with crc error	*/
-	__u64	rx_frame_errors;	/* recv'd frame alignment error */
-	__u64	rx_fifo_errors;		/* recv'r fifo overrun		*/
-	__u64	rx_missed_errors;	/* receiver missed packet	*/
+	__u64	rx_over_errors;
+	__u64	rx_crc_errors;
+	__u64	rx_frame_errors;
+	__u64	rx_fifo_errors;
+	__u64	rx_missed_errors;
 
 	/* detailed tx_errors */
 	__u64	tx_aborted_errors;
@@ -71,8 +242,7 @@ struct rtnl_link_stats64 {
 	/* for cslip etc */
 	__u64	rx_compressed;
 	__u64	tx_compressed;
-
-	__u64	rx_nohandler;		/* dropped, no handler found	*/
+	__u64	rx_nohandler;
 };
 
 /* The struct should be in sync with struct ifmap */
-- 
cgit v1.2.3


From 783560d02dd61aee20d1d00c1c061bcafea30264 Mon Sep 17 00:00:00 2001
From: Dharageswari R <dharageswari.r@intel.com>
Date: Tue, 8 Sep 2020 12:28:25 +0300
Subject: ASoC: SOF: Implement snd_sof_bytes_ext_volatile_get kcontrol IO

This patch implements the snd_sof_bytes_ext_volatile_get() to read the
actual parameters from DSP by sending the SOF_IPC_COMP_GET_DATA IPC
for the kcontrol of type SOF_TPLG_KCTL_BYTES_VOLATILE_RO.

Signed-off-by: Dharageswari R <dharageswari.r@intel.com>
Reviewed-by: Guennadi Liakhovetski <guennadi.liakhovetski@linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
Link: https://lore.kernel.org/r/20200908092825.1813847-2-kai.vehmanen@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/uapi/sound/sof/tokens.h |  3 +++
 sound/soc/sof/control.c         | 58 +++++++++++++++++++++++++++++++++++++++++
 sound/soc/sof/sof-audio.h       |  2 ++
 sound/soc/sof/topology.c        |  1 +
 4 files changed, 64 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/sound/sof/tokens.h b/include/uapi/sound/sof/tokens.h
index d3aae4ad8959..a642bf30c027 100644
--- a/include/uapi/sound/sof/tokens.h
+++ b/include/uapi/sound/sof/tokens.h
@@ -24,6 +24,9 @@
 #define SOF_TPLG_KCTL_ENUM_ID	257
 #define SOF_TPLG_KCTL_BYTES_ID	258
 #define SOF_TPLG_KCTL_SWITCH_ID	259
+#define SOF_TPLG_KCTL_BYTES_VOLATILE_RO 260
+#define SOF_TPLG_KCTL_BYTES_VOLATILE_RW 261
+#define SOF_TPLG_KCTL_BYTES_WO_ID 262
 
 /*
  * Tokens - must match values in topology configurations
diff --git a/sound/soc/sof/control.c b/sound/soc/sof/control.c
index 186eea105bb1..d5e2966cafac 100644
--- a/sound/soc/sof/control.c
+++ b/sound/soc/sof/control.c
@@ -353,6 +353,64 @@ int snd_sof_bytes_ext_put(struct snd_kcontrol *kcontrol,
 	return 0;
 }
 
+int snd_sof_bytes_ext_volatile_get(struct snd_kcontrol *kcontrol, unsigned int __user *binary_data,
+				   unsigned int size)
+{
+	struct soc_bytes_ext *be = (struct soc_bytes_ext *)kcontrol->private_value;
+	struct snd_sof_control *scontrol = be->dobj.private;
+	struct snd_soc_component *scomp = scontrol->scomp;
+	struct sof_ipc_ctrl_data *cdata = scontrol->control_data;
+	struct snd_ctl_tlv header;
+	struct snd_ctl_tlv __user *tlvd = (struct snd_ctl_tlv __user *)binary_data;
+	size_t data_size;
+	int ret;
+	int err;
+
+	ret = pm_runtime_get_sync(scomp->dev);
+	if (ret < 0) {
+		dev_err_ratelimited(scomp->dev, "error: bytes_ext get failed to resume %d\n", ret);
+		pm_runtime_put_noidle(scomp->dev);
+		return ret;
+	}
+
+	/* set the ABI header values */
+	cdata->data->magic = SOF_ABI_MAGIC;
+	cdata->data->abi = SOF_ABI_VERSION;
+	/* get all the component data from DSP */
+	ret = snd_sof_ipc_set_get_comp_data(scontrol, SOF_IPC_COMP_GET_DATA, SOF_CTRL_TYPE_DATA_GET,
+					    scontrol->cmd, false);
+	if (ret < 0)
+		goto out;
+
+	/* check data size doesn't exceed max coming from topology */
+	if (cdata->data->size > be->max - sizeof(const struct sof_abi_hdr)) {
+		dev_err_ratelimited(scomp->dev, "error: user data size %d exceeds max size %zu.\n",
+				    cdata->data->size,
+				    be->max - sizeof(const struct sof_abi_hdr));
+		ret = -EINVAL;
+		goto out;
+	}
+
+	data_size = cdata->data->size + sizeof(const struct sof_abi_hdr);
+
+	header.numid = scontrol->cmd;
+	header.length = data_size;
+	if (copy_to_user(tlvd, &header, sizeof(const struct snd_ctl_tlv))) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	if (copy_to_user(tlvd->tlv, cdata->data, data_size))
+		ret = -EFAULT;
+out:
+	pm_runtime_mark_last_busy(scomp->dev);
+	err = pm_runtime_put_autosuspend(scomp->dev);
+	if (err < 0)
+		dev_err_ratelimited(scomp->dev, "error: bytes_ext get failed to idle %d\n", err);
+
+	return ret;
+}
+
 int snd_sof_bytes_ext_get(struct snd_kcontrol *kcontrol,
 			  unsigned int __user *binary_data,
 			  unsigned int size)
diff --git a/sound/soc/sof/sof-audio.h b/sound/soc/sof/sof-audio.h
index 196cbd322893..9f645a2e5a6c 100644
--- a/sound/soc/sof/sof-audio.h
+++ b/sound/soc/sof/sof-audio.h
@@ -142,6 +142,8 @@ int snd_sof_bytes_ext_put(struct snd_kcontrol *kcontrol,
 int snd_sof_bytes_ext_get(struct snd_kcontrol *kcontrol,
 			  unsigned int __user *binary_data,
 			  unsigned int size);
+int snd_sof_bytes_ext_volatile_get(struct snd_kcontrol *kcontrol, unsigned int __user *binary_data,
+				   unsigned int size);
 
 /*
  * Topology.
diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c
index 49fae48961a9..d5efac3af5c2 100644
--- a/sound/soc/sof/topology.c
+++ b/sound/soc/sof/topology.c
@@ -3688,6 +3688,7 @@ static const struct snd_soc_tplg_kcontrol_ops sof_io_ops[] = {
 /* vendor specific bytes ext handlers available for binding */
 static const struct snd_soc_tplg_bytes_ext_ops sof_bytes_ext_ops[] = {
 	{SOF_TPLG_KCTL_BYTES_ID, snd_sof_bytes_ext_get, snd_sof_bytes_ext_put},
+	{SOF_TPLG_KCTL_BYTES_VOLATILE_RO, snd_sof_bytes_ext_volatile_get},
 };
 
 static struct snd_soc_tplg_ops sof_tplg_ops = {
-- 
cgit v1.2.3


From b131c96496b369c7b14125e7c50e89ac7cec8051 Mon Sep 17 00:00:00 2001
From: "Jose M. Guisado Gomez" <guigom@riseup.net>
Date: Tue, 8 Sep 2020 13:01:41 +0200
Subject: netfilter: nf_tables: add userdata support for nft_object

Enables storing userdata for nft_object. Initially this will store an
optional comment but can be extended in the future as needed.

Adds new attribute NFTA_OBJ_USERDATA to nft_object.

Signed-off-by: Jose M. Guisado Gomez <guigom@riseup.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h        |  2 ++
 include/uapi/linux/netfilter/nf_tables.h |  2 ++
 net/netfilter/nf_tables_api.c            | 35 ++++++++++++++++++++++++--------
 3 files changed, 31 insertions(+), 8 deletions(-)

(limited to 'include/uapi')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 97a7e147a59a..99c1b3188b1e 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1123,6 +1123,8 @@ struct nft_object {
 	u32				genmask:2,
 					use:30;
 	u64				handle;
+	u16				udlen;
+	u8				*udata;
 	/* runtime data below here */
 	const struct nft_object_ops	*ops ____cacheline_aligned;
 	unsigned char			data[]
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 543dc697b796..2a6e09dea1a0 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1559,6 +1559,7 @@ enum nft_ct_expectation_attributes {
  * @NFTA_OBJ_DATA: stateful object data (NLA_NESTED)
  * @NFTA_OBJ_USE: number of references to this expression (NLA_U32)
  * @NFTA_OBJ_HANDLE: object handle (NLA_U64)
+ * @NFTA_OBJ_USERDATA: user data (NLA_BINARY)
  */
 enum nft_object_attributes {
 	NFTA_OBJ_UNSPEC,
@@ -1569,6 +1570,7 @@ enum nft_object_attributes {
 	NFTA_OBJ_USE,
 	NFTA_OBJ_HANDLE,
 	NFTA_OBJ_PAD,
+	NFTA_OBJ_USERDATA,
 	__NFTA_OBJ_MAX
 };
 #define NFTA_OBJ_MAX		(__NFTA_OBJ_MAX - 1)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 6ccce2a2e715..e9b4848e9dd0 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5755,6 +5755,8 @@ static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = {
 	[NFTA_OBJ_TYPE]		= { .type = NLA_U32 },
 	[NFTA_OBJ_DATA]		= { .type = NLA_NESTED },
 	[NFTA_OBJ_HANDLE]	= { .type = NLA_U64},
+	[NFTA_OBJ_USERDATA]	= { .type = NLA_BINARY,
+				    .len = NFT_USERDATA_MAXLEN },
 };
 
 static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
@@ -5902,6 +5904,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
 	struct nft_object *obj;
 	struct nft_ctx ctx;
 	u32 objtype;
+	u16 udlen;
 	int err;
 
 	if (!nla[NFTA_OBJ_TYPE] ||
@@ -5946,7 +5949,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
 	obj = nft_obj_init(&ctx, type, nla[NFTA_OBJ_DATA]);
 	if (IS_ERR(obj)) {
 		err = PTR_ERR(obj);
-		goto err1;
+		goto err_init;
 	}
 	obj->key.table = table;
 	obj->handle = nf_tables_alloc_handle(table);
@@ -5954,32 +5957,44 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
 	obj->key.name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL);
 	if (!obj->key.name) {
 		err = -ENOMEM;
-		goto err2;
+		goto err_strdup;
+	}
+
+	if (nla[NFTA_OBJ_USERDATA]) {
+		udlen = nla_len(nla[NFTA_OBJ_USERDATA]);
+		obj->udata = kzalloc(udlen, GFP_KERNEL);
+		if (obj->udata == NULL)
+			goto err_userdata;
+
+		nla_memcpy(obj->udata, nla[NFTA_OBJ_USERDATA], udlen);
+		obj->udlen = udlen;
 	}
 
 	err = nft_trans_obj_add(&ctx, NFT_MSG_NEWOBJ, obj);
 	if (err < 0)
-		goto err3;
+		goto err_trans;
 
 	err = rhltable_insert(&nft_objname_ht, &obj->rhlhead,
 			      nft_objname_ht_params);
 	if (err < 0)
-		goto err4;
+		goto err_obj_ht;
 
 	list_add_tail_rcu(&obj->list, &table->objects);
 	table->use++;
 	return 0;
-err4:
+err_obj_ht:
 	/* queued in transaction log */
 	INIT_LIST_HEAD(&obj->list);
 	return err;
-err3:
+err_trans:
 	kfree(obj->key.name);
-err2:
+err_userdata:
+	kfree(obj->udata);
+err_strdup:
 	if (obj->ops->destroy)
 		obj->ops->destroy(&ctx, obj);
 	kfree(obj);
-err1:
+err_init:
 	module_put(type->owner);
 	return err;
 }
@@ -6011,6 +6026,10 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
 			 NFTA_OBJ_PAD))
 		goto nla_put_failure;
 
+	if (obj->udata &&
+	    nla_put(skb, NFTA_OBJ_USERDATA, obj->udlen, obj->udata))
+		goto nla_put_failure;
+
 	nlmsg_end(skb, nlh);
 	return 0;
 
-- 
cgit v1.2.3


From 05b595e9c44acaca94192c6db430a489c1b212a7 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Wed, 9 Sep 2020 07:50:36 +0300
Subject: devlink: Introduce external controller flag

A devlink eswitch port may represent PCI PF/VF ports of a controller.

A controller is either located on the same system, or it can be an external
controller located in the host where such a NIC is plugged in.

Add the ability for driver to specify if a port is for external
controller.

Use such flag in the mlx5_core driver.

An example of an external controller having VF1 of PF0 belonging to
controller 1:

$ devlink port show pci/0000:06:00.0/2
pci/0000:06:00.0/2: type eth netdev ens2f0pf0vf1 flavour pcivf pfnum 0 vfnum 1 external true splittable false
  function:
    hw_addr 00:00:00:00:00:00
$ devlink port show pci/0000:06:00.0/2 -jp
{
    "port": {
        "pci/0000:06:00.0/2": {
            "type": "eth",
            "netdev": "ens2f0pf0vf1",
            "flavour": "pcivf",
            "pfnum": 0,
            "vfnum": 1,
            "external": true,
            "splittable": false,
            "function": {
                "hw_addr": "00:00:00:00:00:00"
            }
        }
    }
}

Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c |  6 ++++--
 include/net/devlink.h                            |  8 ++++++--
 include/uapi/linux/devlink.h                     |  1 +
 net/core/devlink.c                               | 12 ++++++++++--
 4 files changed, 21 insertions(+), 6 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index e13e5d1b3eae..5b3599caa007 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -1215,11 +1215,13 @@ static int register_devlink_port(struct mlx5_core_dev *dev,
 	struct devlink_port_attrs attrs = {};
 	struct netdev_phys_item_id ppid = {};
 	unsigned int dl_port_index = 0;
+	bool external;
 	u16 pfnum;
 
 	if (!is_devlink_port_supported(dev, rpriv))
 		return 0;
 
+	external = mlx5_core_is_ecpf_esw_manager(dev);
 	mlx5e_rep_get_port_parent_id(rpriv->netdev, &ppid);
 	dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, rep->vport);
 	pfnum = PCI_FUNC(dev->pdev->devfn);
@@ -1232,12 +1234,12 @@ static int register_devlink_port(struct mlx5_core_dev *dev,
 	} else if (rep->vport == MLX5_VPORT_PF) {
 		memcpy(rpriv->dl_port.attrs.switch_id.id, &ppid.id[0], ppid.id_len);
 		rpriv->dl_port.attrs.switch_id.id_len = ppid.id_len;
-		devlink_port_attrs_pci_pf_set(&rpriv->dl_port, pfnum);
+		devlink_port_attrs_pci_pf_set(&rpriv->dl_port, pfnum, external);
 	} else if (mlx5_eswitch_is_vf_vport(dev->priv.eswitch, rpriv->rep->vport)) {
 		memcpy(rpriv->dl_port.attrs.switch_id.id, &ppid.id[0], ppid.id_len);
 		rpriv->dl_port.attrs.switch_id.id_len = ppid.id_len;
 		devlink_port_attrs_pci_vf_set(&rpriv->dl_port,
-					      pfnum, rep->vport - 1);
+					      pfnum, rep->vport - 1, external);
 	}
 	return devlink_port_register(devlink, &rpriv->dl_port, dl_port_index);
 }
diff --git a/include/net/devlink.h b/include/net/devlink.h
index efff9274d248..2dad8c9151f4 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -60,19 +60,23 @@ struct devlink_port_phys_attrs {
 /**
  * struct devlink_port_pci_pf_attrs - devlink port's PCI PF attributes
  * @pf: Associated PCI PF number for this port.
+ * @external: when set, indicates if a port is for an external controller
  */
 struct devlink_port_pci_pf_attrs {
 	u16 pf;
+	u8 external:1;
 };
 
 /**
  * struct devlink_port_pci_vf_attrs - devlink port's PCI VF attributes
  * @pf: Associated PCI PF number for this port.
  * @vf: Associated PCI VF for of the PCI PF for this port.
+ * @external: when set, indicates if a port is for an external controller
  */
 struct devlink_port_pci_vf_attrs {
 	u16 pf;
 	u16 vf;
+	u8 external:1;
 };
 
 /**
@@ -1215,9 +1219,9 @@ void devlink_port_type_ib_set(struct devlink_port *devlink_port,
 void devlink_port_type_clear(struct devlink_port *devlink_port);
 void devlink_port_attrs_set(struct devlink_port *devlink_port,
 			    struct devlink_port_attrs *devlink_port_attrs);
-void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u16 pf);
+void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u16 pf, bool external);
 void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port,
-				   u16 pf, u16 vf);
+				   u16 pf, u16 vf, bool external);
 int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
 			u32 size, u16 ingress_pools_count,
 			u16 egress_pools_count, u16 ingress_tc_count,
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index cfef4245ea5a..40823ed7e05a 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -458,6 +458,7 @@ enum devlink_attr {
 	DEVLINK_ATTR_PORT_LANES,			/* u32 */
 	DEVLINK_ATTR_PORT_SPLITTABLE,			/* u8 */
 
+	DEVLINK_ATTR_PORT_EXTERNAL,		/* u8 */
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 49e911c19881..6f5f85372721 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -526,6 +526,8 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg,
 		if (nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER,
 				attrs->pci_pf.pf))
 			return -EMSGSIZE;
+		if (nla_put_u8(msg, DEVLINK_ATTR_PORT_EXTERNAL, attrs->pci_pf.external))
+			return -EMSGSIZE;
 		break;
 	case DEVLINK_PORT_FLAVOUR_PCI_VF:
 		if (nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER,
@@ -533,6 +535,8 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg,
 		    nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_VF_NUMBER,
 				attrs->pci_vf.vf))
 			return -EMSGSIZE;
+		if (nla_put_u8(msg, DEVLINK_ATTR_PORT_EXTERNAL, attrs->pci_vf.external))
+			return -EMSGSIZE;
 		break;
 	case DEVLINK_PORT_FLAVOUR_PHYSICAL:
 	case DEVLINK_PORT_FLAVOUR_CPU:
@@ -7716,8 +7720,9 @@ EXPORT_SYMBOL_GPL(devlink_port_attrs_set);
  *
  *	@devlink_port: devlink port
  *	@pf: associated PF for the devlink port instance
+ *	@external: indicates if the port is for an external controller
  */
-void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u16 pf)
+void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u16 pf, bool external)
 {
 	struct devlink_port_attrs *attrs = &devlink_port->attrs;
 	int ret;
@@ -7728,6 +7733,7 @@ void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u16 pf)
 		return;
 
 	attrs->pci_pf.pf = pf;
+	attrs->pci_pf.external = external;
 }
 EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_pf_set);
 
@@ -7737,9 +7743,10 @@ EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_pf_set);
  *	@devlink_port: devlink port
  *	@pf: associated PF for the devlink port instance
  *	@vf: associated VF of a PF for the devlink port instance
+ *	@external: indicates if the port is for an external controller
  */
 void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port,
-				   u16 pf, u16 vf)
+				   u16 pf, u16 vf, bool external)
 {
 	struct devlink_port_attrs *attrs = &devlink_port->attrs;
 	int ret;
@@ -7750,6 +7757,7 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port,
 		return;
 	attrs->pci_vf.pf = pf;
 	attrs->pci_vf.vf = vf;
+	attrs->pci_vf.external = external;
 }
 EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_vf_set);
 
-- 
cgit v1.2.3


From 3a2d9588c4f79adae6a0e986b64ebdd5b38085c6 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Wed, 9 Sep 2020 07:50:37 +0300
Subject: devlink: Introduce controller number

A devlink port may be for a controller consisting of PCI device(s).
A devlink instance holds ports of two types of controllers.
(1) controller discovered on same system where eswitch resides
This is the case where PCI PF/VF of a controller and devlink eswitch
instance both are located on a single system.
(2) controller located on external host system.
This is the case where a controller is located in one system and its
devlink eswitch ports are located in a different system.

When a devlink eswitch instance serves the devlink ports of both
controllers together, PCI PF/VF numbers may overlap.
Due to this a unique phys_port_name cannot be constructed.

For example, in the system below, controller-0 and controller-1 each have a
PCI PF pf0 whose eswitch ports can be present in controller-0.
This results in phys_port_name "pf0" for both.
A similar problem exists for VFs and upcoming sub-functions.

An example view of two controller systems:

             ---------------------------------------------------------
             |                                                       |
             |           --------- ---------         ------- ------- |
-----------  |           | vf(s) | | sf(s) |         |vf(s)| |sf(s)| |
| server  |  | -------   ----/---- ---/----- ------- ---/--- ---/--- |
| pci rc  |=== | pf0 |______/________/       | pf1 |___/_______/     |
| connect |  | -------                       -------                 |
-----------  |     | controller_num=1 (no eswitch)                   |
             ------|--------------------------------------------------
             (internal wire)
                   |
             ---------------------------------------------------------
             | devlink eswitch ports and reps                        |
             | ----------------------------------------------------- |
             | |ctrl-0 | ctrl-0 | ctrl-0 | ctrl-0 | ctrl-0 |ctrl-0 | |
             | |pf0    | pf0vfN | pf0sfN | pf1    | pf1vfN |pf1sfN | |
             | ----------------------------------------------------- |
             | |ctrl-1 | ctrl-1 | ctrl-1 | ctrl-1 | ctrl-1 |ctrl-1 | |
             | |pf1    | pf1vfN | pf1sfN | pf1    | pf1vfN |pf0sfN | |
             | ----------------------------------------------------- |
             |                                                       |
             |                                                       |
             |           --------- ---------         ------- ------- |
             |           | vf(s) | | sf(s) |         |vf(s)| |sf(s)| |
             | -------   ----/---- ---/----- ------- ---/--- ---/--- |
             | | pf0 |______/________/       | pf1 |___/_______/     |
             | -------                       -------                 |
             |                                                       |
             |  local controller_num=0 (eswitch)                     |
             ---------------------------------------------------------

An example devlink port for external controller with controller
number = 1 for a VF 1 of PF 0:

$ devlink port show pci/0000:06:00.0/2
pci/0000:06:00.0/2: type eth netdev ens2f0pf0vf1 flavour pcivf controller 1 pfnum 0 vfnum 1 external true splittable false
  function:
    hw_addr 00:00:00:00:00:00

$ devlink port show pci/0000:06:00.0/2 -jp
{
    "port": {
        "pci/0000:06:00.0/2": {
            "type": "eth",
            "netdev": "ens2f0pf0vf1",
            "flavour": "pcivf",
            "controller": 1,
            "pfnum": 0,
            "vfnum": 1,
            "external": true,
            "splittable": false,
            "function": {
                "hw_addr": "00:00:00:00:00:00"
            }
        }
    }
}

Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c |  9 +++++++--
 include/net/devlink.h                            |  9 +++++++--
 include/uapi/linux/devlink.h                     |  1 +
 net/core/devlink.c                               | 23 ++++++++++++++---------
 4 files changed, 29 insertions(+), 13 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 5b3599caa007..135ee26881c9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -1210,11 +1210,13 @@ is_devlink_port_supported(const struct mlx5_core_dev *dev,
 static int register_devlink_port(struct mlx5_core_dev *dev,
 				 struct mlx5e_rep_priv *rpriv)
 {
+	struct mlx5_esw_offload *offloads = &dev->priv.eswitch->offloads;
 	struct devlink *devlink = priv_to_devlink(dev);
 	struct mlx5_eswitch_rep *rep = rpriv->rep;
 	struct devlink_port_attrs attrs = {};
 	struct netdev_phys_item_id ppid = {};
 	unsigned int dl_port_index = 0;
+	u32 controller_num = 0;
 	bool external;
 	u16 pfnum;
 
@@ -1222,6 +1224,8 @@ static int register_devlink_port(struct mlx5_core_dev *dev,
 		return 0;
 
 	external = mlx5_core_is_ecpf_esw_manager(dev);
+	if (external)
+		controller_num = offloads->host_number + 1;
 	mlx5e_rep_get_port_parent_id(rpriv->netdev, &ppid);
 	dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, rep->vport);
 	pfnum = PCI_FUNC(dev->pdev->devfn);
@@ -1234,11 +1238,12 @@ static int register_devlink_port(struct mlx5_core_dev *dev,
 	} else if (rep->vport == MLX5_VPORT_PF) {
 		memcpy(rpriv->dl_port.attrs.switch_id.id, &ppid.id[0], ppid.id_len);
 		rpriv->dl_port.attrs.switch_id.id_len = ppid.id_len;
-		devlink_port_attrs_pci_pf_set(&rpriv->dl_port, pfnum, external);
+		devlink_port_attrs_pci_pf_set(&rpriv->dl_port, controller_num,
+					      pfnum, external);
 	} else if (mlx5_eswitch_is_vf_vport(dev->priv.eswitch, rpriv->rep->vport)) {
 		memcpy(rpriv->dl_port.attrs.switch_id.id, &ppid.id[0], ppid.id_len);
 		rpriv->dl_port.attrs.switch_id.id_len = ppid.id_len;
-		devlink_port_attrs_pci_vf_set(&rpriv->dl_port,
+		devlink_port_attrs_pci_vf_set(&rpriv->dl_port, controller_num,
 					      pfnum, rep->vport - 1, external);
 	}
 	return devlink_port_register(devlink, &rpriv->dl_port, dl_port_index);
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 2dad8c9151f4..eaec0a8cc5ef 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -59,21 +59,25 @@ struct devlink_port_phys_attrs {
 
 /**
  * struct devlink_port_pci_pf_attrs - devlink port's PCI PF attributes
+ * @controller: Associated controller number
  * @pf: Associated PCI PF number for this port.
  * @external: when set, indicates if a port is for an external controller
  */
 struct devlink_port_pci_pf_attrs {
+	u32 controller;
 	u16 pf;
 	u8 external:1;
 };
 
 /**
  * struct devlink_port_pci_vf_attrs - devlink port's PCI VF attributes
+ * @controller: Associated controller number
  * @pf: Associated PCI PF number for this port.
  * @vf: Associated PCI VF for of the PCI PF for this port.
  * @external: when set, indicates if a port is for an external controller
  */
 struct devlink_port_pci_vf_attrs {
+	u32 controller;
 	u16 pf;
 	u16 vf;
 	u8 external:1;
@@ -1219,8 +1223,9 @@ void devlink_port_type_ib_set(struct devlink_port *devlink_port,
 void devlink_port_type_clear(struct devlink_port *devlink_port);
 void devlink_port_attrs_set(struct devlink_port *devlink_port,
 			    struct devlink_port_attrs *devlink_port_attrs);
-void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u16 pf, bool external);
-void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port,
+void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 controller,
+				   u16 pf, bool external);
+void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 controller,
 				   u16 pf, u16 vf, bool external);
 int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
 			u32 size, u16 ingress_pools_count,
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 40823ed7e05a..40d35145c879 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -459,6 +459,7 @@ enum devlink_attr {
 	DEVLINK_ATTR_PORT_SPLITTABLE,			/* u8 */
 
 	DEVLINK_ATTR_PORT_EXTERNAL,		/* u8 */
+	DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,	/* u32 */
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 6f5f85372721..9cf5b118253b 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -523,17 +523,18 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg,
 		return -EMSGSIZE;
 	switch (devlink_port->attrs.flavour) {
 	case DEVLINK_PORT_FLAVOUR_PCI_PF:
-		if (nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER,
-				attrs->pci_pf.pf))
+		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,
+				attrs->pci_pf.controller) ||
+		    nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER, attrs->pci_pf.pf))
 			return -EMSGSIZE;
 		if (nla_put_u8(msg, DEVLINK_ATTR_PORT_EXTERNAL, attrs->pci_pf.external))
 			return -EMSGSIZE;
 		break;
 	case DEVLINK_PORT_FLAVOUR_PCI_VF:
-		if (nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER,
-				attrs->pci_vf.pf) ||
-		    nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_VF_NUMBER,
-				attrs->pci_vf.vf))
+		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,
+				attrs->pci_vf.controller) ||
+		    nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER, attrs->pci_vf.pf) ||
+		    nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_VF_NUMBER, attrs->pci_vf.vf))
 			return -EMSGSIZE;
 		if (nla_put_u8(msg, DEVLINK_ATTR_PORT_EXTERNAL, attrs->pci_vf.external))
 			return -EMSGSIZE;
@@ -7719,10 +7720,12 @@ EXPORT_SYMBOL_GPL(devlink_port_attrs_set);
  *	devlink_port_attrs_pci_pf_set - Set PCI PF port attributes
  *
  *	@devlink_port: devlink port
+ *	@controller: associated controller number for the devlink port instance
  *	@pf: associated PF for the devlink port instance
  *	@external: indicates if the port is for an external controller
  */
-void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u16 pf, bool external)
+void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 controller,
+				   u16 pf, bool external)
 {
 	struct devlink_port_attrs *attrs = &devlink_port->attrs;
 	int ret;
@@ -7731,7 +7734,7 @@ void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u16 pf, bo
 				       DEVLINK_PORT_FLAVOUR_PCI_PF);
 	if (ret)
 		return;
-
+	attrs->pci_pf.controller = controller;
 	attrs->pci_pf.pf = pf;
 	attrs->pci_pf.external = external;
 }
@@ -7741,11 +7744,12 @@ EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_pf_set);
  *	devlink_port_attrs_pci_vf_set - Set PCI VF port attributes
  *
  *	@devlink_port: devlink port
+ *	@controller: associated controller number for the devlink port instance
  *	@pf: associated PF for the devlink port instance
  *	@vf: associated VF of a PF for the devlink port instance
  *	@external: indicates if the port is for an external controller
  */
-void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port,
+void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 controller,
 				   u16 pf, u16 vf, bool external)
 {
 	struct devlink_port_attrs *attrs = &devlink_port->attrs;
@@ -7755,6 +7759,7 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port,
 				       DEVLINK_PORT_FLAVOUR_PCI_VF);
 	if (ret)
 		return;
+	attrs->pci_vf.controller = controller;
 	attrs->pci_vf.pf = pf;
 	attrs->pci_vf.vf = vf;
 	attrs->pci_vf.external = external;
-- 
cgit v1.2.3


From ad47ff330b26a9fefa882032be2122700e1625ab Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Wed, 9 Sep 2020 09:34:13 -0700
Subject: quota: widen timestamps for the fs_disk_quota structure

Soon, XFS will support quota grace period expiration timestamps beyond
the year 2038; widen the timestamp fields to handle the extra time bits.
Internally, XFS now stores unsigned 34-bit quantities, so the extra 8
bits here should work fine.  (Note that XFS is the only user of this
structure.)

Link: https://lore.kernel.org/r/20200909163413.GJ7955@magnolia
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/quota.c               | 42 ++++++++++++++++++++++++++++++++++++------
 include/uapi/linux/dqblk_xfs.h | 11 ++++++++++-
 2 files changed, 46 insertions(+), 7 deletions(-)

(limited to 'include/uapi')

diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 47f9e151988b..52362eeaea94 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -481,6 +481,14 @@ static inline u64 quota_btobb(u64 bytes)
 	return (bytes + (1 << XFS_BB_SHIFT) - 1) >> XFS_BB_SHIFT;
 }
 
+static inline s64 copy_from_xfs_dqblk_ts(const struct fs_disk_quota *d,
+		__s32 timer, __s8 timer_hi)
+{
+	if (d->d_fieldmask & FS_DQ_BIGTIME)
+		return (u32)timer | (s64)timer_hi << 32;
+	return timer;
+}
+
 static void copy_from_xfs_dqblk(struct qc_dqblk *dst, struct fs_disk_quota *src)
 {
 	dst->d_spc_hardlimit = quota_bbtob(src->d_blk_hardlimit);
@@ -489,14 +497,17 @@ static void copy_from_xfs_dqblk(struct qc_dqblk *dst, struct fs_disk_quota *src)
 	dst->d_ino_softlimit = src->d_ino_softlimit;
 	dst->d_space = quota_bbtob(src->d_bcount);
 	dst->d_ino_count = src->d_icount;
-	dst->d_ino_timer = src->d_itimer;
-	dst->d_spc_timer = src->d_btimer;
+	dst->d_ino_timer = copy_from_xfs_dqblk_ts(src, src->d_itimer,
+						  src->d_itimer_hi);
+	dst->d_spc_timer = copy_from_xfs_dqblk_ts(src, src->d_btimer,
+						  src->d_btimer_hi);
 	dst->d_ino_warns = src->d_iwarns;
 	dst->d_spc_warns = src->d_bwarns;
 	dst->d_rt_spc_hardlimit = quota_bbtob(src->d_rtb_hardlimit);
 	dst->d_rt_spc_softlimit = quota_bbtob(src->d_rtb_softlimit);
 	dst->d_rt_space = quota_bbtob(src->d_rtbcount);
-	dst->d_rt_spc_timer = src->d_rtbtimer;
+	dst->d_rt_spc_timer = copy_from_xfs_dqblk_ts(src, src->d_rtbtimer,
+						     src->d_rtbtimer_hi);
 	dst->d_rt_spc_warns = src->d_rtbwarns;
 	dst->d_fieldmask = 0;
 	if (src->d_fieldmask & FS_DQ_ISOFT)
@@ -588,10 +599,26 @@ static int quota_setxquota(struct super_block *sb, int type, qid_t id,
 	return sb->s_qcop->set_dqblk(sb, qid, &qdq);
 }
 
+static inline void copy_to_xfs_dqblk_ts(const struct fs_disk_quota *d,
+		__s32 *timer_lo, __s8 *timer_hi, s64 timer)
+{
+	*timer_lo = timer;
+	if (d->d_fieldmask & FS_DQ_BIGTIME)
+		*timer_hi = timer >> 32;
+}
+
+static inline bool want_bigtime(s64 timer)
+{
+	return timer > S32_MAX || timer < S32_MIN;
+}
+
 static void copy_to_xfs_dqblk(struct fs_disk_quota *dst, struct qc_dqblk *src,
 			      int type, qid_t id)
 {
 	memset(dst, 0, sizeof(*dst));
+	if (want_bigtime(src->d_ino_timer) || want_bigtime(src->d_spc_timer) ||
+	    want_bigtime(src->d_rt_spc_timer))
+		dst->d_fieldmask |= FS_DQ_BIGTIME;
 	dst->d_version = FS_DQUOT_VERSION;
 	dst->d_id = id;
 	if (type == USRQUOTA)
@@ -606,14 +633,17 @@ static void copy_to_xfs_dqblk(struct fs_disk_quota *dst, struct qc_dqblk *src,
 	dst->d_ino_softlimit = src->d_ino_softlimit;
 	dst->d_bcount = quota_btobb(src->d_space);
 	dst->d_icount = src->d_ino_count;
-	dst->d_itimer = src->d_ino_timer;
-	dst->d_btimer = src->d_spc_timer;
+	copy_to_xfs_dqblk_ts(dst, &dst->d_itimer, &dst->d_itimer_hi,
+			     src->d_ino_timer);
+	copy_to_xfs_dqblk_ts(dst, &dst->d_btimer, &dst->d_btimer_hi,
+			     src->d_spc_timer);
 	dst->d_iwarns = src->d_ino_warns;
 	dst->d_bwarns = src->d_spc_warns;
 	dst->d_rtb_hardlimit = quota_btobb(src->d_rt_spc_hardlimit);
 	dst->d_rtb_softlimit = quota_btobb(src->d_rt_spc_softlimit);
 	dst->d_rtbcount = quota_btobb(src->d_rt_space);
-	dst->d_rtbtimer = src->d_rt_spc_timer;
+	copy_to_xfs_dqblk_ts(dst, &dst->d_rtbtimer, &dst->d_rtbtimer_hi,
+			     src->d_rt_spc_timer);
 	dst->d_rtbwarns = src->d_rt_spc_warns;
 }
 
diff --git a/include/uapi/linux/dqblk_xfs.h b/include/uapi/linux/dqblk_xfs.h
index 03d890b80ebc..16d73f54376d 100644
--- a/include/uapi/linux/dqblk_xfs.h
+++ b/include/uapi/linux/dqblk_xfs.h
@@ -66,7 +66,10 @@ typedef struct fs_disk_quota {
 	__s32		d_btimer;	/* similar to above; for disk blocks */
 	__u16	  	d_iwarns;       /* # warnings issued wrt num inodes */
 	__u16	  	d_bwarns;       /* # warnings issued wrt disk blocks */
-	__s32		d_padding2;	/* padding2 - for future use */
+	__s8		d_itimer_hi;	/* upper 8 bits of timer values */
+	__s8		d_btimer_hi;
+	__s8		d_rtbtimer_hi;
+	__s8		d_padding2;	/* padding2 - for future use */
 	__u64		d_rtb_hardlimit;/* absolute limit on realtime blks */
 	__u64		d_rtb_softlimit;/* preferred limit on RT disk blks */
 	__u64		d_rtbcount;	/* # realtime blocks owned */
@@ -121,6 +124,12 @@ typedef struct fs_disk_quota {
 #define FS_DQ_RTBCOUNT		(1<<14)
 #define FS_DQ_ACCT_MASK		(FS_DQ_BCOUNT | FS_DQ_ICOUNT | FS_DQ_RTBCOUNT)
 
+/*
+ * Quota expiration timestamps are 40-bit signed integers, with the upper 8
+ * bits encoded in the _hi fields.
+ */
+#define FS_DQ_BIGTIME		(1<<15)
+
 /*
  * Various flags related to quotactl(2).
  */
-- 
cgit v1.2.3


From d1c10767837c4181f2e054865a58166fc117783b Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 9 Sep 2020 15:54:46 +0200
Subject: quota: Expand comment describing d_itimer

Expand comment describing d_itimer in struct fs_disk_quota.

Reported-by: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/uapi/linux/dqblk_xfs.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/dqblk_xfs.h b/include/uapi/linux/dqblk_xfs.h
index 16d73f54376d..c71d909addda 100644
--- a/include/uapi/linux/dqblk_xfs.h
+++ b/include/uapi/linux/dqblk_xfs.h
@@ -61,8 +61,9 @@ typedef struct fs_disk_quota {
 	__u64		d_ino_softlimit;/* preferred inode limit */
 	__u64		d_bcount;	/* # disk blocks owned by the user */
 	__u64		d_icount;	/* # inodes owned by the user */
-	__s32		d_itimer;	/* zero if within inode limits */
-					/* if not, we refuse service */
+	__s32		d_itimer;	/* Zero if within inode limits. If
+					 * not, we refuse service at this time
+					 * (in seconds since Unix epoch) */
 	__s32		d_btimer;	/* similar to above; for disk blocks */
 	__u16	  	d_iwarns;       /* # warnings issued wrt num inodes */
 	__u16	  	d_bwarns;       /* # warnings issued wrt disk blocks */
-- 
cgit v1.2.3


From 0dd4ff93f4c8dba016ad79384007da4938cd54a1 Mon Sep 17 00:00:00 2001
From: Sebastien Boeuf <sebastien.boeuf@intel.com>
Date: Wed, 19 Aug 2020 18:19:42 -0400
Subject: virtio: Implement get_shm_region for PCI transport

On PCI the shm regions are found using capability entries;
find a region by searching for the capability.

Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: kbuild test robot <lkp@intel.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Cc: kvm@vger.kernel.org
Cc: virtualization@lists.linux-foundation.org
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 drivers/virtio/virtio_pci_modern.c | 95 ++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/virtio_pci.h    | 11 ++++-
 2 files changed, 105 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
index 3e14e700b231..3d6ae5a5e252 100644
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -444,6 +444,99 @@ static void del_vq(struct virtio_pci_vq_info *info)
 	vring_del_virtqueue(vq);
 }
 
+static int virtio_pci_find_shm_cap(struct pci_dev *dev, u8 required_id,
+				   u8 *bar, u64 *offset, u64 *len)
+{
+	int pos;
+
+	for (pos = pci_find_capability(dev, PCI_CAP_ID_VNDR); pos > 0;
+	     pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_VNDR)) {
+		u8 type, cap_len, id;
+		u32 tmp32;
+		u64 res_offset, res_length;
+
+		pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
+							 cfg_type), &type);
+		if (type != VIRTIO_PCI_CAP_SHARED_MEMORY_CFG)
+			continue;
+
+		pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
+							 cap_len), &cap_len);
+		if (cap_len != sizeof(struct virtio_pci_cap64)) {
+			dev_err(&dev->dev, "%s: shm cap with bad size offset:"
+				" %d size: %d\n", __func__, pos, cap_len);
+			continue;
+		}
+
+		pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
+							 id), &id);
+		if (id != required_id)
+			continue;
+
+		/* Type, and ID match, looks good */
+		pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
+							 bar), bar);
+
+		/* Read the lower 32bit of length and offset */
+		pci_read_config_dword(dev, pos + offsetof(struct virtio_pci_cap,
+							  offset), &tmp32);
+		res_offset = tmp32;
+		pci_read_config_dword(dev, pos + offsetof(struct virtio_pci_cap,
+							  length), &tmp32);
+		res_length = tmp32;
+
+		/* and now the top half */
+		pci_read_config_dword(dev,
+				      pos + offsetof(struct virtio_pci_cap64,
+						     offset_hi), &tmp32);
+		res_offset |= ((u64)tmp32) << 32;
+		pci_read_config_dword(dev,
+				      pos + offsetof(struct virtio_pci_cap64,
+						     length_hi), &tmp32);
+		res_length |= ((u64)tmp32) << 32;
+
+		*offset = res_offset;
+		*len = res_length;
+
+		return pos;
+	}
+	return 0;
+}
+
+static bool vp_get_shm_region(struct virtio_device *vdev,
+			      struct virtio_shm_region *region, u8 id)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	struct pci_dev *pci_dev = vp_dev->pci_dev;
+	u8 bar;
+	u64 offset, len;
+	phys_addr_t phys_addr;
+	size_t bar_len;
+
+	if (!virtio_pci_find_shm_cap(pci_dev, id, &bar, &offset, &len))
+		return false;
+
+	phys_addr = pci_resource_start(pci_dev, bar);
+	bar_len = pci_resource_len(pci_dev, bar);
+
+	if ((offset + len) < offset) {
+		dev_err(&pci_dev->dev, "%s: cap offset+len overflow detected\n",
+			__func__);
+		return false;
+	}
+
+	if (offset + len > bar_len) {
+		dev_err(&pci_dev->dev, "%s: bar shorter than cap offset+len\n",
+			__func__);
+		return false;
+	}
+
+	region->len = len;
+	region->addr = (u64) phys_addr + offset;
+
+	return true;
+}
+
 static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
 	.get		= NULL,
 	.set		= NULL,
@@ -458,6 +551,7 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
 	.bus_name	= vp_bus_name,
 	.set_vq_affinity = vp_set_vq_affinity,
 	.get_vq_affinity = vp_get_vq_affinity,
+	.get_shm_region  = vp_get_shm_region,
 };
 
 static const struct virtio_config_ops virtio_pci_config_ops = {
@@ -474,6 +568,7 @@ static const struct virtio_config_ops virtio_pci_config_ops = {
 	.bus_name	= vp_bus_name,
 	.set_vq_affinity = vp_set_vq_affinity,
 	.get_vq_affinity = vp_get_vq_affinity,
+	.get_shm_region  = vp_get_shm_region,
 };
 
 /**
diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h
index 90007a1abcab..3a86f36d7e3d 100644
--- a/include/uapi/linux/virtio_pci.h
+++ b/include/uapi/linux/virtio_pci.h
@@ -113,6 +113,8 @@
 #define VIRTIO_PCI_CAP_DEVICE_CFG	4
 /* PCI configuration access */
 #define VIRTIO_PCI_CAP_PCI_CFG		5
+/* Additional shared memory capability */
+#define VIRTIO_PCI_CAP_SHARED_MEMORY_CFG 8
 
 /* This is the PCI capability header: */
 struct virtio_pci_cap {
@@ -121,11 +123,18 @@ struct virtio_pci_cap {
 	__u8 cap_len;		/* Generic PCI field: capability length */
 	__u8 cfg_type;		/* Identifies the structure. */
 	__u8 bar;		/* Where to find it. */
-	__u8 padding[3];	/* Pad to full dword. */
+	__u8 id;		/* Multiple capabilities of the same type */
+	__u8 padding[2];	/* Pad to full dword. */
 	__le32 offset;		/* Offset within bar. */
 	__le32 length;		/* Length of the structure, in bytes. */
 };
 
+struct virtio_pci_cap64 {
+	struct virtio_pci_cap cap;
+	__le32 offset_hi;             /* Most sig 32 bits of offset */
+	__le32 length_hi;             /* Most sig 32 bits of length */
+};
+
 struct virtio_pci_notify_cap {
 	struct virtio_pci_cap cap;
 	__le32 notify_off_multiplier;	/* Multiplier for queue_notify_off. */
-- 
cgit v1.2.3


From 38e895487afc2ed42c11045853cbb3fa20b52b6e Mon Sep 17 00:00:00 2001
From: Sebastien Boeuf <sebastien.boeuf@intel.com>
Date: Wed, 19 Aug 2020 18:19:43 -0400
Subject: virtio: Implement get_shm_region for MMIO transport

On MMIO a new set of registers is defined for finding SHM
regions.  Add their definitions and use them to find the region.

Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
Cc: kvm@vger.kernel.org
Cc: virtualization@lists.linux-foundation.org
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 drivers/virtio/virtio_mmio.c     | 31 +++++++++++++++++++++++++++++++
 include/uapi/linux/virtio_mmio.h | 11 +++++++++++
 2 files changed, 42 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 627ac0487494..238383ff1064 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -498,6 +498,36 @@ static const char *vm_bus_name(struct virtio_device *vdev)
 	return vm_dev->pdev->name;
 }
 
+static bool vm_get_shm_region(struct virtio_device *vdev,
+			      struct virtio_shm_region *region, u8 id)
+{
+	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
+	u64 len, addr;
+
+	/* Select the region we're interested in */
+	writel(id, vm_dev->base + VIRTIO_MMIO_SHM_SEL);
+
+	/* Read the region size */
+	len = (u64) readl(vm_dev->base + VIRTIO_MMIO_SHM_LEN_LOW);
+	len |= (u64) readl(vm_dev->base + VIRTIO_MMIO_SHM_LEN_HIGH) << 32;
+
+	region->len = len;
+
+	/* Check if region length is -1. If that's the case, the shared memory
+	 * region does not exist and there is no need to proceed further.
+	 */
+	if (len == ~(u64)0)
+		return false;
+
+	/* Read the region base address */
+	addr = (u64) readl(vm_dev->base + VIRTIO_MMIO_SHM_BASE_LOW);
+	addr |= (u64) readl(vm_dev->base + VIRTIO_MMIO_SHM_BASE_HIGH) << 32;
+
+	region->addr = addr;
+
+	return true;
+}
+
 static const struct virtio_config_ops virtio_mmio_config_ops = {
 	.get		= vm_get,
 	.set		= vm_set,
@@ -510,6 +540,7 @@ static const struct virtio_config_ops virtio_mmio_config_ops = {
 	.get_features	= vm_get_features,
 	.finalize_features = vm_finalize_features,
 	.bus_name	= vm_bus_name,
+	.get_shm_region = vm_get_shm_region,
 };
 
 
diff --git a/include/uapi/linux/virtio_mmio.h b/include/uapi/linux/virtio_mmio.h
index c4b09689ab64..0650f91bea6c 100644
--- a/include/uapi/linux/virtio_mmio.h
+++ b/include/uapi/linux/virtio_mmio.h
@@ -122,6 +122,17 @@
 #define VIRTIO_MMIO_QUEUE_USED_LOW	0x0a0
 #define VIRTIO_MMIO_QUEUE_USED_HIGH	0x0a4
 
+/* Shared memory region id */
+#define VIRTIO_MMIO_SHM_SEL             0x0ac
+
+/* Shared memory region length, 64 bits in two halves */
+#define VIRTIO_MMIO_SHM_LEN_LOW         0x0b0
+#define VIRTIO_MMIO_SHM_LEN_HIGH        0x0b4
+
+/* Shared memory region base address, 64 bits in two halves */
+#define VIRTIO_MMIO_SHM_BASE_LOW        0x0b8
+#define VIRTIO_MMIO_SHM_BASE_HIGH       0x0bc
+
 /* Configuration atomicity value */
 #define VIRTIO_MMIO_CONFIG_GENERATION	0x0fc
 
-- 
cgit v1.2.3


From 22f3787e9d95e72d1f09795f294fb010e2998f43 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Wed, 19 Aug 2020 18:19:46 -0400
Subject: virtiofs: set up virtio_fs dax_device

Setup a dax device.

Use the shm capability to find the cache entry and map it.

The DAX window is accessed by the fs/dax.c infrastructure and must have
struct pages (at least on x86).  Use devm_memremap_pages() to map the
DAX window PCI BAR and allocate struct page.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/fuse/virtio_fs.c            | 138 +++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/virtio_fs.h |   3 +
 2 files changed, 141 insertions(+)

(limited to 'include/uapi')

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 47ecdc15f25d..f31a59f74475 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -5,12 +5,16 @@
  */
 
 #include <linux/fs.h>
+#include <linux/dax.h>
+#include <linux/pci.h>
+#include <linux/pfn_t.h>
 #include <linux/module.h>
 #include <linux/virtio.h>
 #include <linux/virtio_fs.h>
 #include <linux/delay.h>
 #include <linux/fs_context.h>
 #include <linux/highmem.h>
+#include <linux/uio.h>
 #include "fuse_i.h"
 
 /* List of virtio-fs device instances and a lock for the list. Also provides
@@ -49,6 +53,12 @@ struct virtio_fs {
 	struct virtio_fs_vq *vqs;
 	unsigned int nvqs;               /* number of virtqueues */
 	unsigned int num_request_queues; /* number of request queues */
+	struct dax_device *dax_dev;
+
+	/* DAX memory window where file contents are mapped */
+	void *window_kaddr;
+	phys_addr_t window_phys_addr;
+	size_t window_len;
 };
 
 struct virtio_fs_forget_req {
@@ -686,6 +696,130 @@ static void virtio_fs_cleanup_vqs(struct virtio_device *vdev,
 	vdev->config->del_vqs(vdev);
 }
 
+/* Map a window offset to a page frame number.  The window offset will have
+ * been produced by .iomap_begin(), which maps a file offset to a window
+ * offset.
+ */
+static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
+				    long nr_pages, void **kaddr, pfn_t *pfn)
+{
+	struct virtio_fs *fs = dax_get_private(dax_dev);
+	phys_addr_t offset = PFN_PHYS(pgoff);
+	size_t max_nr_pages = fs->window_len/PAGE_SIZE - pgoff;
+
+	if (kaddr)
+		*kaddr = fs->window_kaddr + offset;
+	if (pfn)
+		*pfn = phys_to_pfn_t(fs->window_phys_addr + offset,
+					PFN_DEV | PFN_MAP);
+	return nr_pages > max_nr_pages ? max_nr_pages : nr_pages;
+}
+
+static size_t virtio_fs_copy_from_iter(struct dax_device *dax_dev,
+				       pgoff_t pgoff, void *addr,
+				       size_t bytes, struct iov_iter *i)
+{
+	return copy_from_iter(addr, bytes, i);
+}
+
+static size_t virtio_fs_copy_to_iter(struct dax_device *dax_dev,
+				       pgoff_t pgoff, void *addr,
+				       size_t bytes, struct iov_iter *i)
+{
+	return copy_to_iter(addr, bytes, i);
+}
+
+static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
+				     pgoff_t pgoff, size_t nr_pages)
+{
+	long rc;
+	void *kaddr;
+
+	rc = dax_direct_access(dax_dev, pgoff, nr_pages, &kaddr, NULL);
+	if (rc < 0)
+		return rc;
+	memset(kaddr, 0, nr_pages << PAGE_SHIFT);
+	dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT);
+	return 0;
+}
+
+static const struct dax_operations virtio_fs_dax_ops = {
+	.direct_access = virtio_fs_direct_access,
+	.copy_from_iter = virtio_fs_copy_from_iter,
+	.copy_to_iter = virtio_fs_copy_to_iter,
+	.zero_page_range = virtio_fs_zero_page_range,
+};
+
+static void virtio_fs_cleanup_dax(void *data)
+{
+	struct dax_device *dax_dev = data;
+
+	kill_dax(dax_dev);
+	put_dax(dax_dev);
+}
+
+static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
+{
+	struct virtio_shm_region cache_reg;
+	struct dev_pagemap *pgmap;
+	bool have_cache;
+
+	if (!IS_ENABLED(CONFIG_FUSE_DAX))
+		return 0;
+
+	/* Get cache region */
+	have_cache = virtio_get_shm_region(vdev, &cache_reg,
+					   (u8)VIRTIO_FS_SHMCAP_ID_CACHE);
+	if (!have_cache) {
+		dev_notice(&vdev->dev, "%s: No cache capability\n", __func__);
+		return 0;
+	}
+
+	if (!devm_request_mem_region(&vdev->dev, cache_reg.addr, cache_reg.len,
+				     dev_name(&vdev->dev))) {
+		dev_warn(&vdev->dev, "could not reserve region addr=0x%llx len=0x%llx\n",
+			 cache_reg.addr, cache_reg.len);
+		return -EBUSY;
+	}
+
+	dev_notice(&vdev->dev, "Cache len: 0x%llx @ 0x%llx\n", cache_reg.len,
+		   cache_reg.addr);
+
+	pgmap = devm_kzalloc(&vdev->dev, sizeof(*pgmap), GFP_KERNEL);
+	if (!pgmap)
+		return -ENOMEM;
+
+	pgmap->type = MEMORY_DEVICE_FS_DAX;
+
+	/* Ideally we would directly use the PCI BAR resource but
+	 * devm_memremap_pages() wants its own copy in pgmap.  So
+	 * initialize a struct resource from scratch (only the start
+	 * and end fields will be used).
+	 */
+	pgmap->res = (struct resource){
+		.name = "virtio-fs dax window",
+		.start = (phys_addr_t) cache_reg.addr,
+		.end = (phys_addr_t) cache_reg.addr + cache_reg.len - 1,
+	};
+
+	fs->window_kaddr = devm_memremap_pages(&vdev->dev, pgmap);
+	if (IS_ERR(fs->window_kaddr))
+		return PTR_ERR(fs->window_kaddr);
+
+	fs->window_phys_addr = (phys_addr_t) cache_reg.addr;
+	fs->window_len = (phys_addr_t) cache_reg.len;
+
+	dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n",
+		__func__, fs->window_kaddr, cache_reg.addr, cache_reg.len);
+
+	fs->dax_dev = alloc_dax(fs, NULL, &virtio_fs_dax_ops, 0);
+	if (IS_ERR(fs->dax_dev))
+		return PTR_ERR(fs->dax_dev);
+
+	return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax,
+					fs->dax_dev);
+}
+
 static int virtio_fs_probe(struct virtio_device *vdev)
 {
 	struct virtio_fs *fs;
@@ -707,6 +841,10 @@ static int virtio_fs_probe(struct virtio_device *vdev)
 
 	/* TODO vq affinity */
 
+	ret = virtio_fs_setup_dax(vdev, fs);
+	if (ret < 0)
+		goto out_vqs;
+
 	/* Bring the device online in case the filesystem is mounted and
 	 * requests need to be sent before we return.
 	 */
diff --git a/include/uapi/linux/virtio_fs.h b/include/uapi/linux/virtio_fs.h
index 3056b6e9f8ce..bea38291421b 100644
--- a/include/uapi/linux/virtio_fs.h
+++ b/include/uapi/linux/virtio_fs.h
@@ -16,4 +16,7 @@ struct virtio_fs_config {
 	__le32 num_request_queues;
 } __attribute__((packed));
 
+/* For the id field in virtio_pci_shm_cap */
+#define VIRTIO_FS_SHMCAP_ID_CACHE 0
+
 #endif /* _UAPI_LINUX_VIRTIO_FS_H */
-- 
cgit v1.2.3


From fd1a1dc6f5aa7361e3562790336e116935f8fcfa Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Wed, 19 Aug 2020 18:19:49 -0400
Subject: virtiofs: implement FUSE_INIT map_alignment field

The device communicates FUSE_SETUPMAPPING/FUSE_REMOVEMAPPING alignment
constraints via the FUSE_INIT map_alignment field.  Parse this field and
ensure our DAX mappings meet the alignment constraints.

We don't actually align anything differently since our mappings are
already 2MB aligned.  Just check the value when the connection is
established.  If it becomes necessary to honor arbitrary alignments in
the future we'll have to adjust how mappings are sized.

The upshot of this commit is that we can be confident that mappings will
work even when emulating x86 on Power and similar combinations where the
host page sizes are different.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/fuse/dax.c             | 15 ++++++++++++++-
 fs/fuse/fuse_i.h          |  1 +
 fs/fuse/inode.c           | 17 ++++++++++++++++-
 include/uapi/linux/fuse.h |  4 +++-
 4 files changed, 34 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
index 031106020f75..fec8a2bd75b3 100644
--- a/fs/fuse/dax.c
+++ b/fs/fuse/dax.c
@@ -9,7 +9,10 @@
 #include <linux/dax.h>
 #include <linux/pfn_t.h>
 
-/* Default memory range size, 2MB */
+/*
+ * Default memory range size.  A power of 2 so it agrees with common FUSE_INIT
+ * map_alignment values 4KB and 64KB.
+ */
 #define FUSE_DAX_SHIFT	21
 #define FUSE_DAX_SZ	(1 << FUSE_DAX_SHIFT)
 #define FUSE_DAX_PAGES	(FUSE_DAX_SZ / PAGE_SIZE)
@@ -123,3 +126,13 @@ int fuse_dax_conn_alloc(struct fuse_conn *fc, struct dax_device *dax_dev)
 	fc->dax = fcd;
 	return 0;
 }
+
+bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment)
+{
+	if (fc->dax && (map_alignment > FUSE_DAX_SHIFT)) {
+		pr_warn("FUSE: map_alignment %u incompatible with dax mem range size %u\n",
+			map_alignment, FUSE_DAX_SZ);
+		return false;
+	}
+	return true;
+}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 97af7952373a..2f3f04aa64c7 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -1106,5 +1106,6 @@ void fuse_free_conn(struct fuse_conn *fc);
 
 int fuse_dax_conn_alloc(struct fuse_conn *fc, struct dax_device *dax_dev);
 void fuse_dax_conn_free(struct fuse_conn *fc);
+bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment);
 
 #endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 1780dfe063ab..67e99cee5a4f 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -908,9 +908,10 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_args *args,
 {
 	struct fuse_init_args *ia = container_of(args, typeof(*ia), args);
 	struct fuse_init_out *arg = &ia->out;
+	bool ok = true;
 
 	if (error || arg->major != FUSE_KERNEL_VERSION)
-		fc->conn_error = 1;
+		ok = false;
 	else {
 		unsigned long ra_pages;
 
@@ -973,6 +974,11 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_args *args,
 					min_t(unsigned int, FUSE_MAX_MAX_PAGES,
 					max_t(unsigned int, arg->max_pages, 1));
 			}
+			if (IS_ENABLED(CONFIG_FUSE_DAX) &&
+			    arg->flags & FUSE_MAP_ALIGNMENT &&
+			    !fuse_dax_check_alignment(fc, arg->map_alignment)) {
+				ok = false;
+			}
 		} else {
 			ra_pages = fc->max_read / PAGE_SIZE;
 			fc->no_lock = 1;
@@ -988,6 +994,11 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_args *args,
 	}
 	kfree(ia);
 
+	if (!ok) {
+		fc->conn_init = 0;
+		fc->conn_error = 1;
+	}
+
 	fuse_set_initialized(fc);
 	wake_up_all(&fc->blocked_waitq);
 }
@@ -1011,6 +1022,10 @@ void fuse_send_init(struct fuse_conn *fc)
 		FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
 		FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
 		FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA;
+#ifdef CONFIG_FUSE_DAX
+	if (fc->dax)
+		ia->in.flags |= FUSE_MAP_ALIGNMENT;
+#endif
 	ia->args.opcode = FUSE_INIT;
 	ia->args.in_numargs = 1;
 	ia->args.in_args[0].size = sizeof(ia->in);
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 373cada89815..5b85819e045f 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -313,7 +313,9 @@ struct fuse_file_lock {
  * FUSE_CACHE_SYMLINKS: cache READLINK responses
  * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir
  * FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request
- * FUSE_MAP_ALIGNMENT: map_alignment field is valid
+ * FUSE_MAP_ALIGNMENT: init_out.map_alignment contains log2(byte alignment) for
+ *		       foffset and moffset fields in struct
+ *		       fuse_setupmapping_out and fuse_removemapping_one.
  */
 #define FUSE_ASYNC_READ		(1 << 0)
 #define FUSE_POSIX_LOCKS	(1 << 1)
-- 
cgit v1.2.3


From ceec02d4354a317cacce4b053a580ea3c7fc6cdc Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Wed, 19 Aug 2020 18:19:50 -0400
Subject: virtiofs: introduce setupmapping/removemapping commands

Introduce two new fuse commands to setup/remove memory mappings. This
will be used to setup/tear down file mapping in dax window.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Peng Tao <tao.peng@linux.alibaba.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/uapi/linux/fuse.h | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 5b85819e045f..60a7bfc787ce 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -894,4 +894,33 @@ struct fuse_copy_file_range_in {
 	uint64_t	flags;
 };
 
+#define FUSE_SETUPMAPPING_FLAG_WRITE (1ull << 0)
+struct fuse_setupmapping_in {
+	/* An already open handle */
+	uint64_t	fh;
+	/* Offset into the file to start the mapping */
+	uint64_t	foffset;
+	/* Length of mapping required */
+	uint64_t	len;
+	/* Flags, FUSE_SETUPMAPPING_FLAG_* */
+	uint64_t	flags;
+	/* Offset in Memory Window */
+	uint64_t	moffset;
+};
+
+struct fuse_removemapping_in {
+	/* number of fuse_removemapping_one follows */
+	uint32_t        count;
+};
+
+struct fuse_removemapping_one {
+	/* Offset into the dax window to start the unmapping */
+	uint64_t        moffset;
+	/* Length of mapping required */
+	uint64_t	len;
+};
+
+#define FUSE_REMOVEMAPPING_MAX_ENTRY   \
+		(PAGE_SIZE / sizeof(struct fuse_removemapping_one))
+
 #endif /* _LINUX_FUSE_H */
-- 
cgit v1.2.3


From c2d0ad00d948de73c78f05d2b3e5bdfa605035cc Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Wed, 19 Aug 2020 18:19:51 -0400
Subject: virtiofs: implement dax read/write operations

This patch implements basic DAX support. mmap() is not implemented
yet and will come in later patches. This patch looks into implementing
read/write.

We make use of interval tree to keep track of per inode dax mappings.

Do not use dax for file extending writes, instead just send WRITE message
to daemon (like we do for direct I/O path). This will keep write and
i_size change atomic w.r.t crash.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>
Signed-off-by: Peng Tao <tao.peng@linux.alibaba.com>
Cc: Dave Chinner <david@fromorbit.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/fuse/Kconfig           |   1 +
 fs/fuse/dax.c             | 565 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/fuse/file.c            |  15 +-
 fs/fuse/fuse_i.h          |  15 ++
 fs/fuse/inode.c           |  21 +-
 include/uapi/linux/fuse.h |   1 +
 6 files changed, 612 insertions(+), 6 deletions(-)

(limited to 'include/uapi')

diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig
index fddd40630077..40ce9a1c12e5 100644
--- a/fs/fuse/Kconfig
+++ b/fs/fuse/Kconfig
@@ -42,6 +42,7 @@ config VIRTIO_FS
 config FUSE_DAX
 	bool "Virtio Filesystem Direct Host Memory Access support"
 	default y
+	select INTERVAL_TREE
 	depends on VIRTIO_FS
 	depends on FS_DAX
 	depends on DAX_DRIVER
diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
index fec8a2bd75b3..a8d311b2db8e 100644
--- a/fs/fuse/dax.c
+++ b/fs/fuse/dax.c
@@ -7,7 +7,10 @@
 #include "fuse_i.h"
 
 #include <linux/dax.h>
+#include <linux/uio.h>
 #include <linux/pfn_t.h>
+#include <linux/iomap.h>
+#include <linux/interval_tree.h>
 
 /*
  * Default memory range size.  A power of 2 so it agrees with common FUSE_INIT
@@ -22,22 +25,556 @@ struct fuse_dax_mapping {
 	/* Will connect in fcd->free_ranges to keep track of free memory */
 	struct list_head list;
 
+	/* For interval tree in file/inode */
+	struct interval_tree_node itn;
+
 	/** Position in DAX window */
 	u64 window_offset;
 
 	/** Length of mapping, in bytes */
 	loff_t length;
+
+	/* Is this mapping read-only or read-write */
+	bool writable;
+};
+
+/* Per-inode dax map */
+struct fuse_inode_dax {
+	/* Semaphore to protect modifications to the dmap tree */
+	struct rw_semaphore sem;
+
+	/* Sorted rb tree of struct fuse_dax_mapping elements */
+	struct rb_root_cached tree;
+	unsigned long nr;
 };
 
 struct fuse_conn_dax {
 	/* DAX device */
 	struct dax_device *dev;
 
+	/* Lock protecting accesses to members of this structure */
+	spinlock_t lock;
+
 	/* DAX Window Free Ranges */
 	long nr_free_ranges;
 	struct list_head free_ranges;
 };
 
+static inline struct fuse_dax_mapping *
+node_to_dmap(struct interval_tree_node *node)
+{
+	if (!node)
+		return NULL;
+
+	return container_of(node, struct fuse_dax_mapping, itn);
+}
+
+static struct fuse_dax_mapping *alloc_dax_mapping(struct fuse_conn_dax *fcd)
+{
+	struct fuse_dax_mapping *dmap;
+
+	spin_lock(&fcd->lock);
+	dmap = list_first_entry_or_null(&fcd->free_ranges,
+					struct fuse_dax_mapping, list);
+	if (dmap) {
+		list_del_init(&dmap->list);
+		WARN_ON(fcd->nr_free_ranges <= 0);
+		fcd->nr_free_ranges--;
+	}
+	spin_unlock(&fcd->lock);
+	return dmap;
+}
+
+/* This assumes fcd->lock is held */
+static void __dmap_add_to_free_pool(struct fuse_conn_dax *fcd,
+				struct fuse_dax_mapping *dmap)
+{
+	list_add_tail(&dmap->list, &fcd->free_ranges);
+	fcd->nr_free_ranges++;
+}
+
+static void dmap_add_to_free_pool(struct fuse_conn_dax *fcd,
+				struct fuse_dax_mapping *dmap)
+{
+	/* Return fuse_dax_mapping to free list */
+	spin_lock(&fcd->lock);
+	__dmap_add_to_free_pool(fcd, dmap);
+	spin_unlock(&fcd->lock);
+}
+
+static int fuse_setup_one_mapping(struct inode *inode, unsigned long start_idx,
+				  struct fuse_dax_mapping *dmap, bool writable,
+				  bool upgrade)
+{
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_conn_dax *fcd = fc->dax;
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	struct fuse_setupmapping_in inarg;
+	loff_t offset = start_idx << FUSE_DAX_SHIFT;
+	FUSE_ARGS(args);
+	ssize_t err;
+
+	WARN_ON(fcd->nr_free_ranges < 0);
+
+	/* Ask fuse daemon to setup mapping */
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.foffset = offset;
+	inarg.fh = -1;
+	inarg.moffset = dmap->window_offset;
+	inarg.len = FUSE_DAX_SZ;
+	inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ;
+	if (writable)
+		inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
+	args.opcode = FUSE_SETUPMAPPING;
+	args.nodeid = fi->nodeid;
+	args.in_numargs = 1;
+	args.in_args[0].size = sizeof(inarg);
+	args.in_args[0].value = &inarg;
+	err = fuse_simple_request(fc, &args);
+	if (err < 0)
+		return err;
+	dmap->writable = writable;
+	if (!upgrade) {
+		dmap->itn.start = dmap->itn.last = start_idx;
+		/* Protected by fi->dax->sem */
+		interval_tree_insert(&dmap->itn, &fi->dax->tree);
+		fi->dax->nr++;
+	}
+	return 0;
+}
+
+static int fuse_send_removemapping(struct inode *inode,
+				   struct fuse_removemapping_in *inargp,
+				   struct fuse_removemapping_one *remove_one)
+{
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	FUSE_ARGS(args);
+
+	args.opcode = FUSE_REMOVEMAPPING;
+	args.nodeid = fi->nodeid;
+	args.in_numargs = 2;
+	args.in_args[0].size = sizeof(*inargp);
+	args.in_args[0].value = inargp;
+	args.in_args[1].size = inargp->count * sizeof(*remove_one);
+	args.in_args[1].value = remove_one;
+	return fuse_simple_request(fc, &args);
+}
+
+static int dmap_removemapping_list(struct inode *inode, unsigned int num,
+				   struct list_head *to_remove)
+{
+	struct fuse_removemapping_one *remove_one, *ptr;
+	struct fuse_removemapping_in inarg;
+	struct fuse_dax_mapping *dmap;
+	int ret, i = 0, nr_alloc;
+
+	nr_alloc = min_t(unsigned int, num, FUSE_REMOVEMAPPING_MAX_ENTRY);
+	remove_one = kmalloc_array(nr_alloc, sizeof(*remove_one), GFP_NOFS);
+	if (!remove_one)
+		return -ENOMEM;
+
+	ptr = remove_one;
+	list_for_each_entry(dmap, to_remove, list) {
+		ptr->moffset = dmap->window_offset;
+		ptr->len = dmap->length;
+		ptr++;
+		i++;
+		num--;
+		if (i >= nr_alloc || num == 0) {
+			memset(&inarg, 0, sizeof(inarg));
+			inarg.count = i;
+			ret = fuse_send_removemapping(inode, &inarg,
+						      remove_one);
+			if (ret)
+				goto out;
+			ptr = remove_one;
+			i = 0;
+		}
+	}
+out:
+	kfree(remove_one);
+	return ret;
+}
+
+/*
+ * Cleanup dmap entry and add back to free list. This should be called with
+ * fcd->lock held.
+ */
+static void dmap_reinit_add_to_free_pool(struct fuse_conn_dax *fcd,
+					    struct fuse_dax_mapping *dmap)
+{
+	pr_debug("fuse: freeing memory range start_idx=0x%lx end_idx=0x%lx window_offset=0x%llx length=0x%llx\n",
+		 dmap->itn.start, dmap->itn.last, dmap->window_offset,
+		 dmap->length);
+	dmap->itn.start = dmap->itn.last = 0;
+	__dmap_add_to_free_pool(fcd, dmap);
+}
+
+/*
+ * Free inode dmap entries whose range falls inside [start, end].
+ * Does not take any locks. At this point of time it should only be
+ * called from evict_inode() path where we know all dmap entries can be
+ * reclaimed.
+ */
+static void inode_reclaim_dmap_range(struct fuse_conn_dax *fcd,
+				     struct inode *inode,
+				     loff_t start, loff_t end)
+{
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	struct fuse_dax_mapping *dmap, *n;
+	int err, num = 0;
+	LIST_HEAD(to_remove);
+	unsigned long start_idx = start >> FUSE_DAX_SHIFT;
+	unsigned long end_idx = end >> FUSE_DAX_SHIFT;
+	struct interval_tree_node *node;
+
+	while (1) {
+		node = interval_tree_iter_first(&fi->dax->tree, start_idx,
+						end_idx);
+		if (!node)
+			break;
+		dmap = node_to_dmap(node);
+		interval_tree_remove(&dmap->itn, &fi->dax->tree);
+		num++;
+		list_add(&dmap->list, &to_remove);
+	}
+
+	/* Nothing to remove */
+	if (list_empty(&to_remove))
+		return;
+
+	WARN_ON(fi->dax->nr < num);
+	fi->dax->nr -= num;
+	err = dmap_removemapping_list(inode, num, &to_remove);
+	if (err && err != -ENOTCONN) {
+		pr_warn("Failed to removemappings. start=0x%llx end=0x%llx\n",
+			start, end);
+	}
+	spin_lock(&fcd->lock);
+	list_for_each_entry_safe(dmap, n, &to_remove, list) {
+		list_del_init(&dmap->list);
+		dmap_reinit_add_to_free_pool(fcd, dmap);
+	}
+	spin_unlock(&fcd->lock);
+}
+
+/*
+ * It is called from evict_inode() and by that time inode is going away. So
+ * this function does not take any locks like fi->dax->sem for traversing
+ * that fuse inode interval tree. If that lock is taken then lock validator
+ * complains of deadlock situation w.r.t fs_reclaim lock.
+ */
+void fuse_dax_inode_cleanup(struct inode *inode)
+{
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_inode *fi = get_fuse_inode(inode);
+
+	/*
+	 * fuse_evict_inode() has already called truncate_inode_pages_final()
+	 * before we arrive here. So we should not have to worry about any
+	 * pages/exception entries still associated with inode.
+	 */
+	inode_reclaim_dmap_range(fc->dax, inode, 0, -1);
+	WARN_ON(fi->dax->nr);
+}
+
+static void fuse_fill_iomap_hole(struct iomap *iomap, loff_t length)
+{
+	iomap->addr = IOMAP_NULL_ADDR;
+	iomap->length = length;
+	iomap->type = IOMAP_HOLE;
+}
+
+static void fuse_fill_iomap(struct inode *inode, loff_t pos, loff_t length,
+			    struct iomap *iomap, struct fuse_dax_mapping *dmap,
+			    unsigned int flags)
+{
+	loff_t offset, len;
+	loff_t i_size = i_size_read(inode);
+
+	offset = pos - (dmap->itn.start << FUSE_DAX_SHIFT);
+	len = min(length, dmap->length - offset);
+
+	/* If length is beyond end of file, truncate further */
+	if (pos + len > i_size)
+		len = i_size - pos;
+
+	if (len > 0) {
+		iomap->addr = dmap->window_offset + offset;
+		iomap->length = len;
+		if (flags & IOMAP_FAULT)
+			iomap->length = ALIGN(len, PAGE_SIZE);
+		iomap->type = IOMAP_MAPPED;
+	} else {
+		/* Mapping beyond end of file is hole */
+		fuse_fill_iomap_hole(iomap, length);
+	}
+}
+
+static int fuse_setup_new_dax_mapping(struct inode *inode, loff_t pos,
+				      loff_t length, unsigned int flags,
+				      struct iomap *iomap)
+{
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_conn_dax *fcd = fc->dax;
+	struct fuse_dax_mapping *dmap, *alloc_dmap = NULL;
+	int ret;
+	bool writable = flags & IOMAP_WRITE;
+	unsigned long start_idx = pos >> FUSE_DAX_SHIFT;
+	struct interval_tree_node *node;
+
+	alloc_dmap = alloc_dax_mapping(fcd);
+	if (!alloc_dmap)
+		return -EIO;
+
+	/*
+	 * Take write lock so that only one caller can try to setup mapping
+	 * and other waits.
+	 */
+	down_write(&fi->dax->sem);
+	/*
+	 * We dropped lock. Check again if somebody else setup
+	 * mapping already.
+	 */
+	node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx);
+	if (node) {
+		dmap = node_to_dmap(node);
+		fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
+		dmap_add_to_free_pool(fcd, alloc_dmap);
+		up_write(&fi->dax->sem);
+		return 0;
+	}
+
+	/* Setup one mapping */
+	ret = fuse_setup_one_mapping(inode, pos >> FUSE_DAX_SHIFT, alloc_dmap,
+				     writable, false);
+	if (ret < 0) {
+		dmap_add_to_free_pool(fcd, alloc_dmap);
+		up_write(&fi->dax->sem);
+		return ret;
+	}
+	fuse_fill_iomap(inode, pos, length, iomap, alloc_dmap, flags);
+	up_write(&fi->dax->sem);
+	return 0;
+}
+
+static int fuse_upgrade_dax_mapping(struct inode *inode, loff_t pos,
+				    loff_t length, unsigned int flags,
+				    struct iomap *iomap)
+{
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	struct fuse_dax_mapping *dmap;
+	int ret;
+	unsigned long idx = pos >> FUSE_DAX_SHIFT;
+	struct interval_tree_node *node;
+
+	/*
+	 * Take exclusive lock so that only one caller can try to setup
+	 * mapping and others wait.
+	 */
+	down_write(&fi->dax->sem);
+	node = interval_tree_iter_first(&fi->dax->tree, idx, idx);
+
+	/* We are holding either inode lock or i_mmap_sem, and that should
+	 * ensure that dmap can't be reclaimed or truncated and it should still
+	 * be there in tree despite the fact we dropped and re-acquired the
+	 * lock.
+	 */
+	ret = -EIO;
+	if (WARN_ON(!node))
+		goto out_err;
+
+	dmap = node_to_dmap(node);
+
+	/* Maybe another thread already upgraded mapping while we were not
+	 * holding lock.
+	 */
+	if (dmap->writable) {
+		ret = 0;
+		goto out_fill_iomap;
+	}
+
+	ret = fuse_setup_one_mapping(inode, pos >> FUSE_DAX_SHIFT, dmap, true,
+				     true);
+	if (ret < 0)
+		goto out_err;
+out_fill_iomap:
+	fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
+out_err:
+	up_write(&fi->dax->sem);
+	return ret;
+}
+
+/* This is just for DAX and the mapping is ephemeral, do not use it for other
+ * purposes since there is no block device with a permanent mapping.
+ */
+static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
+			    unsigned int flags, struct iomap *iomap,
+			    struct iomap *srcmap)
+{
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_dax_mapping *dmap;
+	bool writable = flags & IOMAP_WRITE;
+	unsigned long start_idx = pos >> FUSE_DAX_SHIFT;
+	struct interval_tree_node *node;
+
+	/* We don't support FIEMAP */
+	if (WARN_ON(flags & IOMAP_REPORT))
+		return -EIO;
+
+	iomap->offset = pos;
+	iomap->flags = 0;
+	iomap->bdev = NULL;
+	iomap->dax_dev = fc->dax->dev;
+
+	/*
+	 * Both read/write and mmap path can race here. So we need something
+	 * to make sure if we are setting up mapping, then other path waits
+	 *
+	 * For now, use a semaphore for this. It probably needs to be
+	 * optimized later.
+	 */
+	down_read(&fi->dax->sem);
+	node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx);
+	if (node) {
+		dmap = node_to_dmap(node);
+		if (writable && !dmap->writable) {
+			/* Upgrade read-only mapping to read-write. This will
+			 * require exclusive fi->dax->sem lock as we don't want
+			 * two threads to be trying to do this simultaneously
+			 * for same dmap. So drop shared lock and acquire
+			 * exclusive lock.
+			 */
+			up_read(&fi->dax->sem);
+			pr_debug("%s: Upgrading mapping at offset 0x%llx length 0x%llx\n",
+				 __func__, pos, length);
+			return fuse_upgrade_dax_mapping(inode, pos, length,
+							flags, iomap);
+		} else {
+			fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
+			up_read(&fi->dax->sem);
+			return 0;
+		}
+	} else {
+		up_read(&fi->dax->sem);
+		pr_debug("%s: no mapping at offset 0x%llx length 0x%llx\n",
+				__func__, pos, length);
+		if (pos >= i_size_read(inode))
+			goto iomap_hole;
+
+		return fuse_setup_new_dax_mapping(inode, pos, length, flags,
+						  iomap);
+	}
+
+	/*
+	 * If read beyond end of file happens, fs code seems to return
+	 * it as hole
+	 */
+iomap_hole:
+	fuse_fill_iomap_hole(iomap, length);
+	pr_debug("%s returning hole mapping. pos=0x%llx length_asked=0x%llx length_returned=0x%llx\n",
+		 __func__, pos, length, iomap->length);
+	return 0;
+}
+
+static int fuse_iomap_end(struct inode *inode, loff_t pos, loff_t length,
+			  ssize_t written, unsigned int flags,
+			  struct iomap *iomap)
+{
+	/* DAX writes beyond end-of-file aren't handled using iomap, so the
+	 * file size is unchanged and there is nothing to do here.
+	 */
+	return 0;
+}
+
+static const struct iomap_ops fuse_iomap_ops = {
+	.iomap_begin = fuse_iomap_begin,
+	.iomap_end = fuse_iomap_end,
+};
+
+ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+	ssize_t ret;
+
+	if (iocb->ki_flags & IOCB_NOWAIT) {
+		if (!inode_trylock_shared(inode))
+			return -EAGAIN;
+	} else {
+		inode_lock_shared(inode);
+	}
+
+	ret = dax_iomap_rw(iocb, to, &fuse_iomap_ops);
+	inode_unlock_shared(inode);
+
+	/* TODO file_accessed(iocb->ki_filp) */
+	return ret;
+}
+
+static bool file_extending_write(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+
+	return (iov_iter_rw(from) == WRITE &&
+		((iocb->ki_pos) >= i_size_read(inode) ||
+		  (iocb->ki_pos + iov_iter_count(from) > i_size_read(inode))));
+}
+
+static ssize_t fuse_dax_direct_write(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+	struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
+	ssize_t ret;
+
+	ret = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE);
+	if (ret < 0)
+		return ret;
+
+	fuse_invalidate_attr(inode);
+	fuse_write_update_size(inode, iocb->ki_pos);
+	return ret;
+}
+
+ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+	ssize_t ret;
+
+	if (iocb->ki_flags & IOCB_NOWAIT) {
+		if (!inode_trylock(inode))
+			return -EAGAIN;
+	} else {
+		inode_lock(inode);
+	}
+
+	ret = generic_write_checks(iocb, from);
+	if (ret <= 0)
+		goto out;
+
+	ret = file_remove_privs(iocb->ki_filp);
+	if (ret)
+		goto out;
+	/* TODO file_update_time() but we don't want metadata I/O */
+
+	/* Do not use dax for file extending writes as write and on
+	 * disk i_size increase are not atomic otherwise.
+	 */
+	if (file_extending_write(iocb, from))
+		ret = fuse_dax_direct_write(iocb, from);
+	else
+		ret = dax_iomap_rw(iocb, from, &fuse_iomap_ops);
+
+out:
+	inode_unlock(inode);
+
+	if (ret > 0)
+		ret = generic_write_sync(iocb, ret);
+	return ret;
+}
+
 static void fuse_free_dax_mem_ranges(struct list_head *mem_list)
 {
 	struct fuse_dax_mapping *range, *temp;
@@ -116,6 +653,7 @@ int fuse_dax_conn_alloc(struct fuse_conn *fc, struct dax_device *dax_dev)
 	if (!fcd)
 		return -ENOMEM;
 
+	spin_lock_init(&fcd->lock);
 	fcd->dev = dax_dev;
 	err = fuse_dax_mem_range_init(fcd);
 	if (err) {
@@ -127,6 +665,33 @@ int fuse_dax_conn_alloc(struct fuse_conn *fc, struct dax_device *dax_dev)
 	return 0;
 }
 
+bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi)
+{
+	struct fuse_conn *fc = get_fuse_conn_super(sb);
+
+	fi->dax = NULL;
+	if (fc->dax) {
+		fi->dax = kzalloc(sizeof(*fi->dax), GFP_KERNEL_ACCOUNT);
+		if (!fi->dax)
+			return false;
+
+		init_rwsem(&fi->dax->sem);
+		fi->dax->tree = RB_ROOT_CACHED;
+	}
+
+	return true;
+}
+
+void fuse_dax_inode_init(struct inode *inode)
+{
+	struct fuse_conn *fc = get_fuse_conn(inode);
+
+	if (!fc->dax)
+		return;
+
+	inode->i_flags |= S_DAX;
+}
+
 bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment)
 {
 	if (fc->dax && (map_alignment > FUSE_DAX_SHIFT)) {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 6611ef3269a8..6c586bc97b64 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1539,10 +1539,14 @@ static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
 	struct file *file = iocb->ki_filp;
 	struct fuse_file *ff = file->private_data;
+	struct inode *inode = file_inode(file);
 
-	if (is_bad_inode(file_inode(file)))
+	if (is_bad_inode(inode))
 		return -EIO;
 
+	if (FUSE_IS_DAX(inode))
+		return fuse_dax_read_iter(iocb, to);
+
 	if (!(ff->open_flags & FOPEN_DIRECT_IO))
 		return fuse_cache_read_iter(iocb, to);
 	else
@@ -1553,10 +1557,14 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct fuse_file *ff = file->private_data;
+	struct inode *inode = file_inode(file);
 
-	if (is_bad_inode(file_inode(file)))
+	if (is_bad_inode(inode))
 		return -EIO;
 
+	if (FUSE_IS_DAX(inode))
+		return fuse_dax_write_iter(iocb, from);
+
 	if (!(ff->open_flags & FOPEN_DIRECT_IO))
 		return fuse_cache_write_iter(iocb, from);
 	else
@@ -3440,4 +3448,7 @@ void fuse_init_file_inode(struct inode *inode)
 	fi->writectr = 0;
 	init_waitqueue_head(&fi->page_waitq);
 	fi->writepages = RB_ROOT;
+
+	if (IS_ENABLED(CONFIG_FUSE_DAX))
+		fuse_dax_inode_init(inode);
 }
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 2f3f04aa64c7..2d2bdd596194 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -148,6 +148,13 @@ struct fuse_inode {
 
 	/** Lock to protect write related fields */
 	spinlock_t lock;
+
+#ifdef CONFIG_FUSE_DAX
+	/*
+	 * Dax specific inode data
+	 */
+	struct fuse_inode_dax *dax;
+#endif
 };
 
 /** FUSE inode state bits */
@@ -1104,8 +1111,16 @@ void fuse_free_conn(struct fuse_conn *fc);
 
 /* dax.c */
 
+#define FUSE_IS_DAX(inode) (IS_ENABLED(CONFIG_FUSE_DAX) && IS_DAX(inode))
+
+ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to);
+ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from);
+int fuse_dax_mmap(struct file *file, struct vm_area_struct *vma);
 int fuse_dax_conn_alloc(struct fuse_conn *fc, struct dax_device *dax_dev);
 void fuse_dax_conn_free(struct fuse_conn *fc);
+bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi);
+void fuse_dax_inode_init(struct inode *inode);
+void fuse_dax_inode_cleanup(struct inode *inode);
 bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment);
 
 #endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 67e99cee5a4f..cab4239bd78a 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -87,12 +87,19 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
 	mutex_init(&fi->mutex);
 	spin_lock_init(&fi->lock);
 	fi->forget = fuse_alloc_forget();
-	if (!fi->forget) {
-		kmem_cache_free(fuse_inode_cachep, fi);
-		return NULL;
-	}
+	if (!fi->forget)
+		goto out_free;
+
+	if (IS_ENABLED(CONFIG_FUSE_DAX) && !fuse_dax_inode_alloc(sb, fi))
+		goto out_free_forget;
 
 	return &fi->inode;
+
+out_free_forget:
+	kfree(fi->forget);
+out_free:
+	kmem_cache_free(fuse_inode_cachep, fi);
+	return NULL;
 }
 
 static void fuse_free_inode(struct inode *inode)
@@ -101,6 +108,9 @@ static void fuse_free_inode(struct inode *inode)
 
 	mutex_destroy(&fi->mutex);
 	kfree(fi->forget);
+#ifdef CONFIG_FUSE_DAX
+	kfree(fi->dax);
+#endif
 	kmem_cache_free(fuse_inode_cachep, fi);
 }
 
@@ -112,6 +122,9 @@ static void fuse_evict_inode(struct inode *inode)
 	clear_inode(inode);
 	if (inode->i_sb->s_flags & SB_ACTIVE) {
 		struct fuse_conn *fc = get_fuse_conn(inode);
+
+		if (FUSE_IS_DAX(inode))
+			fuse_dax_inode_cleanup(inode);
 		fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup);
 		fi->forget = NULL;
 	}
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 60a7bfc787ce..8899e4862309 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -895,6 +895,7 @@ struct fuse_copy_file_range_in {
 };
 
 #define FUSE_SETUPMAPPING_FLAG_WRITE (1ull << 0)
+#define FUSE_SETUPMAPPING_FLAG_READ (1ull << 1)
 struct fuse_setupmapping_in {
 	/* An already open handle */
 	uint64_t	fh;
-- 
cgit v1.2.3


From 501cb008906631a019f3ab2104a17ef8b2651ed0 Mon Sep 17 00:00:00 2001
From: Paul Davey <paul.davey@alliedtelesis.co.nz>
Date: Tue, 8 Sep 2020 10:04:06 +1200
Subject: ipmr: Add route table ID to netlink cache reports

Insert the multicast route table ID as a Netlink attribute to Netlink
cache report notifications.

When multiple route tables are in use it is necessary to have a way to
determine which route table a given cache report belongs to when
receiving the cache report.

Signed-off-by: Paul Davey <paul.davey@alliedtelesis.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/mroute.h | 1 +
 net/ipv4/ipmr.c             | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h
index 11c8c1fc1124..918f1ef32ffe 100644
--- a/include/uapi/linux/mroute.h
+++ b/include/uapi/linux/mroute.h
@@ -169,6 +169,7 @@ enum {
 	IPMRA_CREPORT_SRC_ADDR,
 	IPMRA_CREPORT_DST_ADDR,
 	IPMRA_CREPORT_PKT,
+	IPMRA_CREPORT_TABLE,
 	__IPMRA_CREPORT_MAX
 };
 #define IPMRA_CREPORT_MAX (__IPMRA_CREPORT_MAX - 1)
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 876fd6ff1ff9..19b2f586319b 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -2396,6 +2396,7 @@ static size_t igmpmsg_netlink_msgsize(size_t payloadlen)
 		+ nla_total_size(4)	/* IPMRA_CREPORT_VIF_ID */
 		+ nla_total_size(4)	/* IPMRA_CREPORT_SRC_ADDR */
 		+ nla_total_size(4)	/* IPMRA_CREPORT_DST_ADDR */
+		+ nla_total_size(4)	/* IPMRA_CREPORT_TABLE */
 					/* IPMRA_CREPORT_PKT */
 		+ nla_total_size(payloadlen)
 		;
@@ -2431,7 +2432,8 @@ static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
 	    nla_put_in_addr(skb, IPMRA_CREPORT_SRC_ADDR,
 			    msg->im_src.s_addr) ||
 	    nla_put_in_addr(skb, IPMRA_CREPORT_DST_ADDR,
-			    msg->im_dst.s_addr))
+			    msg->im_dst.s_addr) ||
+	    nla_put_u32(skb, IPMRA_CREPORT_TABLE, mrt->id))
 		goto nla_put_failure;
 
 	nla = nla_reserve(skb, IPMRA_CREPORT_PKT, payloadlen);
-- 
cgit v1.2.3


From c8715a8e9f38906e73d6d78764216742db13ba0e Mon Sep 17 00:00:00 2001
From: Paul Davey <paul.davey@alliedtelesis.co.nz>
Date: Tue, 8 Sep 2020 10:04:07 +1200
Subject: ipmr: Add high byte of VIF ID to igmpmsg

Use the unused3 byte in struct igmpmsg to hold the high 8 bits of the
VIF ID.

If using more than 255 IPv4 multicast interfaces it is necessary to have
access to a VIF ID for cache reports that is wider than 8 bits, the VIF
ID present in the igmpmsg reports sent to mroute_sk was only 8 bits wide
in the igmpmsg header.  Adding the high 8 bits of the 16 bit VIF ID in
the unused byte allows use of more than 255 IPv4 multicast interfaces.

Signed-off-by: Paul Davey <paul.davey@alliedtelesis.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/mroute.h | 4 ++--
 net/ipv4/ipmr.c             | 8 ++++++--
 2 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h
index 918f1ef32ffe..1a42f5f9b31b 100644
--- a/include/uapi/linux/mroute.h
+++ b/include/uapi/linux/mroute.h
@@ -113,8 +113,8 @@ struct igmpmsg {
 	__u32 unused1,unused2;
 	unsigned char im_msgtype;		/* What is this */
 	unsigned char im_mbz;			/* Must be zero */
-	unsigned char im_vif;			/* Interface (this ought to be a vifi_t!) */
-	unsigned char unused3;
+	unsigned char im_vif;			/* Low 8 bits of Interface */
+	unsigned char im_vif_hi;		/* High 8 bits of Interface */
 	struct in_addr im_src,im_dst;
 };
 
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 19b2f586319b..4809318f591b 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1038,10 +1038,13 @@ static int ipmr_cache_report(struct mr_table *mrt,
 		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
 		msg->im_msgtype = assert;
 		msg->im_mbz = 0;
-		if (assert == IGMPMSG_WRVIFWHOLE)
+		if (assert == IGMPMSG_WRVIFWHOLE) {
 			msg->im_vif = vifi;
-		else
+			msg->im_vif_hi = vifi >> 8;
+		} else {
 			msg->im_vif = mrt->mroute_reg_vif_num;
+			msg->im_vif_hi = mrt->mroute_reg_vif_num >> 8;
+		}
 		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
 		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
 					     sizeof(struct iphdr));
@@ -1054,6 +1057,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
 		ip_hdr(skb)->protocol = 0;
 		msg = (struct igmpmsg *)skb_network_header(skb);
 		msg->im_vif = vifi;
+		msg->im_vif_hi = vifi >> 8;
 		skb_dst_set(skb, dst_clone(skb_dst(pkt)));
 		/* Add our header */
 		igmp = skb_put(skb, sizeof(struct igmphdr));
-- 
cgit v1.2.3


From 1aef5b4391f0c75c0a1523706a7b0311846ee12f Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Thu, 10 Sep 2020 13:33:14 -0700
Subject: bpf: Fix comment for helper bpf_current_task_under_cgroup()

This should be "current" not "skb".

Fixes: c6b5fb8690fa ("bpf: add documentation for eBPF helpers (42-50)")
Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/bpf/20200910203314.70018-1-songliubraving@fb.com
---
 include/uapi/linux/bpf.h       | 4 ++--
 tools/include/uapi/linux/bpf.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 90359cab501d..7dd314176df7 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1447,8 +1447,8 @@ union bpf_attr {
  * 	Return
  * 		The return value depends on the result of the test, and can be:
  *
- * 		* 0, if the *skb* task belongs to the cgroup2.
- * 		* 1, if the *skb* task does not belong to the cgroup2.
+ *		* 0, if current task belongs to the cgroup2.
+ *		* 1, if current task does not belong to the cgroup2.
  * 		* A negative error code, if an error occurred.
  *
  * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 90359cab501d..7dd314176df7 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1447,8 +1447,8 @@ union bpf_attr {
  * 	Return
  * 		The return value depends on the result of the test, and can be:
  *
- * 		* 0, if the *skb* task belongs to the cgroup2.
- * 		* 1, if the *skb* task does not belong to the cgroup2.
+ *		* 0, if current task belongs to the cgroup2.
+ *		* 1, if current task does not belong to the cgroup2.
  * 		* A negative error code, if an error occurred.
  *
  * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
-- 
cgit v1.2.3


From b60b9c0274007fe68439cd9cefdabbd7fb8a2ce6 Mon Sep 17 00:00:00 2001
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Thu, 3 Sep 2020 17:40:34 -0500
Subject: RDMA/core: Added missing WR and WC opcodes

Add work completion opcodes to a new ib_uverbs_wc_opcode enum in
ib_user_verbs.h. This plays the same role as ib_uverbs_wr_opcode
documenting the opcodes in the user space API.

Assigned the IB_WC_XXX opcodes in ib_verbs.h to the IB_UVERBS_WC_XXX
where they are defined. This follows the same pattern as the IB_WR_XXX
opcodes. This fixes an incorrect value for LSO that had crept in but
is not currently being used.

Added a missing IB_WR_BIND_MW opcode in ib_verbs.h.

Link: https://lore.kernel.org/r/20200903224039.437391-2-rpearson@hpe.com
Signed-off-by: Bob Pearson <rpearson@hpe.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/rdma/ib_verbs.h           | 16 +++++++++-------
 include/uapi/rdma/ib_user_verbs.h | 11 +++++++++++
 2 files changed, 20 insertions(+), 7 deletions(-)

(limited to 'include/uapi')

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 97658d706aa1..5ae1d9849881 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -952,13 +952,14 @@ enum ib_wc_status {
 const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status);
 
 enum ib_wc_opcode {
-	IB_WC_SEND,
-	IB_WC_RDMA_WRITE,
-	IB_WC_RDMA_READ,
-	IB_WC_COMP_SWAP,
-	IB_WC_FETCH_ADD,
-	IB_WC_LSO,
-	IB_WC_LOCAL_INV,
+	IB_WC_SEND = IB_UVERBS_WC_SEND,
+	IB_WC_RDMA_WRITE = IB_UVERBS_WC_RDMA_WRITE,
+	IB_WC_RDMA_READ = IB_UVERBS_WC_RDMA_READ,
+	IB_WC_COMP_SWAP = IB_UVERBS_WC_COMP_SWAP,
+	IB_WC_FETCH_ADD = IB_UVERBS_WC_FETCH_ADD,
+	IB_WC_BIND_MW = IB_UVERBS_WC_BIND_MW,
+	IB_WC_LOCAL_INV = IB_UVERBS_WC_LOCAL_INV,
+	IB_WC_LSO = IB_UVERBS_WC_TSO,
 	IB_WC_REG_MR,
 	IB_WC_MASKED_COMP_SWAP,
 	IB_WC_MASKED_FETCH_ADD,
@@ -1291,6 +1292,7 @@ enum ib_wr_opcode {
 	IB_WR_RDMA_READ = IB_UVERBS_WR_RDMA_READ,
 	IB_WR_ATOMIC_CMP_AND_SWP = IB_UVERBS_WR_ATOMIC_CMP_AND_SWP,
 	IB_WR_ATOMIC_FETCH_AND_ADD = IB_UVERBS_WR_ATOMIC_FETCH_AND_ADD,
+	IB_WR_BIND_MW = IB_UVERBS_WR_BIND_MW,
 	IB_WR_LSO = IB_UVERBS_WR_TSO,
 	IB_WR_SEND_WITH_INV = IB_UVERBS_WR_SEND_WITH_INV,
 	IB_WR_RDMA_READ_WITH_INV = IB_UVERBS_WR_RDMA_READ_WITH_INV,
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 0474c7400268..456438c18c2c 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -457,6 +457,17 @@ struct ib_uverbs_poll_cq {
 	__u32 ne;
 };
 
+enum ib_uverbs_wc_opcode {
+	IB_UVERBS_WC_SEND = 0,
+	IB_UVERBS_WC_RDMA_WRITE = 1,
+	IB_UVERBS_WC_RDMA_READ = 2,
+	IB_UVERBS_WC_COMP_SWAP = 3,
+	IB_UVERBS_WC_FETCH_ADD = 4,
+	IB_UVERBS_WC_BIND_MW = 5,
+	IB_UVERBS_WC_LOCAL_INV = 6,
+	IB_UVERBS_WC_TSO = 7,
+};
+
 struct ib_uverbs_wc {
 	__aligned_u64 wr_id;
 	__u32 status;
-- 
cgit v1.2.3


From 5823833c9adab5a9ce5500e7f1ce7deeff00ee73 Mon Sep 17 00:00:00 2001
From: Stanimir Varbanov <stanimir.varbanov@linaro.org>
Date: Sat, 11 Jul 2020 14:52:36 +0200
Subject: media: v4l2-ctrl: Add VP9 codec levels

Add menu control for VP9 codec levels. A total of 14 levels are
defined for Profile 0 (8bit) and Profile 2 (10bit). Each level
is a set of constrained bitstreams coded with targeted resolutions,
frame rates, and bitrates.

The definitions have been taken from webm project [1].

[1] https://www.webmproject.org/vp9/levels/

Signed-off-by: Stanimir Varbanov <stanimir.varbanov@linaro.org>
Reviewed-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
Reviewed-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../userspace-api/media/v4l/ext-ctrls-codec.rst    | 43 ++++++++++++++++++++++
 drivers/media/v4l2-core/v4l2-ctrls.c               | 21 +++++++++++
 include/uapi/linux/v4l2-controls.h                 | 17 +++++++++
 3 files changed, 81 insertions(+)

(limited to 'include/uapi')

diff --git a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
index 289d380e2cf0..ce728c757eaf 100644
--- a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
+++ b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
@@ -3383,6 +3383,49 @@ enum v4l2_mpeg_video_vp9_profile -
     * - ``V4L2_MPEG_VIDEO_VP9_PROFILE_3``
       - Profile 3
 
+.. _v4l2-mpeg-video-vp9-level:
+
+``V4L2_CID_MPEG_VIDEO_VP9_LEVEL (enum)``
+
+enum v4l2_mpeg_video_vp9_level -
+    This control allows selecting the level for the VP9 encoder.
+    It is also used to enumerate the levels supported by a VP9 encoder or decoder.
+    More information can be found at
+    `webmproject <https://www.webmproject.org/vp9/levels/>`__. Possible values are:
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    * - ``V4L2_MPEG_VIDEO_VP9_LEVEL_1_0``
+      - Level 1
+    * - ``V4L2_MPEG_VIDEO_VP9_LEVEL_1_1``
+      - Level 1.1
+    * - ``V4L2_MPEG_VIDEO_VP9_LEVEL_2_0``
+      - Level 2
+    * - ``V4L2_MPEG_VIDEO_VP9_LEVEL_2_1``
+      - Level 2.1
+    * - ``V4L2_MPEG_VIDEO_VP9_LEVEL_3_0``
+      - Level 3
+    * - ``V4L2_MPEG_VIDEO_VP9_LEVEL_3_1``
+      - Level 3.1
+    * - ``V4L2_MPEG_VIDEO_VP9_LEVEL_4_0``
+      - Level 4
+    * - ``V4L2_MPEG_VIDEO_VP9_LEVEL_4_1``
+      - Level 4.1
+    * - ``V4L2_MPEG_VIDEO_VP9_LEVEL_5_0``
+      - Level 5
+    * - ``V4L2_MPEG_VIDEO_VP9_LEVEL_5_1``
+      - Level 5.1
+    * - ``V4L2_MPEG_VIDEO_VP9_LEVEL_5_2``
+      - Level 5.2
+    * - ``V4L2_MPEG_VIDEO_VP9_LEVEL_6_0``
+      - Level 6
+    * - ``V4L2_MPEG_VIDEO_VP9_LEVEL_6_1``
+      - Level 6.1
+    * - ``V4L2_MPEG_VIDEO_VP9_LEVEL_6_2``
+      - Level 6.2
+
 
 High Efficiency Video Coding (HEVC/H.265) Control Reference
 ===========================================================
diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index 73f3d65957ff..bd7f330c941c 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -475,6 +475,23 @@ const char * const *v4l2_ctrl_get_menu(u32 id)
 		"3",
 		NULL,
 	};
+	static const char * const vp9_level[] = {
+		"1",
+		"1.1",
+		"2",
+		"2.1",
+		"3",
+		"3.1",
+		"4",
+		"4.1",
+		"5",
+		"5.1",
+		"5.2",
+		"6",
+		"6.1",
+		"6.2",
+		NULL,
+	};
 
 	static const char * const flash_led_mode[] = {
 		"Off",
@@ -694,6 +711,8 @@ const char * const *v4l2_ctrl_get_menu(u32 id)
 		return vp8_profile;
 	case V4L2_CID_MPEG_VIDEO_VP9_PROFILE:
 		return vp9_profile;
+	case V4L2_CID_MPEG_VIDEO_VP9_LEVEL:
+		return vp9_level;
 	case V4L2_CID_JPEG_CHROMA_SUBSAMPLING:
 		return jpeg_chroma_subsampling;
 	case V4L2_CID_DV_TX_MODE:
@@ -950,6 +969,7 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_MPEG_VIDEO_VPX_P_FRAME_QP:		return "VPX P-Frame QP Value";
 	case V4L2_CID_MPEG_VIDEO_VP8_PROFILE:			return "VP8 Profile";
 	case V4L2_CID_MPEG_VIDEO_VP9_PROFILE:			return "VP9 Profile";
+	case V4L2_CID_MPEG_VIDEO_VP9_LEVEL:			return "VP9 Level";
 	case V4L2_CID_MPEG_VIDEO_VP8_FRAME_HEADER:		return "VP8 Frame Header";
 
 	/* HEVC controls */
@@ -1307,6 +1327,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_MPEG_VIDEO_VPX_GOLDEN_FRAME_SEL:
 	case V4L2_CID_MPEG_VIDEO_VP8_PROFILE:
 	case V4L2_CID_MPEG_VIDEO_VP9_PROFILE:
+	case V4L2_CID_MPEG_VIDEO_VP9_LEVEL:
 	case V4L2_CID_DETECT_MD_MODE:
 	case V4L2_CID_MPEG_VIDEO_HEVC_PROFILE:
 	case V4L2_CID_MPEG_VIDEO_HEVC_LEVEL:
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 053827cda8e6..a184c4939438 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -651,6 +651,23 @@ enum v4l2_mpeg_video_vp9_profile {
 	V4L2_MPEG_VIDEO_VP9_PROFILE_2				= 2,
 	V4L2_MPEG_VIDEO_VP9_PROFILE_3				= 3,
 };
+#define V4L2_CID_MPEG_VIDEO_VP9_LEVEL			(V4L2_CID_MPEG_BASE+513)
+enum v4l2_mpeg_video_vp9_level {
+	V4L2_MPEG_VIDEO_VP9_LEVEL_1_0	= 0,
+	V4L2_MPEG_VIDEO_VP9_LEVEL_1_1	= 1,
+	V4L2_MPEG_VIDEO_VP9_LEVEL_2_0	= 2,
+	V4L2_MPEG_VIDEO_VP9_LEVEL_2_1	= 3,
+	V4L2_MPEG_VIDEO_VP9_LEVEL_3_0	= 4,
+	V4L2_MPEG_VIDEO_VP9_LEVEL_3_1	= 5,
+	V4L2_MPEG_VIDEO_VP9_LEVEL_4_0	= 6,
+	V4L2_MPEG_VIDEO_VP9_LEVEL_4_1	= 7,
+	V4L2_MPEG_VIDEO_VP9_LEVEL_5_0	= 8,
+	V4L2_MPEG_VIDEO_VP9_LEVEL_5_1	= 9,
+	V4L2_MPEG_VIDEO_VP9_LEVEL_5_2	= 10,
+	V4L2_MPEG_VIDEO_VP9_LEVEL_6_0	= 11,
+	V4L2_MPEG_VIDEO_VP9_LEVEL_6_1	= 12,
+	V4L2_MPEG_VIDEO_VP9_LEVEL_6_2	= 13,
+};
 
 /* CIDs for HEVC encoding. */
 
-- 
cgit v1.2.3


From e47168f3d1b14af5281cf50c59561d59d28201f9 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Mon, 31 Aug 2020 08:30:44 +0000
Subject: powerpc/8xx: Support 16k hugepages with 4k pages

The 8xx has 4 page sizes: 4k, 16k, 512k and 8M

4k and 16k can be selected at build time as standard page sizes,
and 512k and 8M are hugepages.

When 4k standard pages are selected, 16k pages are not available.

Allow 16k pages as hugepages when 4k pages are used.

To allow that, implement arch_make_huge_pte() which receives
the necessary arguments to allow setting the PTE in accordance
with the page size:
- 512 k pages must have _PAGE_HUGE and _PAGE_SPS. They are set
by pte_mkhuge(). arch_make_huge_pte() does nothing.
- 16 k pages must have only _PAGE_SPS. arch_make_huge_pte() clears
_PAGE_HUGE.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/a518abc29266a708dfbccc8fce9ae6694fe4c2c6.1598862623.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h | 14 ++++++++++++++
 arch/powerpc/include/asm/nohash/32/pgtable.h     |  2 ++
 arch/powerpc/mm/hugetlbpage.c                    |  2 +-
 arch/powerpc/mm/nohash/tlb.c                     |  4 ----
 arch/powerpc/mm/ptdump/8xx.c                     |  5 +++++
 include/uapi/asm-generic/hugetlb_encode.h        |  1 +
 include/uapi/linux/mman.h                        |  1 +
 7 files changed, 24 insertions(+), 5 deletions(-)

(limited to 'include/uapi')

diff --git a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
index e752a5807a59..39be9aea86db 100644
--- a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
@@ -65,4 +65,18 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 	pte_update(mm, addr, ptep, clr, set, 1);
 }
 
+#ifdef CONFIG_PPC_4K_PAGES
+static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
+				       struct page *page, int writable)
+{
+	size_t size = huge_page_size(hstate_vma(vma));
+
+	if (size == SZ_16K)
+		return __pte(pte_val(entry) & ~_PAGE_HUGE);
+	else
+		return entry;
+}
+#define arch_make_huge_pte arch_make_huge_pte
+#endif
+
 #endif /* _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H */
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 80bbc21b87f0..ee2243ba96cf 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -235,6 +235,8 @@ static int number_of_cells_per_pte(pmd_t *pmd, pte_basic_t val, int huge)
 		return PAGE_SIZE / SZ_4K;
 	else if (hugepd_ok(*((hugepd_t *)pmd)))
 		return 1;
+	else if (IS_ENABLED(CONFIG_PPC_4K_PAGES) && !(val & _PAGE_HUGE))
+		return SZ_16K / SZ_4K;
 	else
 		return SZ_512K / SZ_4K;
 }
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index e7ae2a2c4545..36c3800769fb 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -180,7 +180,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
 	if (!hpdp)
 		return NULL;
 
-	if (IS_ENABLED(CONFIG_PPC_8xx) && sz == SZ_512K)
+	if (IS_ENABLED(CONFIG_PPC_8xx) && pshift < PMD_SHIFT)
 		return pte_alloc_map(mm, (pmd_t *)hpdp, addr);
 
 	BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));
diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c
index 14514585db98..5872f69141d5 100644
--- a/arch/powerpc/mm/nohash/tlb.c
+++ b/arch/powerpc/mm/nohash/tlb.c
@@ -83,16 +83,12 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
 };
 #elif defined(CONFIG_PPC_8xx)
 struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
-	/* we only manage 4k and 16k pages as normal pages */
-#ifdef CONFIG_PPC_4K_PAGES
 	[MMU_PAGE_4K] = {
 		.shift	= 12,
 	},
-#else
 	[MMU_PAGE_16K] = {
 		.shift	= 14,
 	},
-#endif
 	[MMU_PAGE_512K] = {
 		.shift	= 19,
 	},
diff --git a/arch/powerpc/mm/ptdump/8xx.c b/arch/powerpc/mm/ptdump/8xx.c
index 8a797dcbf475..86da2a669680 100644
--- a/arch/powerpc/mm/ptdump/8xx.c
+++ b/arch/powerpc/mm/ptdump/8xx.c
@@ -11,8 +11,13 @@
 
 static const struct flag_info flag_array[] = {
 	{
+#ifdef CONFIG_PPC_16K_PAGES
 		.mask	= _PAGE_HUGE,
 		.val	= _PAGE_HUGE,
+#else
+		.mask	= _PAGE_SPS,
+		.val	= _PAGE_SPS,
+#endif
 		.set	= "huge",
 		.clear	= "    ",
 	}, {
diff --git a/include/uapi/asm-generic/hugetlb_encode.h b/include/uapi/asm-generic/hugetlb_encode.h
index b0f8e87235bd..4f3d5aaa11f5 100644
--- a/include/uapi/asm-generic/hugetlb_encode.h
+++ b/include/uapi/asm-generic/hugetlb_encode.h
@@ -20,6 +20,7 @@
 #define HUGETLB_FLAG_ENCODE_SHIFT	26
 #define HUGETLB_FLAG_ENCODE_MASK	0x3f
 
+#define HUGETLB_FLAG_ENCODE_16KB	(14 << HUGETLB_FLAG_ENCODE_SHIFT)
 #define HUGETLB_FLAG_ENCODE_64KB	(16 << HUGETLB_FLAG_ENCODE_SHIFT)
 #define HUGETLB_FLAG_ENCODE_512KB	(19 << HUGETLB_FLAG_ENCODE_SHIFT)
 #define HUGETLB_FLAG_ENCODE_1MB		(20 << HUGETLB_FLAG_ENCODE_SHIFT)
diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h
index 923cc162609c..f55bc680b5b0 100644
--- a/include/uapi/linux/mman.h
+++ b/include/uapi/linux/mman.h
@@ -27,6 +27,7 @@
 #define MAP_HUGE_SHIFT	HUGETLB_FLAG_ENCODE_SHIFT
 #define MAP_HUGE_MASK	HUGETLB_FLAG_ENCODE_MASK
 
+#define MAP_HUGE_16KB	HUGETLB_FLAG_ENCODE_16KB
 #define MAP_HUGE_64KB	HUGETLB_FLAG_ENCODE_64KB
 #define MAP_HUGE_512KB	HUGETLB_FLAG_ENCODE_512KB
 #define MAP_HUGE_1MB	HUGETLB_FLAG_ENCODE_1MB
-- 
cgit v1.2.3


From f8910ffa81b085030dc54814c85d338c26a3157e Mon Sep 17 00:00:00 2001
From: Xianting Tian <tian.xianting@h3c.com>
Date: Tue, 15 Sep 2020 15:18:17 +0800
Subject: ipmi:msghandler: retry to get device id on an error

With low probability, we fail to get the BMC's device id when loading
the ipmi driver, which causes BMC device registration to fail. When this
issue occurs, the kernel prints the following:

  [Wed Sep  9 19:52:03 2020] ipmi_si IPI0001:00: IPMI message handler:
     device id demangle failed: -22
  [Wed Sep  9 19:52:03 2020] IPMI BT: using default values
  [Wed Sep  9 19:52:03 2020] IPMI BT: req2rsp=5 secs retries=2
  [Wed Sep  9 19:52:03 2020] ipmi_si IPI0001:00: Unable to get the
     device id: -5
  [Wed Sep  9 19:52:04 2020] ipmi_si IPI0001:00: Unable to register
     device: error -5

When this issue happens, we want to manually unload the driver and try to
load it again, but it can't be unloaded by 'rmmod' as it is already 'in
use'.

We add a print in handle_one_recv_msg(), when this issue happens,
the msg we received is "Recv: 1c 01 d5", which means the data_len is 1,
data[0] is 0xd5 (completion code), which means "bmc cannot execute
command.  Command, or request parameter(s), not supported in present
state".  Debug code:
	static int handle_one_recv_msg(struct ipmi_smi *intf,
                               struct ipmi_smi_msg *msg) {
        	printk("Recv: %*ph\n", msg->rsp_size, msg->rsp);
		... ...
	}
Then in ipmi_demangle_device_id(), it returned '-EINVAL' as 'data_len < 7'
and 'data[0] != 0'.

We created this patch to retry the get device id command when this error
happens.  We reproduced the issue again and the first retry succeeded;
we finally got the correct msg and then all was ok:
Recv: 1c 01 00 01 81 05 84 02 af db 07 00 01 00 b9 00 10 00

So use a retry mechanism in this patch to give the BMC more opportunity to
correctly respond to the kernel when we receive specific completion codes.

Signed-off-by: Xianting Tian <tian.xianting@h3c.com>
Message-Id: <20200915071817.4484-1-tian.xianting@h3c.com>
[Cleaned up the verbiage a bit in the header and prints.]
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/ipmi_msghandler.c | 29 +++++++++++++++++++++++++----
 include/uapi/linux/ipmi_msgdefs.h   |  2 ++
 2 files changed, 27 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index 9f61a1a30f2f..e56409659459 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -34,6 +34,7 @@
 #include <linux/uuid.h>
 #include <linux/nospec.h>
 #include <linux/vmalloc.h>
+#include <linux/delay.h>
 
 #define IPMI_DRIVER_VERSION "39.2"
 
@@ -60,6 +61,9 @@ enum ipmi_panic_event_op {
 #else
 #define IPMI_PANIC_DEFAULT IPMI_SEND_PANIC_EVENT_NONE
 #endif
+
+#define GET_DEVICE_ID_MAX_RETRY	5
+
 static enum ipmi_panic_event_op ipmi_send_panic_event = IPMI_PANIC_DEFAULT;
 
 static int panic_op_write_handler(const char *val,
@@ -317,6 +321,7 @@ struct bmc_device {
 	int                    dyn_guid_set;
 	struct kref	       usecount;
 	struct work_struct     remove_work;
+	char		       cc; /* completion code */
 };
 #define to_bmc_device(x) container_of((x), struct bmc_device, pdev.dev)
 
@@ -2381,6 +2386,8 @@ static void bmc_device_id_handler(struct ipmi_smi *intf,
 			msg->msg.data, msg->msg.data_len, &intf->bmc->fetch_id);
 	if (rv) {
 		dev_warn(intf->si_dev, "device id demangle failed: %d\n", rv);
+		/* record completion code when error */
+		intf->bmc->cc = msg->msg.data[0];
 		intf->bmc->dyn_id_set = 0;
 	} else {
 		/*
@@ -2426,19 +2433,34 @@ send_get_device_id_cmd(struct ipmi_smi *intf)
 static int __get_device_id(struct ipmi_smi *intf, struct bmc_device *bmc)
 {
 	int rv;
-
-	bmc->dyn_id_set = 2;
+	unsigned int retry_count = 0;
 
 	intf->null_user_handler = bmc_device_id_handler;
 
+retry:
+	bmc->cc = 0;
+	bmc->dyn_id_set = 2;
+
 	rv = send_get_device_id_cmd(intf);
 	if (rv)
 		goto out_reset_handler;
 
 	wait_event(intf->waitq, bmc->dyn_id_set != 2);
 
-	if (!bmc->dyn_id_set)
+	if (!bmc->dyn_id_set) {
+		if ((bmc->cc == IPMI_DEVICE_IN_FW_UPDATE_ERR
+		     || bmc->cc ==  IPMI_DEVICE_IN_INIT_ERR
+		     || bmc->cc ==  IPMI_NOT_IN_MY_STATE_ERR)
+		     && ++retry_count <= GET_DEVICE_ID_MAX_RETRY) {
+			msleep(500);
+			dev_warn(intf->si_dev,
+			    "BMC returned 0x%2.2x, retry get bmc device id\n",
+			    bmc->cc);
+			goto retry;
+		}
+
 		rv = -EIO; /* Something went wrong in the fetch. */
+	}
 
 	/* dyn_id_set makes the id data available. */
 	smp_rmb();
@@ -3246,7 +3268,6 @@ channel_handler(struct ipmi_smi *intf, struct ipmi_recv_msg *msg)
 		/* It's the one we want */
 		if (msg->msg.data[0] != 0) {
 			/* Got an error from the channel, just go on. */
-
 			if (msg->msg.data[0] == IPMI_INVALID_COMMAND_ERR) {
 				/*
 				 * If the MC does not support this
diff --git a/include/uapi/linux/ipmi_msgdefs.h b/include/uapi/linux/ipmi_msgdefs.h
index c2b23a9fdf3d..0934af3b8037 100644
--- a/include/uapi/linux/ipmi_msgdefs.h
+++ b/include/uapi/linux/ipmi_msgdefs.h
@@ -69,6 +69,8 @@
 #define IPMI_ERR_MSG_TRUNCATED		0xc6
 #define IPMI_REQ_LEN_INVALID_ERR	0xc7
 #define IPMI_REQ_LEN_EXCEEDED_ERR	0xc8
+#define IPMI_DEVICE_IN_FW_UPDATE_ERR	0xd1
+#define IPMI_DEVICE_IN_INIT_ERR		0xd2
 #define IPMI_NOT_IN_MY_STATE_ERR	0xd5	/* IPMI 2.0 */
 #define IPMI_LOST_ARBITRATION_ERR	0x81
 #define IPMI_BUS_ERR			0x82
-- 
cgit v1.2.3


From 9a27a33027f22a716ce362be48d70ae0eb012ab7 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 14 Sep 2020 17:11:52 -0700
Subject: ethtool: add standard pause stats

Currently drivers have to report their pause frames statistics
via ethtool -S, and there is a wide variety of names used for
these statistics.

Add the two statistics defined in IEEE 802.3x to the standard
API. Create a new ethtool request header flag for including
statistics in the response to GET commands.

Always create the ETHTOOL_A_PAUSE_STATS nest in replies when
flag is set. Testing if driver declares the op is not a reliable
way of checking if any stats will actually be included and therefore
we don't want to give the impression that presence of
ETHTOOL_A_PAUSE_STATS indicates driver support.

Note that this patch does not include PFC counters, which may fit
better in dcbnl? But mostly I don't need them/have a setup to test
them so I haven't looked deeply into exposing them :)

v3:
 - add a helper for "uninitializing" stats, rather than a cryptic
   memset() (Andrew)

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ethtool-netlink.rst | 11 +++++
 include/linux/ethtool.h                      | 26 ++++++++++++
 include/uapi/linux/ethtool_netlink.h         | 18 +++++++-
 net/ethtool/pause.c                          | 63 +++++++++++++++++++++++++++-
 4 files changed, 116 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index d53bcb31645a..2c8e0ddf548e 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -68,6 +68,7 @@ the flags may not apply to requests. Recognized flags are:
   =================================  ===================================
   ``ETHTOOL_FLAG_COMPACT_BITSETS``   use compact format bitsets in reply
   ``ETHTOOL_FLAG_OMIT_REPLY``        omit optional reply (_SET and _ACT)
+  ``ETHTOOL_FLAG_STATS``             include optional device statistics
   =================================  ===================================
 
 New request flags should follow the general idea that if the flag is not set,
@@ -989,8 +990,18 @@ Kernel response contents:
   ``ETHTOOL_A_PAUSE_AUTONEG``            bool    pause autonegotiation
   ``ETHTOOL_A_PAUSE_RX``                 bool    receive pause frames
   ``ETHTOOL_A_PAUSE_TX``                 bool    transmit pause frames
+  ``ETHTOOL_A_PAUSE_STATS``              nested  pause statistics
   =====================================  ======  ==========================
 
+``ETHTOOL_A_PAUSE_STATS`` is reported if ``ETHTOOL_FLAG_STATS`` was set
+in ``ETHTOOL_A_HEADER_FLAGS``.
+It will be empty if the driver did not report any statistics. Drivers fill in
+the statistics in the following structure:
+
+.. kernel-doc:: include/linux/ethtool.h
+    :identifiers: ethtool_pause_stats
+
+Each member has a corresponding attribute defined.
 
 PAUSE_SET
 ============
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 969a80211df6..060b20f0b20f 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -241,6 +241,27 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
 	 ETHTOOL_COALESCE_PKT_RATE_LOW | ETHTOOL_COALESCE_PKT_RATE_HIGH | \
 	 ETHTOOL_COALESCE_RATE_SAMPLE_INTERVAL)
 
+#define ETHTOOL_STAT_NOT_SET	(~0ULL)
+
+/**
+ * struct ethtool_pause_stats - statistics for IEEE 802.3x pause frames
+ * @tx_pause_frames: transmitted pause frame count. Reported to user space
+ *	as %ETHTOOL_A_PAUSE_STAT_TX_FRAMES.
+ *
+ *	Equivalent to `30.3.4.2 aPAUSEMACCtrlFramesTransmitted`
+ *	from the standard.
+ *
+ * @rx_pause_frames: received pause frame count. Reported to user space
+ *	as %ETHTOOL_A_PAUSE_STAT_RX_FRAMES.
+ *
+ *	Equivalent to `30.3.4.3 aPAUSEMACCtrlFramesReceived`
+ *	from the standard.
+ */
+struct ethtool_pause_stats {
+	u64 tx_pause_frames;
+	u64 rx_pause_frames;
+};
+
 /**
  * struct ethtool_ops - optional netdev operations
  * @supported_coalesce_params: supported types of interrupt coalescing.
@@ -282,6 +303,9 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
  *	Returns a negative error code or zero.
  * @get_ringparam: Report ring sizes
  * @set_ringparam: Set ring sizes.  Returns a negative error code or zero.
+ * @get_pause_stats: Report pause frame statistics. Drivers must not zero
+ *	statistics which they don't report. The stats structure is initialized
+ *	to ETHTOOL_STAT_NOT_SET indicating driver does not report statistics.
  * @get_pauseparam: Report pause parameters
  * @set_pauseparam: Set pause parameters.  Returns a negative error code
  *	or zero.
@@ -418,6 +442,8 @@ struct ethtool_ops {
 				 struct ethtool_ringparam *);
 	int	(*set_ringparam)(struct net_device *,
 				 struct ethtool_ringparam *);
+	void	(*get_pause_stats)(struct net_device *dev,
+				   struct ethtool_pause_stats *pause_stats);
 	void	(*get_pauseparam)(struct net_device *,
 				  struct ethtool_pauseparam*);
 	int	(*set_pauseparam)(struct net_device *,
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index 5dcd24cb33ea..9cee6df01a10 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -91,9 +91,12 @@ enum {
 #define ETHTOOL_FLAG_COMPACT_BITSETS	(1 << 0)
 /* provide optional reply for SET or ACT requests */
 #define ETHTOOL_FLAG_OMIT_REPLY	(1 << 1)
+/* request statistics, if supported by the driver */
+#define ETHTOOL_FLAG_STATS		(1 << 2)
 
 #define ETHTOOL_FLAG_ALL (ETHTOOL_FLAG_COMPACT_BITSETS | \
-			  ETHTOOL_FLAG_OMIT_REPLY)
+			  ETHTOOL_FLAG_OMIT_REPLY | \
+			  ETHTOOL_FLAG_STATS)
 
 enum {
 	ETHTOOL_A_HEADER_UNSPEC,
@@ -376,12 +379,25 @@ enum {
 	ETHTOOL_A_PAUSE_AUTONEG,			/* u8 */
 	ETHTOOL_A_PAUSE_RX,				/* u8 */
 	ETHTOOL_A_PAUSE_TX,				/* u8 */
+	ETHTOOL_A_PAUSE_STATS,				/* nest - _PAUSE_STAT_* */
 
 	/* add new constants above here */
 	__ETHTOOL_A_PAUSE_CNT,
 	ETHTOOL_A_PAUSE_MAX = (__ETHTOOL_A_PAUSE_CNT - 1)
 };
 
+enum {
+	ETHTOOL_A_PAUSE_STAT_UNSPEC,
+	ETHTOOL_A_PAUSE_STAT_PAD,
+
+	ETHTOOL_A_PAUSE_STAT_TX_FRAMES,
+	ETHTOOL_A_PAUSE_STAT_RX_FRAMES,
+
+	/* add new constants above here */
+	__ETHTOOL_A_PAUSE_STAT_CNT,
+	ETHTOOL_A_PAUSE_STAT_MAX = (__ETHTOOL_A_PAUSE_STAT_CNT - 1)
+};
+
 /* EEE */
 
 enum {
diff --git a/net/ethtool/pause.c b/net/ethtool/pause.c
index 7aea35d1e8a5..1980aa7eb2b6 100644
--- a/net/ethtool/pause.c
+++ b/net/ethtool/pause.c
@@ -10,6 +10,7 @@ struct pause_req_info {
 struct pause_reply_data {
 	struct ethnl_reply_data		base;
 	struct ethtool_pauseparam	pauseparam;
+	struct ethtool_pause_stats	pausestat;
 };
 
 #define PAUSE_REPDATA(__reply_base) \
@@ -22,8 +23,15 @@ pause_get_policy[ETHTOOL_A_PAUSE_MAX + 1] = {
 	[ETHTOOL_A_PAUSE_AUTONEG]		= { .type = NLA_REJECT },
 	[ETHTOOL_A_PAUSE_RX]			= { .type = NLA_REJECT },
 	[ETHTOOL_A_PAUSE_TX]			= { .type = NLA_REJECT },
+	[ETHTOOL_A_PAUSE_STATS]			= { .type = NLA_REJECT },
 };
 
+static void ethtool_stats_init(u64 *stats, unsigned int n)
+{
+	while (n--)
+		stats[n] = ETHTOOL_STAT_NOT_SET;
+}
+
 static int pause_prepare_data(const struct ethnl_req_info *req_base,
 			      struct ethnl_reply_data *reply_base,
 			      struct genl_info *info)
@@ -34,10 +42,17 @@ static int pause_prepare_data(const struct ethnl_req_info *req_base,
 
 	if (!dev->ethtool_ops->get_pauseparam)
 		return -EOPNOTSUPP;
+
 	ret = ethnl_ops_begin(dev);
 	if (ret < 0)
 		return ret;
 	dev->ethtool_ops->get_pauseparam(dev, &data->pauseparam);
+	if (req_base->flags & ETHTOOL_FLAG_STATS &&
+	    dev->ethtool_ops->get_pause_stats) {
+		ethtool_stats_init((u64 *)&data->pausestat,
+				   sizeof(data->pausestat) / 8);
+		dev->ethtool_ops->get_pause_stats(dev, &data->pausestat);
+	}
 	ethnl_ops_complete(dev);
 
 	return 0;
@@ -46,9 +61,50 @@ static int pause_prepare_data(const struct ethnl_req_info *req_base,
 static int pause_reply_size(const struct ethnl_req_info *req_base,
 			    const struct ethnl_reply_data *reply_base)
 {
-	return nla_total_size(sizeof(u8)) +	/* _PAUSE_AUTONEG */
+	int n = nla_total_size(sizeof(u8)) +	/* _PAUSE_AUTONEG */
 		nla_total_size(sizeof(u8)) +	/* _PAUSE_RX */
 		nla_total_size(sizeof(u8));	/* _PAUSE_TX */
+
+	if (req_base->flags & ETHTOOL_FLAG_STATS)
+		n += nla_total_size(0) +	/* _PAUSE_STATS */
+			nla_total_size_64bit(sizeof(u64)) *
+				(ETHTOOL_A_PAUSE_STAT_MAX - 2);
+	return n;
+}
+
+static int ethtool_put_stat(struct sk_buff *skb, u64 val, u16 attrtype,
+			    u16 padtype)
+{
+	if (val == ETHTOOL_STAT_NOT_SET)
+		return 0;
+	if (nla_put_u64_64bit(skb, attrtype, val, padtype))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+static int pause_put_stats(struct sk_buff *skb,
+			   const struct ethtool_pause_stats *pause_stats)
+{
+	const u16 pad = ETHTOOL_A_PAUSE_STAT_PAD;
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, ETHTOOL_A_PAUSE_STATS);
+	if (!nest)
+		return -EMSGSIZE;
+
+	if (ethtool_put_stat(skb, pause_stats->tx_pause_frames,
+			     ETHTOOL_A_PAUSE_STAT_TX_FRAMES, pad) ||
+	    ethtool_put_stat(skb, pause_stats->rx_pause_frames,
+			     ETHTOOL_A_PAUSE_STAT_RX_FRAMES, pad))
+		goto err_cancel;
+
+	nla_nest_end(skb, nest);
+	return 0;
+
+err_cancel:
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
 }
 
 static int pause_fill_reply(struct sk_buff *skb,
@@ -63,6 +119,10 @@ static int pause_fill_reply(struct sk_buff *skb,
 	    nla_put_u8(skb, ETHTOOL_A_PAUSE_TX, !!pauseparam->tx_pause))
 		return -EMSGSIZE;
 
+	if (req_base->flags & ETHTOOL_FLAG_STATS &&
+	    pause_put_stats(skb, &data->pausestat))
+		return -EMSGSIZE;
+
 	return 0;
 }
 
@@ -89,6 +149,7 @@ pause_set_policy[ETHTOOL_A_PAUSE_MAX + 1] = {
 	[ETHTOOL_A_PAUSE_AUTONEG]		= { .type = NLA_U8 },
 	[ETHTOOL_A_PAUSE_RX]			= { .type = NLA_U8 },
 	[ETHTOOL_A_PAUSE_TX]			= { .type = NLA_U8 },
+	[ETHTOOL_A_PAUSE_STATS]			= { .type = NLA_REJECT },
 };
 
 int ethnl_set_pause(struct sk_buff *skb, struct genl_info *info)
-- 
cgit v1.2.3


From e2ce94dc1d89e0f76ddd202cea72e0f505083d0a Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Tue, 15 Sep 2020 11:40:57 +0300
Subject: devlink: introduce the health reporter test command

Introduce a test command for health reporters. A user can use this command
to trigger a test event on a reporter if the reporter supports it.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h        |  3 +++
 include/uapi/linux/devlink.h |  2 ++
 net/core/devlink.c           | 30 ++++++++++++++++++++++++++++++
 3 files changed, 35 insertions(+)

(limited to 'include/uapi')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index eaec0a8cc5ef..48b1c1ef1ebd 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -566,6 +566,7 @@ enum devlink_health_reporter_state {
  * @dump: callback to dump an object
  *        if priv_ctx is NULL, run a full dump
  * @diagnose: callback to diagnose the current status
+ * @test: callback to trigger a test event
  */
 
 struct devlink_health_reporter_ops {
@@ -578,6 +579,8 @@ struct devlink_health_reporter_ops {
 	int (*diagnose)(struct devlink_health_reporter *reporter,
 			struct devlink_fmsg *fmsg,
 			struct netlink_ext_ack *extack);
+	int (*test)(struct devlink_health_reporter *reporter,
+		    struct netlink_ext_ack *extack);
 };
 
 /**
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 40d35145c879..631f5bdf1707 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -122,6 +122,8 @@ enum devlink_command {
 	DEVLINK_CMD_TRAP_POLICER_NEW,
 	DEVLINK_CMD_TRAP_POLICER_DEL,
 
+	DEVLINK_CMD_HEALTH_REPORTER_TEST,
+
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
 	DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 19037f114307..e5b71f3c2d4d 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -6096,6 +6096,28 @@ devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
 	return 0;
 }
 
+static int devlink_nl_cmd_health_reporter_test_doit(struct sk_buff *skb,
+						    struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_health_reporter *reporter;
+	int err;
+
+	reporter = devlink_health_reporter_get_from_info(devlink, info);
+	if (!reporter)
+		return -EINVAL;
+
+	if (!reporter->ops->test) {
+		devlink_health_reporter_put(reporter);
+		return -EOPNOTSUPP;
+	}
+
+	err = reporter->ops->test(reporter, info->extack);
+
+	devlink_health_reporter_put(reporter);
+	return err;
+}
+
 struct devlink_stats {
 	u64 rx_bytes;
 	u64 rx_packets;
@@ -7316,6 +7338,14 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT |
 				  DEVLINK_NL_FLAG_NO_LOCK,
 	},
+	{
+		.cmd = DEVLINK_CMD_HEALTH_REPORTER_TEST,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_health_reporter_test_doit,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT |
+				  DEVLINK_NL_FLAG_NO_LOCK,
+	},
 	{
 		.cmd = DEVLINK_CMD_FLASH_UPDATE,
 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-- 
cgit v1.2.3


From ef15314aa5de955c6afd87d512e8b00f5ac08d06 Mon Sep 17 00:00:00 2001
From: YiFei Zhu <zhuyifei@google.com>
Date: Tue, 15 Sep 2020 16:45:40 -0700
Subject: bpf: Add BPF_PROG_BIND_MAP syscall

This syscall binds a map to a program. Returns success if the map is
already bound to the program.

Signed-off-by: YiFei Zhu <zhuyifei@google.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Cc: YiFei Zhu <zhuyifei1999@gmail.com>
Link: https://lore.kernel.org/bpf/20200915234543.3220146-3-sdf@google.com
---
 include/uapi/linux/bpf.h       |  7 +++++
 kernel/bpf/syscall.c           | 63 ++++++++++++++++++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h |  7 +++++
 3 files changed, 77 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 7dd314176df7..a22812561064 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -124,6 +124,7 @@ enum bpf_cmd {
 	BPF_ENABLE_STATS,
 	BPF_ITER_CREATE,
 	BPF_LINK_DETACH,
+	BPF_PROG_BIND_MAP,
 };
 
 enum bpf_map_type {
@@ -658,6 +659,12 @@ union bpf_attr {
 		__u32		flags;
 	} iter_create;
 
+	struct { /* struct used by BPF_PROG_BIND_MAP command */
+		__u32		prog_fd;
+		__u32		map_fd;
+		__u32		flags;		/* extra flags */
+	} prog_bind_map;
+
 } __attribute__((aligned(8)));
 
 /* The description below is an attempt at providing documentation to eBPF
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index a67b8c6746be..2ce32cad5c8e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -4161,6 +4161,78 @@ static int bpf_iter_create(union bpf_attr *attr)
 	return err;
 }
 
+#define BPF_PROG_BIND_MAP_LAST_FIELD prog_bind_map.flags
+
+/*
+ * Handle BPF_PROG_BIND_MAP: append the map behind prog_bind_map.map_fd to the
+ * used_maps array of the program behind prog_bind_map.prog_fd.
+ *
+ * Returns 0 on success or when the map is already in used_maps, -EINVAL
+ * if CHECK_ATTR() rejects @attr or flags is non-zero, -ENOMEM if the larger
+ * array cannot be allocated, or the error from bpf_prog_get()/bpf_map_get().
+ */
+static int bpf_prog_bind_map(union bpf_attr *attr)
+{
+	struct bpf_prog *prog;
+	struct bpf_map *map;
+	struct bpf_map **used_maps_old, **used_maps_new;
+	int i, ret = 0;
+
+	if (CHECK_ATTR(BPF_PROG_BIND_MAP))
+		return -EINVAL;
+
+	if (attr->prog_bind_map.flags)
+		return -EINVAL;
+
+	prog = bpf_prog_get(attr->prog_bind_map.prog_fd);
+	if (IS_ERR(prog))
+		return PTR_ERR(prog);
+
+	map = bpf_map_get(attr->prog_bind_map.map_fd);
+	if (IS_ERR(map)) {
+		ret = PTR_ERR(map);
+		goto out_prog_put;
+	}
+
+	mutex_lock(&prog->aux->used_maps_mutex);
+
+	used_maps_old = prog->aux->used_maps;
+
+	for (i = 0; i < prog->aux->used_map_cnt; i++)
+		if (used_maps_old[i] == map) {
+			/* Already bound: drop the reference taken above. */
+			bpf_map_put(map);
+			goto out_unlock;
+		}
+
+	used_maps_new = kmalloc_array(prog->aux->used_map_cnt + 1,
+				      sizeof(used_maps_new[0]),
+				      GFP_KERNEL);
+	if (!used_maps_new) {
+		ret = -ENOMEM;
+		goto out_unlock;
+	}
+
+	memcpy(used_maps_new, used_maps_old,
+	       sizeof(used_maps_old[0]) * prog->aux->used_map_cnt);
+	/* The reference from bpf_map_get() is handed over to the new array. */
+	used_maps_new[prog->aux->used_map_cnt] = map;
+
+	prog->aux->used_map_cnt++;
+	prog->aux->used_maps = used_maps_new;
+
+	kfree(used_maps_old);
+
+out_unlock:
+	mutex_unlock(&prog->aux->used_maps_mutex);
+
+	if (ret)
+		bpf_map_put(map);
+out_prog_put:
+	bpf_prog_put(prog);
+	return ret;
+}
+
 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
 {
 	union bpf_attr attr;
@@ -4294,6 +4366,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	case BPF_LINK_DETACH:
 		err = link_detach(&attr);
 		break;
+	case BPF_PROG_BIND_MAP:
+		err = bpf_prog_bind_map(&attr);
+		break;
 	default:
 		err = -EINVAL;
 		break;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 7dd314176df7..a22812561064 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -124,6 +124,7 @@ enum bpf_cmd {
 	BPF_ENABLE_STATS,
 	BPF_ITER_CREATE,
 	BPF_LINK_DETACH,
+	BPF_PROG_BIND_MAP,
 };
 
 enum bpf_map_type {
@@ -658,6 +659,12 @@ union bpf_attr {
 		__u32		flags;
 	} iter_create;
 
+	struct { /* struct used by BPF_PROG_BIND_MAP command */
+		__u32		prog_fd;
+		__u32		map_fd;
+		__u32		flags;		/* extra flags */
+	} prog_bind_map;
+
 } __attribute__((aligned(8)));
 
 /* The description below is an attempt at providing documentation to eBPF
-- 
cgit v1.2.3


From 7c920da30e04c2dd78d988e0cefb8e5bd2e48b26 Mon Sep 17 00:00:00 2001
From: Jonathan Marek <jonathan@marek.ca>
Date: Tue, 8 Sep 2020 09:10:09 -0400
Subject: misc: fastrpc: fix indentation error in uapi header

Use tabs instead of spaces.

Fixes: 2419e55e532d ("misc: fastrpc: add mmap/unmap support")

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Link: https://lore.kernel.org/r/20200908131013.19630-2-jonathan@marek.ca
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/misc/fastrpc.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h
index 07de2b7aac85..de31f0bd4779 100644
--- a/include/uapi/misc/fastrpc.h
+++ b/include/uapi/misc/fastrpc.h
@@ -10,8 +10,8 @@
 #define FASTRPC_IOCTL_INVOKE		_IOWR('R', 3, struct fastrpc_invoke)
 #define FASTRPC_IOCTL_INIT_ATTACH	_IO('R', 4)
 #define FASTRPC_IOCTL_INIT_CREATE	_IOWR('R', 5, struct fastrpc_init_create)
-#define FASTRPC_IOCTL_MMAP              _IOWR('R', 6, struct fastrpc_req_mmap)
-#define FASTRPC_IOCTL_MUNMAP            _IOWR('R', 7, struct fastrpc_req_munmap)
+#define FASTRPC_IOCTL_MMAP		_IOWR('R', 6, struct fastrpc_req_mmap)
+#define FASTRPC_IOCTL_MUNMAP		_IOWR('R', 7, struct fastrpc_req_munmap)
 
 struct fastrpc_invoke_args {
 	__u64 ptr;
-- 
cgit v1.2.3


From 6010d9befc8df899b61378adfd153f0b53075092 Mon Sep 17 00:00:00 2001
From: Jonathan Marek <jonathan@marek.ca>
Date: Tue, 8 Sep 2020 09:10:11 -0400
Subject: misc: fastrpc: add ioctl for attaching to sensors pd

Initializing sensors requires attaching to pd 2. Add an ioctl for that.

This corresponds to FASTRPC_INIT_ATTACH_SENSORS in the downstream driver.

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Link: https://lore.kernel.org/r/20200908131013.19630-4-jonathan@marek.ca
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/fastrpc.c      | 9 ++++++---
 include/uapi/misc/fastrpc.h | 1 +
 2 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index a7290ced73fb..994ab67bc2dc 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -1281,7 +1281,7 @@ static int fastrpc_dmabuf_alloc(struct fastrpc_user *fl, char __user *argp)
 	return 0;
 }
 
-static int fastrpc_init_attach(struct fastrpc_user *fl)
+static int fastrpc_init_attach(struct fastrpc_user *fl, int pd)
 {
 	struct fastrpc_invoke_args args[1];
 	int tgid = fl->tgid;
@@ -1292,7 +1292,7 @@ static int fastrpc_init_attach(struct fastrpc_user *fl)
 	args[0].fd = -1;
 	args[0].reserved = 0;
 	sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_ATTACH, 1, 0);
-	fl->pd = AUDIO_PD;
+	fl->pd = pd;
 
 	return fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE,
 				       sc, &args[0]);
@@ -1482,7 +1482,10 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd,
 		err = fastrpc_invoke(fl, argp);
 		break;
 	case FASTRPC_IOCTL_INIT_ATTACH:
-		err = fastrpc_init_attach(fl);
+		err = fastrpc_init_attach(fl, AUDIO_PD);
+		break;
+	case FASTRPC_IOCTL_INIT_ATTACH_SNS:
+		err = fastrpc_init_attach(fl, SENSORS_PD);
 		break;
 	case FASTRPC_IOCTL_INIT_CREATE:
 		err = fastrpc_init_create_process(fl, argp);
diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h
index de31f0bd4779..0a89f95463f6 100644
--- a/include/uapi/misc/fastrpc.h
+++ b/include/uapi/misc/fastrpc.h
@@ -12,6 +12,7 @@
 #define FASTRPC_IOCTL_INIT_CREATE	_IOWR('R', 5, struct fastrpc_init_create)
 #define FASTRPC_IOCTL_MMAP		_IOWR('R', 6, struct fastrpc_req_mmap)
 #define FASTRPC_IOCTL_MUNMAP		_IOWR('R', 7, struct fastrpc_req_munmap)
+#define FASTRPC_IOCTL_INIT_ATTACH_SNS	_IO('R', 8)
 
 struct fastrpc_invoke_args {
 	__u64 ptr;
-- 
cgit v1.2.3


From 4af8b3d3eb5032fe6f4a8104c48c176bf68a6946 Mon Sep 17 00:00:00 2001
From: Tingwei Zhang <tingwei@codeaurora.org>
Date: Wed, 16 Sep 2020 13:17:23 -0600
Subject: coresight: stm: Support marked packet

STP_PACKET_MARKED is not supported by STM currently.
Add STM_FLAG_MARKED to support marked packet in STM.

Signed-off-by: Tingwei Zhang <tingwei@codeaurora.org>
Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Link: https://lore.kernel.org/r/20200916191737.4001561-3-mathieu.poirier@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwtracing/coresight/coresight-stm.c | 11 +++++++----
 include/uapi/linux/coresight-stm.h          |  1 +
 2 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c
index 673d2f56ed1e..2ba819a47cf6 100644
--- a/drivers/hwtracing/coresight/coresight-stm.c
+++ b/drivers/hwtracing/coresight/coresight-stm.c
@@ -412,6 +412,7 @@ static ssize_t notrace stm_generic_packet(struct stm_data *stm_data,
 	void __iomem *ch_addr;
 	struct stm_drvdata *drvdata = container_of(stm_data,
 						   struct stm_drvdata, stm);
+	unsigned int stm_flags;
 
 	if (!(drvdata && local_read(&drvdata->mode)))
 		return -EACCES;
@@ -421,8 +422,9 @@ static ssize_t notrace stm_generic_packet(struct stm_data *stm_data,
 
 	ch_addr = stm_channel_addr(drvdata, channel);
 
-	flags = (flags == STP_PACKET_TIMESTAMPED) ? STM_FLAG_TIMESTAMPED : 0;
-	flags |= test_bit(channel, drvdata->chs.guaranteed) ?
+	stm_flags = (flags & STP_PACKET_TIMESTAMPED) ?
+			STM_FLAG_TIMESTAMPED : 0;
+	stm_flags |= test_bit(channel, drvdata->chs.guaranteed) ?
 			   STM_FLAG_GUARANTEED : 0;
 
 	if (size > drvdata->write_bytes)
@@ -432,7 +434,7 @@ static ssize_t notrace stm_generic_packet(struct stm_data *stm_data,
 
 	switch (packet) {
 	case STP_PACKET_FLAG:
-		ch_addr += stm_channel_off(STM_PKT_TYPE_FLAG, flags);
+		ch_addr += stm_channel_off(STM_PKT_TYPE_FLAG, stm_flags);
 
 		/*
 		 * The generic STM core sets a size of '0' on flag packets.
@@ -444,7 +446,8 @@ static ssize_t notrace stm_generic_packet(struct stm_data *stm_data,
 		break;
 
 	case STP_PACKET_DATA:
-		ch_addr += stm_channel_off(STM_PKT_TYPE_DATA, flags);
+		stm_flags |= (flags & STP_PACKET_MARKED) ? STM_FLAG_MARKED : 0;
+		ch_addr += stm_channel_off(STM_PKT_TYPE_DATA, stm_flags);
 		stm_send(ch_addr, payload, size,
 				drvdata->write_bytes);
 		break;
diff --git a/include/uapi/linux/coresight-stm.h b/include/uapi/linux/coresight-stm.h
index 8847dbf24151..7ff3709c01b8 100644
--- a/include/uapi/linux/coresight-stm.h
+++ b/include/uapi/linux/coresight-stm.h
@@ -5,6 +5,7 @@
 #include <linux/const.h>
 
 #define STM_FLAG_TIMESTAMPED   _BITUL(3)
+#define STM_FLAG_MARKED        _BITUL(4)
 #define STM_FLAG_GUARANTEED    _BITUL(7)
 
 /*
-- 
cgit v1.2.3


From 78a3ea5557137b0811f3c5a020afaafa7b61d6aa Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 17 Sep 2020 10:51:32 -0700
Subject: net: remove comments on struct rtnl_link_stats

We removed the misleading comments from struct rtnl_link_stats64
when we added proper kdoc. struct rtnl_link_stats has the same
inline comments, so remove them, too.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_link.h | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index bf4667403cab..c4b23f06f69e 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -7,24 +7,23 @@
 
 /* This struct should be in sync with struct rtnl_link_stats64 */
 struct rtnl_link_stats {
-	__u32	rx_packets;		/* total packets received	*/
-	__u32	tx_packets;		/* total packets transmitted	*/
-	__u32	rx_bytes;		/* total bytes received 	*/
-	__u32	tx_bytes;		/* total bytes transmitted	*/
-	__u32	rx_errors;		/* bad packets received		*/
-	__u32	tx_errors;		/* packet transmit problems	*/
-	__u32	rx_dropped;		/* no space in linux buffers	*/
-	__u32	tx_dropped;		/* no space available in linux	*/
-	__u32	multicast;		/* multicast packets received	*/
+	__u32	rx_packets;
+	__u32	tx_packets;
+	__u32	rx_bytes;
+	__u32	tx_bytes;
+	__u32	rx_errors;
+	__u32	tx_errors;
+	__u32	rx_dropped;
+	__u32	tx_dropped;
+	__u32	multicast;
 	__u32	collisions;
-
 	/* detailed rx_errors: */
 	__u32	rx_length_errors;
-	__u32	rx_over_errors;		/* receiver ring buff overflow	*/
-	__u32	rx_crc_errors;		/* recved pkt with crc error	*/
-	__u32	rx_frame_errors;	/* recv'd frame alignment error */
-	__u32	rx_fifo_errors;		/* recv'r fifo overrun		*/
-	__u32	rx_missed_errors;	/* receiver missed packet	*/
+	__u32	rx_over_errors;
+	__u32	rx_crc_errors;
+	__u32	rx_frame_errors;
+	__u32	rx_fifo_errors;
+	__u32	rx_missed_errors;
 
 	/* detailed tx_errors */
 	__u32	tx_aborted_errors;
@@ -37,7 +36,7 @@ struct rtnl_link_stats {
 	__u32	rx_compressed;
 	__u32	tx_compressed;
 
-	__u32	rx_nohandler;		/* dropped, no handler found	*/
+	__u32	rx_nohandler;
 };
 
 /**
-- 
cgit v1.2.3


From d65a977087f94f3bb97f351798d864556063109a Mon Sep 17 00:00:00 2001
From: Thomas Pedersen <thomas@adapt-ip.com>
Date: Tue, 8 Sep 2020 12:03:03 -0700
Subject: nl80211: advertise supported channel width in S1G

S1G supports 5 channel widths: 1, 2, 4, 8, and 16 MHz. One
channel width is allowed per frequency in each operating
class, so it makes more sense to advertise the specific
channel width allowed.

Signed-off-by: Thomas Pedersen <thomas@adapt-ip.com>
Link: https://lore.kernel.org/r/20200908190323.15814-3-thomas@adapt-ip.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 15 +++++++++++++++
 include/uapi/linux/nl80211.h | 15 +++++++++++++++
 net/wireless/nl80211.c       | 15 +++++++++++++++
 3 files changed, 45 insertions(+)

(limited to 'include/uapi')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7ad530912b21..2a7561743717 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -96,6 +96,16 @@ struct wiphy;
  * @IEEE80211_CHAN_NO_10MHZ: 10 MHz bandwidth is not permitted
  *	on this channel.
  * @IEEE80211_CHAN_NO_HE: HE operation is not permitted on this channel.
+ * @IEEE80211_CHAN_1MHZ: 1 MHz bandwidth is permitted
+ *	on this channel.
+ * @IEEE80211_CHAN_2MHZ: 2 MHz bandwidth is permitted
+ *	on this channel.
+ * @IEEE80211_CHAN_4MHZ: 4 MHz bandwidth is permitted
+ *	on this channel.
+ * @IEEE80211_CHAN_8MHZ: 8 MHz bandwidth is permitted
+ *	on this channel.
+ * @IEEE80211_CHAN_16MHZ: 16 MHz bandwidth is permitted
+ *	on this channel.
  *
  */
 enum ieee80211_channel_flags {
@@ -113,6 +123,11 @@ enum ieee80211_channel_flags {
 	IEEE80211_CHAN_NO_20MHZ		= 1<<11,
 	IEEE80211_CHAN_NO_10MHZ		= 1<<12,
 	IEEE80211_CHAN_NO_HE		= 1<<13,
+	IEEE80211_CHAN_1MHZ		= 1<<14,
+	IEEE80211_CHAN_2MHZ		= 1<<15,
+	IEEE80211_CHAN_4MHZ		= 1<<16,
+	IEEE80211_CHAN_8MHZ		= 1<<17,
+	IEEE80211_CHAN_16MHZ		= 1<<18,
 };
 
 #define IEEE80211_CHAN_NO_HT40 \
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 0584e0d349f0..4e119c6afa31 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3737,6 +3737,16 @@ enum nl80211_wmm_rule {
  * @NL80211_FREQUENCY_ATTR_NO_HE: HE operation is not allowed on this channel
  *	in current regulatory domain.
  * @NL80211_FREQUENCY_ATTR_OFFSET: frequency offset in KHz
+ * @NL80211_FREQUENCY_ATTR_1MHZ: 1 MHz operation is allowed
+ *	on this channel in current regulatory domain.
+ * @NL80211_FREQUENCY_ATTR_2MHZ: 2 MHz operation is allowed
+ *	on this channel in current regulatory domain.
+ * @NL80211_FREQUENCY_ATTR_4MHZ: 4 MHz operation is allowed
+ *	on this channel in current regulatory domain.
+ * @NL80211_FREQUENCY_ATTR_8MHZ: 8 MHz operation is allowed
+ *	on this channel in current regulatory domain.
+ * @NL80211_FREQUENCY_ATTR_16MHZ: 16 MHz operation is allowed
+ *	on this channel in current regulatory domain.
  * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number
  *	currently defined
  * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use
@@ -3768,6 +3778,11 @@ enum nl80211_frequency_attr {
 	NL80211_FREQUENCY_ATTR_WMM,
 	NL80211_FREQUENCY_ATTR_NO_HE,
 	NL80211_FREQUENCY_ATTR_OFFSET,
+	NL80211_FREQUENCY_ATTR_1MHZ,
+	NL80211_FREQUENCY_ATTR_2MHZ,
+	NL80211_FREQUENCY_ATTR_4MHZ,
+	NL80211_FREQUENCY_ATTR_8MHZ,
+	NL80211_FREQUENCY_ATTR_16MHZ,
 
 	/* keep last */
 	__NL80211_FREQUENCY_ATTR_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 52a35e788547..7da4d84bcc1a 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1010,6 +1010,21 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy,
 		if ((chan->flags & IEEE80211_CHAN_NO_HE) &&
 		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HE))
 			goto nla_put_failure;
+		if ((chan->flags & IEEE80211_CHAN_1MHZ) &&
+		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_1MHZ))
+			goto nla_put_failure;
+		if ((chan->flags & IEEE80211_CHAN_2MHZ) &&
+		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_2MHZ))
+			goto nla_put_failure;
+		if ((chan->flags & IEEE80211_CHAN_4MHZ) &&
+		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_4MHZ))
+			goto nla_put_failure;
+		if ((chan->flags & IEEE80211_CHAN_8MHZ) &&
+		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_8MHZ))
+			goto nla_put_failure;
+		if ((chan->flags & IEEE80211_CHAN_16MHZ) &&
+		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_16MHZ))
+			goto nla_put_failure;
 	}
 
 	if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER,
-- 
cgit v1.2.3


From 291c49ded2fda1fd0d7bd6056de99fe47d2332e6 Mon Sep 17 00:00:00 2001
From: Aloka Dixit <alokad@codeaurora.org>
Date: Fri, 11 Sep 2020 00:05:29 +0000
Subject: nl80211: Add FILS discovery support

The FILS discovery attribute, NL80211_ATTR_FILS_DISCOVERY, is a nested
attribute that supports the following parameters, as given in
IEEE Std 802.11ai-2016, Annex C.3 MIB detail:
(1) NL80211_FILS_DISCOVERY_ATTR_INT_MIN - Minimum packet interval
(2) NL80211_FILS_DISCOVERY_ATTR_INT_MAX - Maximum packet interval
(3) NL80211_FILS_DISCOVERY_ATTR_TMPL - Template data

Signed-off-by: Aloka Dixit <alokad@codeaurora.org>
Link: https://lore.kernel.org/r/20200805011838.28166-2-alokad@codeaurora.org
[fix attribute and other names, use NLA_RANGE(), use policy only once]
Link: https://lore.kernel.org/r/010101747a7b38a8-306f06b2-9061-4baf-81c1-054a42a18e22-000000@us-west-2.amazonses.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 19 +++++++++++++++++
 include/uapi/linux/nl80211.h | 44 +++++++++++++++++++++++++++++++++++++++
 net/wireless/nl80211.c       | 49 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 112 insertions(+)

(limited to 'include/uapi')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 44db9f80e495..c90700727945 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1082,6 +1082,23 @@ struct cfg80211_acl_data {
 	struct mac_address mac_addrs[];
 };
 
+/**
+ * struct cfg80211_fils_discovery - FILS discovery parameters from
+ * IEEE Std 802.11ai-2016, Annex C.3 MIB detail.
+ *
+ * @min_interval: Minimum packet interval in TUs (0 - 10000)
+ * @max_interval: Maximum packet interval in TUs (0 - 10000)
+ * @tmpl_len: Template length
+ * @tmpl: Template data for FILS discovery frame including the action
+ *	frame headers.
+ */
+struct cfg80211_fils_discovery {
+	u32 min_interval;
+	u32 max_interval;
+	size_t tmpl_len;
+	const u8 *tmpl;
+};
+
 /**
  * enum cfg80211_ap_settings_flags - AP settings flags
  *
@@ -1129,6 +1146,7 @@ enum cfg80211_ap_settings_flags {
  * @he_obss_pd: OBSS Packet Detection settings
  * @he_bss_color: BSS Color settings
  * @he_oper: HE operation IE (or %NULL if HE isn't enabled)
+ * @fils_discovery: FILS discovery transmission parameters
  */
 struct cfg80211_ap_settings {
 	struct cfg80211_chan_def chandef;
@@ -1159,6 +1177,7 @@ struct cfg80211_ap_settings {
 	u32 flags;
 	struct ieee80211_he_obss_pd he_obss_pd;
 	struct cfg80211_he_bss_color he_bss_color;
+	struct cfg80211_fils_discovery fils_discovery;
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 4e119c6afa31..ad2bea3b07e3 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2513,6 +2513,10 @@ enum nl80211_commands {
  * @NL80211_ATTR_HE_6GHZ_CAPABILITY: HE 6 GHz Band Capability element (from
  *	association request when used with NL80211_CMD_NEW_STATION).
  *
+ * @NL80211_ATTR_FILS_DISCOVERY: Optional parameter to configure FILS
+ *	discovery. It is a nested attribute, see
+ *	&enum nl80211_fils_discovery_attributes.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -2995,6 +2999,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_HE_6GHZ_CAPABILITY,
 
+	NL80211_ATTR_FILS_DISCOVERY,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -5867,6 +5873,9 @@ enum nl80211_feature_flags {
  * @NL80211_EXT_FEATURE_SAE_OFFLOAD_AP: Device wants to do SAE authentication
  *	in AP mode (SAE password is passed as part of the start AP command).
  *
+ * @NL80211_EXT_FEATURE_FILS_DISCOVERY: Driver/device supports FILS discovery
+ *	frames transmission
+ *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
  */
@@ -5925,6 +5934,7 @@ enum nl80211_ext_feature_index {
 	NL80211_EXT_FEATURE_OPERATING_CHANNEL_VALIDATION,
 	NL80211_EXT_FEATURE_4WAY_HANDSHAKE_AP_PSK,
 	NL80211_EXT_FEATURE_SAE_OFFLOAD_AP,
+	NL80211_EXT_FEATURE_FILS_DISCOVERY,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
@@ -7019,4 +7029,38 @@ enum nl80211_iftype_akm_attributes {
 	NL80211_IFTYPE_AKM_ATTR_MAX = __NL80211_IFTYPE_AKM_ATTR_LAST - 1,
 };
 
+/**
+ * enum nl80211_fils_discovery_attributes - FILS discovery configuration
+ * from IEEE Std 802.11ai-2016, Annex C.3 MIB detail.
+ *
+ * @__NL80211_FILS_DISCOVERY_ATTR_INVALID: Invalid
+ *
+ * @NL80211_FILS_DISCOVERY_ATTR_INT_MIN: Minimum packet interval (u32, TU).
+ *	Allowed range: 0..10000 (TU = Time Unit)
+ * @NL80211_FILS_DISCOVERY_ATTR_INT_MAX: Maximum packet interval (u32, TU).
+ *	Allowed range: 0..10000 (TU = Time Unit)
+ * @NL80211_FILS_DISCOVERY_ATTR_TMPL: Template data for FILS discovery action
+ *	frame including the headers.
+ *
+ * @__NL80211_FILS_DISCOVERY_ATTR_LAST: Internal
+ * @NL80211_FILS_DISCOVERY_ATTR_MAX: highest attribute
+ */
+enum nl80211_fils_discovery_attributes {
+	__NL80211_FILS_DISCOVERY_ATTR_INVALID,
+
+	NL80211_FILS_DISCOVERY_ATTR_INT_MIN,
+	NL80211_FILS_DISCOVERY_ATTR_INT_MAX,
+	NL80211_FILS_DISCOVERY_ATTR_TMPL,
+
+	/* keep last */
+	__NL80211_FILS_DISCOVERY_ATTR_LAST,
+	NL80211_FILS_DISCOVERY_ATTR_MAX = __NL80211_FILS_DISCOVERY_ATTR_LAST - 1
+};
+
+/*
+ * FILS discovery template minimum length with action frame headers and
+ * mandatory fields.
+ */
+#define NL80211_FILS_DISCOVERY_TMPL_MIN_LEN 42
+
 #endif /* __LINUX_NL80211_H */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 5d9d51cfc653..afe782887ca9 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -376,6 +376,15 @@ nl80211_tid_config_attr_policy[NL80211_TID_CONFIG_ATTR_MAX + 1] = {
 			NLA_POLICY_NESTED(nl80211_txattr_policy),
 };
 
+static const struct nla_policy
+nl80211_fils_discovery_policy[NL80211_FILS_DISCOVERY_ATTR_MAX + 1] = {
+	[NL80211_FILS_DISCOVERY_ATTR_INT_MIN] = NLA_POLICY_MAX(NLA_U32, 10000),
+	[NL80211_FILS_DISCOVERY_ATTR_INT_MAX] = NLA_POLICY_MAX(NLA_U32, 10000),
+	[NL80211_FILS_DISCOVERY_ATTR_TMPL] = NLA_POLICY_RANGE(NLA_BINARY,
+			 NL80211_FILS_DISCOVERY_TMPL_MIN_LEN,
+			 IEEE80211_MAX_DATA_LEN),
+};
+
 static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD },
 	[NL80211_ATTR_WIPHY] = { .type = NLA_U32 },
@@ -684,6 +693,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_SCAN_FREQ_KHZ] = { .type = NLA_NESTED },
 	[NL80211_ATTR_HE_6GHZ_CAPABILITY] =
 		NLA_POLICY_EXACT_LEN(sizeof(struct ieee80211_he_6ghz_capa)),
+	[NL80211_ATTR_FILS_DISCOVERY] =
+		NLA_POLICY_NESTED(nl80211_fils_discovery_policy),
 };
 
 /* policy for the key attributes */
@@ -4874,6 +4885,36 @@ static int nl80211_parse_he_bss_color(struct nlattr *attrs,
 	return 0;
 }
 
+static int nl80211_parse_fils_discovery(struct cfg80211_registered_device *rdev,
+					struct nlattr *attrs,
+					struct cfg80211_ap_settings *params)
+{
+	struct nlattr *tb[NL80211_FILS_DISCOVERY_ATTR_MAX + 1];
+	int ret;
+	struct cfg80211_fils_discovery *fd = &params->fils_discovery;
+
+	if (!wiphy_ext_feature_isset(&rdev->wiphy,
+				     NL80211_EXT_FEATURE_FILS_DISCOVERY))
+		return -EINVAL;
+
+	ret = nla_parse_nested(tb, NL80211_FILS_DISCOVERY_ATTR_MAX, attrs,
+			       NULL, NULL);
+	if (ret)
+		return ret;
+
+	if (!tb[NL80211_FILS_DISCOVERY_ATTR_INT_MIN] ||
+	    !tb[NL80211_FILS_DISCOVERY_ATTR_INT_MAX] ||
+	    !tb[NL80211_FILS_DISCOVERY_ATTR_TMPL])
+		return -EINVAL;
+
+	fd->tmpl_len = nla_len(tb[NL80211_FILS_DISCOVERY_ATTR_TMPL]);
+	fd->tmpl = nla_data(tb[NL80211_FILS_DISCOVERY_ATTR_TMPL]);
+	fd->min_interval = nla_get_u32(tb[NL80211_FILS_DISCOVERY_ATTR_INT_MIN]);
+	fd->max_interval = nla_get_u32(tb[NL80211_FILS_DISCOVERY_ATTR_INT_MAX]);
+
+	return 0;
+}
+
 static void nl80211_check_ap_rate_selectors(struct cfg80211_ap_settings *params,
 					    const u8 *rates)
 {
@@ -5182,6 +5223,14 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
 			goto out;
 	}
 
+	if (info->attrs[NL80211_ATTR_FILS_DISCOVERY]) {
+		err = nl80211_parse_fils_discovery(rdev,
+						   info->attrs[NL80211_ATTR_FILS_DISCOVERY],
+						   &params);
+		if (err)
+			goto out;
+	}
+
 	nl80211_calculate_ap_params(&params);
 
 	if (info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT])
-- 
cgit v1.2.3


From 7443dcd1f1718a355e9c4ebeb7e95c3f9f27bb5f Mon Sep 17 00:00:00 2001
From: Aloka Dixit <alokad@codeaurora.org>
Date: Fri, 11 Sep 2020 00:33:00 +0000
Subject: nl80211: Unsolicited broadcast probe response support

This patch adds new attributes to support unsolicited broadcast
probe response transmission, which is used for in-band
discovery in the 6GHz band (IEEE P802.11ax/D6.0 26.17.2.3.2, AP behavior
for fast passive scanning).
The new attribute, NL80211_ATTR_UNSOL_BCAST_PROBE_RESP, is a nested
attribute that supports the following parameters:
(1) NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT - Packet interval
(2) NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL - Template data

Signed-off-by: Aloka Dixit <alokad@codeaurora.org>
Link: https://lore.kernel.org/r/010101747a946698-aac263ae-2ed3-4dab-9590-0bc7131214e1-000000@us-west-2.amazonses.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 18 +++++++++++++++++
 include/uapi/linux/nl80211.h | 36 ++++++++++++++++++++++++++++++++++
 net/wireless/nl80211.c       | 46 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 100 insertions(+)

(limited to 'include/uapi')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index c90700727945..93d666a571da 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1099,6 +1099,22 @@ struct cfg80211_fils_discovery {
 	const u8 *tmpl;
 };
 
+/**
+ * struct cfg80211_unsol_bcast_probe_resp - Unsolicited broadcast probe
+ *	response parameters in 6GHz.
+ *
+ * @interval: Packet interval in TUs. Maximum allowed is 20 TU, as mentioned
+ *	in IEEE P802.11ax/D6.0 26.17.2.3.2 - AP behavior for fast passive
+ *	scanning
+ * @tmpl_len: Template length
+ * @tmpl: Template data for probe response
+ */
+struct cfg80211_unsol_bcast_probe_resp {
+	u32 interval;
+	size_t tmpl_len;
+	const u8 *tmpl;
+};
+
 /**
  * enum cfg80211_ap_settings_flags - AP settings flags
  *
@@ -1147,6 +1163,7 @@ enum cfg80211_ap_settings_flags {
  * @he_bss_color: BSS Color settings
  * @he_oper: HE operation IE (or %NULL if HE isn't enabled)
  * @fils_discovery: FILS discovery transmission parameters
+ * @unsol_bcast_probe_resp: Unsolicited broadcast probe response parameters
  */
 struct cfg80211_ap_settings {
 	struct cfg80211_chan_def chandef;
@@ -1178,6 +1195,7 @@ struct cfg80211_ap_settings {
 	struct ieee80211_he_obss_pd he_obss_pd;
 	struct cfg80211_he_bss_color he_bss_color;
 	struct cfg80211_fils_discovery fils_discovery;
+	struct cfg80211_unsol_bcast_probe_resp unsol_bcast_probe_resp;
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index ad2bea3b07e3..bdc90b8dfd24 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2517,6 +2517,10 @@ enum nl80211_commands {
  *	discovery. It is a nested attribute, see
  *	&enum nl80211_fils_discovery_attributes.
  *
+ * @NL80211_ATTR_UNSOL_BCAST_PROBE_RESP: Optional parameter to configure
+ *	unsolicited broadcast probe response. It is a nested attribute, see
+ *	&enum nl80211_unsol_bcast_probe_resp_attributes.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -3001,6 +3005,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_FILS_DISCOVERY,
 
+	NL80211_ATTR_UNSOL_BCAST_PROBE_RESP,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -5876,6 +5882,9 @@ enum nl80211_feature_flags {
  * @NL80211_EXT_FEATURE_FILS_DISCOVERY: Driver/device supports FILS discovery
  *	frames transmission
  *
+ * @NL80211_EXT_FEATURE_UNSOL_BCAST_PROBE_RESP: Driver/device supports
+ *	unsolicited broadcast probe response transmission
+ *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
  */
@@ -5935,6 +5944,7 @@ enum nl80211_ext_feature_index {
 	NL80211_EXT_FEATURE_4WAY_HANDSHAKE_AP_PSK,
 	NL80211_EXT_FEATURE_SAE_OFFLOAD_AP,
 	NL80211_EXT_FEATURE_FILS_DISCOVERY,
+	NL80211_EXT_FEATURE_UNSOL_BCAST_PROBE_RESP,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
@@ -7063,4 +7073,30 @@ enum nl80211_fils_discovery_attributes {
  */
 #define NL80211_FILS_DISCOVERY_TMPL_MIN_LEN 42
 
+/**
+ * enum nl80211_unsol_bcast_probe_resp_attributes - Unsolicited broadcast probe
+ *	response configuration. Applicable only in 6GHz.
+ *
+ * @__NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INVALID: Invalid
+ *
+ * @NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT: Maximum packet interval (u32, TU).
+ *	Allowed range: 0..20 (TU = Time Unit). IEEE P802.11ax/D6.0
+ *	26.17.2.3.2 (AP behavior for fast passive scanning).
+ * @NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL: Unsolicited broadcast probe response
+ *	frame template (binary).
+ *
+ * @__NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_LAST: Internal
+ * @NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_MAX: highest attribute
+ */
+enum nl80211_unsol_bcast_probe_resp_attributes {
+	__NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INVALID,
+
+	NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT,
+	NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL,
+
+	/* keep last */
+	__NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_LAST,
+	NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_MAX =
+		__NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_LAST - 1
+};
 #endif /* __LINUX_NL80211_H */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index afe782887ca9..1a212db7a300 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -385,6 +385,13 @@ nl80211_fils_discovery_policy[NL80211_FILS_DISCOVERY_ATTR_MAX + 1] = {
 			 IEEE80211_MAX_DATA_LEN),
 };
 
+static const struct nla_policy
+nl80211_unsol_bcast_probe_resp_policy[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_MAX + 1] = {
+	[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT] = NLA_POLICY_MAX(NLA_U32, 20),
+	[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL] = { .type = NLA_BINARY,
+						       .len = IEEE80211_MAX_DATA_LEN }
+};
+
 static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD },
 	[NL80211_ATTR_WIPHY] = { .type = NLA_U32 },
@@ -695,6 +702,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 		NLA_POLICY_EXACT_LEN(sizeof(struct ieee80211_he_6ghz_capa)),
 	[NL80211_ATTR_FILS_DISCOVERY] =
 		NLA_POLICY_NESTED(nl80211_fils_discovery_policy),
+	[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP] =
+		NLA_POLICY_NESTED(nl80211_unsol_bcast_probe_resp_policy),
 };
 
 /* policy for the key attributes */
@@ -4915,6 +4924,35 @@ static int nl80211_parse_fils_discovery(struct cfg80211_registered_device *rdev,
 	return 0;
 }
 
+static int
+nl80211_parse_unsol_bcast_probe_resp(struct cfg80211_registered_device *rdev,
+				     struct nlattr *attrs,
+				     struct cfg80211_ap_settings *params)
+{
+	struct nlattr *tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_MAX + 1];
+	int ret;
+	struct cfg80211_unsol_bcast_probe_resp *presp =
+					&params->unsol_bcast_probe_resp;
+
+	if (!wiphy_ext_feature_isset(&rdev->wiphy,
+				     NL80211_EXT_FEATURE_UNSOL_BCAST_PROBE_RESP))
+		return -EINVAL;
+
+	ret = nla_parse_nested(tb, NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_MAX,
+			       attrs, NULL, NULL);
+	if (ret)
+		return ret;
+
+	if (!tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT] ||
+	    !tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL])
+		return -EINVAL;
+
+	presp->tmpl = nla_data(tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL]);
+	presp->tmpl_len = nla_len(tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL]);
+	presp->interval = nla_get_u32(tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT]);
+	return 0;
+}
+
 static void nl80211_check_ap_rate_selectors(struct cfg80211_ap_settings *params,
 					    const u8 *rates)
 {
@@ -5231,6 +5269,14 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
 			goto out;
 	}
 
+	if (info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP]) {
+		err = nl80211_parse_unsol_bcast_probe_resp(
+			rdev, info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP],
+			&params);
+		if (err)
+			goto out;
+	}
+
 	nl80211_calculate_ap_params(&params);
 
 	if (info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT])
-- 
cgit v1.2.3


From c6ff213fe5b8696c9539a1b34ff03de9306dfff9 Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Tue, 8 Sep 2020 18:01:48 +0200
Subject: fuse: add submount support to <uapi/linux/fuse.h>

- Add fuse_attr.flags

- Add FUSE_ATTR_SUBMOUNT

  This is a flag for fuse_attr.flags that indicates that the given entry
  resides on a different filesystem than the parent, and as such should
  have a different st_dev.

- Add FUSE_SUBMOUNTS

  The client sets this flag if it supports automounting directories.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/uapi/linux/fuse.h | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 8899e4862309..7233502ea991 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -172,6 +172,9 @@
  *  - add FUSE_WRITE_KILL_PRIV flag
  *  - add FUSE_SETUPMAPPING and FUSE_REMOVEMAPPING
  *  - add map_alignment to fuse_init_out, add FUSE_MAP_ALIGNMENT flag
+ *
+ *  7.32
+ *  - add flags to fuse_attr, add FUSE_ATTR_SUBMOUNT, add FUSE_SUBMOUNTS
  */
 
 #ifndef _LINUX_FUSE_H
@@ -207,7 +210,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 31
+#define FUSE_KERNEL_MINOR_VERSION 32
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -231,7 +234,7 @@ struct fuse_attr {
 	uint32_t	gid;
 	uint32_t	rdev;
 	uint32_t	blksize;
-	uint32_t	padding;
+	uint32_t	flags;
 };
 
 struct fuse_kstatfs {
@@ -316,6 +319,7 @@ struct fuse_file_lock {
  * FUSE_MAP_ALIGNMENT: init_out.map_alignment contains log2(byte alignment) for
  *		       foffset and moffset fields in struct
  *		       fuse_setupmapping_out and fuse_removemapping_one.
+ * FUSE_SUBMOUNTS: kernel supports auto-mounting directory submounts
  */
 #define FUSE_ASYNC_READ		(1 << 0)
 #define FUSE_POSIX_LOCKS	(1 << 1)
@@ -344,6 +348,7 @@ struct fuse_file_lock {
 #define FUSE_NO_OPENDIR_SUPPORT (1 << 24)
 #define FUSE_EXPLICIT_INVAL_DATA (1 << 25)
 #define FUSE_MAP_ALIGNMENT	(1 << 26)
+#define FUSE_SUBMOUNTS		(1 << 27)
 
 /**
  * CUSE INIT request/reply flags
@@ -419,6 +424,13 @@ struct fuse_file_lock {
  */
 #define FUSE_FSYNC_FDATASYNC	(1 << 0)
 
+/**
+ * fuse_attr flags
+ *
+ * FUSE_ATTR_SUBMOUNT: Object is a submount root
+ */
+#define FUSE_ATTR_SUBMOUNT      (1 << 0)
+
 enum fuse_opcode {
 	FUSE_LOOKUP		= 1,
 	FUSE_FORGET		= 2,  /* no reply */
-- 
cgit v1.2.3


From f92970c694b36a4dbac2b650b173c78c0f0954cc Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Thu, 17 Sep 2020 18:13:23 -0700
Subject: devlink: add timeout information to status_notify

Add a timeout element to the DEVLINK_CMD_FLASH_UPDATE_STATUS
netlink message for use by a userland utility to show that
a particular firmware flash activity may take a long but
bounded time to finish.  Also add a handy helper for drivers
to make use of the new timeout value.

UI usage hints:
 - if non-zero, add timeout display to the end of the status line
 	[component] status_msg  ( Xm Ys : Am Bs )
     using the timeout value for Am Bs and updating the Xm Ys
     every second
 - if the timeout expires while awaiting the next update,
   display something like
 	[component] status_msg  ( timeout reached : Am Bs )
 - if new status notify messages are received, remove
   the timeout and start over

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h        |  4 ++++
 include/uapi/linux/devlink.h |  3 +++
 net/core/devlink.c           | 29 +++++++++++++++++++++++------
 3 files changed, 30 insertions(+), 6 deletions(-)

(limited to 'include/uapi')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 48b1c1ef1ebd..be132c17fbcc 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1403,6 +1403,10 @@ void devlink_flash_update_status_notify(struct devlink *devlink,
 					const char *component,
 					unsigned long done,
 					unsigned long total);
+void devlink_flash_update_timeout_notify(struct devlink *devlink,
+					 const char *status_msg,
+					 const char *component,
+					 unsigned long timeout);
 
 int devlink_traps_register(struct devlink *devlink,
 			   const struct devlink_trap *traps,
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 631f5bdf1707..a2ecc8b00611 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -462,6 +462,9 @@ enum devlink_attr {
 
 	DEVLINK_ATTR_PORT_EXTERNAL,		/* u8 */
 	DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,	/* u32 */
+
+	DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT,	/* u64 */
+
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index e5b71f3c2d4d..a32e15851119 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -3024,7 +3024,9 @@ static int devlink_nl_flash_update_fill(struct sk_buff *msg,
 					enum devlink_command cmd,
 					const char *status_msg,
 					const char *component,
-					unsigned long done, unsigned long total)
+					unsigned long done,
+					unsigned long total,
+					unsigned long timeout)
 {
 	void *hdr;
 
@@ -3052,6 +3054,9 @@ static int devlink_nl_flash_update_fill(struct sk_buff *msg,
 	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL,
 			      total, DEVLINK_ATTR_PAD))
 		goto nla_put_failure;
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT,
+			      timeout, DEVLINK_ATTR_PAD))
+		goto nla_put_failure;
 
 out:
 	genlmsg_end(msg, hdr);
@@ -3067,7 +3072,8 @@ static void __devlink_flash_update_notify(struct devlink *devlink,
 					  const char *status_msg,
 					  const char *component,
 					  unsigned long done,
-					  unsigned long total)
+					  unsigned long total,
+					  unsigned long timeout)
 {
 	struct sk_buff *msg;
 	int err;
@@ -3081,7 +3087,7 @@ static void __devlink_flash_update_notify(struct devlink *devlink,
 		return;
 
 	err = devlink_nl_flash_update_fill(msg, devlink, cmd, status_msg,
-					   component, done, total);
+					   component, done, total, timeout);
 	if (err)
 		goto out_free_msg;
 
@@ -3097,7 +3103,7 @@ void devlink_flash_update_begin_notify(struct devlink *devlink)
 {
 	__devlink_flash_update_notify(devlink,
 				      DEVLINK_CMD_FLASH_UPDATE,
-				      NULL, NULL, 0, 0);
+				      NULL, NULL, 0, 0, 0);
 }
 EXPORT_SYMBOL_GPL(devlink_flash_update_begin_notify);
 
@@ -3105,7 +3111,7 @@ void devlink_flash_update_end_notify(struct devlink *devlink)
 {
 	__devlink_flash_update_notify(devlink,
 				      DEVLINK_CMD_FLASH_UPDATE_END,
-				      NULL, NULL, 0, 0);
+				      NULL, NULL, 0, 0, 0);
 }
 EXPORT_SYMBOL_GPL(devlink_flash_update_end_notify);
 
@@ -3117,10 +3123,21 @@ void devlink_flash_update_status_notify(struct devlink *devlink,
 {
 	__devlink_flash_update_notify(devlink,
 				      DEVLINK_CMD_FLASH_UPDATE_STATUS,
-				      status_msg, component, done, total);
+				      status_msg, component, done, total, 0);
 }
 EXPORT_SYMBOL_GPL(devlink_flash_update_status_notify);
 
+void devlink_flash_update_timeout_notify(struct devlink *devlink,
+					 const char *status_msg,
+					 const char *component,
+					 unsigned long timeout)
+{
+	__devlink_flash_update_notify(devlink,
+				      DEVLINK_CMD_FLASH_UPDATE_STATUS,
+				      status_msg, component, 0, 0, timeout);
+}
+EXPORT_SYMBOL_GPL(devlink_flash_update_timeout_notify);
+
 static int devlink_nl_cmd_flash_update(struct sk_buff *skb,
 				       struct genl_info *info)
 {
-- 
cgit v1.2.3


From daef1ee3798b25e8464b8eb618eaa74b8f423ac7 Mon Sep 17 00:00:00 2001
From: Tuong Lien <tuong.t.lien@dektech.com.au>
Date: Fri, 18 Sep 2020 08:17:27 +0700
Subject: tipc: introduce encryption master key

In addition to the supported cluster & per-node encryption keys for the
en/decryption of TIPC messages, we now introduce one option for user to
set a cluster key as 'master key', which is simply a symmetric key like
the former but has a longer life cycle. It has two purposes:

- Authentication of new member nodes in the cluster. New nodes, having
  no knowledge of current session keys in the cluster will still be
  able to join the cluster as long as they know the master key. This is
  because all neighbor discovery (LINK_CONFIG) messages must be
  encrypted with this key.

- Encryption of session encryption keys during automatic exchange and
  update of those. This is a feature we will introduce in a later commit
  in this series.

We insert the new key into the currently unused slot 0 in the key array
and start using it immediately once the user has set it.
After joining, a node only knowing the master key should be able to
communicate fully with existing nodes in the cluster, although those
nodes may have their own session keys activated (i.e. not the master
one). To support this, we define a 'grace period', starting from the
time a node itself reports having no RX keys, so the existing nodes will
use the master key for encryption instead. The grace period can be
extended but will automatically stop after e.g. 5 seconds without a new
report. This is also the basis for the later key exchange feature, as
the new node will be unable to decrypt anything without the support of
the master key.

For the user to set a master key, we define a new netlink flag -
'TIPC_NLA_NODE_KEY_MASTER', so it can be added to the current 'set key'
netlink command to specify that the key being set is a master key.

Above all, the traditional cluster/per-node key mechanism is guaranteed
to work when the user chooses not to use this master key option. It is
also compatible with legacy nodes that do not support the feature.

Even this master key can be updated without any interruption of cluster
connectivity, but if this is needed, it has to be coordinated and set by
the user.

Acked-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tipc_netlink.h |   1 +
 net/tipc/crypto.c                 | 210 ++++++++++++++++++++++++++++----------
 net/tipc/crypto.h                 |  15 ++-
 net/tipc/msg.h                    |   4 +-
 net/tipc/netlink.c                |   1 +
 net/tipc/node.c                   |   6 +-
 6 files changed, 175 insertions(+), 62 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h
index dc0d23a50e69..d484baa9d365 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -165,6 +165,7 @@ enum {
 	TIPC_NLA_NODE_UP,		/* flag */
 	TIPC_NLA_NODE_ID,		/* data */
 	TIPC_NLA_NODE_KEY,		/* data */
+	TIPC_NLA_NODE_KEY_MASTER,	/* flag */
 
 	__TIPC_NLA_NODE_MAX,
 	TIPC_NLA_NODE_MAX = __TIPC_NLA_NODE_MAX - 1
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index 45a8f4d9d9de..2510b82d3cc1 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -38,6 +38,7 @@
 #include <crypto/aes.h>
 #include "crypto.h"
 
+#define TIPC_TX_GRACE_PERIOD	msecs_to_jiffies(5000) /* 5s */
 #define TIPC_TX_LASTING_TIME	msecs_to_jiffies(10000) /* 10s */
 #define TIPC_RX_ACTIVE_LIM	msecs_to_jiffies(3000) /* 3s */
 #define TIPC_RX_PASSIVE_LIM	msecs_to_jiffies(15000) /* 15s */
@@ -49,9 +50,9 @@
  * TIPC Key ids
  */
 enum {
-	KEY_UNUSED = 0,
-	KEY_MIN,
-	KEY_1 = KEY_MIN,
+	KEY_MASTER = 0,
+	KEY_MIN = KEY_MASTER,
+	KEY_1 = 1,
 	KEY_2,
 	KEY_3,
 	KEY_MAX = KEY_3,
@@ -166,27 +167,36 @@ struct tipc_crypto_stats {
  * @aead: array of pointers to AEAD keys for encryption/decryption
  * @peer_rx_active: replicated peer RX active key index
  * @key: the key states
- * @working: the crypto is working or not
  * @stats: the crypto statistics
  * @name: the crypto name
  * @sndnxt: the per-peer sndnxt (TX)
  * @timer1: general timer 1 (jiffies)
  * @timer2: general timer 2 (jiffies)
+ * @working: the crypto is working or not
+ * @key_master: flag indicates if master key exists
+ * @legacy_user: flag indicates if a peer joins w/o master key (for bwd comp.)
  * @lock: tipc_key lock
  */
 struct tipc_crypto {
 	struct net *net;
 	struct tipc_node *node;
-	struct tipc_aead __rcu *aead[KEY_MAX + 1]; /* key[0] is UNUSED */
+	struct tipc_aead __rcu *aead[KEY_MAX + 1];
 	atomic_t peer_rx_active;
 	struct tipc_key key;
-	u8 working:1;
 	struct tipc_crypto_stats __percpu *stats;
 	char name[48];
 
 	atomic64_t sndnxt ____cacheline_aligned;
 	unsigned long timer1;
 	unsigned long timer2;
+	union {
+		struct {
+			u8 working:1;
+			u8 key_master:1;
+			u8 legacy_user:1;
+		};
+		u8 flags;
+	};
 	spinlock_t lock; /* crypto lock */
 
 } ____cacheline_aligned;
@@ -236,13 +246,19 @@ static inline void tipc_crypto_key_set_state(struct tipc_crypto *c,
 					     u8 new_active,
 					     u8 new_pending);
 static int tipc_crypto_key_attach(struct tipc_crypto *c,
-				  struct tipc_aead *aead, u8 pos);
+				  struct tipc_aead *aead, u8 pos,
+				  bool master_key);
 static bool tipc_crypto_key_try_align(struct tipc_crypto *rx, u8 new_pending);
 static struct tipc_aead *tipc_crypto_key_pick_tx(struct tipc_crypto *tx,
 						 struct tipc_crypto *rx,
-						 struct sk_buff *skb);
+						 struct sk_buff *skb,
+						 u8 tx_key);
 static void tipc_crypto_key_synch(struct tipc_crypto *rx, struct sk_buff *skb);
 static int tipc_crypto_key_revoke(struct net *net, u8 tx_key);
+static inline void tipc_crypto_clone_msg(struct net *net, struct sk_buff *_skb,
+					 struct tipc_bearer *b,
+					 struct tipc_media_addr *dst,
+					 struct tipc_node *__dnode, u8 type);
 static void tipc_crypto_rcv_complete(struct net *net, struct tipc_aead *aead,
 				     struct tipc_bearer *b,
 				     struct sk_buff **skb, int err);
@@ -943,8 +959,6 @@ bool tipc_ehdr_validate(struct sk_buff *skb)
 		return false;
 	if (unlikely(skb->len <= ehsz + TIPC_AES_GCM_TAG_SIZE))
 		return false;
-	if (unlikely(!ehdr->tx_key))
-		return false;
 
 	return true;
 }
@@ -997,6 +1011,8 @@ static int tipc_ehdr_build(struct net *net, struct tipc_aead *aead,
 	ehdr->tx_key = tx_key;
 	ehdr->destined = (__rx) ? 1 : 0;
 	ehdr->rx_key_active = (__rx) ? __rx->key.active : 0;
+	ehdr->rx_nokey = (__rx) ? !__rx->key.keys : 0;
+	ehdr->master_key = aead->crypto->key_master;
 	ehdr->reserved_1 = 0;
 	ehdr->reserved_2 = 0;
 
@@ -1039,6 +1055,7 @@ static inline void tipc_crypto_key_set_state(struct tipc_crypto *c,
  * @c: TIPC crypto to which new key is attached
  * @ukey: the user key
  * @mode: the key mode (CLUSTER_KEY or PER_NODE_KEY)
+ * @master_key: specify this is a cluster master key
  *
  * A new TIPC AEAD key will be allocated and initiated with the specified user
  * key, then attached to the TIPC crypto.
@@ -1046,7 +1063,7 @@ static inline void tipc_crypto_key_set_state(struct tipc_crypto *c,
  * Return: new key id in case of success, otherwise: < 0
  */
 int tipc_crypto_key_init(struct tipc_crypto *c, struct tipc_aead_key *ukey,
-			 u8 mode)
+			 u8 mode, bool master_key)
 {
 	struct tipc_aead *aead = NULL;
 	int rc = 0;
@@ -1056,7 +1073,7 @@ int tipc_crypto_key_init(struct tipc_crypto *c, struct tipc_aead_key *ukey,
 
 	/* Attach it to the crypto */
 	if (likely(!rc)) {
-		rc = tipc_crypto_key_attach(c, aead, 0);
+		rc = tipc_crypto_key_attach(c, aead, 0, master_key);
 		if (rc < 0)
 			tipc_aead_free(&aead->rcu);
 	}
@@ -1069,11 +1086,13 @@ int tipc_crypto_key_init(struct tipc_crypto *c, struct tipc_aead_key *ukey,
  * @c: TIPC crypto to which the new AEAD key is attached
  * @aead: the new AEAD key pointer
  * @pos: desired slot in the crypto key array, = 0 if any!
+ * @master_key: specify this is a cluster master key
  *
  * Return: new key id in case of success, otherwise: -EBUSY
  */
 static int tipc_crypto_key_attach(struct tipc_crypto *c,
-				  struct tipc_aead *aead, u8 pos)
+				  struct tipc_aead *aead, u8 pos,
+				  bool master_key)
 {
 	struct tipc_key key;
 	int rc = -EBUSY;
@@ -1081,6 +1100,10 @@ static int tipc_crypto_key_attach(struct tipc_crypto *c,
 
 	spin_lock_bh(&c->lock);
 	key = c->key;
+	if (master_key) {
+		new_key = KEY_MASTER;
+		goto attach;
+	}
 	if (key.active && key.passive)
 		goto exit;
 	if (key.pending) {
@@ -1112,8 +1135,7 @@ attach:
 		tipc_crypto_key_set_state(c, key.passive, key.active,
 					  key.pending);
 	c->working = 1;
-	c->timer1 = jiffies;
-	c->timer2 = jiffies;
+	c->key_master |= master_key;
 	rc = new_key;
 
 exit:
@@ -1126,7 +1148,7 @@ void tipc_crypto_key_flush(struct tipc_crypto *c)
 	int k;
 
 	spin_lock_bh(&c->lock);
-	c->working = 0;
+	c->flags = 0;
 	tipc_crypto_key_set_state(c, 0, 0, 0);
 	for (k = KEY_MIN; k <= KEY_MAX; k++)
 		tipc_crypto_key_detach(c->aead[k], &c->lock);
@@ -1202,6 +1224,7 @@ exit:
  * @tx: TX crypto handle
  * @rx: RX crypto handle (can be NULL)
  * @skb: the message skb which will be decrypted later
+ * @tx_key: peer TX key id
  *
  * This function looks up the existing TX keys and pick one which is suitable
  * for the message decryption, that must be a cluster key and not used before
@@ -1211,7 +1234,8 @@ exit:
  */
 static struct tipc_aead *tipc_crypto_key_pick_tx(struct tipc_crypto *tx,
 						 struct tipc_crypto *rx,
-						 struct sk_buff *skb)
+						 struct sk_buff *skb,
+						 u8 tx_key)
 {
 	struct tipc_skb_cb *skb_cb = TIPC_SKB_CB(skb);
 	struct tipc_aead *aead = NULL;
@@ -1230,6 +1254,10 @@ static struct tipc_aead *tipc_crypto_key_pick_tx(struct tipc_crypto *tx,
 
 	/* Pick one TX key */
 	spin_lock(&tx->lock);
+	if (tx_key == KEY_MASTER) {
+		aead = tipc_aead_rcu_ptr(tx->aead[KEY_MASTER], &tx->lock);
+		goto done;
+	}
 	do {
 		k = (i == 0) ? key.pending :
 			((i == 1) ? key.active : key.passive);
@@ -1249,9 +1277,12 @@ static struct tipc_aead *tipc_crypto_key_pick_tx(struct tipc_crypto *tx,
 		skb->next = skb_clone(skb, GFP_ATOMIC);
 		if (unlikely(!skb->next))
 			pr_warn("Failed to clone skb for next round if any\n");
-		WARN_ON(!refcount_inc_not_zero(&aead->refcnt));
 		break;
 	} while (++i < 3);
+
+done:
+	if (likely(aead))
+		WARN_ON(!refcount_inc_not_zero(&aead->refcnt));
 	spin_unlock(&tx->lock);
 
 	return aead;
@@ -1266,6 +1297,9 @@ static struct tipc_aead *tipc_crypto_key_pick_tx(struct tipc_crypto *tx,
  * has changed, so the number of TX keys' users on this node are increased and
  * decreased correspondingly.
  *
+ * It also considers if peer has no key, then we need to make own master key
+ * (if any) taking over i.e. starting grace period.
+ *
  * The "per-peer" sndnxt is also reset when the peer key has switched.
  */
 static void tipc_crypto_key_synch(struct tipc_crypto *rx, struct sk_buff *skb)
@@ -1276,11 +1310,23 @@ static void tipc_crypto_key_synch(struct tipc_crypto *rx, struct sk_buff *skb)
 	u32 self = tipc_own_addr(rx->net);
 	u8 cur, new;
 
-	/* Ensure this message is destined to us first */
+	/* Update RX 'key_master' flag according to peer, also mark "legacy" if
+	 * a peer has no master key.
+	 */
+	rx->key_master = ehdr->master_key;
+	if (!rx->key_master)
+		tx->legacy_user = 1;
+
+	/* For later cases, apply only if message is destined to this node */
 	if (!ehdr->destined || msg_short(hdr) || msg_destnode(hdr) != self)
 		return;
 
-	/* Peer RX active key has changed, let's update own TX users */
+	/* Case 1: Peer has no keys, let's make master key take over */
+	if (ehdr->rx_nokey)
+		/* Set or extend grace period */
+		tx->timer2 = jiffies;
+
+	/* Case 2: Peer RX active key has changed, let's update own TX users */
 	cur = atomic_read(&rx->peer_rx_active);
 	new = ehdr->rx_key_active;
 	if (tx->key.keys &&
@@ -1338,7 +1384,7 @@ int tipc_crypto_start(struct tipc_crypto **crypto, struct net *net,
 		return -ENOMEM;
 	}
 
-	c->working = 0;
+	c->flags = 0;
 	c->net = net;
 	c->node = node;
 	tipc_crypto_key_set_state(c, 0, 0, 0);
@@ -1473,6 +1519,12 @@ s4:
 s5:
 	spin_unlock(&rx->lock);
 
+	/* Relax it here, the flag will be set again if it really is, but only
+	 * when we are not in grace period for safety!
+	 */
+	if (time_after(jiffies, tx->timer2 + TIPC_TX_GRACE_PERIOD))
+		tx->legacy_user = 0;
+
 	/* Limit max_tfms & do debug commands if needed */
 	if (likely(sysctl_tipc_max_tfms <= TIPC_MAX_TFMS_LIM))
 		return;
@@ -1482,6 +1534,22 @@ s5:
 	tipc_crypto_do_cmd(rx->net, cmd);
 }
 
+static inline void tipc_crypto_clone_msg(struct net *net, struct sk_buff *_skb,
+					 struct tipc_bearer *b,
+					 struct tipc_media_addr *dst,
+					 struct tipc_node *__dnode, u8 type)
+{
+	struct sk_buff *skb;
+
+	skb = skb_clone(_skb, GFP_ATOMIC);
+	if (skb) {
+		TIPC_SKB_CB(skb)->xmit_type = type;
+		tipc_crypto_xmit(net, &skb, b, dst, __dnode);
+		if (skb)
+			b->media->send_msg(net, skb, b, dst);
+	}
+}
+
 /**
  * tipc_crypto_xmit - Build & encrypt TIPC message for xmit
  * @net: struct net
@@ -1491,7 +1559,8 @@ s5:
  * @__dnode: destination node for reference if any
  *
  * First, build an encryption message header on the top of the message, then
- * encrypt the original TIPC message by using the active or pending TX key.
+ * encrypt the original TIPC message by using the pending, master or active
+ * key with this preference order.
  * If the encryption is successful, the encrypted skb is returned directly or
  * via the callback.
  * Otherwise, the skb is freed!
@@ -1514,46 +1583,63 @@ int tipc_crypto_xmit(struct net *net, struct sk_buff **skb,
 	struct tipc_msg *hdr = buf_msg(*skb);
 	struct tipc_key key = tx->key;
 	struct tipc_aead *aead = NULL;
-	struct sk_buff *_skb;
-	int rc = -ENOKEY;
 	u32 user = msg_user(hdr);
-	u8 tx_key;
+	u32 type = msg_type(hdr);
+	int rc = -ENOKEY;
+	u8 tx_key = 0;
 
 	/* No encryption? */
 	if (!tx->working)
 		return 0;
 
-	/* Try with the pending key if available and:
-	 * 1) This is the only choice (i.e. no active key) or;
-	 * 2) Peer has switched to this key (unicast only) or;
-	 * 3) It is time to do a pending key probe;
-	 */
+	/* Pending key if peer has active on it or probing time */
 	if (unlikely(key.pending)) {
 		tx_key = key.pending;
-		if (!key.active)
+		if (!tx->key_master && !key.active)
 			goto encrypt;
 		if (__rx && atomic_read(&__rx->peer_rx_active) == tx_key)
 			goto encrypt;
-		if (TIPC_SKB_CB(*skb)->probe) {
+		if (TIPC_SKB_CB(*skb)->xmit_type == SKB_PROBING) {
 			pr_debug("%s: probing for key[%d]\n", tx->name,
 				 key.pending);
 			goto encrypt;
 		}
-		if (user == LINK_CONFIG || user == LINK_PROTOCOL) {
-			_skb = skb_clone(*skb, GFP_ATOMIC);
-			if (_skb) {
-				TIPC_SKB_CB(_skb)->probe = 1;
-				tipc_crypto_xmit(net, &_skb, b, dst, __dnode);
-				if (_skb)
-					b->media->send_msg(net, _skb, b, dst);
+		if (user == LINK_CONFIG || user == LINK_PROTOCOL)
+			tipc_crypto_clone_msg(net, *skb, b, dst, __dnode,
+					      SKB_PROBING);
+	}
+
+	/* Master key if this is a *vital* message or in grace period */
+	if (tx->key_master) {
+		tx_key = KEY_MASTER;
+		if (!key.active)
+			goto encrypt;
+		if (TIPC_SKB_CB(*skb)->xmit_type == SKB_GRACING) {
+			pr_debug("%s: gracing for msg (%d %d)\n", tx->name,
+				 user, type);
+			goto encrypt;
+		}
+		if (user == LINK_CONFIG ||
+		    (user == LINK_PROTOCOL && type == RESET_MSG) ||
+		    time_before(jiffies, tx->timer2 + TIPC_TX_GRACE_PERIOD)) {
+			if (__rx && __rx->key_master &&
+			    !atomic_read(&__rx->peer_rx_active))
+				goto encrypt;
+			if (!__rx) {
+				if (likely(!tx->legacy_user))
+					goto encrypt;
+				tipc_crypto_clone_msg(net, *skb, b, dst,
+						      __dnode, SKB_GRACING);
 			}
 		}
 	}
+
 	/* Else, use the active key if any */
 	if (likely(key.active)) {
 		tx_key = key.active;
 		goto encrypt;
 	}
+
 	goto exit;
 
 encrypt:
@@ -1619,15 +1705,16 @@ int tipc_crypto_rcv(struct net *net, struct tipc_crypto *rx,
 	struct tipc_aead *aead = NULL;
 	struct tipc_key key;
 	int rc = -ENOKEY;
-	u8 tx_key = 0;
+	u8 tx_key;
+
+	tx_key = ((struct tipc_ehdr *)(*skb)->data)->tx_key;
 
 	/* New peer?
 	 * Let's try with TX key (i.e. cluster mode) & verify the skb first!
 	 */
-	if (unlikely(!rx))
+	if (unlikely(!rx || tx_key == KEY_MASTER))
 		goto pick_tx;
 
-	tx_key = ((struct tipc_ehdr *)(*skb)->data)->tx_key;
 	/* Pick RX key according to TX key if any */
 	key = rx->key;
 	if (tx_key == key.active || tx_key == key.pending ||
@@ -1640,7 +1727,7 @@ int tipc_crypto_rcv(struct net *net, struct tipc_crypto *rx,
 
 pick_tx:
 	/* No key suitable? Try to pick one from TX... */
-	aead = tipc_crypto_key_pick_tx(tx, rx, *skb);
+	aead = tipc_crypto_key_pick_tx(tx, rx, *skb, tx_key);
 	if (aead)
 		goto decrypt;
 	goto exit;
@@ -1722,9 +1809,12 @@ static void tipc_crypto_rcv_complete(struct net *net, struct tipc_aead *aead,
 				goto free_skb;
 		}
 
+		/* Ignore cloning if it was TX master key */
+		if (ehdr->tx_key == KEY_MASTER)
+			goto rcv;
 		if (tipc_aead_clone(&tmp, aead) < 0)
 			goto rcv;
-		if (tipc_crypto_key_attach(rx, tmp, ehdr->tx_key) < 0) {
+		if (tipc_crypto_key_attach(rx, tmp, ehdr->tx_key, false) < 0) {
 			tipc_aead_free(&tmp->rcu);
 			goto rcv;
 		}
@@ -1740,10 +1830,10 @@ static void tipc_crypto_rcv_complete(struct net *net, struct tipc_aead *aead,
 	/* Set the RX key's user */
 	tipc_aead_users_set(aead, 1);
 
-rcv:
 	/* Mark this point, RX works */
 	rx->timer1 = jiffies;
 
+rcv:
 	/* Remove ehdr & auth. tag prior to tipc_rcv() */
 	ehdr = (struct tipc_ehdr *)(*skb)->data;
 
@@ -1865,14 +1955,24 @@ static char *tipc_crypto_key_dump(struct tipc_crypto *c, char *buf)
 	char *s;
 
 	for (k = KEY_MIN; k <= KEY_MAX; k++) {
-		if (k == key.passive)
-			s = "PAS";
-		else if (k == key.active)
-			s = "ACT";
-		else if (k == key.pending)
-			s = "PEN";
-		else
-			s = "-";
+		if (k == KEY_MASTER) {
+			if (is_rx(c))
+				continue;
+			if (time_before(jiffies,
+					c->timer2 + TIPC_TX_GRACE_PERIOD))
+				s = "ACT";
+			else
+				s = "PAS";
+		} else {
+			if (k == key.passive)
+				s = "PAS";
+			else if (k == key.active)
+				s = "ACT";
+			else if (k == key.pending)
+				s = "PEN";
+			else
+				s = "-";
+		}
 		i += scnprintf(buf + i, 200 - i, "\tKey%d: %s", k, s);
 
 		rcu_read_lock();
@@ -1905,7 +2005,7 @@ static char *tipc_key_change_dump(struct tipc_key old, struct tipc_key new,
 	/* Output format: "[%s %s %s] -> [%s %s %s]", max len = 32 */
 again:
 	i += scnprintf(buf + i, 32 - i, "[");
-	for (k = KEY_MIN; k <= KEY_MAX; k++) {
+	for (k = KEY_1; k <= KEY_3; k++) {
 		if (k == key->passive)
 			s = "pas";
 		else if (k == key->active)
@@ -1915,7 +2015,7 @@ again:
 		else
 			s = "-";
 		i += scnprintf(buf + i, 32 - i,
-			       (k != KEY_MAX) ? "%s " : "%s", s);
+			       (k != KEY_3) ? "%s " : "%s", s);
 	}
 	if (key != &new) {
 		i += scnprintf(buf + i, 32 - i, "] -> ");
diff --git a/net/tipc/crypto.h b/net/tipc/crypto.h
index c387240e03d0..643b55077112 100644
--- a/net/tipc/crypto.h
+++ b/net/tipc/crypto.h
@@ -74,7 +74,7 @@ extern int sysctl_tipc_max_tfms __read_mostly;
  *     3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0
  *     1 0 9 8 7 6 5 4|3 2 1 0 9 8 7 6|5 4 3 2 1 0 9 8|7 6 5 4 3 2 1 0
  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * w0:|Ver=7| User  |D|TX |RX |K|                 Rsvd                |
+ * w0:|Ver=7| User  |D|TX |RX |K|M|N|             Rsvd                |
  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  * w1:|                             Seqno                             |
  * w2:|                           (8 octets)                          |
@@ -101,6 +101,9 @@ extern int sysctl_tipc_max_tfms __read_mostly;
  *	RX	: Currently RX active key corresponding to the destination
  *	          node's TX key (when the "D" bit is set)
  *	K	: Keep-alive bit (for RPS, LINK_PROTOCOL/STATE_MSG only)
+ *	M       : Bit indicates if sender has master key
+ *	N	: Bit indicates if sender has no RX keys corresponding to the
+ *	          receiver's TX (when the "D" bit is set)
  *	Rsvd	: Reserved bit, field
  * Word1-2:
  *	Seqno	: The 64-bit sequence number of the encrypted message, also
@@ -117,7 +120,9 @@ struct tipc_ehdr {
 			__u8	destined:1,
 				user:4,
 				version:3;
-			__u8	reserved_1:3,
+			__u8	reserved_1:1,
+				rx_nokey:1,
+				master_key:1,
 				keepalive:1,
 				rx_key_active:2,
 				tx_key:2;
@@ -128,7 +133,9 @@ struct tipc_ehdr {
 			__u8	tx_key:2,
 				rx_key_active:2,
 				keepalive:1,
-				reserved_1:3;
+				master_key:1,
+				rx_nokey:1,
+				reserved_1:1;
 #else
 #error  "Please fix <asm/byteorder.h>"
 #endif
@@ -158,7 +165,7 @@ int tipc_crypto_xmit(struct net *net, struct sk_buff **skb,
 int tipc_crypto_rcv(struct net *net, struct tipc_crypto *rx,
 		    struct sk_buff **skb, struct tipc_bearer *b);
 int tipc_crypto_key_init(struct tipc_crypto *c, struct tipc_aead_key *ukey,
-			 u8 mode);
+			 u8 mode, bool master_key);
 void tipc_crypto_key_flush(struct tipc_crypto *c);
 int tipc_aead_key_validate(struct tipc_aead_key *ukey, struct genl_info *info);
 bool tipc_ehdr_validate(struct sk_buff *skb);
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 1016e96db5c4..25e5c5c8a6ff 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -127,7 +127,9 @@ struct tipc_skb_cb {
 #ifdef CONFIG_TIPC_CRYPTO
 			u8 encrypted:1;
 			u8 decrypted:1;
-			u8 probe:1;
+#define SKB_PROBING	1
+#define SKB_GRACING	2
+			u8 xmit_type:2;
 			u8 tx_clone_deferred:1;
 #endif
 		};
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index c4aee6247d55..1ec00fcc26ee 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -108,6 +108,7 @@ const struct nla_policy tipc_nl_node_policy[TIPC_NLA_NODE_MAX + 1] = {
 					    .len = TIPC_NODEID_LEN},
 	[TIPC_NLA_NODE_KEY]		= { .type = NLA_BINARY,
 					    .len = TIPC_AEAD_KEY_SIZE_MAX},
+	[TIPC_NLA_NODE_KEY_MASTER]	= { .type = NLA_FLAG },
 };
 
 /* Properties valid for media, bearer and link */
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 70045630e6bb..5da94d1dda77 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -2875,6 +2875,7 @@ static int __tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info)
 	struct tipc_crypto *tx = tipc_net(net)->crypto_tx, *c = tx;
 	struct tipc_node *n = NULL;
 	struct tipc_aead_key *ukey;
+	bool master_key = false;
 	u8 *id, *own_id, mode;
 	int rc = 0;
 
@@ -2905,6 +2906,7 @@ static int __tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info)
 	switch (rc) {
 	case -ENODATA:
 		mode = CLUSTER_KEY;
+		master_key = !!(attrs[TIPC_NLA_NODE_KEY_MASTER]);
 		break;
 	case 0:
 		mode = PER_NODE_KEY;
@@ -2921,11 +2923,11 @@ static int __tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	/* Initiate the TX/RX key */
-	rc = tipc_crypto_key_init(c, ukey, mode);
+	rc = tipc_crypto_key_init(c, ukey, mode, master_key);
 	if (n)
 		tipc_node_put(n);
 
-	if (rc < 0) {
+	if (unlikely(rc < 0)) {
 		GENL_SET_ERR_MSG(info, "unable to initiate or attach new key");
 		return rc;
 	}
-- 
cgit v1.2.3


From 23700da29b83e859a8c3727fddd33ba74c4f3a39 Mon Sep 17 00:00:00 2001
From: Tuong Lien <tuong.t.lien@dektech.com.au>
Date: Fri, 18 Sep 2020 08:17:29 +0700
Subject: tipc: add automatic rekeying for encryption key

Rekeying is required for security since a key is less secure when used
for a long time. Also, a key will be detached when its nonce value (or
seqno ...) is exhausted. We now make the rekeying process automatic and
configurable by the user.

Basically, TIPC will at a specific interval generate a new key by using
the kernel 'Random Number Generator' cipher, then attach it as the node
TX key and securely distribute to others in the cluster as RX keys (-
the key exchange). The automatic key switching will then take over, and
make the new key active shortly. Afterwards, the traffic from this node
will be encrypted with the new session key. The same can happen in peer
nodes but not necessarily at the same time.

For simplicity, the automatically generated key will be initiated as a
per node key. It is not too hard to also support a cluster key rekeying
(e.g. a given node will generate a unique cluster key and update to the
others in the cluster...), but that doesn't bring much benefit, while a
per-node key is even more secure.

We also enable the user to force a rekeying or change the rekeying
interval via netlink; the new 'set key' command option
'TIPC_NLA_NODE_REKEYING' is added for these purposes as follows:
- A value >= 1 will be set as the rekeying interval (in minutes);
- A value of 0 will disable the rekeying;
- A value of 'TIPC_REKEYING_NOW' (~0) will force an immediate rekeying;

The default rekeying interval is (60 * 24) minutes i.e. done every day.
There isn't any restriction on the value, but the user shouldn't set it
too small or too large, which would result in an "ineffective" rekeying
(that's ok for testing though).

Acked-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tipc.h         |   2 +
 include/uapi/linux/tipc_netlink.h |   1 +
 net/tipc/crypto.c                 | 113 +++++++++++++++++++++++++++++++++++++-
 net/tipc/crypto.h                 |   2 +
 net/tipc/netlink.c                |   1 +
 net/tipc/node.c                   |  25 ++++++++-
 6 files changed, 141 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h
index add01db1daef..80ea15e12113 100644
--- a/include/uapi/linux/tipc.h
+++ b/include/uapi/linux/tipc.h
@@ -254,6 +254,8 @@ static inline int tipc_aead_key_size(struct tipc_aead_key *key)
 	return sizeof(*key) + key->keylen;
 }
 
+#define TIPC_REKEYING_NOW		(~0U)
+
 /* The macros and functions below are deprecated:
  */
 
diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h
index d484baa9d365..d847dd671d79 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -166,6 +166,7 @@ enum {
 	TIPC_NLA_NODE_ID,		/* data */
 	TIPC_NLA_NODE_KEY,		/* data */
 	TIPC_NLA_NODE_KEY_MASTER,	/* flag */
+	TIPC_NLA_NODE_REKEYING,		/* u32 */
 
 	__TIPC_NLA_NODE_MAX,
 	TIPC_NLA_NODE_MAX = __TIPC_NLA_NODE_MAX - 1
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index 91d8b268cae0..40c44101fe8e 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -36,6 +36,7 @@
 
 #include <crypto/aead.h>
 #include <crypto/aes.h>
+#include <crypto/rng.h>
 #include "crypto.h"
 #include "msg.h"
 #include "bcast.h"
@@ -48,6 +49,8 @@
 #define TIPC_MAX_TFMS_DEF	10
 #define TIPC_MAX_TFMS_LIM	1000
 
+#define TIPC_REKEYING_INTV_DEF	(60 * 24) /* default: 1 day */
+
 /**
  * TIPC Key ids
  */
@@ -181,6 +184,7 @@ struct tipc_crypto_stats {
  * @wq: common workqueue on TX crypto
  * @work: delayed work sched for TX/RX
  * @key_distr: key distributing state
+ * @rekeying_intv: rekeying interval (in minutes)
  * @stats: the crypto statistics
  * @name: the crypto name
  * @sndnxt: the per-peer sndnxt (TX)
@@ -206,6 +210,7 @@ struct tipc_crypto {
 #define KEY_DISTR_SCHED		1
 #define KEY_DISTR_COMPL		2
 	atomic_t key_distr;
+	u32 rekeying_intv;
 
 	struct tipc_crypto_stats __percpu *stats;
 	char name[48];
@@ -294,7 +299,9 @@ static char *tipc_key_change_dump(struct tipc_key old, struct tipc_key new,
 static int tipc_crypto_key_xmit(struct net *net, struct tipc_aead_key *skey,
 				u16 gen, u8 mode, u32 dnode);
 static bool tipc_crypto_key_rcv(struct tipc_crypto *rx, struct tipc_msg *hdr);
+static void tipc_crypto_work_tx(struct work_struct *work);
 static void tipc_crypto_work_rx(struct work_struct *work);
+static int tipc_aead_key_generate(struct tipc_aead_key *skey);
 
 #define is_tx(crypto) (!(crypto)->node)
 #define is_rx(crypto) (!is_tx(crypto))
@@ -346,6 +353,27 @@ int tipc_aead_key_validate(struct tipc_aead_key *ukey, struct genl_info *info)
 	return 0;
 }
 
+/**
+ * tipc_aead_key_generate - Generate new session key
+ * @skey: input/output key with new content
+ *
+ * Return: 0 in case of success, otherwise < 0
+ */
+static int tipc_aead_key_generate(struct tipc_aead_key *skey)
+{
+	int rc = 0;
+
+	/* Fill the key's content with a random value via RNG cipher */
+	rc = crypto_get_default_rng();
+	if (likely(!rc)) {
+		rc = crypto_rng_get_bytes(crypto_default_rng, skey->key,
+					  skey->keylen);
+		crypto_put_default_rng();
+	}
+
+	return rc;
+}
+
 static struct tipc_aead *tipc_aead_get(struct tipc_aead __rcu *aead)
 {
 	struct tipc_aead *tmp;
@@ -1471,6 +1499,7 @@ int tipc_crypto_start(struct tipc_crypto **crypto, struct net *net,
 	atomic64_set(&c->sndnxt, 0);
 	c->timer1 = jiffies;
 	c->timer2 = jiffies;
+	c->rekeying_intv = TIPC_REKEYING_INTV_DEF;
 	spin_lock_init(&c->lock);
 	scnprintf(c->name, 48, "%s(%s)", (is_rx(c)) ? "RX" : "TX",
 		  (is_rx(c)) ? tipc_node_get_id_str(c->node) :
@@ -1478,6 +1507,8 @@ int tipc_crypto_start(struct tipc_crypto **crypto, struct net *net,
 
 	if (is_rx(c))
 		INIT_DELAYED_WORK(&c->work, tipc_crypto_work_rx);
+	else
+		INIT_DELAYED_WORK(&c->work, tipc_crypto_work_tx);
 
 	*crypto = c;
 	return 0;
@@ -1492,8 +1523,11 @@ void tipc_crypto_stop(struct tipc_crypto **crypto)
 		return;
 
 	/* Flush any queued works & destroy wq */
-	if (is_tx(c))
+	if (is_tx(c)) {
+		c->rekeying_intv = 0;
+		cancel_delayed_work_sync(&c->work);
 		destroy_workqueue(c->wq);
+	}
 
 	/* Release AEAD keys */
 	rcu_read_lock();
@@ -2351,3 +2385,80 @@ static void tipc_crypto_work_rx(struct work_struct *work)
 
 	tipc_node_put(rx->node);
 }
+
+/**
+ * tipc_crypto_rekeying_sched - (Re)schedule rekeying with/without new interval
+ * @tx: TX crypto
+ * @changed: if the rekeying needs to be rescheduled with new interval
+ * @new_intv: new rekeying interval (when "changed" = true)
+ */
+void tipc_crypto_rekeying_sched(struct tipc_crypto *tx, bool changed,
+				u32 new_intv)
+{
+	unsigned long delay;
+	bool now = false;
+
+	if (changed) {
+		if (new_intv == TIPC_REKEYING_NOW)
+			now = true;
+		else
+			tx->rekeying_intv = new_intv;
+		cancel_delayed_work_sync(&tx->work);
+	}
+
+	if (tx->rekeying_intv || now) {
+		delay = (now) ? 0 : tx->rekeying_intv * 60 * 1000;
+		queue_delayed_work(tx->wq, &tx->work, msecs_to_jiffies(delay));
+	}
+}
+
+/**
+ * tipc_crypto_work_tx - Scheduled TX works handler
+ * @work: the struct TX work
+ *
+ * The function processes the previous scheduled work, i.e. key rekeying, by
+ * generating a new session key based on current one, then attaching it to the
+ * TX crypto and finally distributing it to peers. It also re-schedules the
+ * rekeying if needed.
+ */
+static void tipc_crypto_work_tx(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct tipc_crypto *tx = container_of(dwork, struct tipc_crypto, work);
+	struct tipc_aead_key *skey = NULL;
+	struct tipc_key key = tx->key;
+	struct tipc_aead *aead;
+	int rc = -ENOMEM;
+
+	if (unlikely(key.pending))
+		goto resched;
+
+	/* Take current key as a template */
+	rcu_read_lock();
+	aead = rcu_dereference(tx->aead[key.active ?: KEY_MASTER]);
+	if (unlikely(!aead)) {
+		rcu_read_unlock();
+		/* At least one key should exist for securing */
+		return;
+	}
+
+	/* Let's duplicate it first */
+	skey = kmemdup(aead->key, tipc_aead_key_size(aead->key), GFP_ATOMIC);
+	rcu_read_unlock();
+
+	/* Now, generate new key, initiate & distribute it */
+	if (likely(skey)) {
+		rc = tipc_aead_key_generate(skey) ?:
+		     tipc_crypto_key_init(tx, skey, PER_NODE_KEY, false);
+		if (likely(rc > 0))
+			rc = tipc_crypto_key_distr(tx, rc, NULL);
+		kzfree(skey);
+	}
+
+	if (unlikely(rc))
+		pr_warn_ratelimited("%s: rekeying returns %d\n", tx->name, rc);
+
+resched:
+	/* Re-schedule rekeying if any */
+	tipc_crypto_rekeying_sched(tx, false, 0);
+}
diff --git a/net/tipc/crypto.h b/net/tipc/crypto.h
index b2a9c9b90684..e71193bd5e36 100644
--- a/net/tipc/crypto.h
+++ b/net/tipc/crypto.h
@@ -171,6 +171,8 @@ void tipc_crypto_key_flush(struct tipc_crypto *c);
 int tipc_crypto_key_distr(struct tipc_crypto *tx, u8 key,
 			  struct tipc_node *dest);
 void tipc_crypto_msg_rcv(struct net *net, struct sk_buff *skb);
+void tipc_crypto_rekeying_sched(struct tipc_crypto *tx, bool changed,
+				u32 new_intv);
 int tipc_aead_key_validate(struct tipc_aead_key *ukey, struct genl_info *info);
 bool tipc_ehdr_validate(struct sk_buff *skb);
 
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 1ec00fcc26ee..c447cb5f879e 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -109,6 +109,7 @@ const struct nla_policy tipc_nl_node_policy[TIPC_NLA_NODE_MAX + 1] = {
 	[TIPC_NLA_NODE_KEY]		= { .type = NLA_BINARY,
 					    .len = TIPC_AEAD_KEY_SIZE_MAX},
 	[TIPC_NLA_NODE_KEY_MASTER]	= { .type = NLA_FLAG },
+	[TIPC_NLA_NODE_REKEYING]	= { .type = NLA_U32 },
 };
 
 /* Properties valid for media, bearer and link */
diff --git a/net/tipc/node.c b/net/tipc/node.c
index c9b6042e32b5..cf4b239fc569 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -2879,6 +2879,17 @@ static int tipc_nl_retrieve_nodeid(struct nlattr **attrs, u8 **node_id)
 	return 0;
 }
 
+static int tipc_nl_retrieve_rekeying(struct nlattr **attrs, u32 *intv)
+{
+	struct nlattr *attr = attrs[TIPC_NLA_NODE_REKEYING];
+
+	if (!attr)
+		return -ENODATA;
+
+	*intv = nla_get_u32(attr);
+	return 0;
+}
+
 static int __tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info)
 {
 	struct nlattr *attrs[TIPC_NLA_NODE_MAX + 1];
@@ -2886,8 +2897,9 @@ static int __tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info)
 	struct tipc_crypto *tx = tipc_net(net)->crypto_tx, *c = tx;
 	struct tipc_node *n = NULL;
 	struct tipc_aead_key *ukey;
-	bool master_key = false;
+	bool rekeying = true, master_key = false;
 	u8 *id, *own_id, mode;
+	u32 intv = 0;
 	int rc = 0;
 
 	if (!info->attrs[TIPC_NLA_NODE])
@@ -2905,8 +2917,14 @@ static int __tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info)
 		return -EPERM;
 	}
 
+	rc = tipc_nl_retrieve_rekeying(attrs, &intv);
+	if (rc == -ENODATA)
+		rekeying = false;
+
 	rc = tipc_nl_retrieve_key(attrs, &ukey);
-	if (rc)
+	if (rc == -ENODATA && rekeying)
+		goto rekeying;
+	else if (rc)
 		return rc;
 
 	rc = tipc_aead_key_validate(ukey, info);
@@ -2945,6 +2963,9 @@ static int __tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info)
 		/* Distribute TX key but not master one */
 		if (!master_key && tipc_crypto_key_distr(tx, rc, NULL))
 			GENL_SET_ERR_MSG(info, "failed to replicate new key");
+rekeying:
+		/* Schedule TX rekeying if needed */
+		tipc_crypto_rekeying_sched(tx, rekeying, intv);
 	}
 
 	return 0;
-- 
cgit v1.2.3


From 55f13311785cebd60b9bab9ca7fd64205436c462 Mon Sep 17 00:00:00 2001
From: Dan Murphy <dmurphy@ti.com>
Date: Fri, 18 Sep 2020 14:14:51 -0500
Subject: ethtool: Add 100base-FX link mode entries

Add entries for the 100base-FX full and half duplex supported modes.

$ ethtool eth0
        Supported ports: [ FIBRE ]
        Supported link modes:  100baseFX/Half 100baseFX/Full
        Supported pause frame use: Symmetric Receive-only
        Supports auto-negotiation: No
        Supported FEC modes: Not reported
        Advertised link modes: 100baseFX/Half 100baseFX/Full
        Advertised pause frame use: No
        Advertised auto-negotiation: No
        Advertised FEC modes: Not reported
        Speed: 100Mb/s
        Duplex: Full
        Auto-negotiation: off
        Port: MII
        PHYAD: 1
        Transceiver: external
        Supports Wake-on: gs
        Wake-on: d
        SecureOn password: 00:00:00:00:00:00
        Current message level: 0x00000000 (0)

        Link detected: yes

Signed-off-by: Dan Murphy <dmurphy@ti.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy-core.c   | 4 +++-
 include/uapi/linux/ethtool.h | 2 ++
 net/ethtool/common.c         | 2 ++
 net/ethtool/linkmodes.c      | 2 ++
 4 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c
index ff8e14b01eeb..de5b869139d7 100644
--- a/drivers/net/phy/phy-core.c
+++ b/drivers/net/phy/phy-core.c
@@ -8,7 +8,7 @@
 
 const char *phy_speed_to_str(int speed)
 {
-	BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 90,
+	BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 92,
 		"Enum ethtool_link_mode_bit_indices and phylib are out of sync. "
 		"If a speed or mode has been added please update phy_speed_to_str "
 		"and the PHY settings array.\n");
@@ -160,6 +160,8 @@ static const struct phy_setting settings[] = {
 	PHY_SETTING(    100, FULL,    100baseT_Full		),
 	PHY_SETTING(    100, FULL,    100baseT1_Full		),
 	PHY_SETTING(    100, HALF,    100baseT_Half		),
+	PHY_SETTING(    100, HALF,    100baseFX_Half		),
+	PHY_SETTING(    100, FULL,    100baseFX_Full		),
 	/* 10M */
 	PHY_SETTING(     10, FULL,     10baseT_Full		),
 	PHY_SETTING(     10, HALF,     10baseT_Half		),
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index b4f2d134e713..9ca87bc73c44 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1617,6 +1617,8 @@ enum ethtool_link_mode_bit_indices {
 	ETHTOOL_LINK_MODE_400000baseLR4_ER4_FR4_Full_BIT = 87,
 	ETHTOOL_LINK_MODE_400000baseDR4_Full_BIT	 = 88,
 	ETHTOOL_LINK_MODE_400000baseCR4_Full_BIT	 = 89,
+	ETHTOOL_LINK_MODE_100baseFX_Half_BIT		 = 90,
+	ETHTOOL_LINK_MODE_100baseFX_Full_BIT		 = 91,
 	/* must be last entry */
 	__ETHTOOL_LINK_MODE_MASK_NBITS
 };
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index ed19573fccd7..24036e3055a1 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -192,6 +192,8 @@ const char link_mode_names[][ETH_GSTRING_LEN] = {
 	__DEFINE_LINK_MODE_NAME(400000, LR4_ER4_FR4, Full),
 	__DEFINE_LINK_MODE_NAME(400000, DR4, Full),
 	__DEFINE_LINK_MODE_NAME(400000, CR4, Full),
+	__DEFINE_LINK_MODE_NAME(100, FX, Half),
+	__DEFINE_LINK_MODE_NAME(100, FX, Full),
 };
 static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS);
 
diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c
index 7044a2853886..29dcd675b65a 100644
--- a/net/ethtool/linkmodes.c
+++ b/net/ethtool/linkmodes.c
@@ -272,6 +272,8 @@ static const struct link_mode_info link_mode_params[] = {
 	__DEFINE_LINK_MODE_PARAMS(400000, LR4_ER4_FR4, Full),
 	__DEFINE_LINK_MODE_PARAMS(400000, DR4, Full),
 	__DEFINE_LINK_MODE_PARAMS(400000, CR4, Full),
+	__DEFINE_LINK_MODE_PARAMS(100, FX, Half),
+	__DEFINE_LINK_MODE_PARAMS(100, FX, Full),
 };
 
 static const struct nla_policy
-- 
cgit v1.2.3


From c12fa88c6d16ed3865072d91154cff6fd1cd9cd4 Mon Sep 17 00:00:00 2001
From: Zenghui Yu <yuzenghui@huawei.com>
Date: Thu, 10 Sep 2020 20:25:08 +0800
Subject: vfio: Fix typo of the device_state

A typo fix ("_RUNNNG" => "_RUNNING") in comment block of the uapi header.

Signed-off-by: Zenghui Yu <yuzenghui@huawei.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 include/uapi/linux/vfio.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 920470502329..d4bd39e124bf 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -462,7 +462,7 @@ struct vfio_region_gfx_edid {
  * 5. Resumed
  *                  |--------->|
  *
- * 0. Default state of VFIO device is _RUNNNG when the user application starts.
+ * 0. Default state of VFIO device is _RUNNING when the user application starts.
  * 1. During normal shutdown of the user application, the user application may
  *    optionally change the VFIO device state from _RUNNING to _STOP. This
  *    transition is optional. The vendor driver must support this transition but
-- 
cgit v1.2.3


From 7d6e1329652ed971d1b6e0e7bea66fba5044e271 Mon Sep 17 00:00:00 2001
From: Matthew Rosato <mjrosato@linux.ibm.com>
Date: Tue, 15 Sep 2020 15:05:18 -0400
Subject: vfio iommu: Add dma available capability

Commit 492855939bdb ("vfio/type1: Limit DMA mappings per container")
added the ability to limit the number of memory backed DMA mappings.
However on s390x, when lazy mapping is in use, we use a very large
number of concurrent mappings.  Let's provide the current allowable
number of DMA mappings to userspace via the IOMMU info chain so that
userspace can take appropriate mitigation.

Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/vfio_iommu_type1.c | 17 +++++++++++++++++
 include/uapi/linux/vfio.h       | 15 +++++++++++++++
 2 files changed, 32 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 5fbf0c1f7433..15e21dbffb16 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2609,6 +2609,20 @@ static int vfio_iommu_migration_build_caps(struct vfio_iommu *iommu,
 	return vfio_info_add_capability(caps, &cap_mig.header, sizeof(cap_mig));
 }
 
+static int vfio_iommu_dma_avail_build_caps(struct vfio_iommu *iommu,
+					   struct vfio_info_cap *caps)
+{
+	struct vfio_iommu_type1_info_dma_avail cap_dma_avail;
+
+	cap_dma_avail.header.id = VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL;
+	cap_dma_avail.header.version = 1;
+
+	cap_dma_avail.avail = iommu->dma_avail;
+
+	return vfio_info_add_capability(caps, &cap_dma_avail.header,
+					sizeof(cap_dma_avail));
+}
+
 static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
 				     unsigned long arg)
 {
@@ -2641,6 +2655,9 @@ static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
 
 	ret = vfio_iommu_migration_build_caps(iommu, &caps);
 
+	if (!ret)
+		ret = vfio_iommu_dma_avail_build_caps(iommu, &caps);
+
 	if (!ret)
 		ret = vfio_iommu_iova_build_caps(iommu, &caps);
 
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 920470502329..3891e03d3af0 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -1039,6 +1039,21 @@ struct vfio_iommu_type1_info_cap_migration {
 	__u64	max_dirty_bitmap_size;		/* in bytes */
 };
 
+/*
+ * The DMA available capability allows reporting the current number of
+ * simultaneously outstanding DMA mappings that are allowed.
+ *
+ * The structure below defines version 1 of this capability.
+ *
+ * avail: specifies the current number of outstanding DMA mappings allowed.
+ */
+#define VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL 3
+
+struct vfio_iommu_type1_info_dma_avail {
+	struct	vfio_info_cap_header header;
+	__u32	avail;
+};
+
 #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
 
 /**
-- 
cgit v1.2.3


From 15b760c37ad3c3f2b922506eaca4ca8b4292e621 Mon Sep 17 00:00:00 2001
From: Andra Paraschiv <andraprs@amazon.com>
Date: Mon, 21 Sep 2020 15:17:15 +0300
Subject: nitro_enclaves: Add ioctl interface definition

The Nitro Enclaves driver handles the enclave lifetime management. This
includes enclave creation, termination and setting up its resources such
as memory and CPU.

An enclave runs alongside the VM that spawned it. It is abstracted as a
process running in the VM that launched it. The process interacts with
the NE driver, that exposes an ioctl interface for creating an enclave
and setting up its resources.

Changelog

v9 -> v10

* Update commit message to include the changelog before the SoB tag(s).

v8 -> v9

* No changes.

v7 -> v8

* Add NE custom error codes for user space memory regions not backed by
  pages multiple of 2 MiB, invalid flags and enclave CID.
* Add max flag value for enclave image load info.

v6 -> v7

* Clarify in the ioctls documentation that the return value is -1 and
  errno is set on failure.
* Update the error code value for NE_ERR_INVALID_MEM_REGION_SIZE as it
  gets in user space as value 25 (ENOTTY) instead of 515. Update the
  NE custom error codes values range to not be the same as the ones
  defined in include/linux/errno.h, although these are not propagated
  to user space.

v5 -> v6

* Fix typo in the description about the NE CPU pool.
* Update documentation to kernel-doc format.
* Remove the ioctl to query API version.

v4 -> v5

* Add more details about the ioctl calls usage e.g. error codes, file
  descriptors used.
* Update the ioctl to set an enclave vCPU to not return a file
  descriptor.
* Add specific NE error codes.

v3 -> v4

* Decouple NE ioctl interface from KVM API.
* Add NE API version and the corresponding ioctl call.
* Add enclave / image load flags options.

v2 -> v3

* Remove the GPL additional wording as SPDX-License-Identifier is
  already in place.

v1 -> v2

* Add ioctl for getting enclave image load metadata.
* Update NE_ENCLAVE_START ioctl name to NE_START_ENCLAVE.
* Add entry in Documentation/userspace-api/ioctl/ioctl-number.rst for NE
  ioctls.
* Update NE ioctls definition based on the updated ioctl range for major
  and minor.

Reviewed-by: Alexander Graf <graf@amazon.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Alexandru Vasile <lexnv@amazon.com>
Signed-off-by: Andra Paraschiv <andraprs@amazon.com>
Link: https://lore.kernel.org/r/20200921121732.44291-2-andraprs@amazon.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/userspace-api/ioctl/ioctl-number.rst |   5 +-
 include/linux/nitro_enclaves.h                     |  11 +
 include/uapi/linux/nitro_enclaves.h                | 359 +++++++++++++++++++++
 3 files changed, 374 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/nitro_enclaves.h
 create mode 100644 include/uapi/linux/nitro_enclaves.h

(limited to 'include/uapi')

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 2a198838fca9..5f7ff00f394e 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -328,8 +328,11 @@ Code  Seq#    Include File                                           Comments
 0xAC  00-1F  linux/raw.h
 0xAD  00                                                             Netfilter device in development:
                                                                      <mailto:rusty@rustcorp.com.au>
-0xAE  all    linux/kvm.h                                             Kernel-based Virtual Machine
+0xAE  00-1F  linux/kvm.h                                             Kernel-based Virtual Machine
                                                                      <mailto:kvm@vger.kernel.org>
+0xAE  40-FF  linux/kvm.h                                             Kernel-based Virtual Machine
+                                                                     <mailto:kvm@vger.kernel.org>
+0xAE  20-3F  linux/nitro_enclaves.h                                  Nitro Enclaves
 0xAF  00-1F  linux/fsl_hypervisor.h                                  Freescale hypervisor
 0xB0  all                                                            RATIO devices in development:
                                                                      <mailto:vgo@ratio.de>
diff --git a/include/linux/nitro_enclaves.h b/include/linux/nitro_enclaves.h
new file mode 100644
index 000000000000..d91ef2bfdf47
--- /dev/null
+++ b/include/linux/nitro_enclaves.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ */
+
+#ifndef _LINUX_NITRO_ENCLAVES_H_
+#define _LINUX_NITRO_ENCLAVES_H_
+
+#include <uapi/linux/nitro_enclaves.h>
+
+#endif /* _LINUX_NITRO_ENCLAVES_H_ */
diff --git a/include/uapi/linux/nitro_enclaves.h b/include/uapi/linux/nitro_enclaves.h
new file mode 100644
index 000000000000..b945073fe544
--- /dev/null
+++ b/include/uapi/linux/nitro_enclaves.h
@@ -0,0 +1,359 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ */
+
+#ifndef _UAPI_LINUX_NITRO_ENCLAVES_H_
+#define _UAPI_LINUX_NITRO_ENCLAVES_H_
+
+#include <linux/types.h>
+
+/**
+ * DOC: Nitro Enclaves (NE) Kernel Driver Interface
+ */
+
+/**
+ * NE_CREATE_VM - The command is used to create a slot that is associated with
+ *		  an enclave VM.
+ *		  The generated unique slot id is an output parameter.
+ *		  The ioctl can be invoked on the /dev/nitro_enclaves fd, before
+ *		  setting any resources, such as memory and vCPUs, for an
+ *		  enclave. Memory and vCPUs are set for the slot mapped to an enclave.
+ *		  A NE CPU pool has to be set before calling this function. The
+ *		  pool can be set after the NE driver load, using
+ *		  /sys/module/nitro_enclaves/parameters/ne_cpus.
+ *		  Its format is detailed in the cpu-lists section:
+ *		  https://www.kernel.org/doc/html/latest/admin-guide/kernel-parameters.html
+ *		  CPU 0 and its siblings have to remain available for the
+ *		  primary / parent VM, so they cannot be set for enclaves. Full
+ *		  CPU core(s), from the same NUMA node, need(s) to be included
+ *		  in the CPU pool.
+ *
+ * Context: Process context.
+ * Return:
+ * * Enclave file descriptor		- Enclave file descriptor used with
+ *					  ioctl calls to set vCPUs and memory
+ *					  regions, then start the enclave.
+ * *  -1				- There was a failure in the ioctl logic.
+ * On failure, errno is set to:
+ * * EFAULT				- copy_to_user() failure.
+ * * ENOMEM				- Memory allocation failure for internal
+ *					  bookkeeping variables.
+ * * NE_ERR_NO_CPUS_AVAIL_IN_POOL	- No NE CPU pool set / no CPUs available
+ *					  in the pool.
+ * * Error codes from get_unused_fd_flags() and anon_inode_getfile().
+ * * Error codes from the NE PCI device request.
+ */
+#define NE_CREATE_VM			_IOR(0xAE, 0x20, __u64)
+
+/**
+ * NE_ADD_VCPU - The command is used to set a vCPU for an enclave. The vCPU can
+ *		 be auto-chosen from the NE CPU pool or it can be set by the
+ *		 caller, with the note that it needs to be available in the NE
+ *		 CPU pool. Full CPU core(s), from the same NUMA node, need(s) to
+ *		 be associated with an enclave.
+ *		 The vCPU id is an input / output parameter. If its value is 0,
+ *		 then a CPU is chosen from the enclave CPU pool and returned via
+ *		 this parameter.
+ *		 The ioctl can be invoked on the enclave fd, before an enclave
+ *		 is started.
+ *
+ * Context: Process context.
+ * Return:
+ * * 0					- Logic successfully completed.
+ * *  -1				- There was a failure in the ioctl logic.
+ * On failure, errno is set to:
+ * * EFAULT				- copy_from_user() / copy_to_user() failure.
+ * * ENOMEM				- Memory allocation failure for internal
+ *					  bookkeeping variables.
+ * * EIO				- Current task mm is not the same as the one
+ *					  that created the enclave.
+ * * NE_ERR_NO_CPUS_AVAIL_IN_POOL	- No CPUs available in the NE CPU pool.
+ * * NE_ERR_VCPU_ALREADY_USED		- The provided vCPU is already used.
+ * * NE_ERR_VCPU_NOT_IN_CPU_POOL	- The provided vCPU is not available in the
+ *					  NE CPU pool.
+ * * NE_ERR_VCPU_INVALID_CPU_CORE	- The core id of the provided vCPU is invalid
+ *					  or out of range.
+ * * NE_ERR_NOT_IN_INIT_STATE		- The enclave is not in init state
+ *					  (init = before being started).
+ * * NE_ERR_INVALID_VCPU		- The provided vCPU is not in the available
+ *					  CPUs range.
+ * * Error codes from the NE PCI device request.
+ */
+#define NE_ADD_VCPU			_IOWR(0xAE, 0x21, __u32)
+
+/**
+ * NE_GET_IMAGE_LOAD_INFO - The command is used to get information needed for
+ *			    in-memory enclave image loading e.g. offset in
+ *			    enclave memory to start placing the enclave image.
+ *			    The image load info is an input / output parameter.
+ *			    It includes info provided by the caller - flags -
+ *			    and returns the offset in enclave memory where to
+ *			    start placing the enclave image.
+ *			    The ioctl can be invoked on the enclave fd, before
+ *			    an enclave is started.
+ *
+ * Context: Process context.
+ * Return:
+ * * 0				- Logic successfully completed.
+ * *  -1			- There was a failure in the ioctl logic.
+ * On failure, errno is set to:
+ * * EFAULT			- copy_from_user() / copy_to_user() failure.
+ * * NE_ERR_NOT_IN_INIT_STATE	- The enclave is not in init state (init =
+ *				  before being started).
+ * * NE_ERR_INVALID_FLAG_VALUE	- The value of the provided flag is invalid.
+ */
+#define NE_GET_IMAGE_LOAD_INFO		_IOWR(0xAE, 0x22, struct ne_image_load_info)
+
+/**
+ * NE_SET_USER_MEMORY_REGION - The command is used to set a memory region for an
+ *			       enclave, given the allocated memory from the
+ *			       userspace. Enclave memory needs to be from the
+ *			       same NUMA node as the enclave CPUs.
+ *			       The user memory region is an input parameter. It
+ *			       includes info provided by the caller - flags,
+ *			       memory size and userspace address.
+ *			       The ioctl can be invoked on the enclave fd,
+ *			       before an enclave is started.
+ *
+ * Context: Process context.
+ * Return:
+ * * 0					- Logic successfully completed.
+ * *  -1				- There was a failure in the ioctl logic.
+ * On failure, errno is set to:
+ * * EFAULT				- copy_from_user() failure.
+ * * EINVAL				- Invalid physical memory region(s) e.g.
+ *					  unaligned address.
+ * * EIO				- Current task mm is not the same as
+ *					  the one that created the enclave.
+ * * ENOMEM				- Memory allocation failure for internal
+ *					  bookkeeping variables.
+ * * NE_ERR_NOT_IN_INIT_STATE		- The enclave is not in init state
+ *					  (init = before being started).
+ * * NE_ERR_INVALID_MEM_REGION_SIZE	- The memory size of the region is not
+ *					  multiple of 2 MiB.
+ * * NE_ERR_INVALID_MEM_REGION_ADDR	- Invalid user space address given.
+ * * NE_ERR_UNALIGNED_MEM_REGION_ADDR	- Unaligned user space address given.
+ * * NE_ERR_MEM_REGION_ALREADY_USED	- The memory region is already used.
+ * * NE_ERR_MEM_NOT_HUGE_PAGE		- The memory region is not backed by
+ *					  huge pages.
+ * * NE_ERR_MEM_DIFFERENT_NUMA_NODE	- The memory region is not from the same
+ *					  NUMA node as the CPUs.
+ * * NE_ERR_MEM_MAX_REGIONS		- The number of memory regions set for
+ *					  the enclave reached maximum.
+ * * NE_ERR_INVALID_PAGE_SIZE		- The memory region is not backed by
+ *					  pages multiple of 2 MiB.
+ * * NE_ERR_INVALID_FLAG_VALUE		- The value of the provided flag is invalid.
+ * * Error codes from get_user_pages().
+ * * Error codes from the NE PCI device request.
+ */
+#define NE_SET_USER_MEMORY_REGION	_IOW(0xAE, 0x23, struct ne_user_memory_region)
+
+/**
+ * NE_START_ENCLAVE - The command is used to trigger enclave start after the
+ *		      enclave resources, such as memory and CPU, have been set.
+ *		      The enclave start info is an input / output parameter. It
+ *		      includes info provided by the caller - enclave cid and
+ *		      flags - and returns the cid (if input cid is 0).
+ *		      The ioctl can be invoked on the enclave fd, after an
+ *		      enclave slot is created and resources, such as memory and
+ *		      vCPUs are set for an enclave.
+ *
+ * Context: Process context.
+ * Return:
+ * * 0					- Logic successfully completed.
+ * *  -1				- There was a failure in the ioctl logic.
+ * On failure, errno is set to:
+ * * EFAULT				- copy_from_user() / copy_to_user() failure.
+ * * NE_ERR_NOT_IN_INIT_STATE		- The enclave is not in init state
+ *					  (init = before being started).
+ * * NE_ERR_NO_MEM_REGIONS_ADDED	- No memory regions are set.
+ * * NE_ERR_NO_VCPUS_ADDED		- No vCPUs are set.
+ * *  NE_ERR_FULL_CORES_NOT_USED	- Full core(s) not set for the enclave.
+ * * NE_ERR_ENCLAVE_MEM_MIN_SIZE	- Enclave memory is less than minimum
+ *					  memory size (64 MiB).
+ * * NE_ERR_INVALID_FLAG_VALUE		- The value of the provided flag is invalid.
+ * *  NE_ERR_INVALID_ENCLAVE_CID	- The provided enclave CID is invalid.
+ * * Error codes from the NE PCI device request.
+ */
+#define NE_START_ENCLAVE		_IOWR(0xAE, 0x24, struct ne_enclave_start_info)
+
+/**
+ * DOC: NE specific error codes
+ */
+
+/**
+ * NE_ERR_VCPU_ALREADY_USED - The provided vCPU is already used.
+ */
+#define NE_ERR_VCPU_ALREADY_USED		(256)
+/**
+ * NE_ERR_VCPU_NOT_IN_CPU_POOL - The provided vCPU is not available in the
+ *				 NE CPU pool.
+ */
+#define NE_ERR_VCPU_NOT_IN_CPU_POOL		(257)
+/**
+ * NE_ERR_VCPU_INVALID_CPU_CORE - The core id of the provided vCPU is invalid
+ *				  or out of range of the NE CPU pool.
+ */
+#define NE_ERR_VCPU_INVALID_CPU_CORE		(258)
+/**
+ * NE_ERR_INVALID_MEM_REGION_SIZE - The user space memory region size is not
+ *				    multiple of 2 MiB.
+ */
+#define NE_ERR_INVALID_MEM_REGION_SIZE		(259)
+/**
+ * NE_ERR_INVALID_MEM_REGION_ADDR - The user space memory region address range
+ *				    is invalid.
+ */
+#define NE_ERR_INVALID_MEM_REGION_ADDR		(260)
+/**
+ * NE_ERR_UNALIGNED_MEM_REGION_ADDR - The user space memory region address is
+ *				      not aligned.
+ */
+#define NE_ERR_UNALIGNED_MEM_REGION_ADDR	(261)
+/**
+ * NE_ERR_MEM_REGION_ALREADY_USED - The user space memory region is already used.
+ */
+#define NE_ERR_MEM_REGION_ALREADY_USED		(262)
+/**
+ * NE_ERR_MEM_NOT_HUGE_PAGE - The user space memory region is not backed by
+ *			      contiguous physical huge page(s).
+ */
+#define NE_ERR_MEM_NOT_HUGE_PAGE		(263)
+/**
+ * NE_ERR_MEM_DIFFERENT_NUMA_NODE - The user space memory region is backed by
+ *				    pages from different NUMA nodes than the CPUs.
+ */
+#define NE_ERR_MEM_DIFFERENT_NUMA_NODE		(264)
+/**
+ * NE_ERR_MEM_MAX_REGIONS - The supported max memory regions per enclaves has
+ *			    been reached.
+ */
+#define NE_ERR_MEM_MAX_REGIONS			(265)
+/**
+ * NE_ERR_NO_MEM_REGIONS_ADDED - The command to start an enclave is triggered
+ *				 and no memory regions are added.
+ */
+#define NE_ERR_NO_MEM_REGIONS_ADDED		(266)
+/**
+ * NE_ERR_NO_VCPUS_ADDED - The command to start an enclave is triggered and no
+ *			   vCPUs are added.
+ */
+#define NE_ERR_NO_VCPUS_ADDED			(267)
+/**
+ * NE_ERR_ENCLAVE_MEM_MIN_SIZE - The enclave memory size is lower than the
+ *				 minimum supported.
+ */
+#define NE_ERR_ENCLAVE_MEM_MIN_SIZE		(268)
+/**
+ * NE_ERR_FULL_CORES_NOT_USED - The command to start an enclave is triggered and
+ *				full CPU cores are not set.
+ */
+#define NE_ERR_FULL_CORES_NOT_USED		(269)
+/**
+ * NE_ERR_NOT_IN_INIT_STATE - The enclave is not in init state when setting
+ *			      resources or triggering start.
+ */
+#define NE_ERR_NOT_IN_INIT_STATE		(270)
+/**
+ * NE_ERR_INVALID_VCPU - The provided vCPU is out of range of the available CPUs.
+ */
+#define NE_ERR_INVALID_VCPU			(271)
+/**
+ * NE_ERR_NO_CPUS_AVAIL_IN_POOL - The command to create an enclave is triggered
+ *				  and no CPUs are available in the pool.
+ */
+#define NE_ERR_NO_CPUS_AVAIL_IN_POOL		(272)
+/**
+ * NE_ERR_INVALID_PAGE_SIZE - The user space memory region is not backed by pages
+ *			      multiple of 2 MiB.
+ */
+#define NE_ERR_INVALID_PAGE_SIZE		(273)
+/**
+ * NE_ERR_INVALID_FLAG_VALUE - The provided flag value is invalid.
+ */
+#define NE_ERR_INVALID_FLAG_VALUE		(274)
+/**
+ * NE_ERR_INVALID_ENCLAVE_CID - The provided enclave CID is invalid, either
+ *				being a well-known value or the CID of the
+ *				parent / primary VM.
+ */
+#define NE_ERR_INVALID_ENCLAVE_CID		(275)
+
+/**
+ * DOC: Image load info flags
+ */
+
+/**
+ * NE_EIF_IMAGE - Enclave Image Format (EIF)
+ */
+#define NE_EIF_IMAGE			(0x01)
+
+#define NE_IMAGE_LOAD_MAX_FLAG_VAL	(0x02)
+
+/**
+ * struct ne_image_load_info - Info necessary for in-memory enclave image
+ *			       loading (in / out).
+ * @flags:		Flags to determine the enclave image type
+ *			(e.g. Enclave Image Format - EIF) (in).
+ * @memory_offset:	Offset in enclave memory where to start placing the
+ *			enclave image (out).
+ */
+struct ne_image_load_info {
+	__u64	flags;
+	__u64	memory_offset;
+};
+
+/**
+ * DOC: User memory region flags
+ */
+
+/**
+ * NE_DEFAULT_MEMORY_REGION - Memory region for enclave general usage.
+ */
+#define NE_DEFAULT_MEMORY_REGION	(0x00)
+
+#define NE_MEMORY_REGION_MAX_FLAG_VAL	(0x01)
+
+/**
+ * struct ne_user_memory_region - Memory region to be set for an enclave (in).
+ * @flags:		Flags to determine the usage for the memory region (in).
+ * @memory_size:	The size, in bytes, of the memory region to be set for
+ *			an enclave (in).
+ * @userspace_addr:	The start address of the userspace allocated memory of
+ *			the memory region to set for an enclave (in).
+ */
+struct ne_user_memory_region {
+	__u64	flags;
+	__u64	memory_size;
+	__u64	userspace_addr;
+};
+
+/**
+ * DOC: Enclave start info flags
+ */
+
+/**
+ * NE_ENCLAVE_PRODUCTION_MODE - Start enclave in production mode.
+ */
+#define NE_ENCLAVE_PRODUCTION_MODE	(0x00)
+/**
+ * NE_ENCLAVE_DEBUG_MODE - Start enclave in debug mode.
+ */
+#define NE_ENCLAVE_DEBUG_MODE		(0x01)
+
+#define NE_ENCLAVE_START_MAX_FLAG_VAL	(0x02)
+
+/**
+ * struct ne_enclave_start_info - Setup info necessary for enclave start (in / out).
+ * @flags:		Flags for the enclave to start with (e.g. debug mode) (in).
+ * @enclave_cid:	Context ID (CID) for the enclave vsock device. If 0 as
+ *			input, the CID is autogenerated by the hypervisor and
+ *			returned back as output by the driver (in / out).
+ */
+struct ne_enclave_start_info {
+	__u64	flags;
+	__u64	enclave_cid;
+};
+
+#endif /* _UAPI_LINUX_NITRO_ENCLAVES_H_ */
-- 
cgit v1.2.3


From c7f0207b613033c56b1217032d2f6326d0c69217 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Wed, 16 Sep 2020 21:11:33 -0700
Subject: fscrypt: make "#define fscrypt_policy" user-only

The fscrypt UAPI header defines fscrypt_policy to fscrypt_policy_v1,
for source compatibility with old userspace programs.

Internally, the kernel doesn't want that compatibility definition.
Instead, fscrypt_private.h #undefs it and re-defines it to a union.

That works for now.  However, in order to add
fscrypt_operations::get_dummy_policy(), we'll need to forward declare
'union fscrypt_policy' in include/linux/fscrypt.h.  That would cause
build errors because "fscrypt_policy" is used in ioctl numbers.

To avoid this, modify the UAPI header to make the fscrypt_policy
compatibility definition conditional on !__KERNEL__, and make the ioctls
use fscrypt_policy_v1 instead of fscrypt_policy.

Note that this doesn't change the actual ioctl numbers.

Acked-by: Jeff Layton <jlayton@kernel.org>
Link: https://lore.kernel.org/r/20200917041136.178600-11-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/crypto/fscrypt_private.h  | 1 -
 include/uapi/linux/fscrypt.h | 6 +++---
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index 355f6d937751..ac3352086ee4 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -97,7 +97,6 @@ static inline const u8 *fscrypt_context_nonce(const union fscrypt_context *ctx)
 	return NULL;
 }
 
-#undef fscrypt_policy
 union fscrypt_policy {
 	u8 version;
 	struct fscrypt_policy_v1 v1;
diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h
index 7875709ccfeb..e5de60336938 100644
--- a/include/uapi/linux/fscrypt.h
+++ b/include/uapi/linux/fscrypt.h
@@ -45,7 +45,6 @@ struct fscrypt_policy_v1 {
 	__u8 flags;
 	__u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
 };
-#define fscrypt_policy	fscrypt_policy_v1
 
 /*
  * Process-subscribed "logon" key description prefix and payload format.
@@ -156,9 +155,9 @@ struct fscrypt_get_key_status_arg {
 	__u32 __out_reserved[13];
 };
 
-#define FS_IOC_SET_ENCRYPTION_POLICY		_IOR('f', 19, struct fscrypt_policy)
+#define FS_IOC_SET_ENCRYPTION_POLICY		_IOR('f', 19, struct fscrypt_policy_v1)
 #define FS_IOC_GET_ENCRYPTION_PWSALT		_IOW('f', 20, __u8[16])
-#define FS_IOC_GET_ENCRYPTION_POLICY		_IOW('f', 21, struct fscrypt_policy)
+#define FS_IOC_GET_ENCRYPTION_POLICY		_IOW('f', 21, struct fscrypt_policy_v1)
 #define FS_IOC_GET_ENCRYPTION_POLICY_EX		_IOWR('f', 22, __u8[9]) /* size + version */
 #define FS_IOC_ADD_ENCRYPTION_KEY		_IOWR('f', 23, struct fscrypt_add_key_arg)
 #define FS_IOC_REMOVE_ENCRYPTION_KEY		_IOWR('f', 24, struct fscrypt_remove_key_arg)
@@ -170,6 +169,7 @@ struct fscrypt_get_key_status_arg {
 
 /* old names; don't add anything new here! */
 #ifndef __KERNEL__
+#define fscrypt_policy			fscrypt_policy_v1
 #define FS_KEY_DESCRIPTOR_SIZE		FSCRYPT_KEY_DESCRIPTOR_SIZE
 #define FS_POLICY_FLAGS_PAD_4		FSCRYPT_POLICY_FLAGS_PAD_4
 #define FS_POLICY_FLAGS_PAD_8		FSCRYPT_POLICY_FLAGS_PAD_8
-- 
cgit v1.2.3


From 0a068adde505a90ece23caaf19b77567e1d18298 Mon Sep 17 00:00:00 2001
From: Ofir Bitton <obitton@habana.ai>
Date: Tue, 21 Jul 2020 10:49:51 +0300
Subject: habanalabs: add information about PCIe controller

Update the firmware header with a new API for getting PCIe info
such as tx/rx throughput and replay counter.
These counters are needed by customers for monitoring and maintenance
of multiple devices.
Add new opcodes to the INFO ioctl to retrieve these counters.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/misc/habanalabs/common/firmware_if.c      | 48 +++++++++++++++++++++++
 drivers/misc/habanalabs/common/habanalabs.h       |  4 ++
 drivers/misc/habanalabs/common/habanalabs_ioctl.c | 41 +++++++++++++++++++
 drivers/misc/habanalabs/gaudi/gaudi.c             |  4 ++
 drivers/misc/habanalabs/goya/goya.c               |  4 ++
 drivers/misc/habanalabs/include/common/armcp_if.h | 10 +++++
 include/uapi/misc/habanalabs.h                    | 27 +++++++++++++
 7 files changed, 138 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index f52bc690dfc5..61f5edc96e16 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -363,6 +363,54 @@ out:
 	return rc;
 }
 
+int hl_fw_armcp_pci_counters_get(struct hl_device *hdev,
+		struct hl_info_pci_counters *counters)
+{
+	struct armcp_packet pkt = {};
+	long result;
+	int rc;
+
+	pkt.ctl = cpu_to_le32(ARMCP_PACKET_PCIE_THROUGHPUT_GET <<
+			ARMCP_PKT_CTL_OPCODE_SHIFT);
+
+	/* Fetch PCI rx counter */
+	pkt.index = cpu_to_le32(armcp_pcie_throughput_rx);
+	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+					HL_ARMCP_INFO_TIMEOUT_USEC, &result);
+	if (rc) {
+		dev_err(hdev->dev,
+			"Failed to handle ArmCP PCI info pkt, error %d\n", rc);
+		return rc;
+	}
+	counters->rx_throughput = result;
+
+	/* Fetch PCI tx counter */
+	pkt.index = cpu_to_le32(armcp_pcie_throughput_tx);
+	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+					HL_ARMCP_INFO_TIMEOUT_USEC, &result);
+	if (rc) {
+		dev_err(hdev->dev,
+			"Failed to handle ArmCP PCI info pkt, error %d\n", rc);
+		return rc;
+	}
+	counters->tx_throughput = result;
+
+	/* Fetch PCI replay counter */
+	pkt.ctl = cpu_to_le32(ARMCP_PACKET_PCIE_REPLAY_CNT_GET <<
+			ARMCP_PKT_CTL_OPCODE_SHIFT);
+
+	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+			HL_ARMCP_INFO_TIMEOUT_USEC, &result);
+	if (rc) {
+		dev_err(hdev->dev,
+			"Failed to handle ArmCP PCI info pkt, error %d\n", rc);
+		return rc;
+	}
+	counters->replay_cnt = (u32) result;
+
+	return rc;
+}
+
 static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg)
 {
 	u32 err_val;
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index f97eebc64979..2c9fcb513215 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1483,6 +1483,7 @@ struct hl_device_idle_busy_ts {
  * @soft_reset_cnt: number of soft reset since the driver was loaded.
  * @hard_reset_cnt: number of hard reset since the driver was loaded.
  * @idle_busy_ts_idx: index of current entry in idle_busy_ts_arr
+ * @clk_throttling_reason: bitmask represents the current clk throttling reasons
  * @id: device minor.
  * @id_control: minor of the control device
  * @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit
@@ -1587,6 +1588,7 @@ struct hl_device {
 	u32				soft_reset_cnt;
 	u32				hard_reset_cnt;
 	u32				idle_busy_ts_idx;
+	u32				clk_throttling_reason;
 	u16				id;
 	u16				id_control;
 	u16				cpu_pci_msb_addr;
@@ -1841,6 +1843,8 @@ void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
 int hl_fw_send_heartbeat(struct hl_device *hdev);
 int hl_fw_armcp_info_get(struct hl_device *hdev);
 int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
+int hl_fw_armcp_pci_counters_get(struct hl_device *hdev,
+		struct hl_info_pci_counters *counters);
 int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
 			u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
 			u32 boot_err0_reg, bool skip_bmc,
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index 5af1c03da473..4d838b1a3bbe 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -276,6 +276,41 @@ static int time_sync_info(struct hl_device *hdev, struct hl_info_args *args)
 		min((size_t) max_size, sizeof(time_sync))) ? -EFAULT : 0;
 }
 
+static int pci_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+	struct hl_device *hdev = hpriv->hdev;
+	struct hl_info_pci_counters pci_counters = {0};
+	u32 max_size = args->return_size;
+	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+	int rc;
+
+	if ((!max_size) || (!out))
+		return -EINVAL;
+
+	rc = hl_fw_armcp_pci_counters_get(hdev, &pci_counters);
+	if (rc)
+		return rc;
+
+	return copy_to_user(out, &pci_counters,
+		min((size_t) max_size, sizeof(pci_counters))) ? -EFAULT : 0;
+}
+
+static int clk_throttle_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+	struct hl_device *hdev = hpriv->hdev;
+	struct hl_info_clk_throttle clk_throttle = {0};
+	u32 max_size = args->return_size;
+	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+	if ((!max_size) || (!out))
+		return -EINVAL;
+
+	clk_throttle.clk_throttling_reason = hdev->clk_throttling_reason;
+
+	return copy_to_user(out, &clk_throttle,
+		min((size_t) max_size, sizeof(clk_throttle))) ? -EFAULT : 0;
+}
+
 static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 {
 	struct hl_device *hdev = hpriv->hdev;
@@ -360,6 +395,12 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 	case HL_INFO_CS_COUNTERS:
 		return cs_counters_info(hpriv, args);
 
+	case HL_INFO_PCI_COUNTERS:
+		return pci_counters_info(hpriv, args);
+
+	case HL_INFO_CLK_THROTTLE_REASON:
+		return clk_throttle_info(hpriv, args);
+
 	default:
 		dev_err(dev, "Invalid request %d\n", args->op);
 		rc = -ENOTTY;
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 4009b7df4caf..adb5c5594ac1 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -5653,21 +5653,25 @@ static void gaudi_print_clk_change_info(struct hl_device *hdev,
 {
 	switch (event_type) {
 	case GAUDI_EVENT_FIX_POWER_ENV_S:
+		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
 		dev_info_ratelimited(hdev->dev,
 			"Clock throttling due to power consumption\n");
 		break;
 
 	case GAUDI_EVENT_FIX_POWER_ENV_E:
+		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
 		dev_info_ratelimited(hdev->dev,
 			"Power envelop is safe, back to optimal clock\n");
 		break;
 
 	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
+		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
 		dev_info_ratelimited(hdev->dev,
 			"Clock throttling due to overheating\n");
 		break;
 
 	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
+		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
 		dev_info_ratelimited(hdev->dev,
 			"Thermal envelop is safe, back to optimal clock\n");
 		break;
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 33cd2ae653d2..954f2c022d33 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -4580,18 +4580,22 @@ static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type)
 {
 	switch (event_type) {
 	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
+		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
 		dev_info_ratelimited(hdev->dev,
 			"Clock throttling due to power consumption\n");
 		break;
 	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
+		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
 		dev_info_ratelimited(hdev->dev,
 			"Power envelop is safe, back to optimal clock\n");
 		break;
 	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
+		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
 		dev_info_ratelimited(hdev->dev,
 			"Clock throttling due to overheating\n");
 		break;
 	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
+		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
 		dev_info_ratelimited(hdev->dev,
 			"Thermal envelop is safe, back to optimal clock\n");
 		break;
diff --git a/drivers/misc/habanalabs/include/common/armcp_if.h b/drivers/misc/habanalabs/include/common/armcp_if.h
index 07f9972db28d..1403c937253c 100644
--- a/drivers/misc/habanalabs/include/common/armcp_if.h
+++ b/drivers/misc/habanalabs/include/common/armcp_if.h
@@ -243,6 +243,8 @@ enum armcp_packet_id {
 	ARMCP_PACKET_TEMPERATURE_SET,		/* sysfs */
 	ARMCP_PACKET_VOLTAGE_SET,		/* sysfs */
 	ARMCP_PACKET_CURRENT_SET,		/* sysfs */
+	ARMCP_PACKET_PCIE_THROUGHPUT_GET,	/* internal */
+	ARMCP_PACKET_PCIE_REPLAY_CNT_GET,	/* internal */
 };
 
 #define ARMCP_PACKET_FENCE_VAL	0xFE8CE7A5
@@ -277,6 +279,9 @@ struct armcp_packet {
 			__u8 pad; /* unused */
 		};
 
+		/* For any general request */
+		__le32 index;
+
 		/* For frequency get/set */
 		__le32 pll_index;
 
@@ -344,6 +349,11 @@ enum armcp_pwm_attributes {
 	armcp_pwm_enable
 };
 
+enum armcp_pcie_throughput_attributes {
+	armcp_pcie_throughput_tx,
+	armcp_pcie_throughput_rx
+};
+
 /* Event Queue Packets */
 
 struct eq_generic_event {
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index d5c4f983b7a8..ee13b919db35 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -264,6 +264,8 @@ enum hl_device_status {
  * HL_INFO_TIME_SYNC     - Retrieve the device's time alongside the host's time
  *                         for synchronization.
  * HL_INFO_CS_COUNTERS   - Retrieve command submission counters
+ * HL_INFO_PCI_COUNTERS  - Retrieve PCI counters
+ * HL_INFO_CLK_THROTTLE_REASON - Retrieve clock throttling reason
  */
 #define HL_INFO_HW_IP_INFO		0
 #define HL_INFO_HW_EVENTS		1
@@ -276,6 +278,8 @@ enum hl_device_status {
 #define HL_INFO_RESET_COUNT		9
 #define HL_INFO_TIME_SYNC		10
 #define HL_INFO_CS_COUNTERS		11
+#define HL_INFO_PCI_COUNTERS		12
+#define HL_INFO_CLK_THROTTLE_REASON	13
 
 #define HL_INFO_VERSION_MAX_LEN	128
 #define HL_INFO_CARD_NAME_MAX_LEN	16
@@ -340,6 +344,29 @@ struct hl_info_time_sync {
 	__u64 host_time;
 };
 
+/**
+ * struct hl_info_pci_counters - pci counters
+ * @rx_throughput: PCI rx throughput KBps
+ * @tx_throughput: PCI tx throughput KBps
+ * @replay_cnt: PCI replay counter
+ */
+struct hl_info_pci_counters {
+	__u64 rx_throughput;
+	__u64 tx_throughput;
+	__u64 replay_cnt;
+};
+
+#define HL_CLK_THROTTLE_POWER	0x1
+#define HL_CLK_THROTTLE_THERMAL	0x2
+
+/**
+ * struct hl_info_clk_throttle - clock throttling reason
+ * @clk_throttling_reason: each bit represents a clk throttling reason
+ */
+struct hl_info_clk_throttle {
+	__u32 clk_throttling_reason;
+};
+
 /**
  * struct hl_info_cs_counters - command submission counters
  * @out_of_mem_drop_cnt: dropped due to memory allocation issue
-- 
cgit v1.2.3


From 843839bec3a304f8313d6ae554f618a91e52731a Mon Sep 17 00:00:00 2001
From: Ofir Bitton <obitton@habana.ai>
Date: Sun, 19 Jul 2020 11:08:09 +0300
Subject: habanalabs: expose sync manager resources allocation in INFO IOCTL

Although the driver defines the first user-available sync manager object
and monitor in habanalabs.h, we would like to also expose this information
via the INFO IOCTL so the runtime can get this information dynamically.
This is because in future ASICs we won't need to define it statically.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/misc/habanalabs/common/habanalabs.h       |  6 +++++
 drivers/misc/habanalabs/common/habanalabs_ioctl.c | 30 ++++++++++++++++++++++-
 drivers/misc/habanalabs/gaudi/gaudi.c             |  7 ++++++
 include/uapi/misc/habanalabs.h                    | 23 +++++++++++++++++
 4 files changed, 65 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 2c9fcb513215..caced12f278f 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -65,6 +65,8 @@
 
 #define HL_PCI_NUM_BARS			6
 
+#define HL_MAX_DCORES			4
+
 /**
  * struct pgt_info - MMU hop page info.
  * @node: hash linked-list node for the pgts shadow hash of pgts.
@@ -291,6 +293,8 @@ struct hl_mmu_properties {
  * @max_queues: maximum amount of queues in the system
  * @sync_stream_first_sob: first sync object available for sync stream use
  * @sync_stream_first_mon: first monitor available for sync stream use
+ * @first_available_user_sob: first sob available for the user
+ * @first_available_user_mon: first monitor available for the user
  * @tpc_enabled_mask: which TPCs are enabled.
  * @completion_queues_count: number of completion queues.
  */
@@ -337,6 +341,8 @@ struct asic_fixed_properties {
 	u32				max_queues;
 	u16				sync_stream_first_sob;
 	u16				sync_stream_first_mon;
+	u16				first_available_user_sob[HL_MAX_DCORES];
+	u16				first_available_user_mon[HL_MAX_DCORES];
 	u8				tpc_enabled_mask;
 	u8				completion_queues_count;
 };
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index 4d838b1a3bbe..fe6c5534d378 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -8,6 +8,7 @@
 #include <uapi/misc/habanalabs.h>
 #include "habanalabs.h"
 
+#include <linux/kernel.h>
 #include <linux/fs.h>
 #include <linux/uaccess.h>
 #include <linux/slab.h>
@@ -314,7 +315,7 @@ static int clk_throttle_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 {
 	struct hl_device *hdev = hpriv->hdev;
-	struct hl_info_cs_counters cs_counters = {0};
+	struct hl_info_cs_counters cs_counters = { {0} };
 	u32 max_size = args->return_size;
 	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
 
@@ -332,6 +333,30 @@ static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 		min((size_t) max_size, sizeof(cs_counters))) ? -EFAULT : 0;
 }
 
+static int sync_manager_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+	struct hl_device *hdev = hpriv->hdev;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct hl_info_sync_manager sm_info = {0};
+	u32 max_size = args->return_size;
+	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+	if ((!max_size) || (!out))
+		return -EINVAL;
+
+	if (args->dcore_id >= HL_MAX_DCORES)
+		return -EINVAL;
+
+	sm_info.first_available_sync_object =
+			prop->first_available_user_sob[args->dcore_id];
+	sm_info.first_available_monitor =
+			prop->first_available_user_mon[args->dcore_id];
+
+
+	return copy_to_user(out, &sm_info, min_t(size_t, (size_t) max_size,
+			sizeof(sm_info))) ? -EFAULT : 0;
+}
+
 static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 				struct device *dev)
 {
@@ -401,6 +426,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 	case HL_INFO_CLK_THROTTLE_REASON:
 		return clk_throttle_info(hpriv, args);
 
+	case HL_INFO_SYNC_MANAGER:
+		return sync_manager_info(hpriv, args);
+
 	default:
 		dev_err(dev, "Invalid request %d\n", args->op);
 		rc = -ENOTTY;
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index adb5c5594ac1..45ba3a5f5b14 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -367,6 +367,7 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
 static int gaudi_get_fixed_properties(struct hl_device *hdev)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	u32 num_sync_stream_queues = 0;
 	int i;
 
 	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
@@ -383,6 +384,7 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
 			prop->hw_queues_props[i].driver_only = 0;
 			prop->hw_queues_props[i].requires_kernel_cb = 1;
 			prop->hw_queues_props[i].supports_sync_stream = 1;
+			num_sync_stream_queues++;
 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
 			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
 			prop->hw_queues_props[i].driver_only = 1;
@@ -469,6 +471,11 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
 
 	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
 
+	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
+			num_sync_stream_queues * HL_RSVD_SOBS;
+	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
+			num_sync_stream_queues * HL_RSVD_MONS;
+
 	return 0;
 }
 
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index ee13b919db35..ca6dc1fc250e 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -266,6 +266,7 @@ enum hl_device_status {
  * HL_INFO_CS_COUNTERS   - Retrieve command submission counters
  * HL_INFO_PCI_COUNTERS  - Retrieve PCI counters
  * HL_INFO_CLK_THROTTLE_REASON - Retrieve clock throttling reason
+ * HL_INFO_SYNC_MANAGER  - Retrieve sync manager info per dcore
  */
 #define HL_INFO_HW_IP_INFO		0
 #define HL_INFO_HW_EVENTS		1
@@ -280,6 +281,7 @@ enum hl_device_status {
 #define HL_INFO_CS_COUNTERS		11
 #define HL_INFO_PCI_COUNTERS		12
 #define HL_INFO_CLK_THROTTLE_REASON	13
+#define HL_INFO_SYNC_MANAGER		14
 
 #define HL_INFO_VERSION_MAX_LEN	128
 #define HL_INFO_CARD_NAME_MAX_LEN	16
@@ -367,6 +369,16 @@ struct hl_info_clk_throttle {
 	__u32 clk_throttling_reason;
 };
 
+/**
+ * struct hl_info_sync_manager - sync manager information
+ * @first_available_sync_object: first available sob
+ * @first_available_monitor: first available monitor
+ */
+struct hl_info_sync_manager {
+	__u32 first_available_sync_object;
+	__u32 first_available_monitor;
+};
+
 /**
  * struct hl_info_cs_counters - command submission counters
  * @out_of_mem_drop_cnt: dropped due to memory allocation issue
@@ -386,6 +398,13 @@ struct hl_info_cs_counters {
 	struct hl_cs_counters ctx_cs_counters;
 };
 
+enum gaudi_dcores {
+	HL_GAUDI_WS_DCORE,
+	HL_GAUDI_WN_DCORE,
+	HL_GAUDI_EN_DCORE,
+	HL_GAUDI_ES_DCORE
+};
+
 struct hl_info_args {
 	/* Location of relevant struct in userspace */
 	__u64 return_pointer;
@@ -402,6 +421,10 @@ struct hl_info_args {
 	__u32 op;
 
 	union {
+		/* Dcore id for which the information is relevant.
+		 * For Gaudi refer to 'enum gaudi_dcores'
+		 */
+		__u32 dcore_id;
 		/* Context ID - Currently not in use */
 		__u32 ctx_id;
 		/* Period value for utilization rate (100ms - 1000ms, in 100ms
-- 
cgit v1.2.3


From d90416c84d86ff78a2181f135d72d564430107b8 Mon Sep 17 00:00:00 2001
From: farah kassabri <fkassabri@habana.ai>
Date: Wed, 12 Aug 2020 17:20:13 +0300
Subject: habanalabs: extend busy engines mask to 64 bits

Change the busy engines bitmask to 64 bits in order to represent
more engines, which is needed for future ASIC support.

Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/misc/habanalabs/common/habanalabs.h       | 2 +-
 drivers/misc/habanalabs/common/habanalabs_ioctl.c | 2 +-
 drivers/misc/habanalabs/gaudi/gaudi.c             | 2 +-
 drivers/misc/habanalabs/goya/goya.c               | 2 +-
 include/uapi/misc/habanalabs.h                    | 6 ++++++
 5 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 2fd268e4cf10..fbdf105c4bb2 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -754,7 +754,7 @@ struct hl_asic_funcs {
 	void (*set_clock_gating)(struct hl_device *hdev);
 	void (*disable_clock_gating)(struct hl_device *hdev);
 	int (*debug_coresight)(struct hl_device *hdev, void *data);
-	bool (*is_device_idle)(struct hl_device *hdev, u32 *mask,
+	bool (*is_device_idle)(struct hl_device *hdev, u64 *mask,
 				struct seq_file *s);
 	int (*soft_reset_late_init)(struct hl_device *hdev);
 	void (*hw_queues_lock)(struct hl_device *hdev);
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index fe6c5534d378..a94800014243 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -132,7 +132,7 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args)
 		return -EINVAL;
 
 	hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev,
-					&hw_idle.busy_engines_mask, NULL);
+					&hw_idle.busy_engines_mask_ext, NULL);
 
 	return copy_to_user(out, &hw_idle,
 		min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index a0932261e67c..ba964a316b0b 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -6083,7 +6083,7 @@ static int gaudi_armcp_info_get(struct hl_device *hdev)
 	return 0;
 }
 
-static bool gaudi_is_device_idle(struct hl_device *hdev, u32 *mask,
+static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
 					struct seq_file *s)
 {
 	struct gaudi_device *gaudi = hdev->asic_specific;
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index ac4d44fa56e4..5fb3565c80c5 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -5148,7 +5148,7 @@ static void goya_disable_clock_gating(struct hl_device *hdev)
 	/* clock gating not supported in Goya */
 }
 
-static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask,
+static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask,
 				struct seq_file *s)
 {
 	const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index ca6dc1fc250e..693081728ef3 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -319,6 +319,12 @@ struct hl_info_hw_idle {
 	 * Bits definition is according to `enum <chip>_enging_id'.
 	 */
 	__u32 busy_engines_mask;
+
+	/*
+	 * Extended Bitmask of busy engines.
+	 * Bits definition is according to `enum <chip>_enging_id'.
+	 */
+	__u64 busy_engines_mask_ext;
 };
 
 struct hl_info_device_status {
-- 
cgit v1.2.3


From 9f3064913e1b9b4153accbd33aaf1983be92c569 Mon Sep 17 00:00:00 2001
From: farah kassabri <fkassabri@habana.ai>
Date: Sun, 9 Aug 2020 16:25:53 +0300
Subject: habanalabs: add support for getting device total energy

Add driver implementation for reading the total energy consumption
from the device ARM FW.

Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/misc/habanalabs/common/firmware_if.c      | 24 +++++++++++++++++++++++
 drivers/misc/habanalabs/common/habanalabs.h       |  2 ++
 drivers/misc/habanalabs/common/habanalabs_ioctl.c | 24 +++++++++++++++++++++++
 drivers/misc/habanalabs/include/common/armcp_if.h |  1 +
 include/uapi/misc/habanalabs.h                    | 10 ++++++++++
 5 files changed, 61 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 61f5edc96e16..eb66ff532c6a 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -411,6 +411,30 @@ int hl_fw_armcp_pci_counters_get(struct hl_device *hdev,
 	return rc;
 }
 
+int hl_fw_armcp_total_energy_get(struct hl_device *hdev,
+			u64 *total_energy)
+{
+	struct armcp_packet pkt = {};
+	long result;
+	int rc;
+
+	pkt.ctl = cpu_to_le32(ARMCP_PACKET_TOTAL_ENERGY_GET <<
+			ARMCP_PKT_CTL_OPCODE_SHIFT);
+
+	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+					HL_ARMCP_INFO_TIMEOUT_USEC, &result);
+	if (rc) {
+		dev_err(hdev->dev,
+			"Failed to handle ArmCP total energy pkt, error %d\n",
+				rc);
+		return rc;
+	}
+
+	*total_energy = result;
+
+	return rc;
+}
+
 static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg)
 {
 	u32 err_val;
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 5ef487a3b843..6577a73e3227 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1852,6 +1852,8 @@ int hl_fw_armcp_info_get(struct hl_device *hdev);
 int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
 int hl_fw_armcp_pci_counters_get(struct hl_device *hdev,
 		struct hl_info_pci_counters *counters);
+int hl_fw_armcp_total_energy_get(struct hl_device *hdev,
+			u64 *total_energy);
 int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
 			u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
 			u32 boot_err0_reg, bool skip_bmc,
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index a94800014243..18ee14b4b0e1 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -357,6 +357,27 @@ static int sync_manager_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 			sizeof(sm_info))) ? -EFAULT : 0;
 }
 
+static int total_energy_consumption_info(struct hl_fpriv *hpriv,
+			struct hl_info_args *args)
+{
+	struct hl_device *hdev = hpriv->hdev;
+	struct hl_info_energy total_energy = {0};
+	u32 max_size = args->return_size;
+	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+	int rc;
+
+	if ((!max_size) || (!out))
+		return -EINVAL;
+
+	rc = hl_fw_armcp_total_energy_get(hdev,
+			&total_energy.total_energy_consumption);
+	if (rc)
+		return rc;
+
+	return copy_to_user(out, &total_energy,
+		min((size_t) max_size, sizeof(total_energy))) ? -EFAULT : 0;
+}
+
 static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 				struct device *dev)
 {
@@ -429,6 +450,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 	case HL_INFO_SYNC_MANAGER:
 		return sync_manager_info(hpriv, args);
 
+	case HL_INFO_TOTAL_ENERGY:
+		return total_energy_consumption_info(hpriv, args);
+
 	default:
 		dev_err(dev, "Invalid request %d\n", args->op);
 		rc = -ENOTTY;
diff --git a/drivers/misc/habanalabs/include/common/armcp_if.h b/drivers/misc/habanalabs/include/common/armcp_if.h
index 1403c937253c..4d78898524e9 100644
--- a/drivers/misc/habanalabs/include/common/armcp_if.h
+++ b/drivers/misc/habanalabs/include/common/armcp_if.h
@@ -245,6 +245,7 @@ enum armcp_packet_id {
 	ARMCP_PACKET_CURRENT_SET,		/* sysfs */
 	ARMCP_PACKET_PCIE_THROUGHPUT_GET,	/* internal */
 	ARMCP_PACKET_PCIE_REPLAY_CNT_GET,	/* internal */
+	ARMCP_PACKET_TOTAL_ENERGY_GET,		/* internal */
 };
 
 #define ARMCP_PACKET_FENCE_VAL	0xFE8CE7A5
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 693081728ef3..6803991726e8 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -267,6 +267,7 @@ enum hl_device_status {
  * HL_INFO_PCI_COUNTERS  - Retrieve PCI counters
  * HL_INFO_CLK_THROTTLE_REASON - Retrieve clock throttling reason
  * HL_INFO_SYNC_MANAGER  - Retrieve sync manager info per dcore
+ * HL_INFO_TOTAL_ENERGY  - Retrieve total energy consumption
  */
 #define HL_INFO_HW_IP_INFO		0
 #define HL_INFO_HW_EVENTS		1
@@ -282,6 +283,7 @@ enum hl_device_status {
 #define HL_INFO_PCI_COUNTERS		12
 #define HL_INFO_CLK_THROTTLE_REASON	13
 #define HL_INFO_SYNC_MANAGER		14
+#define HL_INFO_TOTAL_ENERGY		15
 
 #define HL_INFO_VERSION_MAX_LEN	128
 #define HL_INFO_CARD_NAME_MAX_LEN	16
@@ -375,6 +377,14 @@ struct hl_info_clk_throttle {
 	__u32 clk_throttling_reason;
 };
 
+/**
+ * struct hl_info_energy - device energy information
+ * @total_energy_consumption: total device energy consumption
+ */
+struct hl_info_energy {
+	__u64 total_energy_consumption;
+};
+
 /**
  * struct hl_info_sync_manager - sync manager information
  * @first_available_sync_object: first available sob
-- 
cgit v1.2.3


From 2f55342c5e4d3ea94c0b8237f3ad26963269f90f Mon Sep 17 00:00:00 2001
From: Oded Gabbay <oded.gabbay@gmail.com>
Date: Sat, 15 Aug 2020 16:28:10 +0300
Subject: habanalabs: replace armcp with the generic cpucp

The name ArmCP implies that the device CPU is always an ARM processor,
which might not be true in the future.

Most of this change is an internal renaming of variables, functions and
defines, but there are two entries in sysfs which have armcp in their
names. Add identical cpucp entries but don't remove the armcp entries yet.
Those will be deprecated next year. Document this in the sysfs ABI
documentation.

Signed-off-by: Moti Haimovski <mhaimovski@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 Documentation/ABI/testing/sysfs-driver-habanalabs |  18 +-
 drivers/misc/habanalabs/common/debugfs.c          |  18 +-
 drivers/misc/habanalabs/common/device.c           |   2 +-
 drivers/misc/habanalabs/common/firmware_if.c      | 127 ++++---
 drivers/misc/habanalabs/common/habanalabs.h       |  20 +-
 drivers/misc/habanalabs/common/habanalabs_ioctl.c |  12 +-
 drivers/misc/habanalabs/common/hwmon.c            |  60 ++--
 drivers/misc/habanalabs/common/irq.c              |   2 +-
 drivers/misc/habanalabs/common/sysfs.c            |  60 ++--
 drivers/misc/habanalabs/gaudi/gaudi.c             |  30 +-
 drivers/misc/habanalabs/gaudi/gaudiP.h            |   4 +-
 drivers/misc/habanalabs/goya/goya.c               |  34 +-
 drivers/misc/habanalabs/goya/goyaP.h              |   2 +-
 drivers/misc/habanalabs/include/common/armcp_if.h | 418 ----------------------
 drivers/misc/habanalabs/include/common/cpucp_if.h | 417 +++++++++++++++++++++
 include/uapi/misc/habanalabs.h                    |   4 +-
 16 files changed, 631 insertions(+), 597 deletions(-)
 delete mode 100644 drivers/misc/habanalabs/include/common/armcp_if.h
 create mode 100644 drivers/misc/habanalabs/include/common/cpucp_if.h

(limited to 'include/uapi')

diff --git a/Documentation/ABI/testing/sysfs-driver-habanalabs b/Documentation/ABI/testing/sysfs-driver-habanalabs
index 1a14bf9b22ba..169ae4b2a180 100644
--- a/Documentation/ABI/testing/sysfs-driver-habanalabs
+++ b/Documentation/ABI/testing/sysfs-driver-habanalabs
@@ -2,13 +2,17 @@ What:           /sys/class/habanalabs/hl<n>/armcp_kernel_ver
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        oded.gabbay@gmail.com
-Description:    Version of the Linux kernel running on the device's CPU
+Description:    Version of the Linux kernel running on the device's CPU.
+                Will be DEPRECATED in Linux kernel version 5.10, and be
+                replaced with cpucp_kernel_ver
 
 What:           /sys/class/habanalabs/hl<n>/armcp_ver
 Date:           Jan 2019
 KernelVersion:  5.1
 Contact:        oded.gabbay@gmail.com
 Description:    Version of the application running on the device's CPU
+                Will be DEPRECATED in Linux kernel version 5.10, and be
+                replaced with cpucp_ver
 
 What:           /sys/class/habanalabs/hl<n>/clk_max_freq_mhz
 Date:           Jun 2019
@@ -33,6 +37,18 @@ KernelVersion:  5.1
 Contact:        oded.gabbay@gmail.com
 Description:    Version of the Device's CPLD F/W
 
+What:           /sys/class/habanalabs/hl<n>/cpucp_kernel_ver
+Date:           Oct 2020
+KernelVersion:  5.10
+Contact:        oded.gabbay@gmail.com
+Description:    Version of the Linux kernel running on the device's CPU
+
+What:           /sys/class/habanalabs/hl<n>/cpucp_ver
+Date:           Oct 2020
+KernelVersion:  5.10
+Contact:        oded.gabbay@gmail.com
+Description:    Version of the application running on the device's CPU
+
 What:           /sys/class/habanalabs/hl<n>/device_type
 Date:           Jan 2019
 KernelVersion:  5.1
diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
index 4b416f64f6ec..c27c0f94c97a 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -21,7 +21,7 @@ static struct dentry *hl_debug_root;
 static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
 				u8 i2c_reg, long *val)
 {
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
 	int rc;
 
 	if (hl_device_disabled_or_in_reset(hdev))
@@ -29,8 +29,8 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
 
 	memset(&pkt, 0, sizeof(pkt));
 
-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_RD <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_I2C_RD <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
 	pkt.i2c_bus = i2c_bus;
 	pkt.i2c_addr = i2c_addr;
 	pkt.i2c_reg = i2c_reg;
@@ -47,7 +47,7 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
 static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
 				u8 i2c_reg, u32 val)
 {
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
 	int rc;
 
 	if (hl_device_disabled_or_in_reset(hdev))
@@ -55,8 +55,8 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
 
 	memset(&pkt, 0, sizeof(pkt));
 
-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_WR <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_I2C_WR <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
 	pkt.i2c_bus = i2c_bus;
 	pkt.i2c_addr = i2c_addr;
 	pkt.i2c_reg = i2c_reg;
@@ -73,7 +73,7 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
 
 static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state)
 {
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
 	int rc;
 
 	if (hl_device_disabled_or_in_reset(hdev))
@@ -81,8 +81,8 @@ static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state)
 
 	memset(&pkt, 0, sizeof(pkt));
 
-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_LED_SET <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_LED_SET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
 	pkt.led_index = cpu_to_le32(led);
 	pkt.value = cpu_to_le64(state);
 
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index dcb7f9ca7a67..6e916cc22a4c 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -871,7 +871,7 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
 			 * so this message won't be sent
 			 */
 			if (hl_fw_send_pci_access_msg(hdev,
-					ARMCP_PACKET_DISABLE_PCI_ACCESS))
+					CPUCP_PACKET_DISABLE_PCI_ACCESS))
 				dev_warn(hdev->dev,
 					"Failed to disable PCI access by F/W\n");
 		}
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index eb66ff532c6a..f2a38e95359a 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -68,9 +68,9 @@ out:
 
 int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
 {
-	struct armcp_packet pkt = {};
+	struct cpucp_packet pkt = {};
 
-	pkt.ctl = cpu_to_le32(opcode << ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(opcode << CPUCP_PKT_CTL_OPCODE_SHIFT);
 
 	return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt,
 						sizeof(pkt), 0, NULL);
@@ -79,7 +79,7 @@ int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
 int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
 				u16 len, u32 timeout, long *result)
 {
-	struct armcp_packet *pkt;
+	struct cpucp_packet *pkt;
 	dma_addr_t pkt_dma_addr;
 	u32 tmp;
 	int rc = 0;
@@ -111,7 +111,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
 	}
 
 	rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp,
-				(tmp == ARMCP_PACKET_FENCE_VAL), 1000,
+				(tmp == CPUCP_PACKET_FENCE_VAL), 1000,
 				timeout, true);
 
 	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
@@ -124,12 +124,12 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
 
 	tmp = le32_to_cpu(pkt->ctl);
 
-	rc = (tmp & ARMCP_PKT_CTL_RC_MASK) >> ARMCP_PKT_CTL_RC_SHIFT;
+	rc = (tmp & CPUCP_PKT_CTL_RC_MASK) >> CPUCP_PKT_CTL_RC_SHIFT;
 	if (rc) {
 		dev_err(hdev->dev, "F/W ERROR %d for CPU packet %d\n",
 			rc,
-			(tmp & ARMCP_PKT_CTL_OPCODE_MASK)
-						>> ARMCP_PKT_CTL_OPCODE_SHIFT);
+			(tmp & CPUCP_PKT_CTL_OPCODE_MASK)
+						>> CPUCP_PKT_CTL_OPCODE_SHIFT);
 		rc = -EIO;
 	} else if (result) {
 		*result = (long) le64_to_cpu(pkt->result);
@@ -145,14 +145,14 @@ out:
 
 int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type)
 {
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
 	long result;
 	int rc;
 
 	memset(&pkt, 0, sizeof(pkt));
 
-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
 	pkt.value = cpu_to_le64(event_type);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
@@ -167,12 +167,12 @@ int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type)
 int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
 		size_t irq_arr_size)
 {
-	struct armcp_unmask_irq_arr_packet *pkt;
+	struct cpucp_unmask_irq_arr_packet *pkt;
 	size_t total_pkt_size;
 	long result;
 	int rc;
 
-	total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) +
+	total_pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) +
 			irq_arr_size;
 
 	/* data should be aligned to 8 bytes in order to ArmCP to copy it */
@@ -191,8 +191,8 @@ int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
 	pkt->length = cpu_to_le32(irq_arr_size / sizeof(irq_arr[0]));
 	memcpy(&pkt->irqs, irq_arr, irq_arr_size);
 
-	pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
-						ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
+						CPUCP_PKT_CTL_OPCODE_SHIFT);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
 						total_pkt_size, 0, &result);
@@ -207,19 +207,19 @@ int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
 
 int hl_fw_test_cpu_queue(struct hl_device *hdev)
 {
-	struct armcp_packet test_pkt = {};
+	struct cpucp_packet test_pkt = {};
 	long result;
 	int rc;
 
-	test_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST <<
-					ARMCP_PKT_CTL_OPCODE_SHIFT);
-	test_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL);
+	test_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST <<
+					CPUCP_PKT_CTL_OPCODE_SHIFT);
+	test_pkt.value = cpu_to_le64(CPUCP_PACKET_FENCE_VAL);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt,
 						sizeof(test_pkt), 0, &result);
 
 	if (!rc) {
-		if (result != ARMCP_PACKET_FENCE_VAL)
+		if (result != CPUCP_PACKET_FENCE_VAL)
 			dev_err(hdev->dev,
 				"CPU queue test failed (0x%08lX)\n", result);
 	} else {
@@ -251,61 +251,61 @@ void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
 
 int hl_fw_send_heartbeat(struct hl_device *hdev)
 {
-	struct armcp_packet hb_pkt = {};
+	struct cpucp_packet hb_pkt = {};
 	long result;
 	int rc;
 
-	hb_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST <<
-					ARMCP_PKT_CTL_OPCODE_SHIFT);
-	hb_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL);
+	hb_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST <<
+					CPUCP_PKT_CTL_OPCODE_SHIFT);
+	hb_pkt.value = cpu_to_le64(CPUCP_PACKET_FENCE_VAL);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt,
 						sizeof(hb_pkt), 0, &result);
 
-	if ((rc) || (result != ARMCP_PACKET_FENCE_VAL))
+	if ((rc) || (result != CPUCP_PACKET_FENCE_VAL))
 		rc = -EIO;
 
 	return rc;
 }
 
-int hl_fw_armcp_info_get(struct hl_device *hdev)
+int hl_fw_cpucp_info_get(struct hl_device *hdev)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
-	struct armcp_packet pkt = {};
-	void *armcp_info_cpu_addr;
-	dma_addr_t armcp_info_dma_addr;
+	struct cpucp_packet pkt = {};
+	void *cpucp_info_cpu_addr;
+	dma_addr_t cpucp_info_dma_addr;
 	long result;
 	int rc;
 
-	armcp_info_cpu_addr =
+	cpucp_info_cpu_addr =
 			hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
-					sizeof(struct armcp_info),
-					&armcp_info_dma_addr);
-	if (!armcp_info_cpu_addr) {
+					sizeof(struct cpucp_info),
+					&cpucp_info_dma_addr);
+	if (!cpucp_info_cpu_addr) {
 		dev_err(hdev->dev,
 			"Failed to allocate DMA memory for ArmCP info packet\n");
 		return -ENOMEM;
 	}
 
-	memset(armcp_info_cpu_addr, 0, sizeof(struct armcp_info));
+	memset(cpucp_info_cpu_addr, 0, sizeof(struct cpucp_info));
 
-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_INFO_GET <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
-	pkt.addr = cpu_to_le64(armcp_info_dma_addr);
-	pkt.data_max_size = cpu_to_le32(sizeof(struct armcp_info));
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_INFO_GET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.addr = cpu_to_le64(cpucp_info_dma_addr);
+	pkt.data_max_size = cpu_to_le32(sizeof(struct cpucp_info));
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-					HL_ARMCP_INFO_TIMEOUT_USEC, &result);
+					HL_CPUCP_INFO_TIMEOUT_USEC, &result);
 	if (rc) {
 		dev_err(hdev->dev,
 			"Failed to handle ArmCP info pkt, error %d\n", rc);
 		goto out;
 	}
 
-	memcpy(&prop->armcp_info, armcp_info_cpu_addr,
-			sizeof(prop->armcp_info));
+	memcpy(&prop->cpucp_info, cpucp_info_cpu_addr,
+			sizeof(prop->cpucp_info));
 
-	rc = hl_build_hwmon_channel_info(hdev, prop->armcp_info.sensors);
+	rc = hl_build_hwmon_channel_info(hdev, prop->cpucp_info.sensors);
 	if (rc) {
 		dev_err(hdev->dev,
 			"Failed to build hwmon channel info, error %d\n", rc);
@@ -315,14 +315,14 @@ int hl_fw_armcp_info_get(struct hl_device *hdev)
 
 out:
 	hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
-			sizeof(struct armcp_info), armcp_info_cpu_addr);
+			sizeof(struct cpucp_info), cpucp_info_cpu_addr);
 
 	return rc;
 }
 
 int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
 {
-	struct armcp_packet pkt = {};
+	struct cpucp_packet pkt = {};
 	void *eeprom_info_cpu_addr;
 	dma_addr_t eeprom_info_dma_addr;
 	long result;
@@ -339,13 +339,13 @@ int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
 
 	memset(eeprom_info_cpu_addr, 0, max_size);
 
-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_EEPROM_DATA_GET <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_EEPROM_DATA_GET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
 	pkt.addr = cpu_to_le64(eeprom_info_dma_addr);
 	pkt.data_max_size = cpu_to_le32(max_size);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-			HL_ARMCP_EEPROM_TIMEOUT_USEC, &result);
+			HL_CPUCP_EEPROM_TIMEOUT_USEC, &result);
 
 	if (rc) {
 		dev_err(hdev->dev,
@@ -363,20 +363,20 @@ out:
 	return rc;
 }
 
-int hl_fw_armcp_pci_counters_get(struct hl_device *hdev,
+int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
 		struct hl_info_pci_counters *counters)
 {
-	struct armcp_packet pkt = {};
+	struct cpucp_packet pkt = {};
 	long result;
 	int rc;
 
-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_PCIE_THROUGHPUT_GET <<
-			ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_THROUGHPUT_GET <<
+			CPUCP_PKT_CTL_OPCODE_SHIFT);
 
 	/* Fetch PCI rx counter */
-	pkt.index = cpu_to_le32(armcp_pcie_throughput_rx);
+	pkt.index = cpu_to_le32(cpucp_pcie_throughput_rx);
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-					HL_ARMCP_INFO_TIMEOUT_USEC, &result);
+					HL_CPUCP_INFO_TIMEOUT_USEC, &result);
 	if (rc) {
 		dev_err(hdev->dev,
 			"Failed to handle ArmCP PCI info pkt, error %d\n", rc);
@@ -385,9 +385,9 @@ int hl_fw_armcp_pci_counters_get(struct hl_device *hdev,
 	counters->rx_throughput = result;
 
 	/* Fetch PCI tx counter */
-	pkt.index = cpu_to_le32(armcp_pcie_throughput_tx);
+	pkt.index = cpu_to_le32(cpucp_pcie_throughput_tx);
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-					HL_ARMCP_INFO_TIMEOUT_USEC, &result);
+					HL_CPUCP_INFO_TIMEOUT_USEC, &result);
 	if (rc) {
 		dev_err(hdev->dev,
 			"Failed to handle ArmCP PCI info pkt, error %d\n", rc);
@@ -396,11 +396,11 @@ int hl_fw_armcp_pci_counters_get(struct hl_device *hdev,
 	counters->tx_throughput = result;
 
 	/* Fetch PCI replay counter */
-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_PCIE_REPLAY_CNT_GET <<
-			ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_REPLAY_CNT_GET <<
+			CPUCP_PKT_CTL_OPCODE_SHIFT);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-			HL_ARMCP_INFO_TIMEOUT_USEC, &result);
+			HL_CPUCP_INFO_TIMEOUT_USEC, &result);
 	if (rc) {
 		dev_err(hdev->dev,
 			"Failed to handle ArmCP PCI info pkt, error %d\n", rc);
@@ -411,21 +411,20 @@ int hl_fw_armcp_pci_counters_get(struct hl_device *hdev,
 	return rc;
 }
 
-int hl_fw_armcp_total_energy_get(struct hl_device *hdev,
-			u64 *total_energy)
+int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy)
 {
-	struct armcp_packet pkt = {};
+	struct cpucp_packet pkt = {};
 	long result;
 	int rc;
 
-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_TOTAL_ENERGY_GET <<
-			ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_TOTAL_ENERGY_GET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-					HL_ARMCP_INFO_TIMEOUT_USEC, &result);
+					HL_CPUCP_INFO_TIMEOUT_USEC, &result);
 	if (rc) {
 		dev_err(hdev->dev,
-			"Failed to handle ArmCP total energy pkt, error %d\n",
+			"Failed to handle CpuCP total energy pkt, error %d\n",
 				rc);
 		return rc;
 	}
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 6577a73e3227..6912f88a4b01 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -8,7 +8,7 @@
 #ifndef HABANALABSP_H_
 #define HABANALABSP_H_
 
-#include "../include/common/armcp_if.h"
+#include "../include/common/cpucp_if.h"
 #include "../include/common/qman_if.h"
 #include <uapi/misc/habanalabs.h>
 
@@ -34,8 +34,8 @@
 
 #define HL_PLL_LOW_JOB_FREQ_USEC	5000000 /* 5 s */
 
-#define HL_ARMCP_INFO_TIMEOUT_USEC	10000000 /* 10s */
-#define HL_ARMCP_EEPROM_TIMEOUT_USEC	10000000 /* 10s */
+#define HL_CPUCP_INFO_TIMEOUT_USEC	10000000 /* 10s */
+#define HL_CPUCP_EEPROM_TIMEOUT_USEC	10000000 /* 10s */
 
 #define HL_PCI_ELBI_TIMEOUT_MSEC	10 /* 10ms */
 
@@ -250,7 +250,7 @@ struct hl_mmu_properties {
 /**
  * struct asic_fixed_properties - ASIC specific immutable properties.
  * @hw_queues_props: H/W queues properties.
- * @armcp_info: received various information from ArmCP regarding the H/W, e.g.
+ * @cpucp_info: received various information from CPU-CP regarding the H/W, e.g.
  *		available sensors.
  * @uboot_ver: F/W U-boot version.
  * @preboot_ver: F/W Preboot version.
@@ -301,7 +301,7 @@ struct hl_mmu_properties {
  */
 struct asic_fixed_properties {
 	struct hw_queue_properties	*hw_queues_props;
-	struct armcp_info		armcp_info;
+	struct cpucp_info		cpucp_info;
 	char				uboot_ver[VERSION_MAX_LEN];
 	char				preboot_ver[VERSION_MAX_LEN];
 	struct hl_mmu_properties	dmmu;
@@ -1588,7 +1588,7 @@ struct hl_device {
 	u64				clock_gating_mask;
 	atomic_t			in_reset;
 	enum hl_pll_frequency		curr_pll_profile;
-	enum armcp_card_types		card_type;
+	enum cpucp_card_types		card_type;
 	int				cs_active_cnt;
 	u32				major;
 	u32				high_pll;
@@ -1776,7 +1776,7 @@ int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq);
 uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms);
 
 int hl_build_hwmon_channel_info(struct hl_device *hdev,
-		struct armcp_sensor *sensors_arr);
+		struct cpucp_sensor *sensors_arr);
 
 int hl_sysfs_init(struct hl_device *hdev);
 void hl_sysfs_fini(struct hl_device *hdev);
@@ -1848,11 +1848,11 @@ void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
 void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
 					void *vaddr);
 int hl_fw_send_heartbeat(struct hl_device *hdev);
-int hl_fw_armcp_info_get(struct hl_device *hdev);
+int hl_fw_cpucp_info_get(struct hl_device *hdev);
 int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
-int hl_fw_armcp_pci_counters_get(struct hl_device *hdev,
+int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
 		struct hl_info_pci_counters *counters);
-int hl_fw_armcp_total_energy_get(struct hl_device *hdev,
+int hl_fw_cpucp_total_energy_get(struct hl_device *hdev,
 			u64 *total_energy);
 int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
 			u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index 18ee14b4b0e1..07317ea49129 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -65,14 +65,14 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
 		hw_ip.dram_enabled = 1;
 	hw_ip.num_of_events = prop->num_of_events;
 
-	memcpy(hw_ip.armcp_version, prop->armcp_info.armcp_version,
+	memcpy(hw_ip.cpucp_version, prop->cpucp_info.cpucp_version,
 		min(VERSION_MAX_LEN, HL_INFO_VERSION_MAX_LEN));
 
-	memcpy(hw_ip.card_name, prop->armcp_info.card_name,
+	memcpy(hw_ip.card_name, prop->cpucp_info.card_name,
 		min(CARD_NAME_MAX_LEN, HL_INFO_CARD_NAME_MAX_LEN));
 
-	hw_ip.armcp_cpld_version = le32_to_cpu(prop->armcp_info.cpld_version);
-	hw_ip.module_id = le32_to_cpu(prop->armcp_info.card_location);
+	hw_ip.cpld_version = le32_to_cpu(prop->cpucp_info.cpld_version);
+	hw_ip.module_id = le32_to_cpu(prop->cpucp_info.card_location);
 
 	hw_ip.psoc_pci_pll_nr = prop->psoc_pci_pll_nr;
 	hw_ip.psoc_pci_pll_nf = prop->psoc_pci_pll_nf;
@@ -288,7 +288,7 @@ static int pci_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 	if ((!max_size) || (!out))
 		return -EINVAL;
 
-	rc = hl_fw_armcp_pci_counters_get(hdev, &pci_counters);
+	rc = hl_fw_cpucp_pci_counters_get(hdev, &pci_counters);
 	if (rc)
 		return rc;
 
@@ -369,7 +369,7 @@ static int total_energy_consumption_info(struct hl_fpriv *hpriv,
 	if ((!max_size) || (!out))
 		return -EINVAL;
 
-	rc = hl_fw_armcp_total_energy_get(hdev,
+	rc = hl_fw_cpucp_total_energy_get(hdev,
 			&total_energy.total_energy_consumption);
 	if (rc)
 		return rc;
diff --git a/drivers/misc/habanalabs/common/hwmon.c b/drivers/misc/habanalabs/common/hwmon.c
index b997336fa75f..2ac29cb2fe61 100644
--- a/drivers/misc/habanalabs/common/hwmon.c
+++ b/drivers/misc/habanalabs/common/hwmon.c
@@ -13,7 +13,7 @@
 #define HWMON_NR_SENSOR_TYPES		(hwmon_pwm + 1)
 
 int hl_build_hwmon_channel_info(struct hl_device *hdev,
-				struct armcp_sensor *sensors_arr)
+				struct cpucp_sensor *sensors_arr)
 {
 	u32 counts[HWMON_NR_SENSOR_TYPES] = {0};
 	u32 *sensors_by_type[HWMON_NR_SENSOR_TYPES] = {NULL};
@@ -24,7 +24,7 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev,
 	enum hwmon_sensor_types type;
 	int rc, i, j;
 
-	for (i = 0 ; i < ARMCP_MAX_SENSORS ; i++) {
+	for (i = 0 ; i < CPUCP_MAX_SENSORS ; i++) {
 		type = le32_to_cpu(sensors_arr[i].type);
 
 		if ((type == 0) && (sensors_arr[i].flags == 0))
@@ -311,13 +311,13 @@ static const struct hwmon_ops hl_hwmon_ops = {
 int hl_get_temperature(struct hl_device *hdev,
 			int sensor_index, u32 attr, long *value)
 {
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
 	int rc;
 
 	memset(&pkt, 0, sizeof(pkt));
 
-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_GET <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEMPERATURE_GET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
 	pkt.sensor_index = __cpu_to_le16(sensor_index);
 	pkt.type = __cpu_to_le16(attr);
 
@@ -337,13 +337,13 @@ int hl_get_temperature(struct hl_device *hdev,
 int hl_set_temperature(struct hl_device *hdev,
 			int sensor_index, u32 attr, long value)
 {
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
 	int rc;
 
 	memset(&pkt, 0, sizeof(pkt));
 
-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_SET <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEMPERATURE_SET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
 	pkt.sensor_index = __cpu_to_le16(sensor_index);
 	pkt.type = __cpu_to_le16(attr);
 	pkt.value = __cpu_to_le64(value);
@@ -362,13 +362,13 @@ int hl_set_temperature(struct hl_device *hdev,
 int hl_get_voltage(struct hl_device *hdev,
 			int sensor_index, u32 attr, long *value)
 {
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
 	int rc;
 
 	memset(&pkt, 0, sizeof(pkt));
 
-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_VOLTAGE_GET <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_VOLTAGE_GET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
 	pkt.sensor_index = __cpu_to_le16(sensor_index);
 	pkt.type = __cpu_to_le16(attr);
 
@@ -388,13 +388,13 @@ int hl_get_voltage(struct hl_device *hdev,
 int hl_get_current(struct hl_device *hdev,
 			int sensor_index, u32 attr, long *value)
 {
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
 	int rc;
 
 	memset(&pkt, 0, sizeof(pkt));
 
-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_CURRENT_GET <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_CURRENT_GET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
 	pkt.sensor_index = __cpu_to_le16(sensor_index);
 	pkt.type = __cpu_to_le16(attr);
 
@@ -414,13 +414,13 @@ int hl_get_current(struct hl_device *hdev,
 int hl_get_fan_speed(struct hl_device *hdev,
 			int sensor_index, u32 attr, long *value)
 {
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
 	int rc;
 
 	memset(&pkt, 0, sizeof(pkt));
 
-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_FAN_SPEED_GET <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_FAN_SPEED_GET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
 	pkt.sensor_index = __cpu_to_le16(sensor_index);
 	pkt.type = __cpu_to_le16(attr);
 
@@ -440,13 +440,13 @@ int hl_get_fan_speed(struct hl_device *hdev,
 int hl_get_pwm_info(struct hl_device *hdev,
 			int sensor_index, u32 attr, long *value)
 {
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
 	int rc;
 
 	memset(&pkt, 0, sizeof(pkt));
 
-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_GET <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_PWM_GET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
 	pkt.sensor_index = __cpu_to_le16(sensor_index);
 	pkt.type = __cpu_to_le16(attr);
 
@@ -466,13 +466,13 @@ int hl_get_pwm_info(struct hl_device *hdev,
 void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
 			long value)
 {
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
 	int rc;
 
 	memset(&pkt, 0, sizeof(pkt));
 
-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_SET <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_PWM_SET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
 	pkt.sensor_index = __cpu_to_le16(sensor_index);
 	pkt.type = __cpu_to_le16(attr);
 	pkt.value = cpu_to_le64(value);
@@ -489,13 +489,13 @@ void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
 int hl_set_voltage(struct hl_device *hdev,
 			int sensor_index, u32 attr, long value)
 {
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
 	int rc;
 
 	memset(&pkt, 0, sizeof(pkt));
 
-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_VOLTAGE_SET <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_VOLTAGE_SET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
 	pkt.sensor_index = __cpu_to_le16(sensor_index);
 	pkt.type = __cpu_to_le16(attr);
 	pkt.value = __cpu_to_le64(value);
@@ -514,13 +514,13 @@ int hl_set_voltage(struct hl_device *hdev,
 int hl_set_current(struct hl_device *hdev,
 			int sensor_index, u32 attr, long value)
 {
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
 	int rc;
 
 	memset(&pkt, 0, sizeof(pkt));
 
-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_CURRENT_SET <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_CURRENT_SET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
 	pkt.sensor_index = __cpu_to_le16(sensor_index);
 	pkt.type = __cpu_to_le16(attr);
 	pkt.value = __cpu_to_le64(value);
@@ -549,7 +549,7 @@ int hl_hwmon_init(struct hl_device *hdev)
 		hdev->hl_chip_info->ops = &hl_hwmon_ops;
 
 		hdev->hwmon_dev = hwmon_device_register_with_info(dev,
-					prop->armcp_info.card_name, hdev,
+					prop->cpucp_info.card_name, hdev,
 					hdev->hl_chip_info, NULL);
 		if (IS_ERR(hdev->hwmon_dev)) {
 			rc = PTR_ERR(hdev->hwmon_dev);
diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c
index c8db717023f5..d20e40a53d70 100644
--- a/drivers/misc/habanalabs/common/irq.c
+++ b/drivers/misc/habanalabs/common/irq.c
@@ -11,7 +11,7 @@
 
 /**
  * struct hl_eqe_work - This structure is used to schedule work of EQ
- *                      entry and armcp_reset event
+ *                      entry and cpucp_reset event
  *
  * @eq_work:          workqueue object to run when EQ entry is received
  * @hdev:             pointer to device structure
diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c
index 5ae484cc84cd..3ceae87016b1 100644
--- a/drivers/misc/habanalabs/common/sysfs.c
+++ b/drivers/misc/habanalabs/common/sysfs.c
@@ -11,18 +11,18 @@
 
 long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
 {
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
 	long result;
 	int rc;
 
 	memset(&pkt, 0, sizeof(pkt));
 
 	if (curr)
-		pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_CURR_GET <<
-						ARMCP_PKT_CTL_OPCODE_SHIFT);
+		pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_CURR_GET <<
+						CPUCP_PKT_CTL_OPCODE_SHIFT);
 	else
-		pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_GET <<
-						ARMCP_PKT_CTL_OPCODE_SHIFT);
+		pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_GET <<
+						CPUCP_PKT_CTL_OPCODE_SHIFT);
 	pkt.pll_index = cpu_to_le32(pll_index);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
@@ -40,13 +40,13 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
 
 void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
 {
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
 	int rc;
 
 	memset(&pkt, 0, sizeof(pkt));
 
-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_SET <<
-					ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_SET <<
+					CPUCP_PKT_CTL_OPCODE_SHIFT);
 	pkt.pll_index = cpu_to_le32(pll_index);
 	pkt.value = cpu_to_le64(freq);
 
@@ -61,14 +61,14 @@ void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
 
 u64 hl_get_max_power(struct hl_device *hdev)
 {
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
 	long result;
 	int rc;
 
 	memset(&pkt, 0, sizeof(pkt));
 
-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_GET <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_GET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
 						0, &result);
@@ -83,13 +83,13 @@ u64 hl_get_max_power(struct hl_device *hdev)
 
 void hl_set_max_power(struct hl_device *hdev)
 {
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
 	int rc;
 
 	memset(&pkt, 0, sizeof(pkt));
 
-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_SET <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_SET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
 	pkt.value = cpu_to_le64(hdev->max_power);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
@@ -112,7 +112,7 @@ static ssize_t armcp_kernel_ver_show(struct device *dev,
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);
 
-	return sprintf(buf, "%s", hdev->asic_prop.armcp_info.kernel_version);
+	return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.kernel_version);
 }
 
 static ssize_t armcp_ver_show(struct device *dev, struct device_attribute *attr,
@@ -120,7 +120,7 @@ static ssize_t armcp_ver_show(struct device *dev, struct device_attribute *attr,
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);
 
-	return sprintf(buf, "%s\n", hdev->asic_prop.armcp_info.armcp_version);
+	return sprintf(buf, "%s\n", hdev->asic_prop.cpucp_info.cpucp_version);
 }
 
 static ssize_t cpld_ver_show(struct device *dev, struct device_attribute *attr,
@@ -129,7 +129,23 @@ static ssize_t cpld_ver_show(struct device *dev, struct device_attribute *attr,
 	struct hl_device *hdev = dev_get_drvdata(dev);
 
 	return sprintf(buf, "0x%08x\n",
-			hdev->asic_prop.armcp_info.cpld_version);
+			hdev->asic_prop.cpucp_info.cpld_version);
+}
+
+static ssize_t cpucp_kernel_ver_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct hl_device *hdev = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.kernel_version);
+}
+
+static ssize_t cpucp_ver_show(struct device *dev, struct device_attribute *attr,
+				char *buf)
+{
+	struct hl_device *hdev = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%s\n", hdev->asic_prop.cpucp_info.cpucp_version);
 }
 
 static ssize_t infineon_ver_show(struct device *dev,
@@ -138,7 +154,7 @@ static ssize_t infineon_ver_show(struct device *dev,
 	struct hl_device *hdev = dev_get_drvdata(dev);
 
 	return sprintf(buf, "0x%04x\n",
-			hdev->asic_prop.armcp_info.infineon_version);
+			hdev->asic_prop.cpucp_info.infineon_version);
 }
 
 static ssize_t fuse_ver_show(struct device *dev, struct device_attribute *attr,
@@ -146,7 +162,7 @@ static ssize_t fuse_ver_show(struct device *dev, struct device_attribute *attr,
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);
 
-	return sprintf(buf, "%s\n", hdev->asic_prop.armcp_info.fuse_version);
+	return sprintf(buf, "%s\n", hdev->asic_prop.cpucp_info.fuse_version);
 }
 
 static ssize_t thermal_ver_show(struct device *dev,
@@ -154,7 +170,7 @@ static ssize_t thermal_ver_show(struct device *dev,
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);
 
-	return sprintf(buf, "%s", hdev->asic_prop.armcp_info.thermal_version);
+	return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.thermal_version);
 }
 
 static ssize_t preboot_btl_ver_show(struct device *dev,
@@ -356,6 +372,8 @@ out:
 static DEVICE_ATTR_RO(armcp_kernel_ver);
 static DEVICE_ATTR_RO(armcp_ver);
 static DEVICE_ATTR_RO(cpld_ver);
+static DEVICE_ATTR_RO(cpucp_kernel_ver);
+static DEVICE_ATTR_RO(cpucp_ver);
 static DEVICE_ATTR_RO(device_type);
 static DEVICE_ATTR_RO(fuse_ver);
 static DEVICE_ATTR_WO(hard_reset);
@@ -380,6 +398,8 @@ static struct attribute *hl_dev_attrs[] = {
 	&dev_attr_armcp_kernel_ver.attr,
 	&dev_attr_armcp_ver.attr,
 	&dev_attr_cpld_ver.attr,
+	&dev_attr_cpucp_kernel_ver.attr,
+	&dev_attr_cpucp_ver.attr,
 	&dev_attr_device_type.attr,
 	&dev_attr_fuse_ver.attr,
 	&dev_attr_hard_reset.attr,
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 4a4327d9cbbf..076a7697f85d 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -359,7 +359,7 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
 				u32 tpc_id);
 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
-static int gaudi_armcp_info_get(struct hl_device *hdev);
+static int gaudi_cpucp_info_get(struct hl_device *hdev);
 static void gaudi_disable_clock_gating(struct hl_device *hdev);
 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
 
@@ -465,7 +465,7 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
 	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
 
-	strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
+	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
 					CARD_NAME_MAX_LEN);
 
 	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
@@ -786,13 +786,13 @@ static int gaudi_late_init(struct hl_device *hdev)
 	struct gaudi_device *gaudi = hdev->asic_specific;
 	int rc;
 
-	rc = gaudi->armcp_info_get(hdev);
+	rc = gaudi->cpucp_info_get(hdev);
 	if (rc) {
-		dev_err(hdev->dev, "Failed to get armcp info\n");
+		dev_err(hdev->dev, "Failed to get cpucp info\n");
 		return rc;
 	}
 
-	rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
+	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
 	if (rc) {
 		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
 		return rc;
@@ -817,7 +817,7 @@ static int gaudi_late_init(struct hl_device *hdev)
 	return 0;
 
 disable_pci_access:
-	hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
+	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
 
 	return rc;
 }
@@ -987,7 +987,7 @@ static int gaudi_sw_init(struct hl_device *hdev)
 		}
 	}
 
-	gaudi->armcp_info_get = gaudi_armcp_info_get;
+	gaudi->cpucp_info_get = gaudi_cpucp_info_get;
 
 	gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
 
@@ -3078,7 +3078,7 @@ static int gaudi_suspend(struct hl_device *hdev)
 {
 	int rc;
 
-	rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
+	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
 	if (rc)
 		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
 
@@ -6053,7 +6053,7 @@ static int gaudi_send_heartbeat(struct hl_device *hdev)
 	return hl_fw_send_heartbeat(hdev);
 }
 
-static int gaudi_armcp_info_get(struct hl_device *hdev)
+static int gaudi_cpucp_info_get(struct hl_device *hdev)
 {
 	struct gaudi_device *gaudi = hdev->asic_specific;
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -6062,19 +6062,19 @@ static int gaudi_armcp_info_get(struct hl_device *hdev)
 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
 		return 0;
 
-	rc = hl_fw_armcp_info_get(hdev);
+	rc = hl_fw_cpucp_info_get(hdev);
 	if (rc)
 		return rc;
 
-	if (!strlen(prop->armcp_info.card_name))
-		strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
+	if (!strlen(prop->cpucp_info.card_name))
+		strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
 				CARD_NAME_MAX_LEN);
 
-	hdev->card_type = le32_to_cpu(hdev->asic_prop.armcp_info.card_type);
+	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
 
-	if (hdev->card_type == armcp_card_type_pci)
+	if (hdev->card_type == cpucp_card_type_pci)
 		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
-	else if (hdev->card_type == armcp_card_type_pmc)
+	else if (hdev->card_type == cpucp_card_type_pmc)
 		prop->max_power_default = MAX_POWER_DEFAULT_PMC;
 
 	hdev->max_power = prop->max_power_default;
diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h
index 1368f6298c80..b86eb98b145c 100644
--- a/drivers/misc/habanalabs/gaudi/gaudiP.h
+++ b/drivers/misc/habanalabs/gaudi/gaudiP.h
@@ -216,7 +216,7 @@ struct gaudi_internal_qman_info {
 
 /**
  * struct gaudi_device - ASIC specific manage structure.
- * @armcp_info_get: get information on device from ArmCP
+ * @cpucp_info_get: get information on device from CPU-CP
  * @hw_queues_lock: protects the H/W queues from concurrent access.
  * @clk_gate_mutex: protects code areas that require clock gating to be disabled
  *                  temporarily
@@ -239,7 +239,7 @@ struct gaudi_internal_qman_info {
  *                    8-bit value so use u8.
  */
 struct gaudi_device {
-	int (*armcp_info_get)(struct hl_device *hdev);
+	int (*cpucp_info_get)(struct hl_device *hdev);
 
 	/* TODO: remove hw_queues_lock after moving to scheduler code */
 	spinlock_t			hw_queues_lock;
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 5fb3565c80c5..c41f2917863b 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -449,7 +449,7 @@ int goya_get_fixed_properties(struct hl_device *hdev)
 	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
 
-	strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME,
+	strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
 		CARD_NAME_MAX_LEN);
 
 	prop->max_pending_cs = GOYA_MAX_PENDING_CS;
@@ -727,9 +727,9 @@ int goya_late_init(struct hl_device *hdev)
 	if (rc)
 		return rc;
 
-	rc = goya_armcp_info_get(hdev);
+	rc = goya_cpucp_info_get(hdev);
 	if (rc) {
-		dev_err(hdev->dev, "Failed to get armcp info %d\n", rc);
+		dev_err(hdev->dev, "Failed to get cpucp info %d\n", rc);
 		return rc;
 	}
 
@@ -739,7 +739,7 @@ int goya_late_init(struct hl_device *hdev)
 	 */
 	WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size));
 
-	rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
+	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
 	if (rc) {
 		dev_err(hdev->dev,
 			"Failed to enable PCI access from CPU %d\n", rc);
@@ -2648,7 +2648,7 @@ int goya_suspend(struct hl_device *hdev)
 {
 	int rc;
 
-	rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
+	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
 	if (rc)
 		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
 
@@ -4500,14 +4500,14 @@ static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
 static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
 		size_t irq_arr_size)
 {
-	struct armcp_unmask_irq_arr_packet *pkt;
+	struct cpucp_unmask_irq_arr_packet *pkt;
 	size_t total_pkt_size;
 	long result;
 	int rc;
 	int irq_num_entries, irq_arr_index;
 	__le32 *goya_irq_arr;
 
-	total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) +
+	total_pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) +
 			irq_arr_size;
 
 	/* data should be aligned to 8 bytes in order to ArmCP to copy it */
@@ -4534,8 +4534,8 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
 		goya_irq_arr[irq_arr_index] =
 				cpu_to_le32(irq_arr[irq_arr_index]);
 
-	pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
-						ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
+						CPUCP_PKT_CTL_OPCODE_SHIFT);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
 						total_pkt_size,	0, &result);
@@ -4560,14 +4560,14 @@ static int goya_soft_reset_late_init(struct hl_device *hdev)
 
 static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
 {
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
 	long result;
 	int rc;
 
 	memset(&pkt, 0, sizeof(pkt));
 
-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
 	pkt.value = cpu_to_le64(event_type);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
@@ -5103,7 +5103,7 @@ int goya_send_heartbeat(struct hl_device *hdev)
 	return hl_fw_send_heartbeat(hdev);
 }
 
-int goya_armcp_info_get(struct hl_device *hdev)
+int goya_cpucp_info_get(struct hl_device *hdev)
 {
 	struct goya_device *goya = hdev->asic_specific;
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -5113,11 +5113,11 @@ int goya_armcp_info_get(struct hl_device *hdev)
 	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
 		return 0;
 
-	rc = hl_fw_armcp_info_get(hdev);
+	rc = hl_fw_cpucp_info_get(hdev);
 	if (rc)
 		return rc;
 
-	dram_size = le64_to_cpu(prop->armcp_info.dram_size);
+	dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
 	if (dram_size) {
 		if ((!is_power_of_2(dram_size)) ||
 				(dram_size < DRAM_PHYS_DEFAULT_SIZE)) {
@@ -5131,8 +5131,8 @@ int goya_armcp_info_get(struct hl_device *hdev)
 		prop->dram_end_address = prop->dram_base_address + dram_size;
 	}
 
-	if (!strlen(prop->armcp_info.card_name))
-		strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME,
+	if (!strlen(prop->cpucp_info.card_name))
+		strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
 				CARD_NAME_MAX_LEN);
 
 	return 0;
diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h
index bb7474ee9784..09b4006d4dc3 100644
--- a/drivers/misc/habanalabs/goya/goyaP.h
+++ b/drivers/misc/habanalabs/goya/goyaP.h
@@ -207,7 +207,7 @@ void goya_set_max_power(struct hl_device *hdev, u64 value);
 void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
 void goya_add_device_attr(struct hl_device *hdev,
 			struct attribute_group *dev_attr_grp);
-int goya_armcp_info_get(struct hl_device *hdev);
+int goya_cpucp_info_get(struct hl_device *hdev);
 int goya_debug_coresight(struct hl_device *hdev, void *data);
 void goya_halt_coresight(struct hl_device *hdev);
 
diff --git a/drivers/misc/habanalabs/include/common/armcp_if.h b/drivers/misc/habanalabs/include/common/armcp_if.h
deleted file mode 100644
index 4d78898524e9..000000000000
--- a/drivers/misc/habanalabs/include/common/armcp_if.h
+++ /dev/null
@@ -1,418 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- *
- * Copyright 2016-2020 HabanaLabs, Ltd.
- * All Rights Reserved.
- *
- */
-
-#ifndef ARMCP_IF_H
-#define ARMCP_IF_H
-
-#include <linux/types.h>
-
-/*
- * EVENT QUEUE
- */
-
-struct hl_eq_header {
-	__le32 reserved;
-	__le32 ctl;
-};
-
-struct hl_eq_ecc_data {
-	__le64 ecc_address;
-	__le64 ecc_syndrom;
-	__u8 memory_wrapper_idx;
-	__u8 pad[7];
-};
-
-struct hl_eq_entry {
-	struct hl_eq_header hdr;
-	union {
-		struct hl_eq_ecc_data ecc_data;
-		__le64 data[7];
-	};
-};
-
-#define HL_EQ_ENTRY_SIZE		sizeof(struct hl_eq_entry)
-
-#define EQ_CTL_READY_SHIFT		31
-#define EQ_CTL_READY_MASK		0x80000000
-
-#define EQ_CTL_EVENT_TYPE_SHIFT		16
-#define EQ_CTL_EVENT_TYPE_MASK		0x03FF0000
-
-enum pq_init_status {
-	PQ_INIT_STATUS_NA = 0,
-	PQ_INIT_STATUS_READY_FOR_CP,
-	PQ_INIT_STATUS_READY_FOR_HOST,
-	PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI
-};
-
-/*
- * ArmCP Primary Queue Packets
- *
- * During normal operation, the host's kernel driver needs to send various
- * messages to ArmCP, usually either to SET some value into a H/W periphery or
- * to GET the current value of some H/W periphery. For example, SET the
- * frequency of MME/TPC and GET the value of the thermal sensor.
- *
- * These messages can be initiated either by the User application or by the
- * host's driver itself, e.g. power management code. In either case, the
- * communication from the host's driver to ArmCP will *always* be in
- * synchronous mode, meaning that the host will send a single message and poll
- * until the message was acknowledged and the results are ready (if results are
- * needed).
- *
- * This means that only a single message can be sent at a time and the host's
- * driver must wait for its result before sending the next message. Having said
- * that, because these are control messages which are sent in a relatively low
- * frequency, this limitation seems acceptable. It's important to note that
- * in case of multiple devices, messages to different devices *can* be sent
- * at the same time.
- *
- * The message, inputs/outputs (if relevant) and fence object will be located
- * on the device DDR at an address that will be determined by the host's driver.
- * During device initialization phase, the host will pass to ArmCP that address.
- * Most of the message types will contain inputs/outputs inside the message
- * itself. The common part of each message will contain the opcode of the
- * message (its type) and a field representing a fence object.
- *
- * When the host's driver wishes to send a message to ArmCP, it will write the
- * message contents to the device DDR, clear the fence object and then write the
- * value 484 to the mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR register to issue
- * the 484 interrupt-id to the ARM core.
- *
- * Upon receiving the 484 interrupt-id, ArmCP will read the message from the
- * DDR. In case the message is a SET operation, ArmCP will first perform the
- * operation and then write to the fence object on the device DDR. In case the
- * message is a GET operation, ArmCP will first fill the results section on the
- * device DDR and then write to the fence object. If an error occurred, ArmCP
- * will fill the rc field with the right error code.
- *
- * In the meantime, the host's driver will poll on the fence object. Once the
- * host sees that the fence object is signaled, it will read the results from
- * the device DDR (if relevant) and resume the code execution in the host's
- * driver.
- *
- * To use QMAN packets, the opcode must be the QMAN opcode, shifted by 8
- * so the value being put by the host's driver matches the value read by ArmCP
- *
- * Non-QMAN packets should be limited to values 1 through (2^8 - 1)
- *
- * Detailed description:
- *
- * ARMCP_PACKET_DISABLE_PCI_ACCESS -
- *       After receiving this packet the embedded CPU must NOT issue PCI
- *       transactions (read/write) towards the Host CPU. This also include
- *       sending MSI-X interrupts.
- *       This packet is usually sent before the device is moved to D3Hot state.
- *
- * ARMCP_PACKET_ENABLE_PCI_ACCESS -
- *       After receiving this packet the embedded CPU is allowed to issue PCI
- *       transactions towards the Host CPU, including sending MSI-X interrupts.
- *       This packet is usually send after the device is moved to D0 state.
- *
- * ARMCP_PACKET_TEMPERATURE_GET -
- *       Fetch the current temperature / Max / Max Hyst / Critical /
- *       Critical Hyst of a specified thermal sensor. The packet's
- *       arguments specify the desired sensor and the field to get.
- *
- * ARMCP_PACKET_VOLTAGE_GET -
- *       Fetch the voltage / Max / Min of a specified sensor. The packet's
- *       arguments specify the sensor and type.
- *
- * ARMCP_PACKET_CURRENT_GET -
- *       Fetch the current / Max / Min of a specified sensor. The packet's
- *       arguments specify the sensor and type.
- *
- * ARMCP_PACKET_FAN_SPEED_GET -
- *       Fetch the speed / Max / Min of a specified fan. The packet's
- *       arguments specify the sensor and type.
- *
- * ARMCP_PACKET_PWM_GET -
- *       Fetch the pwm value / mode of a specified pwm. The packet's
- *       arguments specify the sensor and type.
- *
- * ARMCP_PACKET_PWM_SET -
- *       Set the pwm value / mode of a specified pwm. The packet's
- *       arguments specify the sensor, type and value.
- *
- * ARMCP_PACKET_FREQUENCY_SET -
- *       Set the frequency of a specified PLL. The packet's arguments specify
- *       the PLL and the desired frequency. The actual frequency in the device
- *       might differ from the requested frequency.
- *
- * ARMCP_PACKET_FREQUENCY_GET -
- *       Fetch the frequency of a specified PLL. The packet's arguments specify
- *       the PLL.
- *
- * ARMCP_PACKET_LED_SET -
- *       Set the state of a specified led. The packet's arguments
- *       specify the led and the desired state.
- *
- * ARMCP_PACKET_I2C_WR -
- *       Write 32-bit value to I2C device. The packet's arguments specify the
- *       I2C bus, address and value.
- *
- * ARMCP_PACKET_I2C_RD -
- *       Read 32-bit value from I2C device. The packet's arguments specify the
- *       I2C bus and address.
- *
- * ARMCP_PACKET_INFO_GET -
- *       Fetch information from the device as specified in the packet's
- *       structure. The host's driver passes the max size it allows the ArmCP to
- *       write to the structure, to prevent data corruption in case of
- *       mismatched driver/FW versions.
- *
- * ARMCP_PACKET_FLASH_PROGRAM_REMOVED - this packet was removed
- *
- * ARMCP_PACKET_UNMASK_RAZWI_IRQ -
- *       Unmask the given IRQ. The IRQ number is specified in the value field.
- *       The packet is sent after receiving an interrupt and printing its
- *       relevant information.
- *
- * ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY -
- *       Unmask the given IRQs. The IRQs numbers are specified in an array right
- *       after the armcp_packet structure, where its first element is the array
- *       length. The packet is sent after a soft reset was done in order to
- *       handle any interrupts that were sent during the reset process.
- *
- * ARMCP_PACKET_TEST -
- *       Test packet for ArmCP connectivity. The CPU will put the fence value
- *       in the result field.
- *
- * ARMCP_PACKET_FREQUENCY_CURR_GET -
- *       Fetch the current frequency of a specified PLL. The packet's arguments
- *       specify the PLL.
- *
- * ARMCP_PACKET_MAX_POWER_GET -
- *       Fetch the maximal power of the device.
- *
- * ARMCP_PACKET_MAX_POWER_SET -
- *       Set the maximal power of the device. The packet's arguments specify
- *       the power.
- *
- * ARMCP_PACKET_EEPROM_DATA_GET -
- *       Get EEPROM data from the ArmCP kernel. The buffer is specified in the
- *       addr field. The CPU will put the returned data size in the result
- *       field. In addition, the host's driver passes the max size it allows the
- *       ArmCP to write to the structure, to prevent data corruption in case of
- *       mismatched driver/FW versions.
- *
- * ARMCP_PACKET_TEMPERATURE_SET -
- *       Set the value of the offset property of a specified thermal sensor.
- *       The packet's arguments specify the desired sensor and the field to
- *       set.
- *
- * ARMCP_PACKET_VOLTAGE_SET -
- *       Trigger the reset_history property of a specified voltage sensor.
- *       The packet's arguments specify the desired sensor and the field to
- *       set.
- *
- * ARMCP_PACKET_CURRENT_SET -
- *       Trigger the reset_history property of a specified current sensor.
- *       The packet's arguments specify the desired sensor and the field to
- *       set.
- */
-
-enum armcp_packet_id {
-	ARMCP_PACKET_DISABLE_PCI_ACCESS = 1,	/* internal */
-	ARMCP_PACKET_ENABLE_PCI_ACCESS,		/* internal */
-	ARMCP_PACKET_TEMPERATURE_GET,		/* sysfs */
-	ARMCP_PACKET_VOLTAGE_GET,		/* sysfs */
-	ARMCP_PACKET_CURRENT_GET,		/* sysfs */
-	ARMCP_PACKET_FAN_SPEED_GET,		/* sysfs */
-	ARMCP_PACKET_PWM_GET,			/* sysfs */
-	ARMCP_PACKET_PWM_SET,			/* sysfs */
-	ARMCP_PACKET_FREQUENCY_SET,		/* sysfs */
-	ARMCP_PACKET_FREQUENCY_GET,		/* sysfs */
-	ARMCP_PACKET_LED_SET,			/* debugfs */
-	ARMCP_PACKET_I2C_WR,			/* debugfs */
-	ARMCP_PACKET_I2C_RD,			/* debugfs */
-	ARMCP_PACKET_INFO_GET,			/* IOCTL */
-	ARMCP_PACKET_FLASH_PROGRAM_REMOVED,
-	ARMCP_PACKET_UNMASK_RAZWI_IRQ,		/* internal */
-	ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY,	/* internal */
-	ARMCP_PACKET_TEST,			/* internal */
-	ARMCP_PACKET_FREQUENCY_CURR_GET,	/* sysfs */
-	ARMCP_PACKET_MAX_POWER_GET,		/* sysfs */
-	ARMCP_PACKET_MAX_POWER_SET,		/* sysfs */
-	ARMCP_PACKET_EEPROM_DATA_GET,		/* sysfs */
-	ARMCP_RESERVED,
-	ARMCP_PACKET_TEMPERATURE_SET,		/* sysfs */
-	ARMCP_PACKET_VOLTAGE_SET,		/* sysfs */
-	ARMCP_PACKET_CURRENT_SET,		/* sysfs */
-	ARMCP_PACKET_PCIE_THROUGHPUT_GET,	/* internal */
-	ARMCP_PACKET_PCIE_REPLAY_CNT_GET,	/* internal */
-	ARMCP_PACKET_TOTAL_ENERGY_GET,		/* internal */
-};
-
-#define ARMCP_PACKET_FENCE_VAL	0xFE8CE7A5
-
-#define ARMCP_PKT_CTL_RC_SHIFT		12
-#define ARMCP_PKT_CTL_RC_MASK		0x0000F000
-
-#define ARMCP_PKT_CTL_OPCODE_SHIFT	16
-#define ARMCP_PKT_CTL_OPCODE_MASK	0x1FFF0000
-
-struct armcp_packet {
-	union {
-		__le64 value;	/* For SET packets */
-		__le64 result;	/* For GET packets */
-		__le64 addr;	/* For PQ */
-	};
-
-	__le32 ctl;
-
-	__le32 fence;		/* Signal to host that message is completed */
-
-	union {
-		struct {/* For temperature/current/voltage/fan/pwm get/set */
-			__le16 sensor_index;
-			__le16 type;
-		};
-
-		struct {	/* For I2C read/write */
-			__u8 i2c_bus;
-			__u8 i2c_addr;
-			__u8 i2c_reg;
-			__u8 pad; /* unused */
-		};
-
-		/* For any general request */
-		__le32 index;
-
-		/* For frequency get/set */
-		__le32 pll_index;
-
-		/* For led set */
-		__le32 led_index;
-
-		/* For get Armcp info/EEPROM data */
-		__le32 data_max_size;
-	};
-
-	__le32 reserved;
-};
-
-struct armcp_unmask_irq_arr_packet {
-	struct armcp_packet armcp_pkt;
-	__le32 length;
-	__le32 irqs[0];
-};
-
-enum armcp_packet_rc {
-	armcp_packet_success,
-	armcp_packet_invalid,
-	armcp_packet_fault
-};
-
-/*
- * armcp_temp_type should adhere to hwmon_temp_attributes
- * defined in Linux kernel hwmon.h file
- */
-enum armcp_temp_type {
-	armcp_temp_input,
-	armcp_temp_max = 6,
-	armcp_temp_max_hyst,
-	armcp_temp_crit,
-	armcp_temp_crit_hyst,
-	armcp_temp_offset = 19,
-	armcp_temp_highest = 22,
-	armcp_temp_reset_history = 23
-};
-
-enum armcp_in_attributes {
-	armcp_in_input,
-	armcp_in_min,
-	armcp_in_max,
-	armcp_in_highest = 7,
-	armcp_in_reset_history
-};
-
-enum armcp_curr_attributes {
-	armcp_curr_input,
-	armcp_curr_min,
-	armcp_curr_max,
-	armcp_curr_highest = 7,
-	armcp_curr_reset_history
-};
-
-enum armcp_fan_attributes {
-	armcp_fan_input,
-	armcp_fan_min = 2,
-	armcp_fan_max
-};
-
-enum armcp_pwm_attributes {
-	armcp_pwm_input,
-	armcp_pwm_enable
-};
-
-enum armcp_pcie_throughput_attributes {
-	armcp_pcie_throughput_tx,
-	armcp_pcie_throughput_rx
-};
-
-/* Event Queue Packets */
-
-struct eq_generic_event {
-	__le64 data[7];
-};
-
-/*
- * ArmCP info
- */
-
-#define CARD_NAME_MAX_LEN		16
-#define VERSION_MAX_LEN			128
-#define ARMCP_MAX_SENSORS		128
-
-struct armcp_sensor {
-	__le32 type;
-	__le32 flags;
-};
-
-/**
- * struct armcp_card_types - ASIC card type.
- * @armcp_card_type_pci: PCI card.
- * @armcp_card_type_pmc: PCI Mezzanine Card.
- */
-enum armcp_card_types {
-	armcp_card_type_pci,
-	armcp_card_type_pmc
-};
-
-/**
- * struct armcp_info - Info from ArmCP that is necessary to the host's driver
- * @sensors: available sensors description.
- * @kernel_version: ArmCP linux kernel version.
- * @reserved: reserved field.
- * @card_type: card configuration type.
- * @card_location: in a server, each card has different connections topology
- *                 depending on its location (relevant for PMC card type)
- * @cpld_version: CPLD programmed F/W version.
- * @infineon_version: Infineon main DC-DC version.
- * @fuse_version: silicon production FUSE information.
- * @thermal_version: thermald S/W version.
- * @armcp_version: ArmCP S/W version.
- * @dram_size: available DRAM size.
- * @card_name: card name that will be displayed in HWMON subsystem on the host
- */
-struct armcp_info {
-	struct armcp_sensor sensors[ARMCP_MAX_SENSORS];
-	__u8 kernel_version[VERSION_MAX_LEN];
-	__le32 reserved;
-	__le32 card_type;
-	__le32 card_location;
-	__le32 cpld_version;
-	__le32 infineon_version;
-	__u8 fuse_version[VERSION_MAX_LEN];
-	__u8 thermal_version[VERSION_MAX_LEN];
-	__u8 armcp_version[VERSION_MAX_LEN];
-	__le64 dram_size;
-	char card_name[CARD_NAME_MAX_LEN];
-};
-
-#endif /* ARMCP_IF_H */
diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h
new file mode 100644
index 000000000000..1e8480e978e2
--- /dev/null
+++ b/drivers/misc/habanalabs/include/common/cpucp_if.h
@@ -0,0 +1,417 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2020 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+#ifndef CPUCP_IF_H
+#define CPUCP_IF_H
+
+#include <linux/types.h>
+
+/*
+ * EVENT QUEUE
+ */
+
+struct hl_eq_header {
+	__le32 reserved;
+	__le32 ctl;
+};
+
+struct hl_eq_ecc_data {
+	__le64 ecc_address;
+	__le64 ecc_syndrom;
+	__u8 memory_wrapper_idx;
+	__u8 pad[7];
+};
+
+struct hl_eq_entry {
+	struct hl_eq_header hdr;
+	union {
+		struct hl_eq_ecc_data ecc_data;
+		__le64 data[7];
+	};
+};
+
+#define HL_EQ_ENTRY_SIZE		sizeof(struct hl_eq_entry)
+
+#define EQ_CTL_READY_SHIFT		31
+#define EQ_CTL_READY_MASK		0x80000000
+
+#define EQ_CTL_EVENT_TYPE_SHIFT		16
+#define EQ_CTL_EVENT_TYPE_MASK		0x03FF0000
+
+enum pq_init_status {
+	PQ_INIT_STATUS_NA = 0,
+	PQ_INIT_STATUS_READY_FOR_CP,
+	PQ_INIT_STATUS_READY_FOR_HOST,
+	PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI
+};
+
+/*
+ * CpuCP Primary Queue Packets
+ *
+ * During normal operation, the host's kernel driver needs to send various
+ * messages to CpuCP, usually either to SET some value into a H/W periphery or
+ * to GET the current value of some H/W periphery. For example, SET the
+ * frequency of MME/TPC and GET the value of the thermal sensor.
+ *
+ * These messages can be initiated either by the User application or by the
+ * host's driver itself, e.g. power management code. In either case, the
+ * communication from the host's driver to CpuCP will *always* be in
+ * synchronous mode, meaning that the host will send a single message and poll
+ * until the message was acknowledged and the results are ready (if results are
+ * needed).
+ *
+ * This means that only a single message can be sent at a time and the host's
+ * driver must wait for its result before sending the next message. Having said
+ * that, because these are control messages which are sent in a relatively low
+ * frequency, this limitation seems acceptable. It's important to note that
+ * in case of multiple devices, messages to different devices *can* be sent
+ * at the same time.
+ *
+ * The message, inputs/outputs (if relevant) and fence object will be located
+ * on the device DDR at an address that will be determined by the host's driver.
+ * During device initialization phase, the host will pass to CpuCP that address.
+ * Most of the message types will contain inputs/outputs inside the message
+ * itself. The common part of each message will contain the opcode of the
+ * message (its type) and a field representing a fence object.
+ *
+ * When the host's driver wishes to send a message to CpuCP, it will write the
+ * message contents to the device DDR, clear the fence object and then write to
+ * the PSOC_ARC1_AUX_SW_INTR, to issue interrupt 121 to ARC Management CPU.
+ *
+ * Upon receiving the interrupt (#121), CpuCP will read the message from the
+ * DDR. In case the message is a SET operation, CpuCP will first perform the
+ * operation and then write to the fence object on the device DDR. In case the
+ * message is a GET operation, CpuCP will first fill the results section on the
+ * device DDR and then write to the fence object. If an error occurred, CpuCP
+ * will fill the rc field with the right error code.
+ *
+ * In the meantime, the host's driver will poll on the fence object. Once the
+ * host sees that the fence object is signaled, it will read the results from
+ * the device DDR (if relevant) and resume the code execution in the host's
+ * driver.
+ *
+ * To use QMAN packets, the opcode must be the QMAN opcode, shifted by 8
+ * so the value being put by the host's driver matches the value read by CpuCP
+ *
+ * Non-QMAN packets should be limited to values 1 through (2^8 - 1)
+ *
+ * Detailed description:
+ *
+ * CPUCP_PACKET_DISABLE_PCI_ACCESS -
+ *       After receiving this packet the embedded CPU must NOT issue PCI
+ *       transactions (read/write) towards the Host CPU. This also includes
+ *       sending MSI-X interrupts.
+ *       This packet is usually sent before the device is moved to D3Hot state.
+ *
+ * CPUCP_PACKET_ENABLE_PCI_ACCESS -
+ *       After receiving this packet the embedded CPU is allowed to issue PCI
+ *       transactions towards the Host CPU, including sending MSI-X interrupts.
+ *       This packet is usually sent after the device is moved to D0 state.
+ *
+ * CPUCP_PACKET_TEMPERATURE_GET -
+ *       Fetch the current temperature / Max / Max Hyst / Critical /
+ *       Critical Hyst of a specified thermal sensor. The packet's
+ *       arguments specify the desired sensor and the field to get.
+ *
+ * CPUCP_PACKET_VOLTAGE_GET -
+ *       Fetch the voltage / Max / Min of a specified sensor. The packet's
+ *       arguments specify the sensor and type.
+ *
+ * CPUCP_PACKET_CURRENT_GET -
+ *       Fetch the current / Max / Min of a specified sensor. The packet's
+ *       arguments specify the sensor and type.
+ *
+ * CPUCP_PACKET_FAN_SPEED_GET -
+ *       Fetch the speed / Max / Min of a specified fan. The packet's
+ *       arguments specify the sensor and type.
+ *
+ * CPUCP_PACKET_PWM_GET -
+ *       Fetch the pwm value / mode of a specified pwm. The packet's
+ *       arguments specify the sensor and type.
+ *
+ * CPUCP_PACKET_PWM_SET -
+ *       Set the pwm value / mode of a specified pwm. The packet's
+ *       arguments specify the sensor, type and value.
+ *
+ * CPUCP_PACKET_FREQUENCY_SET -
+ *       Set the frequency of a specified PLL. The packet's arguments specify
+ *       the PLL and the desired frequency. The actual frequency in the device
+ *       might differ from the requested frequency.
+ *
+ * CPUCP_PACKET_FREQUENCY_GET -
+ *       Fetch the frequency of a specified PLL. The packet's arguments specify
+ *       the PLL.
+ *
+ * CPUCP_PACKET_LED_SET -
+ *       Set the state of a specified led. The packet's arguments
+ *       specify the led and the desired state.
+ *
+ * CPUCP_PACKET_I2C_WR -
+ *       Write 32-bit value to I2C device. The packet's arguments specify the
+ *       I2C bus, address and value.
+ *
+ * CPUCP_PACKET_I2C_RD -
+ *       Read 32-bit value from I2C device. The packet's arguments specify the
+ *       I2C bus and address.
+ *
+ * CPUCP_PACKET_INFO_GET -
+ *       Fetch information from the device as specified in the packet's
+ *       structure. The host's driver passes the max size it allows the CpuCP to
+ *       write to the structure, to prevent data corruption in case of
+ *       mismatched driver/FW versions.
+ *
+ * CPUCP_PACKET_FLASH_PROGRAM_REMOVED - this packet was removed
+ *
+ * CPUCP_PACKET_UNMASK_RAZWI_IRQ -
+ *       Unmask the given IRQ. The IRQ number is specified in the value field.
+ *       The packet is sent after receiving an interrupt and printing its
+ *       relevant information.
+ *
+ * CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY -
+ *       Unmask the given IRQs. The IRQ numbers are specified in an array right
+ *       after the cpucp_packet structure, where its first element is the array
+ *       length. The packet is sent after a soft reset was done in order to
+ *       handle any interrupts that were sent during the reset process.
+ *
+ * CPUCP_PACKET_TEST -
+ *       Test packet for CpuCP connectivity. The CPU will put the fence value
+ *       in the result field.
+ *
+ * CPUCP_PACKET_FREQUENCY_CURR_GET -
+ *       Fetch the current frequency of a specified PLL. The packet's arguments
+ *       specify the PLL.
+ *
+ * CPUCP_PACKET_MAX_POWER_GET -
+ *       Fetch the maximal power of the device.
+ *
+ * CPUCP_PACKET_MAX_POWER_SET -
+ *       Set the maximal power of the device. The packet's arguments specify
+ *       the power.
+ *
+ * CPUCP_PACKET_EEPROM_DATA_GET -
+ *       Get EEPROM data from the CpuCP kernel. The buffer is specified in the
+ *       addr field. The CPU will put the returned data size in the result
+ *       field. In addition, the host's driver passes the max size it allows the
+ *       CpuCP to write to the structure, to prevent data corruption in case of
+ *       mismatched driver/FW versions.
+ *
+ * CPUCP_PACKET_TEMPERATURE_SET -
+ *       Set the value of the offset property of a specified thermal sensor.
+ *       The packet's arguments specify the desired sensor and the field to
+ *       set.
+ *
+ * CPUCP_PACKET_VOLTAGE_SET -
+ *       Trigger the reset_history property of a specified voltage sensor.
+ *       The packet's arguments specify the desired sensor and the field to
+ *       set.
+ *
+ * CPUCP_PACKET_CURRENT_SET -
+ *       Trigger the reset_history property of a specified current sensor.
+ *       The packet's arguments specify the desired sensor and the field to
+ *       set.
+ */
+
+enum cpucp_packet_id {
+	CPUCP_PACKET_DISABLE_PCI_ACCESS = 1,	/* internal */
+	CPUCP_PACKET_ENABLE_PCI_ACCESS,		/* internal */
+	CPUCP_PACKET_TEMPERATURE_GET,		/* sysfs */
+	CPUCP_PACKET_VOLTAGE_GET,		/* sysfs */
+	CPUCP_PACKET_CURRENT_GET,		/* sysfs */
+	CPUCP_PACKET_FAN_SPEED_GET,		/* sysfs */
+	CPUCP_PACKET_PWM_GET,			/* sysfs */
+	CPUCP_PACKET_PWM_SET,			/* sysfs */
+	CPUCP_PACKET_FREQUENCY_SET,		/* sysfs */
+	CPUCP_PACKET_FREQUENCY_GET,		/* sysfs */
+	CPUCP_PACKET_LED_SET,			/* debugfs */
+	CPUCP_PACKET_I2C_WR,			/* debugfs */
+	CPUCP_PACKET_I2C_RD,			/* debugfs */
+	CPUCP_PACKET_INFO_GET,			/* IOCTL */
+	CPUCP_PACKET_FLASH_PROGRAM_REMOVED,
+	CPUCP_PACKET_UNMASK_RAZWI_IRQ,		/* internal */
+	CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY,	/* internal */
+	CPUCP_PACKET_TEST,			/* internal */
+	CPUCP_PACKET_FREQUENCY_CURR_GET,	/* sysfs */
+	CPUCP_PACKET_MAX_POWER_GET,		/* sysfs */
+	CPUCP_PACKET_MAX_POWER_SET,		/* sysfs */
+	CPUCP_PACKET_EEPROM_DATA_GET,		/* sysfs */
+	CPUCP_RESERVED,
+	CPUCP_PACKET_TEMPERATURE_SET,		/* sysfs */
+	CPUCP_PACKET_VOLTAGE_SET,		/* sysfs */
+	CPUCP_PACKET_CURRENT_SET,		/* sysfs */
+	CPUCP_PACKET_PCIE_THROUGHPUT_GET,		/* internal */
+	CPUCP_PACKET_PCIE_REPLAY_CNT_GET,		/* internal */
+	CPUCP_PACKET_TOTAL_ENERGY_GET,		/* internal */
+};
+
+#define CPUCP_PACKET_FENCE_VAL	0xFE8CE7A5
+
+#define CPUCP_PKT_CTL_RC_SHIFT		12
+#define CPUCP_PKT_CTL_RC_MASK		0x0000F000
+
+#define CPUCP_PKT_CTL_OPCODE_SHIFT	16
+#define CPUCP_PKT_CTL_OPCODE_MASK	0x1FFF0000
+
+struct cpucp_packet {
+	union {
+		__le64 value;	/* For SET packets */
+		__le64 result;	/* For GET packets */
+		__le64 addr;	/* For PQ */
+	};
+
+	__le32 ctl;
+
+	__le32 fence;		/* Signal to host that message is completed */
+
+	union {
+		struct {/* For temperature/current/voltage/fan/pwm get/set */
+			__le16 sensor_index;
+			__le16 type;
+		};
+
+		struct {	/* For I2C read/write */
+			__u8 i2c_bus;
+			__u8 i2c_addr;
+			__u8 i2c_reg;
+			__u8 pad; /* unused */
+		};
+
+		/* For any general request */
+		__le32 index;
+
+		/* For frequency get/set */
+		__le32 pll_index;
+
+		/* For led set */
+		__le32 led_index;
+
+		/* For get CpuCP info/EEPROM data */
+		__le32 data_max_size;
+	};
+
+	__le32 reserved;
+};
+
+struct cpucp_unmask_irq_arr_packet {
+	struct cpucp_packet cpucp_pkt;
+	__le32 length;
+	__le32 irqs[0];
+};
+
+enum cpucp_packet_rc {
+	cpucp_packet_success,
+	cpucp_packet_invalid,
+	cpucp_packet_fault
+};
+
+/*
+ * cpucp_temp_type should adhere to hwmon_temp_attributes
+ * defined in Linux kernel hwmon.h file
+ */
+enum cpucp_temp_type {
+	cpucp_temp_input,
+	cpucp_temp_max = 6,
+	cpucp_temp_max_hyst,
+	cpucp_temp_crit,
+	cpucp_temp_crit_hyst,
+	cpucp_temp_offset = 19,
+	cpucp_temp_highest = 22,
+	cpucp_temp_reset_history = 23
+};
+
+enum cpucp_in_attributes {
+	cpucp_in_input,
+	cpucp_in_min,
+	cpucp_in_max,
+	cpucp_in_highest = 7,
+	cpucp_in_reset_history
+};
+
+enum cpucp_curr_attributes {
+	cpucp_curr_input,
+	cpucp_curr_min,
+	cpucp_curr_max,
+	cpucp_curr_highest = 7,
+	cpucp_curr_reset_history
+};
+
+enum cpucp_fan_attributes {
+	cpucp_fan_input,
+	cpucp_fan_min = 2,
+	cpucp_fan_max
+};
+
+enum cpucp_pwm_attributes {
+	cpucp_pwm_input,
+	cpucp_pwm_enable
+};
+
+enum cpucp_pcie_throughput_attributes {
+	cpucp_pcie_throughput_tx,
+	cpucp_pcie_throughput_rx
+};
+
+/* Event Queue Packets */
+
+struct eq_generic_event {
+	__le64 data[7];
+};
+
+/*
+ * CpuCP info
+ */
+
+#define CARD_NAME_MAX_LEN		16
+#define VERSION_MAX_LEN			128
+#define CPUCP_MAX_SENSORS		128
+
+struct cpucp_sensor {
+	__le32 type;
+	__le32 flags;
+};
+
+/**
+ * enum cpucp_card_types - ASIC card type.
+ * @cpucp_card_type_pci: PCI card.
+ * @cpucp_card_type_pmc: PCI Mezzanine Card.
+ */
+enum cpucp_card_types {
+	cpucp_card_type_pci,
+	cpucp_card_type_pmc
+};
+
+/**
+ * struct cpucp_info - Info from CpuCP that is necessary to the host's driver
+ * @sensors: available sensors description.
+ * @kernel_version: CpuCP linux kernel version.
+ * @reserved: reserved field.
+ * @card_type: card configuration type.
+ * @card_location: in a server, each card has different connections topology
+ *                 depending on its location (relevant for PMC card type)
+ * @cpld_version: CPLD programmed F/W version.
+ * @infineon_version: Infineon main DC-DC version.
+ * @fuse_version: silicon production FUSE information.
+ * @thermal_version: thermald S/W version.
+ * @cpucp_version: CpuCP S/W version.
+ * @dram_size: available DRAM size.
+ * @card_name: card name that will be displayed in HWMON subsystem on the host
+ */
+struct cpucp_info {
+	struct cpucp_sensor sensors[CPUCP_MAX_SENSORS];
+	__u8 kernel_version[VERSION_MAX_LEN];
+	__le32 reserved;
+	__le32 card_type;
+	__le32 card_location;
+	__le32 cpld_version;
+	__le32 infineon_version;
+	__u8 fuse_version[VERSION_MAX_LEN];
+	__u8 thermal_version[VERSION_MAX_LEN];
+	__u8 cpucp_version[VERSION_MAX_LEN];
+	__le64 dram_size;
+	char card_name[CARD_NAME_MAX_LEN];
+};
+
+#endif /* CPUCP_IF_H */
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 6803991726e8..a2dcad29340f 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -297,7 +297,7 @@ struct hl_info_hw_ip_info {
 	__u32 device_id; /* PCI Device ID */
 	__u32 module_id; /* For mezzanine cards in servers (From OCP spec.) */
 	__u32 reserved[2];
-	__u32 armcp_cpld_version;
+	__u32 cpld_version;
 	__u32 psoc_pci_pll_nr;
 	__u32 psoc_pci_pll_nf;
 	__u32 psoc_pci_pll_od;
@@ -305,7 +305,7 @@ struct hl_info_hw_ip_info {
 	__u8 tpc_enabled_mask;
 	__u8 dram_enabled;
 	__u8 pad[2];
-	__u8 armcp_version[HL_INFO_VERSION_MAX_LEN];
+	__u8 cpucp_version[HL_INFO_VERSION_MAX_LEN];
 	__u8 card_name[HL_INFO_CARD_NAME_MAX_LEN];
 };
 
-- 
cgit v1.2.3


From 975ab7b32b90c97046ddbdd53798391b7d8a6a1e Mon Sep 17 00:00:00 2001
From: Oded Gabbay <oded.gabbay@gmail.com>
Date: Tue, 1 Sep 2020 11:22:05 +0300
Subject: habanalabs: count dropped CS because max CS in-flight

There is a case where the user reaches the maximum number of CS in-flight.
In that case, the driver rejects the new CS of the user with EAGAIN. Count
that event so the user can query the driver later to see if it happened.

Reviewed-by: Tomer Tayar <ttayar@habana.ai>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/misc/habanalabs/common/command_submission.c | 5 ++++-
 include/uapi/misc/habanalabs.h                      | 2 ++
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index a811a9fdf13b..470bffbe9bdc 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -252,6 +252,8 @@ static void cs_counters_aggregate(struct hl_device *hdev, struct hl_ctx *ctx)
 			ctx->cs_counters.parsing_drop_cnt;
 	hdev->aggregated_cs_counters.queue_full_drop_cnt +=
 			ctx->cs_counters.queue_full_drop_cnt;
+	hdev->aggregated_cs_counters.max_cs_in_flight_drop_cnt +=
+			ctx->cs_counters.max_cs_in_flight_drop_cnt;
 }
 
 static void cs_do_release(struct kref *ref)
@@ -431,8 +433,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 				(hdev->asic_prop.max_pending_cs - 1)];
 
 	if (other && !completion_done(&other->completion)) {
-		dev_dbg(hdev->dev,
+		dev_dbg_ratelimited(hdev->dev,
 			"Rejecting CS because of too many in-flights CS\n");
+		ctx->cs_counters.max_cs_in_flight_drop_cnt++;
 		rc = -EAGAIN;
 		goto free_fence;
 	}
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index a2dcad29340f..69fb44d35292 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -401,12 +401,14 @@ struct hl_info_sync_manager {
  * @parsing_drop_cnt: dropped due to error in packet parsing
  * @queue_full_drop_cnt: dropped due to queue full
  * @device_in_reset_drop_cnt: dropped due to device in reset
+ * @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight
  */
 struct hl_cs_counters {
 	__u64 out_of_mem_drop_cnt;
 	__u64 parsing_drop_cnt;
 	__u64 queue_full_drop_cnt;
 	__u64 device_in_reset_drop_cnt;
+	__u64 max_cs_in_flight_drop_cnt;
 };
 
 struct hl_info_cs_counters {
-- 
cgit v1.2.3


From 681a22f55f1506023da06ebf660a4a252b35bc93 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <oded.gabbay@gmail.com>
Date: Mon, 7 Sep 2020 18:08:51 +0300
Subject: habanalabs: allow to wait on CS without sleep

The user sometimes wants to check if a CS has completed to clean resources.
In that case, the user doesn't want to sleep but just to check if the CS
has finished and continue with his code.

Add a new definition to the API of the wait on CS. The new definition says
that if the timeout is 0, the driver won't sleep at all but return
immediately after checking if the CS has finished.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/misc/habanalabs/common/command_submission.c | 7 +++++--
 include/uapi/misc/habanalabs.h                      | 3 +++
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 470bffbe9bdc..b2b974ecc431 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -1180,8 +1180,11 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev,
 				"Can't wait on CS %llu because current CS is at seq %llu\n",
 				seq, ctx->cs_sequence);
 	} else if (fence) {
-		rc = wait_for_completion_interruptible_timeout(
-				&fence->completion, timeout);
+		if (!timeout_us)
+			rc = completion_done(&fence->completion);
+		else
+			rc = wait_for_completion_interruptible_timeout(
+					&fence->completion, timeout);
 
 		if (fence->error == -ETIMEDOUT)
 			rc = -ETIMEDOUT;
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 69fb44d35292..d449f8a31ce6 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -914,6 +914,9 @@ struct hl_debug_args {
  * inside the kernel until the CS has finished or until the user-requested
  * timeout has expired.
  *
+ * If the timeout value is 0, the driver won't sleep at all. It will check
+ * the status of the CS and return immediately.
+ *
  * The return value of the IOCTL is a standard Linux error code. The possible
  * values are:
  *
-- 
cgit v1.2.3


From ef6a0f6caa4a5dbfbb42b642e23fb06182798d30 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Thu, 9 Jul 2020 16:17:48 +0300
Subject: habanalabs: Add an option to map CB to device MMU

There are cases in which the device should access the host memory of a
CB through the device MMU, and thus this memory should be mapped.
The patch adds a flag to the CB IOCTL, in which a user can ask the
driver to perform the mapping when creating a CB.
The mapping is allowed only if a dedicated VA range was allocated for
the specific ASIC.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/misc/habanalabs/common/command_buffer.c | 196 +++++++++++++++++++++++-
 drivers/misc/habanalabs/common/context.c        |  12 +-
 drivers/misc/habanalabs/common/habanalabs.h     |  20 ++-
 drivers/misc/habanalabs/gaudi/gaudi.c           |   4 +-
 drivers/misc/habanalabs/goya/goya.c             |   4 +-
 include/uapi/misc/habanalabs.h                  |  12 +-
 6 files changed, 237 insertions(+), 11 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c
index 0cb556fb4a8b..901e213daf40 100644
--- a/drivers/misc/habanalabs/common/command_buffer.c
+++ b/drivers/misc/habanalabs/common/command_buffer.c
@@ -13,6 +13,131 @@
 #include <linux/uaccess.h>
 #include <linux/genalloc.h>
 
+static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
+{
+	struct hl_device *hdev = ctx->hdev;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct hl_vm_va_block *va_block, *tmp;
+	dma_addr_t bus_addr;
+	u64 virt_addr;
+	u32 page_size = prop->pmmu.page_size;
+	s32 offset;
+	int rc;
+
+	if (!hdev->supports_cb_mapping) {
+		dev_err_ratelimited(hdev->dev,
+				"Cannot map CB because no VA range is allocated for CB mapping\n");
+		return -EINVAL;
+	}
+
+	if (!hdev->mmu_enable) {
+		dev_err_ratelimited(hdev->dev,
+				"Cannot map CB because MMU is disabled\n");
+		return -EINVAL;
+	}
+
+	INIT_LIST_HEAD(&cb->va_block_list);
+
+	for (bus_addr = cb->bus_address;
+			bus_addr < cb->bus_address + cb->size;
+			bus_addr += page_size) {
+
+		virt_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, page_size);
+		if (!virt_addr) {
+			dev_err(hdev->dev,
+				"Failed to allocate device virtual address for CB\n");
+			rc = -ENOMEM;
+			goto err_va_pool_free;
+		}
+
+		va_block = kzalloc(sizeof(*va_block), GFP_KERNEL);
+		if (!va_block) {
+			rc = -ENOMEM;
+			gen_pool_free(ctx->cb_va_pool, virt_addr, page_size);
+			goto err_va_pool_free;
+		}
+
+		va_block->start = virt_addr;
+		va_block->end = virt_addr + page_size;
+		va_block->size = page_size;
+		list_add_tail(&va_block->node, &cb->va_block_list);
+	}
+
+	mutex_lock(&ctx->mmu_lock);
+
+	bus_addr = cb->bus_address;
+	offset = 0;
+	list_for_each_entry(va_block, &cb->va_block_list, node) {
+		rc = hl_mmu_map(ctx, va_block->start, bus_addr, va_block->size,
+				list_is_last(&va_block->node,
+						&cb->va_block_list));
+		if (rc) {
+			dev_err(hdev->dev, "Failed to map VA %#llx to CB\n",
+				va_block->start);
+			goto err_va_umap;
+		}
+
+		bus_addr += va_block->size;
+		offset += va_block->size;
+	}
+
+	hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
+
+	mutex_unlock(&ctx->mmu_lock);
+
+	cb->is_mmu_mapped = true;
+
+	return 0;
+
+err_va_umap:
+	list_for_each_entry(va_block, &cb->va_block_list, node) {
+		if (offset <= 0)
+			break;
+		hl_mmu_unmap(ctx, va_block->start, va_block->size,
+				offset <= va_block->size);
+		offset -= va_block->size;
+	}
+
+	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
+
+	mutex_unlock(&ctx->mmu_lock);
+
+err_va_pool_free:
+	list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) {
+		gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size);
+		list_del(&va_block->node);
+		kfree(va_block);
+	}
+
+	return rc;
+}
+
+static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb)
+{
+	struct hl_device *hdev = ctx->hdev;
+	struct hl_vm_va_block *va_block, *tmp;
+
+	mutex_lock(&ctx->mmu_lock);
+
+	list_for_each_entry(va_block, &cb->va_block_list, node)
+		if (hl_mmu_unmap(ctx, va_block->start, va_block->size,
+				list_is_last(&va_block->node,
+						&cb->va_block_list)))
+			dev_warn_ratelimited(hdev->dev,
+					"Failed to unmap CB's va 0x%llx\n",
+					va_block->start);
+
+	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
+
+	mutex_unlock(&ctx->mmu_lock);
+
+	list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) {
+		gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size);
+		list_del(&va_block->node);
+		kfree(va_block);
+	}
+}
+
 static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
 {
 	if (cb->is_internal)
@@ -47,6 +172,9 @@ static void cb_release(struct kref *ref)
 
 	hl_debugfs_remove_cb(cb);
 
+	if (cb->is_mmu_mapped)
+		cb_unmap_mem(cb->ctx, cb);
+
 	hl_ctx_put(cb->ctx);
 
 	cb_do_release(hdev, cb);
@@ -110,7 +238,7 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
 
 int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
 			struct hl_ctx *ctx, u32 cb_size, bool internal_cb,
-			u64 *handle)
+			bool map_cb, u64 *handle)
 {
 	struct hl_cb *cb;
 	bool alloc_new_cb = true;
@@ -169,13 +297,26 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
 	cb->ctx = ctx;
 	hl_ctx_get(hdev, cb->ctx);
 
+	if (map_cb) {
+		if (ctx_id == HL_KERNEL_ASID_ID) {
+			dev_err(hdev->dev,
+				"CB mapping is not supported for kernel context\n");
+			rc = -EINVAL;
+			goto release_cb;
+		}
+
+		rc = cb_map_mem(ctx, cb);
+		if (rc)
+			goto release_cb;
+	}
+
 	spin_lock(&mgr->cb_lock);
 	rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC);
 	spin_unlock(&mgr->cb_lock);
 
 	if (rc < 0) {
 		dev_err(hdev->dev, "Failed to allocate IDR for a new CB\n");
-		goto release_cb;
+		goto unmap_mem;
 	}
 
 	cb->id = (u64) rc;
@@ -194,6 +335,9 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
 
 	return 0;
 
+unmap_mem:
+	if (cb->is_mmu_mapped)
+		cb_unmap_mem(cb->ctx, cb);
 release_cb:
 	hl_ctx_put(cb->ctx);
 	cb_do_release(hdev, cb);
@@ -256,7 +400,9 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
 			rc = -EINVAL;
 		} else {
 			rc = hl_cb_create(hdev, &hpriv->cb_mgr, hpriv->ctx,
-					args->in.cb_size, false, &handle);
+					args->in.cb_size, false,
+					!!(args->in.flags & HL_CB_FLAGS_MAP),
+					&handle);
 		}
 
 		memset(args, 0, sizeof(*args));
@@ -442,7 +588,7 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
 	int rc;
 
 	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, cb_size,
-				internal_cb, &cb_handle);
+				internal_cb, false, &cb_handle);
 	if (rc) {
 		dev_err(hdev->dev,
 			"Failed to allocate CB for the kernel driver %d\n", rc);
@@ -498,3 +644,45 @@ int hl_cb_pool_fini(struct hl_device *hdev)
 
 	return 0;
 }
+
+int hl_cb_va_pool_init(struct hl_ctx *ctx)
+{
+	struct hl_device *hdev = ctx->hdev;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	int rc;
+
+	if (!hdev->supports_cb_mapping)
+		return 0;
+
+	ctx->cb_va_pool = gen_pool_create(__ffs(prop->pmmu.page_size), -1);
+	if (!ctx->cb_va_pool) {
+		dev_err(hdev->dev,
+			"Failed to create VA gen pool for CB mapping\n");
+		return -ENOMEM;
+	}
+
+	rc = gen_pool_add(ctx->cb_va_pool, prop->cb_va_start_addr,
+			prop->cb_va_end_addr - prop->cb_va_start_addr, -1);
+	if (rc) {
+		dev_err(hdev->dev,
+			"Failed to add memory to VA gen pool for CB mapping\n");
+		goto err_pool_destroy;
+	}
+
+	return 0;
+
+err_pool_destroy:
+	gen_pool_destroy(ctx->cb_va_pool);
+
+	return rc;
+}
+
+void hl_cb_va_pool_fini(struct hl_ctx *ctx)
+{
+	struct hl_device *hdev = ctx->hdev;
+
+	if (!hdev->supports_cb_mapping)
+		return;
+
+	gen_pool_destroy(ctx->cb_va_pool);
+}
diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c
index b168a9fce817..df8171a2226c 100644
--- a/drivers/misc/habanalabs/common/context.c
+++ b/drivers/misc/habanalabs/common/context.c
@@ -37,6 +37,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
 		if ((hdev->in_debug) && (hdev->compute_ctx == ctx))
 			hl_device_set_debug_mode(hdev, false);
 
+		hl_cb_va_pool_fini(ctx);
 		hl_vm_ctx_fini(ctx);
 		hl_asid_free(hdev, ctx->asid);
 	} else {
@@ -155,15 +156,24 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 			goto err_asid_free;
 		}
 
+		rc = hl_cb_va_pool_init(ctx);
+		if (rc) {
+			dev_err(hdev->dev,
+				"Failed to init VA pool for mapped CB\n");
+			goto err_vm_ctx_fini;
+		}
+
 		rc = hdev->asic_funcs->ctx_init(ctx);
 		if (rc) {
 			dev_err(hdev->dev, "ctx_init failed\n");
-			goto err_vm_ctx_fini;
+			goto err_cb_va_pool_fini;
 		}
 	}
 
 	return 0;
 
+err_cb_va_pool_fini:
+	hl_cb_va_pool_fini(ctx);
 err_vm_ctx_fini:
 	hl_vm_ctx_fini(ctx);
 err_asid_free:
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 88c68b664ef6..eaa9bf3f82a3 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -291,6 +291,10 @@ struct hl_mmu_properties {
  * @pcie_aux_dbi_reg_addr: Address of the PCIE_AUX DBI register.
  * @mmu_pgt_addr: base physical address in DRAM of MMU page tables.
  * @mmu_dram_default_page_addr: DRAM default page physical address.
+ * @cb_va_start_addr: virtual start address of command buffers which are mapped
+ *                    to the device's MMU.
+ * @cb_va_end_addr: virtual end address of command buffers which are mapped to
+ *                  the device's MMU.
  * @mmu_pgt_size: MMU page tables total size.
  * @mmu_pte_size: PTE size in MMU page tables.
  * @mmu_hop_table_size: MMU hop table size.
@@ -339,6 +343,8 @@ struct asic_fixed_properties {
 	u64				pcie_aux_dbi_reg_addr;
 	u64				mmu_pgt_addr;
 	u64				mmu_dram_default_page_addr;
+	u64				cb_va_start_addr;
+	u64				cb_va_end_addr;
 	u32				mmu_pgt_size;
 	u32				mmu_pte_size;
 	u32				mmu_hop_table_size;
@@ -421,6 +427,8 @@ struct hl_cb_mgr {
  * @lock: spinlock to protect mmap/cs flows.
  * @debugfs_list: node in debugfs list of command buffers.
  * @pool_list: node in pool list of command buffers.
+ * @va_block_list: list of virtual addresses blocks of the CB if it is mapped to
+ *                 the device's MMU.
  * @id: the CB's ID.
  * @kernel_address: Holds the CB's kernel virtual address.
  * @bus_address: Holds the CB's DMA address.
@@ -430,6 +438,7 @@ struct hl_cb_mgr {
  * @mmap: true if the CB is currently mmaped to user.
  * @is_pool: true if CB was acquired from the pool, false otherwise.
  * @is_internal: internaly allocated
+ * @is_mmu_mapped: true if the CB is mapped to the device's MMU.
  */
 struct hl_cb {
 	struct kref		refcount;
@@ -438,6 +447,7 @@ struct hl_cb {
 	spinlock_t		lock;
 	struct list_head	debugfs_list;
 	struct list_head	pool_list;
+	struct list_head	va_block_list;
 	u64			id;
 	u64			kernel_address;
 	dma_addr_t		bus_address;
@@ -447,6 +457,7 @@ struct hl_cb {
 	u8			mmap;
 	u8			is_pool;
 	u8			is_internal;
+	u8			is_mmu_mapped;
 };
 
 
@@ -843,6 +854,8 @@ struct hl_va_range {
  * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the
  *            MMU hash or walking the PGT requires talking this lock.
  * @debugfs_list: node in debugfs list of contexts.
+ * @cb_va_pool: device VA pool for command buffers which are mapped to the
+ *              device's MMU.
  * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed
  *			to user so user could inquire about CS. It is used as
  *			index to cs_pending array.
@@ -874,6 +887,7 @@ struct hl_ctx {
 	struct mutex		mmu_lock;
 	struct list_head	debugfs_list;
 	struct hl_cs_counters	cs_counters;
+	struct gen_pool		*cb_va_pool;
 	u64			cs_sequence;
 	u64			*dram_default_hops;
 	spinlock_t		cs_lock;
@@ -1574,6 +1588,7 @@ struct hl_mmu_funcs {
  * @sync_stream_queue_idx: helper index for sync stream queues initialization.
  * @supports_coresight: is CoreSight supported.
  * @supports_soft_reset: is soft reset supported.
+ * @supports_cb_mapping: is mapping a CB to the device's MMU supported.
  */
 struct hl_device {
 	struct pci_dev			*pdev;
@@ -1673,6 +1688,7 @@ struct hl_device {
 	u8				sync_stream_queue_idx;
 	u8				supports_coresight;
 	u8				supports_soft_reset;
+	u8				supports_cb_mapping;
 
 	/* Parameters for bring-up */
 	u8				mmu_enable;
@@ -1840,7 +1856,7 @@ void hl_hwmon_fini(struct hl_device *hdev);
 
 int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
 			struct hl_ctx *ctx, u32 cb_size, bool internal_cb,
-			u64 *handle);
+			bool map_cb, u64 *handle);
 int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle);
 int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
 struct hl_cb *hl_cb_get(struct hl_device *hdev,	struct hl_cb_mgr *mgr,
@@ -1852,6 +1868,8 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
 					bool internal_cb);
 int hl_cb_pool_init(struct hl_device *hdev);
 int hl_cb_pool_fini(struct hl_device *hdev);
+int hl_cb_va_pool_init(struct hl_ctx *ctx);
+void hl_cb_va_pool_fini(struct hl_ctx *ctx);
 
 void hl_cs_rollback_all(struct hl_device *hdev);
 struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index b51cc6c1d541..6f7f6ad7a358 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -4115,7 +4115,7 @@ static int gaudi_parse_cb_mmu(struct hl_device *hdev,
 			sizeof(struct packet_msg_prot) * 2;
 
 	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
-				parser->patched_cb_size, false,
+				parser->patched_cb_size, false, false,
 				&patched_cb_handle);
 
 	if (rc) {
@@ -4189,7 +4189,7 @@ static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
 		goto free_userptr;
 
 	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
-				parser->patched_cb_size, false,
+				parser->patched_cb_size, false, false,
 				&patched_cb_handle);
 	if (rc) {
 		dev_err(hdev->dev,
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 6c81a4b148de..5cddd46a8fb8 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -3811,7 +3811,7 @@ static int goya_parse_cb_mmu(struct hl_device *hdev,
 			sizeof(struct packet_msg_prot) * 2;
 
 	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
-				parser->patched_cb_size, false,
+				parser->patched_cb_size, false, false,
 				&patched_cb_handle);
 
 	if (rc) {
@@ -3885,7 +3885,7 @@ static int goya_parse_cb_no_mmu(struct hl_device *hdev,
 		goto free_userptr;
 
 	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
-				parser->patched_cb_size, false,
+				parser->patched_cb_size, false, false,
 				&patched_cb_handle);
 	if (rc) {
 		dev_err(hdev->dev,
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index d449f8a31ce6..9705b8adb60c 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -462,6 +462,9 @@ struct hl_info_args {
 /* 2MB minus 32 bytes for 2xMSG_PROT */
 #define HL_MAX_CB_SIZE		(0x200000 - 32)
 
+/* Indicates whether the command buffer should be mapped to the device's MMU */
+#define HL_CB_FLAGS_MAP		0x1
+
 struct hl_cb_in {
 	/* Handle of CB or 0 if we want to create one */
 	__u64 cb_handle;
@@ -473,7 +476,8 @@ struct hl_cb_in {
 	__u32 cb_size;
 	/* Context ID - Currently not in use */
 	__u32 ctx_id;
-	__u32 pad;
+	/* HL_CB_FLAGS_* */
+	__u32 flags;
 };
 
 struct hl_cb_out {
@@ -856,6 +860,12 @@ struct hl_debug_args {
  * When creating a new CB, the IOCTL returns a handle of it, and the user-space
  * process needs to use that handle to mmap the buffer so it can access them.
  *
+ * In some instances, the device must access the command buffer through the
+ * device's MMU, and thus its memory should be mapped. In these cases, the user
+ * can indicate to the driver that such a mapping is required.
+ * The resulting device virtual address will be used internally by the driver,
+ * and won't be returned to the user.
+ *
  */
 #define HL_IOCTL_CB		\
 		_IOWR('H', 0x02, union hl_cb_args)
-- 
cgit v1.2.3


From 028abd9222df0cf5855dab5014a5ebaf06f90565 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 17 Sep 2020 10:22:34 +0200
Subject: fs: remove compat_sys_mount

compat_sys_mount is identical to the regular sys_mount now, so remove it
and use the native version everywhere.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/arm64/include/asm/unistd32.h                  |  2 +-
 arch/mips/kernel/syscalls/syscall_n32.tbl          |  2 +-
 arch/mips/kernel/syscalls/syscall_o32.tbl          |  2 +-
 arch/parisc/kernel/syscalls/syscall.tbl            |  2 +-
 arch/powerpc/kernel/syscalls/syscall.tbl           |  2 +-
 arch/s390/kernel/syscalls/syscall.tbl              |  2 +-
 arch/sparc/kernel/syscalls/syscall.tbl             |  2 +-
 arch/x86/entry/syscalls/syscall_32.tbl             |  2 +-
 fs/Makefile                                        |  1 -
 fs/compat.c                                        | 57 ----------------------
 fs/internal.h                                      |  3 --
 fs/namespace.c                                     |  4 +-
 include/linux/compat.h                             |  6 ---
 include/uapi/asm-generic/unistd.h                  |  2 +-
 tools/include/uapi/asm-generic/unistd.h            |  2 +-
 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl |  2 +-
 tools/perf/arch/s390/entry/syscalls/syscall.tbl    |  2 +-
 17 files changed, 14 insertions(+), 81 deletions(-)
 delete mode 100644 fs/compat.c

(limited to 'include/uapi')

diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 734860ac7cf9..5fd095d65450 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -53,7 +53,7 @@ __SYSCALL(__NR_lseek, compat_sys_lseek)
 #define __NR_getpid 20
 __SYSCALL(__NR_getpid, sys_getpid)
 #define __NR_mount 21
-__SYSCALL(__NR_mount, compat_sys_mount)
+__SYSCALL(__NR_mount, sys_mount)
 			/* 22 was sys_umount */
 __SYSCALL(22, sys_ni_syscall)
 #define __NR_setuid 23
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index f9df9edb67a4..61fa9e7013cb 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -167,7 +167,7 @@
 157	n32	sync				sys_sync
 158	n32	acct				sys_acct
 159	n32	settimeofday			compat_sys_settimeofday
-160	n32	mount				compat_sys_mount
+160	n32	mount				sys_mount
 161	n32	umount2				sys_umount
 162	n32	swapon				sys_swapon
 163	n32	swapoff				sys_swapoff
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 195b43cf27c8..b992e89be7ff 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -29,7 +29,7 @@
 18	o32	unused18			sys_ni_syscall
 19	o32	lseek				sys_lseek
 20	o32	getpid				sys_getpid
-21	o32	mount				sys_mount			compat_sys_mount
+21	o32	mount				sys_mount
 22	o32	umount				sys_oldumount
 23	o32	setuid				sys_setuid
 24	o32	getuid				sys_getuid
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index def64d221cd4..07efd978182f 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -29,7 +29,7 @@
 18	common	stat			sys_newstat			compat_sys_newstat
 19	common	lseek			sys_lseek			compat_sys_lseek
 20	common	getpid			sys_getpid
-21	common	mount			sys_mount			compat_sys_mount
+21	common	mount			sys_mount
 22	common	bind			sys_bind
 23	common	setuid			sys_setuid
 24	common	getuid			sys_getuid
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index c2d737ff2e7b..a36ad4fec73c 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -34,7 +34,7 @@
 18	spu	oldstat				sys_ni_syscall
 19	common	lseek				sys_lseek			compat_sys_lseek
 20	common	getpid				sys_getpid
-21	nospu	mount				sys_mount			compat_sys_mount
+21	nospu	mount				sys_mount
 22	32	umount				sys_oldumount
 22	64	umount				sys_ni_syscall
 22	spu	umount				sys_ni_syscall
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 10456bc936fb..4b803dfbee2b 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -26,7 +26,7 @@
 16   32		lchown			-				sys_lchown16
 19   common	lseek			sys_lseek			compat_sys_lseek
 20   common	getpid			sys_getpid			sys_getpid
-21   common	mount			sys_mount			compat_sys_mount
+21   common	mount			sys_mount			sys_mount
 22   common	umount			sys_oldumount			sys_oldumount
 23   32		setuid			-				sys_setuid16
 24   32		getuid			-				sys_getuid16
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index 4af114e84f20..d5ff798fa08f 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -201,7 +201,7 @@
 164	64	utrap_install		sys_utrap_install
 165	common	quotactl		sys_quotactl
 166	common	set_tid_address		sys_set_tid_address
-167	common	mount			sys_mount			compat_sys_mount
+167	common	mount			sys_mount
 168	common	ustat			sys_ustat			compat_sys_ustat
 169	common	setxattr		sys_setxattr
 170	common	lsetxattr		sys_lsetxattr
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 9d1102873666..5a40b226fb7b 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -32,7 +32,7 @@
 18	i386	oldstat			sys_stat
 19	i386	lseek			sys_lseek			compat_sys_lseek
 20	i386	getpid			sys_getpid
-21	i386	mount			sys_mount			compat_sys_mount
+21	i386	mount			sys_mount
 22	i386	umount			sys_oldumount
 23	i386	setuid			sys_setuid16
 24	i386	getuid			sys_getuid16
diff --git a/fs/Makefile b/fs/Makefile
index 1c7b0e3f6daa..d72ee2ce7af0 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -37,7 +37,6 @@ obj-$(CONFIG_FS_DAX)		+= dax.o
 obj-$(CONFIG_FS_ENCRYPTION)	+= crypto/
 obj-$(CONFIG_FS_VERITY)		+= verity/
 obj-$(CONFIG_FILE_LOCKING)      += locks.o
-obj-$(CONFIG_COMPAT)		+= compat.o
 obj-$(CONFIG_BINFMT_AOUT)	+= binfmt_aout.o
 obj-$(CONFIG_BINFMT_EM86)	+= binfmt_em86.o
 obj-$(CONFIG_BINFMT_MISC)	+= binfmt_misc.o
diff --git a/fs/compat.c b/fs/compat.c
deleted file mode 100644
index 9b00523d7fa5..000000000000
--- a/fs/compat.c
+++ /dev/null
@@ -1,57 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  linux/fs/compat.c
- *
- *  Kernel compatibililty routines for e.g. 32 bit syscall support
- *  on 64 bit kernels.
- *
- *  Copyright (C) 2002       Stephen Rothwell, IBM Corporation
- *  Copyright (C) 1997-2000  Jakub Jelinek  (jakub@redhat.com)
- *  Copyright (C) 1998       Eddie C. Dost  (ecd@skynet.be)
- *  Copyright (C) 2001,2002  Andi Kleen, SuSE Labs 
- *  Copyright (C) 2003       Pavel Machek (pavel@ucw.cz)
- */
-
-#include <linux/compat.h>
-#include <linux/nfs4_mount.h>
-#include <linux/syscalls.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-#include "internal.h"
-
-COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name,
-		       const char __user *, dir_name,
-		       const char __user *, type, compat_ulong_t, flags,
-		       const void __user *, data)
-{
-	char *kernel_type;
-	void *options;
-	char *kernel_dev;
-	int retval;
-
-	kernel_type = copy_mount_string(type);
-	retval = PTR_ERR(kernel_type);
-	if (IS_ERR(kernel_type))
-		goto out;
-
-	kernel_dev = copy_mount_string(dev_name);
-	retval = PTR_ERR(kernel_dev);
-	if (IS_ERR(kernel_dev))
-		goto out1;
-
-	options = copy_mount_options(data);
-	retval = PTR_ERR(options);
-	if (IS_ERR(options))
-		goto out2;
-
-	retval = do_mount(kernel_dev, dir_name, kernel_type, flags, options);
-
- out3:
-	kfree(options);
- out2:
-	kfree(kernel_dev);
- out1:
-	kfree(kernel_type);
- out:
-	return retval;
-}
diff --git a/fs/internal.h b/fs/internal.h
index 10517ece4516..a7cd0f64faa4 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -82,9 +82,6 @@ int may_linkat(struct path *link);
 /*
  * namespace.c
  */
-extern void *copy_mount_options(const void __user *);
-extern char *copy_mount_string(const void __user *);
-
 extern struct vfsmount *lookup_mnt(const struct path *);
 extern int finish_automount(struct vfsmount *, struct path *);
 
diff --git a/fs/namespace.c b/fs/namespace.c
index bae0e95b3713..12b431b61462 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3072,7 +3072,7 @@ static void shrink_submounts(struct mount *mnt)
 	}
 }
 
-void *copy_mount_options(const void __user * data)
+static void *copy_mount_options(const void __user * data)
 {
 	char *copy;
 	unsigned size;
@@ -3097,7 +3097,7 @@ void *copy_mount_options(const void __user * data)
 	return copy;
 }
 
-char *copy_mount_string(const void __user *data)
+static char *copy_mount_string(const void __user *data)
 {
 	return data ? strndup_user(data, PATH_MAX) : NULL;
 }
diff --git a/include/linux/compat.h b/include/linux/compat.h
index d38c4d7e83bd..100632280ccc 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -522,12 +522,6 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
 asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
 				 compat_ulong_t arg);
 
-/* fs/namespace.c */
-asmlinkage long compat_sys_mount(const char __user *dev_name,
-				 const char __user *dir_name,
-				 const char __user *type, compat_ulong_t flags,
-				 const void __user *data);
-
 /* fs/open.c */
 asmlinkage long compat_sys_statfs(const char __user *pathname,
 				  struct compat_statfs __user *buf);
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 995b36c2ea7d..fc98c9437609 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -140,7 +140,7 @@ __SYSCALL(__NR_renameat, sys_renameat)
 #define __NR_umount2 39
 __SYSCALL(__NR_umount2, sys_umount)
 #define __NR_mount 40
-__SC_COMP(__NR_mount, sys_mount, compat_sys_mount)
+__SYSCALL(__NR_mount, sys_mount)
 #define __NR_pivot_root 41
 __SYSCALL(__NR_pivot_root, sys_pivot_root)
 
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 995b36c2ea7d..fc98c9437609 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -140,7 +140,7 @@ __SYSCALL(__NR_renameat, sys_renameat)
 #define __NR_umount2 39
 __SYSCALL(__NR_umount2, sys_umount)
 #define __NR_mount 40
-__SC_COMP(__NR_mount, sys_mount, compat_sys_mount)
+__SYSCALL(__NR_mount, sys_mount)
 #define __NR_pivot_root 41
 __SYSCALL(__NR_pivot_root, sys_pivot_root)
 
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 3ca6fe057a0b..c2866c659650 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -32,7 +32,7 @@
 18	spu	oldstat				sys_ni_syscall
 19	common	lseek				sys_lseek			compat_sys_lseek
 20	common	getpid				sys_getpid
-21	nospu	mount				sys_mount			compat_sys_mount
+21	nospu	mount				sys_mount
 22	32	umount				sys_oldumount
 22	64	umount				sys_ni_syscall
 22	spu	umount				sys_ni_syscall
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index 6a0bbea225db..8e0806f6c38e 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -26,7 +26,7 @@
 16   32		lchown			-				compat_sys_s390_lchown16
 19   common	lseek			sys_lseek			compat_sys_lseek
 20   common	getpid			sys_getpid			sys_getpid
-21   common	mount			sys_mount			compat_sys_mount
+21   common	mount			sys_mount
 22   common	umount			sys_oldumount			compat_sys_oldumount
 23   32		setuid			-				compat_sys_s390_setuid16
 24   32		getuid			-				compat_sys_s390_getuid16
-- 
cgit v1.2.3


From 9c4258c78a2a7624c79b797f40ae2dbfd2555e26 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 22 Sep 2020 10:30:18 +0300
Subject: net: bridge: mdb: add support to extend add/del commands

Since the MDB add/del code expects an exact struct br_mdb_entry we can't
really add any extensions, thus add a new nested attribute at the level of
MDBA_SET_ENTRY called MDBA_SET_ENTRY_ATTRS which will be used to pass
all new options via netlink attributes. This patch doesn't change
anything functionally since the new attribute is not used yet, only
parsed.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_bridge.h | 12 ++++++++++++
 net/bridge/br_mdb.c            | 22 +++++++++++++++++++---
 2 files changed, 31 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index 75a2ac479247..dc52f8cffa0d 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -530,10 +530,22 @@ struct br_mdb_entry {
 enum {
 	MDBA_SET_ENTRY_UNSPEC,
 	MDBA_SET_ENTRY,
+	MDBA_SET_ENTRY_ATTRS,
 	__MDBA_SET_ENTRY_MAX,
 };
 #define MDBA_SET_ENTRY_MAX (__MDBA_SET_ENTRY_MAX - 1)
 
+/* [MDBA_SET_ENTRY_ATTRS] = {
+ *    [MDBE_ATTR_xxx]
+ *    ...
+ * }
+ */
+enum {
+	MDBE_ATTR_UNSPEC,
+	__MDBE_ATTR_MAX,
+};
+#define MDBE_ATTR_MAX (__MDBE_ATTR_MAX - 1)
+
 /* Embedded inside LINK_XSTATS_TYPE_BRIDGE */
 enum {
 	BRIDGE_XSTATS_UNSPEC,
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index a1ff0a372185..907df6d695ec 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -670,9 +670,12 @@ static bool is_valid_mdb_entry(struct br_mdb_entry *entry,
 	return true;
 }
 
+static const struct nla_policy br_mdbe_attrs_pol[MDBE_ATTR_MAX + 1] = {
+};
+
 static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh,
 			struct net_device **pdev, struct br_mdb_entry **pentry,
-			struct netlink_ext_ack *extack)
+			struct nlattr **mdb_attrs, struct netlink_ext_ack *extack)
 {
 	struct net *net = sock_net(skb->sk);
 	struct br_mdb_entry *entry;
@@ -719,6 +722,17 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh,
 		return -EINVAL;
 	*pentry = entry;
 
+	if (tb[MDBA_SET_ENTRY_ATTRS]) {
+		err = nla_parse_nested(mdb_attrs, MDBE_ATTR_MAX,
+				       tb[MDBA_SET_ENTRY_ATTRS],
+				       br_mdbe_attrs_pol, extack);
+		if (err)
+			return err;
+	} else {
+		memset(mdb_attrs, 0,
+		       sizeof(struct nlattr *) * (MDBE_ATTR_MAX + 1));
+	}
+
 	return 0;
 }
 
@@ -803,6 +817,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
 static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 		      struct netlink_ext_ack *extack)
 {
+	struct nlattr *mdb_attrs[MDBE_ATTR_MAX + 1];
 	struct net *net = sock_net(skb->sk);
 	struct net_bridge_vlan_group *vg;
 	struct net_bridge_port *p = NULL;
@@ -812,7 +827,7 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct net_bridge *br;
 	int err;
 
-	err = br_mdb_parse(skb, nlh, &dev, &entry, extack);
+	err = br_mdb_parse(skb, nlh, &dev, &entry, mdb_attrs, extack);
 	if (err < 0)
 		return err;
 
@@ -921,6 +936,7 @@ unlock:
 static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
 		      struct netlink_ext_ack *extack)
 {
+	struct nlattr *mdb_attrs[MDBE_ATTR_MAX + 1];
 	struct net *net = sock_net(skb->sk);
 	struct net_bridge_vlan_group *vg;
 	struct net_bridge_port *p = NULL;
@@ -930,7 +946,7 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct net_bridge *br;
 	int err;
 
-	err = br_mdb_parse(skb, nlh, &dev, &entry, extack);
+	err = br_mdb_parse(skb, nlh, &dev, &entry, mdb_attrs, extack);
 	if (err < 0)
 		return err;
 
-- 
cgit v1.2.3


From 88d4bd180419a7cde3947f191dc4e26fbb19f80b Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 22 Sep 2020 10:30:19 +0300
Subject: net: bridge: mdb: add support for add/del/dump of entries with source

Add new mdb attributes (MDBE_ATTR_SOURCE for setting,
MDBA_MDB_EATTR_SOURCE for dumping) to allow add/del and dump of mdb
entries with a source address (S,G). New S,G entries are created with
filter mode of MCAST_INCLUDE. The same attributes are used for IPv4 and
IPv6, they're validated and parsed based on their protocol.
S,G host joined entries which are added by user are not allowed yet.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_bridge.h |   2 +
 net/bridge/br_mdb.c            | 142 +++++++++++++++++++++++++++++++++--------
 net/bridge/br_private.h        |  14 ++++
 3 files changed, 130 insertions(+), 28 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index dc52f8cffa0d..3e6377c865eb 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -457,6 +457,7 @@ enum {
 	MDBA_MDB_EATTR_TIMER,
 	MDBA_MDB_EATTR_SRC_LIST,
 	MDBA_MDB_EATTR_GROUP_MODE,
+	MDBA_MDB_EATTR_SOURCE,
 	__MDBA_MDB_EATTR_MAX
 };
 #define MDBA_MDB_EATTR_MAX (__MDBA_MDB_EATTR_MAX - 1)
@@ -542,6 +543,7 @@ enum {
  */
 enum {
 	MDBE_ATTR_UNSPEC,
+	MDBE_ATTR_SOURCE,
 	__MDBE_ATTR_MAX,
 };
 #define MDBE_ATTR_MAX (__MDBE_ATTR_MAX - 1)
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 907df6d695ec..7f9ca5c20120 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -64,17 +64,27 @@ static void __mdb_entry_fill_flags(struct br_mdb_entry *e, unsigned char flags)
 		e->flags |= MDB_FLAGS_FAST_LEAVE;
 }
 
-static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip)
+static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip,
+				 struct nlattr **mdb_attrs)
 {
 	memset(ip, 0, sizeof(struct br_ip));
 	ip->vid = entry->vid;
 	ip->proto = entry->addr.proto;
-	if (ip->proto == htons(ETH_P_IP))
+	switch (ip->proto) {
+	case htons(ETH_P_IP):
 		ip->dst.ip4 = entry->addr.u.ip4;
+		if (mdb_attrs && mdb_attrs[MDBE_ATTR_SOURCE])
+			ip->src.ip4 = nla_get_in_addr(mdb_attrs[MDBE_ATTR_SOURCE]);
+		break;
 #if IS_ENABLED(CONFIG_IPV6)
-	else
+	case htons(ETH_P_IPV6):
 		ip->dst.ip6 = entry->addr.u.ip6;
+		if (mdb_attrs && mdb_attrs[MDBE_ATTR_SOURCE])
+			ip->src.ip6 = nla_get_in6_addr(mdb_attrs[MDBE_ATTR_SOURCE]);
+		break;
 #endif
+	}
+
 }
 
 static int __mdb_fill_srcs(struct sk_buff *skb,
@@ -172,30 +182,41 @@ static int __mdb_fill_info(struct sk_buff *skb,
 	if (nla_put_nohdr(skb, sizeof(e), &e) ||
 	    nla_put_u32(skb,
 			MDBA_MDB_EATTR_TIMER,
-			br_timer_value(mtimer))) {
-		nla_nest_cancel(skb, nest_ent);
-		return -EMSGSIZE;
-	}
+			br_timer_value(mtimer)))
+		goto nest_err;
 	switch (mp->addr.proto) {
 	case htons(ETH_P_IP):
-		dump_srcs_mode = !!(p && mp->br->multicast_igmp_version == 3);
+		dump_srcs_mode = !!(mp->br->multicast_igmp_version == 3);
+		if (mp->addr.src.ip4) {
+			if (nla_put_in_addr(skb, MDBA_MDB_EATTR_SOURCE,
+					    mp->addr.src.ip4))
+				goto nest_err;
+			break;
+		}
 		break;
 #if IS_ENABLED(CONFIG_IPV6)
 	case htons(ETH_P_IPV6):
-		dump_srcs_mode = !!(p && mp->br->multicast_mld_version == 2);
+		dump_srcs_mode = !!(mp->br->multicast_mld_version == 2);
+		if (!ipv6_addr_any(&mp->addr.src.ip6)) {
+			if (nla_put_in6_addr(skb, MDBA_MDB_EATTR_SOURCE,
+					     &mp->addr.src.ip6))
+				goto nest_err;
+			break;
+		}
 		break;
 #endif
 	}
-	if (dump_srcs_mode &&
+	if (p && dump_srcs_mode &&
 	    (__mdb_fill_srcs(skb, p) ||
-	     nla_put_u8(skb, MDBA_MDB_EATTR_GROUP_MODE, p->filter_mode))) {
-		nla_nest_cancel(skb, nest_ent);
-		return -EMSGSIZE;
-	}
-
+	     nla_put_u8(skb, MDBA_MDB_EATTR_GROUP_MODE, p->filter_mode)))
+		goto nest_err;
 	nla_nest_end(skb, nest_ent);
 
 	return 0;
+
+nest_err:
+	nla_nest_cancel(skb, nest_ent);
+	return -EMSGSIZE;
 }
 
 static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
@@ -395,12 +416,18 @@ static size_t rtnl_mdb_nlmsg_size(struct net_bridge_port_group *pg)
 
 	switch (pg->addr.proto) {
 	case htons(ETH_P_IP):
+		/* MDBA_MDB_EATTR_SOURCE */
+		if (pg->addr.src.ip4)
+			nlmsg_size += nla_total_size(sizeof(__be32));
 		if (pg->port->br->multicast_igmp_version == 2)
 			goto out;
 		addr_size = sizeof(__be32);
 		break;
 #if IS_ENABLED(CONFIG_IPV6)
 	case htons(ETH_P_IPV6):
+		/* MDBA_MDB_EATTR_SOURCE */
+		if (!ipv6_addr_any(&pg->addr.src.ip6))
+			nlmsg_size += nla_total_size(sizeof(struct in6_addr));
 		if (pg->port->br->multicast_mld_version == 1)
 			goto out;
 		addr_size = sizeof(struct in6_addr);
@@ -670,7 +697,48 @@ static bool is_valid_mdb_entry(struct br_mdb_entry *entry,
 	return true;
 }
 
+static bool is_valid_mdb_source(struct nlattr *attr, __be16 proto,
+				struct netlink_ext_ack *extack)
+{
+	switch (proto) {
+	case htons(ETH_P_IP):
+		if (nla_len(attr) != sizeof(struct in_addr)) {
+			NL_SET_ERR_MSG_MOD(extack, "IPv4 invalid source address length");
+			return false;
+		}
+		if (ipv4_is_multicast(nla_get_in_addr(attr))) {
+			NL_SET_ERR_MSG_MOD(extack, "IPv4 multicast source address is not allowed");
+			return false;
+		}
+		break;
+#if IS_ENABLED(CONFIG_IPV6)
+	case htons(ETH_P_IPV6): {
+		struct in6_addr src;
+
+		if (nla_len(attr) != sizeof(struct in6_addr)) {
+			NL_SET_ERR_MSG_MOD(extack, "IPv6 invalid source address length");
+			return false;
+		}
+		src = nla_get_in6_addr(attr);
+		if (ipv6_addr_is_multicast(&src)) {
+			NL_SET_ERR_MSG_MOD(extack, "IPv6 multicast source address is not allowed");
+			return false;
+		}
+		break;
+	}
+#endif
+	default:
+		NL_SET_ERR_MSG_MOD(extack, "Invalid protocol used with source address");
+		return false;
+	}
+
+	return true;
+}
+
 static const struct nla_policy br_mdbe_attrs_pol[MDBE_ATTR_MAX + 1] = {
+	[MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY,
+					      sizeof(struct in_addr),
+					      sizeof(struct in6_addr)),
 };
 
 static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -728,6 +796,10 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh,
 				       br_mdbe_attrs_pol, extack);
 		if (err)
 			return err;
+		if (mdb_attrs[MDBE_ATTR_SOURCE] &&
+		    !is_valid_mdb_source(mdb_attrs[MDBE_ATTR_SOURCE],
+					 entry->addr.proto, extack))
+			return -EINVAL;
 	} else {
 		memset(mdb_attrs, 0,
 		       sizeof(struct nlattr *) * (MDBE_ATTR_MAX + 1));
@@ -744,8 +816,22 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
 	struct net_bridge_port_group *p;
 	struct net_bridge_port_group __rcu **pp;
 	unsigned long now = jiffies;
+	u8 filter_mode;
 	int err;
 
+	/* host join errors which can happen before creating the group */
+	if (!port) {
+		/* don't allow any flags for host-joined groups */
+		if (entry->state) {
+			NL_SET_ERR_MSG_MOD(extack, "Flags are not allowed for host groups");
+			return -EINVAL;
+		}
+		if (!br_multicast_is_star_g(group)) {
+			NL_SET_ERR_MSG_MOD(extack, "Groups with sources cannot be manually host joined");
+			return -EINVAL;
+		}
+	}
+
 	mp = br_mdb_ip_get(br, group);
 	if (!mp) {
 		mp = br_multicast_new_group(br, group);
@@ -756,11 +842,6 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
 
 	/* host join */
 	if (!port) {
-		/* don't allow any flags for host-joined groups */
-		if (entry->state) {
-			NL_SET_ERR_MSG_MOD(extack, "Flags are not allowed for host groups");
-			return -EINVAL;
-		}
 		if (mp->host_joined) {
 			NL_SET_ERR_MSG_MOD(extack, "Group is already joined by host");
 			return -EEXIST;
@@ -783,8 +864,11 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
 			break;
 	}
 
+	filter_mode = br_multicast_is_star_g(group) ? MCAST_EXCLUDE :
+						      MCAST_INCLUDE;
+
 	p = br_multicast_new_port_group(port, group, *pp, entry->state, NULL,
-					MCAST_EXCLUDE);
+					filter_mode);
 	if (unlikely(!p)) {
 		NL_SET_ERR_MSG_MOD(extack, "Couldn't allocate new port group");
 		return -ENOMEM;
@@ -800,12 +884,13 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
 static int __br_mdb_add(struct net *net, struct net_bridge *br,
 			struct net_bridge_port *p,
 			struct br_mdb_entry *entry,
+			struct nlattr **mdb_attrs,
 			struct netlink_ext_ack *extack)
 {
 	struct br_ip ip;
 	int ret;
 
-	__mdb_entry_to_br_ip(entry, &ip);
+	__mdb_entry_to_br_ip(entry, &ip, mdb_attrs);
 
 	spin_lock_bh(&br->multicast_lock);
 	ret = br_mdb_add_group(br, p, &ip, entry, extack);
@@ -875,18 +960,19 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) {
 		list_for_each_entry(v, &vg->vlan_list, vlist) {
 			entry->vid = v->vid;
-			err = __br_mdb_add(net, br, p, entry, extack);
+			err = __br_mdb_add(net, br, p, entry, mdb_attrs, extack);
 			if (err)
 				break;
 		}
 	} else {
-		err = __br_mdb_add(net, br, p, entry, extack);
+		err = __br_mdb_add(net, br, p, entry, mdb_attrs, extack);
 	}
 
 	return err;
 }
 
-static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
+static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry,
+			struct nlattr **mdb_attrs)
 {
 	struct net_bridge_mdb_entry *mp;
 	struct net_bridge_port_group *p;
@@ -897,7 +983,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
 	if (!netif_running(br->dev) || !br_opt_get(br, BROPT_MULTICAST_ENABLED))
 		return -EINVAL;
 
-	__mdb_entry_to_br_ip(entry, &ip);
+	__mdb_entry_to_br_ip(entry, &ip, mdb_attrs);
 
 	spin_lock_bh(&br->multicast_lock);
 	mp = br_mdb_ip_get(br, &ip);
@@ -971,10 +1057,10 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) {
 		list_for_each_entry(v, &vg->vlan_list, vlist) {
 			entry->vid = v->vid;
-			err = __br_mdb_del(br, entry);
+			err = __br_mdb_del(br, entry, mdb_attrs);
 		}
 	} else {
-		err = __br_mdb_del(br, entry);
+		err = __br_mdb_del(br, entry, mdb_attrs);
 	}
 
 	return err;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index a23d2bae56e1..0f54a7a7c186 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -873,6 +873,20 @@ static inline bool br_multicast_querier_exists(struct net_bridge *br,
 	}
 }
 
+static inline bool br_multicast_is_star_g(const struct br_ip *ip)
+{
+	switch (ip->proto) {
+	case htons(ETH_P_IP):
+		return ipv4_is_zeronet(ip->src.ip4);
+#if IS_ENABLED(CONFIG_IPV6)
+	case htons(ETH_P_IPV6):
+		return ipv6_addr_any(&ip->src.ip6);
+#endif
+	default:
+		return false;
+	}
+}
+
 static inline int br_multicast_igmp_type(const struct sk_buff *skb)
 {
 	return BR_INPUT_SKB_CB(skb)->igmp;
-- 
cgit v1.2.3


From 8f8cb77e0b22d9044d8d57ab3bb18ea8d0474752 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 22 Sep 2020 10:30:21 +0300
Subject: net: bridge: mcast: add rt_protocol field to the port group struct

We need to be able to differentiate between pg entries created by
user-space and the kernel when we start generating S,G entries for
IGMPv3/MLDv2's fast path. User-space entries are created by default as
RTPROT_STATIC and the kernel entries are RTPROT_KERNEL. Later we can
allow user-space to provide the entry rt_protocol so we can
differentiate between who added the entries specifically (e.g. clag,
admin, frr etc).

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_bridge.h |  1 +
 net/bridge/br_mdb.c            | 42 ++++++++++++++++++++++++++----------------
 net/bridge/br_multicast.c      |  7 +++++--
 net/bridge/br_private.h        |  3 ++-
 4 files changed, 34 insertions(+), 19 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index 3e6377c865eb..1054f151078d 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -458,6 +458,7 @@ enum {
 	MDBA_MDB_EATTR_SRC_LIST,
 	MDBA_MDB_EATTR_GROUP_MODE,
 	MDBA_MDB_EATTR_SOURCE,
+	MDBA_MDB_EATTR_RTPROT,
 	__MDBA_MDB_EATTR_MAX
 };
 #define MDBA_MDB_EATTR_MAX (__MDBA_MDB_EATTR_MAX - 1)
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 7f9ca5c20120..b386a5e07698 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -184,6 +184,7 @@ static int __mdb_fill_info(struct sk_buff *skb,
 			MDBA_MDB_EATTR_TIMER,
 			br_timer_value(mtimer)))
 		goto nest_err;
+
 	switch (mp->addr.proto) {
 	case htons(ETH_P_IP):
 		dump_srcs_mode = !!(mp->br->multicast_igmp_version == 3);
@@ -206,10 +207,15 @@ static int __mdb_fill_info(struct sk_buff *skb,
 		break;
 #endif
 	}
-	if (p && dump_srcs_mode &&
-	    (__mdb_fill_srcs(skb, p) ||
-	     nla_put_u8(skb, MDBA_MDB_EATTR_GROUP_MODE, p->filter_mode)))
-		goto nest_err;
+	if (p) {
+		if (nla_put_u8(skb, MDBA_MDB_EATTR_RTPROT, p->rt_protocol))
+			goto nest_err;
+		if (dump_srcs_mode &&
+		    (__mdb_fill_srcs(skb, p) ||
+		     nla_put_u8(skb, MDBA_MDB_EATTR_GROUP_MODE,
+				p->filter_mode)))
+			goto nest_err;
+	}
 	nla_nest_end(skb, nest_ent);
 
 	return 0;
@@ -414,6 +420,9 @@ static size_t rtnl_mdb_nlmsg_size(struct net_bridge_port_group *pg)
 	if (!pg)
 		goto out;
 
+	/* MDBA_MDB_EATTR_RTPROT */
+	nlmsg_size += nla_total_size(sizeof(u8));
+
 	switch (pg->addr.proto) {
 	case htons(ETH_P_IP):
 		/* MDBA_MDB_EATTR_SOURCE */
@@ -809,16 +818,20 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh,
 }
 
 static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
-			    struct br_ip *group, struct br_mdb_entry *entry,
+			    struct br_mdb_entry *entry,
+			    struct nlattr **mdb_attrs,
 			    struct netlink_ext_ack *extack)
 {
 	struct net_bridge_mdb_entry *mp;
 	struct net_bridge_port_group *p;
 	struct net_bridge_port_group __rcu **pp;
 	unsigned long now = jiffies;
+	struct br_ip group;
 	u8 filter_mode;
 	int err;
 
+	__mdb_entry_to_br_ip(entry, &group, mdb_attrs);
+
 	/* host join errors which can happen before creating the group */
 	if (!port) {
 		/* don't allow any flags for host-joined groups */
@@ -826,15 +839,15 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
 			NL_SET_ERR_MSG_MOD(extack, "Flags are not allowed for host groups");
 			return -EINVAL;
 		}
-		if (!br_multicast_is_star_g(group)) {
+		if (!br_multicast_is_star_g(&group)) {
 			NL_SET_ERR_MSG_MOD(extack, "Groups with sources cannot be manually host joined");
 			return -EINVAL;
 		}
 	}
 
-	mp = br_mdb_ip_get(br, group);
+	mp = br_mdb_ip_get(br, &group);
 	if (!mp) {
-		mp = br_multicast_new_group(br, group);
+		mp = br_multicast_new_group(br, &group);
 		err = PTR_ERR_OR_ZERO(mp);
 		if (err)
 			return err;
@@ -864,11 +877,11 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
 			break;
 	}
 
-	filter_mode = br_multicast_is_star_g(group) ? MCAST_EXCLUDE :
-						      MCAST_INCLUDE;
+	filter_mode = br_multicast_is_star_g(&group) ? MCAST_EXCLUDE :
+						       MCAST_INCLUDE;
 
-	p = br_multicast_new_port_group(port, group, *pp, entry->state, NULL,
-					filter_mode);
+	p = br_multicast_new_port_group(port, &group, *pp, entry->state, NULL,
+					filter_mode, RTPROT_STATIC);
 	if (unlikely(!p)) {
 		NL_SET_ERR_MSG_MOD(extack, "Couldn't allocate new port group");
 		return -ENOMEM;
@@ -887,13 +900,10 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
 			struct nlattr **mdb_attrs,
 			struct netlink_ext_ack *extack)
 {
-	struct br_ip ip;
 	int ret;
 
-	__mdb_entry_to_br_ip(entry, &ip, mdb_attrs);
-
 	spin_lock_bh(&br->multicast_lock);
-	ret = br_mdb_add_group(br, p, &ip, entry, extack);
+	ret = br_mdb_add_group(br, p, entry, mdb_attrs, extack);
 	spin_unlock_bh(&br->multicast_lock);
 
 	return ret;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 4fd690bc848f..b6e7b0ece422 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -795,7 +795,8 @@ struct net_bridge_port_group *br_multicast_new_port_group(
 			struct net_bridge_port_group __rcu *next,
 			unsigned char flags,
 			const unsigned char *src,
-			u8 filter_mode)
+			u8 filter_mode,
+			u8 rt_protocol)
 {
 	struct net_bridge_port_group *p;
 
@@ -807,6 +808,7 @@ struct net_bridge_port_group *br_multicast_new_port_group(
 	p->port = port;
 	p->flags = flags;
 	p->filter_mode = filter_mode;
+	p->rt_protocol = rt_protocol;
 	p->mcast_gc.destroy = br_multicast_destroy_port_group;
 	INIT_HLIST_HEAD(&p->src_list);
 	rcu_assign_pointer(p->next, next);
@@ -892,7 +894,8 @@ static int br_multicast_add_group(struct net_bridge *br,
 			break;
 	}
 
-	p = br_multicast_new_port_group(port, group, *pp, 0, src, filter_mode);
+	p = br_multicast_new_port_group(port, group, *pp, 0, src, filter_mode,
+					RTPROT_KERNEL);
 	if (unlikely(!p))
 		goto err;
 	rcu_assign_pointer(*pp, p);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 0f54a7a7c186..dae7e3526fc7 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -246,6 +246,7 @@ struct net_bridge_port_group {
 	unsigned char			flags;
 	unsigned char			filter_mode;
 	unsigned char			grp_query_rexmit_cnt;
+	unsigned char			rt_protocol;
 
 	struct hlist_head		src_list;
 	unsigned int			src_ents;
@@ -804,7 +805,7 @@ struct net_bridge_port_group *
 br_multicast_new_port_group(struct net_bridge_port *port, struct br_ip *group,
 			    struct net_bridge_port_group __rcu *next,
 			    unsigned char flags, const unsigned char *src,
-			    u8 filter_mode);
+			    u8 filter_mode, u8 rt_protocol);
 int br_mdb_hash_init(struct net_bridge *br);
 void br_mdb_hash_fini(struct net_bridge *br);
 void br_mdb_notify(struct net_device *dev, struct net_bridge_mdb_entry *mp,
-- 
cgit v1.2.3


From 8266a0491e92d39dc9af739e8380a0daa9b8836b Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 22 Sep 2020 10:30:24 +0300
Subject: net: bridge: mcast: handle port group filter modes

We need to handle group filter mode transitions and initial state.
To change a port group's INCLUDE -> EXCLUDE mode (or when we have added
a new port group in EXCLUDE mode) we need to add that port to all of
*,G ports' S,G entries for proper replication. When the EXCLUDE state is
changed by an IGMPv3 report, br_multicast_star_g_handle_mode() must be
called after the source list processing because the assumption is that
all of the group's S,G entries will be created before transitioning to
EXCLUDE mode, i.e. most importantly its blocked entries will already be
added so it will not get automatically added to them.
The transition EXCLUDE -> INCLUDE happens only when a port group timer
expires; it requires us to remove that port from all of *,G ports' S,G
entries where it was automatically added previously.
Finally when we are adding a new S,G entry we must add all of *,G's
EXCLUDE ports to it.
In order to distinguish automatically added *,G EXCLUDE ports we have a
new port group flag - MDB_PG_FLAGS_STAR_EXCL.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_bridge.h |   1 +
 net/bridge/br_mdb.c            |  25 +++++-
 net/bridge/br_multicast.c      | 172 +++++++++++++++++++++++++++++++++++++++++
 net/bridge/br_private.h        |  20 +++++
 4 files changed, 216 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index 1054f151078d..e4bd30a25f6b 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -518,6 +518,7 @@ struct br_mdb_entry {
 	__u8 state;
 #define MDB_FLAGS_OFFLOAD	(1 << 0)
 #define MDB_FLAGS_FAST_LEAVE	(1 << 1)
+#define MDB_FLAGS_STAR_EXCL	(1 << 2)
 	__u8 flags;
 	__u16 vid;
 	struct {
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 4e3a5cefc626..28cd35a9cf37 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -62,6 +62,8 @@ static void __mdb_entry_fill_flags(struct br_mdb_entry *e, unsigned char flags)
 		e->flags |= MDB_FLAGS_OFFLOAD;
 	if (flags & MDB_PG_FLAGS_FAST_LEAVE)
 		e->flags |= MDB_FLAGS_FAST_LEAVE;
+	if (flags & MDB_PG_FLAGS_STAR_EXCL)
+		e->flags |= MDB_FLAGS_STAR_EXCL;
 }
 
 static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip,
@@ -822,11 +824,11 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
 			    struct nlattr **mdb_attrs,
 			    struct netlink_ext_ack *extack)
 {
-	struct net_bridge_mdb_entry *mp;
+	struct net_bridge_mdb_entry *mp, *star_mp;
 	struct net_bridge_port_group *p;
 	struct net_bridge_port_group __rcu **pp;
+	struct br_ip group, star_group;
 	unsigned long now = jiffies;
-	struct br_ip group;
 	u8 filter_mode;
 	int err;
 
@@ -890,6 +892,25 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
 	if (entry->state == MDB_TEMPORARY)
 		mod_timer(&p->timer, now + br->multicast_membership_interval);
 	br_mdb_notify(br->dev, mp, p, RTM_NEWMDB);
+	/* if we are adding a new EXCLUDE port group (*,G) it needs to be also
+	 * added to all S,G entries for proper replication, if we are adding
+	 * a new INCLUDE port (S,G) then all of *,G EXCLUDE ports need to be
+	 * added to it for proper replication
+	 */
+	if (br_multicast_should_handle_mode(br, group.proto)) {
+		switch (filter_mode) {
+		case MCAST_EXCLUDE:
+			br_multicast_star_g_handle_mode(p, MCAST_EXCLUDE);
+			break;
+		case MCAST_INCLUDE:
+			star_group = p->key.addr;
+			memset(&star_group.src, 0, sizeof(star_group.src));
+			star_mp = br_mdb_ip_get(br, &star_group);
+			if (star_mp)
+				br_multicast_sg_add_exclude_ports(star_mp, p);
+			break;
+		}
+	}
 
 	return 0;
 }
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index ece8ac805e98..f39bbd733722 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -73,6 +73,8 @@ __br_multicast_add_group(struct net_bridge *br,
 			 const unsigned char *src,
 			 u8 filter_mode,
 			 bool igmpv2_mldv1);
+static void br_multicast_find_del_pg(struct net_bridge *br,
+				     struct net_bridge_port_group *pg);
 
 static struct net_bridge_port_group *
 br_sg_port_find(struct net_bridge *br,
@@ -195,8 +197,163 @@ static bool br_port_group_equal(struct net_bridge_port_group *p,
 	return ether_addr_equal(src, p->eth_addr);
 }
 
+static void __fwd_add_star_excl(struct net_bridge_port_group *pg,
+				struct br_ip *sg_ip)
+{
+	struct net_bridge_port_group_sg_key sg_key;
+	struct net_bridge *br = pg->key.port->br;
+	struct net_bridge_port_group *src_pg;
+
+	memset(&sg_key, 0, sizeof(sg_key));
+	sg_key.port = pg->key.port;
+	sg_key.addr = *sg_ip;
+	if (br_sg_port_find(br, &sg_key))
+		return;
+
+	src_pg = __br_multicast_add_group(br, pg->key.port, sg_ip, pg->eth_addr,
+					  MCAST_INCLUDE, false);
+	if (IS_ERR_OR_NULL(src_pg) ||
+	    src_pg->rt_protocol != RTPROT_KERNEL)
+		return;
+
+	src_pg->flags |= MDB_PG_FLAGS_STAR_EXCL;
+}
+
+static void __fwd_del_star_excl(struct net_bridge_port_group *pg,
+				struct br_ip *sg_ip)
+{
+	struct net_bridge_port_group_sg_key sg_key;
+	struct net_bridge *br = pg->key.port->br;
+	struct net_bridge_port_group *src_pg;
+
+	memset(&sg_key, 0, sizeof(sg_key));
+	sg_key.port = pg->key.port;
+	sg_key.addr = *sg_ip;
+	src_pg = br_sg_port_find(br, &sg_key);
+	if (!src_pg || !(src_pg->flags & MDB_PG_FLAGS_STAR_EXCL) ||
+	    src_pg->rt_protocol != RTPROT_KERNEL)
+		return;
+
+	br_multicast_find_del_pg(br, src_pg);
+}
+
+/* When a port group transitions to (or is added as) EXCLUDE we need to add it
+ * to all other ports' S,G entries which are not blocked by the current group
+ * for proper replication, the assumption is that any S,G blocked entries
+ * are already added so the S,G,port lookup should skip them.
+ * When a port group transitions from EXCLUDE -> INCLUDE mode or is being
+ * deleted we need to remove it from all ports' S,G entries where it was
+ * automatically installed before (i.e. where it's MDB_PG_FLAGS_STAR_EXCL).
+ */
+void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg,
+				     u8 filter_mode)
+{
+	struct net_bridge *br = pg->key.port->br;
+	struct net_bridge_port_group *pg_lst;
+	struct net_bridge_mdb_entry *mp;
+	struct br_ip sg_ip;
+
+	if (WARN_ON(!br_multicast_is_star_g(&pg->key.addr)))
+		return;
+
+	mp = br_mdb_ip_get(br, &pg->key.addr);
+	if (!mp)
+		return;
+
+	memset(&sg_ip, 0, sizeof(sg_ip));
+	sg_ip = pg->key.addr;
+	for (pg_lst = mlock_dereference(mp->ports, br);
+	     pg_lst;
+	     pg_lst = mlock_dereference(pg_lst->next, br)) {
+		struct net_bridge_group_src *src_ent;
+
+		if (pg_lst == pg)
+			continue;
+		hlist_for_each_entry(src_ent, &pg_lst->src_list, node) {
+			if (!(src_ent->flags & BR_SGRP_F_INSTALLED))
+				continue;
+			sg_ip.src = src_ent->addr.src;
+			switch (filter_mode) {
+			case MCAST_INCLUDE:
+				__fwd_del_star_excl(pg, &sg_ip);
+				break;
+			case MCAST_EXCLUDE:
+				__fwd_add_star_excl(pg, &sg_ip);
+				break;
+			}
+		}
+	}
+}
+
+static void br_multicast_sg_del_exclude_ports(struct net_bridge_mdb_entry *sgmp)
+{
+	struct net_bridge_port_group __rcu **pp;
+	struct net_bridge_port_group *p;
+
+	/* *,G exclude ports are only added to S,G entries */
+	if (WARN_ON(br_multicast_is_star_g(&sgmp->addr)))
+		return;
+
+	/* we need the STAR_EXCLUDE ports if there are non-STAR_EXCLUDE ports
+	 * we should ignore perm entries since they're managed by user-space
+	 */
+	for (pp = &sgmp->ports;
+	     (p = mlock_dereference(*pp, sgmp->br)) != NULL;
+	     pp = &p->next)
+		if (!(p->flags & (MDB_PG_FLAGS_STAR_EXCL |
+				  MDB_PG_FLAGS_PERMANENT)))
+			return;
+
+	for (pp = &sgmp->ports;
+	     (p = mlock_dereference(*pp, sgmp->br)) != NULL;) {
+		if (!(p->flags & MDB_PG_FLAGS_PERMANENT))
+			br_multicast_del_pg(sgmp, p, pp);
+		else
+			pp = &p->next;
+	}
+}
+
+void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp,
+				       struct net_bridge_port_group *sg)
+{
+	struct net_bridge_port_group_sg_key sg_key;
+	struct net_bridge *br = star_mp->br;
+	struct net_bridge_port_group *pg;
+
+	if (WARN_ON(br_multicast_is_star_g(&sg->key.addr)))
+		return;
+	if (WARN_ON(!br_multicast_is_star_g(&star_mp->addr)))
+		return;
+
+	memset(&sg_key, 0, sizeof(sg_key));
+	sg_key.addr = sg->key.addr;
+	/* we need to add all exclude ports to the S,G */
+	for (pg = mlock_dereference(star_mp->ports, br);
+	     pg;
+	     pg = mlock_dereference(pg->next, br)) {
+		struct net_bridge_port_group *src_pg;
+
+		if (pg == sg || pg->filter_mode == MCAST_INCLUDE)
+			continue;
+
+		sg_key.port = pg->key.port;
+		if (br_sg_port_find(br, &sg_key))
+			continue;
+
+		src_pg = __br_multicast_add_group(br, pg->key.port,
+						  &sg->key.addr,
+						  sg->eth_addr,
+						  MCAST_INCLUDE, false);
+		if (IS_ERR_OR_NULL(src_pg) ||
+		    src_pg->rt_protocol != RTPROT_KERNEL)
+			continue;
+		src_pg->flags |= MDB_PG_FLAGS_STAR_EXCL;
+	}
+}
+
 static void br_multicast_fwd_src_add(struct net_bridge_group_src *src)
 {
+	struct net_bridge_mdb_entry *star_mp;
 	struct net_bridge_port_group *sg;
 	struct br_ip sg_ip;
 
@@ -211,6 +368,7 @@ static void br_multicast_fwd_src_add(struct net_bridge_group_src *src)
 	if (IS_ERR_OR_NULL(sg))
 		return;
 	src->flags |= BR_SGRP_F_INSTALLED;
+	sg->flags &= ~MDB_PG_FLAGS_STAR_EXCL;
 
 	/* if it was added by user-space as perm we can skip next steps */
 	if (sg->rt_protocol != RTPROT_KERNEL &&
@@ -219,6 +377,11 @@ static void br_multicast_fwd_src_add(struct net_bridge_group_src *src)
 
 	/* the kernel is now responsible for removing this S,G */
 	del_timer(&sg->timer);
+	star_mp = br_mdb_ip_get(src->br, &src->pg->key.addr);
+	if (!star_mp)
+		return;
+
+	br_multicast_sg_add_exclude_ports(star_mp, sg);
 }
 
 static void br_multicast_fwd_src_remove(struct net_bridge_group_src *src)
@@ -349,6 +512,10 @@ void br_multicast_del_pg(struct net_bridge_mdb_entry *mp,
 	hlist_for_each_entry_safe(ent, tmp, &pg->src_list, node)
 		br_multicast_del_group_src(ent);
 	br_mdb_notify(br->dev, mp, pg, RTM_DELMDB);
+	if (!br_multicast_is_star_g(&mp->addr))
+		br_multicast_sg_del_exclude_ports(mp);
+	else
+		br_multicast_star_g_handle_mode(pg, MCAST_INCLUDE);
 	hlist_add_head(&pg->mcast_gc.gc_node, &br->mcast_gc_list);
 	queue_work(system_long_wq, &br->mcast_gc_work);
 
@@ -407,6 +574,9 @@ static void br_multicast_port_group_expired(struct timer_list *t)
 	} else if (changed) {
 		struct net_bridge_mdb_entry *mp = br_mdb_ip_get(br, &pg->key.addr);
 
+		if (changed && br_multicast_is_star_g(&pg->key.addr))
+			br_multicast_star_g_handle_mode(pg, MCAST_INCLUDE);
+
 		if (WARN_ON(!mp))
 			goto out;
 		br_mdb_notify(br->dev, mp, pg, RTM_NEWMDB);
@@ -1641,6 +1811,7 @@ static bool br_multicast_isexc(struct net_bridge_port_group *pg,
 	switch (pg->filter_mode) {
 	case MCAST_INCLUDE:
 		__grp_src_isexc_incl(pg, srcs, nsrcs, src_size);
+		br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE);
 		changed = true;
 		break;
 	case MCAST_EXCLUDE:
@@ -1853,6 +2024,7 @@ static bool br_multicast_toex(struct net_bridge_port_group *pg,
 	switch (pg->filter_mode) {
 	case MCAST_INCLUDE:
 		__grp_src_toex_incl(pg, srcs, nsrcs, src_size);
+		br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE);
 		changed = true;
 		break;
 	case MCAST_EXCLUDE:
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 93d76b3dfc35..128d2d0417a0 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -213,6 +213,7 @@ struct net_bridge_fdb_entry {
 #define MDB_PG_FLAGS_PERMANENT	BIT(0)
 #define MDB_PG_FLAGS_OFFLOAD	BIT(1)
 #define MDB_PG_FLAGS_FAST_LEAVE	BIT(2)
+#define MDB_PG_FLAGS_STAR_EXCL	BIT(3)
 
 #define PG_SRC_ENT_LIMIT	32
 
@@ -833,6 +834,10 @@ void br_mdb_init(void);
 void br_mdb_uninit(void);
 void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify);
 void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify);
+void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg,
+				     u8 filter_mode);
+void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp,
+				       struct net_bridge_port_group *sg);
 
 #define mlock_dereference(X, br) \
 	rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
@@ -895,6 +900,21 @@ static inline bool br_multicast_is_star_g(const struct br_ip *ip)
 	}
 }
 
+static inline bool br_multicast_should_handle_mode(const struct net_bridge *br,
+						   __be16 proto)
+{
+	switch (proto) {
+	case htons(ETH_P_IP):
+		return !!(br->multicast_igmp_version == 3);
+#if IS_ENABLED(CONFIG_IPV6)
+	case htons(ETH_P_IPV6):
+		return !!(br->multicast_mld_version == 2);
+#endif
+	default:
+		return false;
+	}
+}
+
 static inline int br_multicast_igmp_type(const struct sk_buff *skb)
 {
 	return BR_INPUT_SKB_CB(skb)->igmp;
-- 
cgit v1.2.3


From 9116ffbf1dd71f953ffda4198d01f82d3ca16df8 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 22 Sep 2020 10:30:25 +0300
Subject: net: bridge: mcast: add support for blocked port groups

When excluding S,G entries we need a way to block a particular S,G,port.
The new port group flag is managed based on the source's timer as per
RFCs 3376 and 3810. When a source expires and its port group is in
EXCLUDE mode, it will be blocked.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_bridge.h |  1 +
 net/bridge/br_mdb.c            |  2 ++
 net/bridge/br_multicast.c      | 49 ++++++++++++++++++++++++++++++++++++------
 net/bridge/br_private.h        |  1 +
 4 files changed, 47 insertions(+), 6 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index e4bd30a25f6b..4c687686aa8f 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -519,6 +519,7 @@ struct br_mdb_entry {
 #define MDB_FLAGS_OFFLOAD	(1 << 0)
 #define MDB_FLAGS_FAST_LEAVE	(1 << 1)
 #define MDB_FLAGS_STAR_EXCL	(1 << 2)
+#define MDB_FLAGS_BLOCKED	(1 << 3)
 	__u8 flags;
 	__u16 vid;
 	struct {
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 28cd35a9cf37..e15bab19a012 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -64,6 +64,8 @@ static void __mdb_entry_fill_flags(struct br_mdb_entry *e, unsigned char flags)
 		e->flags |= MDB_FLAGS_FAST_LEAVE;
 	if (flags & MDB_PG_FLAGS_STAR_EXCL)
 		e->flags |= MDB_FLAGS_STAR_EXCL;
+	if (flags & MDB_PG_FLAGS_BLOCKED)
+		e->flags |= MDB_FLAGS_BLOCKED;
 }
 
 static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip,
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index f39bbd733722..11d224c01914 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -72,7 +72,8 @@ __br_multicast_add_group(struct net_bridge *br,
 			 struct br_ip *group,
 			 const unsigned char *src,
 			 u8 filter_mode,
-			 bool igmpv2_mldv1);
+			 bool igmpv2_mldv1,
+			 bool blocked);
 static void br_multicast_find_del_pg(struct net_bridge *br,
 				     struct net_bridge_port_group *pg);
 
@@ -211,7 +212,7 @@ static void __fwd_add_star_excl(struct net_bridge_port_group *pg,
 		return;
 
 	src_pg = __br_multicast_add_group(br, pg->key.port, sg_ip, pg->eth_addr,
-					  MCAST_INCLUDE, false);
+					  MCAST_INCLUDE, false, false);
 	if (IS_ERR_OR_NULL(src_pg) ||
 	    src_pg->rt_protocol != RTPROT_KERNEL)
 		return;
@@ -343,7 +344,7 @@ void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp,
 		src_pg = __br_multicast_add_group(br, pg->key.port,
 						  &sg->key.addr,
 						  sg->eth_addr,
-						  MCAST_INCLUDE, false);
+						  MCAST_INCLUDE, false, false);
 		if (IS_ERR_OR_NULL(src_pg) ||
 		    src_pg->rt_protocol != RTPROT_KERNEL)
 			continue;
@@ -364,7 +365,8 @@ static void br_multicast_fwd_src_add(struct net_bridge_group_src *src)
 	sg_ip = src->pg->key.addr;
 	sg_ip.src = src->addr.src;
 	sg = __br_multicast_add_group(src->br, src->pg->key.port, &sg_ip,
-				      src->pg->eth_addr, MCAST_INCLUDE, false);
+				      src->pg->eth_addr, MCAST_INCLUDE, false,
+				      !timer_pending(&src->timer));
 	if (IS_ERR_OR_NULL(sg))
 		return;
 	src->flags |= BR_SGRP_F_INSTALLED;
@@ -415,9 +417,38 @@ static void br_multicast_fwd_src_remove(struct net_bridge_group_src *src)
 	src->flags &= ~BR_SGRP_F_INSTALLED;
 }
 
+/* install S,G and based on src's timer enable or disable forwarding */
 static void br_multicast_fwd_src_handle(struct net_bridge_group_src *src)
 {
+	struct net_bridge_port_group_sg_key sg_key;
+	struct net_bridge_port_group *sg;
+	u8 old_flags;
+
 	br_multicast_fwd_src_add(src);
+
+	memset(&sg_key, 0, sizeof(sg_key));
+	sg_key.addr = src->pg->key.addr;
+	sg_key.addr.src = src->addr.src;
+	sg_key.port = src->pg->key.port;
+
+	sg = br_sg_port_find(src->br, &sg_key);
+	if (!sg || (sg->flags & MDB_PG_FLAGS_PERMANENT))
+		return;
+
+	old_flags = sg->flags;
+	if (timer_pending(&src->timer))
+		sg->flags &= ~MDB_PG_FLAGS_BLOCKED;
+	else
+		sg->flags |= MDB_PG_FLAGS_BLOCKED;
+
+	if (old_flags != sg->flags) {
+		struct net_bridge_mdb_entry *sg_mp;
+
+		sg_mp = br_mdb_ip_get(src->br, &sg_key.addr);
+		if (!sg_mp)
+			return;
+		br_mdb_notify(src->br->dev, sg_mp, sg, RTM_NEWMDB);
+	}
 }
 
 static void br_multicast_destroy_mdb_entry(struct net_bridge_mcast_gc *gc)
@@ -995,7 +1026,10 @@ static void br_multicast_group_src_expired(struct timer_list *t)
 		if (!hlist_empty(&pg->src_list))
 			goto out;
 		br_multicast_find_del_pg(br, pg);
+	} else {
+		br_multicast_fwd_src_handle(src);
 	}
+
 out:
 	spin_unlock(&br->multicast_lock);
 }
@@ -1131,7 +1165,8 @@ __br_multicast_add_group(struct net_bridge *br,
 			 struct br_ip *group,
 			 const unsigned char *src,
 			 u8 filter_mode,
-			 bool igmpv2_mldv1)
+			 bool igmpv2_mldv1,
+			 bool blocked)
 {
 	struct net_bridge_port_group __rcu **pp;
 	struct net_bridge_port_group *p = NULL;
@@ -1167,6 +1202,8 @@ __br_multicast_add_group(struct net_bridge *br,
 		goto out;
 	}
 	rcu_assign_pointer(*pp, p);
+	if (blocked)
+		p->flags |= MDB_PG_FLAGS_BLOCKED;
 	br_mdb_notify(br->dev, mp, p, RTM_NEWMDB);
 
 found:
@@ -1189,7 +1226,7 @@ static int br_multicast_add_group(struct net_bridge *br,
 
 	spin_lock(&br->multicast_lock);
 	pg = __br_multicast_add_group(br, port, group, src, filter_mode,
-				      igmpv2_mldv1);
+				      igmpv2_mldv1, false);
 	/* NULL is considered valid for host joined groups */
 	err = IS_ERR(pg) ? PTR_ERR(pg) : 0;
 	spin_unlock(&br->multicast_lock);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 128d2d0417a0..345118e35c42 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -214,6 +214,7 @@ struct net_bridge_fdb_entry {
 #define MDB_PG_FLAGS_OFFLOAD	BIT(1)
 #define MDB_PG_FLAGS_FAST_LEAVE	BIT(2)
 #define MDB_PG_FLAGS_STAR_EXCL	BIT(3)
+#define MDB_PG_FLAGS_BLOCKED	BIT(4)
 
 #define PG_SRC_ENT_LIMIT	32
 
-- 
cgit v1.2.3


From 09a5f210f67eea4a2176820c3bc398747a564705 Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Wed, 16 Sep 2020 16:43:24 +0800
Subject: RDMA/hns: Add support for CQE in size of 64 Bytes

The new version of RoCEE supports CQEs of either 32 or 64 bytes. Bus
performance can be improved by using the larger CQE size.

Link: https://lore.kernel.org/r/1600245806-56321-3-git-send-email-liweihang@huawei.com
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/hns/hns_roce_cq.c     | 22 ++++++++++++++++++++--
 drivers/infiniband/hw/hns/hns_roce_device.h |  6 +++++-
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c  |  5 ++---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.h  |  2 +-
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 20 +++++++++++++-------
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h  |  7 +++++--
 drivers/infiniband/hw/hns/hns_roce_main.c   |  2 ++
 include/uapi/rdma/hns-abi.h                 |  4 +++-
 8 files changed, 51 insertions(+), 17 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index c5acf3332519..fff3e624e261 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -150,7 +150,7 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
 	int err;
 
 	buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + HNS_HW_PAGE_SHIFT;
-	buf_attr.region[0].size = hr_cq->cq_depth * hr_dev->caps.cq_entry_sz;
+	buf_attr.region[0].size = hr_cq->cq_depth * hr_cq->cqe_size;
 	buf_attr.region[0].hopnum = hr_dev->caps.cqe_hop_num;
 	buf_attr.region_count = 1;
 	buf_attr.fixed_page = true;
@@ -224,6 +224,21 @@ static void free_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
 	}
 }
 
+static void set_cqe_size(struct hns_roce_cq *hr_cq, struct ib_udata *udata,
+			 struct hns_roce_ib_create_cq *ucmd)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device);
+
+	if (udata) {
+		if (udata->inlen >= offsetofend(typeof(*ucmd), cqe_size))
+			hr_cq->cqe_size = ucmd->cqe_size;
+		else
+			hr_cq->cqe_size = HNS_ROCE_V2_CQE_SIZE;
+	} else {
+		hr_cq->cqe_size = hr_dev->caps.cqe_sz;
+	}
+}
+
 int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
 		       struct ib_udata *udata)
 {
@@ -258,7 +273,8 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
 	INIT_LIST_HEAD(&hr_cq->rq_list);
 
 	if (udata) {
-		ret = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
+		ret = ib_copy_from_udata(&ucmd, udata,
+					 min(sizeof(ucmd), udata->inlen));
 		if (ret) {
 			ibdev_err(ibdev, "Failed to copy CQ udata, err %d\n",
 				  ret);
@@ -266,6 +282,8 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
 		}
 	}
 
+	set_cqe_size(hr_cq, udata, &ucmd);
+
 	ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr);
 	if (ret) {
 		ibdev_err(ibdev, "Failed to alloc CQ buf, err %d\n", ret);
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index ef92afdcee34..f935089b86ea 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -81,6 +81,9 @@
 
 #define HNS_ROCE_V3_EQE_SIZE 0x40
 
+#define HNS_ROCE_V2_CQE_SIZE 32
+#define HNS_ROCE_V3_CQE_SIZE 64
+
 #define HNS_ROCE_SL_SHIFT			28
 #define HNS_ROCE_TCLASS_SHIFT			20
 #define HNS_ROCE_FLOW_LABEL_MASK		0xfffff
@@ -469,6 +472,7 @@ struct hns_roce_cq {
 	void __iomem			*cq_db_l;
 	u16				*tptr_addr;
 	int				arm_sn;
+	int				cqe_size;
 	unsigned long			cqn;
 	u32				vector;
 	atomic_t			refcount;
@@ -796,7 +800,7 @@ struct hns_roce_caps {
 	int		num_pds;
 	int		reserved_pds;
 	u32		mtt_entry_sz;
-	u32		cq_entry_sz;
+	u32		cqe_sz;
 	u32		page_size_cap;
 	u32		reserved_lkey;
 	int		mtpt_entry_sz;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index e0a171c79ad8..e66661777cb8 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -1475,7 +1475,7 @@ static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev)
 	caps->cqc_entry_sz	= HNS_ROCE_V1_CQC_ENTRY_SIZE;
 	caps->mtpt_entry_sz	= HNS_ROCE_V1_MTPT_ENTRY_SIZE;
 	caps->mtt_entry_sz	= HNS_ROCE_V1_MTT_ENTRY_SIZE;
-	caps->cq_entry_sz	= HNS_ROCE_V1_CQE_ENTRY_SIZE;
+	caps->cqe_sz		= HNS_ROCE_V1_CQE_SIZE;
 	caps->page_size_cap	= HNS_ROCE_V1_PAGE_SIZE_SUPPORT;
 	caps->reserved_lkey	= 0;
 	caps->reserved_pds	= 0;
@@ -1896,8 +1896,7 @@ static int hns_roce_v1_write_mtpt(struct hns_roce_dev *hr_dev, void *mb_buf,
 
 static void *get_cqe(struct hns_roce_cq *hr_cq, int n)
 {
-	return hns_roce_buf_offset(hr_cq->mtr.kmem,
-				   n * HNS_ROCE_V1_CQE_ENTRY_SIZE);
+	return hns_roce_buf_offset(hr_cq->mtr.kmem, n * HNS_ROCE_V1_CQE_SIZE);
 }
 
 static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
index 52307b2c7100..5996892a1b96 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
@@ -74,7 +74,7 @@
 #define HNS_ROCE_V1_MTPT_ENTRY_SIZE			64
 #define HNS_ROCE_V1_MTT_ENTRY_SIZE			64
 
-#define HNS_ROCE_V1_CQE_ENTRY_SIZE			32
+#define HNS_ROCE_V1_CQE_SIZE				32
 #define HNS_ROCE_V1_PAGE_SIZE_SUPPORT			0xFFFFF000
 
 #define HNS_ROCE_V1_TABLE_CHUNK_SIZE			(1 << 17)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index fe43c15a4793..835fbd74ce98 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -1690,7 +1690,7 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
 	caps->mtpt_entry_sz	= HNS_ROCE_V2_MTPT_ENTRY_SZ;
 	caps->mtt_entry_sz	= HNS_ROCE_V2_MTT_ENTRY_SZ;
 	caps->idx_entry_sz	= HNS_ROCE_V2_IDX_ENTRY_SZ;
-	caps->cq_entry_sz	= HNS_ROCE_V2_CQE_ENTRY_SIZE;
+	caps->cqe_sz		= HNS_ROCE_V2_CQE_SIZE;
 	caps->page_size_cap	= HNS_ROCE_V2_PAGE_SIZE_SUPPORTED;
 	caps->reserved_lkey	= 0;
 	caps->reserved_pds	= 0;
@@ -1770,6 +1770,7 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
 	if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
 		caps->aeqe_size = HNS_ROCE_V3_EQE_SIZE;
 		caps->ceqe_size = HNS_ROCE_V3_EQE_SIZE;
+		caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE;
 	}
 }
 
@@ -1862,7 +1863,7 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
 	caps->max_sq_desc_sz	     = resp_a->max_sq_desc_sz;
 	caps->max_rq_desc_sz	     = resp_a->max_rq_desc_sz;
 	caps->max_srq_desc_sz	     = resp_a->max_srq_desc_sz;
-	caps->cq_entry_sz	     = resp_a->cq_entry_sz;
+	caps->cqe_sz		     = HNS_ROCE_V2_CQE_SIZE;
 
 	caps->mtpt_entry_sz	     = resp_b->mtpt_entry_sz;
 	caps->irrl_entry_sz	     = resp_b->irrl_entry_sz;
@@ -1993,6 +1994,7 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
 	if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
 		caps->ceqe_size = HNS_ROCE_V3_EQE_SIZE;
 		caps->aeqe_size = HNS_ROCE_V3_EQE_SIZE;
+		caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE;
 	}
 
 	calc_pg_sz(caps->num_qps, caps->qpc_entry_sz, caps->qpc_hop_num,
@@ -2771,8 +2773,7 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw)
 
 static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n)
 {
-	return hns_roce_buf_offset(hr_cq->mtr.kmem,
-				   n * HNS_ROCE_V2_CQE_ENTRY_SIZE);
+	return hns_roce_buf_offset(hr_cq->mtr.kmem, n * hr_cq->cqe_size);
 }
 
 static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n)
@@ -2872,6 +2873,10 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
 	roce_set_field(cq_context->byte_8_cqn, V2_CQC_BYTE_8_CQN_M,
 		       V2_CQC_BYTE_8_CQN_S, hr_cq->cqn);
 
+	roce_set_field(cq_context->byte_8_cqn, V2_CQC_BYTE_8_CQE_SIZE_M,
+		       V2_CQC_BYTE_8_CQE_SIZE_S, hr_cq->cqe_size ==
+		       HNS_ROCE_V3_CQE_SIZE ? 1 : 0);
+
 	cq_context->cqe_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0]));
 
 	roce_set_field(cq_context->byte_16_hop_addr,
@@ -3039,7 +3044,8 @@ out:
 }
 
 static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
-			   struct hns_roce_v2_cqe *cqe, struct ib_wc *wc)
+			   struct hns_roce_cq *cq, struct hns_roce_v2_cqe *cqe,
+			   struct ib_wc *wc)
 {
 	static const struct {
 		u32 cqe_status;
@@ -3080,7 +3086,7 @@ static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
 
 	ibdev_err(&hr_dev->ib_dev, "error cqe status 0x%x:\n", cqe_status);
 	print_hex_dump(KERN_ERR, "", DUMP_PREFIX_NONE, 16, 4, cqe,
-		       sizeof(*cqe), false);
+		       cq->cqe_size, false);
 
 	/*
 	 * For hns ROCEE, GENERAL_ERR is an error type that is not defined in
@@ -3177,7 +3183,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
 		++wq->tail;
 	}
 
-	get_cqe_status(hr_dev, *cur_qp, cqe, wc);
+	get_cqe_status(hr_dev, *cur_qp, hr_cq, cqe, wc);
 	if (unlikely(wc->status != IB_WC_SUCCESS))
 		return 0;
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index f98c55a30784..ca6b0554a42c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -86,7 +86,6 @@
 #define HNS_ROCE_V2_MTPT_ENTRY_SZ		64
 #define HNS_ROCE_V2_MTT_ENTRY_SZ		64
 #define HNS_ROCE_V2_IDX_ENTRY_SZ		4
-#define HNS_ROCE_V2_CQE_ENTRY_SIZE		32
 #define HNS_ROCE_V2_SCCC_ENTRY_SZ		32
 #define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ		PAGE_SIZE
 #define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ		PAGE_SIZE
@@ -309,6 +308,9 @@ struct hns_roce_v2_cq_context {
 #define	V2_CQC_BYTE_8_CQN_S 0
 #define V2_CQC_BYTE_8_CQN_M GENMASK(23, 0)
 
+#define V2_CQC_BYTE_8_CQE_SIZE_S 27
+#define V2_CQC_BYTE_8_CQE_SIZE_M GENMASK(28, 27)
+
 #define	V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_S 0
 #define V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_M GENMASK(19, 0)
 
@@ -896,6 +898,7 @@ struct hns_roce_v2_cqe {
 	u8	smac[4];
 	__le32	byte_28;
 	__le32	byte_32;
+	__le32	rsv[8];
 };
 
 #define	V2_CQE_BYTE_4_OPCODE_S 0
@@ -1571,7 +1574,7 @@ struct hns_roce_query_pf_caps_a {
 	u8 max_sq_desc_sz;
 	u8 max_rq_desc_sz;
 	u8 max_srq_desc_sz;
-	u8 cq_entry_sz;
+	u8 cqe_sz;
 };
 
 struct hns_roce_query_pf_caps_b {
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 2b4d75733e72..6f129e0f6d37 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -323,6 +323,8 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
 		mutex_init(&context->page_mutex);
 	}
 
+	resp.cqe_size = hr_dev->caps.cqe_sz;
+
 	ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
 	if (ret)
 		goto error_fail_copy_to_udata;
diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h
index eb76b38a00d4..9ec85f76e9ac 100644
--- a/include/uapi/rdma/hns-abi.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -39,6 +39,8 @@
 struct hns_roce_ib_create_cq {
 	__aligned_u64 buf_addr;
 	__aligned_u64 db_addr;
+	__u32 cqe_size;
+	__u32 reserved;
 };
 
 struct hns_roce_ib_create_cq_resp {
@@ -73,7 +75,7 @@ struct hns_roce_ib_create_qp_resp {
 
 struct hns_roce_ib_alloc_ucontext_resp {
 	__u32	qp_tab_size;
-	__u32	reserved;
+	__u32	cqe_size;
 };
 
 struct hns_roce_ib_alloc_pd_resp {
-- 
cgit v1.2.3


From 77ebdabe8de7c02f43c6de3357f79ff96f9f0579 Mon Sep 17 00:00:00 2001
From: Elena Petrova <lenaptr@google.com>
Date: Fri, 18 Sep 2020 16:42:16 +0100
Subject: crypto: af_alg - add extra parameters for DRBG interface
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extend the user-space RNG interface:
  1. Add entropy input via ALG_SET_DRBG_ENTROPY setsockopt option;
  2. Add additional data input via sendmsg syscall.

This allows DRBG to be tested with test vectors, for example for the
purpose of CAVP testing, which otherwise isn't possible.

To prevent erroneous use of entropy input, it is hidden under
CRYPTO_USER_API_RNG_CAVP config option and requires CAP_SYS_ADMIN to
succeed.

Signed-off-by: Elena Petrova <lenaptr@google.com>
Acked-by: Stephan Müller <smueller@chronox.de>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 Documentation/crypto/userspace-if.rst |  20 +++-
 crypto/Kconfig                        |   9 ++
 crypto/af_alg.c                       |  14 ++-
 crypto/algif_rng.c                    | 175 +++++++++++++++++++++++++++++++---
 include/crypto/if_alg.h               |   1 +
 include/uapi/linux/if_alg.h           |   1 +
 6 files changed, 205 insertions(+), 15 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/crypto/userspace-if.rst b/Documentation/crypto/userspace-if.rst
index 52019e905900..b45dabbf69d6 100644
--- a/Documentation/crypto/userspace-if.rst
+++ b/Documentation/crypto/userspace-if.rst
@@ -296,15 +296,16 @@ follows:
 
     struct sockaddr_alg sa = {
         .salg_family = AF_ALG,
-        .salg_type = "rng", /* this selects the symmetric cipher */
-        .salg_name = "drbg_nopr_sha256" /* this is the cipher name */
+        .salg_type = "rng", /* this selects the random number generator */
+        .salg_name = "drbg_nopr_sha256" /* this is the RNG name */
     };
 
 
 Depending on the RNG type, the RNG must be seeded. The seed is provided
 using the setsockopt interface to set the key. For example, the
 ansi_cprng requires a seed. The DRBGs do not require a seed, but may be
-seeded.
+seeded. The seed is also known as a *Personalization String* in the NIST
+SP 800-90A standard.
 
 Using the read()/recvmsg() system calls, random numbers can be obtained.
 The kernel generates at most 128 bytes in one call. If user space
@@ -314,6 +315,16 @@ WARNING: The user space caller may invoke the initially mentioned accept
 system call multiple times. In this case, the returned file descriptors
 have the same state.
 
+The following CAVP testing interfaces are enabled when the kernel is built with
+the CRYPTO_USER_API_RNG_CAVP option:
+
+-  the concatenation of *Entropy* and *Nonce* can be provided to the RNG via
+   ALG_SET_DRBG_ENTROPY setsockopt interface. Setting the entropy requires
+   CAP_SYS_ADMIN permission.
+
+-  *Additional Data* can be provided using the send()/sendmsg() system calls,
+   but only after the entropy has been set.
+
 Zero-Copy Interface
 -------------------
 
@@ -377,6 +388,9 @@ mentioned optname:
    provided ciphertext is assumed to contain an authentication tag of
    the given size (see section about AEAD memory layout below).
 
+-  ALG_SET_DRBG_ENTROPY -- Setting the entropy of the random number generator.
+   This option is applicable to RNG cipher type only.
+
 User space API example
 ----------------------
 
diff --git a/crypto/Kconfig b/crypto/Kconfig
index fac10143d23f..88f98caaf30d 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1875,6 +1875,15 @@ config CRYPTO_USER_API_RNG
 	  This option enables the user-spaces interface for random
 	  number generator algorithms.
 
+config CRYPTO_USER_API_RNG_CAVP
+	bool "Enable CAVP testing of DRBG"
+	depends on CRYPTO_USER_API_RNG && CRYPTO_DRBG
+	help
+	  This option enables extra API for CAVP testing via the user-space
+	  interface: resetting of DRBG entropy, and providing Additional Data.
+	  This should only be enabled for CAVP testing. You should say
+	  no unless you know what this is.
+
 config CRYPTO_USER_API_AEAD
 	tristate "User-space interface for AEAD cipher algorithms"
 	depends on NET
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index a6f581ab200c..8535cb03b484 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -253,6 +253,14 @@ static int alg_setsockopt(struct socket *sock, int level, int optname,
 		if (!type->setauthsize)
 			goto unlock;
 		err = type->setauthsize(ask->private, optlen);
+		break;
+	case ALG_SET_DRBG_ENTROPY:
+		if (sock->state == SS_CONNECTED)
+			goto unlock;
+		if (!type->setentropy)
+			goto unlock;
+
+		err = type->setentropy(ask->private, optval, optlen);
 	}
 
 unlock:
@@ -285,6 +293,11 @@ int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern)
 	security_sock_graft(sk2, newsock);
 	security_sk_clone(sk, sk2);
 
+	/*
+	 * newsock->ops assigned here to allow type->accept call to override
+	 * them when required.
+	 */
+	newsock->ops = type->ops;
 	err = type->accept(ask->private, sk2);
 
 	nokey = err == -ENOKEY;
@@ -303,7 +316,6 @@ int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern)
 	alg_sk(sk2)->parent = sk;
 	alg_sk(sk2)->type = type;
 
-	newsock->ops = type->ops;
 	newsock->state = SS_CONNECTED;
 
 	if (nokey)
diff --git a/crypto/algif_rng.c b/crypto/algif_rng.c
index 6300e0566dc5..407408c43730 100644
--- a/crypto/algif_rng.c
+++ b/crypto/algif_rng.c
@@ -38,6 +38,7 @@
  * DAMAGE.
  */
 
+#include <linux/capability.h>
 #include <linux/module.h>
 #include <crypto/rng.h>
 #include <linux/random.h>
@@ -53,15 +54,26 @@ struct rng_ctx {
 #define MAXSIZE 128
 	unsigned int len;
 	struct crypto_rng *drng;
+	u8 *addtl;
+	size_t addtl_len;
 };
 
-static int rng_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
-		       int flags)
+struct rng_parent_ctx {
+	struct crypto_rng *drng;
+	u8 *entropy;
+};
+
+static void rng_reset_addtl(struct rng_ctx *ctx)
 {
-	struct sock *sk = sock->sk;
-	struct alg_sock *ask = alg_sk(sk);
-	struct rng_ctx *ctx = ask->private;
-	int err;
+	kfree_sensitive(ctx->addtl);
+	ctx->addtl = NULL;
+	ctx->addtl_len = 0;
+}
+
+static int _rng_recvmsg(struct crypto_rng *drng, struct msghdr *msg, size_t len,
+			u8 *addtl, size_t addtl_len)
+{
+	int err = 0;
 	int genlen = 0;
 	u8 result[MAXSIZE];
 
@@ -82,7 +94,7 @@ static int rng_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 	 * seeding as they automatically seed. The X9.31 DRNG will return
 	 * an error if it was not seeded properly.
 	 */
-	genlen = crypto_rng_get_bytes(ctx->drng, result, len);
+	genlen = crypto_rng_generate(drng, addtl, addtl_len, result, len);
 	if (genlen < 0)
 		return genlen;
 
@@ -92,6 +104,63 @@ static int rng_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 	return err ? err : len;
 }
 
+static int rng_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+		       int flags)
+{
+	struct sock *sk = sock->sk;
+	struct alg_sock *ask = alg_sk(sk);
+	struct rng_ctx *ctx = ask->private;
+
+	return _rng_recvmsg(ctx->drng, msg, len, NULL, 0);
+}
+
+static int rng_test_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+			    int flags)
+{
+	struct sock *sk = sock->sk;
+	struct alg_sock *ask = alg_sk(sk);
+	struct rng_ctx *ctx = ask->private;
+	int ret;
+
+	lock_sock(sock->sk);
+	ret = _rng_recvmsg(ctx->drng, msg, len, ctx->addtl, ctx->addtl_len);
+	rng_reset_addtl(ctx);
+	release_sock(sock->sk);
+
+	return ret;
+}
+
+static int rng_test_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
+{
+	int err;
+	struct alg_sock *ask = alg_sk(sock->sk);
+	struct rng_ctx *ctx = ask->private;
+
+	lock_sock(sock->sk);
+	if (len > MAXSIZE) {
+		err = -EMSGSIZE;
+		goto unlock;
+	}
+
+	rng_reset_addtl(ctx);
+	ctx->addtl = kmalloc(len, GFP_KERNEL);
+	if (!ctx->addtl) {
+		err = -ENOMEM;
+		goto unlock;
+	}
+
+	err = memcpy_from_msg(ctx->addtl, msg, len);
+	if (err) {
+		rng_reset_addtl(ctx);
+		goto unlock;
+	}
+	ctx->addtl_len = len;
+
+unlock:
+	release_sock(sock->sk);
+	return err ? err : len;
+}
+
 static struct proto_ops algif_rng_ops = {
 	.family		=	PF_ALG,
 
@@ -111,14 +180,53 @@ static struct proto_ops algif_rng_ops = {
 	.recvmsg	=	rng_recvmsg,
 };
 
+static struct proto_ops __maybe_unused algif_rng_test_ops = {
+	.family		=	PF_ALG,
+
+	.connect	=	sock_no_connect,
+	.socketpair	=	sock_no_socketpair,
+	.getname	=	sock_no_getname,
+	.ioctl		=	sock_no_ioctl,
+	.listen		=	sock_no_listen,
+	.shutdown	=	sock_no_shutdown,
+	.mmap		=	sock_no_mmap,
+	.bind		=	sock_no_bind,
+	.accept		=	sock_no_accept,
+	.sendpage	=	sock_no_sendpage,
+
+	.release	=	af_alg_release,
+	.recvmsg	=	rng_test_recvmsg,
+	.sendmsg	=	rng_test_sendmsg,
+};
+
 static void *rng_bind(const char *name, u32 type, u32 mask)
 {
-	return crypto_alloc_rng(name, type, mask);
+	struct rng_parent_ctx *pctx;
+	struct crypto_rng *rng;
+
+	pctx = kzalloc(sizeof(*pctx), GFP_KERNEL);
+	if (!pctx)
+		return ERR_PTR(-ENOMEM);
+
+	rng = crypto_alloc_rng(name, type, mask);
+	if (IS_ERR(rng)) {
+		kfree(pctx);
+		return ERR_CAST(rng);
+	}
+
+	pctx->drng = rng;
+	return pctx;
 }
 
 static void rng_release(void *private)
 {
-	crypto_free_rng(private);
+	struct rng_parent_ctx *pctx = private;
+
+	if (unlikely(!pctx))
+		return;
+	crypto_free_rng(pctx->drng);
+	kfree_sensitive(pctx->entropy);
+	kfree_sensitive(pctx);
 }
 
 static void rng_sock_destruct(struct sock *sk)
@@ -126,6 +234,7 @@ static void rng_sock_destruct(struct sock *sk)
 	struct alg_sock *ask = alg_sk(sk);
 	struct rng_ctx *ctx = ask->private;
 
+	rng_reset_addtl(ctx);
 	sock_kfree_s(sk, ctx, ctx->len);
 	af_alg_release_parent(sk);
 }
@@ -133,6 +242,7 @@ static void rng_sock_destruct(struct sock *sk)
 static int rng_accept_parent(void *private, struct sock *sk)
 {
 	struct rng_ctx *ctx;
+	struct rng_parent_ctx *pctx = private;
 	struct alg_sock *ask = alg_sk(sk);
 	unsigned int len = sizeof(*ctx);
 
@@ -141,6 +251,8 @@ static int rng_accept_parent(void *private, struct sock *sk)
 		return -ENOMEM;
 
 	ctx->len = len;
+	ctx->addtl = NULL;
+	ctx->addtl_len = 0;
 
 	/*
 	 * No seeding done at that point -- if multiple accepts are
@@ -148,20 +260,58 @@ static int rng_accept_parent(void *private, struct sock *sk)
 	 * state of the RNG.
 	 */
 
-	ctx->drng = private;
+	ctx->drng = pctx->drng;
 	ask->private = ctx;
 	sk->sk_destruct = rng_sock_destruct;
 
+	/*
+	 * Non NULL pctx->entropy means that CAVP test has been initiated on
+	 * this socket, replace proto_ops algif_rng_ops with algif_rng_test_ops.
+	 */
+	if (IS_ENABLED(CONFIG_CRYPTO_USER_API_RNG_CAVP) && pctx->entropy)
+		sk->sk_socket->ops = &algif_rng_test_ops;
+
 	return 0;
 }
 
 static int rng_setkey(void *private, const u8 *seed, unsigned int seedlen)
 {
+	struct rng_parent_ctx *pctx = private;
 	/*
 	 * Check whether seedlen is of sufficient size is done in RNG
 	 * implementations.
 	 */
-	return crypto_rng_reset(private, seed, seedlen);
+	return crypto_rng_reset(pctx->drng, seed, seedlen);
+}
+
+static int __maybe_unused rng_setentropy(void *private, sockptr_t entropy,
+					 unsigned int len)
+{
+	struct rng_parent_ctx *pctx = private;
+	u8 *kentropy = NULL;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	if (pctx->entropy)
+		return -EINVAL;
+
+	if (len > MAXSIZE)
+		return -EMSGSIZE;
+
+	if (len) {
+		kentropy = memdup_sockptr(entropy, len);
+		if (IS_ERR(kentropy))
+			return PTR_ERR(kentropy);
+	}
+
+	crypto_rng_alg(pctx->drng)->set_ent(pctx->drng, kentropy, len);
+	/*
+	 * Since rng doesn't perform any memory management for the entropy
+	 * buffer, save kentropy pointer to pctx now to free it after use.
+	 */
+	pctx->entropy = kentropy;
+	return 0;
 }
 
 static const struct af_alg_type algif_type_rng = {
@@ -169,6 +319,9 @@ static const struct af_alg_type algif_type_rng = {
 	.release	=	rng_release,
 	.accept		=	rng_accept_parent,
 	.setkey		=	rng_setkey,
+#ifdef CONFIG_CRYPTO_USER_API_RNG_CAVP
+	.setentropy	=	rng_setentropy,
+#endif
 	.ops		=	&algif_rng_ops,
 	.name		=	"rng",
 	.owner		=	THIS_MODULE
diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h
index ee6412314f8f..a5db86670bdf 100644
--- a/include/crypto/if_alg.h
+++ b/include/crypto/if_alg.h
@@ -46,6 +46,7 @@ struct af_alg_type {
 	void *(*bind)(const char *name, u32 type, u32 mask);
 	void (*release)(void *private);
 	int (*setkey)(void *private, const u8 *key, unsigned int keylen);
+	int (*setentropy)(void *private, sockptr_t entropy, unsigned int len);
 	int (*accept)(void *private, struct sock *sk);
 	int (*accept_nokey)(void *private, struct sock *sk);
 	int (*setauthsize)(void *private, unsigned int authsize);
diff --git a/include/uapi/linux/if_alg.h b/include/uapi/linux/if_alg.h
index bc2bcdec377b..60b7c2efd921 100644
--- a/include/uapi/linux/if_alg.h
+++ b/include/uapi/linux/if_alg.h
@@ -35,6 +35,7 @@ struct af_alg_iv {
 #define ALG_SET_OP			3
 #define ALG_SET_AEAD_ASSOCLEN		4
 #define ALG_SET_AEAD_AUTHSIZE		5
+#define ALG_SET_DRBG_ENTROPY		6
 
 /* Operations */
 #define ALG_OP_DECRYPT			0
-- 
cgit v1.2.3


From 2a36ab717e8fe678d98f81c14a0b124712719840 Mon Sep 17 00:00:00 2001
From: Peter Oskolkov <posk@google.com>
Date: Wed, 23 Sep 2020 16:36:16 -0700
Subject: rseq/membarrier: Add MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ

This patchset is based on Google-internal RSEQ work done by Paul
Turner and Andrew Hunter.

When working with per-CPU RSEQ-based memory allocations, it is
sometimes important to make sure that a global memory location is no
longer accessed from RSEQ critical sections. For example, there can be
two per-CPU lists, one is "active" and accessed per-CPU, while another
one is inactive and worked on asynchronously "off CPU" (e.g.  garbage
collection is performed). Then at some point the two lists are
swapped, and a fast RCU-like mechanism is required to make sure that
the previously active list is no longer accessed.

This patch introduces such a mechanism: in short, membarrier() syscall
issues an IPI to a CPU, restarting a potentially active RSEQ critical
section on the CPU.

Signed-off-by: Peter Oskolkov <posk@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Link: https://lkml.kernel.org/r/20200923233618.2572849-1-posk@google.com
---
 include/linux/sched/mm.h        |   3 +
 include/linux/syscalls.h        |   2 +-
 include/uapi/linux/membarrier.h |  26 ++++++++
 kernel/sched/membarrier.c       | 136 +++++++++++++++++++++++++++++++---------
 4 files changed, 136 insertions(+), 31 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index f889e332912f..15bfb06f2884 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -348,10 +348,13 @@ enum {
 	MEMBARRIER_STATE_GLOBAL_EXPEDITED			= (1U << 3),
 	MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY	= (1U << 4),
 	MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE		= (1U << 5),
+	MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ_READY		= (1U << 6),
+	MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ			= (1U << 7),
 };
 
 enum {
 	MEMBARRIER_FLAG_SYNC_CORE	= (1U << 0),
+	MEMBARRIER_FLAG_RSEQ		= (1U << 1),
 };
 
 #ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 75ac7f8ae93c..06db09875aa4 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -974,7 +974,7 @@ asmlinkage long sys_execveat(int dfd, const char __user *filename,
 			const char __user *const __user *argv,
 			const char __user *const __user *envp, int flags);
 asmlinkage long sys_userfaultfd(int flags);
-asmlinkage long sys_membarrier(int cmd, int flags);
+asmlinkage long sys_membarrier(int cmd, unsigned int flags, int cpu_id);
 asmlinkage long sys_mlock2(unsigned long start, size_t len, int flags);
 asmlinkage long sys_copy_file_range(int fd_in, loff_t __user *off_in,
 				    int fd_out, loff_t __user *off_out,
diff --git a/include/uapi/linux/membarrier.h b/include/uapi/linux/membarrier.h
index 5891d7614c8c..737605897f36 100644
--- a/include/uapi/linux/membarrier.h
+++ b/include/uapi/linux/membarrier.h
@@ -114,6 +114,26 @@
  *                          If this command is not implemented by an
  *                          architecture, -EINVAL is returned.
  *                          Returns 0 on success.
+ * @MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ:
+ *                          Ensure the caller thread, upon return from
+ *                          system call, that all its running thread
+ *                          siblings have any currently running rseq
+ *                          critical sections restarted if @flags
+ *                          parameter is 0; if @flags parameter is
+ *                          MEMBARRIER_CMD_FLAG_CPU,
+ *                          then this operation is performed only
+ *                          on CPU indicated by @cpu_id. If this command is
+ *                          not implemented by an architecture, -EINVAL
+ *                          is returned. A process needs to register its
+ *                          intent to use the private expedited rseq
+ *                          command prior to using it, otherwise
+ *                          this command returns -EPERM.
+ * @MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ:
+ *                          Register the process intent to use
+ *                          MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ.
+ *                          If this command is not implemented by an
+ *                          architecture, -EINVAL is returned.
+ *                          Returns 0 on success.
  * @MEMBARRIER_CMD_SHARED:
  *                          Alias to MEMBARRIER_CMD_GLOBAL. Provided for
  *                          header backward compatibility.
@@ -131,9 +151,15 @@ enum membarrier_cmd {
 	MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED		= (1 << 4),
 	MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE		= (1 << 5),
 	MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE	= (1 << 6),
+	MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ			= (1 << 7),
+	MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ		= (1 << 8),
 
 	/* Alias for header backward compatibility. */
 	MEMBARRIER_CMD_SHARED			= MEMBARRIER_CMD_GLOBAL,
 };
 
+enum membarrier_cmd_flag {
+	MEMBARRIER_CMD_FLAG_CPU		= (1 << 0),
+};
+
 #endif /* _UAPI_LINUX_MEMBARRIER_H */
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index 168479a7d61b..e23e74d52db5 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -18,6 +18,14 @@
 #define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK	0
 #endif
 
+#ifdef CONFIG_RSEQ
+#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ_BITMASK		\
+	(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ			\
+	| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ_BITMASK)
+#else
+#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ_BITMASK	0
+#endif
+
 #define MEMBARRIER_CMD_BITMASK						\
 	(MEMBARRIER_CMD_GLOBAL | MEMBARRIER_CMD_GLOBAL_EXPEDITED	\
 	| MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED			\
@@ -30,6 +38,11 @@ static void ipi_mb(void *info)
 	smp_mb();	/* IPIs should be serializing but paranoid. */
 }
 
+static void ipi_rseq(void *info)
+{
+	rseq_preempt(current);
+}
+
 static void ipi_sync_rq_state(void *info)
 {
 	struct mm_struct *mm = (struct mm_struct *) info;
@@ -129,19 +142,27 @@ static int membarrier_global_expedited(void)
 	return 0;
 }
 
-static int membarrier_private_expedited(int flags)
+static int membarrier_private_expedited(int flags, int cpu_id)
 {
-	int cpu;
 	cpumask_var_t tmpmask;
 	struct mm_struct *mm = current->mm;
+	smp_call_func_t ipi_func = ipi_mb;
 
-	if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
+	if (flags == MEMBARRIER_FLAG_SYNC_CORE) {
 		if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
 			return -EINVAL;
 		if (!(atomic_read(&mm->membarrier_state) &
 		      MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY))
 			return -EPERM;
+	} else if (flags == MEMBARRIER_FLAG_RSEQ) {
+		if (!IS_ENABLED(CONFIG_RSEQ))
+			return -EINVAL;
+		if (!(atomic_read(&mm->membarrier_state) &
+		      MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ_READY))
+			return -EPERM;
+		ipi_func = ipi_rseq;
 	} else {
+		WARN_ON_ONCE(flags);
 		if (!(atomic_read(&mm->membarrier_state) &
 		      MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
 			return -EPERM;
@@ -156,35 +177,59 @@ static int membarrier_private_expedited(int flags)
 	 */
 	smp_mb();	/* system call entry is not a mb. */
 
-	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
+	if (cpu_id < 0 && !zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
 		return -ENOMEM;
 
 	cpus_read_lock();
-	rcu_read_lock();
-	for_each_online_cpu(cpu) {
+
+	if (cpu_id >= 0) {
 		struct task_struct *p;
 
-		/*
-		 * Skipping the current CPU is OK even through we can be
-		 * migrated at any point. The current CPU, at the point
-		 * where we read raw_smp_processor_id(), is ensured to
-		 * be in program order with respect to the caller
-		 * thread. Therefore, we can skip this CPU from the
-		 * iteration.
-		 */
-		if (cpu == raw_smp_processor_id())
-			continue;
-		p = rcu_dereference(cpu_rq(cpu)->curr);
-		if (p && p->mm == mm)
-			__cpumask_set_cpu(cpu, tmpmask);
+		if (cpu_id >= nr_cpu_ids || !cpu_online(cpu_id))
+			goto out;
+		if (cpu_id == raw_smp_processor_id())
+			goto out;
+		rcu_read_lock();
+		p = rcu_dereference(cpu_rq(cpu_id)->curr);
+		if (!p || p->mm != mm) {
+			rcu_read_unlock();
+			goto out;
+		}
+		rcu_read_unlock();
+	} else {
+		int cpu;
+
+		rcu_read_lock();
+		for_each_online_cpu(cpu) {
+			struct task_struct *p;
+
+			/*
+			 * Skipping the current CPU is OK even though we can be
+			 * migrated at any point. The current CPU, at the point
+			 * where we read raw_smp_processor_id(), is ensured to
+			 * be in program order with respect to the caller
+			 * thread. Therefore, we can skip this CPU from the
+			 * iteration.
+			 */
+			if (cpu == raw_smp_processor_id())
+				continue;
+			p = rcu_dereference(cpu_rq(cpu)->curr);
+			if (p && p->mm == mm)
+				__cpumask_set_cpu(cpu, tmpmask);
+		}
+		rcu_read_unlock();
 	}
-	rcu_read_unlock();
 
 	preempt_disable();
-	smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
+	if (cpu_id >= 0)
+		smp_call_function_single(cpu_id, ipi_func, NULL, 1);
+	else
+		smp_call_function_many(tmpmask, ipi_func, NULL, 1);
 	preempt_enable();
 
-	free_cpumask_var(tmpmask);
+out:
+	if (cpu_id < 0)
+		free_cpumask_var(tmpmask);
 	cpus_read_unlock();
 
 	/*
@@ -283,11 +328,18 @@ static int membarrier_register_private_expedited(int flags)
 	    set_state = MEMBARRIER_STATE_PRIVATE_EXPEDITED,
 	    ret;
 
-	if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
+	if (flags == MEMBARRIER_FLAG_SYNC_CORE) {
 		if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
 			return -EINVAL;
 		ready_state =
 			MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY;
+	} else if (flags == MEMBARRIER_FLAG_RSEQ) {
+		if (!IS_ENABLED(CONFIG_RSEQ))
+			return -EINVAL;
+		ready_state =
+			MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ_READY;
+	} else {
+		WARN_ON_ONCE(flags);
 	}
 
 	/*
@@ -299,6 +351,8 @@ static int membarrier_register_private_expedited(int flags)
 		return 0;
 	if (flags & MEMBARRIER_FLAG_SYNC_CORE)
 		set_state |= MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE;
+	if (flags & MEMBARRIER_FLAG_RSEQ)
+		set_state |= MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ;
 	atomic_or(set_state, &mm->membarrier_state);
 	ret = sync_runqueues_membarrier_state(mm);
 	if (ret)
@@ -310,8 +364,15 @@ static int membarrier_register_private_expedited(int flags)
 
 /**
  * sys_membarrier - issue memory barriers on a set of threads
- * @cmd:   Takes command values defined in enum membarrier_cmd.
- * @flags: Currently needs to be 0. For future extensions.
+ * @cmd:    Takes command values defined in enum membarrier_cmd.
+ * @flags:  Currently needs to be 0 for all commands other than
+ *          MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ: in the latter
+ *          case it can be MEMBARRIER_CMD_FLAG_CPU, indicating that @cpu_id
+ *          contains the CPU on which to interrupt (= restart)
+ *          the RSEQ critical section.
+ * @cpu_id: if @flags == MEMBARRIER_CMD_FLAG_CPU, indicates the cpu on which
+ *          RSEQ CS should be interrupted (@cmd must be
+ *          MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ).
  *
  * If this system call is not implemented, -ENOSYS is returned. If the
  * command specified does not exist, not available on the running
@@ -337,10 +398,21 @@ static int membarrier_register_private_expedited(int flags)
  *        smp_mb()           X           O            O
  *        sys_membarrier()   O           O            O
  */
-SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
+SYSCALL_DEFINE3(membarrier, int, cmd, unsigned int, flags, int, cpu_id)
 {
-	if (unlikely(flags))
-		return -EINVAL;
+	switch (cmd) {
+	case MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ:
+		if (unlikely(flags && flags != MEMBARRIER_CMD_FLAG_CPU))
+			return -EINVAL;
+		break;
+	default:
+		if (unlikely(flags))
+			return -EINVAL;
+	}
+
+	if (!(flags & MEMBARRIER_CMD_FLAG_CPU))
+		cpu_id = -1;
+
 	switch (cmd) {
 	case MEMBARRIER_CMD_QUERY:
 	{
@@ -362,13 +434,17 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
 	case MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED:
 		return membarrier_register_global_expedited();
 	case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
-		return membarrier_private_expedited(0);
+		return membarrier_private_expedited(0, cpu_id);
 	case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
 		return membarrier_register_private_expedited(0);
 	case MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE:
-		return membarrier_private_expedited(MEMBARRIER_FLAG_SYNC_CORE);
+		return membarrier_private_expedited(MEMBARRIER_FLAG_SYNC_CORE, cpu_id);
 	case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE:
 		return membarrier_register_private_expedited(MEMBARRIER_FLAG_SYNC_CORE);
+	case MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ:
+		return membarrier_private_expedited(MEMBARRIER_FLAG_RSEQ, cpu_id);
+	case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ:
+		return membarrier_register_private_expedited(MEMBARRIER_FLAG_RSEQ);
 	default:
 		return -EINVAL;
 	}
-- 
cgit v1.2.3


From a5fa25adf03d4b063aece74ba70ccbb3a71af122 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 24 Sep 2020 17:03:56 -0700
Subject: bpf: Change bpf_sk_release and bpf_sk_*cgroup_id to accept
 ARG_PTR_TO_BTF_ID_SOCK_COMMON

The previous patch allows the networking bpf prog to use the
bpf_skc_to_*() helpers to get a PTR_TO_BTF_ID socket pointer,
e.g. "struct tcp_sock *".  It allows the bpf prog to read all the
fields of the tcp_sock.

This patch changes the bpf_sk_release() and bpf_sk_*cgroup_id()
to take ARG_PTR_TO_BTF_ID_SOCK_COMMON such that they will
work with the pointer returned by the bpf_skc_to_*() helpers
also.  For example, the following will work:

	sk = bpf_skc_lookup_tcp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
	if (!sk)
		return;
	tp = bpf_skc_to_tcp_sock(sk);
	if (!tp) {
		bpf_sk_release(sk);
		return;
	}
	lsndtime = tp->lsndtime;
	/* Passing tp to bpf_sk_release() will also work */
	bpf_sk_release(tp);

Since PTR_TO_BTF_ID could be NULL, the helper taking
ARG_PTR_TO_BTF_ID_SOCK_COMMON has to check for NULL at runtime.

A btf_id of "struct sock" may not always mean a fullsock.  Regardless of
whether the helper's running context may get a non-fullsock or not,
considering that the fullsock check/handling is pretty cheap, it is better
to keep the same verifier expectation: a helper that takes
ARG_PTR_TO_BTF_ID* will be able to handle the minisock situation.  In the
bpf_sk_*cgroup_id() case, it will try to get a fullsock by using
sk_to_full_sk(), as its skb variant bpf_sk"b"_*cgroup_id() has already
been doing.

bpf_sk_release can already handle minisock, so nothing special has to
be done.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200925000356.3856047-1-kafai@fb.com
---
 include/uapi/linux/bpf.h       |  8 ++++----
 net/core/filter.c              | 30 ++++++++++++++----------------
 tools/include/uapi/linux/bpf.h |  8 ++++----
 3 files changed, 22 insertions(+), 24 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a22812561064..c96a56d9c3be 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2512,7 +2512,7 @@ union bpf_attr {
  *		result is from *reuse*\ **->socks**\ [] using the hash of the
  *		tuple.
  *
- * long bpf_sk_release(struct bpf_sock *sock)
+ * long bpf_sk_release(void *sock)
  *	Description
  *		Release the reference held by *sock*. *sock* must be a
  *		non-**NULL** pointer that was returned from
@@ -3234,11 +3234,11 @@ union bpf_attr {
  *
  *		**-EOVERFLOW** if an overflow happened: The same object will be tried again.
  *
- * u64 bpf_sk_cgroup_id(struct bpf_sock *sk)
+ * u64 bpf_sk_cgroup_id(void *sk)
  *	Description
  *		Return the cgroup v2 id of the socket *sk*.
  *
- *		*sk* must be a non-**NULL** pointer to a full socket, e.g. one
+ *		*sk* must be a non-**NULL** pointer to a socket, e.g. one
  *		returned from **bpf_sk_lookup_xxx**\ (),
  *		**bpf_sk_fullsock**\ (), etc. The format of returned id is
  *		same as in **bpf_skb_cgroup_id**\ ().
@@ -3248,7 +3248,7 @@ union bpf_attr {
  *	Return
  *		The id is returned or 0 in case the id could not be retrieved.
  *
- * u64 bpf_sk_ancestor_cgroup_id(struct bpf_sock *sk, int ancestor_level)
+ * u64 bpf_sk_ancestor_cgroup_id(void *sk, int ancestor_level)
  *	Description
  *		Return id of cgroup v2 that is ancestor of cgroup associated
  *		with the *sk* at the *ancestor_level*.  The root cgroup is at
diff --git a/net/core/filter.c b/net/core/filter.c
index 6d1864f2bd51..06d397eeef2a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4088,18 +4088,17 @@ static inline u64 __bpf_sk_cgroup_id(struct sock *sk)
 {
 	struct cgroup *cgrp;
 
+	sk = sk_to_full_sk(sk);
+	if (!sk || !sk_fullsock(sk))
+		return 0;
+
 	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
 	return cgroup_id(cgrp);
 }
 
 BPF_CALL_1(bpf_skb_cgroup_id, const struct sk_buff *, skb)
 {
-	struct sock *sk = skb_to_full_sk(skb);
-
-	if (!sk || !sk_fullsock(sk))
-		return 0;
-
-	return __bpf_sk_cgroup_id(sk);
+	return __bpf_sk_cgroup_id(skb->sk);
 }
 
 static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
@@ -4115,6 +4114,10 @@ static inline u64 __bpf_sk_ancestor_cgroup_id(struct sock *sk,
 	struct cgroup *ancestor;
 	struct cgroup *cgrp;
 
+	sk = sk_to_full_sk(sk);
+	if (!sk || !sk_fullsock(sk))
+		return 0;
+
 	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
 	ancestor = cgroup_ancestor(cgrp, ancestor_level);
 	if (!ancestor)
@@ -4126,12 +4129,7 @@ static inline u64 __bpf_sk_ancestor_cgroup_id(struct sock *sk,
 BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
 	   ancestor_level)
 {
-	struct sock *sk = skb_to_full_sk(skb);
-
-	if (!sk || !sk_fullsock(sk))
-		return 0;
-
-	return __bpf_sk_ancestor_cgroup_id(sk, ancestor_level);
+	return __bpf_sk_ancestor_cgroup_id(skb->sk, ancestor_level);
 }
 
 static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = {
@@ -4151,7 +4149,7 @@ static const struct bpf_func_proto bpf_sk_cgroup_id_proto = {
 	.func           = bpf_sk_cgroup_id,
 	.gpl_only       = false,
 	.ret_type       = RET_INTEGER,
-	.arg1_type      = ARG_PTR_TO_SOCKET,
+	.arg1_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
 };
 
 BPF_CALL_2(bpf_sk_ancestor_cgroup_id, struct sock *, sk, int, ancestor_level)
@@ -4163,7 +4161,7 @@ static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = {
 	.func           = bpf_sk_ancestor_cgroup_id,
 	.gpl_only       = false,
 	.ret_type       = RET_INTEGER,
-	.arg1_type      = ARG_PTR_TO_SOCKET,
+	.arg1_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
 	.arg2_type      = ARG_ANYTHING,
 };
 #endif
@@ -5697,7 +5695,7 @@ static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
 
 BPF_CALL_1(bpf_sk_release, struct sock *, sk)
 {
-	if (sk_is_refcounted(sk))
+	if (sk && sk_is_refcounted(sk))
 		sock_gen_put(sk);
 	return 0;
 }
@@ -5706,7 +5704,7 @@ static const struct bpf_func_proto bpf_sk_release_proto = {
 	.func		= bpf_sk_release,
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
+	.arg1_type	= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
 };
 
 BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index a22812561064..c96a56d9c3be 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2512,7 +2512,7 @@ union bpf_attr {
  *		result is from *reuse*\ **->socks**\ [] using the hash of the
  *		tuple.
  *
- * long bpf_sk_release(struct bpf_sock *sock)
+ * long bpf_sk_release(void *sock)
  *	Description
  *		Release the reference held by *sock*. *sock* must be a
  *		non-**NULL** pointer that was returned from
@@ -3234,11 +3234,11 @@ union bpf_attr {
  *
  *		**-EOVERFLOW** if an overflow happened: The same object will be tried again.
  *
- * u64 bpf_sk_cgroup_id(struct bpf_sock *sk)
+ * u64 bpf_sk_cgroup_id(void *sk)
  *	Description
  *		Return the cgroup v2 id of the socket *sk*.
  *
- *		*sk* must be a non-**NULL** pointer to a full socket, e.g. one
+ *		*sk* must be a non-**NULL** pointer to a socket, e.g. one
  *		returned from **bpf_sk_lookup_xxx**\ (),
  *		**bpf_sk_fullsock**\ (), etc. The format of returned id is
  *		same as in **bpf_skb_cgroup_id**\ ().
@@ -3248,7 +3248,7 @@ union bpf_attr {
  *	Return
  *		The id is returned or 0 in case the id could not be retrieved.
  *
- * u64 bpf_sk_ancestor_cgroup_id(struct bpf_sock *sk, int ancestor_level)
+ * u64 bpf_sk_ancestor_cgroup_id(void *sk, int ancestor_level)
  *	Description
  *		Return id of cgroup v2 that is ancestor of cgroup associated
  *		with the *sk* at the *ancestor_level*.  The root cgroup is at
-- 
cgit v1.2.3


From 592a3498648af000e93dff2d36229ab11cd8c7f6 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 24 Sep 2020 17:04:02 -0700
Subject: bpf: Change bpf_sk_storage_*() to accept
 ARG_PTR_TO_BTF_ID_SOCK_COMMON

This patch changes the bpf_sk_storage_*() to take
ARG_PTR_TO_BTF_ID_SOCK_COMMON such that they will work with the pointer
returned by the bpf_skc_to_*() helpers also.

A micro benchmark has been done on a "cgroup_skb/egress" bpf program
which does a bpf_sk_storage_get().  It was driven by netperf doing
a 4096 connected UDP_STREAM test with 64bytes packet.
The stats from "kernel.bpf_stats_enabled" shows no meaningful difference.

The sk_storage_get_btf_proto, sk_storage_delete_btf_proto,
btf_sk_storage_get_proto, and btf_sk_storage_delete_proto are
no longer needed, so they are removed.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Lorenz Bauer <lmb@cloudflare.com>
Link: https://lore.kernel.org/bpf/20200925000402.3856307-1-kafai@fb.com
---
 include/net/bpf_sk_storage.h   |  2 --
 include/uapi/linux/bpf.h       |  1 +
 kernel/bpf/bpf_lsm.c           |  4 ++--
 net/core/bpf_sk_storage.c      | 29 ++++++-----------------------
 net/ipv4/bpf_tcp_ca.c          | 23 ++---------------------
 tools/include/uapi/linux/bpf.h |  1 +
 6 files changed, 12 insertions(+), 48 deletions(-)

(limited to 'include/uapi')

diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h
index 119f4c9c3a9c..3c516dd07caf 100644
--- a/include/net/bpf_sk_storage.h
+++ b/include/net/bpf_sk_storage.h
@@ -20,8 +20,6 @@ void bpf_sk_storage_free(struct sock *sk);
 
 extern const struct bpf_func_proto bpf_sk_storage_get_proto;
 extern const struct bpf_func_proto bpf_sk_storage_delete_proto;
-extern const struct bpf_func_proto sk_storage_get_btf_proto;
-extern const struct bpf_func_proto sk_storage_delete_btf_proto;
 
 struct bpf_local_storage_elem;
 struct bpf_sk_storage_diag;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c96a56d9c3be..0ec6dbeb17a5 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2861,6 +2861,7 @@ union bpf_attr {
  *		0 on success.
  *
  *		**-ENOENT** if the bpf-local-storage cannot be found.
+ *		**-EINVAL** if sk is not a fullsock (e.g. a request_sock).
  *
  * long bpf_send_signal(u32 sig)
  *	Description
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 9cd1428c7199..78ea8a7bd27f 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -56,9 +56,9 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_inode_storage_delete:
 		return &bpf_inode_storage_delete_proto;
 	case BPF_FUNC_sk_storage_get:
-		return &sk_storage_get_btf_proto;
+		return &bpf_sk_storage_get_proto;
 	case BPF_FUNC_sk_storage_delete:
-		return &sk_storage_delete_btf_proto;
+		return &bpf_sk_storage_delete_proto;
 	default:
 		return tracing_prog_func_proto(func_id, prog);
 	}
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 838efc682cff..c907f0dc7f87 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -269,7 +269,7 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
 {
 	struct bpf_local_storage_data *sdata;
 
-	if (flags > BPF_SK_STORAGE_GET_F_CREATE)
+	if (!sk || !sk_fullsock(sk) || flags > BPF_SK_STORAGE_GET_F_CREATE)
 		return (unsigned long)NULL;
 
 	sdata = sk_storage_lookup(sk, map, true);
@@ -299,6 +299,9 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
 
 BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
 {
+	if (!sk || !sk_fullsock(sk))
+		return -EINVAL;
+
 	if (refcount_inc_not_zero(&sk->sk_refcnt)) {
 		int err;
 
@@ -355,7 +358,7 @@ const struct bpf_func_proto bpf_sk_storage_get_proto = {
 	.gpl_only	= false,
 	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
 	.arg1_type	= ARG_CONST_MAP_PTR,
-	.arg2_type	= ARG_PTR_TO_SOCKET,
+	.arg2_type	= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
 	.arg3_type	= ARG_PTR_TO_MAP_VALUE_OR_NULL,
 	.arg4_type	= ARG_ANYTHING,
 };
@@ -375,27 +378,7 @@ const struct bpf_func_proto bpf_sk_storage_delete_proto = {
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_CONST_MAP_PTR,
-	.arg2_type	= ARG_PTR_TO_SOCKET,
-};
-
-const struct bpf_func_proto sk_storage_get_btf_proto = {
-	.func		= bpf_sk_storage_get,
-	.gpl_only	= false,
-	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
-	.arg1_type	= ARG_CONST_MAP_PTR,
-	.arg2_type	= ARG_PTR_TO_BTF_ID,
-	.arg2_btf_id	= &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
-	.arg3_type	= ARG_PTR_TO_MAP_VALUE_OR_NULL,
-	.arg4_type	= ARG_ANYTHING,
-};
-
-const struct bpf_func_proto sk_storage_delete_btf_proto = {
-	.func		= bpf_sk_storage_delete,
-	.gpl_only	= false,
-	.ret_type	= RET_INTEGER,
-	.arg1_type	= ARG_CONST_MAP_PTR,
-	.arg2_type	= ARG_PTR_TO_BTF_ID,
-	.arg2_btf_id	= &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
+	.arg2_type	= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
 };
 
 struct bpf_sk_storage_diag {
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 74a2ef598c31..618954f82764 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -28,22 +28,6 @@ static u32 unsupported_ops[] = {
 static const struct btf_type *tcp_sock_type;
 static u32 tcp_sock_id, sock_id;
 
-static struct bpf_func_proto btf_sk_storage_get_proto __read_mostly;
-static struct bpf_func_proto btf_sk_storage_delete_proto __read_mostly;
-
-static void convert_sk_func_proto(struct bpf_func_proto *to, const struct bpf_func_proto *from)
-{
-	int i;
-
-	*to = *from;
-	for (i = 0; i < ARRAY_SIZE(to->arg_type); i++) {
-		if (to->arg_type[i] == ARG_PTR_TO_SOCKET) {
-			to->arg_type[i] = ARG_PTR_TO_BTF_ID;
-			to->arg_btf_id[i] = &tcp_sock_id;
-		}
-	}
-}
-
 static int bpf_tcp_ca_init(struct btf *btf)
 {
 	s32 type_id;
@@ -59,9 +43,6 @@ static int bpf_tcp_ca_init(struct btf *btf)
 	tcp_sock_id = type_id;
 	tcp_sock_type = btf_type_by_id(btf, tcp_sock_id);
 
-	convert_sk_func_proto(&btf_sk_storage_get_proto, &bpf_sk_storage_get_proto);
-	convert_sk_func_proto(&btf_sk_storage_delete_proto, &bpf_sk_storage_delete_proto);
-
 	return 0;
 }
 
@@ -188,9 +169,9 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
 	case BPF_FUNC_tcp_send_ack:
 		return &bpf_tcp_send_ack_proto;
 	case BPF_FUNC_sk_storage_get:
-		return &btf_sk_storage_get_proto;
+		return &bpf_sk_storage_get_proto;
 	case BPF_FUNC_sk_storage_delete:
-		return &btf_sk_storage_delete_proto;
+		return &bpf_sk_storage_delete_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c96a56d9c3be..0ec6dbeb17a5 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2861,6 +2861,7 @@ union bpf_attr {
  *		0 on success.
  *
  *		**-ENOENT** if the bpf-local-storage cannot be found.
+ *		**-EINVAL** if sk is not a fullsock (e.g. a request_sock).
  *
  * long bpf_send_signal(u32 sig)
  *	Description
-- 
cgit v1.2.3


From c0df236e1394970f3503a8fb103de95d000014ca Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 24 Sep 2020 17:04:09 -0700
Subject: bpf: Change bpf_tcp_*_syncookie to accept
 ARG_PTR_TO_BTF_ID_SOCK_COMMON

This patch changes the bpf_tcp_*_syncookie() to take
ARG_PTR_TO_BTF_ID_SOCK_COMMON such that they will work with the pointer
returned by the bpf_skc_to_*() helpers also.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Lorenz Bauer <lmb@cloudflare.com>
Link: https://lore.kernel.org/bpf/20200925000409.3856725-1-kafai@fb.com
---
 include/uapi/linux/bpf.h       | 4 ++--
 net/core/filter.c              | 8 ++++----
 tools/include/uapi/linux/bpf.h | 4 ++--
 3 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0ec6dbeb17a5..69b9e30375bc 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2692,7 +2692,7 @@ union bpf_attr {
  *		result is from *reuse*\ **->socks**\ [] using the hash of the
  *		tuple.
  *
- * long bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * long bpf_tcp_check_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
  * 	Description
  * 		Check whether *iph* and *th* contain a valid SYN cookie ACK for
  * 		the listening socket in *sk*.
@@ -2878,7 +2878,7 @@ union bpf_attr {
  *
  *		**-EAGAIN** if bpf program can try again.
  *
- * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * s64 bpf_tcp_gen_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
  *	Description
  *		Try to issue a SYN cookie for the packet with corresponding
  *		IP/TCP headers, *iph* and *th*, on the listening socket in *sk*.
diff --git a/net/core/filter.c b/net/core/filter.c
index 06d397eeef2a..1d88e9b498eb 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6086,7 +6086,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len
 	u32 cookie;
 	int ret;
 
-	if (unlikely(th_len < sizeof(*th)))
+	if (unlikely(!sk || th_len < sizeof(*th)))
 		return -EINVAL;
 
 	/* sk_listener() allows TCP_NEW_SYN_RECV, which makes no sense here. */
@@ -6139,7 +6139,7 @@ static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = {
 	.gpl_only	= true,
 	.pkt_access	= true,
 	.ret_type	= RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
+	.arg1_type	= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
 	.arg2_type	= ARG_PTR_TO_MEM,
 	.arg3_type	= ARG_CONST_SIZE,
 	.arg4_type	= ARG_PTR_TO_MEM,
@@ -6153,7 +6153,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
 	u32 cookie;
 	u16 mss;
 
-	if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4))
+	if (unlikely(!sk || th_len < sizeof(*th) || th_len != th->doff * 4))
 		return -EINVAL;
 
 	if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
@@ -6208,7 +6208,7 @@ static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = {
 	.gpl_only	= true, /* __cookie_v*_init_sequence() is GPL */
 	.pkt_access	= true,
 	.ret_type	= RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
+	.arg1_type	= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
 	.arg2_type	= ARG_PTR_TO_MEM,
 	.arg3_type	= ARG_CONST_SIZE,
 	.arg4_type	= ARG_PTR_TO_MEM,
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 0ec6dbeb17a5..69b9e30375bc 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2692,7 +2692,7 @@ union bpf_attr {
  *		result is from *reuse*\ **->socks**\ [] using the hash of the
  *		tuple.
  *
- * long bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * long bpf_tcp_check_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
  * 	Description
  * 		Check whether *iph* and *th* contain a valid SYN cookie ACK for
  * 		the listening socket in *sk*.
@@ -2878,7 +2878,7 @@ union bpf_attr {
  *
  *		**-EAGAIN** if bpf program can try again.
  *
- * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * s64 bpf_tcp_gen_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
  *	Description
  *		Try to issue a SYN cookie for the packet with corresponding
  *		IP/TCP headers, *iph* and *th*, on the listening socket in *sk*.
-- 
cgit v1.2.3


From 27e5203bd9c5cc6d54dcac48c3027f3f04522b8b Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 24 Sep 2020 17:04:15 -0700
Subject: bpf: Change bpf_sk_assign to accept ARG_PTR_TO_BTF_ID_SOCK_COMMON

This patch changes the bpf_sk_assign() to take
ARG_PTR_TO_BTF_ID_SOCK_COMMON such that it will work with the pointer
returned by the bpf_skc_to_*() helpers also.

The bpf_sk_lookup_assign() is taking ARG_PTR_TO_SOCKET_"OR_NULL", meaning
it specifically takes a literal NULL.  ARG_PTR_TO_BTF_ID_SOCK_COMMON
does not allow a literal NULL, so another ARG type is required
for this purpose and another follow-up patch can be used if
there is such a need.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200925000415.3857374-1-kafai@fb.com
---
 include/uapi/linux/bpf.h       | 2 +-
 net/core/filter.c              | 4 ++--
 tools/include/uapi/linux/bpf.h | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 69b9e30375bc..2d6519a2ed77 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3107,7 +3107,7 @@ union bpf_attr {
  * 	Return
  * 		The id is returned or 0 in case the id could not be retrieved.
  *
- * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
+ * long bpf_sk_assign(struct sk_buff *skb, void *sk, u64 flags)
  *	Description
  *		Helper is overloaded depending on BPF program type. This
  *		description applies to **BPF_PROG_TYPE_SCHED_CLS** and
diff --git a/net/core/filter.c b/net/core/filter.c
index 1d88e9b498eb..af88935e24b1 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6217,7 +6217,7 @@ static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = {
 
 BPF_CALL_3(bpf_sk_assign, struct sk_buff *, skb, struct sock *, sk, u64, flags)
 {
-	if (flags != 0)
+	if (!sk || flags != 0)
 		return -EINVAL;
 	if (!skb_at_tc_ingress(skb))
 		return -EOPNOTSUPP;
@@ -6241,7 +6241,7 @@ static const struct bpf_func_proto bpf_sk_assign_proto = {
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 	.arg1_type      = ARG_PTR_TO_CTX,
-	.arg2_type      = ARG_PTR_TO_SOCK_COMMON,
+	.arg2_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
 	.arg3_type	= ARG_ANYTHING,
 };
 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 69b9e30375bc..2d6519a2ed77 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3107,7 +3107,7 @@ union bpf_attr {
  * 	Return
  * 		The id is returned or 0 in case the id could not be retrieved.
  *
- * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
+ * long bpf_sk_assign(struct sk_buff *skb, void *sk, u64 flags)
  *	Description
  *		Helper is overloaded depending on BPF program type. This
  *		description applies to **BPF_PROG_TYPE_SCHED_CLS** and
-- 
cgit v1.2.3


From 5d5b4128c4caae34ddcd9b2dc30ac4d6155617a3 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Fri, 25 Sep 2020 13:46:07 -0700
Subject: devlink: introduce flash update overwrite mask

Sections of device flash may contain settings or device identifying
information. When performing a flash update, it is generally expected
that these settings and identifiers are not overwritten.

However, it may sometimes be useful to allow overwriting these fields
when performing a flash update. Some examples include, 1) customizing
the initial device config on first programming, such as overwriting
default device identifying information, or 2) reverting a device
configuration to known good state provided in the new firmware image, or
3) in case it is suspected that current firmware logic for managing the
preservation of fields during an update is broken.

Although some devices are able to completely separate these types of
settings and fields into separate components, this is not true for all
hardware.

To support controlling this behavior, a new
DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK is defined. This is an
nla_bitfield32 which will define what subset of fields in a component
should be overwritten during an update.

If no bits are specified, or if the overwrite mask is not provided, then
an update should not overwrite anything, and should maintain the
settings and identifiers as they are in the previous image.

If the overwrite mask has the DEVLINK_FLASH_OVERWRITE_SETTINGS bit set,
then the device should be configured to overwrite any of the settings in
the requested component with settings found in the provided image.

Similarly, if the DEVLINK_FLASH_OVERWRITE_IDENTIFIERS bit is set, the
device should be configured to overwrite any device identifiers in the
requested component with the identifiers from the image.

Multiple overwrite modes may be combined to indicate that a combination
of the sets of fields should be overwritten.

Drivers which support the new overwrite mask must set the
DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK in the
supported_flash_update_params field of their devlink_ops.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/devlink/devlink-flash.rst | 28 ++++++++++++++++++++++
 include/net/devlink.h                              |  4 +++-
 include/uapi/linux/devlink.h                       | 23 ++++++++++++++++++
 net/core/devlink.c                                 | 17 ++++++++++++-
 4 files changed, 70 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/networking/devlink/devlink-flash.rst b/Documentation/networking/devlink/devlink-flash.rst
index 40a87c0222cb..603e732f00cc 100644
--- a/Documentation/networking/devlink/devlink-flash.rst
+++ b/Documentation/networking/devlink/devlink-flash.rst
@@ -16,6 +16,34 @@ Note that the file name is a path relative to the firmware loading path
 (usually ``/lib/firmware/``). Drivers may send status updates to inform
 user space about the progress of the update operation.
 
+Overwrite Mask
+==============
+
+The ``devlink-flash`` command allows optionally specifying a mask indicating
+how the device should handle subsections of flash components when updating.
+This mask indicates the set of sections which are allowed to be overwritten.
+
+.. list-table:: List of overwrite mask bits
+   :widths: 5 95
+
+   * - Name
+     - Description
+   * - ``DEVLINK_FLASH_OVERWRITE_SETTINGS``
+     - Indicates that the device should overwrite settings in the components
+       being updated with the settings found in the provided image.
+   * - ``DEVLINK_FLASH_OVERWRITE_IDENTIFIERS``
+     - Indicates that the device should overwrite identifiers in the
+       components being updated with the identifiers found in the provided
+       image. This includes MAC addresses, serial IDs, and similar device
+       identifiers.
+
+Multiple overwrite bits may be combined and requested together. If no bits
+are provided, it is expected that the device only update firmware binaries
+in the components being updated. Settings and identifiers are expected to be
+preserved across the update. A device may not support every combination and
+the driver for such a device must reject any combination which cannot be
+faithfully implemented.
+
 Firmware Loading
 ================
 
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 7794e1601772..7339bf9ba6b4 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -562,9 +562,11 @@ enum devlink_param_generic_id {
 struct devlink_flash_update_params {
 	const char *file_name;
 	const char *component;
+	u32 overwrite_mask;
 };
 
-#define DEVLINK_SUPPORT_FLASH_UPDATE_COMPONENT	BIT(0)
+#define DEVLINK_SUPPORT_FLASH_UPDATE_COMPONENT		BIT(0)
+#define DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK	BIT(1)
 
 struct devlink_region;
 struct devlink_info_req;
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index a2ecc8b00611..7b0face1bad5 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -230,6 +230,28 @@ enum {
 	DEVLINK_ATTR_STATS_MAX = __DEVLINK_ATTR_STATS_MAX - 1
 };
 
+/* Specify what sections of a flash component can be overwritten when
+ * performing an update. Overwriting of firmware binary sections is always
+ * implicitly assumed to be allowed.
+ *
+ * Each section must be documented in
+ * Documentation/networking/devlink/devlink-flash.rst
+ *
+ */
+enum {
+	DEVLINK_FLASH_OVERWRITE_SETTINGS_BIT,
+	DEVLINK_FLASH_OVERWRITE_IDENTIFIERS_BIT,
+
+	__DEVLINK_FLASH_OVERWRITE_MAX_BIT,
+	DEVLINK_FLASH_OVERWRITE_MAX_BIT = __DEVLINK_FLASH_OVERWRITE_MAX_BIT - 1
+};
+
+#define DEVLINK_FLASH_OVERWRITE_SETTINGS _BITUL(DEVLINK_FLASH_OVERWRITE_SETTINGS_BIT)
+#define DEVLINK_FLASH_OVERWRITE_IDENTIFIERS _BITUL(DEVLINK_FLASH_OVERWRITE_IDENTIFIERS_BIT)
+
+#define DEVLINK_SUPPORTED_FLASH_OVERWRITE_SECTIONS \
+	(_BITUL(__DEVLINK_FLASH_OVERWRITE_MAX_BIT) - 1)
+
 /**
  * enum devlink_trap_action - Packet trap action.
  * @DEVLINK_TRAP_ACTION_DROP: Packet is dropped by the device and a copy is not
@@ -464,6 +486,7 @@ enum devlink_attr {
 	DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,	/* u32 */
 
 	DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT,	/* u64 */
+	DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK,	/* bitfield32 */
 
 	/* add new attributes above here, update the policy in devlink.c */
 
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 6766f9ef3152..7a38f9e25922 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -3147,9 +3147,9 @@ EXPORT_SYMBOL_GPL(devlink_flash_update_timeout_notify);
 static int devlink_nl_cmd_flash_update(struct sk_buff *skb,
 				       struct genl_info *info)
 {
+	struct nlattr *nla_component, *nla_overwrite_mask;
 	struct devlink_flash_update_params params = {};
 	struct devlink *devlink = info->user_ptr[0];
-	struct nlattr *nla_component;
 	u32 supported_params;
 
 	if (!devlink->ops->flash_update)
@@ -3172,6 +3172,19 @@ static int devlink_nl_cmd_flash_update(struct sk_buff *skb,
 		params.component = nla_data(nla_component);
 	}
 
+	nla_overwrite_mask = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK];
+	if (nla_overwrite_mask) {
+		struct nla_bitfield32 sections;
+
+		if (!(supported_params & DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK)) {
+			NL_SET_ERR_MSG_ATTR(info->extack, nla_overwrite_mask,
+					    "overwrite settings are not supported by this device");
+			return -EOPNOTSUPP;
+		}
+		sections = nla_get_bitfield32(nla_overwrite_mask);
+		params.overwrite_mask = sections.value & sections.selector;
+	}
+
 	return devlink->ops->flash_update(devlink, &params, info->extack);
 }
 
@@ -7093,6 +7106,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 },
 	[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = { .type = NLA_NUL_STRING },
+	[DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK] =
+		NLA_POLICY_BITFIELD32(DEVLINK_SUPPORTED_FLASH_OVERWRITE_SECTIONS),
 	[DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_TRAP_ACTION] = { .type = NLA_U8 },
 	[DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING },
-- 
cgit v1.2.3


From b38c73ca1c213bbf8a872b334a6bb835becfaba5 Mon Sep 17 00:00:00 2001
From: Dafna Hirschfeld <dafna.hirschfeld@collabora.com>
Date: Thu, 27 Aug 2020 21:46:08 +0200
Subject: media: v4l2: add support for colorspace conversion API (CSC) for
 video capture

For video capture it is the driver that reports the colorspace,
transfer function, Y'CbCr/HSV encoding and quantization range
used by the video, and there is no way to request something
different, even though many HDTV receivers have some sort of
colorspace conversion capabilities.

For output video this feature already exists since the application
specifies this information for the video format it will send out, and
the transmitter will enable any available CSC if a format conversion has
to be performed in order to match the capabilities of the sink.

For video capture we propose adding a new v4l2_pix_format flag:
V4L2_PIX_FMT_FLAG_SET_CSC. When the flag is set by the application,
the driver will interpret the colorspace, xfer_func, ycbcr_enc/hsv_enc
and quantization fields as the requested colorspace information and will
attempt to do the conversion it supports.

Drivers set the flags
V4L2_FMT_FLAG_CSC_COLORSPACE,
V4L2_FMT_FLAG_CSC_XFER_FUNC,
V4L2_FMT_FLAG_CSC_YCBCR_ENC/V4L2_FMT_FLAG_CSC_HSV_ENC,
V4L2_FMT_FLAG_CSC_QUANTIZATION,
in the flags field of the struct v4l2_fmtdesc during enumeration to
indicate that they support colorspace conversion for the respective field.

Drivers do not have to actually look at the flags. If the flags are not
set, then the fields 'colorspace', 'xfer_func', 'ycbcr_enc/hsv_enc',
and 'quantization' are set to the default values by the core, i.e. just
pass on the received format without conversion.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Dafna Hirschfeld <dafna.hirschfeld@collabora.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../userspace-api/media/v4l/pixfmt-v4l2-mplane.rst | 16 ++----
 .../userspace-api/media/v4l/pixfmt-v4l2.rst        | 64 ++++++++++++++++++++--
 .../userspace-api/media/v4l/vidioc-enum-fmt.rst    | 35 ++++++++++++
 .../userspace-api/media/videodev2.h.rst.exceptions |  5 ++
 include/uapi/linux/videodev2.h                     |  6 ++
 5 files changed, 109 insertions(+), 17 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/userspace-api/media/v4l/pixfmt-v4l2-mplane.rst b/Documentation/userspace-api/media/v4l/pixfmt-v4l2-mplane.rst
index ac82882135ae..977facc3a1f4 100644
--- a/Documentation/userspace-api/media/v4l/pixfmt-v4l2-mplane.rst
+++ b/Documentation/userspace-api/media/v4l/pixfmt-v4l2-mplane.rst
@@ -98,29 +98,21 @@ describing all planes of that format.
     * - __u8
       - ``ycbcr_enc``
       - Y'CbCr encoding, from enum :c:type:`v4l2_ycbcr_encoding`.
-        This information supplements the ``colorspace`` and must be set by
-	the driver for capture streams and by the application for output
-	streams, see :ref:`colorspaces`.
+	See struct :c:type:`v4l2_pix_format`.
     * - __u8
       - ``hsv_enc``
       - HSV encoding, from enum :c:type:`v4l2_hsv_encoding`.
-        This information supplements the ``colorspace`` and must be set by
-	the driver for capture streams and by the application for output
-	streams, see :ref:`colorspaces`.
+	See struct :c:type:`v4l2_pix_format`.
     * - }
       -
     * - __u8
       - ``quantization``
       - Quantization range, from enum :c:type:`v4l2_quantization`.
-        This information supplements the ``colorspace`` and must be set by
-	the driver for capture streams and by the application for output
-	streams, see :ref:`colorspaces`.
+	See struct :c:type:`v4l2_pix_format`.
     * - __u8
       - ``xfer_func``
       - Transfer function, from enum :c:type:`v4l2_xfer_func`.
-        This information supplements the ``colorspace`` and must be set by
-	the driver for capture streams and by the application for output
-	streams, see :ref:`colorspaces`.
+	See struct :c:type:`v4l2_pix_format`.
     * - __u8
       - ``reserved[7]``
       - Reserved for future extensions. Should be zeroed by drivers and
diff --git a/Documentation/userspace-api/media/v4l/pixfmt-v4l2.rst b/Documentation/userspace-api/media/v4l/pixfmt-v4l2.rst
index 8424d6f53b0c..71e828093310 100644
--- a/Documentation/userspace-api/media/v4l/pixfmt-v4l2.rst
+++ b/Documentation/userspace-api/media/v4l/pixfmt-v4l2.rst
@@ -109,7 +109,14 @@ Single-planar format structure
       - Image colorspace, from enum :c:type:`v4l2_colorspace`.
         This information supplements the ``pixelformat`` and must be set
 	by the driver for capture streams and by the application for
-	output streams, see :ref:`colorspaces`.
+	output streams, see :ref:`colorspaces`. If the application sets the
+	flag ``V4L2_PIX_FMT_FLAG_SET_CSC`` then the application can set
+	this field for a capture stream to request a specific colorspace
+	for the captured image data. If the driver cannot handle requested
+	conversion, it will return another supported colorspace.
+	The driver indicates that colorspace conversion is supported by setting
+	the flag V4L2_FMT_FLAG_CSC_COLORSPACE in the corresponding struct
+	:c:type:`v4l2_fmtdesc` during enumeration. See :ref:`fmtdesc-flags`.
     * - __u32
       - ``priv``
       - This field indicates whether the remaining fields of the
@@ -146,13 +153,29 @@ Single-planar format structure
       - Y'CbCr encoding, from enum :c:type:`v4l2_ycbcr_encoding`.
         This information supplements the ``colorspace`` and must be set by
 	the driver for capture streams and by the application for output
-	streams, see :ref:`colorspaces`.
+	streams, see :ref:`colorspaces`. If the application sets the
+	flag ``V4L2_PIX_FMT_FLAG_SET_CSC`` then the application can set
+	this field for a capture stream to request a specific Y'CbCr encoding
+	for the captured image data. If the driver cannot handle requested
+	conversion, it will return another supported encoding.
+	This field is ignored for HSV pixelformats. The driver indicates that
+	ycbcr_enc conversion is supported by setting the flag
+	V4L2_FMT_FLAG_CSC_YCBCR_ENC in the corresponding struct
+	:c:type:`v4l2_fmtdesc` during enumeration. See :ref:`fmtdesc-flags`.
     * - __u32
       - ``hsv_enc``
       - HSV encoding, from enum :c:type:`v4l2_hsv_encoding`.
         This information supplements the ``colorspace`` and must be set by
 	the driver for capture streams and by the application for output
-	streams, see :ref:`colorspaces`.
+	streams, see :ref:`colorspaces`. If the application sets the flag
+	``V4L2_PIX_FMT_FLAG_SET_CSC`` then the application can set this
+	field for a capture stream to request a specific HSV encoding for the
+	captured image data. If the driver cannot handle requested
+	conversion, it will return another supported encoding.
+	This field is ignored for non-HSV pixelformats. The driver indicates
+	that hsv_enc conversion is supported by setting the flag
+	V4L2_FMT_FLAG_CSC_HSV_ENC in the corresponding struct
+	:c:type:`v4l2_fmtdesc` during enumeration. See :ref:`fmtdesc-flags`.
     * - }
       -
     * - __u32
@@ -160,13 +183,27 @@ Single-planar format structure
       - Quantization range, from enum :c:type:`v4l2_quantization`.
         This information supplements the ``colorspace`` and must be set by
 	the driver for capture streams and by the application for output
-	streams, see :ref:`colorspaces`.
+	streams, see :ref:`colorspaces`. If the application sets the flag
+	``V4L2_PIX_FMT_FLAG_SET_CSC`` then the application can set
+	this field for a capture stream to request a specific quantization
+	range for the captured image data. If the driver cannot handle requested
+	conversion, it will return another supported quantization.
+	The driver indicates that quantization conversion is supported by setting
+	the flag V4L2_FMT_FLAG_CSC_QUANTIZATION in the corresponding struct
+	:c:type:`v4l2_fmtdesc` during enumeration. See :ref:`fmtdesc-flags`.
     * - __u32
       - ``xfer_func``
       - Transfer function, from enum :c:type:`v4l2_xfer_func`.
         This information supplements the ``colorspace`` and must be set by
 	the driver for capture streams and by the application for output
-	streams, see :ref:`colorspaces`.
+	streams, see :ref:`colorspaces`. If the application sets the flag
+	``V4L2_PIX_FMT_FLAG_SET_CSC`` then the application can set
+	this field for a capture stream to request a specific transfer function
+	for the captured image data. If the driver cannot handle requested
+	conversion, it will return another supported transfer function.
+	The driver indicates that xfer_func conversion is supported by setting
+	the flag V4L2_FMT_FLAG_CSC_XFER_FUNC in the corresponding struct
+	:c:type:`v4l2_fmtdesc` during enumeration. See :ref:`fmtdesc-flags`.
 
 .. tabularcolumns:: |p{6.6cm}|p{2.2cm}|p{8.7cm}|
 
@@ -184,3 +221,20 @@ Single-planar format structure
 	by RGBA values (128, 192, 255, 128), the same pixel described with
 	premultiplied colors would be described by RGBA values (64, 96,
 	128, 128)
+    * .. _`v4l2-pix-fmt-flag-set-csc`:
+
+      - ``V4L2_PIX_FMT_FLAG_SET_CSC``
+      - 0x00000002
+      - Set by the application. It is only used for capture and is
+        ignored for output streams. If set, then request the device to do
+	colorspace conversion from the received colorspace to the requested
+	colorspace values. If the colorimetry field (``colorspace``, ``xfer_func``,
+	``ycbcr_enc``, ``hsv_enc`` or ``quantization``) is set to ``*_DEFAULT``,
+	then that colorimetry setting will remain unchanged from what was received.
+	So in order to change the quantization, only the ``quantization`` field shall
+	be set to non default value (``V4L2_QUANTIZATION_FULL_RANGE`` or
+	``V4L2_QUANTIZATION_LIM_RANGE``) and all other colorimetry fields shall
+	be set to ``*_DEFAULT``.
+
+	To check which conversions are supported by the hardware for the current
+	pixel format, see :ref:`fmtdesc-flags`.
diff --git a/Documentation/userspace-api/media/v4l/vidioc-enum-fmt.rst b/Documentation/userspace-api/media/v4l/vidioc-enum-fmt.rst
index 296b7d437431..b8347a96a554 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-enum-fmt.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-enum-fmt.rst
@@ -191,6 +191,41 @@ the ``mbus_code`` field is handled differently:
 	This flag can only be used in combination with the
 	``V4L2_FMT_FLAG_COMPRESSED`` flag, since this applies to
         compressed formats only. This flag is valid for stateful encoders only.
+    * - ``V4L2_FMT_FLAG_CSC_COLORSPACE``
+      - 0x0020
+      - The driver allows the application to try to change the default
+	colorspace. This flag is relevant only for capture devices.
+	The application can ask to configure the colorspace of the capture device
+	when calling the :ref:`VIDIOC_S_FMT <VIDIOC_G_FMT>` ioctl with
+	:ref:`V4L2_PIX_FMT_FLAG_SET_CSC <v4l2-pix-fmt-flag-set-csc>` set.
+    * - ``V4L2_FMT_FLAG_CSC_XFER_FUNC``
+      - 0x0040
+      - The driver allows the application to try to change the default
+	transfer function. This flag is relevant only for capture devices.
+	The application can ask to configure the transfer function of the capture
+	device when calling the :ref:`VIDIOC_S_FMT <VIDIOC_G_FMT>` ioctl with
+	:ref:`V4L2_PIX_FMT_FLAG_SET_CSC <v4l2-pix-fmt-flag-set-csc>` set.
+    * - ``V4L2_FMT_FLAG_CSC_YCBCR_ENC``
+      - 0x0080
+      - The driver allows the application to try to change the default
+	Y'CbCr encoding. This flag is relevant only for capture devices.
+	The application can ask to configure the Y'CbCr encoding of the capture device
+	when calling the :ref:`VIDIOC_S_FMT <VIDIOC_G_FMT>` ioctl with
+	:ref:`V4L2_PIX_FMT_FLAG_SET_CSC <v4l2-pix-fmt-flag-set-csc>` set.
+    * - ``V4L2_FMT_FLAG_CSC_HSV_ENC``
+      - 0x0080
+      - The driver allows the application to try to change the default
+	HSV encoding. This flag is relevant only for capture devices.
+	The application can ask to configure the HSV encoding of the capture device
+	when calling the :ref:`VIDIOC_S_FMT <VIDIOC_G_FMT>` ioctl with
+	:ref:`V4L2_PIX_FMT_FLAG_SET_CSC <v4l2-pix-fmt-flag-set-csc>` set.
+    * - ``V4L2_FMT_FLAG_CSC_QUANTIZATION``
+      - 0x0100
+      - The driver allows the application to try to change the default
+	quantization. This flag is relevant only for capture devices.
+	The application can ask to configure the quantization of the capture
+	device when calling the :ref:`VIDIOC_S_FMT <VIDIOC_G_FMT>` ioctl with
+	:ref:`V4L2_PIX_FMT_FLAG_SET_CSC <v4l2-pix-fmt-flag-set-csc>` set.
 
 
 Return Value
diff --git a/Documentation/userspace-api/media/videodev2.h.rst.exceptions b/Documentation/userspace-api/media/videodev2.h.rst.exceptions
index 659799cc1eca..121e396a2779 100644
--- a/Documentation/userspace-api/media/videodev2.h.rst.exceptions
+++ b/Documentation/userspace-api/media/videodev2.h.rst.exceptions
@@ -188,6 +188,11 @@ replace define V4L2_FMT_FLAG_EMULATED fmtdesc-flags
 replace define V4L2_FMT_FLAG_CONTINUOUS_BYTESTREAM fmtdesc-flags
 replace define V4L2_FMT_FLAG_DYN_RESOLUTION fmtdesc-flags
 replace define V4L2_FMT_FLAG_ENC_CAP_FRAME_INTERVAL fmtdesc-flags
+replace define V4L2_FMT_FLAG_CSC_COLORSPACE fmtdesc-flags
+replace define V4L2_FMT_FLAG_CSC_XFER_FUNC fmtdesc-flags
+replace define V4L2_FMT_FLAG_CSC_YCBCR_ENC fmtdesc-flags
+replace define V4L2_FMT_FLAG_CSC_HSV_ENC fmtdesc-flags
+replace define V4L2_FMT_FLAG_CSC_QUANTIZATION fmtdesc-flags
 
 # V4L2 timecode types
 replace define V4L2_TC_TYPE_24FPS timecode-type
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 4769628790da..b2bc83f37024 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -777,6 +777,7 @@ struct v4l2_pix_format {
 
 /* Flags */
 #define V4L2_PIX_FMT_FLAG_PREMUL_ALPHA	0x00000001
+#define V4L2_PIX_FMT_FLAG_SET_CSC	0x00000002
 
 /*
  *	F O R M A T   E N U M E R A T I O N
@@ -796,6 +797,11 @@ struct v4l2_fmtdesc {
 #define V4L2_FMT_FLAG_CONTINUOUS_BYTESTREAM	0x0004
 #define V4L2_FMT_FLAG_DYN_RESOLUTION		0x0008
 #define V4L2_FMT_FLAG_ENC_CAP_FRAME_INTERVAL	0x0010
+#define V4L2_FMT_FLAG_CSC_COLORSPACE		0x0020
+#define V4L2_FMT_FLAG_CSC_XFER_FUNC		0x0040
+#define V4L2_FMT_FLAG_CSC_YCBCR_ENC		0x0080
+#define V4L2_FMT_FLAG_CSC_HSV_ENC		V4L2_FMT_FLAG_CSC_YCBCR_ENC
+#define V4L2_FMT_FLAG_CSC_QUANTIZATION		0x0100
 
 	/* Frame Size and frame rate enumeration */
 /*
-- 
cgit v1.2.3


From 62aacfa9bf93f94f6949338e0c7a2ed4c4bd2c2a Mon Sep 17 00:00:00 2001
From: Dafna Hirschfeld <dafna.hirschfeld@collabora.com>
Date: Thu, 27 Aug 2020 21:46:10 +0200
Subject: media: v4l2: extend the CSC API to subdevice.

This patch extends the CSC API in video devices to be supported
also on sub-devices. The flag V4L2_MBUS_FRAMEFMT_SET_CSC is set by
the application when calling the VIDIOC_SUBDEV_S_FMT ioctl.
The flags:

V4L2_SUBDEV_MBUS_CODE_CSC_COLORSPACE,
V4L2_SUBDEV_MBUS_CODE_CSC_XFER_FUNC,
V4L2_SUBDEV_MBUS_CODE_CSC_YCBCR_ENC/V4L2_SUBDEV_MBUS_CODE_CSC_HSV_ENC
V4L2_SUBDEV_MBUS_CODE_CSC_QUANTIZATION

are set by the driver in the VIDIOC_SUBDEV_ENUM_MBUS_CODE ioctl.

New 'flags' fields were added to the structs
v4l2_subdev_mbus_code_enum, v4l2_mbus_framefmt which are borrowed
from the 'reserved' field

The patch also replaces the 'ycbcr_enc' field in
'struct v4l2_mbus_framefmt' with a union that includes 'hsv_enc'

Signed-off-by: Dafna Hirschfeld <dafna.hirschfeld@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../userspace-api/media/v4l/subdev-formats.rst     | 95 +++++++++++++++++++---
 .../media/v4l/vidioc-subdev-enum-mbus-code.rst     | 51 +++++++++++-
 include/uapi/linux/v4l2-mediabus.h                 | 15 +++-
 include/uapi/linux/v4l2-subdev.h                   | 10 ++-
 4 files changed, 157 insertions(+), 14 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/userspace-api/media/v4l/subdev-formats.rst b/Documentation/userspace-api/media/v4l/subdev-formats.rst
index dd2f037b74dd..c9b7bb3ca089 100644
--- a/Documentation/userspace-api/media/v4l/subdev-formats.rst
+++ b/Documentation/userspace-api/media/v4l/subdev-formats.rst
@@ -34,32 +34,107 @@ Media Bus Formats
 	:ref:`field-order` for details.
     * - __u32
       - ``colorspace``
-      - Image colorspace, from enum
-	:c:type:`v4l2_colorspace`. See
-	:ref:`colorspaces` for details.
+      - Image colorspace, from enum :c:type:`v4l2_colorspace`.
+        Must be set by the driver for subdevices. If the application sets the
+	flag ``V4L2_MBUS_FRAMEFMT_SET_CSC`` then the application can set this
+	field on the source pad to request a specific colorspace for the media
+	bus data. If the driver cannot handle the requested conversion, it will
+	return another supported colorspace. The driver indicates that colorspace
+	conversion is supported by setting the flag
+	V4L2_SUBDEV_MBUS_CODE_CSC_COLORSPACE in the corresponding struct
+	:c:type:`v4l2_subdev_mbus_code_enum` during enumeration.
+	See :ref:`v4l2-subdev-mbus-code-flags`.
+    * - union {
+      - (anonymous)
     * - __u16
       - ``ycbcr_enc``
       - Y'CbCr encoding, from enum :c:type:`v4l2_ycbcr_encoding`.
         This information supplements the ``colorspace`` and must be set by
-	the driver for capture streams and by the application for output
-	streams, see :ref:`colorspaces`.
+	the driver for subdevices, see :ref:`colorspaces`. If the application
+	sets the flag ``V4L2_MBUS_FRAMEFMT_SET_CSC`` then the application can set
+	this field on a source pad to request a specific Y'CbCr encoding
+	for the media bus data. If the driver cannot handle the requested
+	conversion, it will return another supported encoding.
+	This field is ignored for HSV media bus formats. The driver indicates
+	that ycbcr_enc conversion is supported by setting the flag
+	V4L2_SUBDEV_MBUS_CODE_CSC_YCBCR_ENC in the corresponding struct
+	:c:type:`v4l2_subdev_mbus_code_enum` during enumeration.
+	See :ref:`v4l2-subdev-mbus-code-flags`.
+    * - __u16
+      - ``hsv_enc``
+      - HSV encoding, from enum :c:type:`v4l2_hsv_encoding`.
+        This information supplements the ``colorspace`` and must be set by
+	the driver for subdevices, see :ref:`colorspaces`. If the application
+	sets the flag ``V4L2_MBUS_FRAMEFMT_SET_CSC`` then the application can set
+	this field on a source pad to request a specific HSV encoding
+	for the media bus data. If the driver cannot handle the requested
+	conversion, it will return another supported encoding.
+	This field is ignored for Y'CbCr media bus formats. The driver indicates
+	that hsv_enc conversion is supported by setting the flag
+	V4L2_SUBDEV_MBUS_CODE_CSC_HSV_ENC in the corresponding struct
+	:c:type:`v4l2_subdev_mbus_code_enum` during enumeration.
+	See :ref:`v4l2-subdev-mbus-code-flags`.
+    * - }
+      -
     * - __u16
       - ``quantization``
       - Quantization range, from enum :c:type:`v4l2_quantization`.
         This information supplements the ``colorspace`` and must be set by
-	the driver for capture streams and by the application for output
-	streams, see :ref:`colorspaces`.
+	the driver for subdevices, see :ref:`colorspaces`. If the application
+	sets the flag ``V4L2_MBUS_FRAMEFMT_SET_CSC`` then the application can set
+	this field on a source pad to request a specific quantization
+	for the media bus data. If the driver cannot handle the requested
+	conversion, it will return another supported quantization.
+	The driver indicates that quantization conversion is supported by
+	setting the flag V4L2_SUBDEV_MBUS_CODE_CSC_QUANTIZATION in the
+	corresponding struct :c:type:`v4l2_subdev_mbus_code_enum`
+	during enumeration. See :ref:`v4l2-subdev-mbus-code-flags`.
+
     * - __u16
       - ``xfer_func``
       - Transfer function, from enum :c:type:`v4l2_xfer_func`.
         This information supplements the ``colorspace`` and must be set by
-	the driver for capture streams and by the application for output
-	streams, see :ref:`colorspaces`.
+	the driver for subdevices, see :ref:`colorspaces`. If the application
+	sets the flag ``V4L2_MBUS_FRAMEFMT_SET_CSC`` then the application can set
+	this field on a source pad to request a specific transfer
+	function for the media bus data. If the driver cannot handle the requested
+	conversion, it will return another supported transfer function.
+	The driver indicates that the transfer function conversion is supported by
+	setting the flag V4L2_SUBDEV_MBUS_CODE_CSC_XFER_FUNC in the
+	corresponding struct :c:type:`v4l2_subdev_mbus_code_enum`
+	during enumeration. See :ref:`v4l2-subdev-mbus-code-flags`.
     * - __u16
-      - ``reserved``\ [11]
+      - ``flags``
+      - flags See:  :ref:`v4l2-mbus-framefmt-flags`
+    * - __u16
+      - ``reserved``\ [10]
       - Reserved for future extensions. Applications and drivers must set
 	the array to zero.
 
+.. _v4l2-mbus-framefmt-flags:
+
+.. flat-table:: v4l2_mbus_framefmt Flags
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       3 1 4
+
+    * .. _`mbus-framefmt-set-csc`:
+
+      - ``V4L2_MBUS_FRAMEFMT_SET_CSC``
+      - 0x0001
+      - Set by the application. It is only used for source pads and is
+	ignored for sink pads. If set, then request the subdevice to do
+	colorspace conversion from the received colorspace to the requested
+	colorspace values. If the colorimetry field (``colorspace``, ``xfer_func``,
+	``ycbcr_enc``, ``hsv_enc`` or ``quantization``) is set to ``*_DEFAULT``,
+	then that colorimetry setting will remain unchanged from what was received.
+	So in order to change the quantization, only the ``quantization`` field shall
+	be set to non default value (``V4L2_QUANTIZATION_FULL_RANGE`` or
+	``V4L2_QUANTIZATION_LIM_RANGE``) and all other colorimetry fields shall
+	be set to ``*_DEFAULT``.
+
+	To check which conversions are supported by the hardware for the current
+	media bus frame format, see :ref:`v4l2-subdev-mbus-code-flags`.
 
 
 .. _v4l2-mbus-pixelcode:
diff --git a/Documentation/userspace-api/media/v4l/vidioc-subdev-enum-mbus-code.rst b/Documentation/userspace-api/media/v4l/vidioc-subdev-enum-mbus-code.rst
index 9db76f7d240f..3b6a8044c391 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-subdev-enum-mbus-code.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-subdev-enum-mbus-code.rst
@@ -72,11 +72,60 @@ information about the try formats.
       - Media bus format codes to be enumerated, from enum
 	:ref:`v4l2_subdev_format_whence <v4l2-subdev-format-whence>`.
     * - __u32
-      - ``reserved``\ [8]
+      - ``flags``
+      - See :ref:`v4l2-subdev-mbus-code-flags`
+    * - __u32
+      - ``reserved``\ [7]
       - Reserved for future extensions. Applications and drivers must set
 	the array to zero.
 
 
+
+.. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{7.7cm}|
+
+.. _v4l2-subdev-mbus-code-flags:
+
+.. flat-table:: Subdev Media Bus Code Enumerate Flags
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - V4L2_SUBDEV_MBUS_CODE_CSC_COLORSPACE
+      - 0x00000001
+      - The driver allows the application to try to change the default colorspace
+	encoding. The application can ask to configure the colorspace of the
+	subdevice when calling the :ref:`VIDIOC_SUBDEV_S_FMT <VIDIOC_SUBDEV_G_FMT>`
+	ioctl with :ref:`V4L2_MBUS_FRAMEFMT_SET_CSC <mbus-framefmt-set-csc>` set.
+	See :ref:`v4l2-mbus-format` on how to do this.
+    * - V4L2_SUBDEV_MBUS_CODE_CSC_XFER_FUNC
+      - 0x00000002
+      - The driver allows the application to try to change the default transfer function.
+	The application can ask to configure the transfer function of
+	the subdevice when calling the :ref:`VIDIOC_SUBDEV_S_FMT <VIDIOC_SUBDEV_G_FMT>`
+	ioctl with :ref:`V4L2_MBUS_FRAMEFMT_SET_CSC <mbus-framefmt-set-csc>` set.
+	See :ref:`v4l2-mbus-format` on how to do this.
+    * - V4L2_SUBDEV_MBUS_CODE_CSC_YCBCR_ENC
+      - 0x00000004
+      - The driver allows the application to try to change the default Y'CbCr
+	encoding. The application can ask to configure the Y'CbCr encoding of the
+	subdevice when calling the :ref:`VIDIOC_SUBDEV_S_FMT <VIDIOC_SUBDEV_G_FMT>`
+	ioctl with :ref:`V4L2_MBUS_FRAMEFMT_SET_CSC <mbus-framefmt-set-csc>` set.
+	See :ref:`v4l2-mbus-format` on how to do this.
+    * - V4L2_SUBDEV_MBUS_CODE_CSC_HSV_ENC
+      - 0x00000004
+      - The driver allows the application to try to change the default HSV
+	encoding. The application can ask to configure the HSV encoding of the
+	subdevice when calling the :ref:`VIDIOC_SUBDEV_S_FMT <VIDIOC_SUBDEV_G_FMT>`
+	ioctl with :ref:`V4L2_MBUS_FRAMEFMT_SET_CSC <mbus-framefmt-set-csc>` set.
+	See :ref:`v4l2-mbus-format` on how to do this.
+    * - V4L2_SUBDEV_MBUS_CODE_CSC_QUANTIZATION
+      - 0x00000008
+      - The driver allows the application to try to change the default
+	quantization. The application can ask to configure the quantization of
+	the subdevice when calling the :ref:`VIDIOC_SUBDEV_S_FMT <VIDIOC_SUBDEV_G_FMT>`
+	ioctl with :ref:`V4L2_MBUS_FRAMEFMT_SET_CSC <mbus-framefmt-set-csc>` set.
+	See :ref:`v4l2-mbus-format` on how to do this.
+
 Return Value
 ============
 
diff --git a/include/uapi/linux/v4l2-mediabus.h b/include/uapi/linux/v4l2-mediabus.h
index 123a231001a8..903e67b16711 100644
--- a/include/uapi/linux/v4l2-mediabus.h
+++ b/include/uapi/linux/v4l2-mediabus.h
@@ -16,6 +16,8 @@
 #include <linux/types.h>
 #include <linux/videodev2.h>
 
+#define V4L2_MBUS_FRAMEFMT_SET_CSC	0x0001
+
 /**
  * struct v4l2_mbus_framefmt - frame format on the media bus
  * @width:	image width
@@ -24,8 +26,11 @@
  * @field:	used interlacing type (from enum v4l2_field)
  * @colorspace:	colorspace of the data (from enum v4l2_colorspace)
  * @ycbcr_enc:	YCbCr encoding of the data (from enum v4l2_ycbcr_encoding)
+ * @hsv_enc:	HSV encoding of the data (from enum v4l2_hsv_encoding)
  * @quantization: quantization of the data (from enum v4l2_quantization)
  * @xfer_func:  transfer function of the data (from enum v4l2_xfer_func)
+ * @flags:	flags (V4L2_MBUS_FRAMEFMT_*)
+ * @reserved:  reserved bytes that can be later used
  */
 struct v4l2_mbus_framefmt {
 	__u32			width;
@@ -33,10 +38,16 @@ struct v4l2_mbus_framefmt {
 	__u32			code;
 	__u32			field;
 	__u32			colorspace;
-	__u16			ycbcr_enc;
+	union {
+		/* enum v4l2_ycbcr_encoding */
+		__u16			ycbcr_enc;
+		/* enum v4l2_hsv_encoding */
+		__u16			hsv_enc;
+	};
 	__u16			quantization;
 	__u16			xfer_func;
-	__u16			reserved[11];
+	__u16			flags;
+	__u16			reserved[10];
 };
 
 #ifndef __KERNEL__
diff --git a/include/uapi/linux/v4l2-subdev.h b/include/uapi/linux/v4l2-subdev.h
index 5d2a1dab7911..00850b98078a 100644
--- a/include/uapi/linux/v4l2-subdev.h
+++ b/include/uapi/linux/v4l2-subdev.h
@@ -65,19 +65,27 @@ struct v4l2_subdev_crop {
 	__u32 reserved[8];
 };
 
+#define V4L2_SUBDEV_MBUS_CODE_CSC_COLORSPACE	0x00000001
+#define V4L2_SUBDEV_MBUS_CODE_CSC_XFER_FUNC	0x00000002
+#define V4L2_SUBDEV_MBUS_CODE_CSC_YCBCR_ENC	0x00000004
+#define V4L2_SUBDEV_MBUS_CODE_CSC_HSV_ENC	V4L2_SUBDEV_MBUS_CODE_CSC_YCBCR_ENC
+#define V4L2_SUBDEV_MBUS_CODE_CSC_QUANTIZATION	0x00000008
+
 /**
  * struct v4l2_subdev_mbus_code_enum - Media bus format enumeration
  * @pad: pad number, as reported by the media API
  * @index: format index during enumeration
  * @code: format code (MEDIA_BUS_FMT_ definitions)
  * @which: format type (from enum v4l2_subdev_format_whence)
+ * @flags: flags set by the driver, (V4L2_SUBDEV_MBUS_CODE_*)
  */
 struct v4l2_subdev_mbus_code_enum {
 	__u32 pad;
 	__u32 index;
 	__u32 code;
 	__u32 which;
-	__u32 reserved[8];
+	__u32 flags;
+	__u32 reserved[7];
 };
 
 /**
-- 
cgit v1.2.3


From c8cb5b854b40f2ce52ccd032fa19750f4181d5fc Mon Sep 17 00:00:00 2001
From: Tova Mussai <tova.mussai@intel.com>
Date: Fri, 18 Sep 2020 11:33:13 +0200
Subject: nl80211/cfg80211: support 6 GHz scanning

Support 6 GHz scanning, by
 * a new scan flag to scan for colocated BSSes advertised
   by (and found) APs on 2.4 & 5 GHz
 * doing the necessary reduced neighbor report parsing for
   this, to find them
 * adding the ability to split the scan request in case the
   device by itself cannot support this.

Also add some necessary bits in mac80211 to not break with
these changes.

Signed-off-by: Tova Mussai <tova.mussai@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Link: https://lore.kernel.org/r/20200918113313.232917c93af9.Ida22f0212f9122f47094d81659e879a50434a6a2@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  32 ++-
 include/uapi/linux/nl80211.h |   3 +
 net/mac80211/scan.c          |   9 +-
 net/wireless/core.c          |   8 +-
 net/wireless/core.h          |   5 +-
 net/wireless/nl80211.c       |  11 +-
 net/wireless/scan.c          | 501 ++++++++++++++++++++++++++++++++++++++++++-
 7 files changed, 552 insertions(+), 17 deletions(-)

(limited to 'include/uapi')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 10c2cc8f0efc..11eb81676e95 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2095,6 +2095,27 @@ struct cfg80211_scan_info {
 	bool aborted;
 };
 
+/**
+ * struct cfg80211_scan_6ghz_params - relevant for 6 GHz only
+ *
+ * @short_ssid: short ssid to scan for
+ * @bssid: bssid to scan for
+ * @channel_idx: idx of the channel in the channel array in the scan request
+ *	 which the above info is relevant to
+ * @unsolicited_probe: the AP transmits unsolicited probe response every 20 TU
+ * @short_ssid_valid: short_ssid is valid and can be used
+ * @psc_no_listen: when set, and the channel is a PSC channel, no need to wait
+ *       20 TUs before starting to send probe requests.
+ */
+struct cfg80211_scan_6ghz_params {
+	u32 short_ssid;
+	u32 channel_idx;
+	u8 bssid[ETH_ALEN];
+	bool unsolicited_probe;
+	bool short_ssid_valid;
+	bool psc_no_listen;
+};
+
 /**
  * struct cfg80211_scan_request - scan request description
  *
@@ -2122,6 +2143,10 @@ struct cfg80211_scan_info {
  * @mac_addr_mask: MAC address mask used with randomisation, bits that
  *	are 0 in the mask should be randomised, bits that are 1 should
  *	be taken from the @mac_addr
+ * @scan_6ghz: relevant for split scan request only,
+ *	true if this is the second scan request
+ * @n_6ghz_params: number of 6 GHz params
+ * @scan_6ghz_params: 6 GHz params
  * @bssid: BSSID to scan for (most commonly, the wildcard BSSID)
  */
 struct cfg80211_scan_request {
@@ -2149,6 +2174,9 @@ struct cfg80211_scan_request {
 	struct cfg80211_scan_info info;
 	bool notified;
 	bool no_cck;
+	bool scan_6ghz;
+	u32 n_6ghz_params;
+	struct cfg80211_scan_6ghz_params *scan_6ghz_params;
 
 	/* keep last */
 	struct ieee80211_channel *channels[];
@@ -4217,6 +4245,8 @@ struct cfg80211_ops {
 /**
  * enum wiphy_flags - wiphy capability flags
  *
+ * @WIPHY_FLAG_SPLIT_SCAN_6GHZ: if set to true, the scan request will be split
+ *	 into two, first for legacy bands and second for UHB.
  * @WIPHY_FLAG_NETNS_OK: if not set, do not allow changing the netns of this
  *	wiphy at all
  * @WIPHY_FLAG_PS_ON_BY_DEFAULT: if set to true, powersave will be enabled
@@ -4260,7 +4290,7 @@ struct cfg80211_ops {
 enum wiphy_flags {
 	WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK		= BIT(0),
 	/* use hole at 1 */
-	/* use hole at 2 */
+	WIPHY_FLAG_SPLIT_SCAN_6GHZ		= BIT(2),
 	WIPHY_FLAG_NETNS_OK			= BIT(3),
 	WIPHY_FLAG_PS_ON_BY_DEFAULT		= BIT(4),
 	WIPHY_FLAG_4ADDR_AP			= BIT(5),
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index bdc90b8dfd24..c74ceaddb909 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -6059,6 +6059,8 @@ enum nl80211_timeout_reason {
  * @NL80211_SCAN_FLAG_FREQ_KHZ: report scan results with
  *	%NL80211_ATTR_SCAN_FREQ_KHZ. This also means
  *	%NL80211_ATTR_SCAN_FREQUENCIES will not be included.
+ * @NL80211_SCAN_FLAG_COLOCATED_6GHZ: scan for colocated APs reported by
+ *	2.4/5 GHz APs
  */
 enum nl80211_scan_flags {
 	NL80211_SCAN_FLAG_LOW_PRIORITY				= 1<<0,
@@ -6075,6 +6077,7 @@ enum nl80211_scan_flags {
 	NL80211_SCAN_FLAG_RANDOM_SN				= 1<<11,
 	NL80211_SCAN_FLAG_MIN_PREQ_CONTENT			= 1<<12,
 	NL80211_SCAN_FLAG_FREQ_KHZ				= 1<<13,
+	NL80211_SCAN_FLAG_COLOCATED_6GHZ			= 1<<14,
 };
 
 /**
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 5ac2785cdc7b..7361e1239bf2 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -9,7 +9,7 @@
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2013-2015  Intel Mobile Communications GmbH
  * Copyright 2016-2017  Intel Deutschland GmbH
- * Copyright (C) 2018-2019 Intel Corporation
+ * Copyright (C) 2018-2020 Intel Corporation
  */
 
 #include <linux/if_arp.h>
@@ -712,6 +712,10 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
 			req->duration_mandatory;
 
 		local->hw_scan_band = 0;
+		local->hw_scan_req->req.n_6ghz_params = req->n_6ghz_params;
+		local->hw_scan_req->req.scan_6ghz_params =
+			req->scan_6ghz_params;
+		local->hw_scan_req->req.scan_6ghz = req->scan_6ghz;
 
 		/*
 		 * After allocating local->hw_scan_req, we must
@@ -1124,7 +1128,8 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata,
 		int max_n;
 
 		for (band = 0; band < NUM_NL80211_BANDS; band++) {
-			if (!local->hw.wiphy->bands[band])
+			if (!local->hw.wiphy->bands[band] ||
+			    band == NL80211_BAND_6GHZ)
 				continue;
 
 			max_n = local->hw.wiphy->bands[band]->n_channels;
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 354b0ccbdc24..9f23923e8d29 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -236,7 +236,9 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
 	rdev->opencount--;
 
 	if (rdev->scan_req && rdev->scan_req->wdev == wdev) {
-		if (WARN_ON(!rdev->scan_req->notified))
+		if (WARN_ON(!rdev->scan_req->notified &&
+			    (!rdev->int_scan_req ||
+			     !rdev->int_scan_req->notified)))
 			rdev->scan_req->info.aborted = true;
 		___cfg80211_scan_done(rdev, false);
 	}
@@ -1336,7 +1338,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
 	case NETDEV_DOWN:
 		cfg80211_update_iface_num(rdev, wdev->iftype, -1);
 		if (rdev->scan_req && rdev->scan_req->wdev == wdev) {
-			if (WARN_ON(!rdev->scan_req->notified))
+			if (WARN_ON(!rdev->scan_req->notified &&
+				    (!rdev->int_scan_req ||
+				     !rdev->int_scan_req->notified)))
 				rdev->scan_req->info.aborted = true;
 			___cfg80211_scan_done(rdev, false);
 		}
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 2ebc2a66680d..e1ec9ac8e608 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -3,7 +3,7 @@
  * Wireless configuration interface internals.
  *
  * Copyright 2006-2010	Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2018-2019 Intel Corporation
+ * Copyright (C) 2018-2020 Intel Corporation
  */
 #ifndef __NET_WIRELESS_CORE_H
 #define __NET_WIRELESS_CORE_H
@@ -72,6 +72,7 @@ struct cfg80211_registered_device {
 	u32 bss_generation;
 	u32 bss_entries;
 	struct cfg80211_scan_request *scan_req; /* protected by RTNL */
+	struct cfg80211_scan_request *int_scan_req;
 	struct sk_buff *scan_msg;
 	struct list_head sched_scan_req_list;
 	time64_t suspend_at;
@@ -457,6 +458,8 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev);
 bool cfg80211_does_bw_fit_range(const struct ieee80211_freq_range *freq_range,
 				u32 center_freq_khz, u32 bw_khz);
 
+int cfg80211_scan(struct cfg80211_registered_device *rdev);
+
 extern struct work_struct cfg80211_disconnect_work;
 
 /**
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 1a212db7a300..d98db166d5e6 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -8236,7 +8236,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
 	request->scan_start = jiffies;
 
 	rdev->scan_req = request;
-	err = rdev_scan(rdev, request);
+	err = cfg80211_scan(rdev);
 
 	if (err)
 		goto out_free;
@@ -15518,6 +15518,7 @@ static int nl80211_add_scan_req(struct sk_buff *msg,
 	struct cfg80211_scan_request *req = rdev->scan_req;
 	struct nlattr *nest;
 	int i;
+	struct cfg80211_scan_info *info;
 
 	if (WARN_ON(!req))
 		return 0;
@@ -15561,11 +15562,13 @@ static int nl80211_add_scan_req(struct sk_buff *msg,
 	    nla_put_u32(msg, NL80211_ATTR_SCAN_FLAGS, req->flags))
 		goto nla_put_failure;
 
-	if (req->info.scan_start_tsf &&
+	info = rdev->int_scan_req ? &rdev->int_scan_req->info :
+		&rdev->scan_req->info;
+	if (info->scan_start_tsf &&
 	    (nla_put_u64_64bit(msg, NL80211_ATTR_SCAN_START_TIME_TSF,
-			       req->info.scan_start_tsf, NL80211_BSS_PAD) ||
+			       info->scan_start_tsf, NL80211_BSS_PAD) ||
 	     nla_put(msg, NL80211_ATTR_SCAN_START_TIME_TSF_BSSID, ETH_ALEN,
-		     req->info.tsf_bssid)))
+		     info->tsf_bssid)))
 		goto nla_put_failure;
 
 	return 0;
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 84fc8ab16dd2..4fbeb17580d9 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -5,7 +5,7 @@
  * Copyright 2008 Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright 2016	Intel Deutschland GmbH
- * Copyright (C) 2018-2019 Intel Corporation
+ * Copyright (C) 2018-2020 Intel Corporation
  */
 #include <linux/kernel.h>
 #include <linux/slab.h>
@@ -14,6 +14,8 @@
 #include <linux/wireless.h>
 #include <linux/nl80211.h>
 #include <linux/etherdevice.h>
+#include <linux/crc32.h>
+#include <linux/bitfield.h>
 #include <net/arp.h>
 #include <net/cfg80211.h>
 #include <net/cfg80211-wext.h>
@@ -74,6 +76,43 @@ MODULE_PARM_DESC(bss_entries_limit,
 
 #define IEEE80211_SCAN_RESULT_EXPIRE	(30 * HZ)
 
+/**
+ * struct cfg80211_colocated_ap - colocated AP information
+ *
+ * @list: list node linking this entry into a list of colocated APs
+ * @bssid: BSSID of the reported AP
+ * @ssid: SSID of the reported AP
+ * @ssid_len: length of the ssid
+ * @center_freq: frequency the reported AP is on
+ * @unsolicited_probe: the reported AP is part of an ESS, where all the APs
+ *	that operate in the same channel as the reported AP and that might be
+ *	detected by a STA receiving this frame, are transmitting unsolicited
+ *	Probe Response frames every 20 TUs
+ * @oct_recommended: OCT is recommended to exchange MMPDUs with the reported AP
+ * @same_ssid: the reported AP has the same SSID as the reporting AP
+ * @multi_bss: the reported AP is part of a multiple BSSID set
+ * @transmitted_bssid: the reported AP is the transmitting BSSID
+ * @colocated_ess: all the APs that share the same ESS as the reported AP are
+ *	colocated and can be discovered via legacy bands.
+ * @short_ssid_valid: short_ssid is valid and can be used
+ * @short_ssid: the short SSID for this SSID
+ */
+struct cfg80211_colocated_ap {
+	struct list_head list;
+	u8 bssid[ETH_ALEN];
+	u8 ssid[IEEE80211_MAX_SSID_LEN];
+	size_t ssid_len;
+	u32 short_ssid;
+	u32 center_freq;
+	u8 unsolicited_probe:1,
+	   oct_recommended:1,
+	   same_ssid:1,
+	   multi_bss:1,
+	   transmitted_bssid:1,
+	   colocated_ess:1,
+	   short_ssid_valid:1;
+};
+
 static void bss_free(struct cfg80211_internal_bss *bss)
 {
 	struct cfg80211_bss_ies *ies;
@@ -448,10 +487,433 @@ static bool cfg80211_bss_expire_oldest(struct cfg80211_registered_device *rdev)
 	return ret;
 }
 
+static u8 cfg80211_parse_bss_param(u8 data,
+				   struct cfg80211_colocated_ap *coloc_ap)
+{
+	coloc_ap->oct_recommended =
+		u8_get_bits(data, IEEE80211_RNR_TBTT_PARAMS_OCT_RECOMMENDED);
+	coloc_ap->same_ssid =
+		u8_get_bits(data, IEEE80211_RNR_TBTT_PARAMS_SAME_SSID);
+	coloc_ap->multi_bss =
+		u8_get_bits(data, IEEE80211_RNR_TBTT_PARAMS_MULTI_BSSID);
+	coloc_ap->transmitted_bssid =
+		u8_get_bits(data, IEEE80211_RNR_TBTT_PARAMS_TRANSMITTED_BSSID);
+	coloc_ap->unsolicited_probe =
+		u8_get_bits(data, IEEE80211_RNR_TBTT_PARAMS_PROBE_ACTIVE);
+	coloc_ap->colocated_ess =
+		u8_get_bits(data, IEEE80211_RNR_TBTT_PARAMS_COLOC_ESS);
+	/* The COLOC_AP field is the result; the caller drops the AP when it is 0. */
+	return u8_get_bits(data, IEEE80211_RNR_TBTT_PARAMS_COLOC_AP);
+}
+/* Locate the SSID element in @ies and store its short SSID (inverted CRC32) in @s_ssid. */
+static int cfg80211_calc_short_ssid(const struct cfg80211_bss_ies *ies,
+				    const struct element **elem, u32 *s_ssid)
+{
+	/* -EINVAL when there is no SSID element or it exceeds IEEE80211_MAX_SSID_LEN. */
+	*elem = cfg80211_find_elem(WLAN_EID_SSID, ies->data, ies->len);
+	if (!*elem || (*elem)->datalen > IEEE80211_MAX_SSID_LEN)
+		return -EINVAL;
+
+	*s_ssid = ~crc32_le(~0, (*elem)->data, (*elem)->datalen);
+	return 0;
+}
+/* Unlink and kfree() every struct cfg80211_colocated_ap on @coloc_ap_list. */
+static void cfg80211_free_coloc_ap_list(struct list_head *coloc_ap_list)
+{
+	struct cfg80211_colocated_ap *ap, *tmp_ap;
+
+	list_for_each_entry_safe(ap, tmp_ap, coloc_ap_list, list) {
+		list_del(&ap->list);
+		kfree(ap);
+	}
+}
+/* Fill @entry from one TBTT info field at @pos; -EINVAL means "not colocated, drop it". */
+static int cfg80211_parse_ap_info(struct cfg80211_colocated_ap *entry,
+				  const u8 *pos, u8 length,
+				  const struct element *ssid_elem,
+				  int s_ssid_tmp)
+{
+	/* skip the TBTT offset */
+	pos++;
+
+	memcpy(entry->bssid, pos, ETH_ALEN);
+	pos += ETH_ALEN;
+	/* the caller accepts this length and anything longer: parse all of them alike */
+	if (length >= IEEE80211_TBTT_INFO_OFFSET_BSSID_SSSID_BSS_PARAM) {
+		memcpy(&entry->short_ssid, pos,
+		       sizeof(entry->short_ssid));
+		entry->short_ssid_valid = true;
+		pos += 4;
+	}
+
+	/* skip non colocated APs */
+	if (!cfg80211_parse_bss_param(*pos, entry))
+		return -EINVAL;
+	pos++;
+
+	if (length == IEEE80211_TBTT_INFO_OFFSET_BSSID_BSS_PARAM) {
+		/*
+		 * no information about the short ssid. Consider the entry valid
+		 * for now. It would later be dropped in case there are explicit
+		 * SSIDs that need to be matched
+		 */
+		if (!entry->same_ssid)
+			return 0;
+	}
+
+	if (entry->same_ssid) {
+		entry->short_ssid = s_ssid_tmp;
+		entry->short_ssid_valid = true;
+
+		/*
+		 * This is safe because we validate datalen in
+		 * cfg80211_parse_colocated_ap(), before calling this
+		 * function.
+		 */
+		memcpy(&entry->ssid, &ssid_elem->data,
+		       ssid_elem->datalen);
+		entry->ssid_len = ssid_elem->datalen;
+	}
+	return 0;
+}
+/* Append the colocated 6 GHz APs listed in the RNR element of @ies to @list; returns how many. */
+static int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies,
+				       struct list_head *list)
+{
+	struct ieee80211_neighbor_ap_info *ap_info;
+	const struct element *elem, *ssid_elem;
+	const u8 *pos, *end;
+	u32 s_ssid_tmp;
+	int n_coloc = 0, ret;
+	LIST_HEAD(ap_list);
+
+	elem = cfg80211_find_elem(WLAN_EID_REDUCED_NEIGHBOR_REPORT, ies->data,
+				  ies->len);
+	if (!elem)
+		return 0;
+
+	pos = elem->data;
+	end = pos + elem->datalen;
+
+	ret = cfg80211_calc_short_ssid(ies, &ssid_elem, &s_ssid_tmp);
+	if (ret)
+		return 0;	/* the caller sums these counts: never go negative */
+
+	/* RNR IE may contain more than one NEIGHBOR_AP_INFO */
+	while (pos + sizeof(*ap_info) <= end) {
+		enum nl80211_band band;
+		int freq;
+		u8 length, i, count;
+
+		ap_info = (void *)pos;
+		count = u8_get_bits(ap_info->tbtt_info_hdr,
+				    IEEE80211_AP_INFO_TBTT_HDR_COUNT) + 1;
+		length = ap_info->tbtt_info_len;
+
+		pos += sizeof(*ap_info);
+
+		if (!ieee80211_operating_class_to_band(ap_info->op_class,
+						       &band))
+			break;
+
+		freq = ieee80211_channel_to_frequency(ap_info->channel, band);
+
+		if (end - pos < count * ap_info->tbtt_info_len)
+			break;
+
+		/*
+		 * TBTT info must include bss param + BSSID +
+		 * (short SSID or same_ssid bit to be set).
+		 * ignore other options, and move to the
+		 * next AP info
+		 */
+		if (band != NL80211_BAND_6GHZ ||
+		    (length != IEEE80211_TBTT_INFO_OFFSET_BSSID_BSS_PARAM &&
+		     length < IEEE80211_TBTT_INFO_OFFSET_BSSID_SSSID_BSS_PARAM)) {
+			pos += count * ap_info->tbtt_info_len;
+			continue;
+		}
+
+		for (i = 0; i < count; i++) {
+			struct cfg80211_colocated_ap *entry;
+
+			/* ssid[] lives inside the struct: no extra room needed */
+			entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+
+			if (!entry)
+				break;
+
+			entry->center_freq = freq;
+
+			if (!cfg80211_parse_ap_info(entry, pos, length,
+						    ssid_elem, s_ssid_tmp)) {
+				n_coloc++;
+				list_add_tail(&entry->list, &ap_list);
+			} else {
+				kfree(entry);
+			}
+
+			pos += ap_info->tbtt_info_len;
+		}
+	}
+
+	if (pos != end) {	/* parsing stopped early: discard everything collected */
+		cfg80211_free_coloc_ap_list(&ap_list);
+		return 0;
+	}
+
+	list_splice_tail(&ap_list, list);
+	return n_coloc;
+}
+/* Add @chan to @request->channels if absent; @add_to_6ghz records its index. */
+static  void cfg80211_scan_req_add_chan(struct cfg80211_scan_request *request,
+					struct ieee80211_channel *chan,
+					bool add_to_6ghz)
+{
+	int i;
+	u32 n_channels = request->n_channels;
+	struct cfg80211_scan_6ghz_params *params =
+		&request->scan_6ghz_params[request->n_6ghz_params];
+
+	for (i = 0; i < n_channels; i++) {
+		if (request->channels[i] == chan) {
+			if (add_to_6ghz)
+				params->channel_idx = i;
+			return;
+		}
+	}
+	/* not in the request yet: append it as the last channel */
+	request->channels[n_channels] = chan;
+	if (add_to_6ghz)
+		request->scan_6ghz_params[request->n_6ghz_params].channel_idx =
+			n_channels;
+
+	request->n_channels++;
+}
+/* True if @ap matches an SSID in @request, by full SSID or by short SSID. */
+static bool cfg80211_find_ssid_match(struct cfg80211_colocated_ap *ap,
+				     struct cfg80211_scan_request *request)
+{
+	u8 i;
+	u32 s_ssid;
+
+	for (i = 0; i < request->n_ssids; i++) {
+		/* wildcard ssid in the scan request */
+		if (!request->ssids[i].ssid_len)
+			return true;
+
+		if (ap->ssid_len &&
+		    ap->ssid_len == request->ssids[i].ssid_len) {
+			if (!memcmp(request->ssids[i].ssid, ap->ssid,
+				    ap->ssid_len))
+				return true;
+		} else if (ap->short_ssid_valid) {
+			s_ssid = ~crc32_le(~0, request->ssids[i].ssid,
+					   request->ssids[i].ssid_len);
+
+			if (ap->short_ssid == s_ssid)
+				return true;
+		}
+	}
+
+	return false;
+}
+/* Build the internal 6 GHz scan request from rdev->scan_req and hand it to rdev_scan(). */
+static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
+{
+	u8 i;
+	struct cfg80211_colocated_ap *ap;
+	int n_channels, count = 0, err;
+	struct cfg80211_scan_request *request, *rdev_req = rdev->scan_req;
+	LIST_HEAD(coloc_ap_list);
+	bool need_scan_psc;
+	const struct ieee80211_sband_iftype_data *iftd;
+
+	rdev_req->scan_6ghz = true;
+
+	if (!rdev->wiphy.bands[NL80211_BAND_6GHZ])
+		return -EOPNOTSUPP;
+
+	iftd = ieee80211_get_sband_iftype_data(rdev->wiphy.bands[NL80211_BAND_6GHZ],
+					       rdev_req->wdev->iftype);
+	if (!iftd || !iftd->he_cap.has_he)
+		return -EOPNOTSUPP;
+
+	n_channels = rdev->wiphy.bands[NL80211_BAND_6GHZ]->n_channels;
+
+	if (rdev_req->flags & NL80211_SCAN_FLAG_COLOCATED_6GHZ) {
+		struct cfg80211_internal_bss *intbss;
+
+		spin_lock_bh(&rdev->bss_lock);
+		list_for_each_entry(intbss, &rdev->bss_list, list) {
+			struct cfg80211_bss *res = &intbss->pub;
+			const struct cfg80211_bss_ies *ies;
+
+			ies = rcu_access_pointer(res->ies);
+			count += cfg80211_parse_colocated_ap(ies,
+							     &coloc_ap_list);
+		}
+		spin_unlock_bh(&rdev->bss_lock);
+	}
+	/* one allocation: the request, n_channels channel slots, then count params entries */
+	request = kzalloc(struct_size(request, channels, n_channels) +
+			  sizeof(*request->scan_6ghz_params) * count,
+			  GFP_KERNEL);
+	if (!request) {
+		cfg80211_free_coloc_ap_list(&coloc_ap_list);
+		return -ENOMEM;
+	}
+
+	*request = *rdev_req;
+	request->n_channels = 0;
+	request->scan_6ghz_params =
+		(void *)&request->channels[n_channels];
+
+	/*
+	 * PSC channels should not be scanned if all the reported co-located APs
+	 * are indicating that all APs in the same ESS are co-located
+	 */
+	if (count) {
+		need_scan_psc = false;
+
+		list_for_each_entry(ap, &coloc_ap_list, list) {
+			if (!ap->colocated_ess) {
+				need_scan_psc = true;
+				break;
+			}
+		}
+	} else {
+		need_scan_psc = true;
+	}
+
+	/*
+	 * add to the scan request the channels that need to be scanned
+	 * regardless of the co-located APs (PSC channels, or all channels
+	 * in case NL80211_SCAN_FLAG_COLOCATED_6GHZ is not set)
+	 */
+	for (i = 0; i < rdev_req->n_channels; i++) {
+		if (rdev_req->channels[i]->band == NL80211_BAND_6GHZ &&
+		    ((need_scan_psc &&
+		      cfg80211_channel_is_psc(rdev_req->channels[i])) ||
+		     !(rdev_req->flags & NL80211_SCAN_FLAG_COLOCATED_6GHZ))) {
+			cfg80211_scan_req_add_chan(request,
+						   rdev_req->channels[i],
+						   false);
+		}
+	}
+
+	if (!(rdev_req->flags & NL80211_SCAN_FLAG_COLOCATED_6GHZ))
+		goto skip;
+
+	list_for_each_entry(ap, &coloc_ap_list, list) {
+		bool found = false;
+		struct cfg80211_scan_6ghz_params *scan_6ghz_params =
+			&request->scan_6ghz_params[request->n_6ghz_params];
+		struct ieee80211_channel *chan =
+			ieee80211_get_channel(&rdev->wiphy, ap->center_freq);
+
+		if (!chan || chan->flags & IEEE80211_CHAN_DISABLED)
+			continue;
+
+		for (i = 0; i < rdev_req->n_channels; i++) {
+			if (rdev_req->channels[i] == chan)
+				found = true;
+		}
+
+		if (!found)
+			continue;
+
+		if (request->n_ssids > 0 &&
+		    !cfg80211_find_ssid_match(ap, request))
+			continue;
+
+		cfg80211_scan_req_add_chan(request, chan, true);
+		memcpy(scan_6ghz_params->bssid, ap->bssid, ETH_ALEN);
+		scan_6ghz_params->short_ssid = ap->short_ssid;
+		scan_6ghz_params->short_ssid_valid = ap->short_ssid_valid;
+		scan_6ghz_params->unsolicited_probe = ap->unsolicited_probe;
+
+		/*
+		 * If a PSC channel is added to the scan and 'need_scan_psc' is
+		 * set to false, then all the APs that the scan logic is
+		 * interested in on the channel are co-located and thus there
+		 * is no need to perform the initial PSC channel listen.
+		 */
+		if (cfg80211_channel_is_psc(chan) && !need_scan_psc)
+			scan_6ghz_params->psc_no_listen = true;
+
+		request->n_6ghz_params++;
+	}
+
+skip:
+	cfg80211_free_coloc_ap_list(&coloc_ap_list);
+
+	if (request->n_channels) {
+		struct cfg80211_scan_request *old = rdev->int_scan_req;
+
+		rdev->int_scan_req = request;
+
+		/*
+		 * If this scan follows a previous scan, save the scan start
+		 * info from the first part of the scan
+		 */
+		if (old)
+			rdev->int_scan_req->info = old->info;
+
+		err = rdev_scan(rdev, request);
+		if (err) {
+			rdev->int_scan_req = old;
+			kfree(request);
+		} else {
+			kfree(old);
+		}
+
+		return err;
+	}
+
+	kfree(request);
+	return -EINVAL;
+}
+/* Start rdev->scan_req; on split-scan wiphys the non-6 GHz channels are scanned first. */
+int cfg80211_scan(struct cfg80211_registered_device *rdev)
+{
+	struct cfg80211_scan_request *request;
+	struct cfg80211_scan_request *rdev_req = rdev->scan_req;
+	u32 n_channels = 0, idx, i;
+
+	if (!(rdev->wiphy.flags & WIPHY_FLAG_SPLIT_SCAN_6GHZ))
+		return rdev_scan(rdev, rdev_req);
+
+	for (i = 0; i < rdev_req->n_channels; i++) {
+		if (rdev_req->channels[i]->band != NL80211_BAND_6GHZ)
+			n_channels++;
+	}
+
+	if (!n_channels)
+		return cfg80211_scan_6ghz(rdev);
+
+	request = kzalloc(struct_size(request, channels, n_channels),
+			  GFP_KERNEL);
+	if (!request)
+		return -ENOMEM;
+
+	*request = *rdev_req;
+	request->n_channels = n_channels;
+
+	for (i = idx = 0; i < rdev_req->n_channels; i++) {
+		if (rdev_req->channels[i]->band != NL80211_BAND_6GHZ)
+			request->channels[idx++] = rdev_req->channels[i];
+	}
+	/* NOTE(review): if rdev_scan() fails, int_scan_req stays set - confirm who frees it */
+	rdev_req->scan_6ghz = false;
+	rdev->int_scan_req = request;
+	return rdev_scan(rdev, request);
+}
+
 void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
 			   bool send_message)
 {
-	struct cfg80211_scan_request *request;
+	struct cfg80211_scan_request *request, *rdev_req;
 	struct wireless_dev *wdev;
 	struct sk_buff *msg;
 #ifdef CONFIG_CFG80211_WEXT
@@ -466,11 +928,18 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
 		return;
 	}
 
-	request = rdev->scan_req;
-	if (!request)
+	rdev_req = rdev->scan_req;
+	if (!rdev_req)
 		return;
 
-	wdev = request->wdev;
+	wdev = rdev_req->wdev;
+	request = rdev->int_scan_req ? rdev->int_scan_req : rdev_req;
+
+	if (wdev_running(wdev) &&
+	    (rdev->wiphy.flags & WIPHY_FLAG_SPLIT_SCAN_6GHZ) &&
+	    !rdev_req->scan_6ghz && !request->info.aborted &&
+	    !cfg80211_scan_6ghz(rdev))
+		return;
 
 	/*
 	 * This must be before sending the other events!
@@ -501,8 +970,11 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
 	if (wdev->netdev)
 		dev_put(wdev->netdev);
 
+	kfree(rdev->int_scan_req);
+	rdev->int_scan_req = NULL;
+
+	kfree(rdev->scan_req);
 	rdev->scan_req = NULL;
-	kfree(request);
 
 	if (!send_message)
 		rdev->scan_msg = msg;
@@ -525,10 +997,25 @@ void __cfg80211_scan_done(struct work_struct *wk)
 void cfg80211_scan_done(struct cfg80211_scan_request *request,
 			struct cfg80211_scan_info *info)
 {
+	struct cfg80211_scan_info old_info = request->info;
+
 	trace_cfg80211_scan_done(request, info);
-	WARN_ON(request != wiphy_to_rdev(request->wiphy)->scan_req);
+	WARN_ON(request != wiphy_to_rdev(request->wiphy)->scan_req &&
+		request != wiphy_to_rdev(request->wiphy)->int_scan_req);
 
 	request->info = *info;
+
+	/*
+	 * In case the scan is split, the scan_start_tsf and tsf_bssid should
+	 * be of the first part. In such a case old_info.scan_start_tsf should
+	 * be non zero.
+	 */
+	if (request->scan_6ghz && old_info.scan_start_tsf) {
+		request->info.scan_start_tsf = old_info.scan_start_tsf;
+		memcpy(request->info.tsf_bssid, old_info.tsf_bssid,
+		       sizeof(request->info.tsf_bssid));
+	}
+
 	request->notified = true;
 	queue_work(cfg80211_wq, &wiphy_to_rdev(request->wiphy)->scan_done_wk);
 }
-- 
cgit v1.2.3


From d2b7588a47de8322891de38ec14d15105d66cb1e Mon Sep 17 00:00:00 2001
From: Thomas Pedersen <thomas@adapt-ip.com>
Date: Mon, 21 Sep 2020 19:28:04 -0700
Subject: nl80211: support S1G capability overrides in assoc

NL80211_ATTR_S1G_CAPABILITY can be passed along with
NL80211_ATTR_S1G_CAPABILITY_MASK to NL80211_CMD_ASSOCIATE
to indicate S1G capabilities which should override the
hardware capabilities in e.g. the association request.

Signed-off-by: Thomas Pedersen <thomas@adapt-ip.com>
Link: https://lore.kernel.org/r/20200922022818.15855-4-thomas@adapt-ip.com
[johannes: always require both attributes together, commit message]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h    |  2 ++
 include/net/cfg80211.h       |  3 +++
 include/uapi/linux/nl80211.h |  9 +++++++++
 net/wireless/nl80211.c       | 20 ++++++++++++++++++++
 4 files changed, 34 insertions(+)

(limited to 'include/uapi')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 53fba39d4ba6..f71cffa18176 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2330,6 +2330,8 @@ ieee80211_he_spr_size(const u8 *he_spr_ie)
 }
 
 /* S1G Capabilities Information field */
+#define IEEE80211_S1G_CAPABILITY_LEN	15
+
 #define S1G_CAP0_S1G_LONG	BIT(0)
 #define S1G_CAP0_SGI_1MHZ	BIT(1)
 #define S1G_CAP0_SGI_2MHZ	BIT(2)
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 11eb81676e95..bead4b9afeca 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2556,6 +2556,8 @@ enum cfg80211_assoc_req_flags {
  * @fils_nonces: FILS nonces (part of AAD) for protecting (Re)Association
  *	Request/Response frame or %NULL if FILS is not used. This field starts
  *	with 16 octets of STA Nonce followed by 16 octets of AP Nonce.
+ * @s1g_capa: S1G capability override
+ * @s1g_capa_mask: S1G capability override mask
  */
 struct cfg80211_assoc_request {
 	struct cfg80211_bss *bss;
@@ -2570,6 +2572,7 @@ struct cfg80211_assoc_request {
 	const u8 *fils_kek;
 	size_t fils_kek_len;
 	const u8 *fils_nonces;
+	struct ieee80211_s1g_cap s1g_capa, s1g_capa_mask;
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index c74ceaddb909..05db40b4c56f 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2521,6 +2521,12 @@ enum nl80211_commands {
  *	unsolicited broadcast probe response. It is a nested attribute, see
  *	&enum nl80211_unsol_bcast_probe_resp_attributes.
  *
+ * @NL80211_ATTR_S1G_CAPABILITY: S1G Capability information element (from
+ *	association request when used with NL80211_CMD_NEW_STATION)
+ * @NL80211_ATTR_S1G_CAPABILITY_MASK: S1G Capability Information element
+ *	override mask. Used with NL80211_ATTR_S1G_CAPABILITY in
+ *	NL80211_CMD_ASSOCIATE or NL80211_CMD_CONNECT.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -3007,6 +3013,9 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_UNSOL_BCAST_PROBE_RESP,
 
+	NL80211_ATTR_S1G_CAPABILITY,
+	NL80211_ATTR_S1G_CAPABILITY_MASK,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index d98db166d5e6..d31451db5407 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -704,6 +704,10 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 		NLA_POLICY_NESTED(nl80211_fils_discovery_policy),
 	[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP] =
 		NLA_POLICY_NESTED(nl80211_unsol_bcast_probe_resp_policy),
+	[NL80211_ATTR_S1G_CAPABILITY] =
+		NLA_POLICY_EXACT_LEN(IEEE80211_S1G_CAPABILITY_LEN),
+	[NL80211_ATTR_S1G_CAPABILITY_MASK] =
+		NLA_POLICY_EXACT_LEN(IEEE80211_S1G_CAPABILITY_LEN),
 };
 
 /* policy for the key attributes */
@@ -9792,6 +9796,22 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
 			nla_data(info->attrs[NL80211_ATTR_FILS_NONCES]);
 	}
 
+	if (info->attrs[NL80211_ATTR_S1G_CAPABILITY_MASK]) {
+		if (!info->attrs[NL80211_ATTR_S1G_CAPABILITY])
+			return -EINVAL;
+		memcpy(&req.s1g_capa_mask,
+		       nla_data(info->attrs[NL80211_ATTR_S1G_CAPABILITY_MASK]),
+		       sizeof(req.s1g_capa_mask));
+	}
+
+	if (info->attrs[NL80211_ATTR_S1G_CAPABILITY]) {
+		if (!info->attrs[NL80211_ATTR_S1G_CAPABILITY_MASK])
+			return -EINVAL;
+		memcpy(&req.s1g_capa,
+		       nla_data(info->attrs[NL80211_ATTR_S1G_CAPABILITY]),
+		       sizeof(req.s1g_capa));
+	}
+
 	err = nl80211_crypto_settings(rdev, info, &req.crypto, 1);
 	if (!err) {
 		wdev_lock(dev->ieee80211_ptr);
-- 
cgit v1.2.3


From 1ae099540e8c7f1ee066b3ad45cc91f582bb1ce8 Mon Sep 17 00:00:00 2001
From: Alexander Graf <graf@amazon.com>
Date: Fri, 25 Sep 2020 16:34:16 +0200
Subject: KVM: x86: Allow deflecting unknown MSR accesses to user space

MSRs are weird. Some of them are normal control registers, such as EFER.
Some however are registers that really are model specific, not very
interesting to virtualization workloads, and not performance critical.
Others again are really just windows into package configuration.

Out of these MSRs, only the first category is necessary to implement in
kernel space. Rarely accessed MSRs, MSRs that should be fine tuned against
certain CPU models and MSRs that contain information on the package level
are much better suited for user space to process. However, over time we have
accumulated a lot of MSRs that are not the first category, but still handled
by in-kernel KVM code.

This patch adds a generic interface to handle WRMSR and RDMSR from user
space. With this, any future MSR that is part of the latter categories can
be handled in user space.

Furthermore, it allows us to replace the existing "ignore_msrs" logic with
something that applies per-VM rather than on the full system. That way you
can run productive VMs in parallel to experimental ones where you don't care
about proper MSR handling.

Signed-off-by: Alexander Graf <graf@amazon.com>
Reviewed-by: Jim Mattson <jmattson@google.com>

Message-Id: <20200925143422.21718-3-graf@amazon.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virt/kvm/api.rst  |  85 +++++++++++++++++++++++++---
 arch/x86/include/asm/kvm_host.h |   3 +
 arch/x86/kvm/emulate.c          |  18 +++++-
 arch/x86/kvm/x86.c              | 120 ++++++++++++++++++++++++++++++++++++++--
 include/trace/events/kvm.h      |   2 +-
 include/uapi/linux/kvm.h        |  13 +++++
 6 files changed, 226 insertions(+), 15 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 9e2a545d8084..4fdba43d83e8 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -4872,14 +4872,13 @@ to the byte array.
 
 .. note::
 
-      For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR and
-      KVM_EXIT_EPR the corresponding
-
-operations are complete (and guest state is consistent) only after userspace
-has re-entered the kernel with KVM_RUN.  The kernel side will first finish
-incomplete operations and then check for pending signals.  Userspace
-can re-enter the guest with an unmasked signal pending to complete
-pending operations.
+      For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR,
+      KVM_EXIT_EPR, KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR the corresponding
+      operations are complete (and guest state is consistent) only after userspace
+      has re-entered the kernel with KVM_RUN.  The kernel side will first finish
+      incomplete operations and then check for pending signals.  Userspace
+      can re-enter the guest with an unmasked signal pending to complete
+      pending operations.
 
 ::
 
@@ -5166,6 +5165,43 @@ Note that KVM does not skip the faulting instruction as it does for
 KVM_EXIT_MMIO, but userspace has to emulate any change to the processing state
 if it decides to decode and emulate the instruction.
 
+::
+
+		/* KVM_EXIT_X86_RDMSR / KVM_EXIT_X86_WRMSR */
+		struct {
+			__u8 error; /* user -> kernel */
+			__u8 pad[7];
+			__u32 reason; /* kernel -> user */
+			__u32 index; /* kernel -> user */
+			__u64 data; /* kernel <-> user */
+		} msr;
+
+Used on x86 systems. When the VM capability KVM_CAP_X86_USER_SPACE_MSR is
+enabled, MSR accesses to registers that would invoke a #GP by KVM kernel code
+will instead trigger a KVM_EXIT_X86_RDMSR exit for reads and KVM_EXIT_X86_WRMSR
+exit for writes.
+
+The "reason" field specifies why the MSR trap occurred. User space will only
+receive MSR exit traps when a particular reason was requested through
+ENABLE_CAP. Currently valid exit reasons are:
+
+	KVM_MSR_EXIT_REASON_UNKNOWN - access to MSR that is unknown to KVM
+	KVM_MSR_EXIT_REASON_INVAL - access to invalid MSRs or reserved bits
+
+For KVM_EXIT_X86_RDMSR, the "index" field tells user space which MSR the guest
+wants to read. To respond to this request with a successful read, user space
+writes the respective data into the "data" field and must continue guest
+execution to ensure the read data is transferred into guest register state.
+
+If the RDMSR request was unsuccessful, user space indicates that with a "1" in
+the "error" field. This will inject a #GP into the guest when the VCPU is
+executed again.
+
+For KVM_EXIT_X86_WRMSR, the "index" field tells user space which MSR the guest
+wants to write. Once finished processing the event, user space must continue
+vCPU execution. If the MSR write was unsuccessful, user space also sets the
+"error" field to "1".
+
 ::
 
 		/* Fix the size of the union. */
@@ -5855,6 +5891,28 @@ controlled by the kvm module parameter halt_poll_ns. This capability allows
 the maximum halt time to specified on a per-VM basis, effectively overriding
 the module parameter for the target VM.
 
+7.21 KVM_CAP_X86_USER_SPACE_MSR
+-------------------------------
+
+:Architectures: x86
+:Target: VM
+:Parameters: args[0] contains the mask of KVM_MSR_EXIT_REASON_* events to report
+:Returns: 0 on success; -1 on error
+
+This capability enables trapping of #GP invoking RDMSR and WRMSR instructions
+into user space.
+
+When a guest requests to read or write an MSR, KVM may not implement all MSRs
+that are relevant to a respective system. It also does not differentiate by
+CPU type.
+
+To allow more fine grained control over MSR handling, user space may enable
+this capability. With it enabled, MSR accesses that match the mask specified in
+args[0] and trigger a #GP event inside the guest by KVM will instead trigger
+KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit notifications which user space
+can then handle to implement model specific MSR handling and/or user notifications
+to inform a user that an MSR was not handled.
+
 8. Other capabilities.
 ======================
 
@@ -6196,3 +6254,14 @@ distribution...)
 
 If this capability is available, then the CPNC and CPVC can be synchronized
 between KVM and userspace via the sync regs mechanism (KVM_SYNC_DIAG318).
+
+8.26 KVM_CAP_X86_USER_SPACE_MSR
+-------------------------------
+
+:Architectures: x86
+
+This capability indicates that KVM supports deflection of MSR reads and
+writes to user space. It can be enabled on a VM level. If enabled, MSR
+accesses that would usually trigger a #GP by KVM into the guest will
+instead get bounced to user space through the KVM_EXIT_X86_RDMSR and
+KVM_EXIT_X86_WRMSR exit notifications.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 5d4c39c37390..dd2665504dc0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -961,6 +961,9 @@ struct kvm_arch {
 	bool guest_can_read_msr_platform_info;
 	bool exception_payload_enabled;
 
+	/* Deflect RDMSR and WRMSR to user space when they trigger a #GP */
+	u32 user_space_msr_mask;
+
 	struct kvm_pmu_event_filter *pmu_event_filter;
 	struct task_struct *nx_lpage_recovery_thread;
 };
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 85111cd0adcd..0cc0db500f71 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3701,11 +3701,18 @@ static int em_dr_write(struct x86_emulate_ctxt *ctxt)
 
 static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
 {
+	u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
 	u64 msr_data;
+	int r;
 
 	msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
 		| ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
-	if (ctxt->ops->set_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), msr_data))
+	r = ctxt->ops->set_msr(ctxt, msr_index, msr_data);
+
+	if (r == X86EMUL_IO_NEEDED)
+		return r;
+
+	if (r)
 		return emulate_gp(ctxt, 0);
 
 	return X86EMUL_CONTINUE;
@@ -3713,9 +3720,16 @@ static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
 
 static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
 {
+	u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
 	u64 msr_data;
+	int r;
+
+	r = ctxt->ops->get_msr(ctxt, msr_index, &msr_data);
+
+	if (r == X86EMUL_IO_NEEDED)
+		return r;
 
-	if (ctxt->ops->get_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &msr_data))
+	if (r)
 		return emulate_gp(ctxt, 0);
 
 	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 43173382f02f..af6d008145cd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1590,12 +1590,89 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
 }
 EXPORT_SYMBOL_GPL(kvm_set_msr);
 
+static int complete_emulated_msr(struct kvm_vcpu *vcpu, bool is_read)
+{
+	if (vcpu->run->msr.error) {
+		kvm_inject_gp(vcpu, 0);
+		return 1;
+	} else if (is_read) {
+		kvm_rax_write(vcpu, (u32)vcpu->run->msr.data);
+		kvm_rdx_write(vcpu, vcpu->run->msr.data >> 32);
+	}
+	/* user space reported no error: finish via kvm_skip_emulated_instruction() */
+	return kvm_skip_emulated_instruction(vcpu);
+}
+/* complete_userspace_io callback installed for KVM_EXIT_X86_RDMSR exits. */
+static int complete_emulated_rdmsr(struct kvm_vcpu *vcpu)
+{
+	return complete_emulated_msr(vcpu, true);
+}
+/* complete_userspace_io callback installed for KVM_EXIT_X86_WRMSR exits. */
+static int complete_emulated_wrmsr(struct kvm_vcpu *vcpu)
+{
+	return complete_emulated_msr(vcpu, false);
+}
+/* Translate the MSR access result @r into a KVM_MSR_EXIT_REASON_* value. */
+static u64 kvm_msr_reason(int r)
+{
+	switch (r) {
+	case -ENOENT:
+		return KVM_MSR_EXIT_REASON_UNKNOWN;
+	default:
+		return KVM_MSR_EXIT_REASON_INVAL;
+	}
+}
+/* Prepare a user space MSR exit if the VM enabled this reason; returns 1 if so, else 0. */
+static int kvm_msr_user_space(struct kvm_vcpu *vcpu, u32 index,
+			      u32 exit_reason, u64 data,
+			      int (*completion)(struct kvm_vcpu *vcpu),
+			      int r)
+{
+	u64 msr_reason = kvm_msr_reason(r);
+
+	/* Check if the user wanted to know about this MSR fault */
+	if (!(vcpu->kvm->arch.user_space_msr_mask & msr_reason))
+		return 0;
+
+	vcpu->run->exit_reason = exit_reason;
+	vcpu->run->msr.error = 0;
+	memset(vcpu->run->msr.pad, 0, sizeof(vcpu->run->msr.pad));
+	vcpu->run->msr.reason = msr_reason;
+	vcpu->run->msr.index = index;
+	vcpu->run->msr.data = data;
+	vcpu->arch.complete_userspace_io = completion;
+
+	return 1;
+}
+/* Read variant: requests a KVM_EXIT_X86_RDMSR exit, passing 0 as the data value. */
+static int kvm_get_msr_user_space(struct kvm_vcpu *vcpu, u32 index, int r)
+{
+	return kvm_msr_user_space(vcpu, index, KVM_EXIT_X86_RDMSR, 0,
+				   complete_emulated_rdmsr, r);
+}
+/* Write variant: requests a KVM_EXIT_X86_WRMSR exit carrying the value in @data. */
+static int kvm_set_msr_user_space(struct kvm_vcpu *vcpu, u32 index, u64 data, int r)
+{
+	return kvm_msr_user_space(vcpu, index, KVM_EXIT_X86_WRMSR, data,
+				   complete_emulated_wrmsr, r);
+}
+
 int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
 {
 	u32 ecx = kvm_rcx_read(vcpu);
 	u64 data;
+	int r;
+
+	r = kvm_get_msr(vcpu, ecx, &data);
 
-	if (kvm_get_msr(vcpu, ecx, &data)) {
+	/* MSR read failed? See if we should ask user space */
+	if (r && kvm_get_msr_user_space(vcpu, ecx, r)) {
+		/* Bounce to user space */
+		return 0;
+	}
+
+	/* MSR read failed? Inject a #GP */
+	if (r) {
 		trace_kvm_msr_read_ex(ecx);
 		kvm_inject_gp(vcpu, 0);
 		return 1;
@@ -1613,8 +1690,18 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
 {
 	u32 ecx = kvm_rcx_read(vcpu);
 	u64 data = kvm_read_edx_eax(vcpu);
+	int r;
 
-	if (kvm_set_msr(vcpu, ecx, data)) {
+	r = kvm_set_msr(vcpu, ecx, data);
+
+	/* MSR write failed? See if we should ask user space */
+	if (r && kvm_set_msr_user_space(vcpu, ecx, data, r)) {
+		/* Bounce to user space */
+		return 0;
+	}
+
+	/* MSR write failed? Inject a #GP */
+	if (r) {
 		trace_kvm_msr_write_ex(ecx, data);
 		kvm_inject_gp(vcpu, 0);
 		return 1;
@@ -3526,6 +3613,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_EXCEPTION_PAYLOAD:
 	case KVM_CAP_SET_GUEST_DEBUG:
 	case KVM_CAP_LAST_CPU:
+	case KVM_CAP_X86_USER_SPACE_MSR:
 		r = 1;
 		break;
 	case KVM_CAP_SYNC_REGS:
@@ -5046,6 +5134,10 @@ split_irqchip_unlock:
 		kvm->arch.exception_payload_enabled = cap->args[0];
 		r = 0;
 		break;
+	case KVM_CAP_X86_USER_SPACE_MSR:
+		kvm->arch.user_space_msr_mask = cap->args[0];
+		r = 0;
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -6378,13 +6470,33 @@ static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
 static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
 			    u32 msr_index, u64 *pdata)
 {
-	return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+	int r;
+
+	r = kvm_get_msr(vcpu, msr_index, pdata);
+
+	if (r && kvm_get_msr_user_space(vcpu, msr_index, r)) {
+		/* Bounce to user space */
+		return X86EMUL_IO_NEEDED;
+	}
+
+	return r;
 }
 
 static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
 			    u32 msr_index, u64 data)
 {
-	return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data);
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+	int r;
+
+	r = kvm_set_msr(vcpu, msr_index, data);
+
+	if (r && kvm_set_msr_user_space(vcpu, msr_index, data, r)) {
+		/* Bounce to user space */
+		return X86EMUL_IO_NEEDED;
+	}
+
+	return r;
 }
 
 static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt)
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 9417a34aad08..26cfb0fa8e7e 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -17,7 +17,7 @@
 	ERSN(NMI), ERSN(INTERNAL_ERROR), ERSN(OSI), ERSN(PAPR_HCALL),	\
 	ERSN(S390_UCONTROL), ERSN(WATCHDOG), ERSN(S390_TSCH), ERSN(EPR),\
 	ERSN(SYSTEM_EVENT), ERSN(S390_STSI), ERSN(IOAPIC_EOI),          \
-	ERSN(HYPERV), ERSN(ARM_NISV)
+	ERSN(HYPERV), ERSN(ARM_NISV), ERSN(X86_RDMSR), ERSN(X86_WRMSR)
 
 TRACE_EVENT(kvm_userspace_exit,
 	    TP_PROTO(__u32 reason, int errno),
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 7d8eced6f459..31292a3cdfc2 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -248,6 +248,8 @@ struct kvm_hyperv_exit {
 #define KVM_EXIT_IOAPIC_EOI       26
 #define KVM_EXIT_HYPERV           27
 #define KVM_EXIT_ARM_NISV         28
+#define KVM_EXIT_X86_RDMSR        29
+#define KVM_EXIT_X86_WRMSR        30
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -413,6 +415,16 @@ struct kvm_run {
 			__u64 esr_iss;
 			__u64 fault_ipa;
 		} arm_nisv;
+		/* KVM_EXIT_X86_RDMSR / KVM_EXIT_X86_WRMSR */
+		struct {
+			__u8 error; /* user -> kernel */
+			__u8 pad[7];
+#define KVM_MSR_EXIT_REASON_INVAL	(1 << 0)
+#define KVM_MSR_EXIT_REASON_UNKNOWN	(1 << 1)
+			__u32 reason; /* kernel -> user */
+			__u32 index; /* kernel -> user */
+			__u64 data; /* kernel <-> user */
+		} msr;
 		/* Fix the size of the union. */
 		char padding[256];
 	};
@@ -1037,6 +1049,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_SMALLER_MAXPHYADDR 185
 #define KVM_CAP_S390_DIAG318 186
 #define KVM_CAP_STEAL_TIME 187
+#define KVM_CAP_X86_USER_SPACE_MSR 188
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit v1.2.3


From 1a155254ff937ac92cf9940d273ea597b2c667a2 Mon Sep 17 00:00:00 2001
From: Alexander Graf <graf@amazon.com>
Date: Fri, 25 Sep 2020 16:34:21 +0200
Subject: KVM: x86: Introduce MSR filtering

It's not desirable to have all MSRs always handled by KVM kernel space. Some
MSRs would be useful to handle in user space to either emulate behavior (like
uCode updates) or differentiate whether they are valid based on the CPU model.

To allow user space to specify which MSRs it wants to see handled by KVM,
this patch introduces a new ioctl to push filter rules with bitmaps into
KVM. Based on these bitmaps, KVM can then decide whether to reject MSR access.
With the addition of KVM_CAP_X86_USER_SPACE_MSR it can also deflect the
denied MSR events to user space to operate on.

If no filter is populated, MSR handling stays identical to before.

Signed-off-by: Alexander Graf <graf@amazon.com>

Message-Id: <20200925143422.21718-8-graf@amazon.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virt/kvm/api.rst  | 108 ++++++++++++++++++++++++++++++
 arch/x86/include/asm/kvm_host.h |  14 ++++
 arch/x86/include/uapi/asm/kvm.h |  18 +++++
 arch/x86/kvm/x86.c              | 145 +++++++++++++++++++++++++++++++++++++++-
 include/uapi/linux/kvm.h        |   5 ++
 5 files changed, 289 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 4fdba43d83e8..425325ff4434 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -4707,6 +4707,99 @@ KVM_PV_VM_VERIFY
   Verify the integrity of the unpacked image. Only if this succeeds,
   KVM is allowed to start protected VCPUs.
 
+4.126 KVM_X86_SET_MSR_FILTER
+----------------------------
+
+:Capability: KVM_CAP_X86_MSR_FILTER
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: struct kvm_msr_filter
+:Returns: 0 on success, < 0 on error
+
+::
+
+  struct kvm_msr_filter_range {
+  #define KVM_MSR_FILTER_READ  (1 << 0)
+  #define KVM_MSR_FILTER_WRITE (1 << 1)
+	__u32 flags;
+	__u32 nmsrs; /* number of msrs in bitmap */
+	__u32 base;  /* MSR index the bitmap starts at */
+	__u8 *bitmap; /* a 1 bit allows the operations in flags, 0 denies */
+  };
+
+  #define KVM_MSR_FILTER_MAX_RANGES 16
+  struct kvm_msr_filter {
+  #define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0)
+  #define KVM_MSR_FILTER_DEFAULT_DENY  (1 << 0)
+	__u32 flags;
+	struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES];
+  };
+
+flags values for struct kvm_msr_filter_range:
+
+KVM_MSR_FILTER_READ
+
+  Filter read accesses to MSRs using the given bitmap. A 0 in the bitmap
+  indicates that a read should immediately fail, while a 1 indicates that
+  a read for a particular MSR should be handled regardless of the default
+  filter action.
+
+KVM_MSR_FILTER_WRITE
+
+  Filter write accesses to MSRs using the given bitmap. A 0 in the bitmap
+  indicates that a write should immediately fail, while a 1 indicates that
+  a write for a particular MSR should be handled regardless of the default
+  filter action.
+
+KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE
+
+  Filter both read and write accesses to MSRs using the given bitmap. A 0
+  in the bitmap indicates that both reads and writes should immediately fail,
+  while a 1 indicates that reads and writes for a particular MSR are not
+  filtered by this range.
+
+flags values for struct kvm_msr_filter:
+
+KVM_MSR_FILTER_DEFAULT_ALLOW
+
+  If no filter range matches an MSR index that is getting accessed, KVM will
+  fall back to allowing access to the MSR.
+
+KVM_MSR_FILTER_DEFAULT_DENY
+
+  If no filter range matches an MSR index that is getting accessed, KVM will
+  fall back to rejecting access to the MSR. In this mode, all MSRs that should
+  be processed by KVM need to explicitly be marked as allowed in the bitmaps.
+
+This ioctl allows user space to define up to 16 bitmaps of MSR ranges to
+specify whether a certain MSR access should be explicitly allowed or denied.
+
+If this ioctl has never been invoked, MSR accesses are not guarded and the
+old KVM in-kernel emulation behavior is fully preserved.
+
+As soon as the filtering is in place, every MSR access is processed through
+the filtering. If a bit is within one of the defined ranges, read and write
+accesses are guarded by the bitmap's value for the MSR index. If it is not
+defined in any range, whether MSR access is rejected is determined by the flags
+field in the kvm_msr_filter struct: KVM_MSR_FILTER_DEFAULT_ALLOW and
+KVM_MSR_FILTER_DEFAULT_DENY.
+
+Calling this ioctl with an empty set of ranges (all nmsrs == 0) disables MSR
+filtering. In that mode, KVM_MSR_FILTER_DEFAULT_DENY no longer has any effect.
+
+Each bitmap range specifies a range of MSRs to potentially allow access on.
+The range covers MSR indices [base .. base+nmsrs-1]. The flags field
+indicates whether reads, writes or both reads and writes are allowed
+by setting a 1 bit in the bitmap for the corresponding MSR index.
+
+If an MSR access is not permitted through the filtering, it generates a
+#GP inside the guest. When combined with KVM_CAP_X86_USER_SPACE_MSR, that
+allows user space to deflect and potentially handle various MSR accesses
+into user space.
+
+If a vCPU is in running state while this ioctl is invoked, the vCPU may
+experience inconsistent filtering behavior on MSR accesses.
+
 
 5. The kvm_run structure
 ========================
@@ -5187,6 +5280,7 @@ ENABLE_CAP. Currently valid exit reasons are:
 
 	KVM_MSR_EXIT_REASON_UNKNOWN - access to MSR that is unknown to KVM
 	KVM_MSR_EXIT_REASON_INVAL - access to invalid MSRs or reserved bits
+	KVM_MSR_EXIT_REASON_FILTER - access blocked by KVM_X86_SET_MSR_FILTER
 
 For KVM_EXIT_X86_RDMSR, the "index" field tells user space which MSR the guest
 wants to read. To respond to this request with a successful read, user space
@@ -6265,3 +6359,17 @@ writes to user space. It can be enabled on a VM level. If enabled, MSR
 accesses that would usually trigger a #GP by KVM into the guest will
 instead get bounced to user space through the KVM_EXIT_X86_RDMSR and
 KVM_EXIT_X86_WRMSR exit notifications.
+
+8.25 KVM_CAP_X86_MSR_FILTER
+---------------------------
+
+:Architectures: x86
+
+This capability indicates that KVM supports rejecting accesses to user-defined
+MSRs. With this capability exposed, KVM exports the new VM ioctl
+KVM_X86_SET_MSR_FILTER, which user space can call to specify bitmaps of MSR
+ranges that KVM should reject access to.
+
+In combination with KVM_CAP_X86_USER_SPACE_MSR, this allows user space to
+trap and emulate MSRs that are outside of the scope of KVM as well as
+limit the attack surface on KVM's MSR emulation code.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f4a2443219bc..dc7a58b39faf 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -87,6 +87,7 @@
 #define KVM_REQ_HV_TLB_FLUSH \
 	KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_APF_READY		KVM_ARCH_REQ(28)
+#define KVM_REQ_MSR_FILTER_CHANGED	KVM_ARCH_REQ(29)
 
 #define CR0_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -860,6 +861,13 @@ struct kvm_hv {
 	struct kvm_hv_syndbg hv_syndbg;
 };
 
+struct msr_bitmap_range {
+	u32 flags;
+	u32 nmsrs;
+	u32 base;
+	unsigned long *bitmap;
+};
+
 enum kvm_irqchip_mode {
 	KVM_IRQCHIP_NONE,
 	KVM_IRQCHIP_KERNEL,       /* created with KVM_CREATE_IRQCHIP */
@@ -964,6 +972,12 @@ struct kvm_arch {
 	/* Deflect RDMSR and WRMSR to user space when they trigger a #GP */
 	u32 user_space_msr_mask;
 
+	struct {
+		u8 count;
+		bool default_allow:1;
+		struct msr_bitmap_range ranges[16];
+	} msr_filter;
+
 	struct kvm_pmu_event_filter *pmu_event_filter;
 	struct task_struct *nx_lpage_recovery_thread;
 };
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index c2fd0aa2f587..89e5f3d1bba8 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -192,8 +192,26 @@ struct kvm_msr_list {
 	__u32 indices[0];
 };
 
+/* Maximum size of any access bitmap in bytes */
+#define KVM_MSR_FILTER_MAX_BITMAP_SIZE 0x600
+
+/* for KVM_X86_SET_MSR_FILTER */
+struct kvm_msr_filter_range {
 #define KVM_MSR_FILTER_READ  (1 << 0)
 #define KVM_MSR_FILTER_WRITE (1 << 1)
+	__u32 flags;
+	__u32 nmsrs; /* number of msrs in bitmap */
+	__u32 base;  /* MSR index the bitmap starts at */
+	__u8 *bitmap; /* a 1 bit allows the operations in flags, 0 denies */
+};
+
+#define KVM_MSR_FILTER_MAX_RANGES 16
+struct kvm_msr_filter {
+#define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0)
+#define KVM_MSR_FILTER_DEFAULT_DENY  (1 << 0)
+	__u32 flags;
+	struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES];
+};
 
 struct kvm_cpuid_entry {
 	__u32 function;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 60219882fee2..72f91f3640f3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1490,7 +1490,35 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
 
 bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type)
 {
-	return true;
+	struct kvm *kvm = vcpu->kvm;
+	struct msr_bitmap_range *ranges = kvm->arch.msr_filter.ranges;
+	u32 count = kvm->arch.msr_filter.count;
+	u32 i;
+	bool r = kvm->arch.msr_filter.default_allow;
+	int idx;
+
+	/* MSR filtering not set up, allow everything */
+	if (!count)
+		return true;
+
+	/* Prevent collision with set_msr_filter */
+	idx = srcu_read_lock(&kvm->srcu);
+
+	for (i = 0; i < count; i++) {
+		u32 start = ranges[i].base;
+		u32 end = start + ranges[i].nmsrs;
+		u32 flags = ranges[i].flags;
+		unsigned long *bitmap = ranges[i].bitmap;
+
+		if ((index >= start) && (index < end) && (flags & type)) {
+			r = !!test_bit(index - start, bitmap);
+			break;
+		}
+	}
+
+	srcu_read_unlock(&kvm->srcu, idx);
+
+	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_msr_allowed);
 
@@ -1505,6 +1533,9 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
 {
 	struct msr_data msr;
 
+	if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE))
+		return -EPERM;
+
 	switch (index) {
 	case MSR_FS_BASE:
 	case MSR_GS_BASE:
@@ -1561,6 +1592,9 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
 	struct msr_data msr;
 	int ret;
 
+	if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))
+		return -EPERM;
+
 	msr.index = index;
 	msr.host_initiated = host_initiated;
 
@@ -1624,6 +1658,8 @@ static u64 kvm_msr_reason(int r)
 	switch (r) {
 	case -ENOENT:
 		return KVM_MSR_EXIT_REASON_UNKNOWN;
+	case -EPERM:
+		return KVM_MSR_EXIT_REASON_FILTER;
 	default:
 		return KVM_MSR_EXIT_REASON_INVAL;
 	}
@@ -3620,6 +3656,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_SET_GUEST_DEBUG:
 	case KVM_CAP_LAST_CPU:
 	case KVM_CAP_X86_USER_SPACE_MSR:
+	case KVM_CAP_X86_MSR_FILTER:
 		r = 1;
 		break;
 	case KVM_CAP_SYNC_REGS:
@@ -5151,6 +5188,103 @@ split_irqchip_unlock:
 	return r;
 }
 
+static void kvm_clear_msr_filter(struct kvm *kvm)
+{
+	u32 i;
+	u32 count = kvm->arch.msr_filter.count;
+	struct msr_bitmap_range ranges[16];
+
+	mutex_lock(&kvm->lock);
+	kvm->arch.msr_filter.count = 0;
+	memcpy(ranges, kvm->arch.msr_filter.ranges, count * sizeof(ranges[0]));
+	mutex_unlock(&kvm->lock);
+	synchronize_srcu(&kvm->srcu);
+
+	for (i = 0; i < count; i++)
+		kfree(ranges[i].bitmap);
+}
+
+static int kvm_add_msr_filter(struct kvm *kvm, struct kvm_msr_filter_range *user_range)
+{
+	struct msr_bitmap_range *ranges = kvm->arch.msr_filter.ranges;
+	struct msr_bitmap_range range;
+	unsigned long *bitmap = NULL;
+	size_t bitmap_size;
+	int r;
+
+	if (!user_range->nmsrs)
+		return 0;
+
+	bitmap_size = BITS_TO_LONGS(user_range->nmsrs) * sizeof(long);
+	if (!bitmap_size || bitmap_size > KVM_MSR_FILTER_MAX_BITMAP_SIZE)
+		return -EINVAL;
+
+	bitmap = memdup_user((__user u8*)user_range->bitmap, bitmap_size);
+	if (IS_ERR(bitmap))
+		return PTR_ERR(bitmap);
+
+	range = (struct msr_bitmap_range) {
+		.flags = user_range->flags,
+		.base = user_range->base,
+		.nmsrs = user_range->nmsrs,
+		.bitmap = bitmap,
+	};
+
+	if (range.flags & ~(KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE)) {
+		r = -EINVAL;
+		goto err;
+	}
+
+	if (!range.flags) {
+		r = -EINVAL;
+		goto err;
+	}
+
+	/* Everything ok, add this range identifier to our global pool */
+	ranges[kvm->arch.msr_filter.count] = range;
+	/* Make sure we filled the array before we tell anyone to walk it */
+	smp_wmb();
+	kvm->arch.msr_filter.count++;
+
+	return 0;
+err:
+	kfree(bitmap);
+	return r;
+}
+
+static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
+{
+	struct kvm_msr_filter __user *user_msr_filter = argp;
+	struct kvm_msr_filter filter;
+	bool default_allow;
+	int r = 0;
+	u32 i;
+
+	if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
+		return -EFAULT;
+
+	kvm_clear_msr_filter(kvm);
+
+	default_allow = !(filter.flags & KVM_MSR_FILTER_DEFAULT_DENY);
+	kvm->arch.msr_filter.default_allow = default_allow;
+
+	/*
+	 * Protect from concurrent calls to this function that could trigger
+	 * a TOCTOU violation on kvm->arch.msr_filter.count.
+	 */
+	mutex_lock(&kvm->lock);
+	for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) {
+		r = kvm_add_msr_filter(kvm, &filter.ranges[i]);
+		if (r)
+			break;
+	}
+
+	kvm_make_all_cpus_request(kvm, KVM_REQ_MSR_FILTER_CHANGED);
+	mutex_unlock(&kvm->lock);
+
+	return r;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
 		       unsigned int ioctl, unsigned long arg)
 {
@@ -5457,6 +5591,9 @@ set_pit2_out:
 	case KVM_SET_PMU_EVENT_FILTER:
 		r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp);
 		break;
+	case KVM_X86_SET_MSR_FILTER:
+		r = kvm_vm_ioctl_set_msr_filter(kvm, argp);
+		break;
 	default:
 		r = -ENOTTY;
 	}
@@ -8611,6 +8748,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			kvm_vcpu_update_apicv(vcpu);
 		if (kvm_check_request(KVM_REQ_APF_READY, vcpu))
 			kvm_check_async_pf_completion(vcpu);
+		if (kvm_check_request(KVM_REQ_MSR_FILTER_CHANGED, vcpu))
+			kvm_x86_ops.msr_filter_changed(vcpu);
 	}
 
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -10163,6 +10302,8 @@ void kvm_arch_pre_destroy_vm(struct kvm *kvm)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
+	u32 i;
+
 	if (current->mm == kvm->mm) {
 		/*
 		 * Free memory regions allocated on behalf of userspace,
@@ -10179,6 +10320,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	}
 	if (kvm_x86_ops.vm_destroy)
 		kvm_x86_ops.vm_destroy(kvm);
+	for (i = 0; i < kvm->arch.msr_filter.count; i++)
+		kfree(kvm->arch.msr_filter.ranges[i].bitmap);
 	kvm_pic_destroy(kvm);
 	kvm_ioapic_destroy(kvm);
 	kvm_free_vcpus(kvm);
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 31292a3cdfc2..58f43aa1fc21 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -421,6 +421,7 @@ struct kvm_run {
 			__u8 pad[7];
 #define KVM_MSR_EXIT_REASON_INVAL	(1 << 0)
 #define KVM_MSR_EXIT_REASON_UNKNOWN	(1 << 1)
+#define KVM_MSR_EXIT_REASON_FILTER	(1 << 2)
 			__u32 reason; /* kernel -> user */
 			__u32 index; /* kernel -> user */
 			__u64 data; /* kernel <-> user */
@@ -1050,6 +1051,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_DIAG318 186
 #define KVM_CAP_STEAL_TIME 187
 #define KVM_CAP_X86_USER_SPACE_MSR 188
+#define KVM_CAP_X86_MSR_FILTER 189
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1551,6 +1553,9 @@ struct kvm_pv_cmd {
 /* Available with KVM_CAP_S390_PROTECTED */
 #define KVM_S390_PV_COMMAND		_IOWR(KVMIO, 0xc5, struct kvm_pv_cmd)
 
+/* Available with KVM_CAP_X86_MSR_FILTER */
+#define KVM_X86_SET_MSR_FILTER	_IOW(KVMIO,  0xc6, struct kvm_msr_filter)
+
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {
 	/* Guest initialization commands */
-- 
cgit v1.2.3


From 58ef7c1b555e0e605da24b76cb2821dd3fcd6bc6 Mon Sep 17 00:00:00 2001
From: Thomas Pedersen <thomas@adapt-ip.com>
Date: Mon, 21 Sep 2020 19:28:16 -0700
Subject: nl80211: include frequency offset in survey info

Recently channels gained a potential frequency offset, so
include this in the per-channel survey info.

Signed-off-by: Thomas Pedersen <thomas@adapt-ip.com>
Link: https://lore.kernel.org/r/20200922022818.15855-16-thomas@adapt-ip.com
[add the offset only if non-zero]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 2 ++
 net/wireless/nl80211.c       | 5 +++++
 2 files changed, 7 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 05db40b4c56f..1e51445f81cd 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -4097,6 +4097,7 @@ enum nl80211_user_reg_hint_type {
  *	receiving frames destined to the local BSS
  * @NL80211_SURVEY_INFO_MAX: highest survey info attribute number
  *	currently defined
+ * @NL80211_SURVEY_INFO_FREQUENCY_OFFSET: center frequency offset in kHz
  * @__NL80211_SURVEY_INFO_AFTER_LAST: internal use
  */
 enum nl80211_survey_info {
@@ -4112,6 +4113,7 @@ enum nl80211_survey_info {
 	NL80211_SURVEY_INFO_TIME_SCAN,
 	NL80211_SURVEY_INFO_PAD,
 	NL80211_SURVEY_INFO_TIME_BSS_RX,
+	NL80211_SURVEY_INFO_FREQUENCY_OFFSET,
 
 	/* keep last */
 	__NL80211_SURVEY_INFO_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index d31451db5407..aece2352a349 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -9319,6 +9319,11 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq,
 			survey->channel->center_freq))
 		goto nla_put_failure;
 
+	if (survey->channel && survey->channel->freq_offset &&
+	    nla_put_u32(msg, NL80211_SURVEY_INFO_FREQUENCY_OFFSET,
+			survey->channel->freq_offset))
+		goto nla_put_failure;
+
 	if ((survey->filled & SURVEY_INFO_NOISE_DBM) &&
 	    nla_put_u8(msg, NL80211_SURVEY_INFO_NOISE, survey->noise))
 		goto nla_put_failure;
-- 
cgit v1.2.3


From f5bec330e3010450daeb5cb6a94a4a7c54afa306 Mon Sep 17 00:00:00 2001
From: Rajkumar Manoharan <rmanohar@codeaurora.org>
Date: Mon, 28 Sep 2020 00:28:11 -0700
Subject: nl80211: extend support to config spatial reuse parameter set

Allow the user to configure the following Spatial Reuse Parameter Set element fields:
  * Non-SRG OBSS PD Max Offset
  * SRG BSS Color Bitmap
  * SRG Partial BSSID Bitmap

Signed-off-by: Rajkumar Manoharan <rmanohar@codeaurora.org>
Link: https://lore.kernel.org/r/1601278091-20313-2-git-send-email-rmanohar@codeaurora.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h    |  7 +++++--
 include/net/cfg80211.h       | 10 ++++++++++
 include/uapi/linux/nl80211.h | 11 +++++++++++
 net/wireless/nl80211.c       | 25 +++++++++++++++++++++++++
 4 files changed, 51 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index f2f56b287aed..770408b2fdaf 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2350,8 +2350,11 @@ ieee80211_he_6ghz_oper(const struct ieee80211_he_operation *he_oper)
 }
 
 /* HE Spatial Reuse defines */
-#define IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT			0x4
-#define IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT		0x8
+#define IEEE80211_HE_SPR_PSR_DISALLOWED				BIT(0)
+#define IEEE80211_HE_SPR_NON_SRG_OBSS_PD_SR_DISALLOWED		BIT(1)
+#define IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT			BIT(2)
+#define IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT		BIT(3)
+#define IEEE80211_HE_SPR_HESIGA_SR_VAL15_ALLOWED		BIT(4)
 
 /*
  * ieee80211_he_spr_size - calculate 802.11ax HE Spatial Reuse IE size
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index bead4b9afeca..aee47f2b5709 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -269,13 +269,23 @@ struct ieee80211_rate {
  * struct ieee80211_he_obss_pd - AP settings for spatial reuse
  *
  * @enable: is the feature enabled.
+ * @sr_ctrl: The SR Control field of SRP element.
+ * @non_srg_max_offset: non-SRG maximum tx power offset
  * @min_offset: minimal tx power offset an associated station shall use
  * @max_offset: maximum tx power offset an associated station shall use
+ * @bss_color_bitmap: bitmap that indicates the BSS color values used by
+ *	members of the SRG
+ * @partial_bssid_bitmap: bitmap that indicates the partial BSSID values
+ *	used by members of the SRG
  */
 struct ieee80211_he_obss_pd {
 	bool enable;
+	u8 sr_ctrl;
+	u8 non_srg_max_offset;
 	u8 min_offset;
 	u8 max_offset;
+	u8 bss_color_bitmap[8];
+	u8 partial_bssid_bitmap[8];
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 1e51445f81cd..47700a2b9af9 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -6991,6 +6991,13 @@ enum nl80211_peer_measurement_ftm_resp {
  *
  * @NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET: the OBSS PD minimum tx power offset.
  * @NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET: the OBSS PD maximum tx power offset.
+ * @NL80211_HE_OBSS_PD_ATTR_NON_SRG_MAX_OFFSET: the non-SRG OBSS PD maximum
+ *	tx power offset.
+ * @NL80211_HE_OBSS_PD_ATTR_BSS_COLOR_BITMAP: bitmap that indicates the BSS color
+ *	values used by members of the SRG.
+ * @NL80211_HE_OBSS_PD_ATTR_PARTIAL_BSSID_BITMAP: bitmap that indicates the partial
+ *	BSSID values used by members of the SRG.
+ * @NL80211_HE_OBSS_PD_ATTR_SR_CTRL: The SR Control field of SRP element.
  *
  * @__NL80211_HE_OBSS_PD_ATTR_LAST: Internal
  * @NL80211_HE_OBSS_PD_ATTR_MAX: highest OBSS PD attribute.
@@ -7000,6 +7007,10 @@ enum nl80211_obss_pd_attributes {
 
 	NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET,
 	NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET,
+	NL80211_HE_OBSS_PD_ATTR_NON_SRG_MAX_OFFSET,
+	NL80211_HE_OBSS_PD_ATTR_BSS_COLOR_BITMAP,
+	NL80211_HE_OBSS_PD_ATTR_PARTIAL_BSSID_BITMAP,
+	NL80211_HE_OBSS_PD_ATTR_SR_CTRL,
 
 	/* keep last */
 	__NL80211_HE_OBSS_PD_ATTR_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index e501bce86436..d76b8bd0e1d1 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -329,6 +329,13 @@ he_obss_pd_policy[NL80211_HE_OBSS_PD_ATTR_MAX + 1] = {
 		NLA_POLICY_RANGE(NLA_U8, 1, 20),
 	[NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET] =
 		NLA_POLICY_RANGE(NLA_U8, 1, 20),
+	[NL80211_HE_OBSS_PD_ATTR_NON_SRG_MAX_OFFSET] =
+		NLA_POLICY_RANGE(NLA_U8, 1, 20),
+	[NL80211_HE_OBSS_PD_ATTR_BSS_COLOR_BITMAP] =
+		NLA_POLICY_EXACT_LEN(8),
+	[NL80211_HE_OBSS_PD_ATTR_PARTIAL_BSSID_BITMAP] =
+		NLA_POLICY_EXACT_LEN(8),
+	[NL80211_HE_OBSS_PD_ATTR_SR_CTRL] = { .type = NLA_U8 },
 };
 
 static const struct nla_policy
@@ -4857,16 +4864,34 @@ static int nl80211_parse_he_obss_pd(struct nlattr *attrs,
 	if (err)
 		return err;
 
+	if (!tb[NL80211_HE_OBSS_PD_ATTR_SR_CTRL])
+		return -EINVAL;
+
+	he_obss_pd->sr_ctrl = nla_get_u8(tb[NL80211_HE_OBSS_PD_ATTR_SR_CTRL]);
+
 	if (tb[NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET])
 		he_obss_pd->min_offset =
 			nla_get_u8(tb[NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET]);
 	if (tb[NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET])
 		he_obss_pd->max_offset =
 			nla_get_u8(tb[NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET]);
+	if (tb[NL80211_HE_OBSS_PD_ATTR_NON_SRG_MAX_OFFSET])
+		he_obss_pd->non_srg_max_offset =
+			nla_get_u8(tb[NL80211_HE_OBSS_PD_ATTR_NON_SRG_MAX_OFFSET]);
 
 	if (he_obss_pd->min_offset > he_obss_pd->max_offset)
 		return -EINVAL;
 
+	if (tb[NL80211_HE_OBSS_PD_ATTR_BSS_COLOR_BITMAP])
+		memcpy(he_obss_pd->bss_color_bitmap,
+		       nla_data(tb[NL80211_HE_OBSS_PD_ATTR_BSS_COLOR_BITMAP]),
+		       sizeof(he_obss_pd->bss_color_bitmap));
+
+	if (tb[NL80211_HE_OBSS_PD_ATTR_PARTIAL_BSSID_BITMAP])
+		memcpy(he_obss_pd->partial_bssid_bitmap,
+		       nla_data(tb[NL80211_HE_OBSS_PD_ATTR_PARTIAL_BSSID_BITMAP]),
+		       sizeof(he_obss_pd->partial_bssid_bitmap));
+
 	he_obss_pd->enable = true;
 
 	return 0;
-- 
cgit v1.2.3


From 1b4d60ec162f82ea29a2e7a907b5c6cc9f926321 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Fri, 25 Sep 2020 13:54:29 -0700
Subject: bpf: Enable BPF_PROG_TEST_RUN for raw_tracepoint

Add .test_run for raw_tracepoint. Also, introduce a new feature that runs
the target program on a specific CPU. This is achieved by a new flag in
bpf_attr.test, BPF_F_TEST_RUN_ON_CPU. When this flag is set, the program
is triggered on cpu with id bpf_attr.test.cpu. This feature is needed for
BPF programs that handle perf_event and other percpu resources, as the
program can access these resources locally.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200925205432.1777-2-songliubraving@fb.com
---
 include/linux/bpf.h            |  3 ++
 include/uapi/linux/bpf.h       |  7 ++++
 kernel/bpf/syscall.c           |  2 +-
 kernel/trace/bpf_trace.c       |  1 +
 net/bpf/test_run.c             | 91 ++++++++++++++++++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h |  7 ++++
 6 files changed, 110 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 79902325bef8..db6dcdee7933 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1396,6 +1396,9 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
 int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 				     const union bpf_attr *kattr,
 				     union bpf_attr __user *uattr);
+int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
+			     const union bpf_attr *kattr,
+			     union bpf_attr __user *uattr);
 bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 		    const struct bpf_prog *prog,
 		    struct bpf_insn_access_aux *info);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2d6519a2ed77..82522f05c021 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -424,6 +424,11 @@ enum {
  */
 #define BPF_F_QUERY_EFFECTIVE	(1U << 0)
 
+/* Flags for BPF_PROG_TEST_RUN */
+
+/* If set, run the test on the cpu specified by bpf_attr.test.cpu */
+#define BPF_F_TEST_RUN_ON_CPU	(1U << 0)
+
 /* type for BPF_ENABLE_STATS */
 enum bpf_stats_type {
 	/* enabled run_time_ns and run_cnt */
@@ -566,6 +571,8 @@ union bpf_attr {
 						 */
 		__aligned_u64	ctx_in;
 		__aligned_u64	ctx_out;
+		__u32		flags;
+		__u32		cpu;
 	} test;
 
 	struct { /* anonymous struct used by BPF_*_GET_*_ID */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2740df19f55e..3bc2ed2e171b 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2979,7 +2979,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
 	}
 }
 
-#define BPF_PROG_TEST_RUN_LAST_FIELD test.ctx_out
+#define BPF_PROG_TEST_RUN_LAST_FIELD test.cpu
 
 static int bpf_prog_test_run(const union bpf_attr *attr,
 			     union bpf_attr __user *uattr)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 36508f46a8db..2834866d379a 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1678,6 +1678,7 @@ const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
 };
 
 const struct bpf_prog_ops raw_tracepoint_prog_ops = {
+	.test_run = bpf_prog_test_run_raw_tp,
 };
 
 const struct bpf_verifier_ops tracing_verifier_ops = {
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index a66f211726e7..fde5db93507c 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -11,6 +11,7 @@
 #include <net/sock.h>
 #include <net/tcp.h>
 #include <linux/error-injection.h>
+#include <linux/smp.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/bpf_test_run.h>
@@ -204,6 +205,9 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
 	int b = 2, err = -EFAULT;
 	u32 retval = 0;
 
+	if (kattr->test.flags || kattr->test.cpu)
+		return -EINVAL;
+
 	switch (prog->expected_attach_type) {
 	case BPF_TRACE_FENTRY:
 	case BPF_TRACE_FEXIT:
@@ -236,6 +240,87 @@ out:
 	return err;
 }
 
+struct bpf_raw_tp_test_run_info {
+	struct bpf_prog *prog;
+	void *ctx;
+	u32 retval;
+};
+
+static void
+__bpf_prog_test_run_raw_tp(void *data)
+{
+	struct bpf_raw_tp_test_run_info *info = data;
+
+	rcu_read_lock();
+	migrate_disable();
+	info->retval = BPF_PROG_RUN(info->prog, info->ctx);
+	migrate_enable();
+	rcu_read_unlock();
+}
+
+int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
+			     const union bpf_attr *kattr,
+			     union bpf_attr __user *uattr)
+{
+	void __user *ctx_in = u64_to_user_ptr(kattr->test.ctx_in);
+	__u32 ctx_size_in = kattr->test.ctx_size_in;
+	struct bpf_raw_tp_test_run_info info;
+	int cpu = kattr->test.cpu, err = 0;
+
+	/* doesn't support data_in/out, ctx_out, duration, or repeat */
+	if (kattr->test.data_in || kattr->test.data_out ||
+	    kattr->test.ctx_out || kattr->test.duration ||
+	    kattr->test.repeat)
+		return -EINVAL;
+
+	if (ctx_size_in < prog->aux->max_ctx_offset)
+		return -EINVAL;
+
+	if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 && cpu != 0)
+		return -EINVAL;
+
+	if (ctx_size_in) {
+		info.ctx = kzalloc(ctx_size_in, GFP_USER);
+		if (!info.ctx)
+			return -ENOMEM;
+		if (copy_from_user(info.ctx, ctx_in, ctx_size_in)) {
+			err = -EFAULT;
+			goto out;
+		}
+	} else {
+		info.ctx = NULL;
+	}
+
+	info.prog = prog;
+
+	if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 ||
+	    cpu == smp_processor_id()) {
+		__bpf_prog_test_run_raw_tp(&info);
+	} else {
+		/* smp_call_function_single() also checks cpu_online()
+		 * after csd_lock(). However, since cpu is from user
+		 * space, let's do an extra quick check to filter out
+		 * invalid value before smp_call_function_single().
+		 */
+		if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
+			err = -ENXIO;
+			goto out;
+		}
+
+		err = smp_call_function_single(cpu, __bpf_prog_test_run_raw_tp,
+					       &info, 1);
+		if (err)
+			goto out;
+	}
+
+	if (copy_to_user(&uattr->test.retval, &info.retval, sizeof(u32)))
+		err = -EFAULT;
+
+out:
+	kfree(info.ctx);
+	return err;
+}
+
 static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size)
 {
 	void __user *data_in = u64_to_user_ptr(kattr->test.ctx_in);
@@ -410,6 +495,9 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 	void *data;
 	int ret;
 
+	if (kattr->test.flags || kattr->test.cpu)
+		return -EINVAL;
+
 	data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN,
 			     SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
 	if (IS_ERR(data))
@@ -607,6 +695,9 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 	if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
 		return -EINVAL;
 
+	if (kattr->test.flags || kattr->test.cpu)
+		return -EINVAL;
+
 	if (size < ETH_HLEN)
 		return -EINVAL;
 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 2d6519a2ed77..82522f05c021 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -424,6 +424,11 @@ enum {
  */
 #define BPF_F_QUERY_EFFECTIVE	(1U << 0)
 
+/* Flags for BPF_PROG_TEST_RUN */
+
+/* If set, run the test on the cpu specified by bpf_attr.test.cpu */
+#define BPF_F_TEST_RUN_ON_CPU	(1U << 0)
+
 /* type for BPF_ENABLE_STATS */
 enum bpf_stats_type {
 	/* enabled run_time_ns and run_cnt */
@@ -566,6 +571,8 @@ union bpf_attr {
 						 */
 		__aligned_u64	ctx_in;
 		__aligned_u64	ctx_out;
+		__u32		flags;
+		__u32		cpu;
 	} test;
 
 	struct { /* anonymous struct used by BPF_*_GET_*_ID */
-- 
cgit v1.2.3


From c4d0bfb45068d853a478b9067a95969b1886a30f Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Mon, 28 Sep 2020 12:31:05 +0100
Subject: bpf: Add bpf_snprintf_btf helper

A helper is added to support tracing kernel type information in BPF
using the BPF Type Format (BTF).  Its signature is

long bpf_snprintf_btf(char *str, u32 str_size, struct btf_ptr *ptr,
		      u32 btf_ptr_size, u64 flags);

struct btf_ptr * specifies

- a pointer to the data to be traced
- the BTF id of the type of data pointed to
- a flags field is provided for future use; these flags
  are not to be confused with the BTF_F_* flags
  below that control how the btf_ptr is displayed; the
  flags member of the struct btf_ptr may be used to
  disambiguate types in kernel versus module BTF, etc;
  the main distinction is the flags relate to the type
  and information needed in identifying it; not how it
  is displayed.

For example a BPF program with a struct sk_buff *skb
could do the following:

	static struct btf_ptr b = { };

	b.ptr = skb;
	b.type_id = __builtin_btf_type_id(struct sk_buff, 1);
	bpf_snprintf_btf(str, sizeof(str), &b, sizeof(b), 0);

Default output looks like this:

(struct sk_buff){
 .transport_header = (__u16)65535,
 .mac_header = (__u16)65535,
 .end = (sk_buff_data_t)192,
 .head = (unsigned char *)0x000000007524fd8b,
 .data = (unsigned char *)0x000000007524fd8b,
 .truesize = (unsigned int)768,
 .users = (refcount_t){
  .refs = (atomic_t){
   .counter = (int)1,
  },
 },
}

Flags modifying display are as follows:

- BTF_F_COMPACT:	no formatting around type information
- BTF_F_NONAME:		no struct/union member names/types
- BTF_F_PTR_RAW:	show raw (unobfuscated) pointer values;
			equivalent to %px.
- BTF_F_ZERO:		show zero-valued struct/union members;
			they are not displayed by default

Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/1601292670-1616-4-git-send-email-alan.maguire@oracle.com
---
 include/linux/bpf.h            |  1 +
 include/linux/btf.h            |  9 +++---
 include/uapi/linux/bpf.h       | 67 ++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/core.c              |  1 +
 kernel/bpf/helpers.c           |  4 +++
 kernel/trace/bpf_trace.c       | 65 ++++++++++++++++++++++++++++++++++++++++
 scripts/bpf_helpers_doc.py     |  2 ++
 tools/include/uapi/linux/bpf.h | 67 ++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 212 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e620a4b1290f..768b533ba48e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1822,6 +1822,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
 extern const struct bpf_func_proto bpf_copy_from_user_proto;
+extern const struct bpf_func_proto bpf_snprintf_btf_proto;
 
 const struct bpf_func_proto *bpf_tracing_func_proto(
 	enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/linux/btf.h b/include/linux/btf.h
index d0f5d3c9ec3d..3e5cdc2ba963 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -6,6 +6,7 @@
 
 #include <linux/types.h>
 #include <uapi/linux/btf.h>
+#include <uapi/linux/bpf.h>
 
 #define BTF_TYPE_EMIT(type) ((void)(type *)0)
 
@@ -59,10 +60,10 @@ const struct btf_type *btf_type_id_size(const struct btf *btf,
  *	- BTF_SHOW_UNSAFE: skip use of bpf_probe_read() to safely read
  *	  data before displaying it.
  */
-#define BTF_SHOW_COMPACT	(1ULL << 0)
-#define BTF_SHOW_NONAME		(1ULL << 1)
-#define BTF_SHOW_PTR_RAW	(1ULL << 2)
-#define BTF_SHOW_ZERO		(1ULL << 3)
+#define BTF_SHOW_COMPACT	BTF_F_COMPACT
+#define BTF_SHOW_NONAME		BTF_F_NONAME
+#define BTF_SHOW_PTR_RAW	BTF_F_PTR_RAW
+#define BTF_SHOW_ZERO		BTF_F_ZERO
 #define BTF_SHOW_UNSAFE		(1ULL << 4)
 
 void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 82522f05c021..cca9eb1b13e5 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3594,6 +3594,42 @@ union bpf_attr {
  * 		the data in *dst*. This is a wrapper of **copy_from_user**\ ().
  * 	Return
  * 		0 on success, or a negative error in case of failure.
+ *
+ * long bpf_snprintf_btf(char *str, u32 str_size, struct btf_ptr *ptr, u32 btf_ptr_size, u64 flags)
+ *	Description
+ *		Use BTF to store a string representation of *ptr*->ptr in *str*,
+ *		using *ptr*->type_id.  This value should specify the type
+ *		that *ptr*->ptr points to. LLVM __builtin_btf_type_id(type, 1)
+ *		can be used to look up vmlinux BTF type ids. Traversing the
+ *		data structure using BTF, the type information and values are
+ *		stored in the first *str_size* - 1 bytes of *str*.  Safe copy of
+ *		the pointer data is carried out to avoid kernel crashes during
+ *		operation.  Smaller types can use string space on the stack;
+ *		larger programs can use map data to store the string
+ *		representation.
+ *
+ *		The string can be subsequently shared with userspace via
+ *		bpf_perf_event_output() or ring buffer interfaces.
+ *		bpf_trace_printk() is to be avoided as it places too small
+ *		a limit on string size to be useful.
+ *
+ *		*flags* is a combination of
+ *
+ *		**BTF_F_COMPACT**
+ *			no formatting around type information
+ *		**BTF_F_NONAME**
+ *			no struct/union member names/types
+ *		**BTF_F_PTR_RAW**
+ *			show raw (unobfuscated) pointer values;
+ *			equivalent to printk specifier %px.
+ *		**BTF_F_ZERO**
+ *			show zero-valued struct/union members; they
+ *			are not displayed by default
+ *
+ *	Return
+ *		The number of bytes that were written (or would have been
+ *		written if output had to be truncated due to string size),
+ *		or a negative error in cases of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3745,6 +3781,7 @@ union bpf_attr {
 	FN(inode_storage_delete),	\
 	FN(d_path),			\
 	FN(copy_from_user),		\
+	FN(snprintf_btf),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -4853,4 +4890,34 @@ struct bpf_sk_lookup {
 	__u32 local_port;	/* Host byte order */
 };
 
+/*
+ * struct btf_ptr is used for typed pointer representation; the
+ * type id is used to render the pointer data as the appropriate type
+ * via the bpf_snprintf_btf() helper described above.  A flags field -
+ * potentially to specify additional details about the BTF pointer
+ * (rather than its mode of display) - is included for future use.
+ * Display flags - BTF_F_* - are passed to bpf_snprintf_btf separately.
+ */
+struct btf_ptr {
+	void *ptr;
+	__u32 type_id;
+	__u32 flags;		/* BTF ptr flags; unused at present. */
+};
+
+/*
+ * Flags to control bpf_snprintf_btf() behaviour.
+ *     - BTF_F_COMPACT: no formatting around type information
+ *     - BTF_F_NONAME: no struct/union member names/types
+ *     - BTF_F_PTR_RAW: show raw (unobfuscated) pointer values;
+ *       equivalent to %px.
+ *     - BTF_F_ZERO: show zero-valued struct/union members; they
+ *       are not displayed by default
+ */
+enum {
+	BTF_F_COMPACT	=	(1ULL << 0),
+	BTF_F_NONAME	=	(1ULL << 1),
+	BTF_F_PTR_RAW	=	(1ULL << 2),
+	BTF_F_ZERO	=	(1ULL << 3),
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index c4811b139caa..403fb2341a86 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2216,6 +2216,7 @@ const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
 const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto __weak;
 const struct bpf_func_proto bpf_get_local_storage_proto __weak;
 const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto __weak;
+const struct bpf_func_proto bpf_snprintf_btf_proto __weak;
 
 const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
 {
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 5cc7425ee476..e825441781ab 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -683,6 +683,10 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		if (!perfmon_capable())
 			return NULL;
 		return bpf_get_trace_printk_proto();
+	case BPF_FUNC_snprintf_btf:
+		if (!perfmon_capable())
+			return NULL;
+		return &bpf_snprintf_btf_proto;
 	case BPF_FUNC_jiffies64:
 		return &bpf_jiffies64_proto;
 	default:
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 2834866d379a..140e1be9dab6 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -7,6 +7,7 @@
 #include <linux/slab.h>
 #include <linux/bpf.h>
 #include <linux/bpf_perf_event.h>
+#include <linux/btf.h>
 #include <linux/filter.h>
 #include <linux/uaccess.h>
 #include <linux/ctype.h>
@@ -16,6 +17,9 @@
 #include <linux/error-injection.h>
 #include <linux/btf_ids.h>
 
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/btf.h>
+
 #include <asm/tlb.h>
 
 #include "trace_probe.h"
@@ -1147,6 +1151,65 @@ static const struct bpf_func_proto bpf_d_path_proto = {
 	.allowed	= bpf_d_path_allowed,
 };
 
+#define BTF_F_ALL	(BTF_F_COMPACT  | BTF_F_NONAME | \
+			 BTF_F_PTR_RAW | BTF_F_ZERO)
+
+static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
+				  u64 flags, const struct btf **btf,
+				  s32 *btf_id)
+{
+	const struct btf_type *t;
+
+	if (unlikely(flags & ~(BTF_F_ALL)))
+		return -EINVAL;
+
+	if (btf_ptr_size != sizeof(struct btf_ptr))
+		return -EINVAL;
+
+	*btf = bpf_get_btf_vmlinux();
+
+	if (IS_ERR_OR_NULL(*btf))
+		return PTR_ERR(*btf);
+
+	if (ptr->type_id > 0)
+		*btf_id = ptr->type_id;
+	else
+		return -EINVAL;
+
+	if (*btf_id > 0)
+		t = btf_type_by_id(*btf, *btf_id);
+	if (*btf_id <= 0 || !t)
+		return -ENOENT;
+
+	return 0;
+}
+
+BPF_CALL_5(bpf_snprintf_btf, char *, str, u32, str_size, struct btf_ptr *, ptr,
+	   u32, btf_ptr_size, u64, flags)
+{
+	const struct btf *btf;
+	s32 btf_id;
+	int ret;
+
+	ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id);
+	if (ret)
+		return ret;
+
+	return btf_type_snprintf_show(btf, btf_id, ptr->ptr, str, str_size,
+				      flags);
+}
+
+const struct bpf_func_proto bpf_snprintf_btf_proto = {
+	.func		= bpf_snprintf_btf,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_MEM,
+	.arg2_type	= ARG_CONST_SIZE,
+	.arg3_type	= ARG_PTR_TO_MEM,
+	.arg4_type	= ARG_CONST_SIZE,
+	.arg5_type	= ARG_ANYTHING,
+};
+
 const struct bpf_func_proto *
 bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -1233,6 +1296,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_get_task_stack_proto;
 	case BPF_FUNC_copy_from_user:
 		return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
+	case BPF_FUNC_snprintf_btf:
+		return &bpf_snprintf_btf_proto;
 	default:
 		return NULL;
 	}
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
index 08388173973f..7d86fdd190be 100755
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
@@ -433,6 +433,7 @@ class PrinterHelpers(Printer):
             'struct sk_msg_md',
             'struct xdp_md',
             'struct path',
+            'struct btf_ptr',
     ]
     known_types = {
             '...',
@@ -474,6 +475,7 @@ class PrinterHelpers(Printer):
             'struct udp6_sock',
             'struct task_struct',
             'struct path',
+            'struct btf_ptr',
     }
     mapped_types = {
             'u8': '__u8',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 82522f05c021..cca9eb1b13e5 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3594,6 +3594,42 @@ union bpf_attr {
  * 		the data in *dst*. This is a wrapper of **copy_from_user**\ ().
  * 	Return
  * 		0 on success, or a negative error in case of failure.
+ *
+ * long bpf_snprintf_btf(char *str, u32 str_size, struct btf_ptr *ptr, u32 btf_ptr_size, u64 flags)
+ *	Description
+ *		Use BTF to store a string representation of *ptr*->ptr in *str*,
+ *		using *ptr*->type_id.  This value should specify the type
+ *		that *ptr*->ptr points to. LLVM __builtin_btf_type_id(type, 1)
+ *		can be used to look up vmlinux BTF type ids. Traversing the
+ *		data structure using BTF, the type information and values are
+ *		stored in the first *str_size* - 1 bytes of *str*.  Safe copy of
+ *		the pointer data is carried out to avoid kernel crashes during
+ *		operation.  Smaller types can use string space on the stack;
+ *		larger programs can use map data to store the string
+ *		representation.
+ *
+ *		The string can be subsequently shared with userspace via
+ *		bpf_perf_event_output() or ring buffer interfaces.
+ *		bpf_trace_printk() is to be avoided as it places too small
+ *		a limit on string size to be useful.
+ *
+ *		*flags* is a combination of
+ *
+ *		**BTF_F_COMPACT**
+ *			no formatting around type information
+ *		**BTF_F_NONAME**
+ *			no struct/union member names/types
+ *		**BTF_F_PTR_RAW**
+ *			show raw (unobfuscated) pointer values;
+ *			equivalent to printk specifier %px.
+ *		**BTF_F_ZERO**
+ *			show zero-valued struct/union members; they
+ *			are not displayed by default
+ *
+ *	Return
+ *		The number of bytes that were written (or would have been
+ *		written if output had to be truncated due to string size),
+ *		or a negative error in cases of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3745,6 +3781,7 @@ union bpf_attr {
 	FN(inode_storage_delete),	\
 	FN(d_path),			\
 	FN(copy_from_user),		\
+	FN(snprintf_btf),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -4853,4 +4890,34 @@ struct bpf_sk_lookup {
 	__u32 local_port;	/* Host byte order */
 };
 
+/*
+ * struct btf_ptr is used for typed pointer representation; the
+ * type id is used to render the pointer data as the appropriate type
+ * via the bpf_snprintf_btf() helper described above.  A flags field -
+ * potentially to specify additional details about the BTF pointer
+ * (rather than its mode of display) - is included for future use.
+ * Display flags - BTF_F_* - are passed to bpf_snprintf_btf separately.
+ */
+struct btf_ptr {
+	void *ptr;
+	__u32 type_id;
+	__u32 flags;		/* BTF ptr flags; unused at present. */
+};
+
+/*
+ * Flags to control bpf_snprintf_btf() behaviour.
+ *     - BTF_F_COMPACT: no formatting around type information
+ *     - BTF_F_NONAME: no struct/union member names/types
+ *     - BTF_F_PTR_RAW: show raw (unobfuscated) pointer values;
+ *       equivalent to %px.
+ *     - BTF_F_ZERO: show zero-valued struct/union members; they
+ *       are not displayed by default
+ */
+enum {
+	BTF_F_COMPACT	=	(1ULL << 0),
+	BTF_F_NONAME	=	(1ULL << 1),
+	BTF_F_PTR_RAW	=	(1ULL << 2),
+	BTF_F_ZERO	=	(1ULL << 3),
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
-- 
cgit v1.2.3


From eb411377aed9e27835e77ee0710ee8f4649958f3 Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Mon, 28 Sep 2020 12:31:09 +0100
Subject: bpf: Add bpf_seq_printf_btf helper

A helper is added to allow seq file writing of kernel data
structures using vmlinux BTF.  Its signature is

long bpf_seq_printf_btf(struct seq_file *m, struct btf_ptr *ptr,
                        u32 btf_ptr_size, u64 flags);

Flags and struct btf_ptr definitions/use are identical to the
bpf_snprintf_btf helper, and the helper returns 0 on success
or a negative error value.

Suggested-by: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/1601292670-1616-8-git-send-email-alan.maguire@oracle.com
---
 include/linux/btf.h            |  2 ++
 include/uapi/linux/bpf.h       |  9 +++++++++
 kernel/bpf/btf.c               |  4 ++--
 kernel/bpf/core.c              |  1 +
 kernel/trace/bpf_trace.c       | 33 +++++++++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h |  9 +++++++++
 6 files changed, 56 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/btf.h b/include/linux/btf.h
index 3e5cdc2ba963..024e16ff7dcc 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -68,6 +68,8 @@ const struct btf_type *btf_type_id_size(const struct btf *btf,
 
 void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
 		       struct seq_file *m);
+int btf_type_seq_show_flags(const struct btf *btf, u32 type_id, void *obj,
+			    struct seq_file *m, u64 flags);
 
 /*
  * Copy len bytes of string representation of obj of BTF type_id into buf.
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index cca9eb1b13e5..96ddb00b91dc 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3630,6 +3630,14 @@ union bpf_attr {
  *		The number of bytes that were written (or would have been
  *		written if output had to be truncated due to string size),
  *		or a negative error in cases of failure.
+ *
+ * long bpf_seq_printf_btf(struct seq_file *m, struct btf_ptr *ptr, u32 ptr_size, u64 flags)
+ *	Description
+ *		Use BTF to write to seq_write a string representation of
+ *		*ptr*->ptr, using *ptr*->type_id as per bpf_snprintf_btf().
+ *		*flags* are identical to those used for bpf_snprintf_btf.
+ *	Return
+ *		0 on success or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3782,6 +3790,7 @@ union bpf_attr {
 	FN(d_path),			\
 	FN(copy_from_user),		\
 	FN(snprintf_btf),		\
+	FN(seq_printf_btf),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index dcdd7109aa29..498e5e553825 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -5346,8 +5346,8 @@ static void btf_seq_show(struct btf_show *show, const char *fmt,
 	seq_vprintf((struct seq_file *)show->target, fmt, args);
 }
 
-static int btf_type_seq_show_flags(const struct btf *btf, u32 type_id,
-				   void *obj, struct seq_file *m, u64 flags)
+int btf_type_seq_show_flags(const struct btf *btf, u32 type_id,
+			    void *obj, struct seq_file *m, u64 flags)
 {
 	struct btf_show sseq;
 
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 403fb2341a86..c4ba45fa4fe1 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2217,6 +2217,7 @@ const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto __weak;
 const struct bpf_func_proto bpf_get_local_storage_proto __weak;
 const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto __weak;
 const struct bpf_func_proto bpf_snprintf_btf_proto __weak;
+const struct bpf_func_proto bpf_seq_printf_btf_proto __weak;
 
 const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
 {
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 140e1be9dab6..e118a83439c3 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -71,6 +71,10 @@ static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
 u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 
+static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
+				  u64 flags, const struct btf **btf,
+				  s32 *btf_id);
+
 /**
  * trace_call_bpf - invoke BPF program
  * @call: tracepoint event
@@ -776,6 +780,31 @@ static const struct bpf_func_proto bpf_seq_write_proto = {
 	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
 };
 
+BPF_CALL_4(bpf_seq_printf_btf, struct seq_file *, m, struct btf_ptr *, ptr,
+	   u32, btf_ptr_size, u64, flags)
+{
+	const struct btf *btf;
+	s32 btf_id;
+	int ret;
+
+	ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id);
+	if (ret)
+		return ret;
+
+	return btf_type_seq_show_flags(btf, btf_id, ptr->ptr, m, flags);
+}
+
+static const struct bpf_func_proto bpf_seq_printf_btf_proto = {
+	.func		= bpf_seq_printf_btf,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_BTF_ID,
+	.arg1_btf_id	= &btf_seq_file_ids[0],
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg4_type	= ARG_ANYTHING,
+};
+
 static __always_inline int
 get_map_perf_counter(struct bpf_map *map, u64 flags,
 		     u64 *value, u64 *enabled, u64 *running)
@@ -1695,6 +1724,10 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return prog->expected_attach_type == BPF_TRACE_ITER ?
 		       &bpf_seq_write_proto :
 		       NULL;
+	case BPF_FUNC_seq_printf_btf:
+		return prog->expected_attach_type == BPF_TRACE_ITER ?
+		       &bpf_seq_printf_btf_proto :
+		       NULL;
 	case BPF_FUNC_d_path:
 		return &bpf_d_path_proto;
 	default:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index cca9eb1b13e5..96ddb00b91dc 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3630,6 +3630,14 @@ union bpf_attr {
  *		The number of bytes that were written (or would have been
  *		written if output had to be truncated due to string size),
  *		or a negative error in cases of failure.
+ *
+ * long bpf_seq_printf_btf(struct seq_file *m, struct btf_ptr *ptr, u32 ptr_size, u64 flags)
+ *	Description
+ *		Use BTF to write to seq_write a string representation of
+ *		*ptr*->ptr, using *ptr*->type_id as per bpf_snprintf_btf().
+ *		*flags* are identical to those used for bpf_snprintf_btf.
+ *	Return
+ *		0 on success or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3782,6 +3790,7 @@ union bpf_attr {
 	FN(d_path),			\
 	FN(copy_from_user),		\
 	FN(snprintf_btf),		\
+	FN(seq_printf_btf),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From ff886cbdcc44334b5f876721cf65428aa955948b Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann <kraxel@redhat.com>
Date: Wed, 23 Sep 2020 17:31:58 -0700
Subject: virtio-gpu api: blob resources

A blob resource is a container for:
   - VIRTIO_GPU_BLOB_MEM_GUEST: a guest memory allocation
     (referred to as a "guest-only blob resource")

   - VIRTIO_GPU_BLOB_MEM_HOST3D: a host3d memory allocation
     (referred to as a "host-only blob resource")

   - VIRTIO_GPU_BLOB_MEM_HOST3D_GUEST: a guest + host3d memory allocation
     (referred to as a "default blob resource").

The memory properties of the blob resource must be described by
`blob_mem`.

For default and guest-only blob resources, `nents` guest system
pages are assigned to the resource.  For default blob resources,
these guest pages are used for transfer operations. Attach/detach is
also possible to allow swap-in/swap-out, but isn't required since it
may not be applicable to future blob mem types
(shared guest/guest vram).

Host allocations depend on whether the 3D is supported. If 3D is not
supported, the only valid field for `blob_mem` is
VIRTIO_GPU_BLOB_MEM_GUEST.

If 3D is supported, the virtio-gpu resource is created from the
context local object identified by the `blob_id`. The actual host
allocation is done by the CMD_SUBMIT_3D.

Userspace must specify if the blob resource is intended to be used
for userspace mapping, sharing between virtio-gpu contexts and/or
sharing between virtio devices. This is done via `blob_flags`.

For 3D hosts, both VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D and
VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D may be used to update
the host resource. There is no restriction on the image/buffer
view the guest/host userspace has on the blob resource.

VIRTIO_GPU_CMD_SET_SCANOUT_BLOB / VIRTIO_GPU_CMD_RESOURCE_FLUSH may
be used with blob resources as well.  The modifier is intentionally
left out of SCANOUT_BLOB, and auxiliary blobs are also left out
as a simplification.

The use case for blob resources is zero-copy, needed for coherent
memory in virglrenderer. Host only blob resources are not mappable
without the feature described in the next patch, but are shareable.

Future work:
   - Emulated coherent `blob_mem` type for QEMU/vhost-user
   - A `blob_mem` type for guest-only resources imported in
     cache-coherent FOSS GPU/display drivers.
   - Display integration involving the blob model using seamless
     Wayland windows.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Acked-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Acked-by: Chia-I Wu <olvaffe@gmail.com>
Acked-by: Lingfeng Yang <lfy@google.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20200924003214.662-3-gurchetansingh@chromium.org
Co-developed-by: Gurchetan Singh <gurchetansingh@chromium.org>
Signed-off-by: Gurchetan Singh <gurchetansingh@chromium.org>
---
 include/uapi/linux/virtio_gpu.h | 43 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h
index 747a5c5cc4e6..4ddf2fe342ed 100644
--- a/include/uapi/linux/virtio_gpu.h
+++ b/include/uapi/linux/virtio_gpu.h
@@ -55,6 +55,11 @@
  */
 #define VIRTIO_GPU_F_RESOURCE_UUID       2
 
+/*
+ * VIRTIO_GPU_CMD_RESOURCE_CREATE_BLOB
+ */
+#define VIRTIO_GPU_F_RESOURCE_BLOB       3
+
 enum virtio_gpu_ctrl_type {
 	VIRTIO_GPU_UNDEFINED = 0,
 
@@ -71,6 +76,8 @@ enum virtio_gpu_ctrl_type {
 	VIRTIO_GPU_CMD_GET_CAPSET,
 	VIRTIO_GPU_CMD_GET_EDID,
 	VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID,
+	VIRTIO_GPU_CMD_RESOURCE_CREATE_BLOB,
+	VIRTIO_GPU_CMD_SET_SCANOUT_BLOB,
 
 	/* 3d commands */
 	VIRTIO_GPU_CMD_CTX_CREATE = 0x0200,
@@ -359,4 +366,40 @@ struct virtio_gpu_resp_resource_uuid {
 	__u8 uuid[16];
 };
 
+/* VIRTIO_GPU_CMD_RESOURCE_CREATE_BLOB */
+struct virtio_gpu_resource_create_blob {
+	struct virtio_gpu_ctrl_hdr hdr;
+	__le32 resource_id;
+#define VIRTIO_GPU_BLOB_MEM_GUEST             0x0001
+#define VIRTIO_GPU_BLOB_MEM_HOST3D            0x0002
+#define VIRTIO_GPU_BLOB_MEM_HOST3D_GUEST      0x0003
+
+#define VIRTIO_GPU_BLOB_FLAG_USE_MAPPABLE     0x0001
+#define VIRTIO_GPU_BLOB_FLAG_USE_SHAREABLE    0x0002
+#define VIRTIO_GPU_BLOB_FLAG_USE_CROSS_DEVICE 0x0004
+	/* zero is invalid blob mem */
+	__le32 blob_mem;
+	__le32 blob_flags;
+	__le64 blob_id;
+	__le64 size;
+	__le32 nr_entries;
+	/*
+	 * sizeof(nr_entries * virtio_gpu_mem_entry) bytes follow
+	 */
+};
+
+/* VIRTIO_GPU_CMD_SET_SCANOUT_BLOB */
+struct virtio_gpu_set_scanout_blob {
+	struct virtio_gpu_ctrl_hdr hdr;
+	struct virtio_gpu_rect r;
+	__le32 scanout_id;
+	__le32 resource_id;
+	__le32 width;
+	__le32 height;
+	__le32 format;
+	__le32 padding;
+	__le32 strides[4];
+	__le32 offsets[4];
+};
+
 #endif
-- 
cgit v1.2.3


From e5cd6cbe025570f6135f48538cb450254f8fbdd3 Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann <kraxel@redhat.com>
Date: Wed, 23 Sep 2020 17:31:59 -0700
Subject: virtio-gpu api: host visible feature

This patch adds a new virtgpu feature that allows directly
mapping host allocated resources.

This is based on virtio shared memory regions, which allows
querying for memory regions using PCI transport. Each shared
memory region has an associated "shmid", the meaning of which
is device specific.

For virtio-gpu, we can define the shared memory region with id
VIRTIO_GPU_SHM_ID_HOST_VISIBLE to be the "host visible memory
region".

The presence of the host visible memory region means the following
hypercalls are supported:

1) VIRTIO_GPU_CMD_RESOURCE_MAP_BLOB

This hypercall tells the host to inject the host resource's
mapping in an offset into virtio-gpu's PCI address space.
This is typically done via KVM_SET_USER_MEMORY_REGION on Linux
hosts.

On success, VIRTIO_GPU_RESP_OK_MAP_INFO is returned, which
specifies the host buffer's caching type and possibly in the
future performance hints about the buffer.

2) VIRTIO_GPU_CMD_RESOURCE_UNMAP_BLOB

This hypercall tells the host to remove the host resource's
mapping from the guest VM.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Acked-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Acked-by: Lingfeng Yang <lfy@google.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20200924003214.662-4-gurchetansingh@chromium.org
Co-developed-by: Gurchetan Singh <gurchetansingh@chromium.org>
Signed-off-by: Gurchetan Singh <gurchetansingh@chromium.org>
---
 include/uapi/linux/virtio_gpu.h | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h
index 4ddf2fe342ed..fa2ae4a1da5f 100644
--- a/include/uapi/linux/virtio_gpu.h
+++ b/include/uapi/linux/virtio_gpu.h
@@ -88,6 +88,8 @@ enum virtio_gpu_ctrl_type {
 	VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D,
 	VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D,
 	VIRTIO_GPU_CMD_SUBMIT_3D,
+	VIRTIO_GPU_CMD_RESOURCE_MAP_BLOB,
+	VIRTIO_GPU_CMD_RESOURCE_UNMAP_BLOB,
 
 	/* cursor commands */
 	VIRTIO_GPU_CMD_UPDATE_CURSOR = 0x0300,
@@ -100,6 +102,7 @@ enum virtio_gpu_ctrl_type {
 	VIRTIO_GPU_RESP_OK_CAPSET,
 	VIRTIO_GPU_RESP_OK_EDID,
 	VIRTIO_GPU_RESP_OK_RESOURCE_UUID,
+	VIRTIO_GPU_RESP_OK_MAP_INFO,
 
 	/* error responses */
 	VIRTIO_GPU_RESP_ERR_UNSPEC = 0x1200,
@@ -110,6 +113,11 @@ enum virtio_gpu_ctrl_type {
 	VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER,
 };
 
+enum virtio_gpu_shm_id {
+	VIRTIO_GPU_SHM_ID_UNDEFINED = 0,
+	VIRTIO_GPU_SHM_ID_HOST_VISIBLE = 1
+};
+
 #define VIRTIO_GPU_FLAG_FENCE (1 << 0)
 
 struct virtio_gpu_ctrl_hdr {
@@ -402,4 +410,31 @@ struct virtio_gpu_set_scanout_blob {
 	__le32 offsets[4];
 };
 
+/* VIRTIO_GPU_CMD_RESOURCE_MAP_BLOB */
+struct virtio_gpu_resource_map_blob {
+	struct virtio_gpu_ctrl_hdr hdr;
+	__le32 resource_id;
+	__le32 padding;
+	__le64 offset;
+};
+
+/* VIRTIO_GPU_RESP_OK_MAP_INFO */
+#define VIRTIO_GPU_MAP_CACHE_MASK     0x0f
+#define VIRTIO_GPU_MAP_CACHE_NONE     0x00
+#define VIRTIO_GPU_MAP_CACHE_CACHED   0x01
+#define VIRTIO_GPU_MAP_CACHE_UNCACHED 0x02
+#define VIRTIO_GPU_MAP_CACHE_WC       0x03
+struct virtio_gpu_resp_map_info {
+	struct virtio_gpu_ctrl_hdr hdr;
+	__u32 map_info;
+	__u32 padding;
+};
+
+/* VIRTIO_GPU_CMD_RESOURCE_UNMAP_BLOB */
+struct virtio_gpu_resource_unmap_blob {
+	struct virtio_gpu_ctrl_hdr hdr;
+	__le32 resource_id;
+	__le32 padding;
+};
+
 #endif
-- 
cgit v1.2.3


From eda3e1018f786f8f3db93001fb2da757b193ed1a Mon Sep 17 00:00:00 2001
From: Gurchetan Singh <gurchetansingh@chromium.org>
Date: Wed, 23 Sep 2020 17:32:00 -0700
Subject: drm/virtgpu api: blob resources

This makes blob resources available to guest userspace. They are needed
for GL4.5, Vulkan and zero-copy virtio-gpu.

For Mesa, blob resources have been tested with Piglit's ARB_buffer_storage
tests and apitraces.  Apitraces of GL4.5 games show we're between 70%
to 80% of host performance on Iris, based on an apitrace of a 2013 GL4.5
game:

11.204 FPS (guest)
15.947 FPS (host)

This is still better than the status quo, when said game was unplayable
with Virgl due to an inefficient GL4.3 fallback.  But there's still room
for improvement if we want to match HW-assisted virtualization.

For Vulkan, blob resources have been tested with dEQP.vk.memory* and
running Vulkan applications in production with the "Cuttlefish" virtual
Android device.  This has been done with Lingfeng Yang's "gfxstream"
Vulkan implementation, which virtualizes Vulkan across many Google
products.

Signed-off-by: Gurchetan Singh <gurchetansingh@chromium.org>
Acked-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Acked-by: Chia-I Wu <olvaffe@gmail.com>
Acked-by: Lingfeng Yang <lfy@google.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20200924003214.662-5-gurchetansingh@chromium.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 include/uapi/drm/virtgpu_drm.h | 37 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h
index f06a789f34cd..bb224f604c9e 100644
--- a/include/uapi/drm/virtgpu_drm.h
+++ b/include/uapi/drm/virtgpu_drm.h
@@ -46,6 +46,7 @@ extern "C" {
 #define DRM_VIRTGPU_TRANSFER_TO_HOST 0x07
 #define DRM_VIRTGPU_WAIT     0x08
 #define DRM_VIRTGPU_GET_CAPS  0x09
+#define DRM_VIRTGPU_RESOURCE_CREATE_BLOB 0x0a
 
 #define VIRTGPU_EXECBUF_FENCE_FD_IN	0x01
 #define VIRTGPU_EXECBUF_FENCE_FD_OUT	0x02
@@ -71,6 +72,7 @@ struct drm_virtgpu_execbuffer {
 
 #define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */
 #define VIRTGPU_PARAM_CAPSET_QUERY_FIX 2 /* do we have the capset fix */
+#define VIRTGPU_PARAM_RESOURCE_BLOB 3 /* DRM_VIRTGPU_RESOURCE_CREATE_BLOB */
 
 struct drm_virtgpu_getparam {
 	__u64 param;
@@ -100,7 +102,7 @@ struct drm_virtgpu_resource_info {
 	__u32 bo_handle;
 	__u32 res_handle;
 	__u32 size;
-	__u32 stride;
+	__u32 blob_mem;
 };
 
 struct drm_virtgpu_3d_box {
@@ -117,6 +119,8 @@ struct drm_virtgpu_3d_transfer_to_host {
 	struct drm_virtgpu_3d_box box;
 	__u32 level;
 	__u32 offset;
+	__u32 stride;
+	__u32 layer_stride;
 };
 
 struct drm_virtgpu_3d_transfer_from_host {
@@ -124,6 +128,8 @@ struct drm_virtgpu_3d_transfer_from_host {
 	struct drm_virtgpu_3d_box box;
 	__u32 level;
 	__u32 offset;
+	__u32 stride;
+	__u32 layer_stride;
 };
 
 #define VIRTGPU_WAIT_NOWAIT 1 /* like it */
@@ -140,6 +146,31 @@ struct drm_virtgpu_get_caps {
 	__u32 pad;
 };
 
+struct drm_virtgpu_resource_create_blob {
+#define VIRTGPU_BLOB_MEM_GUEST             0x0001
+#define VIRTGPU_BLOB_MEM_HOST3D            0x0002
+#define VIRTGPU_BLOB_MEM_HOST3D_GUEST      0x0003
+
+#define VIRTGPU_BLOB_FLAG_USE_MAPPABLE     0x0001
+#define VIRTGPU_BLOB_FLAG_USE_SHAREABLE    0x0002
+#define VIRTGPU_BLOB_FLAG_USE_CROSS_DEVICE 0x0004
+	/* zero is invalid blob_mem */
+	__u32 blob_mem;
+	__u32 blob_flags;
+	__u32 bo_handle;
+	__u32 res_handle;
+	__u64 size;
+
+	/*
+	 * for 3D contexts with VIRTGPU_BLOB_MEM_HOST3D_GUEST and
+	 * VIRTGPU_BLOB_MEM_HOST3D otherwise, must be zero.
+	 */
+	__u32 pad;
+	__u32 cmd_size;
+	__u64 cmd;
+	__u64 blob_id;
+};
+
 #define DRM_IOCTL_VIRTGPU_MAP \
 	DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_MAP, struct drm_virtgpu_map)
 
@@ -175,6 +206,10 @@ struct drm_virtgpu_get_caps {
 	DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_GET_CAPS, \
 	struct drm_virtgpu_get_caps)
 
+#define DRM_IOCTL_VIRTGPU_RESOURCE_CREATE_BLOB				\
+	DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_RESOURCE_CREATE_BLOB,	\
+		struct drm_virtgpu_resource_create_blob)
+
 #if defined(__cplusplus)
 }
 #endif
-- 
cgit v1.2.3


From 7a571c76ddce5928f872cdfd707ba5c7193e088c Mon Sep 17 00:00:00 2001
From: Gurchetan Singh <gurchetansingh@chromium.org>
Date: Wed, 23 Sep 2020 17:32:01 -0700
Subject: drm/virtgpu api: host visible feature

This exposes the host visible feature to userspace.  Without it,
it is an error to specify BLOB_MEM_HOST3D with
BLOB_FLAG_USE_MAPPABLE.

Signed-off-by: Gurchetan Singh <gurchetansingh@chromium.org>
Acked-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Acked-by: Lingfeng Yang <lfy@google.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20200924003214.662-6-gurchetansingh@chromium.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 include/uapi/drm/virtgpu_drm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h
index bb224f604c9e..e1b1518f9acc 100644
--- a/include/uapi/drm/virtgpu_drm.h
+++ b/include/uapi/drm/virtgpu_drm.h
@@ -73,6 +73,7 @@ struct drm_virtgpu_execbuffer {
 #define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */
 #define VIRTGPU_PARAM_CAPSET_QUERY_FIX 2 /* do we have the capset fix */
 #define VIRTGPU_PARAM_RESOURCE_BLOB 3 /* DRM_VIRTGPU_RESOURCE_CREATE_BLOB */
+#define VIRTGPU_PARAM_HOST_VISIBLE 4 /* Host blob resources are mappable */
 
 struct drm_virtgpu_getparam {
 	__u64 param;
-- 
cgit v1.2.3


From bf36dea17623b65cfbbe6219bfea1326255a89b6 Mon Sep 17 00:00:00 2001
From: Gurchetan Singh <gurchetansingh@chromium.org>
Date: Wed, 23 Sep 2020 17:32:02 -0700
Subject: drm/virtgpu api: cross-device feature

This feature was recently added to virtio-gpu; let's make
it userspace queryable. It's an error to use
BLOB_FLAG_USE_CROSS_DEVICE when this feature is not present.

Signed-off-by: Gurchetan Singh <gurchetansingh@chromium.org>
Acked-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20200924003214.662-7-gurchetansingh@chromium.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 include/uapi/drm/virtgpu_drm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h
index e1b1518f9acc..b9ec26e9c646 100644
--- a/include/uapi/drm/virtgpu_drm.h
+++ b/include/uapi/drm/virtgpu_drm.h
@@ -74,6 +74,7 @@ struct drm_virtgpu_execbuffer {
 #define VIRTGPU_PARAM_CAPSET_QUERY_FIX 2 /* do we have the capset fix */
 #define VIRTGPU_PARAM_RESOURCE_BLOB 3 /* DRM_VIRTGPU_RESOURCE_CREATE_BLOB */
 #define VIRTGPU_PARAM_HOST_VISIBLE 4 /* Host blob resources are mappable */
+#define VIRTGPU_PARAM_CROSS_DEVICE 5 /* Cross virtio-device resource sharing  */
 
 struct drm_virtgpu_getparam {
 	__u64 param;
-- 
cgit v1.2.3


From 3789af9a13e5561738c0f2114e3a5e22c843ca3e Mon Sep 17 00:00:00 2001
From: Krzysztof Wilczyński <kw@linux.com>
Date: Thu, 30 Jul 2020 21:08:48 +0000
Subject: PCI/PM: Rename pci_dev.d3_delay to d3hot_delay
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PCI devices support two variants of the D3 power state: D3hot (main power
present) and D3cold (main power removed).  Previously struct pci_dev contained:

  unsigned int    d3_delay;       /* D3->D0 transition time in ms */
  unsigned int    d3cold_delay;   /* D3cold->D0 transition time in ms */

"d3_delay" refers specifically to the D3hot state.  Rename it to
"d3hot_delay" to avoid ambiguity and align with the ACPI "_DSM for
Specifying Device Readiness Durations" in the PCI Firmware spec r3.2,
sec 4.6.9.

There is no change to the functionality.

Link: https://lore.kernel.org/r/20200730210848.1578826-1-kw@linux.com
Signed-off-by: Krzysztof Wilczyński <kw@linux.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 Documentation/power/pci.rst                      |  2 +-
 arch/x86/pci/fixup.c                             |  2 +-
 arch/x86/pci/intel_mid_pci.c                     |  2 +-
 drivers/hid/intel-ish-hid/ipc/ipc.c              |  2 +-
 drivers/net/ethernet/marvell/sky2.c              |  2 +-
 drivers/pci/pci-acpi.c                           |  6 +--
 drivers/pci/pci.c                                | 14 ++---
 drivers/pci/pci.h                                |  4 +-
 drivers/pci/quirks.c                             | 68 ++++++++++++------------
 drivers/staging/media/atomisp/pci/atomisp_v4l2.c |  2 +-
 include/linux/pci.h                              |  2 +-
 include/uapi/linux/pci_regs.h                    |  2 +-
 12 files changed, 54 insertions(+), 54 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/power/pci.rst b/Documentation/power/pci.rst
index 1831e431f725..b04fb18cc4e2 100644
--- a/Documentation/power/pci.rst
+++ b/Documentation/power/pci.rst
@@ -320,7 +320,7 @@ that these callbacks operate on::
 	unsigned int	d2_support:1;	/* Low power state D2 is supported */
 	unsigned int	no_d1d2:1;	/* D1 and D2 are forbidden */
 	unsigned int	wakeup_prepared:1;  /* Device prepared for wake up */
-	unsigned int	d3_delay;	/* D3->D0 transition time in ms */
+	unsigned int	d3hot_delay;	/* D3hot->D0 transition time in ms */
 	...
   };
 
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index b8c9a5b87f37..0a0e168be1cb 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -587,7 +587,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0xa26d, pci_invalid_bar);
 static void pci_fixup_amd_ehci_pme(struct pci_dev *dev)
 {
 	dev_info(&dev->dev, "PME# does not work under D3, disabling it\n");
-	dev->pme_support &= ~((PCI_PM_CAP_PME_D3 | PCI_PM_CAP_PME_D3cold)
+	dev->pme_support &= ~((PCI_PM_CAP_PME_D3hot | PCI_PM_CAP_PME_D3cold)
 		>> PCI_PM_CAP_PME_SHIFT);
 }
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x7808, pci_fixup_amd_ehci_pme);
diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
index 00c62115f39c..979f310b67d4 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid_pci.c
@@ -322,7 +322,7 @@ static void pci_d3delay_fixup(struct pci_dev *dev)
 	 */
 	if (type1_access_ok(dev->bus->number, dev->devfn, PCI_DEVICE_ID))
 		return;
-	dev->d3_delay = 0;
+	dev->d3hot_delay = 0;
 }
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_d3delay_fixup);
 
diff --git a/drivers/hid/intel-ish-hid/ipc/ipc.c b/drivers/hid/intel-ish-hid/ipc/ipc.c
index 8f8dfdf64833..a45ac7fa417b 100644
--- a/drivers/hid/intel-ish-hid/ipc/ipc.c
+++ b/drivers/hid/intel-ish-hid/ipc/ipc.c
@@ -755,7 +755,7 @@ static int _ish_hw_reset(struct ishtp_device *dev)
 	csr |= PCI_D3hot;
 	pci_write_config_word(pdev, pdev->pm_cap + PCI_PM_CTRL, csr);
 
-	mdelay(pdev->d3_delay);
+	mdelay(pdev->d3hot_delay);
 
 	csr &= ~PCI_PM_CTRL_STATE_MASK;
 	csr |= PCI_D0;
diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
index cec8124301c7..dd11c06ca7f9 100644
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c
@@ -5105,7 +5105,7 @@ static int sky2_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	INIT_WORK(&hw->restart_work, sky2_restart);
 
 	pci_set_drvdata(pdev, hw);
-	pdev->d3_delay = 300;
+	pdev->d3hot_delay = 300;
 
 	return 0;
 
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index d5869a03f748..154db9a47511 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -1167,7 +1167,7 @@ static struct acpi_device *acpi_pci_find_companion(struct device *dev)
  * @pdev: the PCI device whose delay is to be updated
  * @handle: ACPI handle of this device
  *
- * Update the d3_delay and d3cold_delay of a PCI device from the ACPI _DSM
+ * Update the d3hot_delay and d3cold_delay of a PCI device from the ACPI _DSM
  * control method of either the device itself or the PCI host bridge.
  *
  * Function 8, "Reset Delay," applies to the entire hierarchy below a PCI
@@ -1206,8 +1206,8 @@ static void pci_acpi_optimize_delay(struct pci_dev *pdev,
 		}
 		if (elements[3].type == ACPI_TYPE_INTEGER) {
 			value = (int)elements[3].integer.value / 1000;
-			if (value < PCI_PM_D3_WAIT)
-				pdev->d3_delay = value;
+			if (value < PCI_PM_D3HOT_WAIT)
+				pdev->d3hot_delay = value;
 		}
 	}
 	ACPI_FREE(obj);
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index a458c46d7e39..c4a26532a447 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -49,7 +49,7 @@ EXPORT_SYMBOL(isa_dma_bridge_buggy);
 int pci_pci_problems;
 EXPORT_SYMBOL(pci_pci_problems);
 
-unsigned int pci_pm_d3_delay;
+unsigned int pci_pm_d3hot_delay;
 
 static void pci_pme_list_scan(struct work_struct *work);
 
@@ -66,10 +66,10 @@ struct pci_pme_device {
 
 static void pci_dev_d3_sleep(struct pci_dev *dev)
 {
-	unsigned int delay = dev->d3_delay;
+	unsigned int delay = dev->d3hot_delay;
 
-	if (delay < pci_pm_d3_delay)
-		delay = pci_pm_d3_delay;
+	if (delay < pci_pm_d3hot_delay)
+		delay = pci_pm_d3hot_delay;
 
 	if (delay)
 		msleep(delay);
@@ -3013,7 +3013,7 @@ void pci_pm_init(struct pci_dev *dev)
 	}
 
 	dev->pm_cap = pm;
-	dev->d3_delay = PCI_PM_D3_WAIT;
+	dev->d3hot_delay = PCI_PM_D3HOT_WAIT;
 	dev->d3cold_delay = PCI_PM_D3COLD_WAIT;
 	dev->bridge_d3 = pci_bridge_d3_possible(dev);
 	dev->d3cold_allowed = true;
@@ -3038,7 +3038,7 @@ void pci_pm_init(struct pci_dev *dev)
 			 (pmc & PCI_PM_CAP_PME_D0) ? " D0" : "",
 			 (pmc & PCI_PM_CAP_PME_D1) ? " D1" : "",
 			 (pmc & PCI_PM_CAP_PME_D2) ? " D2" : "",
-			 (pmc & PCI_PM_CAP_PME_D3) ? " D3hot" : "",
+			 (pmc & PCI_PM_CAP_PME_D3hot) ? " D3hot" : "",
 			 (pmc & PCI_PM_CAP_PME_D3cold) ? " D3cold" : "");
 		dev->pme_support = pmc >> PCI_PM_CAP_PME_SHIFT;
 		dev->pme_poll = true;
@@ -4621,7 +4621,7 @@ static int pci_af_flr(struct pci_dev *dev, int probe)
  *
  * NOTE: This causes the caller to sleep for twice the device power transition
  * cooldown period, which for the D0->D3hot and D3hot->D0 transitions is 10 ms
- * by default (i.e. unless the @dev's d3_delay field has a different value).
+ * by default (i.e. unless the @dev's d3hot_delay field has a different value).
  * Moreover, only devices in D0 can be reset by this function.
  */
 static int pci_pm_reset(struct pci_dev *dev, int probe)
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index fa12f7cbc1a0..8d492669ecfd 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -44,7 +44,7 @@ int pci_bridge_secondary_bus_reset(struct pci_dev *dev);
 int pci_bus_error_reset(struct pci_dev *dev);
 
 #define PCI_PM_D2_DELAY         200
-#define PCI_PM_D3_WAIT          10
+#define PCI_PM_D3HOT_WAIT       10
 #define PCI_PM_D3COLD_WAIT      100
 #define PCI_PM_BUS_WAIT         50
 
@@ -178,7 +178,7 @@ extern struct mutex pci_slot_mutex;
 
 extern raw_spinlock_t pci_lock;
 
-extern unsigned int pci_pm_d3_delay;
+extern unsigned int pci_pm_d3hot_delay;
 
 #ifdef CONFIG_PCI_MSI
 void pci_no_msi(void);
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index bdf9b52567e0..72b22a35e516 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -1846,7 +1846,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_PXHV,	quirk_pci
  */
 static void quirk_intel_pcie_pm(struct pci_dev *dev)
 {
-	pci_pm_d3_delay = 120;
+	pci_pm_d3hot_delay = 120;
 	dev->no_d1d2 = 1;
 }
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x25e2, quirk_intel_pcie_pm);
@@ -1873,12 +1873,12 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x260b, quirk_intel_pcie_pm);
 
 static void quirk_d3hot_delay(struct pci_dev *dev, unsigned int delay)
 {
-	if (dev->d3_delay >= delay)
+	if (dev->d3hot_delay >= delay)
 		return;
 
-	dev->d3_delay = delay;
+	dev->d3hot_delay = delay;
 	pci_info(dev, "extending delay after power-on from D3hot to %d msec\n",
-		 dev->d3_delay);
+		 dev->d3hot_delay);
 }
 
 static void quirk_radeon_pm(struct pci_dev *dev)
@@ -3387,36 +3387,36 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0152, disable_igfx_irq);
  * PCI devices which are on Intel chips can skip the 10ms delay
  * before entering D3 mode.
  */
-static void quirk_remove_d3_delay(struct pci_dev *dev)
-{
-	dev->d3_delay = 0;
-}
-/* C600 Series devices do not need 10ms d3_delay */
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0412, quirk_remove_d3_delay);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0c00, quirk_remove_d3_delay);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0c0c, quirk_remove_d3_delay);
-/* Lynxpoint-H PCH devices do not need 10ms d3_delay */
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c02, quirk_remove_d3_delay);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c18, quirk_remove_d3_delay);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c1c, quirk_remove_d3_delay);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c20, quirk_remove_d3_delay);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c22, quirk_remove_d3_delay);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c26, quirk_remove_d3_delay);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c2d, quirk_remove_d3_delay);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c31, quirk_remove_d3_delay);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c3a, quirk_remove_d3_delay);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c3d, quirk_remove_d3_delay);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c4e, quirk_remove_d3_delay);
-/* Intel Cherrytrail devices do not need 10ms d3_delay */
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x2280, quirk_remove_d3_delay);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x2298, quirk_remove_d3_delay);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x229c, quirk_remove_d3_delay);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x22b0, quirk_remove_d3_delay);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x22b5, quirk_remove_d3_delay);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x22b7, quirk_remove_d3_delay);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x22b8, quirk_remove_d3_delay);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x22d8, quirk_remove_d3_delay);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x22dc, quirk_remove_d3_delay);
+static void quirk_remove_d3hot_delay(struct pci_dev *dev)
+{
+	dev->d3hot_delay = 0;
+}
+/* C600 Series devices do not need 10ms d3hot_delay */
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0412, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0c00, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0c0c, quirk_remove_d3hot_delay);
+/* Lynxpoint-H PCH devices do not need 10ms d3hot_delay */
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c02, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c18, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c1c, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c20, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c22, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c26, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c2d, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c31, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c3a, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c3d, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x8c4e, quirk_remove_d3hot_delay);
+/* Intel Cherrytrail devices do not need 10ms d3hot_delay */
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x2280, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x2298, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x229c, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x22b0, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x22b5, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x22b7, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x22b8, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x22d8, quirk_remove_d3hot_delay);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x22dc, quirk_remove_d3hot_delay);
 
 /*
  * Some devices may pass our check in pci_intx_mask_supported() if
diff --git a/drivers/staging/media/atomisp/pci/atomisp_v4l2.c b/drivers/staging/media/atomisp/pci/atomisp_v4l2.c
index a000a1e316f7..beba430a197e 100644
--- a/drivers/staging/media/atomisp/pci/atomisp_v4l2.c
+++ b/drivers/staging/media/atomisp/pci/atomisp_v4l2.c
@@ -1573,7 +1573,7 @@ static int atomisp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *i
 	spin_lock_init(&isp->lock);
 
 	/* This is not a true PCI device on SoC, so the delay is not needed. */
-	pdev->d3_delay = 0;
+	pdev->d3hot_delay = 0;
 
 	pci_set_drvdata(pdev, isp);
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index c9e169c4e216..bea1a03faab6 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -373,7 +373,7 @@ struct pci_dev {
 						      user sysfs */
 	unsigned int	clear_retrain_link:1;	/* Need to clear Retrain Link
 						   bit manually */
-	unsigned int	d3_delay;	/* D3->D0 transition time in ms */
+	unsigned int	d3hot_delay;	/* D3hot->D0 transition time in ms */
 	unsigned int	d3cold_delay;	/* D3cold->D0 transition time in ms */
 
 #ifdef CONFIG_PCIEASPM
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index f9701410d3b5..49f15c37e771 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -246,7 +246,7 @@
 #define  PCI_PM_CAP_PME_D0	0x0800	/* PME# from D0 */
 #define  PCI_PM_CAP_PME_D1	0x1000	/* PME# from D1 */
 #define  PCI_PM_CAP_PME_D2	0x2000	/* PME# from D2 */
-#define  PCI_PM_CAP_PME_D3	0x4000	/* PME# from D3 (hot) */
+#define  PCI_PM_CAP_PME_D3hot	0x4000	/* PME# from D3 (hot) */
 #define  PCI_PM_CAP_PME_D3cold	0x8000	/* PME# from D3 (cold) */
 #define  PCI_PM_CAP_PME_SHIFT	11	/* Start of the PME Mask in PMC */
 #define PCI_PM_CTRL		4	/* PM control and status register */
-- 
cgit v1.2.3


From 4a1e7c0c63e02daad751842b7880f9bbcdfb6e89 Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@redhat.com>
Date: Tue, 29 Sep 2020 14:45:51 +0200
Subject: bpf: Support attaching freplace programs to multiple attach points
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This enables support for attaching freplace programs to multiple attach
points. It does this by amending the UAPI for bpf_link_create with a target
btf ID that can be used to supply the new attachment point along with the
target program fd. The target must be compatible with the target that was
supplied at program load time.

The implementation reuses the checks that were factored out of
check_attach_btf_id() to ensure compatibility between the BTF types of the
old and new attachment. If these match, a new bpf_tracing_link will be
created for the new attach target, allowing multiple attachments to
co-exist simultaneously.

The code could theoretically support multiple-attach of other types of
tracing programs as well, but since I don't have a use case for any of
those, there is no API support for doing so.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/160138355169.48470.17165680973640685368.stgit@toke.dk
---
 include/linux/bpf.h            |   2 +
 include/uapi/linux/bpf.h       |   9 ++-
 kernel/bpf/syscall.c           | 132 ++++++++++++++++++++++++++++++++++++-----
 kernel/bpf/verifier.c          |  10 ++++
 tools/include/uapi/linux/bpf.h |   9 ++-
 5 files changed, 142 insertions(+), 20 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 839dd8670a7a..50e5c4b52bd1 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -768,6 +768,8 @@ struct bpf_prog_aux {
 	struct mutex dst_mutex; /* protects dst_* pointers below, *after* prog becomes visible */
 	struct bpf_prog *dst_prog;
 	struct bpf_trampoline *dst_trampoline;
+	enum bpf_prog_type saved_dst_prog_type;
+	enum bpf_attach_type saved_dst_attach_type;
 	bool verifier_zext; /* Zero extensions has been inserted by verifier. */
 	bool offload_requested;
 	bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 96ddb00b91dc..2b1d3f16cbd1 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -639,8 +639,13 @@ union bpf_attr {
 		};
 		__u32		attach_type;	/* attach type */
 		__u32		flags;		/* extra flags */
-		__aligned_u64	iter_info;	/* extra bpf_iter_link_info */
-		__u32		iter_info_len;	/* iter_info length */
+		union {
+			__u32		target_btf_id;	/* btf_id of target to attach to */
+			struct {
+				__aligned_u64	iter_info;	/* extra bpf_iter_link_info */
+				__u32		iter_info_len;	/* iter_info length */
+			};
+		};
 	} link_create;
 
 	struct { /* struct used by BPF_LINK_UPDATE command */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e6a0a948e30c..f1528c2a6927 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -4,6 +4,7 @@
 #include <linux/bpf.h>
 #include <linux/bpf_trace.h>
 #include <linux/bpf_lirc.h>
+#include <linux/bpf_verifier.h>
 #include <linux/btf.h>
 #include <linux/syscalls.h>
 #include <linux/slab.h>
@@ -2554,12 +2555,15 @@ static const struct bpf_link_ops bpf_tracing_link_lops = {
 	.fill_link_info = bpf_tracing_link_fill_link_info,
 };
 
-static int bpf_tracing_prog_attach(struct bpf_prog *prog)
+static int bpf_tracing_prog_attach(struct bpf_prog *prog,
+				   int tgt_prog_fd,
+				   u32 btf_id)
 {
 	struct bpf_link_primer link_primer;
 	struct bpf_prog *tgt_prog = NULL;
+	struct bpf_trampoline *tr = NULL;
 	struct bpf_tracing_link *link;
-	struct bpf_trampoline *tr;
+	u64 key = 0;
 	int err;
 
 	switch (prog->type) {
@@ -2588,6 +2592,28 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog)
 		goto out_put_prog;
 	}
 
+	if (!!tgt_prog_fd != !!btf_id) {
+		err = -EINVAL;
+		goto out_put_prog;
+	}
+
+	if (tgt_prog_fd) {
+		/* For now we only allow new targets for BPF_PROG_TYPE_EXT */
+		if (prog->type != BPF_PROG_TYPE_EXT) {
+			err = -EINVAL;
+			goto out_put_prog;
+		}
+
+		tgt_prog = bpf_prog_get(tgt_prog_fd);
+		if (IS_ERR(tgt_prog)) {
+			err = PTR_ERR(tgt_prog);
+			tgt_prog = NULL;
+			goto out_put_prog;
+		}
+
+		key = bpf_trampoline_compute_key(tgt_prog, btf_id);
+	}
+
 	link = kzalloc(sizeof(*link), GFP_USER);
 	if (!link) {
 		err = -ENOMEM;
@@ -2599,12 +2625,58 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog)
 
 	mutex_lock(&prog->aux->dst_mutex);
 
-	if (!prog->aux->dst_trampoline) {
+	/* There are a few possible cases here:
+	 *
+	 * - if prog->aux->dst_trampoline is set, the program was just loaded
+	 *   and not yet attached to anything, so we can use the values stored
+	 *   in prog->aux
+	 *
+	 * - if prog->aux->dst_trampoline is NULL, the program has already been
+         *   attached to a target and its initial target was cleared (below)
+	 *
+	 * - if tgt_prog != NULL, the caller specified tgt_prog_fd +
+	 *   target_btf_id using the link_create API.
+	 *
+	 * - if tgt_prog == NULL when this function was called using the old
+         *   raw_tracepoint_open API, and we need a target from prog->aux
+         *
+         * The combination of no saved target in prog->aux, and no target
+         * specified on load is illegal, and we reject that here.
+	 */
+	if (!prog->aux->dst_trampoline && !tgt_prog) {
 		err = -ENOENT;
 		goto out_unlock;
 	}
-	tr = prog->aux->dst_trampoline;
-	tgt_prog = prog->aux->dst_prog;
+
+	if (!prog->aux->dst_trampoline ||
+	    (key && key != prog->aux->dst_trampoline->key)) {
+		/* If there is no saved target, or the specified target is
+		 * different from the destination specified at load time, we
+		 * need a new trampoline and a check for compatibility
+		 */
+		struct bpf_attach_target_info tgt_info = {};
+
+		err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id,
+					      &tgt_info);
+		if (err)
+			goto out_unlock;
+
+		tr = bpf_trampoline_get(key, &tgt_info);
+		if (!tr) {
+			err = -ENOMEM;
+			goto out_unlock;
+		}
+	} else {
+		/* The caller didn't specify a target, or the target was the
+		 * same as the destination supplied during program load. This
+		 * means we can reuse the trampoline and reference from program
+		 * load time, and there is no need to allocate a new one. This
+		 * can only happen once for any program, as the saved values in
+		 * prog->aux are cleared below.
+		 */
+		tr = prog->aux->dst_trampoline;
+		tgt_prog = prog->aux->dst_prog;
+	}
 
 	err = bpf_link_prime(&link->link, &link_primer);
 	if (err)
@@ -2620,15 +2692,31 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog)
 	link->tgt_prog = tgt_prog;
 	link->trampoline = tr;
 
+	/* Always clear the trampoline and target prog from prog->aux to make
+	 * sure the original attach destination is not kept alive after a
+	 * program is (re-)attached to another target.
+	 */
+	if (prog->aux->dst_prog &&
+	    (tgt_prog_fd || tr != prog->aux->dst_trampoline))
+		/* got extra prog ref from syscall, or attaching to different prog */
+		bpf_prog_put(prog->aux->dst_prog);
+	if (prog->aux->dst_trampoline && tr != prog->aux->dst_trampoline)
+		/* we allocated a new trampoline, so free the old one */
+		bpf_trampoline_put(prog->aux->dst_trampoline);
+
 	prog->aux->dst_prog = NULL;
 	prog->aux->dst_trampoline = NULL;
 	mutex_unlock(&prog->aux->dst_mutex);
 
 	return bpf_link_settle(&link_primer);
 out_unlock:
+	if (tr && tr != prog->aux->dst_trampoline)
+		bpf_trampoline_put(tr);
 	mutex_unlock(&prog->aux->dst_mutex);
 	kfree(link);
 out_put_prog:
+	if (tgt_prog_fd && tgt_prog)
+		bpf_prog_put(tgt_prog);
 	bpf_prog_put(prog);
 	return err;
 }
@@ -2742,7 +2830,7 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
 			tp_name = prog->aux->attach_func_name;
 			break;
 		}
-		return bpf_tracing_prog_attach(prog);
+		return bpf_tracing_prog_attach(prog, 0, 0);
 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
 	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
 		if (strncpy_from_user(buf,
@@ -3926,10 +4014,15 @@ err_put:
 
 static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 {
-	if (attr->link_create.attach_type == BPF_TRACE_ITER &&
-	    prog->expected_attach_type == BPF_TRACE_ITER)
-		return bpf_iter_link_attach(attr, prog);
+	if (attr->link_create.attach_type != prog->expected_attach_type)
+		return -EINVAL;
 
+	if (prog->expected_attach_type == BPF_TRACE_ITER)
+		return bpf_iter_link_attach(attr, prog);
+	else if (prog->type == BPF_PROG_TYPE_EXT)
+		return bpf_tracing_prog_attach(prog,
+					       attr->link_create.target_fd,
+					       attr->link_create.target_btf_id);
 	return -EINVAL;
 }
 
@@ -3943,18 +4036,25 @@ static int link_create(union bpf_attr *attr)
 	if (CHECK_ATTR(BPF_LINK_CREATE))
 		return -EINVAL;
 
-	ptype = attach_type_to_prog_type(attr->link_create.attach_type);
-	if (ptype == BPF_PROG_TYPE_UNSPEC)
-		return -EINVAL;
-
-	prog = bpf_prog_get_type(attr->link_create.prog_fd, ptype);
+	prog = bpf_prog_get(attr->link_create.prog_fd);
 	if (IS_ERR(prog))
 		return PTR_ERR(prog);
 
 	ret = bpf_prog_attach_check_attach_type(prog,
 						attr->link_create.attach_type);
 	if (ret)
-		goto err_out;
+		goto out;
+
+	if (prog->type == BPF_PROG_TYPE_EXT) {
+		ret = tracing_bpf_link_attach(attr, prog);
+		goto out;
+	}
+
+	ptype = attach_type_to_prog_type(attr->link_create.attach_type);
+	if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) {
+		ret = -EINVAL;
+		goto out;
+	}
 
 	switch (ptype) {
 	case BPF_PROG_TYPE_CGROUP_SKB:
@@ -3982,7 +4082,7 @@ static int link_create(union bpf_attr *attr)
 		ret = -EINVAL;
 	}
 
-err_out:
+out:
 	if (ret < 0)
 		bpf_prog_put(prog);
 	return ret;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a97a2f2964e3..015a1c074b6b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -11404,6 +11404,11 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
 		if (!btf_type_is_func_proto(t))
 			return -EINVAL;
 
+		if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
+		    (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
+		     prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
+			return -EINVAL;
+
 		if (tgt_prog && conservative)
 			t = NULL;
 
@@ -11512,6 +11517,11 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
 	prog->aux->attach_func_proto = tgt_info.tgt_type;
 	prog->aux->attach_func_name = tgt_info.tgt_name;
 
+	if (tgt_prog) {
+		prog->aux->saved_dst_prog_type = tgt_prog->type;
+		prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
+	}
+
 	if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
 		prog->aux->attach_btf_trace = true;
 		return 0;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 96ddb00b91dc..2b1d3f16cbd1 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -639,8 +639,13 @@ union bpf_attr {
 		};
 		__u32		attach_type;	/* attach type */
 		__u32		flags;		/* extra flags */
-		__aligned_u64	iter_info;	/* extra bpf_iter_link_info */
-		__u32		iter_info_len;	/* iter_info length */
+		union {
+			__u32		target_btf_id;	/* btf_id of target to attach to */
+			struct {
+				__aligned_u64	iter_info;	/* extra bpf_iter_link_info */
+				__u32		iter_info_len;	/* iter_info length */
+			};
+		};
 	} link_create;
 
 	struct { /* struct used by BPF_LINK_UPDATE command */
-- 
cgit v1.2.3


From 3f47cb4c1cf3bceb2438ea962bfffc6665ee4a9f Mon Sep 17 00:00:00 2001
From: Tom Parkin <tparkin@katalix.com>
Date: Tue, 29 Sep 2020 13:35:41 +0100
Subject: l2tp: report rx cookie discards in netlink get

When an L2TPv3 session receives a data frame with an incorrect cookie,
l2tp_core logs a warning message and bumps a stats counter to reflect
the fact that the packet has been dropped.

However, the stats counter in question is missing from the l2tp_netlink
get message for tunnel and session instances.

Include the statistic in the netlink get response.

Signed-off-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/l2tp.h | 1 +
 net/l2tp/l2tp_netlink.c   | 6 ++++++
 2 files changed, 7 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h
index 88a0d32b8c07..30c80d5ba4bf 100644
--- a/include/uapi/linux/l2tp.h
+++ b/include/uapi/linux/l2tp.h
@@ -144,6 +144,7 @@ enum {
 	L2TP_ATTR_RX_OOS_PACKETS,	/* u64 */
 	L2TP_ATTR_RX_ERRORS,		/* u64 */
 	L2TP_ATTR_STATS_PAD,
+	L2TP_ATTR_RX_COOKIE_DISCARDS,	/* u64 */
 	__L2TP_ATTR_STATS_MAX,
 };
 
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 83c015f7f20d..5ca5056e9636 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -420,6 +420,9 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla
 	    nla_put_u64_64bit(skb, L2TP_ATTR_RX_SEQ_DISCARDS,
 			      atomic_long_read(&tunnel->stats.rx_seq_discards),
 			      L2TP_ATTR_STATS_PAD) ||
+	    nla_put_u64_64bit(skb, L2TP_ATTR_RX_COOKIE_DISCARDS,
+			      atomic_long_read(&tunnel->stats.rx_cookie_discards),
+			      L2TP_ATTR_STATS_PAD) ||
 	    nla_put_u64_64bit(skb, L2TP_ATTR_RX_OOS_PACKETS,
 			      atomic_long_read(&tunnel->stats.rx_oos_packets),
 			      L2TP_ATTR_STATS_PAD) ||
@@ -760,6 +763,9 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl
 	    nla_put_u64_64bit(skb, L2TP_ATTR_RX_SEQ_DISCARDS,
 			      atomic_long_read(&session->stats.rx_seq_discards),
 			      L2TP_ATTR_STATS_PAD) ||
+	    nla_put_u64_64bit(skb, L2TP_ATTR_RX_COOKIE_DISCARDS,
+			      atomic_long_read(&session->stats.rx_cookie_discards),
+			      L2TP_ATTR_STATS_PAD) ||
 	    nla_put_u64_64bit(skb, L2TP_ATTR_RX_OOS_PACKETS,
 			      atomic_long_read(&session->stats.rx_oos_packets),
 			      L2TP_ATTR_STATS_PAD) ||
-- 
cgit v1.2.3


From 2ec13cbcfadbbeac499f3b63de0f7db490d45a7e Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Tue, 29 Sep 2020 11:08:59 -0700
Subject: devlink: include <linux/const.h> for _BITUL

Commit 5d5b4128c4ca ("devlink: introduce flash update overwrite mask")
added a usage of _BITUL to the UAPI <linux/devlink.h> header, but failed
to include the header file where it was defined. It happens that this
does not break any existing kernel include chains because it gets
included through other sources. However, when including the UAPI headers
in a userspace application (such as devlink in iproute2), _BITUL is not
defined.

Fixes: 5d5b4128c4ca ("devlink: introduce flash update overwrite mask")
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 7b0face1bad5..ba467dc07852 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -13,6 +13,8 @@
 #ifndef _UAPI_LINUX_DEVLINK_H_
 #define _UAPI_LINUX_DEVLINK_H_
 
+#include <linux/const.h>
+
 #define DEVLINK_GENL_NAME "devlink"
 #define DEVLINK_GENL_VERSION 0x1
 #define DEVLINK_GENL_MCGRP_CONFIG_NAME "config"
-- 
cgit v1.2.3


From 91e515c2f0f68d4ce137c9182eb03af4858f362e Mon Sep 17 00:00:00 2001
From: Alistair Delva <adelva@google.com>
Date: Tue, 29 Sep 2020 14:53:33 -0700
Subject: virtio-gpu api: fix 64/32 compat issue with blob implementation

We encountered this issue when booting blob with a 32-bit kernel.
The implementation doesn't match v6 of the virtio-spec change, so fix
this.

Fixes: ff886cbdcc44 ("virtio-gpu api: blob resources")
Signed-off-by: Alistair Delva <adelva@google.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20200929215333.1241-1-gurchetansingh@chromium.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 include/uapi/linux/virtio_gpu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h
index fa2ae4a1da5f..0ec6b610402c 100644
--- a/include/uapi/linux/virtio_gpu.h
+++ b/include/uapi/linux/virtio_gpu.h
@@ -388,9 +388,9 @@ struct virtio_gpu_resource_create_blob {
 	/* zero is invalid blob mem */
 	__le32 blob_mem;
 	__le32 blob_flags;
+	__le32 nr_entries;
 	__le64 blob_id;
 	__le64 size;
-	__le32 nr_entries;
 	/*
 	 * sizeof(nr_entries * virtio_gpu_mem_entry) bytes follow
 	 */
-- 
cgit v1.2.3


From 539430fbbcc4a8d02451c77fff1ecd1f3b5f8abf Mon Sep 17 00:00:00 2001
From: Kent Gibson <warthog618@gmail.com>
Date: Mon, 28 Sep 2020 08:27:50 +0800
Subject: gpio: uapi: define GPIO_MAX_NAME_SIZE for array sizes

Replace constant array sizes with a macro constant to clarify the source
of array sizes, provide a place to document any constraints on the size,
and to simplify array sizing in userspace if constructing structs
from their composite fields.

Signed-off-by: Kent Gibson <warthog618@gmail.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 include/uapi/linux/gpio.h | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/gpio.h b/include/uapi/linux/gpio.h
index 9c27cecf406f..285cc10355b2 100644
--- a/include/uapi/linux/gpio.h
+++ b/include/uapi/linux/gpio.h
@@ -14,6 +14,11 @@
 #include <linux/ioctl.h>
 #include <linux/types.h>
 
+/*
+ * The maximum size of name and label arrays.
+ */
+#define GPIO_MAX_NAME_SIZE 32
+
 /**
  * struct gpiochip_info - Information about a certain GPIO chip
  * @name: the Linux kernel name of this GPIO chip
@@ -22,8 +27,8 @@
  * @lines: number of GPIO lines on this chip
  */
 struct gpiochip_info {
-	char name[32];
-	char label[32];
+	char name[GPIO_MAX_NAME_SIZE];
+	char label[GPIO_MAX_NAME_SIZE];
 	__u32 lines;
 };
 
@@ -52,8 +57,8 @@ struct gpiochip_info {
 struct gpioline_info {
 	__u32 line_offset;
 	__u32 flags;
-	char name[32];
-	char consumer[32];
+	char name[GPIO_MAX_NAME_SIZE];
+	char consumer[GPIO_MAX_NAME_SIZE];
 };
 
 /* Maximum number of requested handles */
@@ -123,7 +128,7 @@ struct gpiohandle_request {
 	__u32 lineoffsets[GPIOHANDLES_MAX];
 	__u32 flags;
 	__u8 default_values[GPIOHANDLES_MAX];
-	char consumer_label[32];
+	char consumer_label[GPIO_MAX_NAME_SIZE];
 	__u32 lines;
 	int fd;
 };
@@ -182,7 +187,7 @@ struct gpioevent_request {
 	__u32 lineoffset;
 	__u32 handleflags;
 	__u32 eventflags;
-	char consumer_label[32];
+	char consumer_label[GPIO_MAX_NAME_SIZE];
 	int fd;
 };
 
-- 
cgit v1.2.3


From b53911aa872db462be2e5f1dd611b25c4c2e663b Mon Sep 17 00:00:00 2001
From: Kent Gibson <warthog618@gmail.com>
Date: Mon, 28 Sep 2020 08:27:51 +0800
Subject: gpio: uapi: define uAPI v2

Add a new version of the uAPI to address existing 32/64-bit alignment
issues, add support for debounce and event sequence numbers, allow
requested lines with different configurations, and provide some future
proofing by adding padding reserved for future use.

The alignment issue relates to the gpioevent_data, which packs to different
sizes on 32-bit and 64-bit platforms. That creates problems for 32-bit apps
running on 64-bit kernels.  uAPI v2 addresses that particular issue, and
the problem more generally, by adding pad fields that explicitly pad
structs out to 64-bit boundaries, so they will pack to the same size now,
and even if some of the reserved padding is used for __u64 fields in the
future.

The new structs have been analysed with pahole to ensure that they
are sized as expected and contain no implicit padding.

The lack of future proofing in v1 makes it impossible to, for example,
add the debounce feature that is included in v2.
The future proofing is addressed by providing configurable attributes in
line config and reserved padding in all structs for future features.
Specifically, the line request, config, info, info_changed and event
structs receive updated versions and new ioctls.

As the majority of the structs and ioctls were being replaced, it is
opportune to rework some of the other aspects of the uAPI:

v1 has three different flags fields, each with their own separate
bit definitions.  In v2 that is collapsed to one - gpio_v2_line_flag.

The handle and event requests are merged into a single request, the line
request, as the two requests were mostly the same other than the edge
detection provided by event requests.  As a byproduct, the v2 uAPI allows
for multiple lines producing edge events on the same line handle.
This is a new capability as v1 only supports a single line in an event
request.

As a consequence, there are now only two types of file handle to be
concerned with, the chip and the line, and it is clearer which ioctls
apply to which type of handle.

There is also some minor renaming of fields for consistency compared to
their v1 counterparts, e.g. offset rather than lineoffset or line_offset,
and consumer rather than consumer_label.

Additionally, v1 GPIOHANDLES_MAX becomes GPIO_V2_LINES_MAX in v2 for
clarity, and the gpiohandle_data __u8 array becomes a bitmap in
gpio_v2_line_values.

The v2 uAPI is mostly a reorganisation and extension of v1, so userspace
code, particularly libgpiod, should readily port to it.

Signed-off-by: Kent Gibson <warthog618@gmail.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 include/uapi/linux/gpio.h | 291 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 284 insertions(+), 7 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/gpio.h b/include/uapi/linux/gpio.h
index 285cc10355b2..5904f49399de 100644
--- a/include/uapi/linux/gpio.h
+++ b/include/uapi/linux/gpio.h
@@ -11,11 +11,14 @@
 #ifndef _UAPI_GPIO_H_
 #define _UAPI_GPIO_H_
 
+#include <linux/const.h>
 #include <linux/ioctl.h>
 #include <linux/types.h>
 
 /*
  * The maximum size of name and label arrays.
+ *
+ * Must be a multiple of 8 to ensure 32/64-bit alignment of structs.
  */
 #define GPIO_MAX_NAME_SIZE 32
 
@@ -32,6 +35,265 @@ struct gpiochip_info {
 	__u32 lines;
 };
 
+/*
+ * Maximum number of requested lines.
+ *
+ * Must be no greater than 64, as bitmaps are restricted here to 64-bits
+ * for simplicity, and a multiple of 2 to ensure 32/64-bit alignment of
+ * structs.
+ */
+#define GPIO_V2_LINES_MAX 64
+
+/*
+ * The maximum number of configuration attributes associated with a line
+ * request.
+ */
+#define GPIO_V2_LINE_NUM_ATTRS_MAX 10
+
+/**
+ * enum gpio_v2_line_flag - &struct gpio_v2_line_attribute.flags values
+ * @GPIO_V2_LINE_FLAG_USED: line is not available for request
+ * @GPIO_V2_LINE_FLAG_ACTIVE_LOW: line active state is physical low
+ * @GPIO_V2_LINE_FLAG_INPUT: line is an input
+ * @GPIO_V2_LINE_FLAG_OUTPUT: line is an output
+ * @GPIO_V2_LINE_FLAG_EDGE_RISING: line detects rising (inactive to active)
+ * edges
+ * @GPIO_V2_LINE_FLAG_EDGE_FALLING: line detects falling (active to
+ * inactive) edges
+ * @GPIO_V2_LINE_FLAG_OPEN_DRAIN: line is an open drain output
+ * @GPIO_V2_LINE_FLAG_OPEN_SOURCE: line is an open source output
+ * @GPIO_V2_LINE_FLAG_BIAS_PULL_UP: line has pull-up bias enabled
+ * @GPIO_V2_LINE_FLAG_BIAS_PULL_DOWN: line has pull-down bias enabled
+ * @GPIO_V2_LINE_FLAG_BIAS_DISABLED: line has bias disabled
+ */
+enum gpio_v2_line_flag {
+	GPIO_V2_LINE_FLAG_USED			= _BITULL(0),
+	GPIO_V2_LINE_FLAG_ACTIVE_LOW		= _BITULL(1),
+	GPIO_V2_LINE_FLAG_INPUT			= _BITULL(2),
+	GPIO_V2_LINE_FLAG_OUTPUT		= _BITULL(3),
+	GPIO_V2_LINE_FLAG_EDGE_RISING		= _BITULL(4),
+	GPIO_V2_LINE_FLAG_EDGE_FALLING		= _BITULL(5),
+	GPIO_V2_LINE_FLAG_OPEN_DRAIN		= _BITULL(6),
+	GPIO_V2_LINE_FLAG_OPEN_SOURCE		= _BITULL(7),
+	GPIO_V2_LINE_FLAG_BIAS_PULL_UP		= _BITULL(8),
+	GPIO_V2_LINE_FLAG_BIAS_PULL_DOWN	= _BITULL(9),
+	GPIO_V2_LINE_FLAG_BIAS_DISABLED		= _BITULL(10),
+};
+
+/**
+ * struct gpio_v2_line_values - Values of GPIO lines
+ * @bits: a bitmap containing the value of the lines, set to 1 for active
+ * and 0 for inactive.
+ * @mask: a bitmap identifying the lines to get or set, with each bit
+ * number corresponding to the index into &struct
+ * gpio_v2_line_request.offsets.
+ */
+struct gpio_v2_line_values {
+	__aligned_u64 bits;
+	__aligned_u64 mask;
+};
+
+/**
+ * enum gpio_v2_line_attr_id - &struct gpio_v2_line_attribute.id values
+ * identifying which field of the attribute union is in use.
+ * @GPIO_V2_LINE_ATTR_ID_FLAGS: flags field is in use
+ * @GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES: values field is in use
+ * @GPIO_V2_LINE_ATTR_ID_DEBOUNCE: debounce_period_us field is in use
+ */
+enum gpio_v2_line_attr_id {
+	GPIO_V2_LINE_ATTR_ID_FLAGS		= 1,
+	GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES	= 2,
+	GPIO_V2_LINE_ATTR_ID_DEBOUNCE		= 3,
+};
+
+/**
+ * struct gpio_v2_line_attribute - a configurable attribute of a line
+ * @id: attribute identifier with value from &enum gpio_v2_line_attr_id
+ * @padding: reserved for future use and must be zero filled
+ * @flags: if id is GPIO_V2_LINE_ATTR_ID_FLAGS, the flags for the GPIO
+ * line, with values from enum gpio_v2_line_flag, such as
+ * GPIO_V2_LINE_FLAG_ACTIVE_LOW, GPIO_V2_LINE_FLAG_OUTPUT etc, OR:ed
+ * together.  This overrides the default flags contained in the &struct
+ * gpio_v2_line_config for the associated line.
+ * @values: if id is GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES, a bitmap
+ * containing the values to which the lines will be set, with each bit
+ * number corresponding to the index into &struct
+ * gpio_v2_line_request.offsets.
+ * @debounce_period_us: if id is GPIO_V2_LINE_ATTR_ID_DEBOUNCE, the desired
+ * debounce period, in microseconds
+ */
+struct gpio_v2_line_attribute {
+	__u32 id;
+	__u32 padding;
+	union {
+		__aligned_u64 flags;
+		__aligned_u64 values;
+		__u32 debounce_period_us;
+	};
+};
+
+/**
+ * struct gpio_v2_line_config_attribute - a configuration attribute
+ * associated with one or more of the requested lines.
+ * @attr: the configurable attribute
+ * @mask: a bitmap identifying the lines to which the attribute applies,
+ * with each bit number corresponding to the index into &struct
+ * gpio_v2_line_request.offsets.
+ */
+struct gpio_v2_line_config_attribute {
+	struct gpio_v2_line_attribute attr;
+	__aligned_u64 mask;
+};
+
+/**
+ * struct gpio_v2_line_config - Configuration for GPIO lines
+ * @flags: flags for the GPIO lines, with values from enum
+ * gpio_v2_line_flag, such as GPIO_V2_LINE_FLAG_ACTIVE_LOW,
+ * GPIO_V2_LINE_FLAG_OUTPUT etc, OR:ed together.  This is the default for
+ * all requested lines but may be overridden for particular lines using
+ * attrs.
+ * @num_attrs: the number of attributes in attrs
+ * @padding: reserved for future use and must be zero filled
+ * @attrs: the configuration attributes associated with the requested
+ * lines.  Any attribute should only be associated with a particular line
+ * once.  If an attribute is associated with a line multiple times then the
+ * first occurrence (i.e. lowest index) has precedence.
+ */
+struct gpio_v2_line_config {
+	__aligned_u64 flags;
+	__u32 num_attrs;
+	/* Pad to fill implicit padding and reserve space for future use. */
+	__u32 padding[5];
+	struct gpio_v2_line_config_attribute attrs[GPIO_V2_LINE_NUM_ATTRS_MAX];
+};
+
+/**
+ * struct gpio_v2_line_request - Information about a request for GPIO lines
+ * @offsets: an array of desired lines, specified by offset index for the
+ * associated GPIO chip
+ * @consumer: a desired consumer label for the selected GPIO lines such as
+ * "my-bitbanged-relay"
+ * @config: requested configuration for the lines.
+ * @num_lines: number of lines requested in this request, i.e. the number
+ * of valid fields in the GPIO_V2_LINES_MAX sized arrays, set to 1 to
+ * request a single line
+ * @event_buffer_size: a suggested minimum number of line events that the
+ * kernel should buffer.  This is only relevant if edge detection is
+ * enabled in the configuration. Note that this is only a suggested value
+ * and the kernel may allocate a larger buffer or cap the size of the
+ * buffer. If this field is zero then the buffer size defaults to a minimum
+ * of num_lines*16.
+ * @padding: reserved for future use and must be zero filled
+ * @fd: if successful this field will contain a valid anonymous file handle
+ * after a GPIO_V2_GET_LINE_IOCTL operation, zero or negative value means
+ * error
+ */
+struct gpio_v2_line_request {
+	__u32 offsets[GPIO_V2_LINES_MAX];
+	char consumer[GPIO_MAX_NAME_SIZE];
+	struct gpio_v2_line_config config;
+	__u32 num_lines;
+	__u32 event_buffer_size;
+	/* Pad to fill implicit padding and reserve space for future use. */
+	__u32 padding[5];
+	__s32 fd;
+};
+
+/**
+ * struct gpio_v2_line_info - Information about a certain GPIO line
+ * @name: the name of this GPIO line, such as the output pin of the line on
+ * the chip, a rail or a pin header name on a board, as specified by the
+ * GPIO chip, may be empty
+ * @consumer: a functional name for the consumer of this GPIO line as set
+ * by whatever is using it, will be empty if there is no current user but
+ * may also be empty if the consumer doesn't set this up
+ * @flags: flags for the GPIO line, such as GPIO_V2_LINE_FLAG_ACTIVE_LOW,
+ * GPIO_V2_LINE_FLAG_OUTPUT etc, OR:ed together
+ * @offset: the local offset on this GPIO chip, fill this in when
+ * requesting the line information from the kernel
+ * @num_attrs: the number of attributes in attrs
+ * @attrs: the configuration attributes associated with the line
+ * @padding: reserved for future use
+ */
+struct gpio_v2_line_info {
+	char name[GPIO_MAX_NAME_SIZE];
+	char consumer[GPIO_MAX_NAME_SIZE];
+	__u32 offset;
+	__u32 num_attrs;
+	__aligned_u64 flags;
+	struct gpio_v2_line_attribute attrs[GPIO_V2_LINE_NUM_ATTRS_MAX];
+	/* Space reserved for future use. */
+	__u32 padding[4];
+};
+
+/**
+ * enum gpio_v2_line_changed_type - &struct
+ * gpio_v2_line_info_changed.event_type values
+ * @GPIO_V2_LINE_CHANGED_REQUESTED: line has been requested
+ * @GPIO_V2_LINE_CHANGED_RELEASED: line has been released
+ * @GPIO_V2_LINE_CHANGED_CONFIG: line has been reconfigured
+ */
+enum gpio_v2_line_changed_type {
+	GPIO_V2_LINE_CHANGED_REQUESTED	= 1,
+	GPIO_V2_LINE_CHANGED_RELEASED	= 2,
+	GPIO_V2_LINE_CHANGED_CONFIG	= 3,
+};
+
+/**
+ * struct gpio_v2_line_info_changed - Information about a change in status
+ * of a GPIO line
+ * @info: updated line information
+ * @timestamp_ns: estimate of time of status change occurrence, in nanoseconds
+ * @event_type: the type of change with a value from enum
+ * gpio_v2_line_changed_type
+ * @padding: reserved for future use
+ */
+struct gpio_v2_line_info_changed {
+	struct gpio_v2_line_info info;
+	__aligned_u64 timestamp_ns;
+	__u32 event_type;
+	/* Pad struct to 64-bit boundary and reserve space for future use. */
+	__u32 padding[5];
+};
+
+/**
+ * enum gpio_v2_line_event_id - &struct gpio_v2_line_event.id values
+ * @GPIO_V2_LINE_EVENT_RISING_EDGE: event triggered by a rising edge
+ * @GPIO_V2_LINE_EVENT_FALLING_EDGE: event triggered by a falling edge
+ */
+enum gpio_v2_line_event_id {
+	GPIO_V2_LINE_EVENT_RISING_EDGE	= 1,
+	GPIO_V2_LINE_EVENT_FALLING_EDGE	= 2,
+};
+
+/**
+ * struct gpio_v2_line_event - The actual event being pushed to userspace
+ * @timestamp_ns: best estimate of time of event occurrence, in nanoseconds.
+ * The timestamp_ns is read from CLOCK_MONOTONIC and is intended to allow the
+ * accurate measurement of the time between events.  It does not provide
+ * the wall-clock time.
+ * @id: event identifier with value from enum gpio_v2_line_event_id
+ * @offset: the offset of the line that triggered the event
+ * @seqno: the sequence number for this event in the sequence of events for
+ * all the lines in this line request
+ * @line_seqno: the sequence number for this event in the sequence of
+ * events on this particular line
+ * @padding: reserved for future use
+ */
+struct gpio_v2_line_event {
+	__aligned_u64 timestamp_ns;
+	__u32 id;
+	__u32 offset;
+	__u32 seqno;
+	__u32 line_seqno;
+	/* Space reserved for future use. */
+	__u32 padding[6];
+};
+
+/*
+ *  ABI v1
+ */
+
 /* Informational flags */
 #define GPIOLINE_FLAG_KERNEL		(1UL << 0) /* Line used by the kernel */
 #define GPIOLINE_FLAG_IS_OUT		(1UL << 1)
@@ -149,8 +411,6 @@ struct gpiohandle_config {
 	__u32 padding[4]; /* padding for future use */
 };
 
-#define GPIOHANDLE_SET_CONFIG_IOCTL _IOWR(0xB4, 0x0a, struct gpiohandle_config)
-
 /**
  * struct gpiohandle_data - Information of values on a GPIO handle
  * @values: when getting the state of lines this contains the current
@@ -161,9 +421,6 @@ struct gpiohandle_data {
 	__u8 values[GPIOHANDLES_MAX];
 };
 
-#define GPIOHANDLE_GET_LINE_VALUES_IOCTL _IOWR(0xB4, 0x08, struct gpiohandle_data)
-#define GPIOHANDLE_SET_LINE_VALUES_IOCTL _IOWR(0xB4, 0x09, struct gpiohandle_data)
-
 /* Eventrequest flags */
 #define GPIOEVENT_REQUEST_RISING_EDGE	(1UL << 0)
 #define GPIOEVENT_REQUEST_FALLING_EDGE	(1UL << 1)
@@ -207,11 +464,31 @@ struct gpioevent_data {
 	__u32 id;
 };
 
+/*
+ * v1 and v2 ioctl()s
+ */
 #define GPIO_GET_CHIPINFO_IOCTL _IOR(0xB4, 0x01, struct gpiochip_info)
+#define GPIO_GET_LINEINFO_UNWATCH_IOCTL _IOWR(0xB4, 0x0C, __u32)
+
+/*
+ * v2 ioctl()s
+ */
+#define GPIO_V2_GET_LINEINFO_IOCTL _IOWR(0xB4, 0x05, struct gpio_v2_line_info)
+#define GPIO_V2_GET_LINEINFO_WATCH_IOCTL _IOWR(0xB4, 0x06, struct gpio_v2_line_info)
+#define GPIO_V2_GET_LINE_IOCTL _IOWR(0xB4, 0x07, struct gpio_v2_line_request)
+#define GPIO_V2_LINE_SET_CONFIG_IOCTL _IOWR(0xB4, 0x0D, struct gpio_v2_line_config)
+#define GPIO_V2_LINE_GET_VALUES_IOCTL _IOWR(0xB4, 0x0E, struct gpio_v2_line_values)
+#define GPIO_V2_LINE_SET_VALUES_IOCTL _IOWR(0xB4, 0x0F, struct gpio_v2_line_values)
+
+/*
+ * v1 ioctl()s
+ */
 #define GPIO_GET_LINEINFO_IOCTL _IOWR(0xB4, 0x02, struct gpioline_info)
-#define GPIO_GET_LINEINFO_WATCH_IOCTL _IOWR(0xB4, 0x0b, struct gpioline_info)
-#define GPIO_GET_LINEINFO_UNWATCH_IOCTL _IOWR(0xB4, 0x0c, __u32)
 #define GPIO_GET_LINEHANDLE_IOCTL _IOWR(0xB4, 0x03, struct gpiohandle_request)
 #define GPIO_GET_LINEEVENT_IOCTL _IOWR(0xB4, 0x04, struct gpioevent_request)
+#define GPIOHANDLE_GET_LINE_VALUES_IOCTL _IOWR(0xB4, 0x08, struct gpiohandle_data)
+#define GPIOHANDLE_SET_LINE_VALUES_IOCTL _IOWR(0xB4, 0x09, struct gpiohandle_data)
+#define GPIOHANDLE_SET_CONFIG_IOCTL _IOWR(0xB4, 0x0A, struct gpiohandle_config)
+#define GPIO_GET_LINEINFO_WATCH_IOCTL _IOWR(0xB4, 0x0B, struct gpioline_info)
 
 #endif /* _UAPI_GPIO_H_ */
-- 
cgit v1.2.3


From b234d233fe30c63c4e461b03e2884a6765c8e5b0 Mon Sep 17 00:00:00 2001
From: Kent Gibson <warthog618@gmail.com>
Date: Mon, 28 Sep 2020 08:28:00 +0800
Subject: gpio: uapi: document uAPI v1 as deprecated

Update uAPI documentation to deprecate v1 structs and ioctls.

Signed-off-by: Kent Gibson <warthog618@gmail.com>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 include/uapi/linux/gpio.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/gpio.h b/include/uapi/linux/gpio.h
index 5904f49399de..07865c601099 100644
--- a/include/uapi/linux/gpio.h
+++ b/include/uapi/linux/gpio.h
@@ -292,6 +292,9 @@ struct gpio_v2_line_event {
 
 /*
  *  ABI v1
+ *
+ * This version of the ABI is deprecated.
+ * Use the latest version of the ABI, defined above, instead.
  */
 
 /* Informational flags */
@@ -315,6 +318,9 @@ struct gpio_v2_line_event {
  * @consumer: a functional name for the consumer of this GPIO line as set by
  * whatever is using it, will be empty if there is no current user but may
  * also be empty if the consumer doesn't set this up
+ *
+ * This struct is part of ABI v1 and is deprecated.
+ * Use struct gpio_v2_line_info instead.
  */
 struct gpioline_info {
 	__u32 line_offset;
@@ -346,6 +352,9 @@ enum {
  * guarantee there are no implicit holes between it and subsequent members.
  * The 20-byte padding at the end makes sure we don't add any implicit padding
  * at the end of the structure on 64-bit architectures.
+ *
+ * This struct is part of ABI v1 and is deprecated.
+ * Use struct gpio_v2_line_info_changed instead.
  */
 struct gpioline_info_changed {
 	struct gpioline_info info;
@@ -385,6 +394,9 @@ struct gpioline_info_changed {
  * @fd: if successful this field will contain a valid anonymous file handle
  * after a GPIO_GET_LINEHANDLE_IOCTL operation, zero or negative value
  * means error
+ *
+ * This struct is part of ABI v1 and is deprecated.
+ * Use struct gpio_v2_line_request instead.
  */
 struct gpiohandle_request {
 	__u32 lineoffsets[GPIOHANDLES_MAX];
@@ -404,6 +416,9 @@ struct gpiohandle_request {
  * this specifies the default output value, should be 0 (low) or
  * 1 (high), anything else than 0 or 1 will be interpreted as 1 (high)
  * @padding: reserved for future use and should be zero filled
+ *
+ * This struct is part of ABI v1 and is deprecated.
+ * Use struct gpio_v2_line_config instead.
  */
 struct gpiohandle_config {
 	__u32 flags;
@@ -416,6 +431,9 @@ struct gpiohandle_config {
  * @values: when getting the state of lines this contains the current
  * state of a line, when setting the state of lines these should contain
  * the desired target state
+ *
+ * This struct is part of ABI v1 and is deprecated.
+ * Use struct gpio_v2_line_values instead.
  */
 struct gpiohandle_data {
 	__u8 values[GPIOHANDLES_MAX];
@@ -439,6 +457,9 @@ struct gpiohandle_data {
  * @fd: if successful this field will contain a valid anonymous file handle
  * after a GPIO_GET_LINEEVENT_IOCTL operation, zero or negative value
  * means error
+ *
+ * This struct is part of ABI v1 and is deprecated.
+ * Use struct gpio_v2_line_request instead.
  */
 struct gpioevent_request {
 	__u32 lineoffset;
@@ -458,6 +479,9 @@ struct gpioevent_request {
  * struct gpioevent_data - The actual event being pushed to userspace
  * @timestamp: best estimate of time of event occurrence, in nanoseconds
  * @id: event identifier
+ *
+ * This struct is part of ABI v1 and is deprecated.
+ * Use struct gpio_v2_line_event instead.
  */
 struct gpioevent_data {
 	__u64 timestamp;
@@ -482,6 +506,8 @@ struct gpioevent_data {
 
 /*
  * v1 ioctl()s
+ *
+ * These ioctl()s are deprecated.  Use the v2 equivalent instead.
  */
 #define GPIO_GET_LINEINFO_IOCTL _IOWR(0xB4, 0x02, struct gpioline_info)
 #define GPIO_GET_LINEHANDLE_IOCTL _IOWR(0xB4, 0x03, struct gpiohandle_request)
-- 
cgit v1.2.3


From 002f2176532093753cb6ced61e5ea7b8904c6cae Mon Sep 17 00:00:00 2001
From: "Jose M. Guisado Gomez" <guigom@riseup.net>
Date: Mon, 28 Sep 2020 14:27:10 +0200
Subject: netfilter: nf_tables: add userdata attributes to nft_chain

Enables storing userdata for nft_chain. Field udata points to user data
and udlen stores its length.

Adds new attribute flag NFTA_CHAIN_USERDATA.

Signed-off-by: Jose M. Guisado Gomez <guigom@riseup.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h        |  2 ++
 include/uapi/linux/netfilter/nf_tables.h |  2 ++
 net/netfilter/nf_tables_api.c            | 33 ++++++++++++++++++++++++--------
 3 files changed, 29 insertions(+), 8 deletions(-)

(limited to 'include/uapi')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index c4c526507ddb..0bd2a081ae39 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -945,6 +945,8 @@ struct nft_chain {
 					bound:1,
 					genmask:2;
 	char				*name;
+	u16				udlen;
+	u8				*udata;
 
 	/* Only used during control plane commit phase: */
 	struct nft_rule			**rules_next;
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 3c2469b43742..352ee51707a1 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -208,6 +208,7 @@ enum nft_chain_flags {
  * @NFTA_CHAIN_COUNTERS: counter specification of the chain (NLA_NESTED: nft_counter_attributes)
  * @NFTA_CHAIN_FLAGS: chain flags
  * @NFTA_CHAIN_ID: uniquely identifies a chain in a transaction (NLA_U32)
+ * @NFTA_CHAIN_USERDATA: user data (NLA_BINARY)
  */
 enum nft_chain_attributes {
 	NFTA_CHAIN_UNSPEC,
@@ -222,6 +223,7 @@ enum nft_chain_attributes {
 	NFTA_CHAIN_PAD,
 	NFTA_CHAIN_FLAGS,
 	NFTA_CHAIN_ID,
+	NFTA_CHAIN_USERDATA,
 	__NFTA_CHAIN_MAX
 };
 #define NFTA_CHAIN_MAX		(__NFTA_CHAIN_MAX - 1)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 0473316aa392..3cfff31e4818 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1304,6 +1304,8 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = {
 	[NFTA_CHAIN_COUNTERS]	= { .type = NLA_NESTED },
 	[NFTA_CHAIN_FLAGS]	= { .type = NLA_U32 },
 	[NFTA_CHAIN_ID]		= { .type = NLA_U32 },
+	[NFTA_CHAIN_USERDATA]	= { .type = NLA_BINARY,
+				    .len = NFT_USERDATA_MAXLEN },
 };
 
 static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = {
@@ -1445,6 +1447,10 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
 	if (nla_put_be32(skb, NFTA_CHAIN_USE, htonl(chain->use)))
 		goto nla_put_failure;
 
+	if (chain->udata &&
+	    nla_put(skb, NFTA_CHAIN_USERDATA, chain->udlen, chain->udata))
+		goto nla_put_failure;
+
 	nlmsg_end(skb, nlh);
 	return 0;
 
@@ -1682,9 +1688,11 @@ void nf_tables_chain_destroy(struct nft_ctx *ctx)
 			free_percpu(rcu_dereference_raw(basechain->stats));
 		}
 		kfree(chain->name);
+		kfree(chain->udata);
 		kfree(basechain);
 	} else {
 		kfree(chain->name);
+		kfree(chain->udata);
 		kfree(chain);
 	}
 }
@@ -2038,7 +2046,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 	} else {
 		if (!(flags & NFT_CHAIN_BINDING)) {
 			err = -EINVAL;
-			goto err1;
+			goto err_destroy_chain;
 		}
 
 		snprintf(name, sizeof(name), "__chain%llu", ++chain_id);
@@ -2047,13 +2055,22 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 
 	if (!chain->name) {
 		err = -ENOMEM;
-		goto err1;
+		goto err_destroy_chain;
+	}
+
+	if (nla[NFTA_CHAIN_USERDATA]) {
+		chain->udata = nla_memdup(nla[NFTA_CHAIN_USERDATA], GFP_KERNEL);
+		if (chain->udata == NULL) {
+			err = -ENOMEM;
+			goto err_destroy_chain;
+		}
+		chain->udlen = nla_len(nla[NFTA_CHAIN_USERDATA]);
 	}
 
 	rules = nf_tables_chain_alloc_rules(chain, 0);
 	if (!rules) {
 		err = -ENOMEM;
-		goto err1;
+		goto err_destroy_chain;
 	}
 
 	*rules = NULL;
@@ -2062,12 +2079,12 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 
 	err = nf_tables_register_hook(net, table, chain);
 	if (err < 0)
-		goto err1;
+		goto err_destroy_chain;
 
 	trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
 	if (IS_ERR(trans)) {
 		err = PTR_ERR(trans);
-		goto err2;
+		goto err_unregister_hook;
 	}
 
 	nft_trans_chain_policy(trans) = NFT_CHAIN_POLICY_UNSET;
@@ -2077,15 +2094,15 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 	err = nft_chain_add(table, chain);
 	if (err < 0) {
 		nft_trans_destroy(trans);
-		goto err2;
+		goto err_unregister_hook;
 	}
 
 	table->use++;
 
 	return 0;
-err2:
+err_unregister_hook:
 	nf_tables_unregister_hook(net, table, chain);
-err1:
+err_destroy_chain:
 	nf_tables_chain_destroy(ctx);
 
 	return err;
-- 
cgit v1.2.3


From b426ce83baa7dff947fb354118d3133f2953aac8 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 30 Sep 2020 17:18:15 +0200
Subject: bpf: Add classid helper only based on skb->sk

Similarly to 5a52ae4e32a6 ("bpf: Allow to retrieve cgroup v1 classid
from v2 hooks"), add a helper to retrieve cgroup v1 classid solely
based on the skb->sk, so it can be used as key as part of BPF map
lookups out of tc from host ns, in particular given the skb->sk is
retained these days when crossing net ns thanks to 9c4c325252c5
("skbuff: preserve sock reference when scrubbing the skb."). This
is similar to bpf_skb_cgroup_id() which implements the same for v2.
Kubernetes ecosystem is still operating on v1 however, hence net_cls
needs to be used there until this can be dropped in with the v2
helper of bpf_skb_cgroup_id().

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/ed633cf27a1c620e901c5aa99ebdefb028dce600.1601477936.git.daniel@iogearbox.net
---
 include/uapi/linux/bpf.h       | 10 ++++++++++
 net/core/filter.c              | 21 +++++++++++++++++++++
 tools/include/uapi/linux/bpf.h | 10 ++++++++++
 3 files changed, 41 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2b1d3f16cbd1..6116a7f54c8f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3643,6 +3643,15 @@ union bpf_attr {
  *		*flags* are identical to those used for bpf_snprintf_btf.
  *	Return
  *		0 on success or a negative error in case of failure.
+ *
+ * u64 bpf_skb_cgroup_classid(struct sk_buff *skb)
+ * 	Description
+ * 		See **bpf_get_cgroup_classid**\ () for the main description.
+ * 		This helper differs from **bpf_get_cgroup_classid**\ () in that
+ * 		the cgroup v1 net_cls class is retrieved only from the *skb*'s
+ * 		associated socket instead of the current process.
+ * 	Return
+ * 		The id is returned or 0 in case the id could not be retrieved.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3796,6 +3805,7 @@ union bpf_attr {
 	FN(copy_from_user),		\
 	FN(snprintf_btf),		\
 	FN(seq_printf_btf),		\
+	FN(skb_cgroup_classid),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/net/core/filter.c b/net/core/filter.c
index af88935e24b1..fa01c697977d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2707,6 +2707,23 @@ static const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto = {
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 };
+
+BPF_CALL_1(bpf_skb_cgroup_classid, const struct sk_buff *, skb)
+{
+	struct sock *sk = skb_to_full_sk(skb);
+
+	if (!sk || !sk_fullsock(sk))
+		return 0;
+
+	return sock_cgroup_classid(&sk->sk_cgrp_data);
+}
+
+static const struct bpf_func_proto bpf_skb_cgroup_classid_proto = {
+	.func		= bpf_skb_cgroup_classid,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+};
 #endif
 
 BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
@@ -6772,6 +6789,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_skb_get_xfrm_state:
 		return &bpf_skb_get_xfrm_state_proto;
 #endif
+#ifdef CONFIG_CGROUP_NET_CLASSID
+	case BPF_FUNC_skb_cgroup_classid:
+		return &bpf_skb_cgroup_classid_proto;
+#endif
 #ifdef CONFIG_SOCK_CGROUP_DATA
 	case BPF_FUNC_skb_cgroup_id:
 		return &bpf_skb_cgroup_id_proto;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 2b1d3f16cbd1..6116a7f54c8f 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3643,6 +3643,15 @@ union bpf_attr {
  *		*flags* are identical to those used for bpf_snprintf_btf.
  *	Return
  *		0 on success or a negative error in case of failure.
+ *
+ * u64 bpf_skb_cgroup_classid(struct sk_buff *skb)
+ * 	Description
+ * 		See **bpf_get_cgroup_classid**\ () for the main description.
+ * 		This helper differs from **bpf_get_cgroup_classid**\ () in that
+ * 		the cgroup v1 net_cls class is retrieved only from the *skb*'s
+ * 		associated socket instead of the current process.
+ * 	Return
+ * 		The id is returned or 0 in case the id could not be retrieved.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3796,6 +3805,7 @@ union bpf_attr {
 	FN(copy_from_user),		\
 	FN(snprintf_btf),		\
 	FN(seq_printf_btf),		\
+	FN(skb_cgroup_classid),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From b4ab31414970a7a03a5d55d75083f2c101a30592 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 30 Sep 2020 17:18:17 +0200
Subject: bpf: Add redirect_neigh helper as redirect drop-in

Add a redirect_neigh() helper as redirect() drop-in replacement
for the xmit side. Main idea for the helper is to be very similar
in semantics to the latter just that the skb gets injected into
the neighboring subsystem in order to let the stack do the work
it knows best anyway to populate the L2 addresses of the packet
and then hand over to dev_queue_xmit() as redirect() does.

This solves two bigger items: i) skbs don't need to go up to the
stack on the host facing veth ingress side for traffic egressing
the container to achieve the same for populating L2 which also
has the huge advantage that ii) the skb->sk won't get orphaned in
ip_rcv_core() when entering the IP routing layer on the host stack.

Given that skb->sk does not get orphaned when crossing the netns
either, as per 9c4c325252c5 ("skbuff: preserve sock reference when
scrubbing the skb."), the helper can then push the skbs directly to
the phys device where the FQ scheduler can do its work and the TCP
stack gets proper backpressure, given we hold on to skb->sk as long
as the skb is still residing in queues.

With the helper used in BPF data path to then push the skb to the
phys device, I observed a stable/consistent TCP_STREAM improvement
on veth devices for traffic going container -> host -> host ->
container from ~10Gbps to ~15Gbps for a single stream in my test
environment.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: David Ahern <dsahern@gmail.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Cc: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/bpf/f207de81629e1724899b73b8112e0013be782d35.1601477936.git.daniel@iogearbox.net
---
 include/linux/skbuff.h         |   5 +
 include/uapi/linux/bpf.h       |  14 +++
 net/core/filter.c              | 276 ++++++++++++++++++++++++++++++++++++++---
 tools/include/uapi/linux/bpf.h |  14 +++
 4 files changed, 294 insertions(+), 15 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 04a18e01b362..3d0cf3722bb4 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2548,6 +2548,11 @@ static inline int skb_mac_header_was_set(const struct sk_buff *skb)
 	return skb->mac_header != (typeof(skb->mac_header))~0U;
 }
 
+static inline void skb_unset_mac_header(struct sk_buff *skb)
+{
+	skb->mac_header = (typeof(skb->mac_header))~0U;
+}
+
 static inline void skb_reset_mac_header(struct sk_buff *skb)
 {
 	skb->mac_header = skb->data - skb->head;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6116a7f54c8f..1f17c6752deb 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3652,6 +3652,19 @@ union bpf_attr {
  * 		associated socket instead of the current process.
  * 	Return
  * 		The id is returned or 0 in case the id could not be retrieved.
+ *
+ * long bpf_redirect_neigh(u32 ifindex, u64 flags)
+ * 	Description
+ * 		Redirect the packet to another net device of index *ifindex*
+ * 		and fill in L2 addresses from neighboring subsystem. This helper
+ * 		is somewhat similar to **bpf_redirect**\ (), except that it
+ * 		fills in e.g. MAC addresses based on the L3 information from
+ * 		the packet. This helper is supported for IPv4 and IPv6 protocols.
+ * 		The *flags* argument is reserved and must be 0. The helper is
+ * 		currently only supported for tc BPF program types.
+ * 	Return
+ * 		The helper returns **TC_ACT_REDIRECT** on success or
+ * 		**TC_ACT_SHOT** on error.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3806,6 +3819,7 @@ union bpf_attr {
 	FN(snprintf_btf),		\
 	FN(seq_printf_btf),		\
 	FN(skb_cgroup_classid),		\
+	FN(redirect_neigh),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/net/core/filter.c b/net/core/filter.c
index a0776e48dcc9..3fb6adad1957 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2163,13 +2163,233 @@ static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
 		return __bpf_redirect_no_mac(skb, dev, flags);
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	struct net_device *dev = dst->dev;
+	u32 hh_len = LL_RESERVED_SPACE(dev);
+	const struct in6_addr *nexthop;
+	struct neighbour *neigh;
+
+	if (dev_xmit_recursion()) {
+		net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
+		goto out_drop;
+	}
+
+	skb->dev = dev;
+	skb->tstamp = 0;
+
+	if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
+		struct sk_buff *skb2;
+
+		skb2 = skb_realloc_headroom(skb, hh_len);
+		if (unlikely(!skb2)) {
+			kfree_skb(skb);
+			return -ENOMEM;
+		}
+		if (skb->sk)
+			skb_set_owner_w(skb2, skb->sk);
+		consume_skb(skb);
+		skb = skb2;
+	}
+
+	rcu_read_lock_bh();
+	nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
+			      &ipv6_hdr(skb)->daddr);
+	neigh = ip_neigh_gw6(dev, nexthop);
+	if (likely(!IS_ERR(neigh))) {
+		int ret;
+
+		sock_confirm_neigh(skb, neigh);
+		dev_xmit_recursion_inc();
+		ret = neigh_output(neigh, skb, false);
+		dev_xmit_recursion_dec();
+		rcu_read_unlock_bh();
+		return ret;
+	}
+	rcu_read_unlock_bh();
+	IP6_INC_STATS(dev_net(dst->dev),
+		      ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+out_drop:
+	kfree_skb(skb);
+	return -ENETDOWN;
+}
+
+static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
+{
+	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+	struct net *net = dev_net(dev);
+	int err, ret = NET_XMIT_DROP;
+	struct dst_entry *dst;
+	struct flowi6 fl6 = {
+		.flowi6_flags	= FLOWI_FLAG_ANYSRC,
+		.flowi6_mark	= skb->mark,
+		.flowlabel	= ip6_flowinfo(ip6h),
+		.flowi6_oif	= dev->ifindex,
+		.flowi6_proto	= ip6h->nexthdr,
+		.daddr		= ip6h->daddr,
+		.saddr		= ip6h->saddr,
+	};
+
+	dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
+	if (IS_ERR(dst))
+		goto out_drop;
+
+	skb_dst_set(skb, dst);
+
+	err = bpf_out_neigh_v6(net, skb);
+	if (unlikely(net_xmit_eval(err)))
+		dev->stats.tx_errors++;
+	else
+		ret = NET_XMIT_SUCCESS;
+	goto out_xmit;
+out_drop:
+	dev->stats.tx_errors++;
+	kfree_skb(skb);
+out_xmit:
+	return ret;
+}
+#else
+static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
+{
+	kfree_skb(skb);
+	return NET_XMIT_DROP;
+}
+#endif /* CONFIG_IPV6 */
+
+#if IS_ENABLED(CONFIG_INET)
+static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	struct rtable *rt = container_of(dst, struct rtable, dst);
+	struct net_device *dev = dst->dev;
+	u32 hh_len = LL_RESERVED_SPACE(dev);
+	struct neighbour *neigh;
+	bool is_v6gw = false;
+
+	if (dev_xmit_recursion()) {
+		net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
+		goto out_drop;
+	}
+
+	skb->dev = dev;
+	skb->tstamp = 0;
+
+	if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
+		struct sk_buff *skb2;
+
+		skb2 = skb_realloc_headroom(skb, hh_len);
+		if (unlikely(!skb2)) {
+			kfree_skb(skb);
+			return -ENOMEM;
+		}
+		if (skb->sk)
+			skb_set_owner_w(skb2, skb->sk);
+		consume_skb(skb);
+		skb = skb2;
+	}
+
+	rcu_read_lock_bh();
+	neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
+	if (likely(!IS_ERR(neigh))) {
+		int ret;
+
+		sock_confirm_neigh(skb, neigh);
+		dev_xmit_recursion_inc();
+		ret = neigh_output(neigh, skb, is_v6gw);
+		dev_xmit_recursion_dec();
+		rcu_read_unlock_bh();
+		return ret;
+	}
+	rcu_read_unlock_bh();
+out_drop:
+	kfree_skb(skb);
+	return -ENETDOWN;
+}
+
+static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
+{
+	const struct iphdr *ip4h = ip_hdr(skb);
+	struct net *net = dev_net(dev);
+	int err, ret = NET_XMIT_DROP;
+	struct rtable *rt;
+	struct flowi4 fl4 = {
+		.flowi4_flags	= FLOWI_FLAG_ANYSRC,
+		.flowi4_mark	= skb->mark,
+		.flowi4_tos	= RT_TOS(ip4h->tos),
+		.flowi4_oif	= dev->ifindex,
+		.flowi4_proto	= ip4h->protocol,
+		.daddr		= ip4h->daddr,
+		.saddr		= ip4h->saddr,
+	};
+
+	rt = ip_route_output_flow(net, &fl4, NULL);
+	if (IS_ERR(rt))
+		goto out_drop;
+	if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
+		ip_rt_put(rt);
+		goto out_drop;
+	}
+
+	skb_dst_set(skb, &rt->dst);
+
+	err = bpf_out_neigh_v4(net, skb);
+	if (unlikely(net_xmit_eval(err)))
+		dev->stats.tx_errors++;
+	else
+		ret = NET_XMIT_SUCCESS;
+	goto out_xmit;
+out_drop:
+	dev->stats.tx_errors++;
+	kfree_skb(skb);
+out_xmit:
+	return ret;
+}
+#else
+static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
+{
+	kfree_skb(skb);
+	return NET_XMIT_DROP;
+}
+#endif /* CONFIG_INET */
+
+static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ethhdr *ethh = eth_hdr(skb);
+
+	if (unlikely(skb->mac_header >= skb->network_header))
+		goto out;
+	bpf_push_mac_rcsum(skb);
+	if (is_multicast_ether_addr(ethh->h_dest))
+		goto out;
+
+	skb_pull(skb, sizeof(*ethh));
+	skb_unset_mac_header(skb);
+	skb_reset_network_header(skb);
+
+	if (skb->protocol == htons(ETH_P_IP))
+		return __bpf_redirect_neigh_v4(skb, dev);
+	else if (skb->protocol == htons(ETH_P_IPV6))
+		return __bpf_redirect_neigh_v6(skb, dev);
+out:
+	kfree_skb(skb);
+	return -ENOTSUPP;
+}
+
+/* Internal, non-exposed redirect flags. */
+enum {
+	BPF_F_NEIGH = (1ULL << 1),
+#define BPF_F_REDIRECT_INTERNAL	(BPF_F_NEIGH)
+};
+
 BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
 {
 	struct net_device *dev;
 	struct sk_buff *clone;
 	int ret;
 
-	if (unlikely(flags & ~(BPF_F_INGRESS)))
+	if (unlikely(flags & (~(BPF_F_INGRESS) | BPF_F_REDIRECT_INTERNAL)))
 		return -EINVAL;
 
 	dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex);
@@ -2206,23 +2426,11 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = {
 DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
 EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info);
 
-BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
-{
-	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
-
-	if (unlikely(flags & ~(BPF_F_INGRESS)))
-		return TC_ACT_SHOT;
-
-	ri->flags = flags;
-	ri->tgt_index = ifindex;
-
-	return TC_ACT_REDIRECT;
-}
-
 int skb_do_redirect(struct sk_buff *skb)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 	struct net_device *dev;
+	u32 flags = ri->flags;
 
 	dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->tgt_index);
 	ri->tgt_index = 0;
@@ -2231,7 +2439,22 @@ int skb_do_redirect(struct sk_buff *skb)
 		return -EINVAL;
 	}
 
-	return __bpf_redirect(skb, dev, ri->flags);
+	return flags & BPF_F_NEIGH ?
+	       __bpf_redirect_neigh(skb, dev) :
+	       __bpf_redirect(skb, dev, flags);
+}
+
+BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
+{
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+	if (unlikely(flags & (~(BPF_F_INGRESS) | BPF_F_REDIRECT_INTERNAL)))
+		return TC_ACT_SHOT;
+
+	ri->flags = flags;
+	ri->tgt_index = ifindex;
+
+	return TC_ACT_REDIRECT;
 }
 
 static const struct bpf_func_proto bpf_redirect_proto = {
@@ -2242,6 +2465,27 @@ static const struct bpf_func_proto bpf_redirect_proto = {
 	.arg2_type      = ARG_ANYTHING,
 };
 
+BPF_CALL_2(bpf_redirect_neigh, u32, ifindex, u64, flags)
+{
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+	if (unlikely(flags))
+		return TC_ACT_SHOT;
+
+	ri->flags = BPF_F_NEIGH;
+	ri->tgt_index = ifindex;
+
+	return TC_ACT_REDIRECT;
+}
+
+static const struct bpf_func_proto bpf_redirect_neigh_proto = {
+	.func		= bpf_redirect_neigh,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_ANYTHING,
+	.arg2_type	= ARG_ANYTHING,
+};
+
 BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes)
 {
 	msg->apply_bytes = bytes;
@@ -6759,6 +7003,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return bpf_get_skb_set_tunnel_proto(func_id);
 	case BPF_FUNC_redirect:
 		return &bpf_redirect_proto;
+	case BPF_FUNC_redirect_neigh:
+		return &bpf_redirect_neigh_proto;
 	case BPF_FUNC_get_route_realm:
 		return &bpf_get_route_realm_proto;
 	case BPF_FUNC_get_hash_recalc:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 6116a7f54c8f..1f17c6752deb 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3652,6 +3652,19 @@ union bpf_attr {
  * 		associated socket instead of the current process.
  * 	Return
  * 		The id is returned or 0 in case the id could not be retrieved.
+ *
+ * long bpf_redirect_neigh(u32 ifindex, u64 flags)
+ * 	Description
+ * 		Redirect the packet to another net device of index *ifindex*
+ * 		and fill in L2 addresses from neighboring subsystem. This helper
+ * 		is somewhat similar to **bpf_redirect**\ (), except that it
+ * 		fills in e.g. MAC addresses based on the L3 information from
+ * 		the packet. This helper is supported for IPv4 and IPv6 protocols.
+ * 		The *flags* argument is reserved and must be 0. The helper is
+ * 		currently only supported for tc BPF program types.
+ * 	Return
+ * 		The helper returns **TC_ACT_REDIRECT** on success or
+ * 		**TC_ACT_SHOT** on error.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3806,6 +3819,7 @@ union bpf_attr {
 	FN(snprintf_btf),		\
 	FN(seq_printf_btf),		\
 	FN(skb_cgroup_classid),		\
+	FN(redirect_neigh),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 9d4a75efa200a31deabe9ba1c941aef697e6bb30 Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Thu, 27 Aug 2020 16:58:29 +0200
Subject: io_uring: use an enumeration for io_uring_register(2) opcodes

The enumeration allows us to keep track of the last
io_uring_register(2) opcode available.

Behaviour and opcodes names don't change.

Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index d65fde732518..5f12ae6a415c 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -255,17 +255,22 @@ struct io_uring_params {
 /*
  * io_uring_register(2) opcodes and arguments
  */
-#define IORING_REGISTER_BUFFERS		0
-#define IORING_UNREGISTER_BUFFERS	1
-#define IORING_REGISTER_FILES		2
-#define IORING_UNREGISTER_FILES		3
-#define IORING_REGISTER_EVENTFD		4
-#define IORING_UNREGISTER_EVENTFD	5
-#define IORING_REGISTER_FILES_UPDATE	6
-#define IORING_REGISTER_EVENTFD_ASYNC	7
-#define IORING_REGISTER_PROBE		8
-#define IORING_REGISTER_PERSONALITY	9
-#define IORING_UNREGISTER_PERSONALITY	10
+enum {
+	IORING_REGISTER_BUFFERS			= 0,
+	IORING_UNREGISTER_BUFFERS		= 1,
+	IORING_REGISTER_FILES			= 2,
+	IORING_UNREGISTER_FILES			= 3,
+	IORING_REGISTER_EVENTFD			= 4,
+	IORING_UNREGISTER_EVENTFD		= 5,
+	IORING_REGISTER_FILES_UPDATE		= 6,
+	IORING_REGISTER_EVENTFD_ASYNC		= 7,
+	IORING_REGISTER_PROBE			= 8,
+	IORING_REGISTER_PERSONALITY		= 9,
+	IORING_UNREGISTER_PERSONALITY		= 10,
+
+	/* this goes last */
+	IORING_REGISTER_LAST
+};
 
 struct io_uring_files_update {
 	__u32 offset;
-- 
cgit v1.2.3


From 21b55dbc0653018b8cd4513c37cbca303b0f0d50 Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Thu, 27 Aug 2020 16:58:30 +0200
Subject: io_uring: add IORING_REGISTER_RESTRICTIONS opcode

The new io_uring_register(2) IORING_REGISTER_RESTRICTIONS opcode
permanently installs a feature allowlist on an io_ring_ctx.
The io_ring_ctx can then be passed to untrusted code with the
knowledge that only operations present in the allowlist can be
executed.

The allowlist approach ensures that new features added to io_uring
do not accidentally become available when an existing application
is launched on a newer kernel version.

Currently it is possible to restrict sqe opcodes, sqe flags, and
register opcodes.

IORING_REGISTER_RESTRICTIONS can only be made once. Afterwards
it is not possible to change restrictions anymore.
This prevents untrusted code from removing restrictions.

Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c                 | 124 +++++++++++++++++++++++++++++++++++++++++-
 include/uapi/linux/io_uring.h |  31 +++++++++++
 2 files changed, 154 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 05ec385a6094..c4855cecc8f3 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -99,6 +99,8 @@
 #define IORING_MAX_FILES_TABLE	(1U << IORING_FILE_TABLE_SHIFT)
 #define IORING_FILE_TABLE_MASK	(IORING_MAX_FILES_TABLE - 1)
 #define IORING_MAX_FIXED_FILES	(64 * IORING_MAX_FILES_TABLE)
+#define IORING_MAX_RESTRICTIONS	(IORING_RESTRICTION_LAST + \
+				 IORING_REGISTER_LAST + IORING_OP_LAST)
 
 struct io_uring {
 	u32 head ____cacheline_aligned_in_smp;
@@ -220,6 +222,13 @@ struct io_buffer {
 	__u16 bid;
 };
 
+struct io_restriction {
+	DECLARE_BITMAP(register_op, IORING_REGISTER_LAST);
+	DECLARE_BITMAP(sqe_op, IORING_OP_LAST);
+	u8 sqe_flags_allowed;
+	u8 sqe_flags_required;
+};
+
 struct io_ring_ctx {
 	struct {
 		struct percpu_ref	refs;
@@ -232,6 +241,7 @@ struct io_ring_ctx {
 		unsigned int		cq_overflow_flushed: 1;
 		unsigned int		drain_next: 1;
 		unsigned int		eventfd_async: 1;
+		unsigned int		restricted: 1;
 
 		/*
 		 * Ring buffer of indices into array of io_uring_sqe, which is
@@ -346,6 +356,7 @@ struct io_ring_ctx {
 	struct llist_head		file_put_llist;
 
 	struct work_struct		exit_work;
+	struct io_restriction		restrictions;
 };
 
 /*
@@ -6438,6 +6449,32 @@ static inline void io_consume_sqe(struct io_ring_ctx *ctx)
 	ctx->cached_sq_head++;
 }
 
+/*
+ * Check SQE restrictions (opcode and flags).
+ *
+ * Returns 'true' if SQE is allowed, 'false' otherwise.
+ */
+static inline bool io_check_restriction(struct io_ring_ctx *ctx,
+					struct io_kiocb *req,
+					unsigned int sqe_flags)
+{
+	if (!ctx->restricted)
+		return true;
+
+	if (!test_bit(req->opcode, ctx->restrictions.sqe_op))
+		return false;
+
+	if ((sqe_flags & ctx->restrictions.sqe_flags_required) !=
+	    ctx->restrictions.sqe_flags_required)
+		return false;
+
+	if (sqe_flags & ~(ctx->restrictions.sqe_flags_allowed |
+			  ctx->restrictions.sqe_flags_required))
+		return false;
+
+	return true;
+}
+
 #define SQE_VALID_FLAGS	(IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK|	\
 				IOSQE_IO_HARDLINK | IOSQE_ASYNC | \
 				IOSQE_BUFFER_SELECT)
@@ -6473,6 +6510,9 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	if (unlikely(sqe_flags & ~SQE_VALID_FLAGS))
 		return -EINVAL;
 
+	if (unlikely(!io_check_restriction(ctx, req, sqe_flags)))
+		return -EACCES;
+
 	if ((sqe_flags & IOSQE_BUFFER_SELECT) &&
 	    !io_op_defs[req->opcode].buffer_select)
 		return -EOPNOTSUPP;
@@ -9077,6 +9117,72 @@ static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
 	return -EINVAL;
 }
 
+static int io_register_restrictions(struct io_ring_ctx *ctx, void __user *arg,
+				    unsigned int nr_args)
+{
+	struct io_uring_restriction *res;
+	size_t size;
+	int i, ret;
+
+	/* We allow only a single restrictions registration */
+	if (ctx->restricted)
+		return -EBUSY;
+
+	if (!arg || nr_args > IORING_MAX_RESTRICTIONS)
+		return -EINVAL;
+
+	size = array_size(nr_args, sizeof(*res));
+	if (size == SIZE_MAX)
+		return -EOVERFLOW;
+
+	res = memdup_user(arg, size);
+	if (IS_ERR(res))
+		return PTR_ERR(res);
+
+	ret = 0;
+
+	for (i = 0; i < nr_args; i++) {
+		switch (res[i].opcode) {
+		case IORING_RESTRICTION_REGISTER_OP:
+			if (res[i].register_op >= IORING_REGISTER_LAST) {
+				ret = -EINVAL;
+				goto out;
+			}
+
+			__set_bit(res[i].register_op,
+				  ctx->restrictions.register_op);
+			break;
+		case IORING_RESTRICTION_SQE_OP:
+			if (res[i].sqe_op >= IORING_OP_LAST) {
+				ret = -EINVAL;
+				goto out;
+			}
+
+			__set_bit(res[i].sqe_op, ctx->restrictions.sqe_op);
+			break;
+		case IORING_RESTRICTION_SQE_FLAGS_ALLOWED:
+			ctx->restrictions.sqe_flags_allowed = res[i].sqe_flags;
+			break;
+		case IORING_RESTRICTION_SQE_FLAGS_REQUIRED:
+			ctx->restrictions.sqe_flags_required = res[i].sqe_flags;
+			break;
+		default:
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+
+out:
+	/* Reset all restrictions if an error happened */
+	if (ret != 0)
+		memset(&ctx->restrictions, 0, sizeof(ctx->restrictions));
+	else
+		ctx->restricted = 1;
+
+	kfree(res);
+	return ret;
+}
+
 static bool io_register_op_must_quiesce(int op)
 {
 	switch (op) {
@@ -9123,6 +9229,18 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 		if (ret) {
 			percpu_ref_resurrect(&ctx->refs);
 			ret = -EINTR;
+			goto out_quiesce;
+		}
+	}
+
+	if (ctx->restricted) {
+		if (opcode >= IORING_REGISTER_LAST) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		if (!test_bit(opcode, ctx->restrictions.register_op)) {
+			ret = -EACCES;
 			goto out;
 		}
 	}
@@ -9186,15 +9304,19 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 			break;
 		ret = io_unregister_personality(ctx, nr_args);
 		break;
+	case IORING_REGISTER_RESTRICTIONS:
+		ret = io_register_restrictions(ctx, arg, nr_args);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
 	}
 
+out:
 	if (io_register_op_must_quiesce(opcode)) {
 		/* bring the ctx back to life */
 		percpu_ref_reinit(&ctx->refs);
-out:
+out_quiesce:
 		reinit_completion(&ctx->ref_comp);
 	}
 	return ret;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 5f12ae6a415c..6e7f2e5e917b 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -267,6 +267,7 @@ enum {
 	IORING_REGISTER_PROBE			= 8,
 	IORING_REGISTER_PERSONALITY		= 9,
 	IORING_UNREGISTER_PERSONALITY		= 10,
+	IORING_REGISTER_RESTRICTIONS		= 11,
 
 	/* this goes last */
 	IORING_REGISTER_LAST
@@ -295,4 +296,34 @@ struct io_uring_probe {
 	struct io_uring_probe_op ops[0];
 };
 
+struct io_uring_restriction {
+	__u16 opcode;
+	union {
+		__u8 register_op; /* IORING_RESTRICTION_REGISTER_OP */
+		__u8 sqe_op;      /* IORING_RESTRICTION_SQE_OP */
+		__u8 sqe_flags;   /* IORING_RESTRICTION_SQE_FLAGS_* */
+	};
+	__u8 resv;
+	__u32 resv2[3];
+};
+
+/*
+ * io_uring_restriction->opcode values
+ */
+enum {
+	/* Allow an io_uring_register(2) opcode */
+	IORING_RESTRICTION_REGISTER_OP		= 0,
+
+	/* Allow an sqe opcode */
+	IORING_RESTRICTION_SQE_OP		= 1,
+
+	/* Allow sqe flags */
+	IORING_RESTRICTION_SQE_FLAGS_ALLOWED	= 2,
+
+	/* Require sqe flags (these flags must be set on each submission) */
+	IORING_RESTRICTION_SQE_FLAGS_REQUIRED	= 3,
+
+	IORING_RESTRICTION_LAST
+};
+
 #endif
-- 
cgit v1.2.3


From 7e84e1c7566a1df470a9e1f49d3db2ce311261a4 Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Thu, 27 Aug 2020 16:58:31 +0200
Subject: io_uring: allow disabling rings during the creation

This patch adds a new IORING_SETUP_R_DISABLED flag to start the
rings disabled, allowing the user to register restrictions,
buffers, and files before SQE processing starts.

When IORING_SETUP_R_DISABLED is set, SQEs are not processed and
the SQPOLL kthread is not started.

Registering restrictions is allowed only while the rings are
disabled, to prevent concurrency issues while processing SQEs.

The rings can be enabled using IORING_REGISTER_ENABLE_RINGS
opcode with io_uring_register(2).

Suggested-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c                 | 61 ++++++++++++++++++++++++++++++++++++++-----
 include/uapi/linux/io_uring.h |  2 ++
 2 files changed, 56 insertions(+), 7 deletions(-)

(limited to 'include/uapi')

diff --git a/fs/io_uring.c b/fs/io_uring.c
index c4855cecc8f3..b8fdb10c23e3 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -227,6 +227,7 @@ struct io_restriction {
 	DECLARE_BITMAP(sqe_op, IORING_OP_LAST);
 	u8 sqe_flags_allowed;
 	u8 sqe_flags_required;
+	bool registered;
 };
 
 struct io_ring_ctx {
@@ -6910,6 +6911,14 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 static void io_sq_thread_stop(struct io_ring_ctx *ctx)
 {
 	if (ctx->sqo_thread) {
+		/*
+		 * We may arrive here from the error branch in
+		 * io_sq_offload_create() where the kthread is created
+		 * without being waked up, thus wake it up now to make
+		 * sure the wait will complete.
+		 */
+		wake_up_process(ctx->sqo_thread);
+
 		wait_for_completion(&ctx->sq_thread_comp);
 		/*
 		 * The park is a bit of a work-around, without it we get
@@ -7581,8 +7590,8 @@ void __io_uring_free(struct task_struct *tsk)
 	tsk->io_uring = NULL;
 }
 
-static int io_sq_offload_start(struct io_ring_ctx *ctx,
-			       struct io_uring_params *p)
+static int io_sq_offload_create(struct io_ring_ctx *ctx,
+				struct io_uring_params *p)
 {
 	int ret;
 
@@ -7619,7 +7628,6 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
 		ret = io_uring_alloc_task_context(ctx->sqo_thread);
 		if (ret)
 			goto err;
-		wake_up_process(ctx->sqo_thread);
 	} else if (p->flags & IORING_SETUP_SQ_AFF) {
 		/* Can't have SQ_AFF without SQPOLL */
 		ret = -EINVAL;
@@ -7636,6 +7644,12 @@ err:
 	return ret;
 }
 
+static void io_sq_offload_start(struct io_ring_ctx *ctx)
+{
+	if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sqo_thread)
+		wake_up_process(ctx->sqo_thread);
+}
+
 static inline void __io_unaccount_mem(struct user_struct *user,
 				      unsigned long nr_pages)
 {
@@ -8633,6 +8647,10 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 	if (!percpu_ref_tryget(&ctx->refs))
 		goto out_fput;
 
+	ret = -EBADFD;
+	if (ctx->flags & IORING_SETUP_R_DISABLED)
+		goto out;
+
 	/*
 	 * For SQ polling, the thread will do all submissions and completions.
 	 * Just return the requested submit count, and wake the thread if
@@ -8975,10 +8993,13 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 	if (ret)
 		goto err;
 
-	ret = io_sq_offload_start(ctx, p);
+	ret = io_sq_offload_create(ctx, p);
 	if (ret)
 		goto err;
 
+	if (!(p->flags & IORING_SETUP_R_DISABLED))
+		io_sq_offload_start(ctx);
+
 	memset(&p->sq_off, 0, sizeof(p->sq_off));
 	p->sq_off.head = offsetof(struct io_rings, sq.head);
 	p->sq_off.tail = offsetof(struct io_rings, sq.tail);
@@ -9041,7 +9062,8 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
 
 	if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
 			IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE |
-			IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ))
+			IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ |
+			IORING_SETUP_R_DISABLED))
 		return -EINVAL;
 
 	return  io_uring_create(entries, &p, params);
@@ -9124,8 +9146,12 @@ static int io_register_restrictions(struct io_ring_ctx *ctx, void __user *arg,
 	size_t size;
 	int i, ret;
 
+	/* Restrictions allowed only if rings started disabled */
+	if (!(ctx->flags & IORING_SETUP_R_DISABLED))
+		return -EBADFD;
+
 	/* We allow only a single restrictions registration */
-	if (ctx->restricted)
+	if (ctx->restrictions.registered)
 		return -EBUSY;
 
 	if (!arg || nr_args > IORING_MAX_RESTRICTIONS)
@@ -9177,12 +9203,27 @@ out:
 	if (ret != 0)
 		memset(&ctx->restrictions, 0, sizeof(ctx->restrictions));
 	else
-		ctx->restricted = 1;
+		ctx->restrictions.registered = true;
 
 	kfree(res);
 	return ret;
 }
 
+static int io_register_enable_rings(struct io_ring_ctx *ctx)
+{
+	if (!(ctx->flags & IORING_SETUP_R_DISABLED))
+		return -EBADFD;
+
+	if (ctx->restrictions.registered)
+		ctx->restricted = 1;
+
+	ctx->flags &= ~IORING_SETUP_R_DISABLED;
+
+	io_sq_offload_start(ctx);
+
+	return 0;
+}
+
 static bool io_register_op_must_quiesce(int op)
 {
 	switch (op) {
@@ -9304,6 +9345,12 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 			break;
 		ret = io_unregister_personality(ctx, nr_args);
 		break;
+	case IORING_REGISTER_ENABLE_RINGS:
+		ret = -EINVAL;
+		if (arg || nr_args)
+			break;
+		ret = io_register_enable_rings(ctx);
+		break;
 	case IORING_REGISTER_RESTRICTIONS:
 		ret = io_register_restrictions(ctx, arg, nr_args);
 		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 6e7f2e5e917b..a0c85e0e9016 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -95,6 +95,7 @@ enum {
 #define IORING_SETUP_CQSIZE	(1U << 3)	/* app defines CQ size */
 #define IORING_SETUP_CLAMP	(1U << 4)	/* clamp SQ/CQ ring sizes */
 #define IORING_SETUP_ATTACH_WQ	(1U << 5)	/* attach to existing wq */
+#define IORING_SETUP_R_DISABLED	(1U << 6)	/* start with ring disabled */
 
 enum {
 	IORING_OP_NOP,
@@ -268,6 +269,7 @@ enum {
 	IORING_REGISTER_PERSONALITY		= 9,
 	IORING_UNREGISTER_PERSONALITY		= 10,
 	IORING_REGISTER_RESTRICTIONS		= 11,
+	IORING_REGISTER_ENABLE_RINGS		= 12,
 
 	/* this goes last */
 	IORING_REGISTER_LAST
-- 
cgit v1.2.3


From 90554200724d5b280439dc361fe7ee92fe459ea7 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 3 Sep 2020 12:12:41 -0600
Subject: io_uring: provide IORING_ENTER_SQ_WAIT for SQPOLL SQ ring waits

When using SQPOLL, applications can run into the issue of running out of
SQ ring entries because the thread hasn't consumed them yet. The only
option for dealing with that is checking later, or busy checking for the
condition.

Provide IORING_ENTER_SQ_WAIT if applications want to wait on this
condition.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c                 | 40 +++++++++++++++++++++++++++++++++++++---
 include/uapi/linux/io_uring.h |  1 +
 2 files changed, 38 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/fs/io_uring.c b/fs/io_uring.c
index a924ab1cf15b..3ee6ee1785d2 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -301,6 +301,7 @@ struct io_ring_ctx {
 
 	struct io_sq_data	*sq_data;	/* if using sq thread polling */
 
+	struct wait_queue_head	sqo_sq_wait;
 	struct wait_queue_entry	sqo_wait_entry;
 	struct list_head	sqd_list;
 
@@ -1072,6 +1073,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 		goto err;
 
 	ctx->flags = p->flags;
+	init_waitqueue_head(&ctx->sqo_sq_wait);
 	INIT_LIST_HEAD(&ctx->sqd_list);
 	init_waitqueue_head(&ctx->cq_wait);
 	INIT_LIST_HEAD(&ctx->cq_overflow_list);
@@ -1324,6 +1326,13 @@ static void io_commit_cqring(struct io_ring_ctx *ctx)
 		__io_queue_deferred(ctx);
 }
 
+static inline bool io_sqring_full(struct io_ring_ctx *ctx)
+{
+	struct io_rings *r = ctx->rings;
+
+	return READ_ONCE(r->sq.tail) - ctx->cached_sq_head == r->sq_ring_entries;
+}
+
 static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
 {
 	struct io_rings *rings = ctx->rings;
@@ -6736,6 +6745,10 @@ again:
 	if (likely(!percpu_ref_is_dying(&ctx->refs)))
 		ret = io_submit_sqes(ctx, to_submit);
 	mutex_unlock(&ctx->uring_lock);
+
+	if (!io_sqring_full(ctx) && wq_has_sleeper(&ctx->sqo_sq_wait))
+		wake_up(&ctx->sqo_sq_wait);
+
 	return SQT_DID_WORK;
 }
 
@@ -8231,8 +8244,7 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
 	 * io_commit_cqring
 	 */
 	smp_rmb();
-	if (READ_ONCE(ctx->rings->sq.tail) - ctx->cached_sq_head !=
-	    ctx->rings->sq_ring_entries)
+	if (!io_sqring_full(ctx))
 		mask |= EPOLLOUT | EPOLLWRNORM;
 	if (io_cqring_events(ctx, false))
 		mask |= EPOLLIN | EPOLLRDNORM;
@@ -8801,6 +8813,25 @@ static unsigned long io_uring_nommu_get_unmapped_area(struct file *file,
 
 #endif /* !CONFIG_MMU */
 
+static void io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
+{
+	DEFINE_WAIT(wait);
+
+	do {
+		if (!io_sqring_full(ctx))
+			break;
+
+		prepare_to_wait(&ctx->sqo_sq_wait, &wait, TASK_INTERRUPTIBLE);
+
+		if (!io_sqring_full(ctx))
+			break;
+
+		schedule();
+	} while (!signal_pending(current));
+
+	finish_wait(&ctx->sqo_sq_wait, &wait);
+}
+
 SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 		u32, min_complete, u32, flags, const sigset_t __user *, sig,
 		size_t, sigsz)
@@ -8812,7 +8843,8 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 
 	io_run_task_work();
 
-	if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP))
+	if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP |
+			IORING_ENTER_SQ_WAIT))
 		return -EINVAL;
 
 	f = fdget(fd);
@@ -8843,6 +8875,8 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 			io_cqring_overflow_flush(ctx, false, NULL, NULL);
 		if (flags & IORING_ENTER_SQ_WAKEUP)
 			wake_up(&ctx->sq_data->wait);
+		if (flags & IORING_ENTER_SQ_WAIT)
+			io_sqpoll_wait_sq(ctx);
 		submitted = to_submit;
 	} else if (to_submit) {
 		ret = io_uring_add_task_file(f.file);
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index a0c85e0e9016..98d8e06dea22 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -225,6 +225,7 @@ struct io_cqring_offsets {
  */
 #define IORING_ENTER_GETEVENTS	(1U << 0)
 #define IORING_ENTER_SQ_WAKEUP	(1U << 1)
+#define IORING_ENTER_SQ_WAIT	(1U << 2)
 
 /*
  * Passed in for io_uring_setup(2). Copied back with updated info on success
-- 
cgit v1.2.3


From 792caccc4526bb489e054f9ab61d7c024b15dea2 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Wed, 30 Sep 2020 15:49:26 -0700
Subject: bpf: Introduce BPF_F_PRESERVE_ELEMS for perf event array

Currently, a perf event in a perf event array is removed from the array
when the map fd used to add the event is closed. This behavior makes it
difficult to share perf events through a perf event array.

Introduce perf event map that keeps the perf event open with a new flag
BPF_F_PRESERVE_ELEMS. With this flag set, perf events in the array are not
removed when the original map fd is closed. Instead, the perf event will
stay in the map until 1) it is explicitly removed from the array; or 2)
the array is freed.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200930224927.1936644-2-songliubraving@fb.com
---
 include/uapi/linux/bpf.h       |  3 +++
 kernel/bpf/arraymap.c          | 19 +++++++++++++++++--
 tools/include/uapi/linux/bpf.h |  3 +++
 3 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1f17c6752deb..4f556cfcbfbe 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -414,6 +414,9 @@ enum {
 
 /* Enable memory-mapping BPF map */
 	BPF_F_MMAPABLE		= (1U << 10),
+
+/* Share perf_event among processes */
+	BPF_F_PRESERVE_ELEMS	= (1U << 11),
 };
 
 /* Flags for BPF_PROG_QUERY. */
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index e5fd31268ae0..bd777dd6f967 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -15,7 +15,8 @@
 #include "map_in_map.h"
 
 #define ARRAY_CREATE_FLAG_MASK \
-	(BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK)
+	(BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK | \
+	 BPF_F_PRESERVE_ELEMS)
 
 static void bpf_array_free_percpu(struct bpf_array *array)
 {
@@ -64,6 +65,10 @@ int array_map_alloc_check(union bpf_attr *attr)
 	    attr->map_flags & BPF_F_MMAPABLE)
 		return -EINVAL;
 
+	if (attr->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
+	    attr->map_flags & BPF_F_PRESERVE_ELEMS)
+		return -EINVAL;
+
 	if (attr->value_size > KMALLOC_MAX_SIZE)
 		/* if value_size is bigger, the user space won't be able to
 		 * access the elements.
@@ -1134,6 +1139,9 @@ static void perf_event_fd_array_release(struct bpf_map *map,
 	struct bpf_event_entry *ee;
 	int i;
 
+	if (map->map_flags & BPF_F_PRESERVE_ELEMS)
+		return;
+
 	rcu_read_lock();
 	for (i = 0; i < array->map.max_entries; i++) {
 		ee = READ_ONCE(array->ptrs[i]);
@@ -1143,12 +1151,19 @@ static void perf_event_fd_array_release(struct bpf_map *map,
 	rcu_read_unlock();
 }
 
+static void perf_event_fd_array_map_free(struct bpf_map *map)
+{
+	if (map->map_flags & BPF_F_PRESERVE_ELEMS)
+		bpf_fd_array_map_clear(map);
+	fd_array_map_free(map);
+}
+
 static int perf_event_array_map_btf_id;
 const struct bpf_map_ops perf_event_array_map_ops = {
 	.map_meta_equal = bpf_map_meta_equal,
 	.map_alloc_check = fd_array_map_alloc_check,
 	.map_alloc = array_map_alloc,
-	.map_free = fd_array_map_free,
+	.map_free = perf_event_fd_array_map_free,
 	.map_get_next_key = array_map_get_next_key,
 	.map_lookup_elem = fd_array_map_lookup_elem,
 	.map_delete_elem = fd_array_map_delete_elem,
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 1f17c6752deb..4f556cfcbfbe 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -414,6 +414,9 @@ enum {
 
 /* Enable memory-mapping BPF map */
 	BPF_F_MMAPABLE		= (1U << 10),
+
+/* Share perf_event among processes */
+	BPF_F_PRESERVE_ELEMS	= (1U << 11),
 };
 
 /* Flags for BPF_PROG_QUERY. */
-- 
cgit v1.2.3


From 1e6aaae93e9ddb9dc664993eb949b1da94cab3a5 Mon Sep 17 00:00:00 2001
From: Jacob Pan <jacob.pan.linux@gmail.com>
Date: Fri, 25 Sep 2020 09:32:43 -0700
Subject: iommu/uapi: Add argsz for user filled data

As IOMMU UAPI gets extended, user data size may increase. To support
backward compatibility, this patch introduces a size field to each UAPI
data structure. It is *always* the responsibility of the user to fill in
the correct size. Padding fields are adjusted to ensure 8 byte alignment.

Specific scenarios for user data handling are documented in:
Documentation/userspace-api/iommu.rst

As there are no current users of the API, struct version is not
incremented.

Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Link: https://lore.kernel.org/r/1601051567-54787-3-git-send-email-jacob.jun.pan@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/uapi/linux/iommu.h | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
index c2b2caf9ed41..b42acc8fe007 100644
--- a/include/uapi/linux/iommu.h
+++ b/include/uapi/linux/iommu.h
@@ -139,6 +139,7 @@ enum iommu_page_response_code {
 
 /**
  * struct iommu_page_response - Generic page response information
+ * @argsz: User filled size of this data
  * @version: API version of this structure
  * @flags: encodes whether the corresponding fields are valid
  *         (IOMMU_FAULT_PAGE_RESPONSE_* values)
@@ -147,6 +148,7 @@ enum iommu_page_response_code {
  * @code: response code from &enum iommu_page_response_code
  */
 struct iommu_page_response {
+	__u32	argsz;
 #define IOMMU_PAGE_RESP_VERSION_1	1
 	__u32	version;
 #define IOMMU_PAGE_RESP_PASID_VALID	(1 << 0)
@@ -222,6 +224,7 @@ struct iommu_inv_pasid_info {
 /**
  * struct iommu_cache_invalidate_info - First level/stage invalidation
  *     information
+ * @argsz: User filled size of this data
  * @version: API version of this structure
  * @cache: bitfield that allows to select which caches to invalidate
  * @granularity: defines the lowest granularity used for the invalidation:
@@ -250,6 +253,7 @@ struct iommu_inv_pasid_info {
  * must support the used granularity.
  */
 struct iommu_cache_invalidate_info {
+	__u32	argsz;
 #define IOMMU_CACHE_INVALIDATE_INFO_VERSION_1 1
 	__u32	version;
 /* IOMMU paging structure cache */
@@ -259,7 +263,7 @@ struct iommu_cache_invalidate_info {
 #define IOMMU_CACHE_INV_TYPE_NR		(3)
 	__u8	cache;
 	__u8	granularity;
-	__u8	padding[2];
+	__u8	padding[6];
 	union {
 		struct iommu_inv_pasid_info pasid_info;
 		struct iommu_inv_addr_info addr_info;
@@ -296,6 +300,7 @@ struct iommu_gpasid_bind_data_vtd {
 
 /**
  * struct iommu_gpasid_bind_data - Information about device and guest PASID binding
+ * @argsz:	User filled size of this data
  * @version:	Version of this data structure
  * @format:	PASID table entry format
  * @flags:	Additional information on guest bind request
@@ -313,17 +318,18 @@ struct iommu_gpasid_bind_data_vtd {
  * PASID to host PASID based on this bind data.
  */
 struct iommu_gpasid_bind_data {
+	__u32 argsz;
 #define IOMMU_GPASID_BIND_VERSION_1	1
 	__u32 version;
 #define IOMMU_PASID_FORMAT_INTEL_VTD	1
 	__u32 format;
+	__u32 addr_width;
 #define IOMMU_SVA_GPASID_VAL	(1 << 0) /* guest PASID valid */
 	__u64 flags;
 	__u64 gpgd;
 	__u64 hpasid;
 	__u64 gpasid;
-	__u32 addr_width;
-	__u8  padding[12];
+	__u8  padding[8];
 	/* Vendor specific data */
 	union {
 		struct iommu_gpasid_bind_data_vtd vtd;
-- 
cgit v1.2.3


From 8d3bb3b8cbf2ffb3ef73720a48b3445518dcdb55 Mon Sep 17 00:00:00 2001
From: Jacob Pan <jacob.pan.linux@gmail.com>
Date: Fri, 25 Sep 2020 09:32:44 -0700
Subject: iommu/uapi: Use named union for user data

IOMMU UAPI data size is filled by the user space which must be validated
by the kernel. To ensure backward compatibility, user data can only be
extended by either repurposing padding bytes or extending the variable
sized union at the end. No size change is allowed before the union.
Therefore, the minimum size is the offset of the union.

To use offsetof() on the union, we must make it named.

Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Link: https://lore.kernel.org/linux-iommu/20200611145518.0c2817d6@x1.home/
Link: https://lore.kernel.org/r/1601051567-54787-4-git-send-email-jacob.jun.pan@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel/iommu.c | 22 +++++++++++-----------
 drivers/iommu/intel/svm.c   |  2 +-
 include/uapi/linux/iommu.h  |  4 ++--
 3 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index f8177c59d229..f1c66c94be55 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -5424,8 +5424,8 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
 
 	/* Size is only valid in address selective invalidation */
 	if (inv_info->granularity == IOMMU_INV_GRANU_ADDR)
-		size = to_vtd_size(inv_info->addr_info.granule_size,
-				   inv_info->addr_info.nb_granules);
+		size = to_vtd_size(inv_info->granu.addr_info.granule_size,
+				   inv_info->granu.addr_info.nb_granules);
 
 	for_each_set_bit(cache_type,
 			 (unsigned long *)&inv_info->cache,
@@ -5446,20 +5446,20 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
 		 * granularity.
 		 */
 		if (inv_info->granularity == IOMMU_INV_GRANU_PASID &&
-		    (inv_info->pasid_info.flags & IOMMU_INV_PASID_FLAGS_PASID))
-			pasid = inv_info->pasid_info.pasid;
+		    (inv_info->granu.pasid_info.flags & IOMMU_INV_PASID_FLAGS_PASID))
+			pasid = inv_info->granu.pasid_info.pasid;
 		else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
-			 (inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_PASID))
-			pasid = inv_info->addr_info.pasid;
+			 (inv_info->granu.addr_info.flags & IOMMU_INV_ADDR_FLAGS_PASID))
+			pasid = inv_info->granu.addr_info.pasid;
 
 		switch (BIT(cache_type)) {
 		case IOMMU_CACHE_INV_TYPE_IOTLB:
 			/* HW will ignore LSB bits based on address mask */
 			if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
 			    size &&
-			    (inv_info->addr_info.addr & ((BIT(VTD_PAGE_SHIFT + size)) - 1))) {
+			    (inv_info->granu.addr_info.addr & ((BIT(VTD_PAGE_SHIFT + size)) - 1))) {
 				pr_err_ratelimited("User address not aligned, 0x%llx, size order %llu\n",
-						   inv_info->addr_info.addr, size);
+						   inv_info->granu.addr_info.addr, size);
 			}
 
 			/*
@@ -5467,9 +5467,9 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
 			 * We use npages = -1 to indicate that.
 			 */
 			qi_flush_piotlb(iommu, did, pasid,
-					mm_to_dma_pfn(inv_info->addr_info.addr),
+					mm_to_dma_pfn(inv_info->granu.addr_info.addr),
 					(granu == QI_GRAN_NONG_PASID) ? -1 : 1 << size,
-					inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_LEAF);
+					inv_info->granu.addr_info.flags & IOMMU_INV_ADDR_FLAGS_LEAF);
 
 			if (!info->ats_enabled)
 				break;
@@ -5492,7 +5492,7 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
 				size = 64 - VTD_PAGE_SHIFT;
 				addr = 0;
 			} else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR) {
-				addr = inv_info->addr_info.addr;
+				addr = inv_info->granu.addr_info.addr;
 			}
 
 			if (info->ats_enabled)
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 95c3164a2302..99353d6468fa 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -370,7 +370,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
 	spin_lock(&iommu->lock);
 	ret = intel_pasid_setup_nested(iommu, dev,
 				       (pgd_t *)(uintptr_t)data->gpgd,
-				       data->hpasid, &data->vtd, dmar_domain,
+				       data->hpasid, &data->vendor.vtd, dmar_domain,
 				       data->addr_width);
 	spin_unlock(&iommu->lock);
 	if (ret) {
diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
index b42acc8fe007..5946779ac1f9 100644
--- a/include/uapi/linux/iommu.h
+++ b/include/uapi/linux/iommu.h
@@ -267,7 +267,7 @@ struct iommu_cache_invalidate_info {
 	union {
 		struct iommu_inv_pasid_info pasid_info;
 		struct iommu_inv_addr_info addr_info;
-	};
+	} granu;
 };
 
 /**
@@ -333,7 +333,7 @@ struct iommu_gpasid_bind_data {
 	/* Vendor specific data */
 	union {
 		struct iommu_gpasid_bind_data_vtd vtd;
-	};
+	} vendor;
 };
 
 #endif /* _UAPI_IOMMU_H */
-- 
cgit v1.2.3


From d90573812eea63c6bc8ab8a38f661b4c27c3cdc0 Mon Sep 17 00:00:00 2001
From: Jacob Pan <jacob.pan.linux@gmail.com>
Date: Fri, 25 Sep 2020 09:32:46 -0700
Subject: iommu/uapi: Handle data and argsz filled by users

IOMMU user APIs are responsible for processing user data. This patch
changes the interface such that user pointers can be passed into IOMMU
code directly. Separate kernel APIs without user pointers are introduced
for in-kernel users of the UAPI functionality.

IOMMU UAPI data has a user filled argsz field which indicates the data
length of the structure. User data is not trusted, argsz must be
validated based on the current kernel data size, mandatory data size,
and feature flags.

User data may also be extended, resulting in possible argsz increase.
Backward compatibility is ensured based on size and flags (or
the functional equivalent fields) checking.

This patch adds sanity checks in the IOMMU layer. In addition to argsz,
reserved/unused fields in padding, flags, and version are also checked.
Details are documented in Documentation/userspace-api/iommu.rst

Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Link: https://lore.kernel.org/r/1601051567-54787-6-git-send-email-jacob.jun.pan@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iommu.c      | 194 +++++++++++++++++++++++++++++++++++++++++++--
 include/linux/iommu.h      |  28 ++++---
 include/uapi/linux/iommu.h |   1 +
 3 files changed, 207 insertions(+), 16 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index e412a0abfefa..6d847027d35e 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1961,34 +1961,214 @@ out_unlock:
 }
 EXPORT_SYMBOL_GPL(iommu_attach_device);
 
+/*
+ * Check flags and other user provided data for valid combinations. We also
+ * make sure no reserved fields or unused flags are set. This is to ensure
+ * not breaking userspace in the future when these fields or flags are used.
+ */
+static int iommu_check_cache_invl_data(struct iommu_cache_invalidate_info *info)
+{
+	u32 mask;
+	int i;
+
+	if (info->version != IOMMU_CACHE_INVALIDATE_INFO_VERSION_1)
+		return -EINVAL;
+
+	mask = (1 << IOMMU_CACHE_INV_TYPE_NR) - 1;
+	if (info->cache & ~mask)
+		return -EINVAL;
+
+	if (info->granularity >= IOMMU_INV_GRANU_NR)
+		return -EINVAL;
+
+	switch (info->granularity) {
+	case IOMMU_INV_GRANU_ADDR:
+		if (info->cache & IOMMU_CACHE_INV_TYPE_PASID)
+			return -EINVAL;
+
+		mask = IOMMU_INV_ADDR_FLAGS_PASID |
+			IOMMU_INV_ADDR_FLAGS_ARCHID |
+			IOMMU_INV_ADDR_FLAGS_LEAF;
+
+		if (info->granu.addr_info.flags & ~mask)
+			return -EINVAL;
+		break;
+	case IOMMU_INV_GRANU_PASID:
+		mask = IOMMU_INV_PASID_FLAGS_PASID |
+			IOMMU_INV_PASID_FLAGS_ARCHID;
+		if (info->granu.pasid_info.flags & ~mask)
+			return -EINVAL;
+
+		break;
+	case IOMMU_INV_GRANU_DOMAIN:
+		if (info->cache & IOMMU_CACHE_INV_TYPE_DEV_IOTLB)
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Check reserved padding fields */
+	for (i = 0; i < sizeof(info->padding); i++) {
+		if (info->padding[i])
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
 int iommu_uapi_cache_invalidate(struct iommu_domain *domain, struct device *dev,
-				struct iommu_cache_invalidate_info *inv_info)
+				void __user *uinfo)
 {
+	struct iommu_cache_invalidate_info inv_info = { 0 };
+	u32 minsz;
+	int ret;
+
 	if (unlikely(!domain->ops->cache_invalidate))
 		return -ENODEV;
 
-	return domain->ops->cache_invalidate(domain, dev, inv_info);
+	/*
+	 * No new spaces can be added before the variable sized union, the
+	 * minimum size is the offset to the union.
+	 */
+	minsz = offsetof(struct iommu_cache_invalidate_info, granu);
+
+	/* Copy minsz from user to get flags and argsz */
+	if (copy_from_user(&inv_info, uinfo, minsz))
+		return -EFAULT;
+
+	/* Fields before the variable size union are mandatory */
+	if (inv_info.argsz < minsz)
+		return -EINVAL;
+
+	/* PASID and address granu require additional info beyond minsz */
+	if (inv_info.granularity == IOMMU_INV_GRANU_PASID &&
+	    inv_info.argsz < offsetofend(struct iommu_cache_invalidate_info, granu.pasid_info))
+		return -EINVAL;
+
+	if (inv_info.granularity == IOMMU_INV_GRANU_ADDR &&
+	    inv_info.argsz < offsetofend(struct iommu_cache_invalidate_info, granu.addr_info))
+		return -EINVAL;
+
+	/*
+	 * User might be using a newer UAPI header which has a larger data
+	 * size, we shall support the existing flags within the current
+	 * size. Copy the remaining user data _after_ minsz but not more
+	 * than the current kernel supported size.
+	 */
+	if (copy_from_user((void *)&inv_info + minsz, uinfo + minsz,
+			   min_t(u32, inv_info.argsz, sizeof(inv_info)) - minsz))
+		return -EFAULT;
+
+	/* Now the argsz is validated, check the content */
+	ret = iommu_check_cache_invl_data(&inv_info);
+	if (ret)
+		return ret;
+
+	return domain->ops->cache_invalidate(domain, dev, &inv_info);
 }
 EXPORT_SYMBOL_GPL(iommu_uapi_cache_invalidate);
 
-int iommu_uapi_sva_bind_gpasid(struct iommu_domain *domain,
-			       struct device *dev, struct iommu_gpasid_bind_data *data)
+static int iommu_check_bind_data(struct iommu_gpasid_bind_data *data)
+{
+	u32 mask;
+	int i;
+
+	if (data->version != IOMMU_GPASID_BIND_VERSION_1)
+		return -EINVAL;
+
+	/* Check the range of supported formats */
+	if (data->format >= IOMMU_PASID_FORMAT_LAST)
+		return -EINVAL;
+
+	/* Check all flags */
+	mask = IOMMU_SVA_GPASID_VAL;
+	if (data->flags & ~mask)
+		return -EINVAL;
+
+	/* Check reserved padding fields */
+	for (i = 0; i < sizeof(data->padding); i++) {
+		if (data->padding[i])
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int iommu_sva_prepare_bind_data(void __user *udata,
+				       struct iommu_gpasid_bind_data *data)
+{
+	u32 minsz;
+
+	/*
+	 * No new spaces can be added before the variable sized union, the
+	 * minimum size is the offset to the union.
+	 */
+	minsz = offsetof(struct iommu_gpasid_bind_data, vendor);
+
+	/* Copy minsz from user to get flags and argsz */
+	if (copy_from_user(data, udata, minsz))
+		return -EFAULT;
+
+	/* Fields before the variable size union are mandatory */
+	if (data->argsz < minsz)
+		return -EINVAL;
+	/*
+	 * User might be using a newer UAPI header, we shall let IOMMU vendor
+	 * driver decide on what size it needs. Since the guest PASID bind data
+	 * can be vendor specific, larger argsz could be the result of extension
+	 * for one vendor but it should not affect another vendor.
+	 * Copy the remaining user data _after_ minsz
+	 */
+	if (copy_from_user((void *)data + minsz, udata + minsz,
+			   min_t(u32, data->argsz, sizeof(*data)) - minsz))
+		return -EFAULT;
+
+	return iommu_check_bind_data(data);
+}
+
+int iommu_uapi_sva_bind_gpasid(struct iommu_domain *domain, struct device *dev,
+			       void __user *udata)
 {
+	struct iommu_gpasid_bind_data data = { 0 };
+	int ret;
+
 	if (unlikely(!domain->ops->sva_bind_gpasid))
 		return -ENODEV;
 
-	return domain->ops->sva_bind_gpasid(domain, dev, data);
+	ret = iommu_sva_prepare_bind_data(udata, &data);
+	if (ret)
+		return ret;
+
+	return domain->ops->sva_bind_gpasid(domain, dev, &data);
 }
 EXPORT_SYMBOL_GPL(iommu_uapi_sva_bind_gpasid);
 
-int iommu_uapi_sva_unbind_gpasid(struct iommu_domain *domain, struct device *dev,
-				 ioasid_t pasid)
+int iommu_sva_unbind_gpasid(struct iommu_domain *domain, struct device *dev,
+			     ioasid_t pasid)
 {
 	if (unlikely(!domain->ops->sva_unbind_gpasid))
 		return -ENODEV;
 
 	return domain->ops->sva_unbind_gpasid(dev, pasid);
 }
+EXPORT_SYMBOL_GPL(iommu_sva_unbind_gpasid);
+
+int iommu_uapi_sva_unbind_gpasid(struct iommu_domain *domain, struct device *dev,
+				 void __user *udata)
+{
+	struct iommu_gpasid_bind_data data = { 0 };
+	int ret;
+
+	if (unlikely(!domain->ops->sva_bind_gpasid))
+		return -ENODEV;
+
+	ret = iommu_sva_prepare_bind_data(udata, &data);
+	if (ret)
+		return ret;
+
+	return iommu_sva_unbind_gpasid(domain, dev, data.hpasid);
+}
 EXPORT_SYMBOL_GPL(iommu_uapi_sva_unbind_gpasid);
 
 static void __iommu_detach_device(struct iommu_domain *domain,
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index d18de2afa6fb..82876f682367 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -426,11 +426,14 @@ extern void iommu_detach_device(struct iommu_domain *domain,
 				struct device *dev);
 extern int iommu_uapi_cache_invalidate(struct iommu_domain *domain,
 				       struct device *dev,
-				       struct iommu_cache_invalidate_info *inv_info);
+				       void __user *uinfo);
+
 extern int iommu_uapi_sva_bind_gpasid(struct iommu_domain *domain,
-				      struct device *dev, struct iommu_gpasid_bind_data *data);
+				      struct device *dev, void __user *udata);
 extern int iommu_uapi_sva_unbind_gpasid(struct iommu_domain *domain,
-					struct device *dev, ioasid_t pasid);
+					struct device *dev, void __user *udata);
+extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain,
+				   struct device *dev, ioasid_t pasid);
 extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
 extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
 extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
@@ -1032,22 +1035,29 @@ static inline int iommu_sva_get_pasid(struct iommu_sva *handle)
 	return IOMMU_PASID_INVALID;
 }
 
-static inline int iommu_uapi_cache_invalidate(struct iommu_domain *domain,
-					      struct device *dev,
-					      struct iommu_cache_invalidate_info *inv_info)
+static inline int
+iommu_uapi_cache_invalidate(struct iommu_domain *domain,
+			    struct device *dev,
+			    struct iommu_cache_invalidate_info *inv_info)
 {
 	return -ENODEV;
 }
 
 static inline int iommu_uapi_sva_bind_gpasid(struct iommu_domain *domain,
-					     struct device *dev,
-					     struct iommu_gpasid_bind_data *data)
+					     struct device *dev, void __user *udata)
 {
 	return -ENODEV;
 }
 
 static inline int iommu_uapi_sva_unbind_gpasid(struct iommu_domain *domain,
-					       struct device *dev, int pasid)
+					       struct device *dev, void __user *udata)
+{
+	return -ENODEV;
+}
+
+static inline int iommu_sva_unbind_gpasid(struct iommu_domain *domain,
+					  struct device *dev,
+					  ioasid_t pasid)
 {
 	return -ENODEV;
 }
diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
index 5946779ac1f9..66d4ca40b40f 100644
--- a/include/uapi/linux/iommu.h
+++ b/include/uapi/linux/iommu.h
@@ -322,6 +322,7 @@ struct iommu_gpasid_bind_data {
 #define IOMMU_GPASID_BIND_VERSION_1	1
 	__u32 version;
 #define IOMMU_PASID_FORMAT_INTEL_VTD	1
+#define IOMMU_PASID_FORMAT_LAST		2
 	__u32 format;
 	__u32 addr_width;
 #define IOMMU_SVA_GPASID_VAL	(1 << 0) /* guest PASID valid */
-- 
cgit v1.2.3


From 6278eecba31f3983fe2743fc01b198433aa18247 Mon Sep 17 00:00:00 2001
From: Jacob Pan <jacob.pan.linux@gmail.com>
Date: Fri, 25 Sep 2020 09:32:47 -0700
Subject: iommu/vt-d: Check UAPI data processed by IOMMU core

IOMMU generic layer already does sanity checks on UAPI data for version
match and argsz range based on generic information.

This patch adjusts the following data checking responsibilities:
- removes the redundant version check from VT-d driver
- removes the check for vendor specific data size
- adds check for the use of reserved/undefined flags

Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Link: https://lore.kernel.org/r/1601051567-54787-7-git-send-email-jacob.jun.pan@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel/iommu.c |  3 +--
 drivers/iommu/intel/svm.c   | 11 +++++++++--
 include/uapi/linux/iommu.h  |  1 +
 3 files changed, 11 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index f1c66c94be55..cae400b6807e 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -5398,8 +5398,7 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
 	int ret = 0;
 	u64 size = 0;
 
-	if (!inv_info || !dmar_domain ||
-	    inv_info->version != IOMMU_CACHE_INVALIDATE_INFO_VERSION_1)
+	if (!inv_info || !dmar_domain)
 		return -EINVAL;
 
 	if (!dev || !dev_is_pci(dev))
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 99353d6468fa..0cb9a15f1112 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -284,8 +284,15 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
 	if (WARN_ON(!iommu) || !data)
 		return -EINVAL;
 
-	if (data->version != IOMMU_GPASID_BIND_VERSION_1 ||
-	    data->format != IOMMU_PASID_FORMAT_INTEL_VTD)
+	if (data->format != IOMMU_PASID_FORMAT_INTEL_VTD)
+		return -EINVAL;
+
+	/* IOMMU core ensures argsz is more than the start of the union */
+	if (data->argsz < offsetofend(struct iommu_gpasid_bind_data, vendor.vtd))
+		return -EINVAL;
+
+	/* Make sure no undefined flags are used in vendor data */
+	if (data->vendor.vtd.flags & ~(IOMMU_SVA_VTD_GPASID_LAST - 1))
 		return -EINVAL;
 
 	if (!dev_is_pci(dev))
diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
index 66d4ca40b40f..e1d9e75f2c94 100644
--- a/include/uapi/linux/iommu.h
+++ b/include/uapi/linux/iommu.h
@@ -288,6 +288,7 @@ struct iommu_gpasid_bind_data_vtd {
 #define IOMMU_SVA_VTD_GPASID_PWT	(1 << 3) /* page-level write through */
 #define IOMMU_SVA_VTD_GPASID_EMTE	(1 << 4) /* extended mem type enable */
 #define IOMMU_SVA_VTD_GPASID_CD		(1 << 5) /* PASID-level cache disable */
+#define IOMMU_SVA_VTD_GPASID_LAST	(1 << 6)
 	__u64 flags;
 	__u32 pat;
 	__u32 emt;
-- 
cgit v1.2.3


From 61931c0ee9cf5da575996b977a2358b598ef84bb Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Thu, 1 Oct 2020 15:00:56 -0400
Subject: dm: export dm_copy_name_and_uuid

Allow DM targets to access the configured name and uuid.
Also, bump DM ioctl version.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-ioctl.c         | 2 +-
 include/uapi/linux/dm-ioctl.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 28122e850ea1..cd0478d44058 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -2044,7 +2044,7 @@ out:
 
 	return r;
 }
-
+EXPORT_SYMBOL_GPL(dm_copy_name_and_uuid);
 
 /**
  * dm_early_create - create a mapped device in early boot.
diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
index 6622912c2342..4933b6b67b85 100644
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -272,9 +272,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	42
+#define DM_VERSION_MINOR	43
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2020-02-27)"
+#define DM_VERSION_EXTRA	"-ioctl (2020-10-01)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
-- 
cgit v1.2.3


From 677cf51f71c97bcf98852aa2077d7289bc73e3b3 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@nvidia.com>
Date: Wed, 30 Sep 2020 19:38:27 +0300
Subject: RDMA/mlx5: Extend advice MR to support non faulting mode

Extend advice MR to support non-faulting mode. This can improve
performance by increasing the populated page tables in the device.

Link: https://lore.kernel.org/r/20200930163828.1336747-4-leon@kernel.org
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/mlx5/mr.c         | 3 ++-
 drivers/infiniband/hw/mlx5/odp.c        | 7 ++++++-
 include/uapi/rdma/ib_user_ioctl_verbs.h | 1 +
 3 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index c69ce250769e..151b14038765 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1312,7 +1312,8 @@ int mlx5_ib_advise_mr(struct ib_pd *pd,
 		      struct uverbs_attr_bundle *attrs)
 {
 	if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH &&
-	    advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE)
+	    advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE &&
+	    advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT)
 		return -EOPNOTSUPP;
 
 	return mlx5_ib_advise_mr_prefetch(pd, advice, flags,
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 4743ef78afe0..d01fdec05b89 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -665,6 +665,7 @@ void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr)
 }
 
 #define MLX5_PF_FLAGS_DOWNGRADE BIT(1)
+#define MLX5_PF_FLAGS_SNAPSHOT BIT(2)
 static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
 			     u64 user_va, size_t bcnt, u32 *bytes_mapped,
 			     u32 flags)
@@ -673,6 +674,7 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
 	bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE;
 	u64 access_mask;
 	u64 start_idx;
+	bool fault = !(flags & MLX5_PF_FLAGS_SNAPSHOT);
 
 	page_shift = odp->page_shift;
 	start_idx = (user_va - ib_umem_start(odp)) >> page_shift;
@@ -681,7 +683,7 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
 	if (odp->umem.writable && !downgrade)
 		access_mask |= ODP_WRITE_ALLOWED_BIT;
 
-	np = ib_umem_odp_map_dma_and_lock(odp, user_va, bcnt, access_mask, true);
+	np = ib_umem_odp_map_dma_and_lock(odp, user_va, bcnt, access_mask, fault);
 	if (np < 0)
 		return np;
 
@@ -1851,6 +1853,9 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
 	if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH)
 		pf_flags |= MLX5_PF_FLAGS_DOWNGRADE;
 
+	if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT)
+		pf_flags |= MLX5_PF_FLAGS_SNAPSHOT;
+
 	if (flags & IB_UVERBS_ADVISE_MR_FLAG_FLUSH)
 		return mlx5_ib_prefetch_sg_list(pd, advice, pf_flags, sg_list,
 						num_sge);
diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h
index 5debab45ebcb..fb8cdb38198b 100644
--- a/include/uapi/rdma/ib_user_ioctl_verbs.h
+++ b/include/uapi/rdma/ib_user_ioctl_verbs.h
@@ -208,6 +208,7 @@ enum ib_uverbs_read_counters_flags {
 enum ib_uverbs_advise_mr_advice {
 	IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH,
 	IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE,
+	IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT,
 };
 
 enum ib_uverbs_advise_mr_flag {
-- 
cgit v1.2.3


From c4b4d548fabc15b9c5db9f61204dd0c608414d2d Mon Sep 17 00:00:00 2001
From: Avihai Horon <avihaih@nvidia.com>
Date: Wed, 23 Sep 2020 19:50:14 +0300
Subject: RDMA/core: Introduce new GID table query API

Introduce rdma_query_gid_table which enables querying all the GID tables
of a given device and copying the attributes of all valid GID entries to a
provided buffer.

This API provides a faster way to query a GID table using a single call
and will be used in libibverbs to improve the current approach, which
requires multiple calls to open, close and read multiple sysfs files for
a single GID table entry.

Link: https://lore.kernel.org/r/20200923165015.2491894-4-leon@kernel.org
Signed-off-by: Avihai Horon <avihaih@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/core/cache.c         | 66 +++++++++++++++++++++++++++++++--
 include/rdma/ib_cache.h                 |  3 ++
 include/uapi/rdma/ib_user_ioctl_verbs.h |  8 ++++
 3 files changed, 74 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index cf49ac0b0aa6..3a86a10c9572 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -1247,6 +1247,67 @@ done:
 }
 EXPORT_SYMBOL(rdma_get_gid_attr);
 
+/**
+ * rdma_query_gid_table - Reads GID table entries of all the ports of a device up to max_entries.
+ * @device: The device to query.
+ * @entries: Entries where GID entries are returned.
+ * @max_entries: Maximum number of entries that can be returned.
+ * Entries array must be allocated to hold max_entries number of entries.
+ * @num_entries: Updated to the number of entries that were successfully read.
+ *
+ * Returns number of entries on success or appropriate error code.
+ */
+ssize_t rdma_query_gid_table(struct ib_device *device,
+			     struct ib_uverbs_gid_entry *entries,
+			     size_t max_entries)
+{
+	const struct ib_gid_attr *gid_attr;
+	ssize_t num_entries = 0, ret;
+	struct ib_gid_table *table;
+	unsigned int port_num, i;
+	struct net_device *ndev;
+	unsigned long flags;
+
+	rdma_for_each_port(device, port_num) {
+		if (!rdma_ib_or_roce(device, port_num))
+			continue;
+
+		table = rdma_gid_table(device, port_num);
+		read_lock_irqsave(&table->rwlock, flags);
+		for (i = 0; i < table->sz; i++) {
+			if (!is_gid_entry_valid(table->data_vec[i]))
+				continue;
+			if (num_entries >= max_entries) {
+				ret = -EINVAL;
+				goto err;
+			}
+
+			gid_attr = &table->data_vec[i]->attr;
+
+			memcpy(&entries->gid, &gid_attr->gid,
+			       sizeof(gid_attr->gid));
+			entries->gid_index = gid_attr->index;
+			entries->port_num = gid_attr->port_num;
+			entries->gid_type = gid_attr->gid_type;
+			ndev = rcu_dereference_protected(
+				gid_attr->ndev,
+				lockdep_is_held(&table->rwlock));
+			if (ndev)
+				entries->netdev_ifindex = ndev->ifindex;
+
+			num_entries++;
+			entries++;
+		}
+		read_unlock_irqrestore(&table->rwlock, flags);
+	}
+
+	return num_entries;
+err:
+	read_unlock_irqrestore(&table->rwlock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(rdma_query_gid_table);
+
 /**
  * rdma_put_gid_attr - Release reference to the GID attribute
  * @attr:		Pointer to the GID attribute whose reference
@@ -1303,7 +1364,7 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
 	struct ib_gid_table_entry *entry =
 			container_of(attr, struct ib_gid_table_entry, attr);
 	struct ib_device *device = entry->attr.device;
-	struct net_device *ndev = ERR_PTR(-ENODEV);
+	struct net_device *ndev = ERR_PTR(-EINVAL);
 	u8 port_num = entry->attr.port_num;
 	struct ib_gid_table *table;
 	unsigned long flags;
@@ -1315,8 +1376,7 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
 	valid = is_gid_entry_valid(table->data_vec[attr->index]);
 	if (valid) {
 		ndev = rcu_dereference(attr->ndev);
-		if (!ndev ||
-		    (ndev && ((READ_ONCE(ndev->flags) & IFF_UP) == 0)))
+		if (!ndev)
 			ndev = ERR_PTR(-ENODEV);
 	}
 	read_unlock_irqrestore(&table->rwlock, flags);
diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
index 66a8f369a2fa..bae29f50adff 100644
--- a/include/rdma/ib_cache.h
+++ b/include/rdma/ib_cache.h
@@ -110,5 +110,8 @@ const struct ib_gid_attr *rdma_get_gid_attr(struct ib_device *device,
 					    u8 port_num, int index);
 void rdma_put_gid_attr(const struct ib_gid_attr *attr);
 void rdma_hold_gid_attr(const struct ib_gid_attr *attr);
+ssize_t rdma_query_gid_table(struct ib_device *device,
+			     struct ib_uverbs_gid_entry *entries,
+			     size_t max_entries);
 
 #endif /* _IB_CACHE_H */
diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h
index fb8cdb38198b..14820082de5e 100644
--- a/include/uapi/rdma/ib_user_ioctl_verbs.h
+++ b/include/uapi/rdma/ib_user_ioctl_verbs.h
@@ -251,4 +251,12 @@ enum rdma_driver_id {
 	RDMA_DRIVER_SIW,
 };
 
+struct ib_uverbs_gid_entry {
+	__aligned_u64 gid[2];
+	__u32 gid_index;
+	__u32 port_num;
+	__u32 gid_type;
+	__u32 netdev_ifindex; /* It is 0 if there is no netdev associated with it */
+};
+
 #endif
-- 
cgit v1.2.3


From 9f85cbe50aa044a46f0a22fda323fa27b80c82da Mon Sep 17 00:00:00 2001
From: Avihai Horon <avihaih@nvidia.com>
Date: Wed, 23 Sep 2020 19:50:15 +0300
Subject: RDMA/uverbs: Expose the new GID query API to user space

Expose the query GID table and entry API to user space by adding two new
methods and method handlers to the device object.

This API provides a faster way to query a GID table using a single call
and will be used in libibverbs to improve the current approach, which
requires multiple calls to open, close and read multiple sysfs files for
a single GID table entry.

Link: https://lore.kernel.org/r/20200923165015.2491894-5-leon@kernel.org
Signed-off-by: Avihai Horon <avihaih@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/core/uverbs_std_types_device.c | 196 +++++++++++++++++++++-
 include/rdma/ib_verbs.h                           |   6 +-
 include/uapi/rdma/ib_user_ioctl_cmds.h            |  16 ++
 include/uapi/rdma/ib_user_ioctl_verbs.h           |   6 +
 4 files changed, 220 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c
index 9f43c0161a8e..f367d523a46b 100644
--- a/drivers/infiniband/core/uverbs_std_types_device.c
+++ b/drivers/infiniband/core/uverbs_std_types_device.c
@@ -3,11 +3,13 @@
  * Copyright (c) 2018, Mellanox Technologies inc.  All rights reserved.
  */
 
+#include <linux/overflow.h>
 #include <rdma/uverbs_std_types.h>
 #include "rdma_core.h"
 #include "uverbs.h"
 #include <rdma/uverbs_ioctl.h>
 #include <rdma/opa_addr.h>
+#include <rdma/ib_cache.h>
 
 /*
  * This ioctl method allows calling any defined write or write_ex
@@ -266,6 +268,172 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_CONTEXT)(
 	return ucontext->device->ops.query_ucontext(ucontext, attrs);
 }
 
+static int copy_gid_entries_to_user(struct uverbs_attr_bundle *attrs,
+				    struct ib_uverbs_gid_entry *entries,
+				    size_t num_entries, size_t user_entry_size)
+{
+	const struct uverbs_attr *attr;
+	void __user *user_entries;
+	size_t copy_len;
+	int ret;
+	int i;
+
+	if (user_entry_size == sizeof(*entries)) {
+		ret = uverbs_copy_to(attrs,
+				     UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES,
+				     entries, sizeof(*entries) * num_entries);
+		return ret;
+	}
+
+	copy_len = min_t(size_t, user_entry_size, sizeof(*entries));
+	attr = uverbs_attr_get(attrs, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES);
+	if (IS_ERR(attr))
+		return PTR_ERR(attr);
+
+	user_entries = u64_to_user_ptr(attr->ptr_attr.data);
+	for (i = 0; i < num_entries; i++) {
+		if (copy_to_user(user_entries, entries, copy_len))
+			return -EFAULT;
+
+		if (user_entry_size > sizeof(*entries)) {
+			if (clear_user(user_entries + sizeof(*entries),
+				       user_entry_size - sizeof(*entries)))
+				return -EFAULT;
+		}
+
+		entries++;
+		user_entries += user_entry_size;
+	}
+
+	return uverbs_output_written(attrs,
+				     UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES);
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_TABLE)(
+	struct uverbs_attr_bundle *attrs)
+{
+	struct ib_uverbs_gid_entry *entries;
+	struct ib_ucontext *ucontext;
+	struct ib_device *ib_dev;
+	size_t user_entry_size;
+	ssize_t num_entries;
+	size_t max_entries;
+	size_t num_bytes;
+	u32 flags;
+	int ret;
+
+	ret = uverbs_get_flags32(&flags, attrs,
+				 UVERBS_ATTR_QUERY_GID_TABLE_FLAGS, 0);
+	if (ret)
+		return ret;
+
+	ret = uverbs_get_const(&user_entry_size, attrs,
+			       UVERBS_ATTR_QUERY_GID_TABLE_ENTRY_SIZE);
+	if (ret)
+		return ret;
+
+	max_entries = uverbs_attr_ptr_get_array_size(
+		attrs, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES,
+		user_entry_size);
+	if (max_entries <= 0)
+		return -EINVAL;
+
+	ucontext = ib_uverbs_get_ucontext(attrs);
+	if (IS_ERR(ucontext))
+		return PTR_ERR(ucontext);
+	ib_dev = ucontext->device;
+
+	if (check_mul_overflow(max_entries, sizeof(*entries), &num_bytes))
+		return -EINVAL;
+
+	entries = uverbs_zalloc(attrs, num_bytes);
+	if (!entries)
+		return -ENOMEM;
+
+	num_entries = rdma_query_gid_table(ib_dev, entries, max_entries);
+	if (num_entries < 0)
+		return -EINVAL;
+
+	ret = copy_gid_entries_to_user(attrs, entries, num_entries,
+				       user_entry_size);
+	if (ret)
+		return ret;
+
+	ret = uverbs_copy_to(attrs,
+			     UVERBS_ATTR_QUERY_GID_TABLE_RESP_NUM_ENTRIES,
+			     &num_entries, sizeof(num_entries));
+	return ret;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_ENTRY)(
+	struct uverbs_attr_bundle *attrs)
+{
+	struct ib_uverbs_gid_entry entry = {};
+	const struct ib_gid_attr *gid_attr;
+	struct ib_ucontext *ucontext;
+	struct ib_device *ib_dev;
+	struct net_device *ndev;
+	u32 gid_index;
+	u32 port_num;
+	u32 flags;
+	int ret;
+
+	ret = uverbs_get_flags32(&flags, attrs,
+				 UVERBS_ATTR_QUERY_GID_ENTRY_FLAGS, 0);
+	if (ret)
+		return ret;
+
+	ret = uverbs_get_const(&port_num, attrs,
+			       UVERBS_ATTR_QUERY_GID_ENTRY_PORT);
+	if (ret)
+		return ret;
+
+	ret = uverbs_get_const(&gid_index, attrs,
+			       UVERBS_ATTR_QUERY_GID_ENTRY_GID_INDEX);
+	if (ret)
+		return ret;
+
+	ucontext = ib_uverbs_get_ucontext(attrs);
+	if (IS_ERR(ucontext))
+		return PTR_ERR(ucontext);
+	ib_dev = ucontext->device;
+
+	if (!rdma_is_port_valid(ib_dev, port_num))
+		return -EINVAL;
+
+	if (!rdma_ib_or_roce(ib_dev, port_num))
+		return -EOPNOTSUPP;
+
+	gid_attr = rdma_get_gid_attr(ib_dev, port_num, gid_index);
+	if (IS_ERR(gid_attr))
+		return PTR_ERR(gid_attr);
+
+	memcpy(&entry.gid, &gid_attr->gid, sizeof(gid_attr->gid));
+	entry.gid_index = gid_attr->index;
+	entry.port_num = gid_attr->port_num;
+	entry.gid_type = gid_attr->gid_type;
+
+	rcu_read_lock();
+	ndev = rdma_read_gid_attr_ndev_rcu(gid_attr);
+	if (IS_ERR(ndev)) {
+		if (PTR_ERR(ndev) != -ENODEV) {
+			ret = PTR_ERR(ndev);
+			rcu_read_unlock();
+			goto out;
+		}
+	} else {
+		entry.netdev_ifindex = ndev->ifindex;
+	}
+	rcu_read_unlock();
+
+	ret = uverbs_copy_to_struct_or_zero(
+		attrs, UVERBS_ATTR_QUERY_GID_ENTRY_RESP_ENTRY, &entry,
+		sizeof(entry));
+out:
+	rdma_put_gid_attr(gid_attr);
+	return ret;
+}
+
 DECLARE_UVERBS_NAMED_METHOD(
 	UVERBS_METHOD_GET_CONTEXT,
 	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS,
@@ -300,12 +468,38 @@ DECLARE_UVERBS_NAMED_METHOD(
 				   reserved),
 		UA_MANDATORY));
 
+DECLARE_UVERBS_NAMED_METHOD(
+	UVERBS_METHOD_QUERY_GID_TABLE,
+	UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_GID_TABLE_ENTRY_SIZE, u64,
+			     UA_MANDATORY),
+	UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_QUERY_GID_TABLE_FLAGS, u32,
+			     UA_OPTIONAL),
+	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES,
+			    UVERBS_ATTR_MIN_SIZE(0), UA_MANDATORY),
+	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_GID_TABLE_RESP_NUM_ENTRIES,
+			    UVERBS_ATTR_TYPE(u64), UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+	UVERBS_METHOD_QUERY_GID_ENTRY,
+	UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_GID_ENTRY_PORT, u32,
+			     UA_MANDATORY),
+	UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_GID_ENTRY_GID_INDEX, u32,
+			     UA_MANDATORY),
+	UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_QUERY_GID_ENTRY_FLAGS, u32,
+			     UA_MANDATORY),
+	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_GID_ENTRY_RESP_ENTRY,
+			    UVERBS_ATTR_STRUCT(struct ib_uverbs_gid_entry,
+					       netdev_ifindex),
+			    UA_MANDATORY));
+
 DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE,
 			      &UVERBS_METHOD(UVERBS_METHOD_GET_CONTEXT),
 			      &UVERBS_METHOD(UVERBS_METHOD_INVOKE_WRITE),
 			      &UVERBS_METHOD(UVERBS_METHOD_INFO_HANDLES),
 			      &UVERBS_METHOD(UVERBS_METHOD_QUERY_PORT),
-			      &UVERBS_METHOD(UVERBS_METHOD_QUERY_CONTEXT));
+			      &UVERBS_METHOD(UVERBS_METHOD_QUERY_CONTEXT),
+			      &UVERBS_METHOD(UVERBS_METHOD_QUERY_GID_TABLE),
+			      &UVERBS_METHOD(UVERBS_METHOD_QUERY_GID_ENTRY));
 
 const struct uapi_definition uverbs_def_obj_device[] = {
 	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DEVICE),
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 3b61fba531d0..ce935d70fdc8 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -138,9 +138,9 @@ union ib_gid {
 extern union ib_gid zgid;
 
 enum ib_gid_type {
-	IB_GID_TYPE_IB        = 0,
-	IB_GID_TYPE_ROCE      = 1,
-	IB_GID_TYPE_ROCE_UDP_ENCAP = 2,
+	IB_GID_TYPE_IB = IB_UVERBS_GID_TYPE_IB,
+	IB_GID_TYPE_ROCE = IB_UVERBS_GID_TYPE_ROCE_V1,
+	IB_GID_TYPE_ROCE_UDP_ENCAP = IB_UVERBS_GID_TYPE_ROCE_V2,
 	IB_GID_TYPE_SIZE
 };
 
diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h
index 99dcabf61a71..7968a1845355 100644
--- a/include/uapi/rdma/ib_user_ioctl_cmds.h
+++ b/include/uapi/rdma/ib_user_ioctl_cmds.h
@@ -70,6 +70,8 @@ enum uverbs_methods_device {
 	UVERBS_METHOD_QUERY_PORT,
 	UVERBS_METHOD_GET_CONTEXT,
 	UVERBS_METHOD_QUERY_CONTEXT,
+	UVERBS_METHOD_QUERY_GID_TABLE,
+	UVERBS_METHOD_QUERY_GID_ENTRY,
 };
 
 enum uverbs_attrs_invoke_write_cmd_attr_ids {
@@ -352,4 +354,18 @@ enum uverbs_attrs_async_event_create {
 	UVERBS_ATTR_ASYNC_EVENT_ALLOC_FD_HANDLE,
 };
 
+enum uverbs_attrs_query_gid_table_cmd_attr_ids {
+	UVERBS_ATTR_QUERY_GID_TABLE_ENTRY_SIZE,
+	UVERBS_ATTR_QUERY_GID_TABLE_FLAGS,
+	UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES,
+	UVERBS_ATTR_QUERY_GID_TABLE_RESP_NUM_ENTRIES,
+};
+
+enum uverbs_attrs_query_gid_entry_cmd_attr_ids {
+	UVERBS_ATTR_QUERY_GID_ENTRY_PORT,
+	UVERBS_ATTR_QUERY_GID_ENTRY_GID_INDEX,
+	UVERBS_ATTR_QUERY_GID_ENTRY_FLAGS,
+	UVERBS_ATTR_QUERY_GID_ENTRY_RESP_ENTRY,
+};
+
 #endif
diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h
index 14820082de5e..22483799cd07 100644
--- a/include/uapi/rdma/ib_user_ioctl_verbs.h
+++ b/include/uapi/rdma/ib_user_ioctl_verbs.h
@@ -251,6 +251,12 @@ enum rdma_driver_id {
 	RDMA_DRIVER_SIW,
 };
 
+enum ib_uverbs_gid_type {
+	IB_UVERBS_GID_TYPE_IB,
+	IB_UVERBS_GID_TYPE_ROCE_V1,
+	IB_UVERBS_GID_TYPE_ROCE_V2,
+};
+
 struct ib_uverbs_gid_entry {
 	__aligned_u64 gid[2];
 	__u32 gid_index;
-- 
cgit v1.2.3


From ba1df797e5bbba68ddd1a29bd658b1c11f9a60b6 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 1 Oct 2020 18:59:07 -0400
Subject: NFSACL: Replace PROC() macro with open code

Clean up: Follow-up on ten-year-old commit b9081d90f5b9 ("NFS: kill
off complicated macro 'PROC'") by performing the same conversion in
the NFSACL code. To reduce the chance of error, I copied the original
C preprocessor output and then made some minor edits.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs2acl.c           | 72 ++++++++++++++++++++++++++++++---------------
 fs/nfsd/nfs3acl.c           | 49 +++++++++++++++++-------------
 include/uapi/linux/nfsacl.h |  2 ++
 3 files changed, 80 insertions(+), 43 deletions(-)

(limited to 'include/uapi')

diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index cbab1d2d8a75..8d20e0d74417 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -347,36 +347,62 @@ static void nfsaclsvc_release_access(struct svc_rqst *rqstp)
 	fh_put(&resp->fh);
 }
 
-#define nfsaclsvc_decode_voidargs	NULL
-#define nfsaclsvc_release_void		NULL
-#define nfsd3_fhandleargs	nfsd_fhandle
-#define nfsd3_attrstatres	nfsd_attrstat
-#define nfsd3_voidres		nfsd3_voidargs
 struct nfsd3_voidargs { int dummy; };
 
-#define PROC(name, argt, rest, relt, cache, respsize)			\
-{									\
-	.pc_func	= nfsacld_proc_##name,				\
-	.pc_decode	= nfsaclsvc_decode_##argt##args,		\
-	.pc_encode	= nfsaclsvc_encode_##rest##res,			\
-	.pc_release	= nfsaclsvc_release_##relt,	\
-	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
-	.pc_ressize	= sizeof(struct nfsd3_##rest##res),		\
-	.pc_cachetype	= cache,					\
-	.pc_xdrressize	= respsize,					\
-}
-
 #define ST 1		/* status*/
 #define AT 21		/* attributes */
 #define pAT (1+AT)	/* post attributes - conditional */
 #define ACL (1+NFS_ACL_MAX_ENTRIES*3)  /* Access Control List */
 
-static const struct svc_procedure nfsd_acl_procedures2[] = {
-  PROC(null,	void,		void,		void,	  RC_NOCACHE, ST),
-  PROC(getacl,	getacl,		getacl,		getacl,	  RC_NOCACHE, ST+1+2*(1+ACL)),
-  PROC(setacl,	setacl,		attrstat,	attrstat, RC_NOCACHE, ST+AT),
-  PROC(getattr, fhandle,	attrstat,	attrstat, RC_NOCACHE, ST+AT),
-  PROC(access,	access,		access,		access,   RC_NOCACHE, ST+AT+1),
+static const struct svc_procedure nfsd_acl_procedures2[5] = {
+	[ACLPROC2_NULL] = {
+		.pc_func = nfsacld_proc_null,
+		.pc_encode = nfsaclsvc_encode_voidres,
+		.pc_argsize = sizeof(struct nfsd3_voidargs),
+		.pc_ressize = sizeof(struct nfsd3_voidargs),
+		.pc_cachetype = RC_NOCACHE,
+		.pc_xdrressize = ST,
+	},
+	[ACLPROC2_GETACL] = {
+		.pc_func = nfsacld_proc_getacl,
+		.pc_decode = nfsaclsvc_decode_getaclargs,
+		.pc_encode = nfsaclsvc_encode_getaclres,
+		.pc_release = nfsaclsvc_release_getacl,
+		.pc_argsize = sizeof(struct nfsd3_getaclargs),
+		.pc_ressize = sizeof(struct nfsd3_getaclres),
+		.pc_cachetype = RC_NOCACHE,
+		.pc_xdrressize = ST+1+2*(1+ACL),
+	},
+	[ACLPROC2_SETACL] = {
+		.pc_func = nfsacld_proc_setacl,
+		.pc_decode = nfsaclsvc_decode_setaclargs,
+		.pc_encode = nfsaclsvc_encode_attrstatres,
+		.pc_release = nfsaclsvc_release_attrstat,
+		.pc_argsize = sizeof(struct nfsd3_setaclargs),
+		.pc_ressize = sizeof(struct nfsd_attrstat),
+		.pc_cachetype = RC_NOCACHE,
+		.pc_xdrressize = ST+AT,
+	},
+	[ACLPROC2_GETATTR] = {
+		.pc_func = nfsacld_proc_getattr,
+		.pc_decode = nfsaclsvc_decode_fhandleargs,
+		.pc_encode = nfsaclsvc_encode_attrstatres,
+		.pc_release = nfsaclsvc_release_attrstat,
+		.pc_argsize = sizeof(struct nfsd_fhandle),
+		.pc_ressize = sizeof(struct nfsd_attrstat),
+		.pc_cachetype = RC_NOCACHE,
+		.pc_xdrressize = ST+AT,
+	},
+	[ACLPROC2_ACCESS] = {
+		.pc_func = nfsacld_proc_access,
+		.pc_decode = nfsaclsvc_decode_accessargs,
+		.pc_encode = nfsaclsvc_encode_accessres,
+		.pc_release = nfsaclsvc_release_access,
+		.pc_argsize = sizeof(struct nfsd3_accessargs),
+		.pc_ressize = sizeof(struct nfsd3_accessres),
+		.pc_cachetype = RC_NOCACHE,
+		.pc_xdrressize = ST+AT+1,
+	},
 };
 
 static unsigned int nfsd_acl_count2[ARRAY_SIZE(nfsd_acl_procedures2)];
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 13bca4a2f89d..292acb2e529c 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -235,33 +235,42 @@ static void nfs3svc_release_getacl(struct svc_rqst *rqstp)
 	posix_acl_release(resp->acl_default);
 }
 
-#define nfs3svc_decode_voidargs		NULL
-#define nfs3svc_release_void		NULL
-#define nfsd3_setaclres			nfsd3_attrstat
-#define nfsd3_voidres			nfsd3_voidargs
 struct nfsd3_voidargs { int dummy; };
 
-#define PROC(name, argt, rest, relt, cache, respsize)			\
-{									\
-	.pc_func	= nfsd3_proc_##name,				\
-	.pc_decode	= nfs3svc_decode_##argt##args,			\
-	.pc_encode	= nfs3svc_encode_##rest##res,			\
-	.pc_release	= nfs3svc_release_##relt,			\
-	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
-	.pc_ressize	= sizeof(struct nfsd3_##rest##res),		\
-	.pc_cachetype	= cache,					\
-	.pc_xdrressize	= respsize,					\
-}
-
 #define ST 1		/* status*/
 #define AT 21		/* attributes */
 #define pAT (1+AT)	/* post attributes - conditional */
 #define ACL (1+NFS_ACL_MAX_ENTRIES*3)  /* Access Control List */
 
-static const struct svc_procedure nfsd_acl_procedures3[] = {
-  PROC(null,	void,		void,		void,	  RC_NOCACHE, ST),
-  PROC(getacl,	getacl,		getacl,		getacl,	  RC_NOCACHE, ST+1+2*(1+ACL)),
-  PROC(setacl,	setacl,		setacl,		fhandle,  RC_NOCACHE, ST+pAT),
+static const struct svc_procedure nfsd_acl_procedures3[3] = {
+	[ACLPROC3_NULL] = {
+		.pc_func = nfsd3_proc_null,
+		.pc_encode = nfs3svc_encode_voidres,
+		.pc_argsize = sizeof(struct nfsd3_voidargs),
+		.pc_ressize = sizeof(struct nfsd3_voidargs),
+		.pc_cachetype = RC_NOCACHE,
+		.pc_xdrressize = ST,
+	},
+	[ACLPROC3_GETACL] = {
+		.pc_func = nfsd3_proc_getacl,
+		.pc_decode = nfs3svc_decode_getaclargs,
+		.pc_encode = nfs3svc_encode_getaclres,
+		.pc_release = nfs3svc_release_getacl,
+		.pc_argsize = sizeof(struct nfsd3_getaclargs),
+		.pc_ressize = sizeof(struct nfsd3_getaclres),
+		.pc_cachetype = RC_NOCACHE,
+		.pc_xdrressize = ST+1+2*(1+ACL),
+	},
+	[ACLPROC3_SETACL] = {
+		.pc_func = nfsd3_proc_setacl,
+		.pc_decode = nfs3svc_decode_setaclargs,
+		.pc_encode = nfs3svc_encode_setaclres,
+		.pc_release = nfs3svc_release_fhandle,
+		.pc_argsize = sizeof(struct nfsd3_setaclargs),
+		.pc_ressize = sizeof(struct nfsd3_attrstat),
+		.pc_cachetype = RC_NOCACHE,
+		.pc_xdrressize = ST+pAT,
+	},
 };
 
 static unsigned int nfsd_acl_count3[ARRAY_SIZE(nfsd_acl_procedures3)];
diff --git a/include/uapi/linux/nfsacl.h b/include/uapi/linux/nfsacl.h
index ca9a8501ff30..2c2ad204d3b0 100644
--- a/include/uapi/linux/nfsacl.h
+++ b/include/uapi/linux/nfsacl.h
@@ -9,11 +9,13 @@
 
 #define NFS_ACL_PROGRAM	100227
 
+#define ACLPROC2_NULL		0
 #define ACLPROC2_GETACL		1
 #define ACLPROC2_SETACL		2
 #define ACLPROC2_GETATTR	3
 #define ACLPROC2_ACCESS		4
 
+#define ACLPROC3_NULL		0
 #define ACLPROC3_GETACL		1
 #define ACLPROC3_SETACL		2
 
-- 
cgit v1.2.3


From 4976b718c3551faba2c0616ef55ebeb74db1c5ca Mon Sep 17 00:00:00 2001
From: Hao Luo <haoluo@google.com>
Date: Tue, 29 Sep 2020 16:50:44 -0700
Subject: bpf: Introduce pseudo_btf_id

Pseudo_btf_id is a type of ld_imm insn that associates a btf_id to a
ksym so that further dereferences on the ksym can use the BTF info
to validate accesses. Internally, when seeing a pseudo_btf_id ld insn,
the verifier reads the btf_id stored in the insn[0]'s imm field and
marks the dst_reg as PTR_TO_BTF_ID. The btf_id points to a VAR_KIND,
which is encoded in btf_vmlinux by pahole. If the VAR is not of a struct
type, the dst reg will be marked as PTR_TO_MEM instead of PTR_TO_BTF_ID
and the mem_size is resolved to the size of the VAR's type.

From the VAR btf_id, the verifier can also read the address of the
ksym's corresponding kernel var from kallsyms and use that to fill
dst_reg.

Therefore, the proper functionality of pseudo_btf_id depends on (1)
kallsyms and (2) the encoding of kernel global VARs in pahole, which
should be available since pahole v1.18.

Signed-off-by: Hao Luo <haoluo@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200929235049.2533242-2-haoluo@google.com
---
 include/linux/bpf_verifier.h   |   7 +++
 include/linux/btf.h            |  15 +++++
 include/uapi/linux/bpf.h       |  36 +++++++++---
 kernel/bpf/btf.c               |  15 -----
 kernel/bpf/verifier.c          | 125 ++++++++++++++++++++++++++++++++++++-----
 tools/include/uapi/linux/bpf.h |  36 +++++++++---
 6 files changed, 188 insertions(+), 46 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 363b4f1c562a..e83ef6f6bf43 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -308,6 +308,13 @@ struct bpf_insn_aux_data {
 			u32 map_index;		/* index into used_maps[] */
 			u32 map_off;		/* offset from value base address */
 		};
+		struct {
+			enum bpf_reg_type reg_type;	/* type of pseudo_btf_id */
+			union {
+				u32 btf_id;	/* btf_id for struct typed var */
+				u32 mem_size;	/* mem_size for non-struct typed var */
+			};
+		} btf_var;
 	};
 	u64 map_key_state; /* constant (32 bit) key tracking for maps */
 	int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 024e16ff7dcc..af1244180588 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -145,6 +145,21 @@ static inline bool btf_type_is_func_proto(const struct btf_type *t)
 	return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC_PROTO;
 }
 
+static inline bool btf_type_is_var(const struct btf_type *t)
+{
+	return BTF_INFO_KIND(t->info) == BTF_KIND_VAR;
+}
+
+/* union is only a special case of struct:
+ * all its offsetof(member) == 0
+ */
+static inline bool btf_type_is_struct(const struct btf_type *t)
+{
+	u8 kind = BTF_INFO_KIND(t->info);
+
+	return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
+}
+
 static inline u16 btf_type_vlen(const struct btf_type *t)
 {
 	return BTF_INFO_VLEN(t->info);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4f556cfcbfbe..2aa156af24d6 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -356,18 +356,36 @@ enum bpf_link_type {
 #define BPF_F_SLEEPABLE		(1U << 4)
 
 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
- * two extensions:
- *
- * insn[0].src_reg:  BPF_PSEUDO_MAP_FD   BPF_PSEUDO_MAP_VALUE
- * insn[0].imm:      map fd              map fd
- * insn[1].imm:      0                   offset into value
- * insn[0].off:      0                   0
- * insn[1].off:      0                   0
- * ldimm64 rewrite:  address of map      address of map[0]+offset
- * verifier type:    CONST_PTR_TO_MAP    PTR_TO_MAP_VALUE
+ * the following extensions:
+ *
+ * insn[0].src_reg:  BPF_PSEUDO_MAP_FD
+ * insn[0].imm:      map fd
+ * insn[1].imm:      0
+ * insn[0].off:      0
+ * insn[1].off:      0
+ * ldimm64 rewrite:  address of map
+ * verifier type:    CONST_PTR_TO_MAP
  */
 #define BPF_PSEUDO_MAP_FD	1
+/* insn[0].src_reg:  BPF_PSEUDO_MAP_VALUE
+ * insn[0].imm:      map fd
+ * insn[1].imm:      offset into value
+ * insn[0].off:      0
+ * insn[1].off:      0
+ * ldimm64 rewrite:  address of map[0]+offset
+ * verifier type:    PTR_TO_MAP_VALUE
+ */
 #define BPF_PSEUDO_MAP_VALUE	2
+/* insn[0].src_reg:  BPF_PSEUDO_BTF_ID
+ * insn[0].imm:      kernel btf id of VAR
+ * insn[1].imm:      0
+ * insn[0].off:      0
+ * insn[1].off:      0
+ * ldimm64 rewrite:  address of the kernel variable
+ * verifier type:    PTR_TO_BTF_ID or PTR_TO_MEM, depending on whether the var
+ *                   is struct/union.
+ */
+#define BPF_PSEUDO_BTF_ID	3
 
 /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
  * offset to another bpf function
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 4d0ee7839fdb..00569afe3d0d 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -440,16 +440,6 @@ static bool btf_type_nosize_or_null(const struct btf_type *t)
 	return !t || btf_type_nosize(t);
 }
 
-/* union is only a special case of struct:
- * all its offsetof(member) == 0
- */
-static bool btf_type_is_struct(const struct btf_type *t)
-{
-	u8 kind = BTF_INFO_KIND(t->info);
-
-	return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
-}
-
 static bool __btf_type_is_struct(const struct btf_type *t)
 {
 	return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT;
@@ -460,11 +450,6 @@ static bool btf_type_is_array(const struct btf_type *t)
 	return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY;
 }
 
-static bool btf_type_is_var(const struct btf_type *t)
-{
-	return BTF_INFO_KIND(t->info) == BTF_KIND_VAR;
-}
-
 static bool btf_type_is_datasec(const struct btf_type *t)
 {
 	return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 015a1c074b6b..fe4965079773 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7488,6 +7488,7 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
 	struct bpf_insn_aux_data *aux = cur_aux(env);
 	struct bpf_reg_state *regs = cur_regs(env);
+	struct bpf_reg_state *dst_reg;
 	struct bpf_map *map;
 	int err;
 
@@ -7504,25 +7505,44 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 	if (err)
 		return err;
 
+	dst_reg = &regs[insn->dst_reg];
 	if (insn->src_reg == 0) {
 		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
 
-		regs[insn->dst_reg].type = SCALAR_VALUE;
+		dst_reg->type = SCALAR_VALUE;
 		__mark_reg_known(&regs[insn->dst_reg], imm);
 		return 0;
 	}
 
+	if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
+		mark_reg_known_zero(env, regs, insn->dst_reg);
+
+		dst_reg->type = aux->btf_var.reg_type;
+		switch (dst_reg->type) {
+		case PTR_TO_MEM:
+			dst_reg->mem_size = aux->btf_var.mem_size;
+			break;
+		case PTR_TO_BTF_ID:
+			dst_reg->btf_id = aux->btf_var.btf_id;
+			break;
+		default:
+			verbose(env, "bpf verifier is misconfigured\n");
+			return -EFAULT;
+		}
+		return 0;
+	}
+
 	map = env->used_maps[aux->map_index];
 	mark_reg_known_zero(env, regs, insn->dst_reg);
-	regs[insn->dst_reg].map_ptr = map;
+	dst_reg->map_ptr = map;
 
 	if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) {
-		regs[insn->dst_reg].type = PTR_TO_MAP_VALUE;
-		regs[insn->dst_reg].off = aux->map_off;
+		dst_reg->type = PTR_TO_MAP_VALUE;
+		dst_reg->off = aux->map_off;
 		if (map_value_has_spin_lock(map))
-			regs[insn->dst_reg].id = ++env->id_gen;
+			dst_reg->id = ++env->id_gen;
 	} else if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
-		regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
+		dst_reg->type = CONST_PTR_TO_MAP;
 	} else {
 		verbose(env, "bpf verifier is misconfigured\n");
 		return -EINVAL;
@@ -9424,6 +9444,73 @@ process_bpf_exit:
 	return 0;
 }
 
+/* replace pseudo btf_id with kernel symbol address */
+static int check_pseudo_btf_id(struct bpf_verifier_env *env,
+			       struct bpf_insn *insn,
+			       struct bpf_insn_aux_data *aux)
+{
+	u32 type, id = insn->imm;
+	const struct btf_type *t;
+	const char *sym_name;
+	u64 addr;
+
+	if (!btf_vmlinux) {
+		verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
+		return -EINVAL;
+	}
+
+	if (insn[1].imm != 0) {
+		verbose(env, "reserved field (insn[1].imm) is used in pseudo_btf_id ldimm64 insn.\n");
+		return -EINVAL;
+	}
+
+	t = btf_type_by_id(btf_vmlinux, id);
+	if (!t) {
+		verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
+		return -ENOENT;
+	}
+
+	if (!btf_type_is_var(t)) {
+		verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n",
+			id);
+		return -EINVAL;
+	}
+
+	sym_name = btf_name_by_offset(btf_vmlinux, t->name_off);
+	addr = kallsyms_lookup_name(sym_name);
+	if (!addr) {
+		verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
+			sym_name);
+		return -ENOENT;
+	}
+
+	insn[0].imm = (u32)addr;
+	insn[1].imm = addr >> 32;
+
+	type = t->type;
+	t = btf_type_skip_modifiers(btf_vmlinux, type, NULL);
+	if (!btf_type_is_struct(t)) {
+		const struct btf_type *ret;
+		const char *tname;
+		u32 tsize;
+
+		/* resolve the type size of ksym. */
+		ret = btf_resolve_size(btf_vmlinux, t, &tsize);
+		if (IS_ERR(ret)) {
+			tname = btf_name_by_offset(btf_vmlinux, t->name_off);
+			verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
+				tname, PTR_ERR(ret));
+			return -EINVAL;
+		}
+		aux->btf_var.reg_type = PTR_TO_MEM;
+		aux->btf_var.mem_size = tsize;
+	} else {
+		aux->btf_var.reg_type = PTR_TO_BTF_ID;
+		aux->btf_var.btf_id = type;
+	}
+	return 0;
+}
+
 static int check_map_prealloc(struct bpf_map *map)
 {
 	return (map->map_type != BPF_MAP_TYPE_HASH &&
@@ -9534,10 +9621,14 @@ static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
 		map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
 }
 
-/* look for pseudo eBPF instructions that access map FDs and
- * replace them with actual map pointers
+/* find and rewrite pseudo imm in ld_imm64 instructions:
+ *
+ * 1. if it accesses map FD, replace it with actual map pointer.
+ * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
+ *
+ * NOTE: btf_vmlinux is required for converting pseudo btf_id.
  */
-static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
+static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
 {
 	struct bpf_insn *insn = env->prog->insnsi;
 	int insn_cnt = env->prog->len;
@@ -9578,6 +9669,14 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
 				/* valid generic load 64-bit imm */
 				goto next_insn;
 
+			if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
+				aux = &env->insn_aux_data[i];
+				err = check_pseudo_btf_id(env, insn, aux);
+				if (err)
+					return err;
+				goto next_insn;
+			}
+
 			/* In final convert_pseudo_ld_imm64() step, this is
 			 * converted into regular 64-bit imm load insn.
 			 */
@@ -11633,10 +11732,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 	if (is_priv)
 		env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
 
-	ret = replace_map_fd_with_map_ptr(env);
-	if (ret < 0)
-		goto skip_full_check;
-
 	if (bpf_prog_is_dev_bound(env->prog->aux)) {
 		ret = bpf_prog_offload_verifier_prep(env->prog);
 		if (ret)
@@ -11662,6 +11757,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 	if (ret)
 		goto skip_full_check;
 
+	ret = resolve_pseudo_ldimm64(env);
+	if (ret < 0)
+		goto skip_full_check;
+
 	ret = check_cfg(env);
 	if (ret < 0)
 		goto skip_full_check;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4f556cfcbfbe..2aa156af24d6 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -356,18 +356,36 @@ enum bpf_link_type {
 #define BPF_F_SLEEPABLE		(1U << 4)
 
 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
- * two extensions:
- *
- * insn[0].src_reg:  BPF_PSEUDO_MAP_FD   BPF_PSEUDO_MAP_VALUE
- * insn[0].imm:      map fd              map fd
- * insn[1].imm:      0                   offset into value
- * insn[0].off:      0                   0
- * insn[1].off:      0                   0
- * ldimm64 rewrite:  address of map      address of map[0]+offset
- * verifier type:    CONST_PTR_TO_MAP    PTR_TO_MAP_VALUE
+ * the following extensions:
+ *
+ * insn[0].src_reg:  BPF_PSEUDO_MAP_FD
+ * insn[0].imm:      map fd
+ * insn[1].imm:      0
+ * insn[0].off:      0
+ * insn[1].off:      0
+ * ldimm64 rewrite:  address of map
+ * verifier type:    CONST_PTR_TO_MAP
  */
 #define BPF_PSEUDO_MAP_FD	1
+/* insn[0].src_reg:  BPF_PSEUDO_MAP_VALUE
+ * insn[0].imm:      map fd
+ * insn[1].imm:      offset into value
+ * insn[0].off:      0
+ * insn[1].off:      0
+ * ldimm64 rewrite:  address of map[0]+offset
+ * verifier type:    PTR_TO_MAP_VALUE
+ */
 #define BPF_PSEUDO_MAP_VALUE	2
+/* insn[0].src_reg:  BPF_PSEUDO_BTF_ID
+ * insn[0].imm:      kernel btf id of VAR
+ * insn[1].imm:      0
+ * insn[0].off:      0
+ * insn[1].off:      0
+ * ldimm64 rewrite:  address of the kernel variable
+ * verifier type:    PTR_TO_BTF_ID or PTR_TO_MEM, depending on whether the var
+ *                   is struct/union.
+ */
+#define BPF_PSEUDO_BTF_ID	3
 
 /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
  * offset to another bpf function
-- 
cgit v1.2.3


From eaa6bcb71ef6ed3dc18fc525ee7e293b06b4882b Mon Sep 17 00:00:00 2001
From: Hao Luo <haoluo@google.com>
Date: Tue, 29 Sep 2020 16:50:47 -0700
Subject: bpf: Introduce bpf_per_cpu_ptr()

Add bpf_per_cpu_ptr() to help bpf programs access percpu vars.
bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the kernel
except that it may return NULL. This happens when the cpu parameter is
out of range. So the caller must check the returned value.

Signed-off-by: Hao Luo <haoluo@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200929235049.2533242-5-haoluo@google.com
---
 include/linux/bpf.h            |  4 +++
 include/linux/btf.h            | 11 ++++++++
 include/uapi/linux/bpf.h       | 18 ++++++++++++
 kernel/bpf/btf.c               | 10 -------
 kernel/bpf/helpers.c           | 18 ++++++++++++
 kernel/bpf/verifier.c          | 64 ++++++++++++++++++++++++++++++++++++++++--
 kernel/trace/bpf_trace.c       |  2 ++
 tools/include/uapi/linux/bpf.h | 18 ++++++++++++
 8 files changed, 132 insertions(+), 13 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 50e5c4b52bd1..9dde15b2479d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -293,6 +293,7 @@ enum bpf_arg_type {
 	ARG_PTR_TO_ALLOC_MEM_OR_NULL,	/* pointer to dynamically allocated memory or NULL */
 	ARG_CONST_ALLOC_SIZE_OR_ZERO,	/* number of allocated bytes requested */
 	ARG_PTR_TO_BTF_ID_SOCK_COMMON,	/* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
+	ARG_PTR_TO_PERCPU_BTF_ID,	/* pointer to in-kernel percpu type */
 	__BPF_ARG_TYPE_MAX,
 };
 
@@ -307,6 +308,7 @@ enum bpf_return_type {
 	RET_PTR_TO_SOCK_COMMON_OR_NULL,	/* returns a pointer to a sock_common or NULL */
 	RET_PTR_TO_ALLOC_MEM_OR_NULL,	/* returns a pointer to dynamically allocated memory or NULL */
 	RET_PTR_TO_BTF_ID_OR_NULL,	/* returns a pointer to a btf_id or NULL */
+	RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */
 };
 
 /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
@@ -405,6 +407,7 @@ enum bpf_reg_type {
 	PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */
 	PTR_TO_RDWR_BUF,	 /* reg points to a read/write buffer */
 	PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
+	PTR_TO_PERCPU_BTF_ID,	 /* reg points to a percpu kernel variable */
 };
 
 /* The information passed from prog-specific *_is_valid_access
@@ -1828,6 +1831,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
 extern const struct bpf_func_proto bpf_copy_from_user_proto;
 extern const struct bpf_func_proto bpf_snprintf_btf_proto;
+extern const struct bpf_func_proto bpf_per_cpu_ptr_proto;
 
 const struct bpf_func_proto *bpf_tracing_func_proto(
 	enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/linux/btf.h b/include/linux/btf.h
index af1244180588..2bf641829664 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -110,6 +110,11 @@ btf_resolve_size(const struct btf *btf, const struct btf_type *type,
 	     i < btf_type_vlen(struct_type);			\
 	     i++, member++)
 
+#define for_each_vsi(i, datasec_type, member)			\
+	for (i = 0, member = btf_type_var_secinfo(datasec_type);	\
+	     i < btf_type_vlen(datasec_type);			\
+	     i++, member++)
+
 static inline bool btf_type_is_ptr(const struct btf_type *t)
 {
 	return BTF_INFO_KIND(t->info) == BTF_KIND_PTR;
@@ -194,6 +199,12 @@ static inline const struct btf_member *btf_type_member(const struct btf_type *t)
 	return (const struct btf_member *)(t + 1);
 }
 
+static inline const struct btf_var_secinfo *btf_type_var_secinfo(
+		const struct btf_type *t)
+{
+	return (const struct btf_var_secinfo *)(t + 1);
+}
+
 #ifdef CONFIG_BPF_SYSCALL
 const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
 const char *btf_name_by_offset(const struct btf *btf, u32 offset);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2aa156af24d6..f3c1b637ab39 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3686,6 +3686,23 @@ union bpf_attr {
  * 	Return
  * 		The helper returns **TC_ACT_REDIRECT** on success or
  * 		**TC_ACT_SHOT** on error.
+ *
+ * void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu)
+ *     Description
+ *             Take a pointer to a percpu ksym, *percpu_ptr*, and return a
+ *             pointer to the percpu kernel variable on *cpu*. A ksym is an
+ *             extern variable decorated with '__ksym'. For ksym, there is a
+ *             global var (either static or global) defined of the same name
+ *             in the kernel. The ksym is percpu if the global var is percpu.
+ *             The returned pointer points to the global percpu var on *cpu*.
+ *
+ *             bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the
+ *             kernel, except that bpf_per_cpu_ptr() may return NULL. This
+ *             happens if *cpu* is greater than or equal to nr_cpu_ids. The
+ *             caller of bpf_per_cpu_ptr() must check the returned value.
+ *     Return
+ *             A pointer pointing to the kernel percpu variable on *cpu*, or
+ *             NULL, if *cpu* is invalid.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3841,6 +3858,7 @@ union bpf_attr {
 	FN(seq_printf_btf),		\
 	FN(skb_cgroup_classid),		\
 	FN(redirect_neigh),		\
+	FN(bpf_per_cpu_ptr),            \
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 00569afe3d0d..ed7d02e8bc93 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -188,11 +188,6 @@
 	     i < btf_type_vlen(struct_type);				\
 	     i++, member++)
 
-#define for_each_vsi(i, struct_type, member)			\
-	for (i = 0, member = btf_type_var_secinfo(struct_type);	\
-	     i < btf_type_vlen(struct_type);			\
-	     i++, member++)
-
 #define for_each_vsi_from(i, from, struct_type, member)				\
 	for (i = from, member = btf_type_var_secinfo(struct_type) + from;	\
 	     i < btf_type_vlen(struct_type);					\
@@ -598,11 +593,6 @@ static const struct btf_var *btf_type_var(const struct btf_type *t)
 	return (const struct btf_var *)(t + 1);
 }
 
-static const struct btf_var_secinfo *btf_type_var_secinfo(const struct btf_type *t)
-{
-	return (const struct btf_var_secinfo *)(t + 1);
-}
-
 static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
 {
 	return kind_ops[BTF_INFO_KIND(t->info)];
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index e825441781ab..14fe3f64fd82 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -623,6 +623,22 @@ const struct bpf_func_proto bpf_copy_from_user_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
+{
+	if (cpu >= nr_cpu_ids)
+		return (unsigned long)NULL;
+
+	return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu);
+}
+
+const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
+	.func		= bpf_per_cpu_ptr,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
+	.arg2_type	= ARG_ANYTHING,
+};
+
 const struct bpf_func_proto bpf_get_current_task_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
@@ -689,6 +705,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_snprintf_btf_proto;
 	case BPF_FUNC_jiffies64:
 		return &bpf_jiffies64_proto;
+	case BPF_FUNC_bpf_per_cpu_ptr:
+		return &bpf_per_cpu_ptr_proto;
 	default:
 		break;
 	}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index fe4965079773..216b8ece23ce 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -238,6 +238,8 @@ struct bpf_call_arg_meta {
 	u64 msize_max_value;
 	int ref_obj_id;
 	int func_id;
+	u32 btf_id;
+	u32 ret_btf_id;
 };
 
 struct btf *btf_vmlinux;
@@ -517,6 +519,7 @@ static const char * const reg_type_str[] = {
 	[PTR_TO_XDP_SOCK]	= "xdp_sock",
 	[PTR_TO_BTF_ID]		= "ptr_",
 	[PTR_TO_BTF_ID_OR_NULL]	= "ptr_or_null_",
+	[PTR_TO_PERCPU_BTF_ID]	= "percpu_ptr_",
 	[PTR_TO_MEM]		= "mem",
 	[PTR_TO_MEM_OR_NULL]	= "mem_or_null",
 	[PTR_TO_RDONLY_BUF]	= "rdonly_buf",
@@ -583,7 +586,9 @@ static void print_verifier_state(struct bpf_verifier_env *env,
 			/* reg->off should be 0 for SCALAR_VALUE */
 			verbose(env, "%lld", reg->var_off.value + reg->off);
 		} else {
-			if (t == PTR_TO_BTF_ID || t == PTR_TO_BTF_ID_OR_NULL)
+			if (t == PTR_TO_BTF_ID ||
+			    t == PTR_TO_BTF_ID_OR_NULL ||
+			    t == PTR_TO_PERCPU_BTF_ID)
 				verbose(env, "%s", kernel_type_name(reg->btf_id));
 			verbose(env, "(id=%d", reg->id);
 			if (reg_type_may_be_refcounted_or_null(t))
@@ -2204,6 +2209,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
 	case PTR_TO_RDONLY_BUF_OR_NULL:
 	case PTR_TO_RDWR_BUF:
 	case PTR_TO_RDWR_BUF_OR_NULL:
+	case PTR_TO_PERCPU_BTF_ID:
 		return true;
 	default:
 		return false;
@@ -4017,6 +4023,7 @@ static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } };
 static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
 static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
 static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
+static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
 
 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
 	[ARG_PTR_TO_MAP_KEY]		= &map_key_value_types,
@@ -4042,6 +4049,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
 	[ARG_PTR_TO_ALLOC_MEM_OR_NULL]	= &alloc_mem_types,
 	[ARG_PTR_TO_INT]		= &int_ptr_types,
 	[ARG_PTR_TO_LONG]		= &int_ptr_types,
+	[ARG_PTR_TO_PERCPU_BTF_ID]	= &percpu_btf_ptr_types,
 };
 
 static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
@@ -4205,6 +4213,12 @@ skip_type_check:
 		err = check_helper_mem_access(env, regno,
 					      meta->map_ptr->value_size, false,
 					      meta);
+	} else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) {
+		if (!reg->btf_id) {
+			verbose(env, "Helper has invalid btf_id in R%d\n", regno);
+			return -EACCES;
+		}
+		meta->ret_btf_id = reg->btf_id;
 	} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
 		if (meta->func_id == BPF_FUNC_spin_lock) {
 			if (process_spin_lock(env, regno, true))
@@ -5114,6 +5128,30 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 		regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
 		regs[BPF_REG_0].id = ++env->id_gen;
 		regs[BPF_REG_0].mem_size = meta.mem_size;
+	} else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL) {
+		const struct btf_type *t;
+
+		mark_reg_known_zero(env, regs, BPF_REG_0);
+		t = btf_type_skip_modifiers(btf_vmlinux, meta.ret_btf_id, NULL);
+		if (!btf_type_is_struct(t)) {
+			u32 tsize;
+			const struct btf_type *ret;
+			const char *tname;
+
+			/* resolve the type size of ksym. */
+			ret = btf_resolve_size(btf_vmlinux, t, &tsize);
+			if (IS_ERR(ret)) {
+				tname = btf_name_by_offset(btf_vmlinux, t->name_off);
+				verbose(env, "unable to resolve the size of type '%s': %ld\n",
+					tname, PTR_ERR(ret));
+				return -EINVAL;
+			}
+			regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
+			regs[BPF_REG_0].mem_size = tsize;
+		} else {
+			regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL;
+			regs[BPF_REG_0].btf_id = meta.ret_btf_id;
+		}
 	} else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) {
 		int ret_btf_id;
 
@@ -7523,6 +7561,7 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 			dst_reg->mem_size = aux->btf_var.mem_size;
 			break;
 		case PTR_TO_BTF_ID:
+		case PTR_TO_PERCPU_BTF_ID:
 			dst_reg->btf_id = aux->btf_var.btf_id;
 			break;
 		default:
@@ -9449,10 +9488,14 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env,
 			       struct bpf_insn *insn,
 			       struct bpf_insn_aux_data *aux)
 {
-	u32 type, id = insn->imm;
+	u32 datasec_id, type, id = insn->imm;
+	const struct btf_var_secinfo *vsi;
+	const struct btf_type *datasec;
 	const struct btf_type *t;
 	const char *sym_name;
+	bool percpu = false;
 	u64 addr;
+	int i;
 
 	if (!btf_vmlinux) {
 		verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
@@ -9484,12 +9527,27 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env,
 		return -ENOENT;
 	}
 
+	datasec_id = btf_find_by_name_kind(btf_vmlinux, ".data..percpu",
+					   BTF_KIND_DATASEC);
+	if (datasec_id > 0) {
+		datasec = btf_type_by_id(btf_vmlinux, datasec_id);
+		for_each_vsi(i, datasec, vsi) {
+			if (vsi->type == id) {
+				percpu = true;
+				break;
+			}
+		}
+	}
+
 	insn[0].imm = (u32)addr;
 	insn[1].imm = addr >> 32;
 
 	type = t->type;
 	t = btf_type_skip_modifiers(btf_vmlinux, type, NULL);
-	if (!btf_type_is_struct(t)) {
+	if (percpu) {
+		aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID;
+		aux->btf_var.btf_id = type;
+	} else if (!btf_type_is_struct(t)) {
 		const struct btf_type *ret;
 		const char *tname;
 		u32 tsize;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index e118a83439c3..364a322e2898 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1327,6 +1327,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
 	case BPF_FUNC_snprintf_btf:
 		return &bpf_snprintf_btf_proto;
+	case BPF_FUNC_bpf_per_cpu_ptr:
+		return &bpf_per_cpu_ptr_proto;
 	default:
 		return NULL;
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 2aa156af24d6..f3c1b637ab39 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3686,6 +3686,23 @@ union bpf_attr {
  * 	Return
  * 		The helper returns **TC_ACT_REDIRECT** on success or
  * 		**TC_ACT_SHOT** on error.
+ *
+ * void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu)
+ *     Description
+ *             Take a pointer to a percpu ksym, *percpu_ptr*, and return a
+ *             pointer to the percpu kernel variable on *cpu*. A ksym is an
+ *             extern variable decorated with '__ksym'. For ksym, there is a
+ *             global var (either static or global) defined of the same name
+ *             in the kernel. The ksym is percpu if the global var is percpu.
+ *             The returned pointer points to the global percpu var on *cpu*.
+ *
+ *             bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the
+ *             kernel, except that bpf_per_cpu_ptr() may return NULL. This
+ *             happens if *cpu* is greater than or equal to nr_cpu_ids. The
+ *             caller of bpf_per_cpu_ptr() must check the returned value.
+ *     Return
+ *             A pointer pointing to the kernel percpu variable on *cpu*, or
+ *             NULL, if *cpu* is invalid.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3841,6 +3858,7 @@ union bpf_attr {
 	FN(seq_printf_btf),		\
 	FN(skb_cgroup_classid),		\
 	FN(redirect_neigh),		\
+	FN(bpf_per_cpu_ptr),            \
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 63d9b80dcf2c67bc5ade61cbbaa09d7af21f43f1 Mon Sep 17 00:00:00 2001
From: Hao Luo <haoluo@google.com>
Date: Tue, 29 Sep 2020 16:50:48 -0700
Subject: bpf: Introduce bpf_this_cpu_ptr()

Add bpf_this_cpu_ptr() to help access percpu var on this cpu. This
helper always returns a valid pointer, therefore no need to check
returned value for NULL. Also note that all programs run with
preemption disabled, which means that the returned pointer is stable
during all the execution of the program.

Signed-off-by: Hao Luo <haoluo@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200929235049.2533242-6-haoluo@google.com
---
 include/linux/bpf.h            |  2 ++
 include/uapi/linux/bpf.h       | 13 +++++++++++++
 kernel/bpf/helpers.c           | 14 ++++++++++++++
 kernel/bpf/verifier.c          | 11 ++++++++---
 kernel/trace/bpf_trace.c       |  2 ++
 tools/include/uapi/linux/bpf.h | 13 +++++++++++++
 6 files changed, 52 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9dde15b2479d..dc63eeed4fd9 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -309,6 +309,7 @@ enum bpf_return_type {
 	RET_PTR_TO_ALLOC_MEM_OR_NULL,	/* returns a pointer to dynamically allocated memory or NULL */
 	RET_PTR_TO_BTF_ID_OR_NULL,	/* returns a pointer to a btf_id or NULL */
 	RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */
+	RET_PTR_TO_MEM_OR_BTF_ID,	/* returns a pointer to a valid memory or a btf_id */
 };
 
 /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
@@ -1832,6 +1833,7 @@ extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
 extern const struct bpf_func_proto bpf_copy_from_user_proto;
 extern const struct bpf_func_proto bpf_snprintf_btf_proto;
 extern const struct bpf_func_proto bpf_per_cpu_ptr_proto;
+extern const struct bpf_func_proto bpf_this_cpu_ptr_proto;
 
 const struct bpf_func_proto *bpf_tracing_func_proto(
 	enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f3c1b637ab39..c446394135be 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3703,6 +3703,18 @@ union bpf_attr {
  *     Return
  *             A pointer pointing to the kernel percpu variable on *cpu*, or
  *             NULL, if *cpu* is invalid.
+ *
+ * void *bpf_this_cpu_ptr(const void *percpu_ptr)
+ *	Description
+ *		Take a pointer to a percpu ksym, *percpu_ptr*, and return a
+ *		pointer to the percpu kernel variable on this cpu. See the
+ *		description of 'ksym' in **bpf_per_cpu_ptr**\ ().
+ *
+ *		bpf_this_cpu_ptr() has the same semantic as this_cpu_ptr() in
+ *		the kernel. Different from **bpf_per_cpu_ptr**\ (), it would
+ *		never return NULL.
+ *	Return
+ *		A pointer pointing to the kernel percpu variable on this cpu.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3859,6 +3871,7 @@ union bpf_attr {
 	FN(skb_cgroup_classid),		\
 	FN(redirect_neigh),		\
 	FN(bpf_per_cpu_ptr),            \
+	FN(bpf_this_cpu_ptr),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 14fe3f64fd82..25520f5eeaf6 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -639,6 +639,18 @@ const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr)
+{
+	return (unsigned long)this_cpu_ptr((const void __percpu *)percpu_ptr);
+}
+
+const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
+	.func		= bpf_this_cpu_ptr,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID,
+	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
+};
+
 const struct bpf_func_proto bpf_get_current_task_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
@@ -707,6 +719,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_jiffies64_proto;
 	case BPF_FUNC_bpf_per_cpu_ptr:
 		return &bpf_per_cpu_ptr_proto;
+	case BPF_FUNC_bpf_this_cpu_ptr:
+		return &bpf_this_cpu_ptr_proto;
 	default:
 		break;
 	}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 216b8ece23ce..d9dbf271ebab 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5128,7 +5128,8 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 		regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
 		regs[BPF_REG_0].id = ++env->id_gen;
 		regs[BPF_REG_0].mem_size = meta.mem_size;
-	} else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL) {
+	} else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL ||
+		   fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID) {
 		const struct btf_type *t;
 
 		mark_reg_known_zero(env, regs, BPF_REG_0);
@@ -5146,10 +5147,14 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 					tname, PTR_ERR(ret));
 				return -EINVAL;
 			}
-			regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
+			regs[BPF_REG_0].type =
+				fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
+				PTR_TO_MEM : PTR_TO_MEM_OR_NULL;
 			regs[BPF_REG_0].mem_size = tsize;
 		} else {
-			regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL;
+			regs[BPF_REG_0].type =
+				fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
+				PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL;
 			regs[BPF_REG_0].btf_id = meta.ret_btf_id;
 		}
 	} else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) {
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 364a322e2898..a136a6a63a71 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1329,6 +1329,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_snprintf_btf_proto;
 	case BPF_FUNC_bpf_per_cpu_ptr:
 		return &bpf_per_cpu_ptr_proto;
+	case BPF_FUNC_bpf_this_cpu_ptr:
+		return &bpf_this_cpu_ptr_proto;
 	default:
 		return NULL;
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f3c1b637ab39..c446394135be 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3703,6 +3703,18 @@ union bpf_attr {
  *     Return
  *             A pointer pointing to the kernel percpu variable on *cpu*, or
  *             NULL, if *cpu* is invalid.
+ *
+ * void *bpf_this_cpu_ptr(const void *percpu_ptr)
+ *	Description
+ *		Take a pointer to a percpu ksym, *percpu_ptr*, and return a
+ *		pointer to the percpu kernel variable on this cpu. See the
+ *		description of 'ksym' in **bpf_per_cpu_ptr**\ ().
+ *
+ *		bpf_this_cpu_ptr() has the same semantic as this_cpu_ptr() in
+ *		the kernel. Different from **bpf_per_cpu_ptr**\ (), it would
+ *		never return NULL.
+ *	Return
+ *		A pointer pointing to the kernel percpu variable on this cpu.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3859,6 +3871,7 @@ union bpf_attr {
 	FN(skb_cgroup_classid),		\
 	FN(redirect_neigh),		\
 	FN(bpf_per_cpu_ptr),            \
+	FN(bpf_this_cpu_ptr),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From f5ace5ef37b1e1de49882248656f35c45e041585 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 2 Oct 2020 18:10:33 -0500
Subject: block: scsi_ioctl: Avoid the use of one-element arrays

One-element arrays are being deprecated[1]. Replace the one-element array
with a simple object of type compat_caddr_t: 'compat_caddr_t unused'[2],
since it seems this field is actually never used.

Also, update struct cdrom_generic_command in UAPI by adding an
anonymous union to avoid using the one-element array _reserved_.

[1] https://www.kernel.org/doc/html/v5.9-rc1/process/deprecated.html#zero-length-and-one-element-arrays
[2] https://github.com/KSPP/linux/issues/86

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://lore.kernel.org/lkml/5f76f5d0.qJ4t%2FHWuRzSW7bTa%25lkp@intel.com/
Build-tested-by: kernel test robot <lkp@intel.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/scsi_ioctl.c         | 6 +++---
 include/uapi/linux/cdrom.h | 5 ++++-
 2 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 4421e61c1af1..227f489aeaa5 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -651,7 +651,7 @@ struct compat_cdrom_generic_command {
 	unsigned char	data_direction;
 	compat_int_t	quiet;
 	compat_int_t	timeout;
-	compat_caddr_t	reserved[1];
+	compat_caddr_t	unused;
 };
 #endif
 
@@ -673,7 +673,7 @@ static int scsi_get_cdrom_generic_arg(struct cdrom_generic_command *cgc,
 			.data_direction	= cgc32.data_direction,
 			.quiet		= cgc32.quiet,
 			.timeout	= cgc32.timeout,
-			.reserved[0]	= compat_ptr(cgc32.reserved[0]),
+			.unused		= compat_ptr(cgc32.unused),
 		};
 		memcpy(&cgc->cmd, &cgc32.cmd, CDROM_PACKET_SIZE);
 		return 0;
@@ -698,7 +698,7 @@ static int scsi_put_cdrom_generic_arg(const struct cdrom_generic_command *cgc,
 			.data_direction	= cgc->data_direction,
 			.quiet		= cgc->quiet,
 			.timeout	= cgc->timeout,
-			.reserved[0]	= (uintptr_t)(cgc->reserved[0]),
+			.unused		= (uintptr_t)(cgc->unused),
 		};
 		memcpy(&cgc32.cmd, &cgc->cmd, CDROM_PACKET_SIZE);
 
diff --git a/include/uapi/linux/cdrom.h b/include/uapi/linux/cdrom.h
index 2817230148fd..6c34f6e2f1f7 100644
--- a/include/uapi/linux/cdrom.h
+++ b/include/uapi/linux/cdrom.h
@@ -289,7 +289,10 @@ struct cdrom_generic_command
 	unsigned char		data_direction;
 	int			quiet;
 	int			timeout;
-	void			__user *reserved[1];	/* unused, actually */
+	union {
+		void		__user *reserved[1];	/* unused, actually */
+		void            __user *unused;
+	};
 };
 
 /*
-- 
cgit v1.2.3


From 5f764d624a89d4d00d282157077878d4e7c69869 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 25 Sep 2020 06:51:43 +0200
Subject: fs: remove the compat readv/writev syscalls

Now that import_iovec handles compat iovecs, the native readv and writev
syscalls can be used for the compat case as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/arm64/include/asm/unistd32.h                  |  4 ++--
 arch/mips/kernel/syscalls/syscall_n32.tbl          |  4 ++--
 arch/mips/kernel/syscalls/syscall_o32.tbl          |  4 ++--
 arch/parisc/kernel/syscalls/syscall.tbl            |  4 ++--
 arch/powerpc/kernel/syscalls/syscall.tbl           |  4 ++--
 arch/s390/kernel/syscalls/syscall.tbl              |  4 ++--
 arch/sparc/kernel/syscalls/syscall.tbl             |  4 ++--
 arch/x86/entry/syscall_x32.c                       |  2 ++
 arch/x86/entry/syscalls/syscall_32.tbl             |  4 ++--
 arch/x86/entry/syscalls/syscall_64.tbl             |  4 ++--
 fs/read_write.c                                    | 14 --------------
 include/linux/compat.h                             |  4 ----
 include/uapi/asm-generic/unistd.h                  |  4 ++--
 tools/include/uapi/asm-generic/unistd.h            |  4 ++--
 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl |  4 ++--
 tools/perf/arch/s390/entry/syscalls/syscall.tbl    |  4 ++--
 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl  |  4 ++--
 17 files changed, 30 insertions(+), 46 deletions(-)

(limited to 'include/uapi')

diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 734860ac7cf9..4a236493dca5 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -301,9 +301,9 @@ __SYSCALL(__NR_flock, sys_flock)
 #define __NR_msync 144
 __SYSCALL(__NR_msync, sys_msync)
 #define __NR_readv 145
-__SYSCALL(__NR_readv, compat_sys_readv)
+__SYSCALL(__NR_readv, sys_readv)
 #define __NR_writev 146
-__SYSCALL(__NR_writev, compat_sys_writev)
+__SYSCALL(__NR_writev, sys_writev)
 #define __NR_getsid 147
 __SYSCALL(__NR_getsid, sys_getsid)
 #define __NR_fdatasync 148
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index f9df9edb67a4..c99a92646f8e 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -25,8 +25,8 @@
 15	n32	ioctl				compat_sys_ioctl
 16	n32	pread64				sys_pread64
 17	n32	pwrite64			sys_pwrite64
-18	n32	readv				compat_sys_readv
-19	n32	writev				compat_sys_writev
+18	n32	readv				sys_readv
+19	n32	writev				sys_writev
 20	n32	access				sys_access
 21	n32	pipe				sysm_pipe
 22	n32	_newselect			compat_sys_select
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 195b43cf27c8..075064d10661 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -156,8 +156,8 @@
 142	o32	_newselect			sys_select			compat_sys_select
 143	o32	flock				sys_flock
 144	o32	msync				sys_msync
-145	o32	readv				sys_readv			compat_sys_readv
-146	o32	writev				sys_writev			compat_sys_writev
+145	o32	readv				sys_readv
+146	o32	writev				sys_writev
 147	o32	cacheflush			sys_cacheflush
 148	o32	cachectl			sys_cachectl
 149	o32	sysmips				__sys_sysmips
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index def64d221cd4..192abde0001d 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -159,8 +159,8 @@
 142	common	_newselect		sys_select			compat_sys_select
 143	common	flock			sys_flock
 144	common	msync			sys_msync
-145	common	readv			sys_readv			compat_sys_readv
-146	common	writev			sys_writev			compat_sys_writev
+145	common	readv			sys_readv
+146	common	writev			sys_writev
 147	common	getsid			sys_getsid
 148	common	fdatasync		sys_fdatasync
 149	common	_sysctl			sys_ni_syscall
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index c2d737ff2e7b..6f1e2ecf0eda 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -193,8 +193,8 @@
 142	common	_newselect			sys_select			compat_sys_select
 143	common	flock				sys_flock
 144	common	msync				sys_msync
-145	common	readv				sys_readv			compat_sys_readv
-146	common	writev				sys_writev			compat_sys_writev
+145	common	readv				sys_readv
+146	common	writev				sys_writev
 147	common	getsid				sys_getsid
 148	common	fdatasync			sys_fdatasync
 149	nospu	_sysctl				sys_ni_syscall
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 10456bc936fb..6101cf2e004c 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -134,8 +134,8 @@
 142  64		select			sys_select			-
 143  common	flock			sys_flock			sys_flock
 144  common	msync			sys_msync			sys_msync
-145  common	readv			sys_readv			compat_sys_readv
-146  common	writev			sys_writev			compat_sys_writev
+145  common	readv			sys_readv			sys_readv
+146  common	writev			sys_writev			sys_writev
 147  common	getsid			sys_getsid			sys_getsid
 148  common	fdatasync		sys_fdatasync			sys_fdatasync
 149  common	_sysctl			-				-
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index 4af114e84f20..a87ddb282ab1 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -149,8 +149,8 @@
 117	common	getrusage		sys_getrusage			compat_sys_getrusage
 118	common	getsockopt		sys_getsockopt			sys_getsockopt
 119	common	getcwd			sys_getcwd
-120	common	readv			sys_readv			compat_sys_readv
-121	common	writev			sys_writev			compat_sys_writev
+120	common	readv			sys_readv
+121	common	writev			sys_writev
 122	common	settimeofday		sys_settimeofday		compat_sys_settimeofday
 123	32	fchown			sys_fchown16
 123	64	fchown			sys_fchown
diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
index 1583831f61a9..aa321444a41f 100644
--- a/arch/x86/entry/syscall_x32.c
+++ b/arch/x86/entry/syscall_x32.c
@@ -12,6 +12,8 @@
  * Reuse the 64-bit entry points for the x32 versions that occupy different
  * slots in the syscall table.
  */
+#define __x32_sys_readv		__x64_sys_readv
+#define __x32_sys_writev	__x64_sys_writev
 #define __x32_sys_getsockopt	__x64_sys_getsockopt
 #define __x32_sys_setsockopt	__x64_sys_setsockopt
 
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 9d1102873666..54ab4beb517f 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -156,8 +156,8 @@
 142	i386	_newselect		sys_select			compat_sys_select
 143	i386	flock			sys_flock
 144	i386	msync			sys_msync
-145	i386	readv			sys_readv			compat_sys_readv
-146	i386	writev			sys_writev			compat_sys_writev
+145	i386	readv			sys_readv
+146	i386	writev			sys_writev
 147	i386	getsid			sys_getsid
 148	i386	fdatasync		sys_fdatasync
 149	i386	_sysctl			sys_ni_syscall
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index f30d6ae9a688..b1e59957c5c5 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -371,8 +371,8 @@
 512	x32	rt_sigaction		compat_sys_rt_sigaction
 513	x32	rt_sigreturn		compat_sys_x32_rt_sigreturn
 514	x32	ioctl			compat_sys_ioctl
-515	x32	readv			compat_sys_readv
-516	x32	writev			compat_sys_writev
+515	x32	readv			sys_readv
+516	x32	writev			sys_writev
 517	x32	recvfrom		compat_sys_recvfrom
 518	x32	sendmsg			compat_sys_sendmsg
 519	x32	recvmsg			compat_sys_recvmsg
diff --git a/fs/read_write.c b/fs/read_write.c
index eab427b7cc0a..6c13f744c34a 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1074,13 +1074,6 @@ SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec,
  * in_compat_syscall().
  */
 #ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
-		const struct iovec __user *, vec,
-		compat_ulong_t, vlen)
-{
-	return do_readv(fd, vec, vlen, 0);
-}
-
 #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64
 COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
 		const struct iovec __user *, vec,
@@ -1122,13 +1115,6 @@ COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd,
 	return do_preadv(fd, vec, vlen, pos, flags);
 }
 
-COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
-		const struct iovec __user *, vec,
-		compat_ulong_t, vlen)
-{
-	return do_writev(fd, vec, vlen, 0);
-}
-
 #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64
 COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
 		const struct iovec __user *, vec,
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 36b5842162c7..07268fc8082b 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -545,10 +545,6 @@ asmlinkage long compat_sys_getdents(unsigned int fd,
 
 /* fs/read_write.c */
 asmlinkage long compat_sys_lseek(unsigned int, compat_off_t, unsigned int);
-asmlinkage ssize_t compat_sys_readv(compat_ulong_t fd,
-		const struct iovec __user *vec, compat_ulong_t vlen);
-asmlinkage ssize_t compat_sys_writev(compat_ulong_t fd,
-		const struct iovec __user *vec, compat_ulong_t vlen);
 /* No generic prototype for pread64 and pwrite64 */
 asmlinkage ssize_t compat_sys_preadv(compat_ulong_t fd,
 		const struct iovec __user *vec,
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 995b36c2ea7d..211c9eacbda6 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -207,9 +207,9 @@ __SYSCALL(__NR_read, sys_read)
 #define __NR_write 64
 __SYSCALL(__NR_write, sys_write)
 #define __NR_readv 65
-__SC_COMP(__NR_readv, sys_readv, compat_sys_readv)
+__SC_COMP(__NR_readv, sys_readv, sys_readv)
 #define __NR_writev 66
-__SC_COMP(__NR_writev, sys_writev, compat_sys_writev)
+__SC_COMP(__NR_writev, sys_writev, sys_writev)
 #define __NR_pread64 67
 __SC_COMP(__NR_pread64, sys_pread64, compat_sys_pread64)
 #define __NR_pwrite64 68
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 995b36c2ea7d..211c9eacbda6 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -207,9 +207,9 @@ __SYSCALL(__NR_read, sys_read)
 #define __NR_write 64
 __SYSCALL(__NR_write, sys_write)
 #define __NR_readv 65
-__SC_COMP(__NR_readv, sys_readv, compat_sys_readv)
+__SC_COMP(__NR_readv, sys_readv, sys_readv)
 #define __NR_writev 66
-__SC_COMP(__NR_writev, sys_writev, compat_sys_writev)
+__SC_COMP(__NR_writev, sys_writev, sys_writev)
 #define __NR_pread64 67
 __SC_COMP(__NR_pread64, sys_pread64, compat_sys_pread64)
 #define __NR_pwrite64 68
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 3ca6fe057a0b..46be68029587 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -189,8 +189,8 @@
 142	common	_newselect			sys_select			compat_sys_select
 143	common	flock				sys_flock
 144	common	msync				sys_msync
-145	common	readv				sys_readv			compat_sys_readv
-146	common	writev				sys_writev			compat_sys_writev
+145	common	readv				sys_readv
+146	common	writev				sys_writev
 147	common	getsid				sys_getsid
 148	common	fdatasync			sys_fdatasync
 149	nospu	_sysctl				sys_ni_syscall
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index 6a0bbea225db..fb5e61ce9d58 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -134,8 +134,8 @@
 142  64		select			sys_select			-
 143  common	flock			sys_flock			sys_flock
 144  common	msync			sys_msync			compat_sys_msync
-145  common	readv			sys_readv			compat_sys_readv
-146  common	writev			sys_writev			compat_sys_writev
+145  common	readv			sys_readv
+146  common	writev			sys_writev
 147  common	getsid			sys_getsid			sys_getsid
 148  common	fdatasync		sys_fdatasync			sys_fdatasync
 149  common	_sysctl			-				-
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index f30d6ae9a688..b1e59957c5c5 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -371,8 +371,8 @@
 512	x32	rt_sigaction		compat_sys_rt_sigaction
 513	x32	rt_sigreturn		compat_sys_x32_rt_sigreturn
 514	x32	ioctl			compat_sys_ioctl
-515	x32	readv			compat_sys_readv
-516	x32	writev			compat_sys_writev
+515	x32	readv			sys_readv
+516	x32	writev			sys_writev
 517	x32	recvfrom		compat_sys_recvfrom
 518	x32	sendmsg			compat_sys_sendmsg
 519	x32	recvmsg			compat_sys_recvmsg
-- 
cgit v1.2.3


From 598b3cec831fd6ccb3cbe4919a722e868c6364a8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 25 Sep 2020 06:51:44 +0200
Subject: fs: remove compat_sys_vmsplice

Now that import_iovec handles compat iovecs, the native vmsplice syscall
can be used for the compat case as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/arm64/include/asm/unistd32.h                  |  2 +-
 arch/mips/kernel/syscalls/syscall_n32.tbl          |  2 +-
 arch/mips/kernel/syscalls/syscall_o32.tbl          |  2 +-
 arch/parisc/kernel/syscalls/syscall.tbl            |  2 +-
 arch/powerpc/kernel/syscalls/syscall.tbl           |  2 +-
 arch/s390/kernel/syscalls/syscall.tbl              |  2 +-
 arch/sparc/kernel/syscalls/syscall.tbl             |  2 +-
 arch/x86/entry/syscall_x32.c                       |  1 +
 arch/x86/entry/syscalls/syscall_32.tbl             |  2 +-
 arch/x86/entry/syscalls/syscall_64.tbl             |  2 +-
 fs/splice.c                                        | 57 +++++-----------------
 include/linux/compat.h                             |  4 --
 include/uapi/asm-generic/unistd.h                  |  2 +-
 tools/include/uapi/asm-generic/unistd.h            |  2 +-
 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl |  2 +-
 tools/perf/arch/s390/entry/syscalls/syscall.tbl    |  2 +-
 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl  |  2 +-
 17 files changed, 28 insertions(+), 62 deletions(-)

(limited to 'include/uapi')

diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 4a236493dca5..11dfae3a8563 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -697,7 +697,7 @@ __SYSCALL(__NR_sync_file_range2, compat_sys_aarch32_sync_file_range2)
 #define __NR_tee 342
 __SYSCALL(__NR_tee, sys_tee)
 #define __NR_vmsplice 343
-__SYSCALL(__NR_vmsplice, compat_sys_vmsplice)
+__SYSCALL(__NR_vmsplice, sys_vmsplice)
 #define __NR_move_pages 344
 __SYSCALL(__NR_move_pages, compat_sys_move_pages)
 #define __NR_getcpu 345
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index c99a92646f8e..5a39d4de0ac8 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -278,7 +278,7 @@
 267	n32	splice				sys_splice
 268	n32	sync_file_range			sys_sync_file_range
 269	n32	tee				sys_tee
-270	n32	vmsplice			compat_sys_vmsplice
+270	n32	vmsplice			sys_vmsplice
 271	n32	move_pages			compat_sys_move_pages
 272	n32	set_robust_list			compat_sys_set_robust_list
 273	n32	get_robust_list			compat_sys_get_robust_list
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 075064d10661..136efc6b8c54 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -318,7 +318,7 @@
 304	o32	splice				sys_splice
 305	o32	sync_file_range			sys_sync_file_range		sys32_sync_file_range
 306	o32	tee				sys_tee
-307	o32	vmsplice			sys_vmsplice			compat_sys_vmsplice
+307	o32	vmsplice			sys_vmsplice
 308	o32	move_pages			sys_move_pages			compat_sys_move_pages
 309	o32	set_robust_list			sys_set_robust_list		compat_sys_set_robust_list
 310	o32	get_robust_list			sys_get_robust_list		compat_sys_get_robust_list
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index 192abde0001d..a9e184192cae 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -330,7 +330,7 @@
 292	32	sync_file_range		parisc_sync_file_range
 292	64	sync_file_range		sys_sync_file_range
 293	common	tee			sys_tee
-294	common	vmsplice		sys_vmsplice			compat_sys_vmsplice
+294	common	vmsplice		sys_vmsplice
 295	common	move_pages		sys_move_pages			compat_sys_move_pages
 296	common	getcpu			sys_getcpu
 297	common	epoll_pwait		sys_epoll_pwait			compat_sys_epoll_pwait
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 6f1e2ecf0eda..0d4985919ca3 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -369,7 +369,7 @@
 282	common	unshare				sys_unshare
 283	common	splice				sys_splice
 284	common	tee				sys_tee
-285	common	vmsplice			sys_vmsplice			compat_sys_vmsplice
+285	common	vmsplice			sys_vmsplice
 286	common	openat				sys_openat			compat_sys_openat
 287	common	mkdirat				sys_mkdirat
 288	common	mknodat				sys_mknodat
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 6101cf2e004c..b5495a42814b 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -316,7 +316,7 @@
 306  common	splice			sys_splice			sys_splice
 307  common	sync_file_range		sys_sync_file_range		compat_sys_s390_sync_file_range
 308  common	tee			sys_tee				sys_tee
-309  common	vmsplice		sys_vmsplice			compat_sys_vmsplice
+309  common	vmsplice		sys_vmsplice			sys_vmsplice
 310  common	move_pages		sys_move_pages			compat_sys_move_pages
 311  common	getcpu			sys_getcpu			sys_getcpu
 312  common	epoll_pwait		sys_epoll_pwait			compat_sys_epoll_pwait
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index a87ddb282ab1..f1810c1a35ca 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -38,7 +38,7 @@
 23	64    	setuid			sys_setuid
 24	32	getuid			sys_getuid16
 24	64   	getuid			sys_getuid
-25	common	vmsplice		sys_vmsplice			compat_sys_vmsplice
+25	common	vmsplice		sys_vmsplice
 26	common	ptrace			sys_ptrace			compat_sys_ptrace
 27	common	alarm			sys_alarm
 28	common	sigaltstack		sys_sigaltstack			compat_sys_sigaltstack
diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
index aa321444a41f..a4840b9d50ad 100644
--- a/arch/x86/entry/syscall_x32.c
+++ b/arch/x86/entry/syscall_x32.c
@@ -16,6 +16,7 @@
 #define __x32_sys_writev	__x64_sys_writev
 #define __x32_sys_getsockopt	__x64_sys_getsockopt
 #define __x32_sys_setsockopt	__x64_sys_setsockopt
+#define __x32_sys_vmsplice	__x64_sys_vmsplice
 
 #define __SYSCALL_64(nr, sym)
 
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 54ab4beb517f..0fb2f172581e 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -327,7 +327,7 @@
 313	i386	splice			sys_splice
 314	i386	sync_file_range		sys_ia32_sync_file_range
 315	i386	tee			sys_tee
-316	i386	vmsplice		sys_vmsplice			compat_sys_vmsplice
+316	i386	vmsplice		sys_vmsplice
 317	i386	move_pages		sys_move_pages			compat_sys_move_pages
 318	i386	getcpu			sys_getcpu
 319	i386	epoll_pwait		sys_epoll_pwait
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index b1e59957c5c5..642af919183d 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -388,7 +388,7 @@
 529	x32	waitid			compat_sys_waitid
 530	x32	set_robust_list		compat_sys_set_robust_list
 531	x32	get_robust_list		compat_sys_get_robust_list
-532	x32	vmsplice		compat_sys_vmsplice
+532	x32	vmsplice		sys_vmsplice
 533	x32	move_pages		compat_sys_move_pages
 534	x32	preadv			compat_sys_preadv64
 535	x32	pwritev			compat_sys_pwritev64
diff --git a/fs/splice.c b/fs/splice.c
index 132d42b9871f..18d84544030b 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -33,7 +33,6 @@
 #include <linux/security.h>
 #include <linux/gfp.h>
 #include <linux/socket.h>
-#include <linux/compat.h>
 #include <linux/sched/signal.h>
 
 #include "internal.h"
@@ -1332,20 +1331,6 @@ static int vmsplice_type(struct fd f, int *type)
  * Currently we punt and implement it as a normal copy, see pipe_to_user().
  *
  */
-static long do_vmsplice(struct file *f, struct iov_iter *iter, unsigned int flags)
-{
-	if (unlikely(flags & ~SPLICE_F_ALL))
-		return -EINVAL;
-
-	if (!iov_iter_count(iter))
-		return 0;
-
-	if (iov_iter_rw(iter) == WRITE)
-		return vmsplice_to_pipe(f, iter, flags);
-	else
-		return vmsplice_to_user(f, iter, flags);
-}
-
 SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov,
 		unsigned long, nr_segs, unsigned int, flags)
 {
@@ -1356,6 +1341,9 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov,
 	struct fd f;
 	int type;
 
+	if (unlikely(flags & ~SPLICE_F_ALL))
+		return -EINVAL;
+
 	f = fdget(fd);
 	error = vmsplice_type(f, &type);
 	if (error)
@@ -1363,40 +1351,21 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov,
 
 	error = import_iovec(type, uiov, nr_segs,
 			     ARRAY_SIZE(iovstack), &iov, &iter);
-	if (error >= 0) {
-		error = do_vmsplice(f.file, &iter, flags);
-		kfree(iov);
-	}
-	fdput(f);
-	return error;
-}
+	if (error < 0)
+		goto out_fdput;
 
-#ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, iov32,
-		    unsigned int, nr_segs, unsigned int, flags)
-{
-	struct iovec iovstack[UIO_FASTIOV];
-	struct iovec *iov = iovstack;
-	struct iov_iter iter;
-	ssize_t error;
-	struct fd f;
-	int type;
-
-	f = fdget(fd);
-	error = vmsplice_type(f, &type);
-	if (error)
-		return error;
+	if (!iov_iter_count(&iter))
+		error = 0;
+	else if (iov_iter_rw(&iter) == WRITE)
+		error = vmsplice_to_pipe(f.file, &iter, flags);
+	else
+		error = vmsplice_to_user(f.file, &iter, flags);
 
-	error = import_iovec(type, (struct iovec __user *)iov32, nr_segs,
-			     ARRAY_SIZE(iovstack), &iov, &iter);
-	if (error >= 0) {
-		error = do_vmsplice(f.file, &iter, flags);
-		kfree(iov);
-	}
+	kfree(iov);
+out_fdput:
 	fdput(f);
 	return error;
 }
-#endif
 
 SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
 		int, fd_out, loff_t __user *, off_out,
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 07268fc8082b..7c3e876703cf 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -597,10 +597,6 @@ asmlinkage long compat_sys_signalfd4(int ufd,
 				     const compat_sigset_t __user *sigmask,
 				     compat_size_t sigsetsize, int flags);
 
-/* fs/splice.c */
-asmlinkage long compat_sys_vmsplice(int fd, const struct compat_iovec __user *,
-				    unsigned int nr_segs, unsigned int flags);
-
 /* fs/stat.c */
 asmlinkage long compat_sys_newfstatat(unsigned int dfd,
 				      const char __user *filename,
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 211c9eacbda6..f2dcb0d57030 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -237,7 +237,7 @@ __SC_COMP(__NR_signalfd4, sys_signalfd4, compat_sys_signalfd4)
 
 /* fs/splice.c */
 #define __NR_vmsplice 75
-__SC_COMP(__NR_vmsplice, sys_vmsplice, compat_sys_vmsplice)
+__SYSCALL(__NR_vmsplice, sys_vmsplice)
 #define __NR_splice 76
 __SYSCALL(__NR_splice, sys_splice)
 #define __NR_tee 77
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 211c9eacbda6..f2dcb0d57030 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -237,7 +237,7 @@ __SC_COMP(__NR_signalfd4, sys_signalfd4, compat_sys_signalfd4)
 
 /* fs/splice.c */
 #define __NR_vmsplice 75
-__SC_COMP(__NR_vmsplice, sys_vmsplice, compat_sys_vmsplice)
+__SYSCALL(__NR_vmsplice, sys_vmsplice)
 #define __NR_splice 76
 __SYSCALL(__NR_splice, sys_splice)
 #define __NR_tee 77
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 46be68029587..26f0347c1511 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -363,7 +363,7 @@
 282	common	unshare				sys_unshare
 283	common	splice				sys_splice
 284	common	tee				sys_tee
-285	common	vmsplice			sys_vmsplice			compat_sys_vmsplice
+285	common	vmsplice			sys_vmsplice
 286	common	openat				sys_openat			compat_sys_openat
 287	common	mkdirat				sys_mkdirat
 288	common	mknodat				sys_mknodat
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index fb5e61ce9d58..02ad81f69bb7 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -316,7 +316,7 @@
 306  common	splice			sys_splice			compat_sys_splice
 307  common	sync_file_range		sys_sync_file_range		compat_sys_s390_sync_file_range
 308  common	tee			sys_tee				compat_sys_tee
-309  common	vmsplice		sys_vmsplice			compat_sys_vmsplice
+309  common	vmsplice		sys_vmsplice			sys_vmsplice
 310  common	move_pages		sys_move_pages			compat_sys_move_pages
 311  common	getcpu			sys_getcpu			compat_sys_getcpu
 312  common	epoll_pwait		sys_epoll_pwait			compat_sys_epoll_pwait
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index b1e59957c5c5..642af919183d 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -388,7 +388,7 @@
 529	x32	waitid			compat_sys_waitid
 530	x32	set_robust_list		compat_sys_set_robust_list
 531	x32	get_robust_list		compat_sys_get_robust_list
-532	x32	vmsplice		compat_sys_vmsplice
+532	x32	vmsplice		sys_vmsplice
 533	x32	move_pages		compat_sys_move_pages
 534	x32	preadv			compat_sys_preadv64
 535	x32	pwritev			compat_sys_pwritev64
-- 
cgit v1.2.3


From c3973b401ef2b0b8005f8074a10e96e3ea093823 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 25 Sep 2020 06:51:45 +0200
Subject: mm: remove compat_process_vm_{readv,writev}

Now that import_iovec handles compat iovecs, the native syscalls
can be used for the compat case as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/arm64/include/asm/unistd32.h                  |  4 +-
 arch/mips/kernel/syscalls/syscall_n32.tbl          |  4 +-
 arch/mips/kernel/syscalls/syscall_o32.tbl          |  4 +-
 arch/parisc/kernel/syscalls/syscall.tbl            |  4 +-
 arch/powerpc/kernel/syscalls/syscall.tbl           |  4 +-
 arch/s390/kernel/syscalls/syscall.tbl              |  4 +-
 arch/sparc/kernel/syscalls/syscall.tbl             |  4 +-
 arch/x86/entry/syscall_x32.c                       |  2 +
 arch/x86/entry/syscalls/syscall_32.tbl             |  4 +-
 arch/x86/entry/syscalls/syscall_64.tbl             |  4 +-
 include/linux/compat.h                             |  8 ---
 include/uapi/asm-generic/unistd.h                  |  6 +-
 mm/process_vm_access.c                             | 69 ----------------------
 tools/include/uapi/asm-generic/unistd.h            |  6 +-
 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl |  4 +-
 tools/perf/arch/s390/entry/syscalls/syscall.tbl    |  4 +-
 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl  |  4 +-
 17 files changed, 30 insertions(+), 109 deletions(-)

(limited to 'include/uapi')

diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 11dfae3a8563..0c280a05f699 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -763,9 +763,9 @@ __SYSCALL(__NR_sendmmsg, compat_sys_sendmmsg)
 #define __NR_setns 375
 __SYSCALL(__NR_setns, sys_setns)
 #define __NR_process_vm_readv 376
-__SYSCALL(__NR_process_vm_readv, compat_sys_process_vm_readv)
+__SYSCALL(__NR_process_vm_readv, sys_process_vm_readv)
 #define __NR_process_vm_writev 377
-__SYSCALL(__NR_process_vm_writev, compat_sys_process_vm_writev)
+__SYSCALL(__NR_process_vm_writev, sys_process_vm_writev)
 #define __NR_kcmp 378
 __SYSCALL(__NR_kcmp, sys_kcmp)
 #define __NR_finit_module 379
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 5a39d4de0ac8..0bc2e0fcf1ee 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -317,8 +317,8 @@
 306	n32	syncfs				sys_syncfs
 307	n32	sendmmsg			compat_sys_sendmmsg
 308	n32	setns				sys_setns
-309	n32	process_vm_readv		compat_sys_process_vm_readv
-310	n32	process_vm_writev		compat_sys_process_vm_writev
+309	n32	process_vm_readv		sys_process_vm_readv
+310	n32	process_vm_writev		sys_process_vm_writev
 311	n32	kcmp				sys_kcmp
 312	n32	finit_module			sys_finit_module
 313	n32	sched_setattr			sys_sched_setattr
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 136efc6b8c54..b408c13b9342 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -356,8 +356,8 @@
 342	o32	syncfs				sys_syncfs
 343	o32	sendmmsg			sys_sendmmsg			compat_sys_sendmmsg
 344	o32	setns				sys_setns
-345	o32	process_vm_readv		sys_process_vm_readv		compat_sys_process_vm_readv
-346	o32	process_vm_writev		sys_process_vm_writev		compat_sys_process_vm_writev
+345	o32	process_vm_readv		sys_process_vm_readv
+346	o32	process_vm_writev		sys_process_vm_writev
 347	o32	kcmp				sys_kcmp
 348	o32	finit_module			sys_finit_module
 349	o32	sched_setattr			sys_sched_setattr
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index a9e184192cae..2015a5124b78 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -372,8 +372,8 @@
 327	common	syncfs			sys_syncfs
 328	common	setns			sys_setns
 329	common	sendmmsg		sys_sendmmsg			compat_sys_sendmmsg
-330	common	process_vm_readv	sys_process_vm_readv		compat_sys_process_vm_readv
-331	common	process_vm_writev	sys_process_vm_writev		compat_sys_process_vm_writev
+330	common	process_vm_readv	sys_process_vm_readv
+331	common	process_vm_writev	sys_process_vm_writev
 332	common	kcmp			sys_kcmp
 333	common	finit_module		sys_finit_module
 334	common	sched_setattr		sys_sched_setattr
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 0d4985919ca3..66a472aa635d 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -449,8 +449,8 @@
 348	common	syncfs				sys_syncfs
 349	common	sendmmsg			sys_sendmmsg			compat_sys_sendmmsg
 350	common	setns				sys_setns
-351	nospu	process_vm_readv		sys_process_vm_readv		compat_sys_process_vm_readv
-352	nospu	process_vm_writev		sys_process_vm_writev		compat_sys_process_vm_writev
+351	nospu	process_vm_readv		sys_process_vm_readv
+352	nospu	process_vm_writev		sys_process_vm_writev
 353	nospu	finit_module			sys_finit_module
 354	nospu	kcmp				sys_kcmp
 355	common	sched_setattr			sys_sched_setattr
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index b5495a42814b..7485867a490b 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -347,8 +347,8 @@
 337  common	clock_adjtime		sys_clock_adjtime		sys_clock_adjtime32
 338  common	syncfs			sys_syncfs			sys_syncfs
 339  common	setns			sys_setns			sys_setns
-340  common	process_vm_readv	sys_process_vm_readv		compat_sys_process_vm_readv
-341  common	process_vm_writev	sys_process_vm_writev		compat_sys_process_vm_writev
+340  common	process_vm_readv	sys_process_vm_readv		sys_process_vm_readv
+341  common	process_vm_writev	sys_process_vm_writev		sys_process_vm_writev
 342  common	s390_runtime_instr	sys_s390_runtime_instr		sys_s390_runtime_instr
 343  common	kcmp			sys_kcmp			sys_kcmp
 344  common	finit_module		sys_finit_module		sys_finit_module
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index f1810c1a35ca..4a9365b2e340 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -406,8 +406,8 @@
 335	common	syncfs			sys_syncfs
 336	common	sendmmsg		sys_sendmmsg			compat_sys_sendmmsg
 337	common	setns			sys_setns
-338	common	process_vm_readv	sys_process_vm_readv		compat_sys_process_vm_readv
-339	common	process_vm_writev	sys_process_vm_writev		compat_sys_process_vm_writev
+338	common	process_vm_readv	sys_process_vm_readv
+339	common	process_vm_writev	sys_process_vm_writev
 340	32	kern_features		sys_ni_syscall			sys_kern_features
 340	64	kern_features		sys_kern_features
 341	common	kcmp			sys_kcmp
diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
index a4840b9d50ad..f2fe0a33bcfd 100644
--- a/arch/x86/entry/syscall_x32.c
+++ b/arch/x86/entry/syscall_x32.c
@@ -17,6 +17,8 @@
 #define __x32_sys_getsockopt	__x64_sys_getsockopt
 #define __x32_sys_setsockopt	__x64_sys_setsockopt
 #define __x32_sys_vmsplice	__x64_sys_vmsplice
+#define __x32_sys_process_vm_readv	__x64_sys_process_vm_readv
+#define __x32_sys_process_vm_writev	__x64_sys_process_vm_writev
 
 #define __SYSCALL_64(nr, sym)
 
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 0fb2f172581e..5fbe10ad8a23 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -358,8 +358,8 @@
 344	i386	syncfs			sys_syncfs
 345	i386	sendmmsg		sys_sendmmsg			compat_sys_sendmmsg
 346	i386	setns			sys_setns
-347	i386	process_vm_readv	sys_process_vm_readv		compat_sys_process_vm_readv
-348	i386	process_vm_writev	sys_process_vm_writev		compat_sys_process_vm_writev
+347	i386	process_vm_readv	sys_process_vm_readv
+348	i386	process_vm_writev	sys_process_vm_writev
 349	i386	kcmp			sys_kcmp
 350	i386	finit_module		sys_finit_module
 351	i386	sched_setattr		sys_sched_setattr
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 642af919183d..347809649ba2 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -395,8 +395,8 @@
 536	x32	rt_tgsigqueueinfo	compat_sys_rt_tgsigqueueinfo
 537	x32	recvmmsg		compat_sys_recvmmsg_time64
 538	x32	sendmmsg		compat_sys_sendmmsg
-539	x32	process_vm_readv	compat_sys_process_vm_readv
-540	x32	process_vm_writev	compat_sys_process_vm_writev
+539	x32	process_vm_readv	sys_process_vm_readv
+540	x32	process_vm_writev	sys_process_vm_writev
 541	x32	setsockopt		sys_setsockopt
 542	x32	getsockopt		sys_getsockopt
 543	x32	io_setup		compat_sys_io_setup
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 7c3e876703cf..3e3d2beafed3 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -780,14 +780,6 @@ asmlinkage long compat_sys_open_by_handle_at(int mountdirfd,
 					     int flags);
 asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg,
 				    unsigned vlen, unsigned int flags);
-asmlinkage ssize_t compat_sys_process_vm_readv(compat_pid_t pid,
-		const struct compat_iovec __user *lvec,
-		compat_ulong_t liovcnt, const struct compat_iovec __user *rvec,
-		compat_ulong_t riovcnt, compat_ulong_t flags);
-asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid,
-		const struct compat_iovec __user *lvec,
-		compat_ulong_t liovcnt, const struct compat_iovec __user *rvec,
-		compat_ulong_t riovcnt, compat_ulong_t flags);
 asmlinkage long compat_sys_execveat(int dfd, const char __user *filename,
 		     const compat_uptr_t __user *argv,
 		     const compat_uptr_t __user *envp, int flags);
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index f2dcb0d57030..c1dfe99c9c3f 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -727,11 +727,9 @@ __SYSCALL(__NR_setns, sys_setns)
 #define __NR_sendmmsg 269
 __SC_COMP(__NR_sendmmsg, sys_sendmmsg, compat_sys_sendmmsg)
 #define __NR_process_vm_readv 270
-__SC_COMP(__NR_process_vm_readv, sys_process_vm_readv, \
-          compat_sys_process_vm_readv)
+__SYSCALL(__NR_process_vm_readv, sys_process_vm_readv)
 #define __NR_process_vm_writev 271
-__SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \
-          compat_sys_process_vm_writev)
+__SYSCALL(__NR_process_vm_writev, sys_process_vm_writev)
 #define __NR_kcmp 272
 __SYSCALL(__NR_kcmp, sys_kcmp)
 #define __NR_finit_module 273
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index 3f2156aab442..fd12da80b6f2 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -14,10 +14,6 @@
 #include <linux/slab.h>
 #include <linux/syscalls.h>
 
-#ifdef CONFIG_COMPAT
-#include <linux/compat.h>
-#endif
-
 /**
  * process_vm_rw_pages - read/write pages from task specified
  * @pages: array of pointers to pages we want to copy
@@ -304,68 +300,3 @@ SYSCALL_DEFINE6(process_vm_writev, pid_t, pid,
 {
 	return process_vm_rw(pid, lvec, liovcnt, rvec, riovcnt, flags, 1);
 }
-
-#ifdef CONFIG_COMPAT
-
-static ssize_t
-compat_process_vm_rw(compat_pid_t pid,
-		     const struct compat_iovec __user *lvec,
-		     unsigned long liovcnt,
-		     const struct compat_iovec __user *rvec,
-		     unsigned long riovcnt,
-		     unsigned long flags, int vm_write)
-{
-	struct iovec iovstack_l[UIO_FASTIOV];
-	struct iovec iovstack_r[UIO_FASTIOV];
-	struct iovec *iov_l = iovstack_l;
-	struct iovec *iov_r = iovstack_r;
-	struct iov_iter iter;
-	ssize_t rc = -EFAULT;
-	int dir = vm_write ? WRITE : READ;
-
-	if (flags != 0)
-		return -EINVAL;
-
-	rc = import_iovec(dir, (const struct iovec __user *)lvec, liovcnt,
-			  UIO_FASTIOV, &iov_l, &iter);
-	if (rc < 0)
-		return rc;
-	if (!iov_iter_count(&iter))
-		goto free_iov_l;
-	iov_r = iovec_from_user((const struct iovec __user *)rvec, riovcnt,
-				UIO_FASTIOV, iovstack_r, true);
-	if (IS_ERR(iov_r)) {
-		rc = PTR_ERR(iov_r);
-		goto free_iov_l;
-	}
-	rc = process_vm_rw_core(pid, &iter, iov_r, riovcnt, flags, vm_write);
-	if (iov_r != iovstack_r)
-		kfree(iov_r);
-free_iov_l:
-	kfree(iov_l);
-	return rc;
-}
-
-COMPAT_SYSCALL_DEFINE6(process_vm_readv, compat_pid_t, pid,
-		       const struct compat_iovec __user *, lvec,
-		       compat_ulong_t, liovcnt,
-		       const struct compat_iovec __user *, rvec,
-		       compat_ulong_t, riovcnt,
-		       compat_ulong_t, flags)
-{
-	return compat_process_vm_rw(pid, lvec, liovcnt, rvec,
-				    riovcnt, flags, 0);
-}
-
-COMPAT_SYSCALL_DEFINE6(process_vm_writev, compat_pid_t, pid,
-		       const struct compat_iovec __user *, lvec,
-		       compat_ulong_t, liovcnt,
-		       const struct compat_iovec __user *, rvec,
-		       compat_ulong_t, riovcnt,
-		       compat_ulong_t, flags)
-{
-	return compat_process_vm_rw(pid, lvec, liovcnt, rvec,
-				    riovcnt, flags, 1);
-}
-
-#endif
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index f2dcb0d57030..c1dfe99c9c3f 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -727,11 +727,9 @@ __SYSCALL(__NR_setns, sys_setns)
 #define __NR_sendmmsg 269
 __SC_COMP(__NR_sendmmsg, sys_sendmmsg, compat_sys_sendmmsg)
 #define __NR_process_vm_readv 270
-__SC_COMP(__NR_process_vm_readv, sys_process_vm_readv, \
-          compat_sys_process_vm_readv)
+__SYSCALL(__NR_process_vm_readv, sys_process_vm_readv)
 #define __NR_process_vm_writev 271
-__SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \
-          compat_sys_process_vm_writev)
+__SYSCALL(__NR_process_vm_writev, sys_process_vm_writev)
 #define __NR_kcmp 272
 __SYSCALL(__NR_kcmp, sys_kcmp)
 #define __NR_finit_module 273
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 26f0347c1511..a188f053cbf9 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -443,8 +443,8 @@
 348	common	syncfs				sys_syncfs
 349	common	sendmmsg			sys_sendmmsg			compat_sys_sendmmsg
 350	common	setns				sys_setns
-351	nospu	process_vm_readv		sys_process_vm_readv		compat_sys_process_vm_readv
-352	nospu	process_vm_writev		sys_process_vm_writev		compat_sys_process_vm_writev
+351	nospu	process_vm_readv		sys_process_vm_readv
+352	nospu	process_vm_writev		sys_process_vm_writev
 353	nospu	finit_module			sys_finit_module
 354	nospu	kcmp				sys_kcmp
 355	common	sched_setattr			sys_sched_setattr
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index 02ad81f69bb7..c44c83032c3a 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -347,8 +347,8 @@
 337  common	clock_adjtime		sys_clock_adjtime		compat_sys_clock_adjtime
 338  common	syncfs			sys_syncfs			sys_syncfs
 339  common	setns			sys_setns			sys_setns
-340  common	process_vm_readv	sys_process_vm_readv		compat_sys_process_vm_readv
-341  common	process_vm_writev	sys_process_vm_writev		compat_sys_process_vm_writev
+340  common	process_vm_readv	sys_process_vm_readv		sys_process_vm_readv
+341  common	process_vm_writev	sys_process_vm_writev		sys_process_vm_writev
 342  common	s390_runtime_instr	sys_s390_runtime_instr		sys_s390_runtime_instr
 343  common	kcmp			sys_kcmp			compat_sys_kcmp
 344  common	finit_module		sys_finit_module		compat_sys_finit_module
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 642af919183d..347809649ba2 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -395,8 +395,8 @@
 536	x32	rt_tgsigqueueinfo	compat_sys_rt_tgsigqueueinfo
 537	x32	recvmmsg		compat_sys_recvmmsg_time64
 538	x32	sendmmsg		compat_sys_sendmmsg
-539	x32	process_vm_readv	compat_sys_process_vm_readv
-540	x32	process_vm_writev	compat_sys_process_vm_writev
+539	x32	process_vm_readv	sys_process_vm_readv
+540	x32	process_vm_writev	sys_process_vm_writev
 541	x32	setsockopt		sys_setsockopt
 542	x32	getsockopt		sys_getsockopt
 543	x32	io_setup		compat_sys_io_setup
-- 
cgit v1.2.3


From 50a896cf2d6f34e884a00139d6e6012c9833ace3 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sat, 3 Oct 2020 10:44:45 +0200
Subject: genetlink: properly support per-op policy dumping

Add support for per-op policy dumping. The data is pretty much
as before, except that now the assumption that the policy with
index 0 is "the" policy no longer holds - you now need to look
at the new CTRL_ATTR_OP_POLICY attribute which is a nested attr
(indexed by op) containing attributes for do and dump policies.

When a single op is requested, the CTRL_ATTR_OP_POLICY will be
added in the same way, since do and dump policies may differ.

v2:
 - conditionally advertise per-command policies only if there
   actually is a policy being used for the do/dump and it's
   present at all

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/genetlink.h |  10 ++++
 net/netlink/genetlink.c        | 102 +++++++++++++++++++++++++++++++++++++----
 2 files changed, 102 insertions(+), 10 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h
index 9c0636ec2286..bc9c98e84828 100644
--- a/include/uapi/linux/genetlink.h
+++ b/include/uapi/linux/genetlink.h
@@ -64,6 +64,7 @@ enum {
 	CTRL_ATTR_OPS,
 	CTRL_ATTR_MCAST_GROUPS,
 	CTRL_ATTR_POLICY,
+	CTRL_ATTR_OP_POLICY,
 	__CTRL_ATTR_MAX,
 };
 
@@ -85,6 +86,15 @@ enum {
 	__CTRL_ATTR_MCAST_GRP_MAX,
 };
 
+enum {
+	CTRL_ATTR_POLICY_UNSPEC,
+	CTRL_ATTR_POLICY_DO,
+	CTRL_ATTR_POLICY_DUMP,
+
+	__CTRL_ATTR_POLICY_DUMP_MAX,
+	CTRL_ATTR_POLICY_DUMP_MAX = __CTRL_ATTR_POLICY_DUMP_MAX - 1
+};
+
 #define CTRL_ATTR_MCAST_GRP_MAX (__CTRL_ATTR_MCAST_GRP_MAX - 1)
 
 
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 5e33c7938470..eb916c44884f 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -1112,7 +1112,10 @@ static int genl_ctrl_event(int event, const struct genl_family *family,
 
 struct ctrl_dump_policy_ctx {
 	struct netlink_policy_dump_state *state;
+	const struct genl_family *rt;
+	unsigned int opidx;
 	u16 fam_id;
+	u8 policies:1;
 };
 
 static const struct nla_policy ctrl_policy_policy[] = {
@@ -1127,6 +1130,8 @@ static int ctrl_dumppolicy_start(struct netlink_callback *cb)
 	struct ctrl_dump_policy_ctx *ctx = (void *)cb->ctx;
 	struct nlattr **tb = info->attrs;
 	const struct genl_family *rt;
+	struct genl_ops op;
+	int err, i;
 
 	BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));
 
@@ -1147,11 +1152,23 @@ static int ctrl_dumppolicy_start(struct netlink_callback *cb)
 	if (!rt)
 		return -ENOENT;
 
-	if (!rt->policy)
-		return -ENODATA;
+	ctx->rt = rt;
+
+	for (i = 0; i < genl_get_cmd_cnt(rt); i++) {
+		genl_get_cmd_by_index(i, rt, &op);
+
+		if (op.policy) {
+			err = netlink_policy_dump_add_policy(&ctx->state,
+							     op.policy,
+							     op.maxattr);
+			if (err)
+				return err;
+		}
+	}
 
-	return netlink_policy_dump_add_policy(&ctx->state, rt->policy,
-					      rt->maxattr);
+	if (!ctx->state)
+		return -ENODATA;
+	return 0;
 }
 
 static void *ctrl_dumppolicy_prep(struct sk_buff *skb,
@@ -1172,12 +1189,78 @@ static void *ctrl_dumppolicy_prep(struct sk_buff *skb,
 	return hdr;
 }
 
+static int ctrl_dumppolicy_put_op(struct sk_buff *skb,
+				  struct netlink_callback *cb,
+			          struct genl_ops *op)
+{
+	struct ctrl_dump_policy_ctx *ctx = (void *)cb->ctx;
+	struct nlattr *nest_pol, *nest_op;
+	void *hdr;
+	int idx;
+
+	/* skip if we have nothing to show */
+	if (!op->policy)
+		return 0;
+	if (!op->doit &&
+	    (!op->dumpit || op->validate & GENL_DONT_VALIDATE_DUMP))
+		return 0;
+
+	hdr = ctrl_dumppolicy_prep(skb, cb);
+	if (!hdr)
+		return -ENOBUFS;
+
+	nest_pol = nla_nest_start(skb, CTRL_ATTR_OP_POLICY);
+	if (!nest_pol)
+		goto err;
+
+	nest_op = nla_nest_start(skb, op->cmd);
+	if (!nest_op)
+		goto err;
+
+	/* for now both do/dump are always the same */
+	idx = netlink_policy_dump_get_policy_idx(ctx->state,
+						 op->policy,
+						 op->maxattr);
+
+	if (op->doit && nla_put_u32(skb, CTRL_ATTR_POLICY_DO, idx))
+		goto err;
+
+	if (op->dumpit && !(op->validate & GENL_DONT_VALIDATE_DUMP) &&
+	    nla_put_u32(skb, CTRL_ATTR_POLICY_DUMP, idx))
+		goto err;
+
+	nla_nest_end(skb, nest_op);
+	nla_nest_end(skb, nest_pol);
+	genlmsg_end(skb, hdr);
+
+	return 0;
+err:
+	genlmsg_cancel(skb, hdr);
+	return -ENOBUFS;
+}
+
 static int ctrl_dumppolicy(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct ctrl_dump_policy_ctx *ctx = (void *)cb->ctx;
+	void *hdr;
+
+	if (!ctx->policies) {
+		while (ctx->opidx < genl_get_cmd_cnt(ctx->rt)) {
+			struct genl_ops op;
+
+			genl_get_cmd_by_index(ctx->opidx, ctx->rt, &op);
+
+			if (ctrl_dumppolicy_put_op(skb, cb, &op))
+				return skb->len;
+
+			ctx->opidx++;
+		}
+
+		/* completed with the per-op policy index list */
+		ctx->policies = true;
+	}
 
 	while (netlink_policy_dump_loop(ctx->state)) {
-		void *hdr;
 		struct nlattr *nest;
 
 		hdr = ctrl_dumppolicy_prep(skb, cb);
@@ -1194,14 +1277,13 @@ static int ctrl_dumppolicy(struct sk_buff *skb, struct netlink_callback *cb)
 		nla_nest_end(skb, nest);
 
 		genlmsg_end(skb, hdr);
-		continue;
-
-nla_put_failure:
-		genlmsg_cancel(skb, hdr);
-		break;
 	}
 
 	return skb->len;
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return skb->len;
 }
 
 static int ctrl_dumppolicy_done(struct netlink_callback *cb)
-- 
cgit v1.2.3


From e992a6eda9a1eeeab73a8d2792464e4a2b1ebc3b Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Sat, 3 Oct 2020 10:44:46 +0200
Subject: genetlink: allow dumping command-specific policy

Right now CTRL_CMD_GETPOLICY can only dump the family-wide
policy. Support dumping the policy of a specific op.

v3:
 - rebase after per-op policy export and handle that
v2:
 - make cmd U32, just in case.
v1:
 - don't echo op in the output in a naive way, this should
   make it cleaner to extend the output format for dumping
   policies for all the commands at once in the future.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/r/20201001225933.1373426-11-kuba@kernel.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/genetlink.h |  1 +
 net/netlink/genetlink.c        | 41 ++++++++++++++++++++++++++++++++++++-----
 2 files changed, 37 insertions(+), 5 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h
index bc9c98e84828..d83f214b4134 100644
--- a/include/uapi/linux/genetlink.h
+++ b/include/uapi/linux/genetlink.h
@@ -65,6 +65,7 @@ enum {
 	CTRL_ATTR_MCAST_GROUPS,
 	CTRL_ATTR_POLICY,
 	CTRL_ATTR_OP_POLICY,
+	CTRL_ATTR_OP,
 	__CTRL_ATTR_MAX,
 };
 
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index eb916c44884f..c992424e4d63 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -123,7 +123,7 @@ static void genl_op_from_full(const struct genl_family *family,
 		op->policy = family->policy;
 }
 
-static int genl_get_cmd_full(u8 cmd, const struct genl_family *family,
+static int genl_get_cmd_full(u32 cmd, const struct genl_family *family,
 			     struct genl_ops *op)
 {
 	int i;
@@ -152,7 +152,7 @@ static void genl_op_from_small(const struct genl_family *family,
 	op->policy = family->policy;
 }
 
-static int genl_get_cmd_small(u8 cmd, const struct genl_family *family,
+static int genl_get_cmd_small(u32 cmd, const struct genl_family *family,
 			      struct genl_ops *op)
 {
 	int i;
@@ -166,7 +166,7 @@ static int genl_get_cmd_small(u8 cmd, const struct genl_family *family,
 	return -ENOENT;
 }
 
-static int genl_get_cmd(u8 cmd, const struct genl_family *family,
+static int genl_get_cmd(u32 cmd, const struct genl_family *family,
 			struct genl_ops *op)
 {
 	if (!genl_get_cmd_full(cmd, family, op))
@@ -1114,14 +1114,17 @@ struct ctrl_dump_policy_ctx {
 	struct netlink_policy_dump_state *state;
 	const struct genl_family *rt;
 	unsigned int opidx;
+	u32 op;
 	u16 fam_id;
-	u8 policies:1;
+	u8 policies:1,
+	   single_op:1;
 };
 
 static const struct nla_policy ctrl_policy_policy[] = {
 	[CTRL_ATTR_FAMILY_ID]	= { .type = NLA_U16 },
 	[CTRL_ATTR_FAMILY_NAME]	= { .type = NLA_NUL_STRING,
 				    .len = GENL_NAMSIZ - 1 },
+	[CTRL_ATTR_OP]		= { .type = NLA_U32 },
 };
 
 static int ctrl_dumppolicy_start(struct netlink_callback *cb)
@@ -1154,6 +1157,23 @@ static int ctrl_dumppolicy_start(struct netlink_callback *cb)
 
 	ctx->rt = rt;
 
+	if (tb[CTRL_ATTR_OP]) {
+		ctx->single_op = true;
+		ctx->op = nla_get_u32(tb[CTRL_ATTR_OP]);
+
+		err = genl_get_cmd(ctx->op, rt, &op);
+		if (err) {
+			NL_SET_BAD_ATTR(cb->extack, tb[CTRL_ATTR_OP]);
+			return err;
+		}
+
+		if (!op.policy)
+			return -ENODATA;
+
+		return netlink_policy_dump_add_policy(&ctx->state, op.policy,
+						      op.maxattr);
+	}
+
 	for (i = 0; i < genl_get_cmd_cnt(rt); i++) {
 		genl_get_cmd_by_index(i, rt, &op);
 
@@ -1248,7 +1268,18 @@ static int ctrl_dumppolicy(struct sk_buff *skb, struct netlink_callback *cb)
 		while (ctx->opidx < genl_get_cmd_cnt(ctx->rt)) {
 			struct genl_ops op;
 
-			genl_get_cmd_by_index(ctx->opidx, ctx->rt, &op);
+			if (ctx->single_op) {
+				int err;
+
+				err = genl_get_cmd(ctx->op, ctx->rt, &op);
+				if (WARN_ON(err))
+					return skb->len;
+
+				/* break out of the loop after this one */
+				ctx->opidx = genl_get_cmd_cnt(ctx->rt);
+			} else {
+				genl_get_cmd_by_index(ctx->opidx, ctx->rt, &op);
+			}
 
 			if (ctrl_dumppolicy_put_op(skb, cb, &op))
 				return skb->len;
-- 
cgit v1.2.3


From 19fbcb36a39eefbe8912a13ccc02e937b1c418d6 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Sat, 3 Oct 2020 00:44:28 +0200
Subject: net/sched: act_vlan: Add {POP,PUSH}_ETH actions

Implement TCA_VLAN_ACT_POP_ETH and TCA_VLAN_ACT_PUSH_ETH, to
respectively pop and push a base Ethernet header at the beginning of a
frame.

POP_ETH is just a matter of pulling ETH_HLEN bytes. VLAN tags, if any,
must be stripped before calling POP_ETH.

PUSH_ETH is restricted to skbs with no mac_header, and only the MAC
addresses can be configured. The Ethertype is automatically set from
skb->protocol. These restrictions ensure that all skb's fields remain
consistent, so that this action can't confuse other parts of the
networking stack (like GSO).

Since openvswitch already had these actions, consolidate the code in
skbuff.c (like for vlan and mpls push/pop).

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h              |  3 ++
 include/net/tc_act/tc_vlan.h        |  2 ++
 include/uapi/linux/tc_act/tc_vlan.h |  4 +++
 net/core/skbuff.c                   | 67 +++++++++++++++++++++++++++++++++++++
 net/openvswitch/actions.c           | 28 ++++++----------
 net/sched/act_vlan.c                | 40 ++++++++++++++++++++++
 6 files changed, 126 insertions(+), 18 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 3d0cf3722bb4..42131e325e27 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3573,6 +3573,9 @@ int skb_ensure_writable(struct sk_buff *skb, int write_len);
 int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci);
 int skb_vlan_pop(struct sk_buff *skb);
 int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci);
+int skb_eth_pop(struct sk_buff *skb);
+int skb_eth_push(struct sk_buff *skb, const unsigned char *dst,
+		 const unsigned char *src);
 int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
 		  int mac_len, bool ethernet);
 int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len,
diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h
index 4e2502408c31..f051046ba034 100644
--- a/include/net/tc_act/tc_vlan.h
+++ b/include/net/tc_act/tc_vlan.h
@@ -11,6 +11,8 @@
 
 struct tcf_vlan_params {
 	int               tcfv_action;
+	unsigned char     tcfv_push_dst[ETH_ALEN];
+	unsigned char     tcfv_push_src[ETH_ALEN];
 	u16               tcfv_push_vid;
 	__be16            tcfv_push_proto;
 	u8                tcfv_push_prio;
diff --git a/include/uapi/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h
index 168995b54a70..5b306fe815cc 100644
--- a/include/uapi/linux/tc_act/tc_vlan.h
+++ b/include/uapi/linux/tc_act/tc_vlan.h
@@ -16,6 +16,8 @@
 #define TCA_VLAN_ACT_POP	1
 #define TCA_VLAN_ACT_PUSH	2
 #define TCA_VLAN_ACT_MODIFY	3
+#define TCA_VLAN_ACT_POP_ETH	4
+#define TCA_VLAN_ACT_PUSH_ETH	5
 
 struct tc_vlan {
 	tc_gen;
@@ -30,6 +32,8 @@ enum {
 	TCA_VLAN_PUSH_VLAN_PROTOCOL,
 	TCA_VLAN_PAD,
 	TCA_VLAN_PUSH_VLAN_PRIORITY,
+	TCA_VLAN_PUSH_ETH_DST,
+	TCA_VLAN_PUSH_ETH_SRC,
 	__TCA_VLAN_MAX,
 };
 #define TCA_VLAN_MAX (__TCA_VLAN_MAX - 1)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index e0774471f56d..75b043accddb 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -5558,6 +5558,73 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
 }
 EXPORT_SYMBOL(skb_vlan_push);
 
+/**
+ * skb_eth_pop() - Drop the Ethernet header at the head of a packet
+ *
+ * @skb: Socket buffer to modify
+ *
+ * Drop the Ethernet header of @skb.
+ *
+ * Expects that skb->data points to the mac header and that no VLAN tags are
+ * present.
+ *
+ * Returns 0 on success, -errno otherwise.
+ */
+int skb_eth_pop(struct sk_buff *skb)
+{
+	if (!pskb_may_pull(skb, ETH_HLEN) || skb_vlan_tagged(skb) ||
+	    skb_network_offset(skb) < ETH_HLEN)
+		return -EPROTO;
+
+	skb_pull_rcsum(skb, ETH_HLEN);
+	skb_reset_mac_header(skb);
+	skb_reset_mac_len(skb);
+
+	return 0;
+}
+EXPORT_SYMBOL(skb_eth_pop);
+
+/**
+ * skb_eth_push() - Add a new Ethernet header at the head of a packet
+ *
+ * @skb: Socket buffer to modify
+ * @dst: Destination MAC address of the new header
+ * @src: Source MAC address of the new header
+ *
+ * Prepend @skb with a new Ethernet header.
+ *
+ * Expects that skb->data points to the mac header, which must be empty.
+ *
+ * Returns 0 on success, -errno otherwise.
+ */
+int skb_eth_push(struct sk_buff *skb, const unsigned char *dst,
+		 const unsigned char *src)
+{
+	struct ethhdr *eth;
+	int err;
+
+	if (skb_network_offset(skb) || skb_vlan_tag_present(skb))
+		return -EPROTO;
+
+	err = skb_cow_head(skb, sizeof(*eth));
+	if (err < 0)
+		return err;
+
+	skb_push(skb, sizeof(*eth));
+	skb_reset_mac_header(skb);
+	skb_reset_mac_len(skb);
+
+	eth = eth_hdr(skb);
+	ether_addr_copy(eth->h_dest, dst);
+	ether_addr_copy(eth->h_source, src);
+	eth->h_proto = skb->protocol;
+
+	skb_postpush_rcsum(skb, eth, sizeof(*eth));
+
+	return 0;
+}
+EXPORT_SYMBOL(skb_eth_push);
+
 /* Update the ethertype of hdr and the skb csum value if required. */
 static void skb_mod_eth_type(struct sk_buff *skb, struct ethhdr *hdr,
 			     __be16 ethertype)
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 855f2c155956..b87bfc82f44f 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -277,9 +277,11 @@ static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
  */
 static int pop_eth(struct sk_buff *skb, struct sw_flow_key *key)
 {
-	skb_pull_rcsum(skb, ETH_HLEN);
-	skb_reset_mac_header(skb);
-	skb_reset_mac_len(skb);
+	int err;
+
+	err = skb_eth_pop(skb);
+	if (err)
+		return err;
 
 	/* safe right before invalidate_flow_key */
 	key->mac_proto = MAC_PROTO_NONE;
@@ -290,22 +292,12 @@ static int pop_eth(struct sk_buff *skb, struct sw_flow_key *key)
 static int push_eth(struct sk_buff *skb, struct sw_flow_key *key,
 		    const struct ovs_action_push_eth *ethh)
 {
-	struct ethhdr *hdr;
-
-	/* Add the new Ethernet header */
-	if (skb_cow_head(skb, ETH_HLEN) < 0)
-		return -ENOMEM;
-
-	skb_push(skb, ETH_HLEN);
-	skb_reset_mac_header(skb);
-	skb_reset_mac_len(skb);
-
-	hdr = eth_hdr(skb);
-	ether_addr_copy(hdr->h_source, ethh->addresses.eth_src);
-	ether_addr_copy(hdr->h_dest, ethh->addresses.eth_dst);
-	hdr->h_proto = skb->protocol;
+	int err;
 
-	skb_postpush_rcsum(skb, hdr, ETH_HLEN);
+	err = skb_eth_push(skb, ethh->addresses.eth_dst,
+			   ethh->addresses.eth_src);
+	if (err)
+		return err;
 
 	/* safe right before invalidate_flow_key */
 	key->mac_proto = MAC_PROTO_ETHERNET;
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index a5ff9f68ab02..8758bd2a78fa 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -77,6 +77,16 @@ static int tcf_vlan_act(struct sk_buff *skb, const struct tc_action *a,
 		/* put updated tci as hwaccel tag */
 		__vlan_hwaccel_put_tag(skb, p->tcfv_push_proto, tci);
 		break;
+	case TCA_VLAN_ACT_POP_ETH:
+		err = skb_eth_pop(skb);
+		if (err)
+			goto drop;
+		break;
+	case TCA_VLAN_ACT_PUSH_ETH:
+		err = skb_eth_push(skb, p->tcfv_push_dst, p->tcfv_push_src);
+		if (err)
+			goto drop;
+		break;
 	default:
 		BUG();
 	}
@@ -93,10 +103,13 @@ drop:
 }
 
 static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
+	[TCA_VLAN_UNSPEC]		= { .strict_start_type = TCA_VLAN_PUSH_ETH_DST },
 	[TCA_VLAN_PARMS]		= { .len = sizeof(struct tc_vlan) },
 	[TCA_VLAN_PUSH_VLAN_ID]		= { .type = NLA_U16 },
 	[TCA_VLAN_PUSH_VLAN_PROTOCOL]	= { .type = NLA_U16 },
 	[TCA_VLAN_PUSH_VLAN_PRIORITY]	= { .type = NLA_U8 },
+	[TCA_VLAN_PUSH_ETH_DST]		= NLA_POLICY_ETH_ADDR,
+	[TCA_VLAN_PUSH_ETH_SRC]		= NLA_POLICY_ETH_ADDR,
 };
 
 static int tcf_vlan_init(struct net *net, struct nlattr *nla,
@@ -179,6 +192,17 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 		if (tb[TCA_VLAN_PUSH_VLAN_PRIORITY])
 			push_prio = nla_get_u8(tb[TCA_VLAN_PUSH_VLAN_PRIORITY]);
 		break;
+	case TCA_VLAN_ACT_POP_ETH:
+		break;
+	case TCA_VLAN_ACT_PUSH_ETH:
+		if (!tb[TCA_VLAN_PUSH_ETH_DST] || !tb[TCA_VLAN_PUSH_ETH_SRC]) {
+			if (exists)
+				tcf_idr_release(*a, bind);
+			else
+				tcf_idr_cleanup(tn, index);
+			return -EINVAL;
+		}
+		break;
 	default:
 		if (exists)
 			tcf_idr_release(*a, bind);
@@ -219,6 +243,13 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 	p->tcfv_push_prio = push_prio;
 	p->tcfv_push_proto = push_proto;
 
+	if (action == TCA_VLAN_ACT_PUSH_ETH) {
+		nla_memcpy(&p->tcfv_push_dst, tb[TCA_VLAN_PUSH_ETH_DST],
+			   ETH_ALEN);
+		nla_memcpy(&p->tcfv_push_src, tb[TCA_VLAN_PUSH_ETH_SRC],
+			   ETH_ALEN);
+	}
+
 	spin_lock_bh(&v->tcf_lock);
 	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
 	p = rcu_replace_pointer(v->vlan_p, p, lockdep_is_held(&v->tcf_lock));
@@ -279,6 +310,15 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
 					      p->tcfv_push_prio))))
 		goto nla_put_failure;
 
+	if (p->tcfv_action == TCA_VLAN_ACT_PUSH_ETH) {
+		if (nla_put(skb, TCA_VLAN_PUSH_ETH_DST, ETH_ALEN,
+			    p->tcfv_push_dst))
+			goto nla_put_failure;
+		if (nla_put(skb, TCA_VLAN_PUSH_ETH_SRC, ETH_ALEN,
+			    p->tcfv_push_src))
+			goto nla_put_failure;
+	}
+
 	tcf_tm_dump(&t, &v->tcf_tm);
 	if (nla_put_64bit(skb, TCA_VLAN_TM, sizeof(t), &t, TCA_VLAN_PAD))
 		goto nla_put_failure;
-- 
cgit v1.2.3


From a45294af9e96a3e060b6272fa7cd2c4b196de335 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Sat, 3 Oct 2020 00:44:31 +0200
Subject: net/sched: act_mpls: Add action to push MPLS LSE before Ethernet
 header

Define the MAC_PUSH action which pushes an MPLS LSE before the mac
header (instead of between the mac and the network headers as the
plain PUSH action does).

The only special case is when the skb has an offloaded VLAN. In that
case, it has to be inlined before pushing the MPLS header.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tc_act/tc_mpls.h |  1 +
 net/sched/act_mpls.c                | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/tc_act/tc_mpls.h b/include/uapi/linux/tc_act/tc_mpls.h
index 9360e95273c7..9e4e8f52a779 100644
--- a/include/uapi/linux/tc_act/tc_mpls.h
+++ b/include/uapi/linux/tc_act/tc_mpls.h
@@ -10,6 +10,7 @@
 #define TCA_MPLS_ACT_PUSH	2
 #define TCA_MPLS_ACT_MODIFY	3
 #define TCA_MPLS_ACT_DEC_TTL	4
+#define TCA_MPLS_ACT_MAC_PUSH	5
 
 struct tc_mpls {
 	tc_gen;		/* generic TC action fields. */
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index 8118e2640979..bb6b715636db 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -87,6 +87,23 @@ static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a,
 				  skb->dev && skb->dev->type == ARPHRD_ETHER))
 			goto drop;
 		break;
+	case TCA_MPLS_ACT_MAC_PUSH:
+		if (skb_vlan_tag_present(skb)) {
+			if (__vlan_insert_inner_tag(skb, skb->vlan_proto,
+						    skb_vlan_tag_get(skb),
+						    ETH_HLEN) < 0)
+				goto drop;
+
+			skb->protocol = skb->vlan_proto;
+			__vlan_hwaccel_clear_tag(skb);
+		}
+
+		new_lse = tcf_mpls_get_lse(NULL, p, mac_len ||
+					   !eth_p_mpls(skb->protocol));
+
+		if (skb_mpls_push(skb, new_lse, p->tcfm_proto, 0, false))
+			goto drop;
+		break;
 	case TCA_MPLS_ACT_MODIFY:
 		new_lse = tcf_mpls_get_lse(mpls_hdr(skb), p, false);
 		if (skb_mpls_update_lse(skb, new_lse))
@@ -188,6 +205,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla,
 		}
 		break;
 	case TCA_MPLS_ACT_PUSH:
+	case TCA_MPLS_ACT_MAC_PUSH:
 		if (!tb[TCA_MPLS_LABEL]) {
 			NL_SET_ERR_MSG_MOD(extack, "Label is required for MPLS push");
 			return -EINVAL;
-- 
cgit v1.2.3


From cf1166349c68816f4259d32559f54972b0d5c1a4 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sun, 4 Oct 2020 18:12:51 +0200
Subject: net: devlink: Add unused port flavour

Not all ports of a switch need to be used, particularly in embedded
systems. Add a port flavour for ports which physically exist in the
switch, but are not connected to the front panel etc, and so are
unused. By having unused ports present in devlink, it gives a more
accurate representation of the hardware. It also allows regions to be
associated with such ports, making it possible, for example, to check
that unused ports are correctly powered off, or to compare the probable
reset defaults of unused ports against used ports experiencing issues.

Actually registering unused ports and setting the flavour to unused is
optional. The DSA core will register all such switch ports, but such
ports are expected to be limited in number. Bigger ASICs may decide
not to list unused ports.

v2:
Expand the description about why it is useful

Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Tested-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h | 3 +++
 net/core/devlink.c           | 4 +++-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index ba467dc07852..5f1d6c327670 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -197,6 +197,9 @@ enum devlink_port_flavour {
 				      * port that faces the PCI VF.
 				      */
 	DEVLINK_PORT_FLAVOUR_VIRTUAL, /* Any virtual port facing the user. */
+	DEVLINK_PORT_FLAVOUR_UNUSED, /* Port which exists in the switch, but
+				      * is not used in any way.
+				      */
 };
 
 enum devlink_param_cmode {
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 0f3c8b2ec056..20224fd1ebaf 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -7612,7 +7612,8 @@ static bool devlink_port_type_should_warn(struct devlink_port *devlink_port)
 {
 	/* Ignore CPU and DSA flavours. */
 	return devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_CPU &&
-	       devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_DSA;
+	       devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_DSA &&
+	       devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_UNUSED;
 }
 
 #define DEVLINK_PORT_TYPE_WARN_TIMEOUT (HZ * 3600)
@@ -7897,6 +7898,7 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
 		break;
 	case DEVLINK_PORT_FLAVOUR_CPU:
 	case DEVLINK_PORT_FLAVOUR_DSA:
+	case DEVLINK_PORT_FLAVOUR_UNUSED:
 		/* As CPU and DSA ports do not have a netdevice associated
 		 * case should not ever happen.
 		 */
-- 
cgit v1.2.3


From f7b2cdb23abf62bc3d33c2e0b0009a09412ff475 Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Fri, 11 Oct 2019 15:52:30 +0800
Subject: drm/amdgpu: add uapi to define van gogh series

Add a flag to define van gogh series.

Signed-off-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/drm/amdgpu_drm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index c5ff2b275fcd..455badbb1290 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -1083,6 +1083,7 @@ struct drm_amdgpu_info_vce_clock_table {
 #define AMDGPU_FAMILY_AI			141 /* Vega10 */
 #define AMDGPU_FAMILY_RV			142 /* Raven */
 #define AMDGPU_FAMILY_NV			143 /* Navi10 */
+#define AMDGPU_FAMILY_VGH			144 /* Van Gogh */
 
 #if defined(__cplusplus)
 }
-- 
cgit v1.2.3


From 1e483203965bdab466af0739c1edf7da07da241d Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Fri, 28 Aug 2020 17:35:12 +0800
Subject: drm/amdgpu: add uapi to define van gogh memory type

This patch adds van gogh memory type as DDR5.

Signed-off-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/drm/amdgpu_drm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 455badbb1290..41edf27fc1ee 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -946,6 +946,7 @@ struct drm_amdgpu_info_firmware {
 #define AMDGPU_VRAM_TYPE_DDR3  7
 #define AMDGPU_VRAM_TYPE_DDR4  8
 #define AMDGPU_VRAM_TYPE_GDDR6 9
+#define AMDGPU_VRAM_TYPE_DDR5  10
 
 struct drm_amdgpu_info_device {
 	/** PCI Device ID */
-- 
cgit v1.2.3


From 6fbcb00c7984fa7d49af2c361453c0397cdea400 Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Mon, 20 Jul 2020 16:11:10 +0800
Subject: drm/amdgpu: add TOC firmware definition

This patch is to add TOC firmware definition on uapi.

Signed-off-by: Huang Rui <ray.huang@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/drm/amdgpu_drm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 41edf27fc1ee..f7d7bce7d3b0 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -724,6 +724,8 @@ struct drm_amdgpu_cs_chunk_data {
 	#define AMDGPU_INFO_FW_TA		0x13
 	/* Subquery id: Query DMCUB firmware version */
 	#define AMDGPU_INFO_FW_DMCUB		0x14
+	/* Subquery id: Query TOC firmware version */
+	#define AMDGPU_INFO_FW_TOC		0x15
 
 /* number of bytes moved for TTM migration */
 #define AMDGPU_INFO_NUM_BYTES_MOVED		0x0f
-- 
cgit v1.2.3


From bdbb4e29df8b790db50cb73ce25d23543329f05f Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 5 Oct 2020 15:07:38 -0700
Subject: netlink: add mask validation

We don't have good validation policy for existing unsigned int attrs
which serve as flags (for new ones we could use NLA_BITFIELD32).
With increased use of policy dumping having the validation be
expressed as part of the policy is important. Add validation
policy in form of a mask of supported/valid bits.

Support u64 in the uAPI to be future-proof, but really for now
the embedded mask member can only hold 32 bits, so anything with
bit 32+ set will always fail validation.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h        | 10 ++++++++++
 include/uapi/linux/netlink.h |  2 ++
 lib/nlattr.c                 | 36 ++++++++++++++++++++++++++++++++++++
 net/netlink/policy.c         |  8 ++++++++
 4 files changed, 56 insertions(+)

(limited to 'include/uapi')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index c5aa46f379bc..2b9e41075f19 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -200,6 +200,7 @@ enum nla_policy_validation {
 	NLA_VALIDATE_RANGE_WARN_TOO_LONG,
 	NLA_VALIDATE_MIN,
 	NLA_VALIDATE_MAX,
+	NLA_VALIDATE_MASK,
 	NLA_VALIDATE_RANGE_PTR,
 	NLA_VALIDATE_FUNCTION,
 };
@@ -317,6 +318,7 @@ struct nla_policy {
 	u16		len;
 	union {
 		const u32 bitfield32_valid;
+		const u32 mask;
 		const char *reject_message;
 		const struct nla_policy *nested_policy;
 		struct netlink_range_validation *range;
@@ -368,6 +370,8 @@ struct nla_policy {
 	(tp == NLA_S8 || tp == NLA_S16 || tp == NLA_S32 || tp == NLA_S64)
 
 #define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition))
+#define NLA_ENSURE_UINT_TYPE(tp)			\
+	(__NLA_ENSURE(__NLA_IS_UINT_TYPE(tp)) + tp)
 #define NLA_ENSURE_UINT_OR_BINARY_TYPE(tp)		\
 	(__NLA_ENSURE(__NLA_IS_UINT_TYPE(tp) ||	\
 		      tp == NLA_MSECS ||		\
@@ -416,6 +420,12 @@ struct nla_policy {
 	.max = _max,					\
 }
 
+#define NLA_POLICY_MASK(tp, _mask) {			\
+	.type = NLA_ENSURE_UINT_TYPE(tp),		\
+	.validation_type = NLA_VALIDATE_MASK,		\
+	.mask = _mask,					\
+}
+
 #define NLA_POLICY_VALIDATE_FN(tp, fn, ...) {		\
 	.type = NLA_ENSURE_NO_VALIDATION_PTR(tp),	\
 	.validation_type = NLA_VALIDATE_FUNCTION,	\
diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index eac8a6a648ea..d02e472ba54c 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -331,6 +331,7 @@ enum netlink_attribute_type {
  *	the index, if limited inside the nesting (U32)
  * @NL_POLICY_TYPE_ATTR_BITFIELD32_MASK: valid mask for the
  *	bitfield32 type (U32)
+ * @NL_POLICY_TYPE_ATTR_MASK: mask of valid bits for unsigned integers (U64)
  * @NL_POLICY_TYPE_ATTR_PAD: pad attribute for 64-bit alignment
  */
 enum netlink_policy_type_attr {
@@ -346,6 +347,7 @@ enum netlink_policy_type_attr {
 	NL_POLICY_TYPE_ATTR_POLICY_MAXTYPE,
 	NL_POLICY_TYPE_ATTR_BITFIELD32_MASK,
 	NL_POLICY_TYPE_ATTR_PAD,
+	NL_POLICY_TYPE_ATTR_MASK,
 
 	/* keep last */
 	__NL_POLICY_TYPE_ATTR_MAX,
diff --git a/lib/nlattr.c b/lib/nlattr.c
index 80ff9fe83696..9c99f5daa4d2 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -323,6 +323,37 @@ static int nla_validate_int_range(const struct nla_policy *pt,
 	}
 }
 
+static int nla_validate_mask(const struct nla_policy *pt,
+			     const struct nlattr *nla,
+			     struct netlink_ext_ack *extack)
+{
+	u64 value;
+
+	switch (pt->type) {
+	case NLA_U8:
+		value = nla_get_u8(nla);
+		break;
+	case NLA_U16:
+		value = nla_get_u16(nla);
+		break;
+	case NLA_U32:
+		value = nla_get_u32(nla);
+		break;
+	case NLA_U64:
+		value = nla_get_u64(nla);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (value & ~(u64)pt->mask) {
+		NL_SET_ERR_MSG_ATTR(extack, nla, "reserved bit set");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int validate_nla(const struct nlattr *nla, int maxtype,
 			const struct nla_policy *policy, unsigned int validate,
 			struct netlink_ext_ack *extack, unsigned int depth)
@@ -503,6 +534,11 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 		if (err)
 			return err;
 		break;
+	case NLA_VALIDATE_MASK:
+		err = nla_validate_mask(pt, nla, extack);
+		if (err)
+			return err;
+		break;
 	case NLA_VALIDATE_FUNCTION:
 		if (pt->validate) {
 			err = pt->validate(nla, extack);
diff --git a/net/netlink/policy.c b/net/netlink/policy.c
index cf23c0151721..ee26d01328ee 100644
--- a/net/netlink/policy.c
+++ b/net/netlink/policy.c
@@ -263,6 +263,14 @@ send_attribute:
 		else
 			type = NL_ATTR_TYPE_U64;
 
+		if (pt->validation_type == NLA_VALIDATE_MASK) {
+			if (nla_put_u64_64bit(skb, NL_POLICY_TYPE_ATTR_MASK,
+					      pt->mask,
+					      NL_POLICY_TYPE_ATTR_PAD))
+				goto nla_put_failure;
+			break;
+		}
+
 		nla_get_range_unsigned(pt, &range);
 
 		if (nla_put_u64_64bit(skb, NL_POLICY_TYPE_ATTR_MIN_VALUE_U,
-- 
cgit v1.2.3


From eb88531bdbfaafb827192d1fc6c5a3fcc4fadd96 Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Sun, 27 Sep 2020 01:24:31 +0900
Subject: can: raw: add missing error queue support

Error queues are not yet implemented in CAN-raw sockets.

The problem: a userland call to recvmsg(soc, msg, MSG_ERRQUEUE) on a
CAN-raw socket would unqueue messages from the normal queue without
any kind of error or warning. As such, it prevented CAN drivers from
using the functionalities that rely on the error queue such as
skb_tx_timestamp().

SCM_CAN_RAW_ERRQUEUE is defined as the type for the CAN raw error
queue. SCM stands for "Socket control messages". The name is inspired
from SCM_J1939_ERRQUEUE of include/uapi/linux/can/j1939.h.

Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Link: https://lore.kernel.org/r/20200926162527.270030-1-mailhol.vincent@wanadoo.fr
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/uapi/linux/can/raw.h | 3 +++
 net/can/raw.c                | 4 ++++
 2 files changed, 7 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/can/raw.h b/include/uapi/linux/can/raw.h
index 6a11d308eb5c..3386aa81fdf2 100644
--- a/include/uapi/linux/can/raw.h
+++ b/include/uapi/linux/can/raw.h
@@ -49,6 +49,9 @@
 #include <linux/can.h>
 
 #define SOL_CAN_RAW (SOL_CAN_BASE + CAN_RAW)
+enum {
+	SCM_CAN_RAW_ERRQUEUE = 1,
+};
 
 /* for socket options affecting the socket (not the global system) */
 
diff --git a/net/can/raw.c b/net/can/raw.c
index 24db4b4afdc7..ea70850f9152 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -804,6 +804,10 @@ static int raw_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 	noblock = flags & MSG_DONTWAIT;
 	flags &= ~MSG_DONTWAIT;
 
+	if (flags & MSG_ERRQUEUE)
+		return sock_recv_errqueue(sk, msg, size,
+					  SOL_CAN_RAW, SCM_CAN_RAW_ERRQUEUE);
+
 	skb = skb_recv_datagram(sk, flags, noblock, &err);
 	if (!skb)
 		return err;
-- 
cgit v1.2.3


From 1465af12e254a68706e110846f59cf0f09683184 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 22 Sep 2020 10:37:01 +0800
Subject: btrfs: tree-checker: fix false alert caused by legacy btrfs root item

Commit 259ee7754b67 ("btrfs: tree-checker: Add ROOT_ITEM check")
introduced btrfs root item size check, however btrfs root item has two
versions, the legacy one which just ends before generation_v2 member, is
smaller than current btrfs root item size.

This caused btrfs kernel to reject valid but old tree root leaves.

Fix this problem by also allowing legacy root item, since kernel can
already handle them pretty well and upgrade to newer root item format
when needed.

Reported-by: Martin Steigerwald <martin@lichtvoll.de>
Fixes: 259ee7754b67 ("btrfs: tree-checker: Add ROOT_ITEM check")
CC: stable@vger.kernel.org # 5.4+
Tested-By: Martin Steigerwald <martin@lichtvoll.de>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-checker.c         | 17 ++++++++++++-----
 include/uapi/linux/btrfs_tree.h | 14 ++++++++++++++
 2 files changed, 26 insertions(+), 5 deletions(-)

(limited to 'include/uapi')

diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 7b1fee630f97..f0ffd5ee77bd 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -1035,7 +1035,7 @@ static int check_root_item(struct extent_buffer *leaf, struct btrfs_key *key,
 			   int slot)
 {
 	struct btrfs_fs_info *fs_info = leaf->fs_info;
-	struct btrfs_root_item ri;
+	struct btrfs_root_item ri = { 0 };
 	const u64 valid_root_flags = BTRFS_ROOT_SUBVOL_RDONLY |
 				     BTRFS_ROOT_SUBVOL_DEAD;
 	int ret;
@@ -1044,14 +1044,21 @@ static int check_root_item(struct extent_buffer *leaf, struct btrfs_key *key,
 	if (ret < 0)
 		return ret;
 
-	if (btrfs_item_size_nr(leaf, slot) != sizeof(ri)) {
+	if (btrfs_item_size_nr(leaf, slot) != sizeof(ri) &&
+	    btrfs_item_size_nr(leaf, slot) != btrfs_legacy_root_item_size()) {
 		generic_err(leaf, slot,
-			    "invalid root item size, have %u expect %zu",
-			    btrfs_item_size_nr(leaf, slot), sizeof(ri));
+			    "invalid root item size, have %u expect %zu or %u",
+			    btrfs_item_size_nr(leaf, slot), sizeof(ri),
+			    btrfs_legacy_root_item_size());
 	}
 
+	/*
+	 * For legacy root item, the members starting at generation_v2 will be
+	 * all filled with 0.
+	 * And since we allow generation_v2 as 0, it will still pass the check.
+	 */
 	read_extent_buffer(leaf, &ri, btrfs_item_ptr_offset(leaf, slot),
-			   sizeof(ri));
+			   btrfs_item_size_nr(leaf, slot));
 
 	/* Generation related */
 	if (btrfs_root_generation(&ri) >
diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
index 9ba64ca6b4ac..6b885982ece6 100644
--- a/include/uapi/linux/btrfs_tree.h
+++ b/include/uapi/linux/btrfs_tree.h
@@ -4,6 +4,11 @@
 
 #include <linux/btrfs.h>
 #include <linux/types.h>
+#ifdef __KERNEL__
+#include <linux/stddef.h>
+#else
+#include <stddef.h>
+#endif
 
 /*
  * This header contains the structure definitions and constants used
@@ -644,6 +649,15 @@ struct btrfs_root_item {
 	__le64 reserved[8]; /* for future */
 } __attribute__ ((__packed__));
 
+/*
+ * Btrfs root item used to be smaller than current size.  The old format ends
+ * at where member generation_v2 is.
+ */
+static inline __u32 btrfs_legacy_root_item_size(void)
+{
+	return offsetof(struct btrfs_root_item, generation_v2);
+}
+
 /*
  * this is used for both forward and backward root refs
  */
-- 
cgit v1.2.3


From 49f3d12b0f70ea867b891ad2a97f6e51bb564e18 Mon Sep 17 00:00:00 2001
From: Jakub Wilk <jwilk@jwilk.net>
Date: Wed, 7 Oct 2020 07:57:17 +0200
Subject: bpf: Fix typo in uapi/linux/bpf.h

Reported-by: Samanta Navarro <ferivoz@riseup.net>
Signed-off-by: Jakub Wilk <jwilk@jwilk.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20201007055717.7319-1-jwilk@jwilk.net
---
 include/uapi/linux/bpf.h       | 2 +-
 tools/include/uapi/linux/bpf.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c446394135be..d83561e8cd2c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2253,7 +2253,7 @@ union bpf_attr {
  *	Description
  *		This helper is used in programs implementing policies at the
  *		skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
- *		if the verdeict eBPF program returns **SK_PASS**), redirect it
+ *		if the verdict eBPF program returns **SK_PASS**), redirect it
  *		to the socket referenced by *map* (of type
  *		**BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
  *		egress interfaces can be used for redirection. The
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c446394135be..d83561e8cd2c 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2253,7 +2253,7 @@ union bpf_attr {
  *	Description
  *		This helper is used in programs implementing policies at the
  *		skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
- *		if the verdeict eBPF program returns **SK_PASS**), redirect it
+ *		if the verdict eBPF program returns **SK_PASS**), redirect it
  *		to the socket referenced by *map* (of type
  *		**BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
  *		egress interfaces can be used for redirection. The
-- 
cgit v1.2.3


From fb1ff4c1941573aea59e4cb575dc5a723303cd70 Mon Sep 17 00:00:00 2001
From: Bharat Bhushan <Bharat.Bhushan@nxp.com>
Date: Mon, 5 Oct 2020 20:36:45 +0300
Subject: vfio/fsl-mc: Add VFIO framework skeleton for fsl-mc devices

DPAA2 (Data Path Acceleration Architecture) consists of
mechanisms for processing Ethernet packets, queue management,
accelerators, etc.

The Management Complex (mc) is a hardware entity that manages the DPAA2
hardware resources. It provides an object-based abstraction for software
drivers to use the DPAA2 hardware. The MC mediates operations such as
create, discover, destroy of DPAA2 objects.
The MC provides memory-mapped I/O command interfaces (MC portals) which
DPAA2 software drivers use to operate on DPAA2 objects.

A DPRC is a container object that holds other types of DPAA2 objects.
Each object in the DPRC is a Linux device and bound to a driver.
The MC-bus driver is a platform driver (different from PCI or platform
bus). The DPRC driver does runtime management of a bus instance. It
performs the initial scan of the DPRC and handles changes in the DPRC
configuration (adding/removing objects).

All objects inside a container share the same hardware isolation
context, meaning that only an entire DPRC can be assigned to
a virtual machine.
When a container is assigned to a virtual machine, all the objects
within that container are assigned to that virtual machine.
The DPRC container assigned to the virtual machine is not allowed
to change contents (add/remove objects) by the guest. The restriction
is set by the host and enforced by the mc hardware.

The DPAA2 objects can be directly assigned to the guest. However
the MC portals (the memory mapped command interface to the MC) need
to be emulated because there are commands that configure the
interrupts and the isolation IDs which are virtual in the guest.

Example:
echo vfio-fsl-mc > /sys/bus/fsl-mc/devices/dprc.2/driver_override
echo dprc.2 > /sys/bus/fsl-mc/drivers/vfio-fsl-mc/bind

The dprc.2 is bound to the VFIO driver and all the objects within
dprc.2 are going to be bound to the VFIO driver.

This patch adds the infrastructure for VFIO support for fsl-mc
devices. Subsequent patches will add support for binding and secure
assigning these devices using VFIO.

More details about the DPAA2 objects can be found here:
Documentation/networking/device_drivers/freescale/dpaa2/overview.rst

Signed-off-by: Bharat Bhushan <Bharat.Bhushan@nxp.com>
Signed-off-by: Diana Craciun <diana.craciun@oss.nxp.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 MAINTAINERS                               |   6 ++
 drivers/vfio/Kconfig                      |   1 +
 drivers/vfio/Makefile                     |   1 +
 drivers/vfio/fsl-mc/Kconfig               |   9 ++
 drivers/vfio/fsl-mc/Makefile              |   4 +
 drivers/vfio/fsl-mc/vfio_fsl_mc.c         | 157 ++++++++++++++++++++++++++++++
 drivers/vfio/fsl-mc/vfio_fsl_mc_private.h |  14 +++
 include/uapi/linux/vfio.h                 |   1 +
 8 files changed, 193 insertions(+)
 create mode 100644 drivers/vfio/fsl-mc/Kconfig
 create mode 100644 drivers/vfio/fsl-mc/Makefile
 create mode 100644 drivers/vfio/fsl-mc/vfio_fsl_mc.c
 create mode 100644 drivers/vfio/fsl-mc/vfio_fsl_mc_private.h

(limited to 'include/uapi')

diff --git a/MAINTAINERS b/MAINTAINERS
index d746519253c3..e955a00af046 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -18260,6 +18260,12 @@ F:	drivers/vfio/
 F:	include/linux/vfio.h
 F:	include/uapi/linux/vfio.h
 
+VFIO FSL-MC DRIVER
+M:	Diana Craciun <diana.craciun@oss.nxp.com>
+L:	kvm@vger.kernel.org
+S:	Maintained
+F:	drivers/vfio/fsl-mc/
+
 VFIO MEDIATED DEVICE DRIVERS
 M:	Kirti Wankhede <kwankhede@nvidia.com>
 L:	kvm@vger.kernel.org
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index fd17db9b432f..5533df91b257 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -47,4 +47,5 @@ menuconfig VFIO_NOIOMMU
 source "drivers/vfio/pci/Kconfig"
 source "drivers/vfio/platform/Kconfig"
 source "drivers/vfio/mdev/Kconfig"
+source "drivers/vfio/fsl-mc/Kconfig"
 source "virt/lib/Kconfig"
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index de67c4725cce..fee73f3d9480 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -9,3 +9,4 @@ obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o
 obj-$(CONFIG_VFIO_PCI) += pci/
 obj-$(CONFIG_VFIO_PLATFORM) += platform/
 obj-$(CONFIG_VFIO_MDEV) += mdev/
+obj-$(CONFIG_VFIO_FSL_MC) += fsl-mc/
diff --git a/drivers/vfio/fsl-mc/Kconfig b/drivers/vfio/fsl-mc/Kconfig
new file mode 100644
index 000000000000..b1a527d6b6f2
--- /dev/null
+++ b/drivers/vfio/fsl-mc/Kconfig
@@ -0,0 +1,9 @@
+config VFIO_FSL_MC
+	tristate "VFIO support for QorIQ DPAA2 fsl-mc bus devices"
+	depends on VFIO && FSL_MC_BUS && EVENTFD
+	help
+	  Driver to enable support for the VFIO QorIQ DPAA2 fsl-mc
+	  (Management Complex) devices. This is required to passthrough
+	  fsl-mc bus devices using the VFIO framework.
+
+	  If you don't know what to do here, say N.
diff --git a/drivers/vfio/fsl-mc/Makefile b/drivers/vfio/fsl-mc/Makefile
new file mode 100644
index 000000000000..0c6e5d2ddaae
--- /dev/null
+++ b/drivers/vfio/fsl-mc/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+
+vfio-fsl-mc-y := vfio_fsl_mc.o
+obj-$(CONFIG_VFIO_FSL_MC) += vfio-fsl-mc.o
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
new file mode 100644
index 000000000000..a7a483a1e90b
--- /dev/null
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/*
+ * Copyright 2013-2016 Freescale Semiconductor Inc.
+ * Copyright 2016-2017,2019-2020 NXP
+ */
+
+#include <linux/device.h>
+#include <linux/iommu.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/vfio.h>
+#include <linux/fsl/mc.h>
+
+#include "vfio_fsl_mc_private.h"
+
+static int vfio_fsl_mc_open(void *device_data)
+{
+	if (!try_module_get(THIS_MODULE))
+		return -ENODEV;
+
+	return 0;
+}
+
+static void vfio_fsl_mc_release(void *device_data)
+{
+	module_put(THIS_MODULE);
+}
+
+static long vfio_fsl_mc_ioctl(void *device_data, unsigned int cmd,
+			      unsigned long arg)
+{
+	switch (cmd) {
+	case VFIO_DEVICE_GET_INFO:
+	{
+		return -ENOTTY;
+	}
+	case VFIO_DEVICE_GET_REGION_INFO:
+	{
+		return -ENOTTY;
+	}
+	case VFIO_DEVICE_GET_IRQ_INFO:
+	{
+		return -ENOTTY;
+	}
+	case VFIO_DEVICE_SET_IRQS:
+	{
+		return -ENOTTY;
+	}
+	case VFIO_DEVICE_RESET:
+	{
+		return -ENOTTY;
+	}
+	default:
+		return -ENOTTY;
+	}
+}
+
+static ssize_t vfio_fsl_mc_read(void *device_data, char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	return -EINVAL;
+}
+
+static ssize_t vfio_fsl_mc_write(void *device_data, const char __user *buf,
+				 size_t count, loff_t *ppos)
+{
+	return -EINVAL;
+}
+
+static int vfio_fsl_mc_mmap(void *device_data, struct vm_area_struct *vma)
+{
+	return -EINVAL;
+}
+
+static const struct vfio_device_ops vfio_fsl_mc_ops = {
+	.name		= "vfio-fsl-mc",
+	.open		= vfio_fsl_mc_open,
+	.release	= vfio_fsl_mc_release,
+	.ioctl		= vfio_fsl_mc_ioctl,
+	.read		= vfio_fsl_mc_read,
+	.write		= vfio_fsl_mc_write,
+	.mmap		= vfio_fsl_mc_mmap,
+};
+
+static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev)
+{
+	struct iommu_group *group;
+	struct vfio_fsl_mc_device *vdev;
+	struct device *dev = &mc_dev->dev;
+	int ret;
+
+	group = vfio_iommu_group_get(dev);
+	if (!group) {
+		dev_err(dev, "VFIO_FSL_MC: No IOMMU group\n");
+		return -EINVAL;
+	}
+
+	vdev = devm_kzalloc(dev, sizeof(*vdev), GFP_KERNEL);
+	if (!vdev) {
+		ret = -ENOMEM;
+		goto out_group_put;
+	}
+
+	vdev->mc_dev = mc_dev;
+
+	ret = vfio_add_group_dev(dev, &vfio_fsl_mc_ops, vdev);
+	if (ret) {
+		dev_err(dev, "VFIO_FSL_MC: Failed to add to vfio group\n");
+		goto out_group_put;
+	}
+	return 0;
+
+out_group_put:
+	vfio_iommu_group_put(group, dev);
+	return ret;
+}
+
+static int vfio_fsl_mc_remove(struct fsl_mc_device *mc_dev)
+{
+	struct vfio_fsl_mc_device *vdev;
+	struct device *dev = &mc_dev->dev;
+
+	vdev = vfio_del_group_dev(dev);
+	if (!vdev)
+		return -EINVAL;
+
+	vfio_iommu_group_put(mc_dev->dev.iommu_group, dev);
+
+	return 0;
+}
+
+static struct fsl_mc_driver vfio_fsl_mc_driver = {
+	.probe		= vfio_fsl_mc_probe,
+	.remove		= vfio_fsl_mc_remove,
+	.driver	= {
+		.name	= "vfio-fsl-mc",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init vfio_fsl_mc_driver_init(void)
+{
+	return fsl_mc_driver_register(&vfio_fsl_mc_driver);
+}
+
+static void __exit vfio_fsl_mc_driver_exit(void)
+{
+	fsl_mc_driver_unregister(&vfio_fsl_mc_driver);
+}
+
+module_init(vfio_fsl_mc_driver_init);
+module_exit(vfio_fsl_mc_driver_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("VFIO for FSL-MC devices - User Level meta-driver");
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
new file mode 100644
index 000000000000..e79cc116f6b8
--- /dev/null
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright 2013-2016 Freescale Semiconductor Inc.
+ * Copyright 2016,2019-2020 NXP
+ */
+
+#ifndef VFIO_FSL_MC_PRIVATE_H
+#define VFIO_FSL_MC_PRIVATE_H
+
+struct vfio_fsl_mc_device {
+	struct fsl_mc_device		*mc_dev;
+};
+
+#endif /* VFIO_FSL_MC_PRIVATE_H */
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 920470502329..95deac891378 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -201,6 +201,7 @@ struct vfio_device_info {
 #define VFIO_DEVICE_FLAGS_AMBA  (1 << 3)	/* vfio-amba device */
 #define VFIO_DEVICE_FLAGS_CCW	(1 << 4)	/* vfio-ccw device */
 #define VFIO_DEVICE_FLAGS_AP	(1 << 5)	/* vfio-ap device */
+#define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6)	/* vfio-fsl-mc device */
 	__u32	num_regions;	/* Max region index + 1 */
 	__u32	num_irqs;	/* Max IRQ index + 1 */
 };
-- 
cgit v1.2.3


From 0c633f0be1dc70a6db46d90dba4cdae82073350a Mon Sep 17 00:00:00 2001
From: Matthew Rosato <mjrosato@linux.ibm.com>
Date: Wed, 7 Oct 2020 14:56:22 -0400
Subject: vfio: Introduce capability definitions for VFIO_DEVICE_GET_INFO

Allow the VFIO_DEVICE_GET_INFO ioctl to include a capability chain.
Add a flag indicating capability chain support, and introduce the
definitions for the first set of capabilities which are specific to
s390 zPCI devices.

Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 include/uapi/linux/vfio.h      | 11 ++++++
 include/uapi/linux/vfio_zdev.h | 78 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 89 insertions(+)
 create mode 100644 include/uapi/linux/vfio_zdev.h

(limited to 'include/uapi')

diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 920470502329..04fbe425ad0c 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -201,8 +201,10 @@ struct vfio_device_info {
 #define VFIO_DEVICE_FLAGS_AMBA  (1 << 3)	/* vfio-amba device */
 #define VFIO_DEVICE_FLAGS_CCW	(1 << 4)	/* vfio-ccw device */
 #define VFIO_DEVICE_FLAGS_AP	(1 << 5)	/* vfio-ap device */
+#define VFIO_DEVICE_FLAGS_CAPS	(1 << 7)	/* Info supports caps */
 	__u32	num_regions;	/* Max region index + 1 */
 	__u32	num_irqs;	/* Max IRQ index + 1 */
+	__u32   cap_offset;	/* Offset within info struct of first cap */
 };
 #define VFIO_DEVICE_GET_INFO		_IO(VFIO_TYPE, VFIO_BASE + 7)
 
@@ -218,6 +220,15 @@ struct vfio_device_info {
 #define VFIO_DEVICE_API_CCW_STRING		"vfio-ccw"
 #define VFIO_DEVICE_API_AP_STRING		"vfio-ap"
 
+/*
+ * The following capabilities are unique to s390 zPCI devices.  Their contents
+ * are further-defined in vfio_zdev.h
+ */
+#define VFIO_DEVICE_INFO_CAP_ZPCI_BASE		1
+#define VFIO_DEVICE_INFO_CAP_ZPCI_GROUP		2
+#define VFIO_DEVICE_INFO_CAP_ZPCI_UTIL		3
+#define VFIO_DEVICE_INFO_CAP_ZPCI_PFIP		4
+
 /**
  * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
  *				       struct vfio_region_info)
diff --git a/include/uapi/linux/vfio_zdev.h b/include/uapi/linux/vfio_zdev.h
new file mode 100644
index 000000000000..b4309397b6b2
--- /dev/null
+++ b/include/uapi/linux/vfio_zdev.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * VFIO Region definitions for ZPCI devices
+ *
+ * Copyright IBM Corp. 2020
+ *
+ * Author(s): Pierre Morel <pmorel@linux.ibm.com>
+ *            Matthew Rosato <mjrosato@linux.ibm.com>
+ */
+
+#ifndef _VFIO_ZDEV_H_
+#define _VFIO_ZDEV_H_
+
+#include <linux/types.h>
+#include <linux/vfio.h>
+
+/**
+ * VFIO_DEVICE_INFO_CAP_ZPCI_BASE - Base PCI Function information
+ *
+ * This capability provides a set of descriptive information about the
+ * associated PCI function.
+ */
+struct vfio_device_info_cap_zpci_base {
+	struct vfio_info_cap_header header;
+	__u64 start_dma;	/* Start of available DMA addresses */
+	__u64 end_dma;		/* End of available DMA addresses */
+	__u16 pchid;		/* Physical Channel ID */
+	__u16 vfn;		/* Virtual function number */
+	__u16 fmb_length;	/* Measurement Block Length (in bytes) */
+	__u8 pft;		/* PCI Function Type */
+	__u8 gid;		/* PCI function group ID */
+};
+
+/**
+ * VFIO_DEVICE_INFO_CAP_ZPCI_GROUP - Base PCI Function Group information
+ *
+ * This capability provides a set of descriptive information about the group of
+ * PCI functions that the associated device belongs to.
+ */
+struct vfio_device_info_cap_zpci_group {
+	struct vfio_info_cap_header header;
+	__u64 dasm;		/* DMA Address space mask */
+	__u64 msi_addr;		/* MSI address */
+	__u64 flags;
+#define VFIO_DEVICE_INFO_ZPCI_FLAG_REFRESH 1 /* Program-specified TLB refresh */
+	__u16 mui;		/* Measurement Block Update Interval */
+	__u16 noi;		/* Maximum number of MSIs */
+	__u16 maxstbl;		/* Maximum Store Block Length */
+	__u8 version;		/* Supported PCI Version */
+};
+
+/**
+ * VFIO_DEVICE_INFO_CAP_ZPCI_UTIL - Utility String
+ *
+ * This capability provides the utility string for the associated device, which
+ * is a device identifier string made up of EBCDIC characters.  'size' specifies
+ * the length of 'util_str'.
+ */
+struct vfio_device_info_cap_zpci_util {
+	struct vfio_info_cap_header header;
+	__u32 size;
+	__u8 util_str[];
+};
+
+/**
+ * VFIO_DEVICE_INFO_CAP_ZPCI_PFIP - PCI Function Path
+ *
+ * This capability provides the PCI function path string, which is an identifier
+ * that describes the internal hardware path of the device. 'size' specifies
+ * the length of 'pfip'.
+ */
+struct vfio_device_info_cap_zpci_pfip {
+	struct vfio_info_cap_header header;
+	__u32 size;
+	__u8 pfip[];
+};
+
+#endif
-- 
cgit v1.2.3


From e057dd3fc20ffb3d7f150af46542a51b59b90127 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Mon, 28 Sep 2020 22:04:04 +0200
Subject: can: add ISO 15765-2:2016 transport protocol

CAN Transport Protocols offer support for segmented Point-to-Point
communication between CAN nodes via two defined CAN Identifiers.
As CAN frames can only transport a small amount of data bytes
(max. 8 bytes for 'classic' CAN and max. 64 bytes for CAN FD) this
segmentation is needed to transport longer PDUs as needed e.g. for
vehicle diagnosis (UDS, ISO 14229) or IP-over-CAN traffic.
This protocol driver implements data transfers according to
ISO 15765-2:2016 for 'classic' CAN and CAN FD frame types.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Link: https://lore.kernel.org/r/20200928200404.82229-1-socketcan@hartkopp.net
[mkl: Removed "WITH Linux-syscall-note" from isotp.c.
      Fixed indention, a checkpatch warning and typos.
      Replaced __u{8,32} by u{8,32}.
      Removed always false (optlen < 0) check in isotp_setsockopt().]
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 MAINTAINERS                    |    1 +
 include/uapi/linux/can/isotp.h |  166 +++++
 net/can/Kconfig                |   13 +
 net/can/Makefile               |    3 +
 net/can/isotp.c                | 1426 ++++++++++++++++++++++++++++++++++++++++
 5 files changed, 1609 insertions(+)
 create mode 100644 include/uapi/linux/can/isotp.h
 create mode 100644 net/can/isotp.c

(limited to 'include/uapi')

diff --git a/MAINTAINERS b/MAINTAINERS
index d651a0934be7..7a8a53adba91 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3912,6 +3912,7 @@ F:	include/net/netns/can.h
 F:	include/uapi/linux/can.h
 F:	include/uapi/linux/can/bcm.h
 F:	include/uapi/linux/can/gw.h
+F:	include/uapi/linux/can/isotp.h
 F:	include/uapi/linux/can/raw.h
 F:	net/can/
 
diff --git a/include/uapi/linux/can/isotp.h b/include/uapi/linux/can/isotp.h
new file mode 100644
index 000000000000..553006509f4e
--- /dev/null
+++ b/include/uapi/linux/can/isotp.h
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: ((GPL-2.0-only WITH Linux-syscall-note) OR BSD-3-Clause) */
+/*
+ * linux/can/isotp.h
+ *
+ * Definitions for isotp CAN sockets (ISO 15765-2:2016)
+ *
+ * Copyright (c) 2020 Volkswagen Group Electronic Research
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Volkswagen nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * Alternatively, provided that this notice is retained in full, this
+ * software may be distributed under the terms of the GNU General
+ * Public License ("GPL") version 2, in which case the provisions of the
+ * GPL apply INSTEAD OF those given above.
+ *
+ * The provided data structures and external interfaces from this code
+ * are not restricted to be used by modules with a GPL compatible license.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+#ifndef _UAPI_CAN_ISOTP_H
+#define _UAPI_CAN_ISOTP_H
+
+#include <linux/types.h>
+#include <linux/can.h>
+
+#define SOL_CAN_ISOTP (SOL_CAN_BASE + CAN_ISOTP)
+
+/* for socket options affecting the socket (not the global system) */
+
+#define CAN_ISOTP_OPTS		1	/* pass struct can_isotp_options */
+
+#define CAN_ISOTP_RECV_FC	2	/* pass struct can_isotp_fc_options */
+
+/* sockopts to force stmin timer values for protocol regression tests */
+
+#define CAN_ISOTP_TX_STMIN	3	/* pass __u32 value in nano secs    */
+					/* use this time instead of value   */
+					/* provided in FC from the receiver */
+
+#define CAN_ISOTP_RX_STMIN	4	/* pass __u32 value in nano secs   */
+					/* ignore received CF frames which */
+					/* timestamps differ less than val */
+
+#define CAN_ISOTP_LL_OPTS	5	/* pass struct can_isotp_ll_options */
+
+struct can_isotp_options {
+
+	__u32 flags;		/* set flags for isotp behaviour.	*/
+				/* __u32 value : flags see below	*/
+
+	__u32 frame_txtime;	/* frame transmission time (N_As/N_Ar)	*/
+				/* __u32 value : time in nano secs	*/
+
+	__u8  ext_address;	/* set address for extended addressing	*/
+				/* __u8 value : extended address	*/
+
+	__u8  txpad_content;	/* set content of padding byte (tx)	*/
+				/* __u8 value : content	on tx path	*/
+
+	__u8  rxpad_content;	/* set content of padding byte (rx)	*/
+				/* __u8 value : content	on rx path	*/
+
+	__u8  rx_ext_address;	/* set address for extended addressing	*/
+				/* __u8 value : extended address (rx)	*/
+};
+
+struct can_isotp_fc_options {
+
+	__u8  bs;		/* blocksize provided in FC frame	*/
+				/* __u8 value : blocksize. 0 = off	*/
+
+	__u8  stmin;		/* separation time provided in FC frame	*/
+				/* __u8 value :				*/
+				/* 0x00 - 0x7F : 0 - 127 ms		*/
+				/* 0x80 - 0xF0 : reserved		*/
+				/* 0xF1 - 0xF9 : 100 us - 900 us	*/
+				/* 0xFA - 0xFF : reserved		*/
+
+	__u8  wftmax;		/* max. number of wait frame transmiss.	*/
+				/* __u8 value : 0 = omit FC N_PDU WT	*/
+};
+
+struct can_isotp_ll_options {
+
+	__u8  mtu;		/* generated & accepted CAN frame type	*/
+				/* __u8 value :				*/
+				/* CAN_MTU   (16) -> standard CAN 2.0	*/
+				/* CANFD_MTU (72) -> CAN FD frame	*/
+
+	__u8  tx_dl;		/* tx link layer data length in bytes	*/
+				/* (configured maximum payload length)	*/
+				/* __u8 value : 8,12,16,20,24,32,48,64	*/
+				/* => rx path supports all LL_DL values */
+
+	__u8  tx_flags;		/* set into struct canfd_frame.flags	*/
+				/* at frame creation: e.g. CANFD_BRS	*/
+				/* Obsolete when the BRS flag is fixed	*/
+				/* by the CAN netdriver configuration	*/
+};
+
+/* flags for isotp behaviour */
+
+#define CAN_ISOTP_LISTEN_MODE	0x001	/* listen only (do not send FC) */
+#define CAN_ISOTP_EXTEND_ADDR	0x002	/* enable extended addressing */
+#define CAN_ISOTP_TX_PADDING	0x004	/* enable CAN frame padding tx path */
+#define CAN_ISOTP_RX_PADDING	0x008	/* enable CAN frame padding rx path */
+#define CAN_ISOTP_CHK_PAD_LEN	0x010	/* check rx CAN frame padding length */
+#define CAN_ISOTP_CHK_PAD_DATA	0x020	/* check rx CAN frame padding data */
+#define CAN_ISOTP_HALF_DUPLEX	0x040	/* half duplex error state handling */
+#define CAN_ISOTP_FORCE_TXSTMIN	0x080	/* ignore stmin from received FC */
+#define CAN_ISOTP_FORCE_RXSTMIN	0x100	/* ignore CFs depending on rx stmin */
+#define CAN_ISOTP_RX_EXT_ADDR	0x200	/* different rx extended addressing */
+#define CAN_ISOTP_WAIT_TX_DONE	0x400	/* wait for tx completion */
+
+
+/* default values */
+
+#define CAN_ISOTP_DEFAULT_FLAGS		0
+#define CAN_ISOTP_DEFAULT_EXT_ADDRESS	0x00
+#define CAN_ISOTP_DEFAULT_PAD_CONTENT	0xCC /* prevent bit-stuffing */
+#define CAN_ISOTP_DEFAULT_FRAME_TXTIME	0
+#define CAN_ISOTP_DEFAULT_RECV_BS	0
+#define CAN_ISOTP_DEFAULT_RECV_STMIN	0x00
+#define CAN_ISOTP_DEFAULT_RECV_WFTMAX	0
+
+#define CAN_ISOTP_DEFAULT_LL_MTU	CAN_MTU
+#define CAN_ISOTP_DEFAULT_LL_TX_DL	CAN_MAX_DLEN
+#define CAN_ISOTP_DEFAULT_LL_TX_FLAGS	0
+
+/*
+ * Remark on CAN_ISOTP_DEFAULT_RECV_* values:
+ *
+ * We can strongly assume, that the Linux Kernel implementation of
+ * CAN_ISOTP is capable to run with BS=0, STmin=0 and WFTmax=0.
+ * But as we like to be able to behave as a commonly available ECU,
+ * these default settings can be changed via sockopts.
+ * For that reason the STmin value is intentionally _not_ checked for
+ * consistency and copied directly into the flow control (FC) frame.
+ *
+ */
+
+#endif /* !_UAPI_CAN_ISOTP_H */
diff --git a/net/can/Kconfig b/net/can/Kconfig
index 25436a715db3..021fe03a8ed6 100644
--- a/net/can/Kconfig
+++ b/net/can/Kconfig
@@ -55,6 +55,19 @@ config CAN_GW
 
 source "net/can/j1939/Kconfig"
 
+config CAN_ISOTP
+	tristate "ISO 15765-2:2016 CAN transport protocol"
+	default y
+	help
+	  CAN Transport Protocols offer support for segmented Point-to-Point
+	  communication between CAN nodes via two defined CAN Identifiers.
+	  As CAN frames can only transport a small amount of data bytes
+	  (max. 8 bytes for 'classic' CAN and max. 64 bytes for CAN FD) this
+	  segmentation is needed to transport longer PDUs as needed e.g. for
+	  vehicle diagnosis (UDS, ISO 14229) or IP-over-CAN traffic.
+	  This protocol driver implements data transfers according to
+	  ISO 15765-2:2016 for 'classic' CAN and CAN FD frame types.
+
 source "drivers/net/can/Kconfig"
 
 endif
diff --git a/net/can/Makefile b/net/can/Makefile
index 08bd217fc051..58f2c31c1ef3 100644
--- a/net/can/Makefile
+++ b/net/can/Makefile
@@ -17,3 +17,6 @@ obj-$(CONFIG_CAN_GW)	+= can-gw.o
 can-gw-y		:= gw.o
 
 obj-$(CONFIG_CAN_J1939)	+= j1939/
+
+obj-$(CONFIG_CAN_ISOTP)	+= can-isotp.o
+can-isotp-y		:= isotp.o
diff --git a/net/can/isotp.c b/net/can/isotp.c
new file mode 100644
index 000000000000..e6ff032b5426
--- /dev/null
+++ b/net/can/isotp.c
@@ -0,0 +1,1426 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/* isotp.c - ISO 15765-2 CAN transport protocol for protocol family CAN
+ *
+ * This implementation does not provide ISO-TP specific return values to the
+ * userspace.
+ *
+ * - RX path timeout of data reception leads to -ETIMEDOUT
+ * - RX path SN mismatch leads to -EILSEQ
+ * - RX path data reception with wrong padding leads to -EBADMSG
+ * - TX path flowcontrol reception timeout leads to -ECOMM
+ * - TX path flowcontrol reception overflow leads to -EMSGSIZE
+ * - TX path flowcontrol reception with wrong layout/padding leads to -EBADMSG
+ * - when a transfer (tx) is on the run the next write() blocks until it's done
+ * - use CAN_ISOTP_WAIT_TX_DONE flag to block the caller until the PDU is sent
+ * - as we have static buffers the check whether the PDU fits into the buffer
+ *   is done at FF reception time (no support for sending 'wait frames')
+ * - take care of the tx-queue-len as traffic shaping is still on the TODO list
+ *
+ * Copyright (c) 2020 Volkswagen Group Electronic Research
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Volkswagen nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * Alternatively, provided that this notice is retained in full, this
+ * software may be distributed under the terms of the GNU General
+ * Public License ("GPL") version 2, in which case the provisions of the
+ * GPL apply INSTEAD OF those given above.
+ *
+ * The provided data structures and external interfaces from this code
+ * are not restricted to be used by modules with a GPL compatible license.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/hrtimer.h>
+#include <linux/wait.h>
+#include <linux/uio.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/socket.h>
+#include <linux/if_arp.h>
+#include <linux/skbuff.h>
+#include <linux/can.h>
+#include <linux/can/core.h>
+#include <linux/can/skb.h>
+#include <linux/can/isotp.h>
+#include <linux/slab.h>
+#include <net/sock.h>
+#include <net/net_namespace.h>
+
+#define CAN_ISOTP_VERSION "20200928"
+
+MODULE_DESCRIPTION("PF_CAN isotp 15765-2:2016 protocol");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Oliver Hartkopp <socketcan@hartkopp.net>");
+MODULE_ALIAS("can-proto-6");
+
+#define SINGLE_MASK(id) (((id) & CAN_EFF_FLAG) ? \
+			 (CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG) : \
+			 (CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG))
+
+/* ISO 15765-2:2016 supports more than 4095 byte per ISO PDU as the FF_DL can
+ * take full 32 bit values (4 Gbyte). We would need some good concept to handle
+ * this between user space and kernel space. For now increase the static buffer
+ * to something about 8 kbyte to be able to test this new functionality.
+ */
+#define MAX_MSG_LENGTH 8200
+
+/* N_PCI type values in bits 7-4 of N_PCI bytes */
+#define N_PCI_SF 0x00	/* single frame */
+#define N_PCI_FF 0x10	/* first frame */
+#define N_PCI_CF 0x20	/* consecutive frame */
+#define N_PCI_FC 0x30	/* flow control */
+
+#define N_PCI_SZ 1	/* size of the PCI byte #1 */
+#define SF_PCI_SZ4 1	/* size of SingleFrame PCI including 4 bit SF_DL */
+#define SF_PCI_SZ8 2	/* size of SingleFrame PCI including 8 bit SF_DL */
+#define FF_PCI_SZ12 2	/* size of FirstFrame PCI including 12 bit FF_DL */
+#define FF_PCI_SZ32 6	/* size of FirstFrame PCI including 32 bit FF_DL */
+#define FC_CONTENT_SZ 3	/* flow control content size in byte (FS/BS/STmin) */
+
+#define ISOTP_CHECK_PADDING (CAN_ISOTP_CHK_PAD_LEN | CAN_ISOTP_CHK_PAD_DATA)
+
+/* Flow Status given in FC frame */
+#define ISOTP_FC_CTS 0		/* clear to send */
+#define ISOTP_FC_WT 1		/* wait */
+#define ISOTP_FC_OVFLW 2	/* overflow */
+
+enum {
+	ISOTP_IDLE = 0,
+	ISOTP_WAIT_FIRST_FC,
+	ISOTP_WAIT_FC,
+	ISOTP_WAIT_DATA,
+	ISOTP_SENDING
+};
+
+struct tpcon {
+	int idx;
+	int len;
+	u8 state;
+	u8 bs;
+	u8 sn;
+	u8 ll_dl;
+	u8 buf[MAX_MSG_LENGTH + 1];
+};
+
+struct isotp_sock {
+	struct sock sk;
+	int bound;
+	int ifindex;
+	canid_t txid;
+	canid_t rxid;
+	ktime_t tx_gap;
+	ktime_t lastrxcf_tstamp;
+	struct hrtimer rxtimer, txtimer;
+	struct can_isotp_options opt;
+	struct can_isotp_fc_options rxfc, txfc;
+	struct can_isotp_ll_options ll;
+	u32 force_tx_stmin;
+	u32 force_rx_stmin;
+	struct tpcon rx, tx;
+	struct notifier_block notifier;
+	wait_queue_head_t wait;
+};
+
+static inline struct isotp_sock *isotp_sk(const struct sock *sk)
+{
+	return (struct isotp_sock *)sk;
+}
+
+static enum hrtimer_restart isotp_rx_timer_handler(struct hrtimer *hrtimer)
+{
+	struct isotp_sock *so = container_of(hrtimer, struct isotp_sock,
+					     rxtimer);
+	struct sock *sk = &so->sk;
+
+	if (so->rx.state == ISOTP_WAIT_DATA) {
+		/* we did not get new data frames in time */
+
+		/* report 'connection timed out' */
+		sk->sk_err = ETIMEDOUT;
+		if (!sock_flag(sk, SOCK_DEAD))
+			sk->sk_error_report(sk);
+
+		/* reset rx state */
+		so->rx.state = ISOTP_IDLE;
+	}
+
+	return HRTIMER_NORESTART;
+}
+
+static int isotp_send_fc(struct sock *sk, int ae, u8 flowstatus)
+{
+	struct net_device *dev;
+	struct sk_buff *nskb;
+	struct canfd_frame *ncf;
+	struct isotp_sock *so = isotp_sk(sk);
+	int can_send_ret;
+
+	nskb = alloc_skb(so->ll.mtu + sizeof(struct can_skb_priv), gfp_any());
+	if (!nskb)
+		return 1;
+
+	dev = dev_get_by_index(sock_net(sk), so->ifindex);
+	if (!dev) {
+		kfree_skb(nskb);
+		return 1;
+	}
+
+	can_skb_reserve(nskb);
+	can_skb_prv(nskb)->ifindex = dev->ifindex;
+	can_skb_prv(nskb)->skbcnt = 0;
+
+	nskb->dev = dev;
+	can_skb_set_owner(nskb, sk);
+	ncf = (struct canfd_frame *)nskb->data;
+	skb_put_zero(nskb, so->ll.mtu);
+
+	/* create & send flow control reply (frame is zeroed by skb_put_zero) */
+	ncf->can_id = so->txid;
+
+	if (so->opt.flags & CAN_ISOTP_TX_PADDING) {
+		memset(ncf->data, so->opt.txpad_content, CAN_MAX_DLEN);
+		ncf->len = CAN_MAX_DLEN;
+	} else {
+		ncf->len = ae + FC_CONTENT_SZ;
+	}
+
+	ncf->data[ae] = N_PCI_FC | flowstatus;
+	ncf->data[ae + 1] = so->rxfc.bs;
+	ncf->data[ae + 2] = so->rxfc.stmin;
+
+	if (ae)
+		ncf->data[0] = so->opt.ext_address;
+
+	if (so->ll.mtu == CANFD_MTU)
+		ncf->flags = so->ll.tx_flags;
+
+	can_send_ret = can_send(nskb, 1);
+	if (can_send_ret)
+		printk_once(KERN_NOTICE "can-isotp: %s: can_send_ret %d\n",
+			    __func__, can_send_ret);
+
+	dev_put(dev);
+
+	/* reset blocksize counter */
+	so->rx.bs = 0;
+
+	/* reset last CF frame rx timestamp for rx stmin enforcement */
+	so->lastrxcf_tstamp = ktime_set(0, 0);
+
+	/* start rx timeout watchdog */
+	hrtimer_start(&so->rxtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT);
+	return 0;
+}
+
+static void isotp_rcv_skb(struct sk_buff *skb, struct sock *sk)
+{
+	struct sockaddr_can *addr = (struct sockaddr_can *)skb->cb;
+
+	BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct sockaddr_can));
+
+	memset(addr, 0, sizeof(*addr));
+	addr->can_family = AF_CAN;
+	addr->can_ifindex = skb->dev->ifindex;
+
+	if (sock_queue_rcv_skb(sk, skb) < 0)
+		kfree_skb(skb);
+}
+
+static u8 padlen(u8 datalen)
+{
+	static const u8 plen[] = {8, 8, 8, 8, 8, 8, 8, 8, 8,	/* 0 - 8 */
+			   12, 12, 12, 12,			/* 9 - 12 */
+			   16, 16, 16, 16,			/* 13 - 16 */
+			   20, 20, 20, 20,			/* 17 - 20 */
+			   24, 24, 24, 24,			/* 21 - 24 */
+			   32, 32, 32, 32, 32, 32, 32, 32,	/* 25 - 32 */
+			   48, 48, 48, 48, 48, 48, 48, 48,	/* 33 - 40 */
+			   48, 48, 48, 48, 48, 48, 48, 48};	/* 41 - 48 */
+
+	if (datalen > 48)
+		return 64;
+
+	return plen[datalen];
+}
+
+/* check for length optimization and return 1/true when the check fails */
+static int check_optimized(struct canfd_frame *cf, int start_index)
+{
+	/* for CAN_DL <= 8 the start_index is equal to the CAN_DL as the
+	 * padding would start at this point. E.g. if the padding would
+	 * start at cf.data[7] cf->len has to be 7 to be optimal.
+	 * Note: The data[] index starts with zero.
+	 */
+	if (cf->len <= CAN_MAX_DLEN)
+		return (cf->len != start_index);
+
+	/* This relation is also valid in the non-linear DLC range, where
+	 * we need to take care of the minimal next possible CAN_DL.
+	 * The correct check would be (padlen(cf->len) != padlen(start_index)).
+	 * But as cf->len can only take discrete values from 12, .., 64 at this
+	 * point the padlen(cf->len) is always equal to cf->len.
+	 */
+	return (cf->len != padlen(start_index));
+}
+
+/* check padding and return 1/true when the check fails */
+static int check_pad(struct isotp_sock *so, struct canfd_frame *cf,
+		     int start_index, u8 content)
+{
+	int i;
+
+	/* no RX_PADDING value => check length of optimized frame length */
+	if (!(so->opt.flags & CAN_ISOTP_RX_PADDING)) {
+		if (so->opt.flags & CAN_ISOTP_CHK_PAD_LEN)
+			return check_optimized(cf, start_index);
+
+		/* no valid test against empty value => ignore frame */
+		return 1;
+	}
+
+	/* check datalength of correctly padded CAN frame */
+	if ((so->opt.flags & CAN_ISOTP_CHK_PAD_LEN) &&
+	    cf->len != padlen(cf->len))
+		return 1;
+
+	/* check padding content */
+	if (so->opt.flags & CAN_ISOTP_CHK_PAD_DATA) {
+		for (i = start_index; i < cf->len; i++)
+			if (cf->data[i] != content)
+				return 1;
+	}
+	return 0;
+}
+
+static int isotp_rcv_fc(struct isotp_sock *so, struct canfd_frame *cf, int ae)
+{
+	struct sock *sk = &so->sk;
+
+	if (so->tx.state != ISOTP_WAIT_FC &&
+	    so->tx.state != ISOTP_WAIT_FIRST_FC)
+		return 0;
+
+	hrtimer_cancel(&so->txtimer);
+
+	if ((cf->len < ae + FC_CONTENT_SZ) ||
+	    ((so->opt.flags & ISOTP_CHECK_PADDING) &&
+	     check_pad(so, cf, ae + FC_CONTENT_SZ, so->opt.rxpad_content))) {
+		/* malformed PDU - report 'not a data message' */
+		sk->sk_err = EBADMSG;
+		if (!sock_flag(sk, SOCK_DEAD))
+			sk->sk_error_report(sk);
+
+		so->tx.state = ISOTP_IDLE;
+		wake_up_interruptible(&so->wait);
+		return 1;
+	}
+
+	/* get communication parameters only from the first FC frame */
+	if (so->tx.state == ISOTP_WAIT_FIRST_FC) {
+		so->txfc.bs = cf->data[ae + 1];
+		so->txfc.stmin = cf->data[ae + 2];
+
+		/* fix wrong STmin values according to spec */
+		if (so->txfc.stmin > 0x7F &&
+		    (so->txfc.stmin < 0xF1 || so->txfc.stmin > 0xF9))
+			so->txfc.stmin = 0x7F;
+
+		so->tx_gap = ktime_set(0, 0);
+		/* add transmission time for CAN frame N_As */
+		so->tx_gap = ktime_add_ns(so->tx_gap, so->opt.frame_txtime);
+		/* add waiting time for consecutive frames N_Cs */
+		if (so->opt.flags & CAN_ISOTP_FORCE_TXSTMIN)
+			so->tx_gap = ktime_add_ns(so->tx_gap,
+						  so->force_tx_stmin);
+		else if (so->txfc.stmin < 0x80)
+			so->tx_gap = ktime_add_ns(so->tx_gap,
+						  so->txfc.stmin * 1000000);
+		else
+			so->tx_gap = ktime_add_ns(so->tx_gap,
+						  (so->txfc.stmin - 0xF0)
+						  * 100000);
+		so->tx.state = ISOTP_WAIT_FC;
+	}
+
+	switch (cf->data[ae] & 0x0F) {
+	case ISOTP_FC_CTS:
+		so->tx.bs = 0;
+		so->tx.state = ISOTP_SENDING;
+		/* start cyclic timer for sending CF frame */
+		hrtimer_start(&so->txtimer, so->tx_gap,
+			      HRTIMER_MODE_REL_SOFT);
+		break;
+
+	case ISOTP_FC_WT:
+		/* start timer to wait for next FC frame */
+		hrtimer_start(&so->txtimer, ktime_set(1, 0),
+			      HRTIMER_MODE_REL_SOFT);
+		break;
+
+	case ISOTP_FC_OVFLW:
+		/* overflow on receiver side - report 'message too long' */
+		sk->sk_err = EMSGSIZE;
+		if (!sock_flag(sk, SOCK_DEAD))
+			sk->sk_error_report(sk);
+		fallthrough;
+
+	default:
+		/* stop this tx job */
+		so->tx.state = ISOTP_IDLE;
+		wake_up_interruptible(&so->wait);
+	}
+	return 0;
+}
+
+static int isotp_rcv_sf(struct sock *sk, struct canfd_frame *cf, int pcilen,
+			struct sk_buff *skb, int len)
+{
+	struct isotp_sock *so = isotp_sk(sk);
+	struct sk_buff *nskb;
+
+	hrtimer_cancel(&so->rxtimer);
+	so->rx.state = ISOTP_IDLE;
+
+	if (!len || len > cf->len - pcilen)
+		return 1;
+
+	if ((so->opt.flags & ISOTP_CHECK_PADDING) &&
+	    check_pad(so, cf, pcilen + len, so->opt.rxpad_content)) {
+		/* malformed PDU - report 'not a data message' */
+		sk->sk_err = EBADMSG;
+		if (!sock_flag(sk, SOCK_DEAD))
+			sk->sk_error_report(sk);
+		return 1;
+	}
+
+	nskb = alloc_skb(len, gfp_any());
+	if (!nskb)
+		return 1;
+
+	memcpy(skb_put(nskb, len), &cf->data[pcilen], len);
+
+	nskb->tstamp = skb->tstamp;
+	nskb->dev = skb->dev;
+	isotp_rcv_skb(nskb, sk);
+	return 0;
+}
+
+static int isotp_rcv_ff(struct sock *sk, struct canfd_frame *cf, int ae)
+{
+	struct isotp_sock *so = isotp_sk(sk);
+	int i;
+	int off;
+	int ff_pci_sz;
+
+	hrtimer_cancel(&so->rxtimer);
+	so->rx.state = ISOTP_IDLE;
+
+	/* get the used sender LL_DL from the (first) CAN frame data length */
+	so->rx.ll_dl = padlen(cf->len);
+
+	/* the first frame has to use the entire frame up to LL_DL length */
+	if (cf->len != so->rx.ll_dl)
+		return 1;
+
+	/* get the FF_DL */
+	so->rx.len = (cf->data[ae] & 0x0F) << 8;
+	so->rx.len += cf->data[ae + 1];
+
+	/* Check for FF_DL escape sequence supporting 32 bit PDU length */
+	if (so->rx.len) {
+		ff_pci_sz = FF_PCI_SZ12;
+	} else {
+		/* FF_DL = 0 => get real length from next 4 bytes */
+		so->rx.len = cf->data[ae + 2] << 24;
+		so->rx.len += cf->data[ae + 3] << 16;
+		so->rx.len += cf->data[ae + 4] << 8;
+		so->rx.len += cf->data[ae + 5];
+		ff_pci_sz = FF_PCI_SZ32;
+	}
+
+	/* take care of a potential SF_DL ESC offset for TX_DL > 8 */
+	off = (so->rx.ll_dl > CAN_MAX_DLEN) ? 1 : 0;
+
+	if (so->rx.len + ae + off + ff_pci_sz < so->rx.ll_dl)
+		return 1;
+
+	if (so->rx.len > MAX_MSG_LENGTH) {
+		/* send FC frame with overflow status */
+		isotp_send_fc(sk, ae, ISOTP_FC_OVFLW);
+		return 1;
+	}
+
+	/* copy the first received data bytes */
+	so->rx.idx = 0;
+	for (i = ae + ff_pci_sz; i < so->rx.ll_dl; i++)
+		so->rx.buf[so->rx.idx++] = cf->data[i];
+
+	/* initial setup for this pdu reception */
+	so->rx.sn = 1;
+	so->rx.state = ISOTP_WAIT_DATA;
+
+	/* no creation of flow control frames */
+	if (so->opt.flags & CAN_ISOTP_LISTEN_MODE)
+		return 0;
+
+	/* send our first FC frame */
+	isotp_send_fc(sk, ae, ISOTP_FC_CTS);
+	return 0;
+}
+
+static int isotp_rcv_cf(struct sock *sk, struct canfd_frame *cf, int ae,
+			struct sk_buff *skb)
+{
+	struct isotp_sock *so = isotp_sk(sk);
+	struct sk_buff *nskb;
+	int i;
+
+	if (so->rx.state != ISOTP_WAIT_DATA)
+		return 0;
+
+	/* drop if timestamp gap is less than force_rx_stmin nano secs */
+	if (so->opt.flags & CAN_ISOTP_FORCE_RXSTMIN) {
+		if (ktime_to_ns(ktime_sub(skb->tstamp, so->lastrxcf_tstamp)) <
+		    so->force_rx_stmin)
+			return 0;
+
+		so->lastrxcf_tstamp = skb->tstamp;
+	}
+
+	hrtimer_cancel(&so->rxtimer);
+
+	/* CFs are never longer than the FF */
+	if (cf->len > so->rx.ll_dl)
+		return 1;
+
+	/* CFs have usually the LL_DL length */
+	if (cf->len < so->rx.ll_dl) {
+		/* this is only allowed for the last CF */
+		if (so->rx.len - so->rx.idx > so->rx.ll_dl - ae - N_PCI_SZ)
+			return 1;
+	}
+
+	if ((cf->data[ae] & 0x0F) != so->rx.sn) {
+		/* wrong sn detected - report 'illegal byte sequence' */
+		sk->sk_err = EILSEQ;
+		if (!sock_flag(sk, SOCK_DEAD))
+			sk->sk_error_report(sk);
+
+		/* reset rx state */
+		so->rx.state = ISOTP_IDLE;
+		return 1;
+	}
+	so->rx.sn++;
+	so->rx.sn %= 16;
+
+	for (i = ae + N_PCI_SZ; i < cf->len; i++) {
+		so->rx.buf[so->rx.idx++] = cf->data[i];
+		if (so->rx.idx >= so->rx.len)
+			break;
+	}
+
+	if (so->rx.idx >= so->rx.len) {
+		/* we are done */
+		so->rx.state = ISOTP_IDLE;
+
+		if ((so->opt.flags & ISOTP_CHECK_PADDING) &&
+		    check_pad(so, cf, i + 1, so->opt.rxpad_content)) {
+			/* malformed PDU - report 'not a data message' */
+			sk->sk_err = EBADMSG;
+			if (!sock_flag(sk, SOCK_DEAD))
+				sk->sk_error_report(sk);
+			return 1;
+		}
+
+		nskb = alloc_skb(so->rx.len, gfp_any());
+		if (!nskb)
+			return 1;
+
+		memcpy(skb_put(nskb, so->rx.len), so->rx.buf,
+		       so->rx.len);
+
+		nskb->tstamp = skb->tstamp;
+		nskb->dev = skb->dev;
+		isotp_rcv_skb(nskb, sk);
+		return 0;
+	}
+
+	/* perform blocksize handling, if enabled */
+	if (!so->rxfc.bs || ++so->rx.bs < so->rxfc.bs) {
+		/* start rx timeout watchdog (also needed in listen-only mode) */
+		hrtimer_start(&so->rxtimer, ktime_set(1, 0),
+			      HRTIMER_MODE_REL_SOFT);
+		return 0;
+	}
+
+	/* listen-only mode: no creation of flow control frames */
+	if (so->opt.flags & CAN_ISOTP_LISTEN_MODE)
+		return 0;
+
+	/* we reached the specified blocksize so->rxfc.bs */
+	isotp_send_fc(sk, ae, ISOTP_FC_CTS);
+	return 0;
+}
+
+static void isotp_rcv(struct sk_buff *skb, void *data)
+{
+	struct sock *sk = (struct sock *)data;
+	struct isotp_sock *so = isotp_sk(sk);
+	struct canfd_frame *cf;
+	int ae = (so->opt.flags & CAN_ISOTP_EXTEND_ADDR) ? 1 : 0;
+	u8 n_pci_type, sf_dl;
+
+	/* Strictly receive only frames with the configured MTU size
+	 * => clear separation of CAN2.0 / CAN FD transport channels
+	 */
+	if (skb->len != so->ll.mtu)
+		return;
+
+	cf = (struct canfd_frame *)skb->data;
+
+	/* if enabled: check reception of my configured extended address */
+	if (ae && cf->data[0] != so->opt.rx_ext_address)
+		return;
+
+	n_pci_type = cf->data[ae] & 0xF0;
+
+	if (so->opt.flags & CAN_ISOTP_HALF_DUPLEX) {
+		/* check rx/tx path half duplex expectations */
+		if ((so->tx.state != ISOTP_IDLE && n_pci_type != N_PCI_FC) ||
+		    (so->rx.state != ISOTP_IDLE && n_pci_type == N_PCI_FC))
+			return;
+	}
+
+	switch (n_pci_type) {
+	case N_PCI_FC:
+		/* tx path: flow control frame containing the FC parameters */
+		isotp_rcv_fc(so, cf, ae);
+		break;
+
+	case N_PCI_SF:
+		/* rx path: single frame
+		 *
+		 * As we do not have a rx.ll_dl configuration, we can only test
+		 * if the CAN frame's payload length matches the LL_DL == 8
+		 * requirements - no matter if it's CAN 2.0 or CAN FD
+		 */
+
+		/* get the SF_DL from the N_PCI byte */
+		sf_dl = cf->data[ae] & 0x0F;
+
+		if (cf->len <= CAN_MAX_DLEN) {
+			isotp_rcv_sf(sk, cf, SF_PCI_SZ4 + ae, skb, sf_dl);
+		} else {
+			if (skb->len == CANFD_MTU) {
+				/* We have a CAN FD frame and CAN_DL is greater than 8:
+				 * Only frames with the SF_DL == 0 ESC value are valid.
+				 *
+				 * If so take care of the increased SF PCI size
+				 * (SF_PCI_SZ8) to point to the message content behind
+				 * the extended SF PCI info and get the real SF_DL
+				 * length value from the formerly first data byte.
+				 */
+				if (sf_dl == 0)
+					isotp_rcv_sf(sk, cf, SF_PCI_SZ8 + ae, skb,
+						     cf->data[SF_PCI_SZ4 + ae]);
+			}
+		}
+		break;
+
+	case N_PCI_FF:
+		/* rx path: first frame */
+		isotp_rcv_ff(sk, cf, ae);
+		break;
+
+	case N_PCI_CF:
+		/* rx path: consecutive frame */
+		isotp_rcv_cf(sk, cf, ae, skb);
+		break;
+	}
+}
+
+static void isotp_fill_dataframe(struct canfd_frame *cf, struct isotp_sock *so,
+				 int ae, int off)
+{
+	int pcilen = N_PCI_SZ + ae + off;
+	int space = so->tx.ll_dl - pcilen;
+	int num = min_t(int, so->tx.len - so->tx.idx, space);
+	int i;
+
+	cf->can_id = so->txid;
+	cf->len = num + pcilen;
+
+	if (num < space) {
+		if (so->opt.flags & CAN_ISOTP_TX_PADDING) {
+			/* user requested padding */
+			cf->len = padlen(cf->len);
+			memset(cf->data, so->opt.txpad_content, cf->len);
+		} else if (cf->len > CAN_MAX_DLEN) {
+			/* mandatory padding for CAN FD frames */
+			cf->len = padlen(cf->len);
+			memset(cf->data, CAN_ISOTP_DEFAULT_PAD_CONTENT,
+			       cf->len);
+		}
+	}
+
+	for (i = 0; i < num; i++)
+		cf->data[pcilen + i] = so->tx.buf[so->tx.idx++];
+
+	if (ae)
+		cf->data[0] = so->opt.ext_address;
+}
+
+static void isotp_create_fframe(struct canfd_frame *cf, struct isotp_sock *so,
+				int ae)
+{
+	int i;
+	int ff_pci_sz;
+
+	cf->can_id = so->txid;
+	cf->len = so->tx.ll_dl;
+	if (ae)
+		cf->data[0] = so->opt.ext_address;
+
+	/* create N_PCI bytes with 12/32 bit FF_DL data length */
+	if (so->tx.len > 4095) {
+		/* use 32 bit FF_DL notation */
+		cf->data[ae] = N_PCI_FF;
+		cf->data[ae + 1] = 0;
+		cf->data[ae + 2] = (u8)(so->tx.len >> 24) & 0xFFU;
+		cf->data[ae + 3] = (u8)(so->tx.len >> 16) & 0xFFU;
+		cf->data[ae + 4] = (u8)(so->tx.len >> 8) & 0xFFU;
+		cf->data[ae + 5] = (u8)so->tx.len & 0xFFU;
+		ff_pci_sz = FF_PCI_SZ32;
+	} else {
+		/* use 12 bit FF_DL notation */
+		cf->data[ae] = (u8)(so->tx.len >> 8) | N_PCI_FF;
+		cf->data[ae + 1] = (u8)so->tx.len & 0xFFU;
+		ff_pci_sz = FF_PCI_SZ12;
+	}
+
+	/* add first data bytes depending on ae */
+	for (i = ae + ff_pci_sz; i < so->tx.ll_dl; i++)
+		cf->data[i] = so->tx.buf[so->tx.idx++];
+
+	so->tx.sn = 1;
+	so->tx.state = ISOTP_WAIT_FIRST_FC;
+}
+
+static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer)
+{
+	struct isotp_sock *so = container_of(hrtimer, struct isotp_sock,
+					     txtimer);
+	struct sock *sk = &so->sk;
+	struct sk_buff *skb;
+	struct net_device *dev;
+	struct canfd_frame *cf;
+	enum hrtimer_restart restart = HRTIMER_NORESTART;
+	int can_send_ret;
+	int ae = (so->opt.flags & CAN_ISOTP_EXTEND_ADDR) ? 1 : 0;
+
+	switch (so->tx.state) {
+	case ISOTP_WAIT_FC:
+	case ISOTP_WAIT_FIRST_FC:
+
+		/* we did not get any flow control frame in time */
+
+		/* report 'communication error on send' */
+		sk->sk_err = ECOMM;
+		if (!sock_flag(sk, SOCK_DEAD))
+			sk->sk_error_report(sk);
+
+		/* reset tx state */
+		so->tx.state = ISOTP_IDLE;
+		wake_up_interruptible(&so->wait);
+		break;
+
+	case ISOTP_SENDING:
+
+		/* push out the next segmented pdu */
+		dev = dev_get_by_index(sock_net(sk), so->ifindex);
+		if (!dev)
+			break;
+
+isotp_tx_burst:
+		skb = alloc_skb(so->ll.mtu + sizeof(struct can_skb_priv),
+				gfp_any());
+		if (!skb) {
+			dev_put(dev);
+			break;
+		}
+
+		can_skb_reserve(skb);
+		can_skb_prv(skb)->ifindex = dev->ifindex;
+		can_skb_prv(skb)->skbcnt = 0;
+
+		cf = (struct canfd_frame *)skb->data;
+		skb_put(skb, so->ll.mtu);
+
+		/* create consecutive frame */
+		isotp_fill_dataframe(cf, so, ae, 0);
+
+		/* place consecutive frame N_PCI in appropriate index */
+		cf->data[ae] = N_PCI_CF | so->tx.sn++;
+		so->tx.sn %= 16;
+		so->tx.bs++;
+
+		if (so->ll.mtu == CANFD_MTU)
+			cf->flags = so->ll.tx_flags;
+
+		skb->dev = dev;
+		can_skb_set_owner(skb, sk);
+
+		can_send_ret = can_send(skb, 1);
+		if (can_send_ret)
+			printk_once(KERN_NOTICE "can-isotp: %s: can_send_ret %d\n",
+				    __func__, can_send_ret);
+
+		if (so->tx.idx >= so->tx.len) {
+			/* we are done */
+			so->tx.state = ISOTP_IDLE;
+			dev_put(dev);
+			wake_up_interruptible(&so->wait);
+			break;
+		}
+
+		if (so->txfc.bs && so->tx.bs >= so->txfc.bs) {
+			/* stop and wait for FC */
+			so->tx.state = ISOTP_WAIT_FC;
+			dev_put(dev);
+			hrtimer_set_expires(&so->txtimer,
+					    ktime_add(ktime_get(),
+						      ktime_set(1, 0)));
+			restart = HRTIMER_RESTART;
+			break;
+		}
+
+		/* no gap between data frames needed => use burst mode */
+		if (!so->tx_gap)
+			goto isotp_tx_burst;
+
+		/* start timer to send next data frame with correct delay */
+		dev_put(dev);
+		hrtimer_set_expires(&so->txtimer,
+				    ktime_add(ktime_get(), so->tx_gap));
+		restart = HRTIMER_RESTART;
+		break;
+
+	default:
+		WARN_ON_ONCE(1);
+	}
+
+	return restart;
+}
+
+static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
+{
+	struct sock *sk = sock->sk;
+	struct isotp_sock *so = isotp_sk(sk);
+	struct sk_buff *skb;
+	struct net_device *dev;
+	struct canfd_frame *cf;
+	int ae = (so->opt.flags & CAN_ISOTP_EXTEND_ADDR) ? 1 : 0;
+	int wait_tx_done = (so->opt.flags & CAN_ISOTP_WAIT_TX_DONE) ? 1 : 0;
+	int off;
+	int err;
+
+	if (!so->bound)
+		return -EADDRNOTAVAIL;
+
+	/* we do not support multiple buffers - for now */
+	if (so->tx.state != ISOTP_IDLE || wq_has_sleeper(&so->wait)) {
+		if (msg->msg_flags & MSG_DONTWAIT)
+			return -EAGAIN;
+
+		/* wait for complete transmission of current pdu */
+		wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
+	}
+
+	if (!size || size > MAX_MSG_LENGTH)
+		return -EINVAL;
+
+	err = memcpy_from_msg(so->tx.buf, msg, size);
+	if (err < 0)
+		return err;
+
+	dev = dev_get_by_index(sock_net(sk), so->ifindex);
+	if (!dev)
+		return -ENXIO;
+
+	skb = sock_alloc_send_skb(sk, so->ll.mtu + sizeof(struct can_skb_priv),
+				  msg->msg_flags & MSG_DONTWAIT, &err);
+	if (!skb) {
+		dev_put(dev);
+		return err;
+	}
+
+	can_skb_reserve(skb);
+	can_skb_prv(skb)->ifindex = dev->ifindex;
+	can_skb_prv(skb)->skbcnt = 0;
+
+	so->tx.state = ISOTP_SENDING;
+	so->tx.len = size;
+	so->tx.idx = 0;
+
+	cf = (struct canfd_frame *)skb->data;
+	skb_put(skb, so->ll.mtu);
+
+	/* take care of a potential SF_DL ESC offset for TX_DL > 8 */
+	off = (so->tx.ll_dl > CAN_MAX_DLEN) ? 1 : 0;
+
+	/* check for single frame transmission depending on TX_DL */
+	if (size <= so->tx.ll_dl - SF_PCI_SZ4 - ae - off) {
+		/* The message size generally fits into a SingleFrame - good.
+		 *
+		 * SF_DL ESC offset optimization:
+		 *
+		 * When TX_DL is greater than 8 but the message would still fit
+		 * into an 8 byte CAN frame, we can omit the offset.
+		 * This prevents a protocol caused length extension from
+		 * CAN_DL = 8 to CAN_DL = 12 due to the SF_DL ESC handling.
+		 */
+		if (size <= CAN_MAX_DLEN - SF_PCI_SZ4 - ae)
+			off = 0;
+
+		isotp_fill_dataframe(cf, so, ae, off);
+
+		/* place single frame N_PCI w/o length in appropriate index */
+		cf->data[ae] = N_PCI_SF;
+
+		/* place SF_DL size value depending on the SF_DL ESC offset */
+		if (off)
+			cf->data[SF_PCI_SZ4 + ae] = size;
+		else
+			cf->data[ae] |= size;
+
+		so->tx.state = ISOTP_IDLE;
+		wake_up_interruptible(&so->wait);
+
+		/* don't enable wait queue for a single frame transmission */
+		wait_tx_done = 0;
+	} else {
+		/* send first frame and wait for FC */
+
+		isotp_create_fframe(cf, so, ae);
+
+		/* start timeout for FC */
+		hrtimer_start(&so->txtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT);
+	}
+
+	/* send the first or only CAN frame */
+	if (so->ll.mtu == CANFD_MTU)
+		cf->flags = so->ll.tx_flags;
+
+	skb->dev = dev;
+	skb->sk = sk;
+	err = can_send(skb, 1);
+	dev_put(dev);
+	if (err) {
+		printk_once(KERN_NOTICE "can-isotp: %s: can_send_ret %d\n",
+			    __func__, err);
+		return err;
+	}
+
+	if (wait_tx_done) {
+		/* wait for complete transmission of current pdu */
+		wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
+	}
+
+	return size;
+}
+
+static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+			 int flags)
+{
+	struct sock *sk = sock->sk;
+	struct sk_buff *skb;
+	int err = 0;
+	int noblock;
+
+	noblock = flags & MSG_DONTWAIT;
+	flags &= ~MSG_DONTWAIT;
+
+	skb = skb_recv_datagram(sk, flags, noblock, &err);
+	if (!skb)
+		return err;
+
+	if (size < skb->len)
+		msg->msg_flags |= MSG_TRUNC;
+	else
+		size = skb->len;
+
+	err = memcpy_to_msg(msg, skb->data, size);
+	if (err < 0) {
+		skb_free_datagram(sk, skb);
+		return err;
+	}
+
+	sock_recv_timestamp(msg, sk, skb);
+
+	if (msg->msg_name) {
+		msg->msg_namelen = sizeof(struct sockaddr_can);
+		memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
+	}
+
+	skb_free_datagram(sk, skb);
+
+	return size;
+}
+
+static int isotp_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	struct isotp_sock *so;
+	struct net *net;
+
+	if (!sk)
+		return 0;
+
+	so = isotp_sk(sk);
+	net = sock_net(sk);
+
+	/* wait for complete transmission of current pdu */
+	wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
+
+	unregister_netdevice_notifier(&so->notifier);
+
+	lock_sock(sk);
+
+	hrtimer_cancel(&so->txtimer);
+	hrtimer_cancel(&so->rxtimer);
+
+	/* remove current filters & unregister */
+	if (so->bound) {
+		if (so->ifindex) {
+			struct net_device *dev;
+
+			dev = dev_get_by_index(net, so->ifindex);
+			if (dev) {
+				can_rx_unregister(net, dev, so->rxid,
+						  SINGLE_MASK(so->rxid),
+						  isotp_rcv, sk);
+				dev_put(dev);
+			}
+		}
+	}
+
+	so->ifindex = 0;
+	so->bound = 0;
+
+	sock_orphan(sk);
+	sock->sk = NULL;
+
+	release_sock(sk);
+	sock_put(sk);
+
+	return 0;
+}
+
+static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
+{
+	struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
+	struct sock *sk = sock->sk;
+	struct isotp_sock *so = isotp_sk(sk);
+	struct net *net = sock_net(sk);
+	int ifindex;
+	struct net_device *dev;
+	int err = 0;
+	int notify_enetdown = 0;
+
+	if (len < CAN_REQUIRED_SIZE(struct sockaddr_can, can_addr.tp))
+		return -EINVAL;
+
+	if (addr->can_addr.tp.rx_id == addr->can_addr.tp.tx_id)
+		return -EADDRNOTAVAIL;
+
+	if ((addr->can_addr.tp.rx_id | addr->can_addr.tp.tx_id) &
+	    (CAN_ERR_FLAG | CAN_RTR_FLAG))
+		return -EADDRNOTAVAIL;
+
+	if (!addr->can_ifindex)
+		return -ENODEV;
+
+	lock_sock(sk);
+
+	if (so->bound && addr->can_ifindex == so->ifindex &&
+	    addr->can_addr.tp.rx_id == so->rxid &&
+	    addr->can_addr.tp.tx_id == so->txid)
+		goto out;
+
+	dev = dev_get_by_index(net, addr->can_ifindex);
+	if (!dev) {
+		err = -ENODEV;
+		goto out;
+	}
+	if (dev->type != ARPHRD_CAN) {
+		dev_put(dev);
+		err = -ENODEV;
+		goto out;
+	}
+	if (dev->mtu < so->ll.mtu) {
+		dev_put(dev);
+		err = -EINVAL;
+		goto out;
+	}
+	if (!(dev->flags & IFF_UP))
+		notify_enetdown = 1;
+
+	ifindex = dev->ifindex;
+
+	can_rx_register(net, dev, addr->can_addr.tp.rx_id,
+			SINGLE_MASK(addr->can_addr.tp.rx_id), isotp_rcv, sk,
+			"isotp", sk);
+
+	dev_put(dev);
+
+	if (so->bound) {
+		/* unregister old filter */
+		if (so->ifindex) {
+			dev = dev_get_by_index(net, so->ifindex);
+			if (dev) {
+				can_rx_unregister(net, dev, so->rxid,
+						  SINGLE_MASK(so->rxid),
+						  isotp_rcv, sk);
+				dev_put(dev);
+			}
+		}
+	}
+
+	/* switch to new settings */
+	so->ifindex = ifindex;
+	so->rxid = addr->can_addr.tp.rx_id;
+	so->txid = addr->can_addr.tp.tx_id;
+	so->bound = 1;
+
+out:
+	release_sock(sk);
+
+	if (notify_enetdown) {
+		sk->sk_err = ENETDOWN;
+		if (!sock_flag(sk, SOCK_DEAD))
+			sk->sk_error_report(sk);
+	}
+
+	return err;
+}
+
+static int isotp_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
+{
+	struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
+	struct sock *sk = sock->sk;
+	struct isotp_sock *so = isotp_sk(sk);
+
+	if (peer)
+		return -EOPNOTSUPP;
+
+	addr->can_family = AF_CAN;
+	addr->can_ifindex = so->ifindex;
+	addr->can_addr.tp.rx_id = so->rxid;
+	addr->can_addr.tp.tx_id = so->txid;
+
+	return sizeof(*addr);
+}
+
+static int isotp_setsockopt(struct socket *sock, int level, int optname,
+			    sockptr_t optval, unsigned int optlen)
+{
+	struct sock *sk = sock->sk;
+	struct isotp_sock *so = isotp_sk(sk);
+	int ret = 0;
+
+	if (level != SOL_CAN_ISOTP)
+		return -EINVAL;
+
+	switch (optname) {
+	case CAN_ISOTP_OPTS:
+		if (optlen != sizeof(struct can_isotp_options))
+			return -EINVAL;
+
+		if (copy_from_sockptr(&so->opt, optval, optlen))
+			return -EFAULT;
+
+		/* no separate rx_ext_address is given => use ext_address */
+		if (!(so->opt.flags & CAN_ISOTP_RX_EXT_ADDR))
+			so->opt.rx_ext_address = so->opt.ext_address;
+		break;
+
+	case CAN_ISOTP_RECV_FC:
+		if (optlen != sizeof(struct can_isotp_fc_options))
+			return -EINVAL;
+
+		if (copy_from_sockptr(&so->rxfc, optval, optlen))
+			return -EFAULT;
+		break;
+
+	case CAN_ISOTP_TX_STMIN:
+		if (optlen != sizeof(u32))
+			return -EINVAL;
+
+		if (copy_from_sockptr(&so->force_tx_stmin, optval, optlen))
+			return -EFAULT;
+		break;
+
+	case CAN_ISOTP_RX_STMIN:
+		if (optlen != sizeof(u32))
+			return -EINVAL;
+
+		if (copy_from_sockptr(&so->force_rx_stmin, optval, optlen))
+			return -EFAULT;
+		break;
+
+	case CAN_ISOTP_LL_OPTS:
+		if (optlen == sizeof(struct can_isotp_ll_options)) {
+			struct can_isotp_ll_options ll;
+
+			if (copy_from_sockptr(&ll, optval, optlen))
+				return -EFAULT;
+
+			/* check for correct ISO 11898-1 DLC data length */
+			if (ll.tx_dl != padlen(ll.tx_dl))
+				return -EINVAL;
+
+			if (ll.mtu != CAN_MTU && ll.mtu != CANFD_MTU)
+				return -EINVAL;
+
+			if (ll.mtu == CAN_MTU && ll.tx_dl > CAN_MAX_DLEN)
+				return -EINVAL;
+
+			memcpy(&so->ll, &ll, sizeof(ll));
+
+			/* set ll_dl for tx path to similar place as for rx */
+			so->tx.ll_dl = ll.tx_dl;
+		} else {
+			return -EINVAL;
+		}
+		break;
+
+	default:
+		ret = -ENOPROTOOPT;
+	}
+
+	return ret;
+}
+
+static int isotp_getsockopt(struct socket *sock, int level, int optname,
+			    char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+	struct isotp_sock *so = isotp_sk(sk);
+	int len;
+	void *val;
+
+	if (level != SOL_CAN_ISOTP)
+		return -EINVAL;
+	if (get_user(len, optlen))
+		return -EFAULT;
+	if (len < 0)
+		return -EINVAL;
+
+	switch (optname) {
+	case CAN_ISOTP_OPTS:
+		len = min_t(int, len, sizeof(struct can_isotp_options));
+		val = &so->opt;
+		break;
+
+	case CAN_ISOTP_RECV_FC:
+		len = min_t(int, len, sizeof(struct can_isotp_fc_options));
+		val = &so->rxfc;
+		break;
+
+	case CAN_ISOTP_TX_STMIN:
+		len = min_t(int, len, sizeof(u32));
+		val = &so->force_tx_stmin;
+		break;
+
+	case CAN_ISOTP_RX_STMIN:
+		len = min_t(int, len, sizeof(u32));
+		val = &so->force_rx_stmin;
+		break;
+
+	case CAN_ISOTP_LL_OPTS:
+		len = min_t(int, len, sizeof(struct can_isotp_ll_options));
+		val = &so->ll;
+		break;
+
+	default:
+		return -ENOPROTOOPT;
+	}
+
+	if (put_user(len, optlen))
+		return -EFAULT;
+	if (copy_to_user(optval, val, len))
+		return -EFAULT;
+	return 0;
+}
+
+static int isotp_notifier(struct notifier_block *nb, unsigned long msg,
+			  void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct isotp_sock *so = container_of(nb, struct isotp_sock, notifier);
+	struct sock *sk = &so->sk;
+
+	if (!net_eq(dev_net(dev), sock_net(sk)))
+		return NOTIFY_DONE;
+
+	if (dev->type != ARPHRD_CAN)
+		return NOTIFY_DONE;
+
+	if (so->ifindex != dev->ifindex)
+		return NOTIFY_DONE;
+
+	switch (msg) {
+	case NETDEV_UNREGISTER:
+		lock_sock(sk);
+		/* remove current filters & unregister */
+		if (so->bound)
+			can_rx_unregister(dev_net(dev), dev, so->rxid,
+					  SINGLE_MASK(so->rxid),
+					  isotp_rcv, sk);
+
+		so->ifindex = 0;
+		so->bound  = 0;
+		release_sock(sk);
+
+		sk->sk_err = ENODEV;
+		if (!sock_flag(sk, SOCK_DEAD))
+			sk->sk_error_report(sk);
+		break;
+
+	case NETDEV_DOWN:
+		sk->sk_err = ENETDOWN;
+		if (!sock_flag(sk, SOCK_DEAD))
+			sk->sk_error_report(sk);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static int isotp_init(struct sock *sk)
+{
+	struct isotp_sock *so = isotp_sk(sk);
+
+	so->ifindex = 0;
+	so->bound = 0;
+
+	so->opt.flags = CAN_ISOTP_DEFAULT_FLAGS;
+	so->opt.ext_address = CAN_ISOTP_DEFAULT_EXT_ADDRESS;
+	so->opt.rx_ext_address = CAN_ISOTP_DEFAULT_EXT_ADDRESS;
+	so->opt.rxpad_content = CAN_ISOTP_DEFAULT_PAD_CONTENT;
+	so->opt.txpad_content = CAN_ISOTP_DEFAULT_PAD_CONTENT;
+	so->opt.frame_txtime = CAN_ISOTP_DEFAULT_FRAME_TXTIME;
+	so->rxfc.bs = CAN_ISOTP_DEFAULT_RECV_BS;
+	so->rxfc.stmin = CAN_ISOTP_DEFAULT_RECV_STMIN;
+	so->rxfc.wftmax = CAN_ISOTP_DEFAULT_RECV_WFTMAX;
+	so->ll.mtu = CAN_ISOTP_DEFAULT_LL_MTU;
+	so->ll.tx_dl = CAN_ISOTP_DEFAULT_LL_TX_DL;
+	so->ll.tx_flags = CAN_ISOTP_DEFAULT_LL_TX_FLAGS;
+
+	/* set ll_dl for tx path to similar place as for rx */
+	so->tx.ll_dl = so->ll.tx_dl;
+
+	so->rx.state = ISOTP_IDLE;
+	so->tx.state = ISOTP_IDLE;
+
+	hrtimer_init(&so->rxtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT);
+	so->rxtimer.function = isotp_rx_timer_handler;
+	hrtimer_init(&so->txtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT);
+	so->txtimer.function = isotp_tx_timer_handler;
+
+	init_waitqueue_head(&so->wait);
+
+	so->notifier.notifier_call = isotp_notifier;
+	register_netdevice_notifier(&so->notifier);
+
+	return 0;
+}
+
+static int isotp_sock_no_ioctlcmd(struct socket *sock, unsigned int cmd,
+				  unsigned long arg)
+{
+	/* no ioctls for socket layer -> hand it down to NIC layer */
+	return -ENOIOCTLCMD;
+}
+
+static const struct proto_ops isotp_ops = {
+	.family = PF_CAN,
+	.release = isotp_release,
+	.bind = isotp_bind,
+	.connect = sock_no_connect,
+	.socketpair = sock_no_socketpair,
+	.accept = sock_no_accept,
+	.getname = isotp_getname,
+	.poll = datagram_poll,
+	.ioctl = isotp_sock_no_ioctlcmd,
+	.gettstamp = sock_gettstamp,
+	.listen = sock_no_listen,
+	.shutdown = sock_no_shutdown,
+	.setsockopt = isotp_setsockopt,
+	.getsockopt = isotp_getsockopt,
+	.sendmsg = isotp_sendmsg,
+	.recvmsg = isotp_recvmsg,
+	.mmap = sock_no_mmap,
+	.sendpage = sock_no_sendpage,
+};
+
+static struct proto isotp_proto __read_mostly = {
+	.name = "CAN_ISOTP",
+	.owner = THIS_MODULE,
+	.obj_size = sizeof(struct isotp_sock),
+	.init = isotp_init,
+};
+
+static const struct can_proto isotp_can_proto = {
+	.type = SOCK_DGRAM,
+	.protocol = CAN_ISOTP,
+	.ops = &isotp_ops,
+	.prot = &isotp_proto,
+};
+
+static __init int isotp_module_init(void)
+{
+	int err;
+
+	pr_info("can: isotp protocol (rev " CAN_ISOTP_VERSION ")\n");
+
+	err = can_proto_register(&isotp_can_proto);
+	if (err < 0)
+		pr_err("can: registration of isotp protocol failed\n");
+
+	return err;
+}
+
+static __exit void isotp_module_exit(void)
+{
+	can_proto_unregister(&isotp_can_proto);
+}
+
+module_init(isotp_module_init);
+module_exit(isotp_module_exit);
-- 
cgit v1.2.3


From ec5722adb8b23d851a77412004e8faae4f83dfd0 Mon Sep 17 00:00:00 2001
From: Simon Ser <contact@emersion.fr>
Date: Wed, 24 Jun 2020 13:01:31 +0000
Subject: drm/fourcc: document modifier uniqueness requirements
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There have been suggestions to bake pitch alignment, address alignment,
contiguous memory or other placement (hidden VRAM, GTT/BAR, etc)
constraints into modifiers. Last time this was brought up it seemed
like the consensus was to not allow this. Document this in drm_fourcc.h.

There are several reasons for this.

- Encoding all of these constraints in the modifiers would explode the
  search space pretty quickly (we only have 64 bits to work with).
- Modifiers need to be unambiguous: a buffer can only have a single
  modifier.
- Modifier users aren't expected to parse modifiers (except drivers).

v2: add paragraph about aliases (Daniel)

v3: fix unrelated changes sent with the patch

v4: disambiguate users between driver and higher-level programs (Brian,
Daniel)

v5: fix AFBC example (Brian, Daniel)

v6: remove duplicated paragraph (Daniel)

Signed-off-by: Simon Ser <contact@emersion.fr>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Brian Starkey <brian.starkey@arm.com>
Cc: Daniel Stone <daniel@fooishbar.org>
Cc: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Cc: Dave Airlie <airlied@gmail.com>
Cc: Marek Olšák <maraeo@gmail.com>
Cc: Alex Deucher <alexdeucher@gmail.com>
Cc: Neil Armstrong <narmstrong@baylibre.com>
Cc: Michel Dänzer <michel@daenzer.net>
Link: https://patchwork.freedesktop.org/patch/msgid/MGwgeXojKNdNXjCxuMhRlwcJM4vdYph_WJcMeGPPGMcRKtHV41XAXlh2tCc-pPJZCAhS3gwbWMWTd8f03NBA2ZYKfr0QxLhcPivpopr5c6M=@emersion.fr
---
 include/uapi/drm/drm_fourcc.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
index 82f327801267..6f0628eb13a6 100644
--- a/include/uapi/drm/drm_fourcc.h
+++ b/include/uapi/drm/drm_fourcc.h
@@ -58,6 +58,30 @@ extern "C" {
  * may preserve meaning - such as number of planes - from the fourcc code,
  * whereas others may not.
  *
+ * Modifiers must uniquely encode buffer layout. In other words, a buffer must
+ * match only a single modifier. A modifier must not be a subset of layouts of
+ * another modifier. For instance, it's incorrect to encode pitch alignment in
+ * a modifier: a buffer may match a 64-pixel aligned modifier and a 32-pixel
+ * aligned modifier. That said, modifiers can have implicit minimal
+ * requirements.
+ *
+ * For modifiers where the combination of fourcc code and modifier can alias,
+ * a canonical pair needs to be defined and used by all drivers. Preferred
+ * combinations are also encouraged where all combinations might lead to
+ * confusion and unnecessarily reduced interoperability. An example for the
+ * latter is AFBC, where the ABGR layouts are preferred over ARGB layouts.
+ *
+ * There are two kinds of modifier users:
+ *
+ * - Kernel and user-space drivers: for drivers it's important that modifiers
+ *   don't alias, otherwise two drivers might support the same format but use
+ *   different aliases, preventing them from sharing buffers in an efficient
+ *   format.
+ * - Higher-level programs interfacing with KMS/GBM/EGL/Vulkan/etc: these users
+ *   see modifiers as opaque tokens they can check for equality and intersect.
+ *   These users mustn't need to know to reason about the modifier value
+ *   (i.e. they are not expected to extract information out of the modifier).
+ *
  * Vendors should document their modifier usage in as much detail as
  * possible, to ensure maximum compatibility across devices, drivers and
  * applications.
-- 
cgit v1.2.3


From eca43ee6c46db92dd850ce659316b0680d70e137 Mon Sep 17 00:00:00 2001
From: "Nikita V. Shirokov" <tehnerd@tehnerd.com>
Date: Fri, 9 Oct 2020 07:03:25 +0000
Subject: bpf: Add tcp_notsent_lowat bpf setsockopt

Adding support for TCP_NOTSENT_LOWAT sockoption (https://lwn.net/Articles/560082/)
in tcp bpf programs.

Signed-off-by: Nikita V. Shirokov <tehnerd@tehnerd.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20201009070325.226855-1-tehnerd@tehnerd.com
---
 include/uapi/linux/bpf.h                          |  2 +-
 net/core/filter.c                                 |  4 ++++
 tools/include/uapi/linux/bpf.h                    |  2 +-
 tools/testing/selftests/bpf/progs/connect4_prog.c | 19 +++++++++++++++++++
 4 files changed, 25 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d83561e8cd2c..42d2df799397 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1698,7 +1698,7 @@ union bpf_attr {
  * 		  **TCP_CONGESTION**, **TCP_BPF_IW**,
  * 		  **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**,
  * 		  **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**,
- * 		  **TCP_SYNCNT**, **TCP_USER_TIMEOUT**.
+ *		  **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**.
  * 		* **IPPROTO_IP**, which supports *optname* **IP_TOS**.
  * 		* **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
  * 	Return
diff --git a/net/core/filter.c b/net/core/filter.c
index 05df73780dd3..5da44b11e1ec 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4827,6 +4827,10 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
 				else
 					icsk->icsk_user_timeout = val;
 				break;
+			case TCP_NOTSENT_LOWAT:
+				tp->notsent_lowat = val;
+				sk->sk_write_space(sk);
+				break;
 			default:
 				ret = -EINVAL;
 			}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index d83561e8cd2c..42d2df799397 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1698,7 +1698,7 @@ union bpf_attr {
  * 		  **TCP_CONGESTION**, **TCP_BPF_IW**,
  * 		  **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**,
  * 		  **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**,
- * 		  **TCP_SYNCNT**, **TCP_USER_TIMEOUT**.
+ *		  **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**.
  * 		* **IPPROTO_IP**, which supports *optname* **IP_TOS**.
  * 		* **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
  * 	Return
diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c
index b1b2773c0b9d..a943d394fd3a 100644
--- a/tools/testing/selftests/bpf/progs/connect4_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect4_prog.c
@@ -23,6 +23,10 @@
 #define TCP_CA_NAME_MAX 16
 #endif
 
+#ifndef TCP_NOTSENT_LOWAT
+#define TCP_NOTSENT_LOWAT 25
+#endif
+
 #ifndef IFNAMSIZ
 #define IFNAMSIZ 16
 #endif
@@ -128,6 +132,18 @@ static __inline int set_keepalive(struct bpf_sock_addr *ctx)
 	return 0;
 }
 
+static __inline int set_notsent_lowat(struct bpf_sock_addr *ctx)
+{
+	int lowat = 65535;
+
+	if (ctx->type == SOCK_STREAM) {
+		if (bpf_setsockopt(ctx, SOL_TCP, TCP_NOTSENT_LOWAT, &lowat, sizeof(lowat)))
+			return 1;
+	}
+
+	return 0;
+}
+
 SEC("cgroup/connect4")
 int connect_v4_prog(struct bpf_sock_addr *ctx)
 {
@@ -148,6 +164,9 @@ int connect_v4_prog(struct bpf_sock_addr *ctx)
 	if (set_keepalive(ctx))
 		return 0;
 
+	if (set_notsent_lowat(ctx))
+		return 0;
+
 	if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
 		return 0;
 	else if (ctx->type == SOCK_STREAM)
-- 
cgit v1.2.3


From 8858e8d98d5457ba23bcd0d99ce23e272b8b09a1 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Fri, 9 Oct 2020 18:07:14 +0900
Subject: block: fix uapi blkzoned.h comments

Update the kdoc comments for struct blk_zone (capacity field description
missing) and for struct blk_zone_report (flags field description
missing).

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/blkzoned.h | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
index 42c3366cc25f..656a326821a2 100644
--- a/include/uapi/linux/blkzoned.h
+++ b/include/uapi/linux/blkzoned.h
@@ -93,12 +93,15 @@ enum blk_zone_report_flags {
  * @non_seq: Flag indicating that the zone is using non-sequential resources
  *           (for host-aware zoned block devices only).
  * @reset: Flag indicating that a zone reset is recommended.
- * @reserved: Padding to 64 B to match the ZBC/ZAC defined zone descriptor size.
+ * @resv: Padding for 8B alignment.
+ * @capacity: Zone usable capacity in 512 B sector units
+ * @reserved: Padding to 64 B to match the ZBC, ZAC and ZNS defined zone
+ *            descriptor size.
  *
- * start, len and wp use the regular 512 B sector unit, regardless of the
- * device logical block size. The overall structure size is 64 B to match the
- * ZBC/ZAC defined zone descriptor and allow support for future additional
- * zone information.
+ * start, len, capacity and wp use the regular 512 B sector unit, regardless
+ * of the device logical block size. The overall structure size is 64 B to
+ * match the ZBC, ZAC and ZNS defined zone descriptor and allow support for
+ * future additional zone information.
  */
 struct blk_zone {
 	__u64	start;		/* Zone start sector */
@@ -118,7 +121,7 @@ struct blk_zone {
  *
  * @sector: starting sector of report
  * @nr_zones: IN maximum / OUT actual
- * @reserved: padding to 16 byte alignment
+ * @flags: one or more flags as defined by enum blk_zone_report_flags.
  * @zones: Space to hold @nr_zones @zones entries on reply.
  *
  * The array of at most @nr_zones must follow this structure in memory.
-- 
cgit v1.2.3


From ccdf07219da6bd1f43c6ddcde4c0e36993c7365a Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Wed, 7 Oct 2020 09:00:43 +0300
Subject: devlink: Add reload action option to devlink reload command

Add devlink reload action to allow the user to request a specific reload
action. The action parameter is optional, if not specified then devlink
driver re-init action is used (backward compatible).
Note that when required to do firmware activation some drivers may need
to reload the driver. On the other hand some drivers may need to reset
the firmware to reinitialize the driver entities. Therefore, the devlink
reload command returns the actions which were actually performed.
Reload actions supported are:
driver_reinit: driver entities re-initialization, applying devlink-param
               and devlink-resource values.
fw_activate: firmware activate.

command examples:
$devlink dev reload pci/0000:82:00.0 action driver_reinit
reload_actions_performed:
  driver_reinit

$devlink dev reload pci/0000:82:00.0 action fw_activate
reload_actions_performed:
  driver_reinit fw_activate

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx4/main.c         |  7 +-
 drivers/net/ethernet/mellanox/mlx5/core/devlink.c |  7 +-
 drivers/net/ethernet/mellanox/mlxsw/core.c        | 10 ++-
 drivers/net/netdevsim/dev.c                       |  8 +-
 include/net/devlink.h                             |  7 +-
 include/uapi/linux/devlink.h                      | 13 +++
 net/core/devlink.c                                | 98 +++++++++++++++++++++--
 7 files changed, 131 insertions(+), 19 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 70cf24ba71e4..649c5323cf9f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -3946,6 +3946,7 @@ static int mlx4_restart_one_up(struct pci_dev *pdev, bool reload,
 			       struct devlink *devlink);
 
 static int mlx4_devlink_reload_down(struct devlink *devlink, bool netns_change,
+				    enum devlink_reload_action action,
 				    struct netlink_ext_ack *extack)
 {
 	struct mlx4_priv *priv = devlink_priv(devlink);
@@ -3962,14 +3963,15 @@ static int mlx4_devlink_reload_down(struct devlink *devlink, bool netns_change,
 	return 0;
 }
 
-static int mlx4_devlink_reload_up(struct devlink *devlink,
-				  struct netlink_ext_ack *extack)
+static int mlx4_devlink_reload_up(struct devlink *devlink, enum devlink_reload_action action,
+				  u32 *actions_performed, struct netlink_ext_ack *extack)
 {
 	struct mlx4_priv *priv = devlink_priv(devlink);
 	struct mlx4_dev *dev = &priv->dev;
 	struct mlx4_dev_persistent *persist = dev->persist;
 	int err;
 
+	*actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
 	err = mlx4_restart_one_up(persist->pdev, true, devlink);
 	if (err)
 		mlx4_err(persist->dev, "mlx4_restart_one_up failed, ret=%d\n",
@@ -3980,6 +3982,7 @@ static int mlx4_devlink_reload_up(struct devlink *devlink,
 
 static const struct devlink_ops mlx4_devlink_ops = {
 	.port_type_set	= mlx4_devlink_port_type_set,
+	.reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT),
 	.reload_down	= mlx4_devlink_reload_down,
 	.reload_up	= mlx4_devlink_reload_up,
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index 9b14e3f805a2..1b248c01a209 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -85,6 +85,7 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
 }
 
 static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
+				    enum devlink_reload_action action,
 				    struct netlink_ext_ack *extack)
 {
 	struct mlx5_core_dev *dev = devlink_priv(devlink);
@@ -93,11 +94,12 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
 	return 0;
 }
 
-static int mlx5_devlink_reload_up(struct devlink *devlink,
-				  struct netlink_ext_ack *extack)
+static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_action action,
+				  u32 *actions_performed, struct netlink_ext_ack *extack)
 {
 	struct mlx5_core_dev *dev = devlink_priv(devlink);
 
+	*actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
 	return mlx5_load_one(dev, false);
 }
 
@@ -114,6 +116,7 @@ static const struct devlink_ops mlx5_devlink_ops = {
 #endif
 	.flash_update = mlx5_devlink_flash_update,
 	.info_get = mlx5_devlink_info_get,
+	.reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT),
 	.reload_down = mlx5_devlink_reload_down,
 	.reload_up = mlx5_devlink_reload_up,
 };
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index a21afa56e3f7..cd9f56c73827 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -1414,7 +1414,7 @@ mlxsw_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
 
 static int
 mlxsw_devlink_core_bus_device_reload_down(struct devlink *devlink,
-					  bool netns_change,
+					  bool netns_change, enum devlink_reload_action action,
 					  struct netlink_ext_ack *extack)
 {
 	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
@@ -1427,11 +1427,13 @@ mlxsw_devlink_core_bus_device_reload_down(struct devlink *devlink,
 }
 
 static int
-mlxsw_devlink_core_bus_device_reload_up(struct devlink *devlink,
-					struct netlink_ext_ack *extack)
+mlxsw_devlink_core_bus_device_reload_up(struct devlink *devlink, enum devlink_reload_action action,
+					u32 *actions_performed,	struct netlink_ext_ack *extack)
 {
 	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
 
+	*actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) |
+			     BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE);
 	return mlxsw_core_bus_device_register(mlxsw_core->bus_info,
 					      mlxsw_core->bus,
 					      mlxsw_core->bus_priv, true,
@@ -1564,6 +1566,8 @@ mlxsw_devlink_trap_policer_counter_get(struct devlink *devlink,
 }
 
 static const struct devlink_ops mlxsw_devlink_ops = {
+	.reload_actions		= BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) |
+				  BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE),
 	.reload_down		= mlxsw_devlink_core_bus_device_reload_down,
 	.reload_up		= mlxsw_devlink_core_bus_device_reload_up,
 	.port_type_set			= mlxsw_devlink_port_type_set,
diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index 56213ba151f6..b57e35c4ef6f 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -701,7 +701,7 @@ static int nsim_dev_reload_create(struct nsim_dev *nsim_dev,
 static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev);
 
 static int nsim_dev_reload_down(struct devlink *devlink, bool netns_change,
-				struct netlink_ext_ack *extack)
+				enum devlink_reload_action action, struct netlink_ext_ack *extack)
 {
 	struct nsim_dev *nsim_dev = devlink_priv(devlink);
 
@@ -717,8 +717,8 @@ static int nsim_dev_reload_down(struct devlink *devlink, bool netns_change,
 	return 0;
 }
 
-static int nsim_dev_reload_up(struct devlink *devlink,
-			      struct netlink_ext_ack *extack)
+static int nsim_dev_reload_up(struct devlink *devlink, enum devlink_reload_action action,
+			      u32 *actions_performed, struct netlink_ext_ack *extack)
 {
 	struct nsim_dev *nsim_dev = devlink_priv(devlink);
 
@@ -730,6 +730,7 @@ static int nsim_dev_reload_up(struct devlink *devlink,
 		return -EINVAL;
 	}
 
+	*actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
 	return nsim_dev_reload_create(nsim_dev, extack);
 }
 
@@ -886,6 +887,7 @@ nsim_dev_devlink_trap_policer_counter_get(struct devlink *devlink,
 static const struct devlink_ops nsim_dev_devlink_ops = {
 	.supported_flash_update_params = DEVLINK_SUPPORT_FLASH_UPDATE_COMPONENT |
 					 DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK,
+	.reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT),
 	.reload_down = nsim_dev_reload_down,
 	.reload_up = nsim_dev_reload_up,
 	.info_get = nsim_dev_info_get,
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 237ba5e29a3b..93c535ae5a4b 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1150,10 +1150,11 @@ struct devlink_ops {
 	 * implemementation.
 	 */
 	u32 supported_flash_update_params;
+	unsigned long reload_actions;
 	int (*reload_down)(struct devlink *devlink, bool netns_change,
-			   struct netlink_ext_ack *extack);
-	int (*reload_up)(struct devlink *devlink,
-			 struct netlink_ext_ack *extack);
+			   enum devlink_reload_action action, struct netlink_ext_ack *extack);
+	int (*reload_up)(struct devlink *devlink, enum devlink_reload_action action,
+			 u32 *actions_performed, struct netlink_ext_ack *extack);
 	int (*port_type_set)(struct devlink_port *devlink_port,
 			     enum devlink_port_type port_type);
 	int (*port_split)(struct devlink *devlink, unsigned int port_index,
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 5f1d6c327670..74bdad252c36 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -301,6 +301,16 @@ enum {
 	DEVLINK_ATTR_TRAP_METADATA_TYPE_FA_COOKIE,
 };
 
+enum devlink_reload_action {
+	DEVLINK_RELOAD_ACTION_UNSPEC,
+	DEVLINK_RELOAD_ACTION_DRIVER_REINIT,	/* Driver entities re-instantiation */
+	DEVLINK_RELOAD_ACTION_FW_ACTIVATE,	/* FW activate */
+
+	/* Add new reload actions above */
+	__DEVLINK_RELOAD_ACTION_MAX,
+	DEVLINK_RELOAD_ACTION_MAX = __DEVLINK_RELOAD_ACTION_MAX - 1
+};
+
 enum devlink_attr {
 	/* don't change the order or add anything between, this is ABI! */
 	DEVLINK_ATTR_UNSPEC,
@@ -493,6 +503,9 @@ enum devlink_attr {
 	DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT,	/* u64 */
 	DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK,	/* bitfield32 */
 
+	DEVLINK_ATTR_RELOAD_ACTION,		/* u8 */
+	DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED,	/* bitfield32 */
+
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 5c45b3964ec3..c026ed3519c9 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -479,6 +479,12 @@ static int devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink)
 	return 0;
 }
 
+static bool
+devlink_reload_action_is_supported(struct devlink *devlink, enum devlink_reload_action action)
+{
+	return test_bit(action, &devlink->ops->reload_actions);
+}
+
 static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
 			   enum devlink_command cmd, u32 portid,
 			   u32 seq, int flags)
@@ -2984,6 +2990,7 @@ bool devlink_is_reload_failed(const struct devlink *devlink)
 EXPORT_SYMBOL_GPL(devlink_is_reload_failed);
 
 static int devlink_reload(struct devlink *devlink, struct net *dest_net,
+			  enum devlink_reload_action action, u32 *actions_performed,
 			  struct netlink_ext_ack *extack)
 {
 	int err;
@@ -2991,22 +2998,60 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net,
 	if (!devlink->reload_enabled)
 		return -EOPNOTSUPP;
 
-	err = devlink->ops->reload_down(devlink, !!dest_net, extack);
+	err = devlink->ops->reload_down(devlink, !!dest_net, action, extack);
 	if (err)
 		return err;
 
 	if (dest_net && !net_eq(dest_net, devlink_net(devlink)))
 		devlink_reload_netns_change(devlink, dest_net);
 
-	err = devlink->ops->reload_up(devlink, extack);
+	err = devlink->ops->reload_up(devlink, action, actions_performed, extack);
 	devlink_reload_failed_set(devlink, !!err);
-	return err;
+	if (err)
+		return err;
+
+	WARN_ON(!(*actions_performed & BIT(action)));
+	return 0;
+}
+
+static int
+devlink_nl_reload_actions_performed_snd(struct devlink *devlink, u32 actions_performed,
+					enum devlink_command cmd, struct genl_info *info)
+{
+	struct sk_buff *msg;
+	void *hdr;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &devlink_nl_family, 0, cmd);
+	if (!hdr)
+		goto free_msg;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto nla_put_failure;
+
+	if (nla_put_bitfield32(msg, DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED, actions_performed,
+			       actions_performed))
+		goto nla_put_failure;
+	genlmsg_end(msg, hdr);
+
+	return genlmsg_reply(msg, info);
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+free_msg:
+	nlmsg_free(msg);
+	return -EMSGSIZE;
 }
 
 static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
 {
 	struct devlink *devlink = info->user_ptr[0];
+	enum devlink_reload_action action;
 	struct net *dest_net = NULL;
+	u32 actions_performed;
 	int err;
 
 	if (!devlink_reload_supported(devlink->ops))
@@ -3026,12 +3071,30 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
 			return PTR_ERR(dest_net);
 	}
 
-	err = devlink_reload(devlink, dest_net, info->extack);
+	if (info->attrs[DEVLINK_ATTR_RELOAD_ACTION])
+		action = nla_get_u8(info->attrs[DEVLINK_ATTR_RELOAD_ACTION]);
+	else
+		action = DEVLINK_RELOAD_ACTION_DRIVER_REINIT;
+
+	if (!devlink_reload_action_is_supported(devlink, action)) {
+		NL_SET_ERR_MSG_MOD(info->extack,
+				   "Requested reload action is not supported by the driver");
+		return -EOPNOTSUPP;
+	}
+
+	err = devlink_reload(devlink, dest_net, action, &actions_performed, info->extack);
 
 	if (dest_net)
 		put_net(dest_net);
 
-	return err;
+	if (err)
+		return err;
+	/* For backward compatibility generate reply only if attributes used by user */
+	if (!info->attrs[DEVLINK_ATTR_RELOAD_ACTION])
+		return 0;
+
+	return devlink_nl_reload_actions_performed_snd(devlink, actions_performed,
+						       DEVLINK_CMD_RELOAD, info);
 }
 
 static int devlink_nl_flash_update_fill(struct sk_buff *msg,
@@ -7282,6 +7345,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_TRAP_POLICER_RATE] = { .type = NLA_U64 },
 	[DEVLINK_ATTR_TRAP_POLICER_BURST] = { .type = NLA_U64 },
 	[DEVLINK_ATTR_PORT_FUNCTION] = { .type = NLA_NESTED },
+	[DEVLINK_ATTR_RELOAD_ACTION] = NLA_POLICY_RANGE(NLA_U8, DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
+							DEVLINK_RELOAD_ACTION_MAX),
 };
 
 static const struct genl_small_ops devlink_nl_ops[] = {
@@ -7615,6 +7680,21 @@ static struct genl_family devlink_nl_family __ro_after_init = {
 	.n_mcgrps	= ARRAY_SIZE(devlink_nl_mcgrps),
 };
 
+static bool devlink_reload_actions_valid(const struct devlink_ops *ops)
+{
+	if (!devlink_reload_supported(ops)) {
+		if (WARN_ON(ops->reload_actions))
+			return false;
+		return true;
+	}
+
+	if (WARN_ON(!ops->reload_actions ||
+		    ops->reload_actions & BIT(DEVLINK_RELOAD_ACTION_UNSPEC) ||
+		    ops->reload_actions >= BIT(__DEVLINK_RELOAD_ACTION_MAX)))
+		return false;
+	return true;
+}
+
 /**
  *	devlink_alloc - Allocate new devlink instance resources
  *
@@ -7631,6 +7711,9 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
 	if (WARN_ON(!ops))
 		return NULL;
 
+	if (!devlink_reload_actions_valid(ops))
+		return NULL;
+
 	devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL);
 	if (!devlink)
 		return NULL;
@@ -9960,6 +10043,7 @@ int devlink_compat_switch_id_get(struct net_device *dev,
 static void __net_exit devlink_pernet_pre_exit(struct net *net)
 {
 	struct devlink *devlink;
+	u32 actions_performed;
 	int err;
 
 	/* In case network namespace is getting destroyed, reload
@@ -9970,7 +10054,9 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
 		if (net_eq(devlink_net(devlink), net)) {
 			if (WARN_ON(!devlink_reload_supported(devlink->ops)))
 				continue;
-			err = devlink_reload(devlink, &init_net, NULL);
+			err = devlink_reload(devlink, &init_net,
+					     DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
+					     &actions_performed, NULL);
 			if (err && err != -EOPNOTSUPP)
 				pr_warn("Failed to reload devlink instance into init_net\n");
 		}
-- 
cgit v1.2.3


From dc64cc7c63102ac78bac3cfbc00ef3abd7a3fdf3 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Wed, 7 Oct 2020 09:00:44 +0300
Subject: devlink: Add devlink reload limit option

Add reload limit to demand restrictions on reload actions.
Reload limits supported:
no_reset: No reset allowed, no down time allowed, no link flap and no
          configuration is lost.

By default the reload limit is unspecified, so no constraints are
imposed on reload actions.

Some combinations of action and limit are invalid. For example, a driver
cannot reinitialize its entities without any downtime.

The no_reset reload limit is used later in this patchset to implement
restricted fw_activate on mlx5.

Make the uapi reload limit parameter a bitfield so that it is ready for
future support of multiselection.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx4/main.c         |  4 +-
 drivers/net/ethernet/mellanox/mlx5/core/devlink.c |  4 +-
 drivers/net/ethernet/mellanox/mlxsw/core.c        |  4 +-
 drivers/net/netdevsim/dev.c                       |  6 +-
 include/net/devlink.h                             |  8 +-
 include/uapi/linux/devlink.h                      | 14 ++++
 net/core/devlink.c                                | 92 +++++++++++++++++++++--
 7 files changed, 119 insertions(+), 13 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 649c5323cf9f..c326b434734e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -3947,6 +3947,7 @@ static int mlx4_restart_one_up(struct pci_dev *pdev, bool reload,
 
 static int mlx4_devlink_reload_down(struct devlink *devlink, bool netns_change,
 				    enum devlink_reload_action action,
+				    enum devlink_reload_limit limit,
 				    struct netlink_ext_ack *extack)
 {
 	struct mlx4_priv *priv = devlink_priv(devlink);
@@ -3964,7 +3965,8 @@ static int mlx4_devlink_reload_down(struct devlink *devlink, bool netns_change,
 }
 
 static int mlx4_devlink_reload_up(struct devlink *devlink, enum devlink_reload_action action,
-				  u32 *actions_performed, struct netlink_ext_ack *extack)
+				  enum devlink_reload_limit limit, u32 *actions_performed,
+				  struct netlink_ext_ack *extack)
 {
 	struct mlx4_priv *priv = devlink_priv(devlink);
 	struct mlx4_dev *dev = &priv->dev;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index 1b248c01a209..0016041e8779 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -86,6 +86,7 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
 
 static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
 				    enum devlink_reload_action action,
+				    enum devlink_reload_limit limit,
 				    struct netlink_ext_ack *extack)
 {
 	struct mlx5_core_dev *dev = devlink_priv(devlink);
@@ -95,7 +96,8 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
 }
 
 static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_action action,
-				  u32 *actions_performed, struct netlink_ext_ack *extack)
+				  enum devlink_reload_limit limit, u32 *actions_performed,
+				  struct netlink_ext_ack *extack)
 {
 	struct mlx5_core_dev *dev = devlink_priv(devlink);
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index cd9f56c73827..7f77c2a71d1c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -1415,6 +1415,7 @@ mlxsw_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
 static int
 mlxsw_devlink_core_bus_device_reload_down(struct devlink *devlink,
 					  bool netns_change, enum devlink_reload_action action,
+					  enum devlink_reload_limit limit,
 					  struct netlink_ext_ack *extack)
 {
 	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
@@ -1428,7 +1429,8 @@ mlxsw_devlink_core_bus_device_reload_down(struct devlink *devlink,
 
 static int
 mlxsw_devlink_core_bus_device_reload_up(struct devlink *devlink, enum devlink_reload_action action,
-					u32 *actions_performed,	struct netlink_ext_ack *extack)
+					enum devlink_reload_limit limit, u32 *actions_performed,
+					struct netlink_ext_ack *extack)
 {
 	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
 
diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index b57e35c4ef6f..d07061417675 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -701,7 +701,8 @@ static int nsim_dev_reload_create(struct nsim_dev *nsim_dev,
 static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev);
 
 static int nsim_dev_reload_down(struct devlink *devlink, bool netns_change,
-				enum devlink_reload_action action, struct netlink_ext_ack *extack)
+				enum devlink_reload_action action, enum devlink_reload_limit limit,
+				struct netlink_ext_ack *extack)
 {
 	struct nsim_dev *nsim_dev = devlink_priv(devlink);
 
@@ -718,7 +719,8 @@ static int nsim_dev_reload_down(struct devlink *devlink, bool netns_change,
 }
 
 static int nsim_dev_reload_up(struct devlink *devlink, enum devlink_reload_action action,
-			      u32 *actions_performed, struct netlink_ext_ack *extack)
+			      enum devlink_reload_limit limit, u32 *actions_performed,
+			      struct netlink_ext_ack *extack)
 {
 	struct nsim_dev *nsim_dev = devlink_priv(devlink);
 
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 93c535ae5a4b..9f5c37c391f8 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1151,10 +1151,14 @@ struct devlink_ops {
 	 */
 	u32 supported_flash_update_params;
 	unsigned long reload_actions;
+	unsigned long reload_limits;
 	int (*reload_down)(struct devlink *devlink, bool netns_change,
-			   enum devlink_reload_action action, struct netlink_ext_ack *extack);
+			   enum devlink_reload_action action,
+			   enum devlink_reload_limit limit,
+			   struct netlink_ext_ack *extack);
 	int (*reload_up)(struct devlink *devlink, enum devlink_reload_action action,
-			 u32 *actions_performed, struct netlink_ext_ack *extack);
+			 enum devlink_reload_limit limit, u32 *actions_performed,
+			 struct netlink_ext_ack *extack);
 	int (*port_type_set)(struct devlink_port *devlink_port,
 			     enum devlink_port_type port_type);
 	int (*port_split)(struct devlink *devlink, unsigned int port_index,
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 74bdad252c36..82a5e66c1518 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -311,6 +311,19 @@ enum devlink_reload_action {
 	DEVLINK_RELOAD_ACTION_MAX = __DEVLINK_RELOAD_ACTION_MAX - 1
 };
 
+enum devlink_reload_limit {
+	DEVLINK_RELOAD_LIMIT_UNSPEC,	/* unspecified, no constraints */
+	DEVLINK_RELOAD_LIMIT_NO_RESET,	/* No reset allowed, no down time allowed,
+					 * no link flap and no configuration is lost.
+					 */
+
+	/* Add new reload limit above */
+	__DEVLINK_RELOAD_LIMIT_MAX,
+	DEVLINK_RELOAD_LIMIT_MAX = __DEVLINK_RELOAD_LIMIT_MAX - 1
+};
+
+#define DEVLINK_RELOAD_LIMITS_VALID_MASK (BIT(__DEVLINK_RELOAD_LIMIT_MAX) - 1)
+
 enum devlink_attr {
 	/* don't change the order or add anything between, this is ABI! */
 	DEVLINK_ATTR_UNSPEC,
@@ -505,6 +518,7 @@ enum devlink_attr {
 
 	DEVLINK_ATTR_RELOAD_ACTION,		/* u8 */
 	DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED,	/* bitfield32 */
+	DEVLINK_ATTR_RELOAD_LIMITS,		/* bitfield32 */
 
 	/* add new attributes above here, update the policy in devlink.c */
 
diff --git a/net/core/devlink.c b/net/core/devlink.c
index c026ed3519c9..28b63faa3c6b 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -479,12 +479,44 @@ static int devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink)
 	return 0;
 }
 
+struct devlink_reload_combination {
+	enum devlink_reload_action action;
+	enum devlink_reload_limit limit;
+};
+
+static const struct devlink_reload_combination devlink_reload_invalid_combinations[] = {
+	{
+		/* can't reinitialize driver with no down time */
+		.action = DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
+		.limit = DEVLINK_RELOAD_LIMIT_NO_RESET,
+	},
+};
+
+static bool
+devlink_reload_combination_is_invalid(enum devlink_reload_action action,
+				      enum devlink_reload_limit limit)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(devlink_reload_invalid_combinations); i++)
+		if (devlink_reload_invalid_combinations[i].action == action &&
+		    devlink_reload_invalid_combinations[i].limit == limit)
+			return true;
+	return false;
+}
+
 static bool
 devlink_reload_action_is_supported(struct devlink *devlink, enum devlink_reload_action action)
 {
 	return test_bit(action, &devlink->ops->reload_actions);
 }
 
+static bool
+devlink_reload_limit_is_supported(struct devlink *devlink, enum devlink_reload_limit limit)
+{
+	return test_bit(limit, &devlink->ops->reload_limits);
+}
+
 static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
 			   enum devlink_command cmd, u32 portid,
 			   u32 seq, int flags)
@@ -2990,22 +3022,22 @@ bool devlink_is_reload_failed(const struct devlink *devlink)
 EXPORT_SYMBOL_GPL(devlink_is_reload_failed);
 
 static int devlink_reload(struct devlink *devlink, struct net *dest_net,
-			  enum devlink_reload_action action, u32 *actions_performed,
-			  struct netlink_ext_ack *extack)
+			  enum devlink_reload_action action, enum devlink_reload_limit limit,
+			  u32 *actions_performed, struct netlink_ext_ack *extack)
 {
 	int err;
 
 	if (!devlink->reload_enabled)
 		return -EOPNOTSUPP;
 
-	err = devlink->ops->reload_down(devlink, !!dest_net, action, extack);
+	err = devlink->ops->reload_down(devlink, !!dest_net, action, limit, extack);
 	if (err)
 		return err;
 
 	if (dest_net && !net_eq(dest_net, devlink_net(devlink)))
 		devlink_reload_netns_change(devlink, dest_net);
 
-	err = devlink->ops->reload_up(devlink, action, actions_performed, extack);
+	err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack);
 	devlink_reload_failed_set(devlink, !!err);
 	if (err)
 		return err;
@@ -3050,6 +3082,7 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
 {
 	struct devlink *devlink = info->user_ptr[0];
 	enum devlink_reload_action action;
+	enum devlink_reload_limit limit;
 	struct net *dest_net = NULL;
 	u32 actions_performed;
 	int err;
@@ -3082,7 +3115,38 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
 		return -EOPNOTSUPP;
 	}
 
-	err = devlink_reload(devlink, dest_net, action, &actions_performed, info->extack);
+	limit = DEVLINK_RELOAD_LIMIT_UNSPEC;
+	if (info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]) {
+		struct nla_bitfield32 limits;
+		u32 limits_selected;
+
+		limits = nla_get_bitfield32(info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]);
+		limits_selected = limits.value & limits.selector;
+		if (!limits_selected) {
+			NL_SET_ERR_MSG_MOD(info->extack, "Invalid limit selected");
+			return -EINVAL;
+		}
+		for (limit = 0 ; limit <= DEVLINK_RELOAD_LIMIT_MAX ; limit++)
+			if (limits_selected & BIT(limit))
+				break;
+		/* UAPI enables multiselection, but currently it is not used */
+		if (limits_selected != BIT(limit)) {
+			NL_SET_ERR_MSG_MOD(info->extack,
+					   "Multiselection of limit is not supported");
+			return -EOPNOTSUPP;
+		}
+		if (!devlink_reload_limit_is_supported(devlink, limit)) {
+			NL_SET_ERR_MSG_MOD(info->extack,
+					   "Requested limit is not supported by the driver");
+			return -EOPNOTSUPP;
+		}
+		if (devlink_reload_combination_is_invalid(action, limit)) {
+			NL_SET_ERR_MSG_MOD(info->extack,
+					   "Requested limit is invalid for this action");
+			return -EINVAL;
+		}
+	}
+	err = devlink_reload(devlink, dest_net, action, limit, &actions_performed, info->extack);
 
 	if (dest_net)
 		put_net(dest_net);
@@ -3090,7 +3154,7 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
 	if (err)
 		return err;
 	/* For backward compatibility generate reply only if attributes used by user */
-	if (!info->attrs[DEVLINK_ATTR_RELOAD_ACTION])
+	if (!info->attrs[DEVLINK_ATTR_RELOAD_ACTION] && !info->attrs[DEVLINK_ATTR_RELOAD_LIMITS])
 		return 0;
 
 	return devlink_nl_reload_actions_performed_snd(devlink, actions_performed,
@@ -7347,6 +7411,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_PORT_FUNCTION] = { .type = NLA_NESTED },
 	[DEVLINK_ATTR_RELOAD_ACTION] = NLA_POLICY_RANGE(NLA_U8, DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
 							DEVLINK_RELOAD_ACTION_MAX),
+	[DEVLINK_ATTR_RELOAD_LIMITS] = NLA_POLICY_BITFIELD32(DEVLINK_RELOAD_LIMITS_VALID_MASK),
 };
 
 static const struct genl_small_ops devlink_nl_ops[] = {
@@ -7682,6 +7747,9 @@ static struct genl_family devlink_nl_family __ro_after_init = {
 
 static bool devlink_reload_actions_valid(const struct devlink_ops *ops)
 {
+	const struct devlink_reload_combination *comb;
+	int i;
+
 	if (!devlink_reload_supported(ops)) {
 		if (WARN_ON(ops->reload_actions))
 			return false;
@@ -7692,6 +7760,17 @@ static bool devlink_reload_actions_valid(const struct devlink_ops *ops)
 		    ops->reload_actions & BIT(DEVLINK_RELOAD_ACTION_UNSPEC) ||
 		    ops->reload_actions >= BIT(__DEVLINK_RELOAD_ACTION_MAX)))
 		return false;
+
+	if (WARN_ON(ops->reload_limits & BIT(DEVLINK_RELOAD_LIMIT_UNSPEC) ||
+		    ops->reload_limits >= BIT(__DEVLINK_RELOAD_LIMIT_MAX)))
+		return false;
+
+	for (i = 0; i < ARRAY_SIZE(devlink_reload_invalid_combinations); i++)  {
+		comb = &devlink_reload_invalid_combinations[i];
+		if (ops->reload_actions == BIT(comb->action) &&
+		    ops->reload_limits == BIT(comb->limit))
+			return false;
+	}
 	return true;
 }
 
@@ -10056,6 +10135,7 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
 				continue;
 			err = devlink_reload(devlink, &init_net,
 					     DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
+					     DEVLINK_RELOAD_LIMIT_UNSPEC,
 					     &actions_performed, NULL);
 			if (err && err != -EOPNOTSUPP)
 				pr_warn("Failed to reload devlink instance into init_net\n");
-- 
cgit v1.2.3


From a254c264267e8746fb257806c166e54375cf9c06 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Wed, 7 Oct 2020 09:00:45 +0300
Subject: devlink: Add reload stats

Add reload stats to hold the history per reload action type and limit.

For example, the number of times fw_activate has been performed on this
device since the driver module was added, counted separately for
firmware activation performed with and without reset.

Add devlink notification on stats update.

Expose devlink reload stats to the user through devlink dev get command.

Examples:
$ devlink dev show
pci/0000:82:00.0:
  stats:
      reload:
        driver_reinit 2 fw_activate 1 fw_activate_no_reset 0
pci/0000:82:00.1:
  stats:
      reload:
        driver_reinit 1 fw_activate 0 fw_activate_no_reset 0

$ devlink dev show -jp
{
    "dev": {
        "pci/0000:82:00.0": {
            "stats": {
                "reload": {
                    "driver_reinit": 2,
                    "fw_activate": 1,
                    "fw_activate_no_reset": 0
                }
            }
        },
        "pci/0000:82:00.1": {
            "stats": {
                "reload": {
                    "driver_reinit": 1,
                    "fw_activate": 0,
                    "fw_activate_no_reset": 0
                }
            }
        }
    }
}

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/devlink.h        |  8 ++++
 include/uapi/linux/devlink.h |  6 +++
 net/core/devlink.c           | 90 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 104 insertions(+)

(limited to 'include/uapi')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 9f5c37c391f8..d091c6ba82ce 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -20,6 +20,13 @@
 #include <uapi/linux/devlink.h>
 #include <linux/xarray.h>
 
+#define DEVLINK_RELOAD_STATS_ARRAY_SIZE \
+	(__DEVLINK_RELOAD_LIMIT_MAX * __DEVLINK_RELOAD_ACTION_MAX)
+
+struct devlink_dev_stats {
+	u32 reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
+};
+
 struct devlink_ops;
 
 struct devlink {
@@ -38,6 +45,7 @@ struct devlink {
 	struct list_head trap_policer_list;
 	const struct devlink_ops *ops;
 	struct xarray snapshot_ids;
+	struct devlink_dev_stats stats;
 	struct device *dev;
 	possible_net_t _net;
 	struct mutex lock; /* Serializes access to devlink instance specific objects such as
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 82a5e66c1518..ab15fc597b74 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -520,6 +520,12 @@ enum devlink_attr {
 	DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED,	/* bitfield32 */
 	DEVLINK_ATTR_RELOAD_LIMITS,		/* bitfield32 */
 
+	DEVLINK_ATTR_DEV_STATS,			/* nested */
+	DEVLINK_ATTR_RELOAD_STATS,		/* nested */
+	DEVLINK_ATTR_RELOAD_STATS_ENTRY,	/* nested */
+	DEVLINK_ATTR_RELOAD_STATS_LIMIT,	/* u8 */
+	DEVLINK_ATTR_RELOAD_STATS_VALUE,	/* u32 */
+
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 28b63faa3c6b..a167c3bb468c 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -517,10 +517,66 @@ devlink_reload_limit_is_supported(struct devlink *devlink, enum devlink_reload_l
 	return test_bit(limit, &devlink->ops->reload_limits);
 }
 
+static int devlink_reload_stat_put(struct sk_buff *msg, enum devlink_reload_action action,
+				   enum devlink_reload_limit limit, u32 value)
+{
+	struct nlattr *reload_stats_entry;
+
+	reload_stats_entry = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS_ENTRY);
+	if (!reload_stats_entry)
+		return -EMSGSIZE;
+
+	if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, action) ||
+	    nla_put_u8(msg, DEVLINK_ATTR_RELOAD_STATS_LIMIT, limit) ||
+	    nla_put_u32(msg, DEVLINK_ATTR_RELOAD_STATS_VALUE, value))
+		goto nla_put_failure;
+	nla_nest_end(msg, reload_stats_entry);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, reload_stats_entry);
+	return -EMSGSIZE;
+}
+
+static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink)
+{
+	struct nlattr *reload_stats_attr;
+	int i, j, stat_idx;
+	u32 value;
+
+	reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS);
+
+	if (!reload_stats_attr)
+		return -EMSGSIZE;
+
+	for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) {
+		if (j != DEVLINK_RELOAD_LIMIT_UNSPEC &&
+		    !devlink_reload_limit_is_supported(devlink, j))
+			continue;
+		for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) {
+			if (!devlink_reload_action_is_supported(devlink, i) ||
+			    devlink_reload_combination_is_invalid(i, j))
+				continue;
+
+			stat_idx = j * __DEVLINK_RELOAD_ACTION_MAX + i;
+			value = devlink->stats.reload_stats[stat_idx];
+			if (devlink_reload_stat_put(msg, i, j, value))
+				goto nla_put_failure;
+		}
+	}
+	nla_nest_end(msg, reload_stats_attr);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, reload_stats_attr);
+	return -EMSGSIZE;
+}
+
 static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
 			   enum devlink_command cmd, u32 portid,
 			   u32 seq, int flags)
 {
+	struct nlattr *dev_stats;
 	void *hdr;
 
 	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
@@ -532,9 +588,19 @@ static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
 	if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_FAILED, devlink->reload_failed))
 		goto nla_put_failure;
 
+	dev_stats = nla_nest_start(msg, DEVLINK_ATTR_DEV_STATS);
+	if (!dev_stats)
+		goto nla_put_failure;
+
+	if (devlink_reload_stats_put(msg, devlink))
+		goto dev_stats_nest_cancel;
+
+	nla_nest_end(msg, dev_stats);
 	genlmsg_end(msg, hdr);
 	return 0;
 
+dev_stats_nest_cancel:
+	nla_nest_cancel(msg, dev_stats);
 nla_put_failure:
 	genlmsg_cancel(msg, hdr);
 	return -EMSGSIZE;
@@ -3021,6 +3087,29 @@ bool devlink_is_reload_failed(const struct devlink *devlink)
 }
 EXPORT_SYMBOL_GPL(devlink_is_reload_failed);
 
+static void
+__devlink_reload_stats_update(struct devlink *devlink, u32 *reload_stats,
+			      enum devlink_reload_limit limit, u32 actions_performed)
+{
+	unsigned long actions = actions_performed;
+	int stat_idx;
+	int action;
+
+	for_each_set_bit(action, &actions, __DEVLINK_RELOAD_ACTION_MAX) {
+		stat_idx = limit * __DEVLINK_RELOAD_ACTION_MAX + action;
+		reload_stats[stat_idx]++;
+	}
+	devlink_notify(devlink, DEVLINK_CMD_NEW);
+}
+
+static void
+devlink_reload_stats_update(struct devlink *devlink, enum devlink_reload_limit limit,
+			    u32 actions_performed)
+{
+	__devlink_reload_stats_update(devlink, devlink->stats.reload_stats, limit,
+				      actions_performed);
+}
+
 static int devlink_reload(struct devlink *devlink, struct net *dest_net,
 			  enum devlink_reload_action action, enum devlink_reload_limit limit,
 			  u32 *actions_performed, struct netlink_ext_ack *extack)
@@ -3043,6 +3132,7 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net,
 		return err;
 
 	WARN_ON(!(*actions_performed & BIT(action)));
+	devlink_reload_stats_update(devlink, limit, *actions_performed);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 77069ba2e3adf48c472fbbd9cbd7a4f5370b17df Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Wed, 7 Oct 2020 09:00:46 +0300
Subject: devlink: Add remote reload stats

Add remote reload stats to hold the history of actions performed due to
devlink reload commands initiated by a remote host. For example, in case
firmware activation with reset finished successfully but was initiated
by a remote host.

The function devlink_remote_reload_actions_performed() is exported to
enable drivers to update on remote reload actions performed, as they
were not initiated by their own devlink instance.

Expose devlink remote reload stats to the user through devlink dev get
command.

Examples:
$ devlink dev show
pci/0000:82:00.0:
  stats:
      reload:
        driver_reinit 2 fw_activate 1 fw_activate_no_reset 0
      remote_reload:
        driver_reinit 0 fw_activate 0 fw_activate_no_reset 0
pci/0000:82:00.1:
  stats:
      reload:
        driver_reinit 1 fw_activate 0 fw_activate_no_reset 0
      remote_reload:
        driver_reinit 1 fw_activate 1 fw_activate_no_reset 0

$ devlink dev show -jp
{
    "dev": {
        "pci/0000:82:00.0": {
            "stats": {
                "reload": {
                    "driver_reinit": 2,
                    "fw_activate": 1,
                    "fw_activate_no_reset": 0
                },
                "remote_reload": {
                    "driver_reinit": 0,
                    "fw_activate": 0,
                    "fw_activate_no_reset": 0
                }
            }
        },
        "pci/0000:82:00.1": {
            "stats": {
                "reload": {
                    "driver_reinit": 1,
                    "fw_activate": 0,
                    "fw_activate_no_reset": 0
                },
                "remote_reload": {
                    "driver_reinit": 1,
                    "fw_activate": 1,
                    "fw_activate_no_reset": 0
                }
            }
        }
    }
}

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/devlink.h        |  4 +++
 include/uapi/linux/devlink.h |  1 +
 net/core/devlink.c           | 60 +++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 59 insertions(+), 6 deletions(-)

(limited to 'include/uapi')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index d091c6ba82ce..d2771e57a278 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -25,6 +25,7 @@
 
 struct devlink_dev_stats {
 	u32 reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
+	u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
 };
 
 struct devlink_ops;
@@ -1567,6 +1568,9 @@ void
 devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter);
 
 bool devlink_is_reload_failed(const struct devlink *devlink);
+void devlink_remote_reload_actions_performed(struct devlink *devlink,
+					     enum devlink_reload_limit limit,
+					     u32 actions_performed);
 
 void devlink_flash_update_begin_notify(struct devlink *devlink);
 void devlink_flash_update_end_notify(struct devlink *devlink);
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index ab15fc597b74..0113bc4db9f5 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -525,6 +525,7 @@ enum devlink_attr {
 	DEVLINK_ATTR_RELOAD_STATS_ENTRY,	/* nested */
 	DEVLINK_ATTR_RELOAD_STATS_LIMIT,	/* u8 */
 	DEVLINK_ATTR_RELOAD_STATS_VALUE,	/* u32 */
+	DEVLINK_ATTR_REMOTE_RELOAD_STATS,	/* nested */
 
 	/* add new attributes above here, update the policy in devlink.c */
 
diff --git a/net/core/devlink.c b/net/core/devlink.c
index a167c3bb468c..dd889334fed9 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -538,28 +538,39 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink)
+static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink, bool is_remote)
 {
 	struct nlattr *reload_stats_attr;
 	int i, j, stat_idx;
 	u32 value;
 
-	reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS);
+	if (!is_remote)
+		reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS);
+	else
+		reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_REMOTE_RELOAD_STATS);
 
 	if (!reload_stats_attr)
 		return -EMSGSIZE;
 
 	for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) {
-		if (j != DEVLINK_RELOAD_LIMIT_UNSPEC &&
+		/* Remote stats are shown even if not locally supported. Stats
+		 * of actions with unspecified limit are shown though drivers
+		 * don't need to register unspecified limit.
+		 */
+		if (!is_remote && j != DEVLINK_RELOAD_LIMIT_UNSPEC &&
 		    !devlink_reload_limit_is_supported(devlink, j))
 			continue;
 		for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) {
-			if (!devlink_reload_action_is_supported(devlink, i) ||
+			if ((!is_remote && !devlink_reload_action_is_supported(devlink, i)) ||
+			    i == DEVLINK_RELOAD_ACTION_UNSPEC ||
 			    devlink_reload_combination_is_invalid(i, j))
 				continue;
 
 			stat_idx = j * __DEVLINK_RELOAD_ACTION_MAX + i;
-			value = devlink->stats.reload_stats[stat_idx];
+			if (!is_remote)
+				value = devlink->stats.reload_stats[stat_idx];
+			else
+				value = devlink->stats.remote_reload_stats[stat_idx];
 			if (devlink_reload_stat_put(msg, i, j, value))
 				goto nla_put_failure;
 		}
@@ -592,7 +603,9 @@ static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
 	if (!dev_stats)
 		goto nla_put_failure;
 
-	if (devlink_reload_stats_put(msg, devlink))
+	if (devlink_reload_stats_put(msg, devlink, false))
+		goto dev_stats_nest_cancel;
+	if (devlink_reload_stats_put(msg, devlink, true))
 		goto dev_stats_nest_cancel;
 
 	nla_nest_end(msg, dev_stats);
@@ -3110,15 +3123,47 @@ devlink_reload_stats_update(struct devlink *devlink, enum devlink_reload_limit l
 				      actions_performed);
 }
 
+/**
+ *	devlink_remote_reload_actions_performed - Update devlink on reload actions
+ *	  performed which are not a direct result of devlink reload call.
+ *
+ *	This should be called by a driver after performing reload actions in case it was not
+ *	a result of devlink reload call. For example fw_activate was performed as a result
+ *	of devlink reload triggered fw_activate on another host.
+ *	The motivation for this function is to keep data on reload actions performed on this
+ *	function whether it was done due to direct devlink reload call or not.
+ *
+ *	@devlink: devlink
+ *	@limit: reload limit
+ *	@actions_performed: bitmask of actions performed
+ */
+void devlink_remote_reload_actions_performed(struct devlink *devlink,
+					     enum devlink_reload_limit limit,
+					     u32 actions_performed)
+{
+	if (WARN_ON(!actions_performed ||
+		    actions_performed & BIT(DEVLINK_RELOAD_ACTION_UNSPEC) ||
+		    actions_performed >= BIT(__DEVLINK_RELOAD_ACTION_MAX) ||
+		    limit > DEVLINK_RELOAD_LIMIT_MAX))
+		return;
+
+	__devlink_reload_stats_update(devlink, devlink->stats.remote_reload_stats, limit,
+				      actions_performed);
+}
+EXPORT_SYMBOL_GPL(devlink_remote_reload_actions_performed);
+
 static int devlink_reload(struct devlink *devlink, struct net *dest_net,
 			  enum devlink_reload_action action, enum devlink_reload_limit limit,
 			  u32 *actions_performed, struct netlink_ext_ack *extack)
 {
+	u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
 	int err;
 
 	if (!devlink->reload_enabled)
 		return -EOPNOTSUPP;
 
+	memcpy(remote_reload_stats, devlink->stats.remote_reload_stats,
+	       sizeof(remote_reload_stats));
 	err = devlink->ops->reload_down(devlink, !!dest_net, action, limit, extack);
 	if (err)
 		return err;
@@ -3132,6 +3177,9 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net,
 		return err;
 
 	WARN_ON(!(*actions_performed & BIT(action)));
+	/* Catch driver on updating the remote action within devlink reload */
+	WARN_ON(memcmp(remote_reload_stats, devlink->stats.remote_reload_stats,
+		       sizeof(remote_reload_stats)));
 	devlink_reload_stats_update(devlink, limit, *actions_performed);
 	return 0;
 }
-- 
cgit v1.2.3


From 44f3625bc61653ea3bde9960298faf2f5518fda5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 8 Oct 2020 12:45:17 +0200
Subject: netlink: export policy in extended ACK

Add a new attribute NLMSGERR_ATTR_POLICY to the extended ACK
to advertise the policy, e.g. if an attribute was out of range,
you'll know the range that's permissible.

Add new NL_SET_BAD_ATTR_POLICY() and NL_SET_ERR_MSG_ATTR_POL()
macros to set this, since realistically it's only useful to do
this when the bad attribute (offset) is also returned.

Use it in lib/nlattr.c which practically does all the policy
validation.

v2:
 - add and use netlink_policy_dump_attr_size_estimate()
v3:
 - remove redundant break
v4:
 - really remove redundant break ... sorry

Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netlink.h      | 30 ++++++++++++++--------
 include/net/netlink.h        |  4 +++
 include/uapi/linux/netlink.h |  2 ++
 lib/nlattr.c                 | 35 +++++++++++++------------
 net/netlink/af_netlink.c     |  5 ++++
 net/netlink/policy.c         | 61 ++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 110 insertions(+), 27 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index e3e49f0e5c13..666cd0390699 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -68,12 +68,14 @@ netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg)
  * @_msg: message string to report - don't access directly, use
  *	%NL_SET_ERR_MSG
  * @bad_attr: attribute with error
+ * @policy: policy for a bad attribute
  * @cookie: cookie data to return to userspace (for success)
  * @cookie_len: actual cookie data length
  */
 struct netlink_ext_ack {
 	const char *_msg;
 	const struct nlattr *bad_attr;
+	const struct nla_policy *policy;
 	u8 cookie[NETLINK_MAX_COOKIE_LEN];
 	u8 cookie_len;
 };
@@ -95,21 +97,29 @@ struct netlink_ext_ack {
 #define NL_SET_ERR_MSG_MOD(extack, msg)			\
 	NL_SET_ERR_MSG((extack), KBUILD_MODNAME ": " msg)
 
-#define NL_SET_BAD_ATTR(extack, attr) do {		\
-	if ((extack))					\
+#define NL_SET_BAD_ATTR_POLICY(extack, attr, pol) do {	\
+	if ((extack)) {					\
 		(extack)->bad_attr = (attr);		\
+		(extack)->policy = (pol);		\
+	}						\
 } while (0)
 
-#define NL_SET_ERR_MSG_ATTR(extack, attr, msg) do {	\
-	static const char __msg[] = msg;		\
-	struct netlink_ext_ack *__extack = (extack);	\
-							\
-	if (__extack) {					\
-		__extack->_msg = __msg;			\
-		__extack->bad_attr = (attr);		\
-	}						\
+#define NL_SET_BAD_ATTR(extack, attr) NL_SET_BAD_ATTR_POLICY(extack, attr, NULL)
+
+#define NL_SET_ERR_MSG_ATTR_POL(extack, attr, pol, msg) do {	\
+	static const char __msg[] = msg;			\
+	struct netlink_ext_ack *__extack = (extack);		\
+								\
+	if (__extack) {						\
+		__extack->_msg = __msg;				\
+		__extack->bad_attr = (attr);			\
+		__extack->policy = (pol);			\
+	}							\
 } while (0)
 
+#define NL_SET_ERR_MSG_ATTR(extack, attr, msg)		\
+	NL_SET_ERR_MSG_ATTR_POL(extack, attr, NULL, msg)
+
 static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack,
 					    u64 cookie)
 {
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 2b9e41075f19..7356f41d23ba 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -1957,6 +1957,10 @@ int netlink_policy_dump_get_policy_idx(struct netlink_policy_dump_state *state,
 bool netlink_policy_dump_loop(struct netlink_policy_dump_state *state);
 int netlink_policy_dump_write(struct sk_buff *skb,
 			      struct netlink_policy_dump_state *state);
+int netlink_policy_dump_attr_size_estimate(const struct nla_policy *pt);
+int netlink_policy_dump_write_attr(struct sk_buff *skb,
+				   const struct nla_policy *pt,
+				   int nestattr);
 void netlink_policy_dump_free(struct netlink_policy_dump_state *state);
 
 #endif
diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index d02e472ba54c..c3816ff7bfc3 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -129,6 +129,7 @@ struct nlmsgerr {
  * @NLMSGERR_ATTR_COOKIE: arbitrary subsystem specific cookie to
  *	be used - in the success case - to identify a created
  *	object or operation or similar (binary)
+ * @NLMSGERR_ATTR_POLICY: policy for a rejected attribute
  * @__NLMSGERR_ATTR_MAX: number of attributes
  * @NLMSGERR_ATTR_MAX: highest attribute number
  */
@@ -137,6 +138,7 @@ enum nlmsgerr_attrs {
 	NLMSGERR_ATTR_MSG,
 	NLMSGERR_ATTR_OFFS,
 	NLMSGERR_ATTR_COOKIE,
+	NLMSGERR_ATTR_POLICY,
 
 	__NLMSGERR_ATTR_MAX,
 	NLMSGERR_ATTR_MAX = __NLMSGERR_ATTR_MAX - 1
diff --git a/lib/nlattr.c b/lib/nlattr.c
index 9c99f5daa4d2..74019c8ebf6b 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -96,8 +96,8 @@ static int nla_validate_array(const struct nlattr *head, int len, int maxtype,
 			continue;
 
 		if (nla_len(entry) < NLA_HDRLEN) {
-			NL_SET_ERR_MSG_ATTR(extack, entry,
-					    "Array element too short");
+			NL_SET_ERR_MSG_ATTR_POL(extack, entry, policy,
+						"Array element too short");
 			return -ERANGE;
 		}
 
@@ -195,8 +195,8 @@ static int nla_validate_range_unsigned(const struct nla_policy *pt,
 		pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n",
 				    current->comm, pt->type);
 		if (validate & NL_VALIDATE_STRICT_ATTRS) {
-			NL_SET_ERR_MSG_ATTR(extack, nla,
-					    "invalid attribute length");
+			NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt,
+						"invalid attribute length");
 			return -EINVAL;
 		}
 
@@ -208,11 +208,11 @@ static int nla_validate_range_unsigned(const struct nla_policy *pt,
 		bool binary = pt->type == NLA_BINARY;
 
 		if (binary)
-			NL_SET_ERR_MSG_ATTR(extack, nla,
-					    "binary attribute size out of range");
+			NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt,
+						"binary attribute size out of range");
 		else
-			NL_SET_ERR_MSG_ATTR(extack, nla,
-					    "integer out of range");
+			NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt,
+						"integer out of range");
 
 		return -ERANGE;
 	}
@@ -291,8 +291,8 @@ static int nla_validate_int_range_signed(const struct nla_policy *pt,
 	nla_get_range_signed(pt, &range);
 
 	if (value < range.min || value > range.max) {
-		NL_SET_ERR_MSG_ATTR(extack, nla,
-				    "integer out of range");
+		NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt,
+					"integer out of range");
 		return -ERANGE;
 	}
 
@@ -377,8 +377,8 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 		pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n",
 				    current->comm, type);
 		if (validate & NL_VALIDATE_STRICT_ATTRS) {
-			NL_SET_ERR_MSG_ATTR(extack, nla,
-					    "invalid attribute length");
+			NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt,
+						"invalid attribute length");
 			return -EINVAL;
 		}
 	}
@@ -386,14 +386,14 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 	if (validate & NL_VALIDATE_NESTED) {
 		if ((pt->type == NLA_NESTED || pt->type == NLA_NESTED_ARRAY) &&
 		    !(nla->nla_type & NLA_F_NESTED)) {
-			NL_SET_ERR_MSG_ATTR(extack, nla,
-					    "NLA_F_NESTED is missing");
+			NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt,
+						"NLA_F_NESTED is missing");
 			return -EINVAL;
 		}
 		if (pt->type != NLA_NESTED && pt->type != NLA_NESTED_ARRAY &&
 		    pt->type != NLA_UNSPEC && (nla->nla_type & NLA_F_NESTED)) {
-			NL_SET_ERR_MSG_ATTR(extack, nla,
-					    "NLA_F_NESTED not expected");
+			NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt,
+						"NLA_F_NESTED not expected");
 			return -EINVAL;
 		}
 	}
@@ -550,7 +550,8 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 
 	return 0;
 out_err:
-	NL_SET_ERR_MSG_ATTR(extack, nla, "Attribute failed policy validation");
+	NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt,
+				"Attribute failed policy validation");
 	return err;
 }
 
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index df675a8e1918..daca50d6bb12 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2420,6 +2420,8 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
 		tlvlen += nla_total_size(sizeof(u32));
 	if (nlk_has_extack && extack && extack->cookie_len)
 		tlvlen += nla_total_size(extack->cookie_len);
+	if (err && nlk_has_extack && extack && extack->policy)
+		tlvlen += netlink_policy_dump_attr_size_estimate(extack->policy);
 
 	if (tlvlen)
 		flags |= NLM_F_ACK_TLVS;
@@ -2452,6 +2454,9 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
 		if (extack->cookie_len)
 			WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE,
 					extack->cookie_len, extack->cookie));
+		if (extack->policy)
+			netlink_policy_dump_write_attr(skb, extack->policy,
+						       NLMSGERR_ATTR_POLICY);
 	}
 
 	nlmsg_end(skb, rep);
diff --git a/net/netlink/policy.c b/net/netlink/policy.c
index 4383436759e2..8d7c900e27f4 100644
--- a/net/netlink/policy.c
+++ b/net/netlink/policy.c
@@ -196,12 +196,54 @@ bool netlink_policy_dump_loop(struct netlink_policy_dump_state *state)
 	return !netlink_policy_dump_finished(state);
 }
 
+int netlink_policy_dump_attr_size_estimate(const struct nla_policy *pt)
+{
+	/* nested + type */
+	int common = 2 * nla_attr_size(sizeof(u32));
+
+	switch (pt->type) {
+	case NLA_UNSPEC:
+	case NLA_REJECT:
+		/* these actually don't need any space */
+		return 0;
+	case NLA_NESTED:
+	case NLA_NESTED_ARRAY:
+		/* common, policy idx, policy maxattr */
+		return common + 2 * nla_attr_size(sizeof(u32));
+	case NLA_U8:
+	case NLA_U16:
+	case NLA_U32:
+	case NLA_U64:
+	case NLA_MSECS:
+	case NLA_S8:
+	case NLA_S16:
+	case NLA_S32:
+	case NLA_S64:
+		/* maximum is common, u64 min/max with padding */
+		return common +
+		       2 * (nla_attr_size(0) + nla_attr_size(sizeof(u64)));
+	case NLA_BITFIELD32:
+		return common + nla_attr_size(sizeof(u32));
+	case NLA_STRING:
+	case NLA_NUL_STRING:
+	case NLA_BINARY:
+		/* maximum is common, u32 min-length/max-length */
+		return common + 2 * nla_attr_size(sizeof(u32));
+	case NLA_FLAG:
+		return common;
+	}
+
+	/* this should then cause a warning later */
+	return 0;
+}
+
 static int
 __netlink_policy_dump_write_attr(struct netlink_policy_dump_state *state,
 				 struct sk_buff *skb,
 				 const struct nla_policy *pt,
 				 int nestattr)
 {
+	int estimate = netlink_policy_dump_attr_size_estimate(pt);
 	enum netlink_attribute_type type;
 	struct nlattr *attr;
 
@@ -334,12 +376,31 @@ __netlink_policy_dump_write_attr(struct netlink_policy_dump_state *state,
 		goto nla_put_failure;
 
 	nla_nest_end(skb, attr);
+	WARN_ON(attr->nla_len > estimate);
+
 	return 0;
 nla_put_failure:
 	nla_nest_cancel(skb, attr);
 	return -ENOBUFS;
 }
 
+/**
+ * netlink_policy_dump_write_attr - write a given attribute policy
+ * @skb: the message skb to write to
+ * @pt: the attribute's policy
+ * @nestattr: the nested attribute ID to use
+ *
+ * Returns: 0 on success, an error code otherwise; -%ENODATA is
+ *	    special, indicating that there's no policy data and
+ *	    the attribute is generally rejected.
+ */
+int netlink_policy_dump_write_attr(struct sk_buff *skb,
+				   const struct nla_policy *pt,
+				   int nestattr)
+{
+	return __netlink_policy_dump_write_attr(NULL, skb, pt, nestattr);
+}
+
 /**
  * netlink_policy_dump_write - write current policy dump attributes
  * @skb: the message skb to write to
-- 
cgit v1.2.3


From dd2ce6a5373c6f5c830be54be10775458a8bd312 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sun, 11 Oct 2020 01:40:01 +0200
Subject: bpf: Improve bpf_redirect_neigh helper description

Follow-up to address David's feedback that we should better describe internals
of the bpf_redirect_neigh() helper.

Suggested-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: David Ahern <dsahern@gmail.com>
Link: https://lore.kernel.org/bpf/20201010234006.7075-2-daniel@iogearbox.net
---
 include/uapi/linux/bpf.h       | 10 +++++++---
 tools/include/uapi/linux/bpf.h | 10 +++++++---
 2 files changed, 14 insertions(+), 6 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 42d2df799397..4272cc53d478 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3679,10 +3679,14 @@ union bpf_attr {
  * 		Redirect the packet to another net device of index *ifindex*
  * 		and fill in L2 addresses from neighboring subsystem. This helper
  * 		is somewhat similar to **bpf_redirect**\ (), except that it
- * 		fills in e.g. MAC addresses based on the L3 information from
- * 		the packet. This helper is supported for IPv4 and IPv6 protocols.
+ * 		populates L2 addresses as well, meaning, internally, the helper
+ * 		performs a FIB lookup based on the skb's networking header to
+ * 		get the address of the next hop and then relies on the neighbor
+ * 		lookup for the L2 address of the nexthop.
+ *
  * 		The *flags* argument is reserved and must be 0. The helper is
- * 		currently only supported for tc BPF program types.
+ * 		currently only supported for tc BPF program types, and enabled
+ * 		for IPv4 and IPv6 protocols.
  * 	Return
  * 		The helper returns **TC_ACT_REDIRECT** on success or
  * 		**TC_ACT_SHOT** on error.
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 42d2df799397..4272cc53d478 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3679,10 +3679,14 @@ union bpf_attr {
  * 		Redirect the packet to another net device of index *ifindex*
  * 		and fill in L2 addresses from neighboring subsystem. This helper
  * 		is somewhat similar to **bpf_redirect**\ (), except that it
- * 		fills in e.g. MAC addresses based on the L3 information from
- * 		the packet. This helper is supported for IPv4 and IPv6 protocols.
+ * 		populates L2 addresses as well, meaning, internally, the helper
+ * 		performs a FIB lookup based on the skb's networking header to
+ * 		get the address of the next hop and then relies on the neighbor
+ * 		lookup for the L2 address of the nexthop.
+ *
  * 		The *flags* argument is reserved and must be 0. The helper is
- * 		currently only supported for tc BPF program types.
+ * 		currently only supported for tc BPF program types, and enabled
+ * 		for IPv4 and IPv6 protocols.
  * 	Return
  * 		The helper returns **TC_ACT_REDIRECT** on success or
  * 		**TC_ACT_SHOT** on error.
-- 
cgit v1.2.3


From 9aa1206e8f48222f35a0c809f33b2f4aaa1e2661 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sun, 11 Oct 2020 01:40:02 +0200
Subject: bpf: Add redirect_peer helper

Add an efficient ingress to ingress netns switch that can be used out of tc BPF
programs in order to redirect traffic from host ns ingress into a container
veth device ingress without having to go via CPU backlog queue [0]. For local
containers this can also be utilized, and the path via the CPU backlog queue
only needs to be taken once, not twice. On a high level this borrows from ipvlan,
which does a similar switch in __netif_receive_skb_core() and then iterates via
another_round.
This helps to reduce latency for mentioned use cases.

Pod to remote pod with redirect(), TCP_RR [1]:

  # percpu_netperf 10.217.1.33
          RT_LATENCY:         122.450         (per CPU:         122.666         122.401         122.333         122.401 )
        MEAN_LATENCY:         121.210         (per CPU:         121.100         121.260         121.320         121.160 )
      STDDEV_LATENCY:         120.040         (per CPU:         119.420         119.910         125.460         115.370 )
         MIN_LATENCY:          46.500         (per CPU:          47.000          47.000          47.000          45.000 )
         P50_LATENCY:         118.500         (per CPU:         118.000         119.000         118.000         119.000 )
         P90_LATENCY:         127.500         (per CPU:         127.000         128.000         127.000         128.000 )
         P99_LATENCY:         130.750         (per CPU:         131.000         131.000         129.000         132.000 )

    TRANSACTION_RATE:       32666.400         (per CPU:        8152.200        8169.842        8174.439        8169.897 )

Pod to remote pod with redirect_peer(), TCP_RR:

  # percpu_netperf 10.217.1.33
          RT_LATENCY:          44.449         (per CPU:          43.767          43.127          45.279          45.622 )
        MEAN_LATENCY:          45.065         (per CPU:          44.030          45.530          45.190          45.510 )
      STDDEV_LATENCY:          84.823         (per CPU:          66.770          97.290          84.380          90.850 )
         MIN_LATENCY:          33.500         (per CPU:          33.000          33.000          34.000          34.000 )
         P50_LATENCY:          43.250         (per CPU:          43.000          43.000          43.000          44.000 )
         P90_LATENCY:          46.750         (per CPU:          46.000          47.000          47.000          47.000 )
         P99_LATENCY:          52.750         (per CPU:          51.000          54.000          53.000          53.000 )

    TRANSACTION_RATE:       90039.500         (per CPU:       22848.186       23187.089       22085.077       21919.130 )

  [0] https://linuxplumbersconf.org/event/7/contributions/674/attachments/568/1002/plumbers_2020_cilium_load_balancer.pdf
  [1] https://github.com/borkmann/netperf_scripts/blob/master/percpu_netperf

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201010234006.7075-3-daniel@iogearbox.net
---
 drivers/net/veth.c             |  9 +++++++
 include/linux/netdevice.h      |  4 ++++
 include/uapi/linux/bpf.h       | 17 +++++++++++++
 net/core/dev.c                 | 15 +++++++++---
 net/core/filter.c              | 54 ++++++++++++++++++++++++++++++++++++------
 tools/include/uapi/linux/bpf.h | 17 +++++++++++++
 6 files changed, 106 insertions(+), 10 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 091e5b4ba042..8c737668008a 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -420,6 +420,14 @@ static int veth_select_rxq(struct net_device *dev)
 	return smp_processor_id() % dev->real_num_rx_queues;
 }
 
+static struct net_device *veth_peer_dev(struct net_device *dev)
+{
+	struct veth_priv *priv = netdev_priv(dev);
+
+	/* Callers must be under RCU read side. */
+	return rcu_dereference(priv->peer);
+}
+
 static int veth_xdp_xmit(struct net_device *dev, int n,
 			 struct xdp_frame **frames,
 			 u32 flags, bool ndo_xmit)
@@ -1224,6 +1232,7 @@ static const struct net_device_ops veth_netdev_ops = {
 	.ndo_set_rx_headroom	= veth_set_rx_headroom,
 	.ndo_bpf		= veth_xdp,
 	.ndo_xdp_xmit		= veth_ndo_xdp_xmit,
+	.ndo_get_peer_dev	= veth_peer_dev,
 };
 
 #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 28cfa53daf72..0533f86018dd 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1277,6 +1277,9 @@ struct netdev_net_notifier {
  * int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p,
  *			 int cmd);
  *	Add, change, delete or get information on an IPv4 tunnel.
+ * struct net_device *(*ndo_get_peer_dev)(struct net_device *dev);
+ *	If a device is paired with a peer device, return the peer instance.
+ *	The caller must be under RCU read context.
  */
 struct net_device_ops {
 	int			(*ndo_init)(struct net_device *dev);
@@ -1484,6 +1487,7 @@ struct net_device_ops {
 	struct devlink_port *	(*ndo_get_devlink_port)(struct net_device *dev);
 	int			(*ndo_tunnel_ctl)(struct net_device *dev,
 						  struct ip_tunnel_parm *p, int cmd);
+	struct net_device *	(*ndo_get_peer_dev)(struct net_device *dev);
 };
 
 /**
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4272cc53d478..b97bc5abb3b8 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3719,6 +3719,22 @@ union bpf_attr {
  *		never return NULL.
  *	Return
  *		A pointer pointing to the kernel percpu variable on this cpu.
+ *
+ * long bpf_redirect_peer(u32 ifindex, u64 flags)
+ * 	Description
+ * 		Redirect the packet to another net device of index *ifindex*.
+ * 		This helper is somewhat similar to **bpf_redirect**\ (), except
+ * 		that the redirection happens to the *ifindex*' peer device and
+ * 		the netns switch takes place from ingress to ingress without
+ * 		going through the CPU's backlog queue.
+ *
+ * 		The *flags* argument is reserved and must be 0. The helper is
+ * 		currently only supported for tc BPF program types at the ingress
+ * 		hook and for veth device types. The peer device must reside in a
+ * 		different network namespace.
+ * 	Return
+ * 		The helper returns **TC_ACT_REDIRECT** on success or
+ * 		**TC_ACT_SHOT** on error.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3876,6 +3892,7 @@ union bpf_attr {
 	FN(redirect_neigh),		\
 	FN(bpf_per_cpu_ptr),            \
 	FN(bpf_this_cpu_ptr),		\
+	FN(redirect_peer),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/net/core/dev.c b/net/core/dev.c
index 9d55bf5d1a65..7dd015823593 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4930,7 +4930,7 @@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
 
 static inline struct sk_buff *
 sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
-		   struct net_device *orig_dev)
+		   struct net_device *orig_dev, bool *another)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
@@ -4974,7 +4974,11 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
 		 * redirecting to another netdev
 		 */
 		__skb_push(skb, skb->mac_len);
-		skb_do_redirect(skb);
+		if (skb_do_redirect(skb) == -EAGAIN) {
+			__skb_pull(skb, skb->mac_len);
+			*another = true;
+			break;
+		}
 		return NULL;
 	case TC_ACT_CONSUMED:
 		return NULL;
@@ -5163,7 +5167,12 @@ another_round:
 skip_taps:
 #ifdef CONFIG_NET_INGRESS
 	if (static_branch_unlikely(&ingress_needed_key)) {
-		skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev);
+		bool another = false;
+
+		skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev,
+					 &another);
+		if (another)
+			goto another_round;
 		if (!skb)
 			goto out;
 
diff --git a/net/core/filter.c b/net/core/filter.c
index 5da44b11e1ec..fab951c6be57 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2380,8 +2380,9 @@ out:
 
 /* Internal, non-exposed redirect flags. */
 enum {
-	BPF_F_NEIGH = (1ULL << 1),
-#define BPF_F_REDIRECT_INTERNAL	(BPF_F_NEIGH)
+	BPF_F_NEIGH	= (1ULL << 1),
+	BPF_F_PEER	= (1ULL << 2),
+#define BPF_F_REDIRECT_INTERNAL	(BPF_F_NEIGH | BPF_F_PEER)
 };
 
 BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
@@ -2430,19 +2431,35 @@ EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info);
 int skb_do_redirect(struct sk_buff *skb)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+	struct net *net = dev_net(skb->dev);
 	struct net_device *dev;
 	u32 flags = ri->flags;
 
-	dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->tgt_index);
+	dev = dev_get_by_index_rcu(net, ri->tgt_index);
 	ri->tgt_index = 0;
-	if (unlikely(!dev)) {
-		kfree_skb(skb);
-		return -EINVAL;
+	ri->flags = 0;
+	if (unlikely(!dev))
+		goto out_drop;
+	if (flags & BPF_F_PEER) {
+		const struct net_device_ops *ops = dev->netdev_ops;
+
+		if (unlikely(!ops->ndo_get_peer_dev ||
+			     !skb_at_tc_ingress(skb)))
+			goto out_drop;
+		dev = ops->ndo_get_peer_dev(dev);
+		if (unlikely(!dev ||
+			     !is_skb_forwardable(dev, skb) ||
+			     net_eq(net, dev_net(dev))))
+			goto out_drop;
+		skb->dev = dev;
+		return -EAGAIN;
 	}
-
 	return flags & BPF_F_NEIGH ?
 	       __bpf_redirect_neigh(skb, dev) :
 	       __bpf_redirect(skb, dev, flags);
+out_drop:
+	kfree_skb(skb);
+	return -EINVAL;
 }
 
 BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
@@ -2466,6 +2483,27 @@ static const struct bpf_func_proto bpf_redirect_proto = {
 	.arg2_type      = ARG_ANYTHING,
 };
 
+BPF_CALL_2(bpf_redirect_peer, u32, ifindex, u64, flags)
+{
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+	if (unlikely(flags))
+		return TC_ACT_SHOT;
+
+	ri->flags = BPF_F_PEER;
+	ri->tgt_index = ifindex;
+
+	return TC_ACT_REDIRECT;
+}
+
+static const struct bpf_func_proto bpf_redirect_peer_proto = {
+	.func           = bpf_redirect_peer,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_ANYTHING,
+	.arg2_type      = ARG_ANYTHING,
+};
+
 BPF_CALL_2(bpf_redirect_neigh, u32, ifindex, u64, flags)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
@@ -7053,6 +7091,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_redirect_proto;
 	case BPF_FUNC_redirect_neigh:
 		return &bpf_redirect_neigh_proto;
+	case BPF_FUNC_redirect_peer:
+		return &bpf_redirect_peer_proto;
 	case BPF_FUNC_get_route_realm:
 		return &bpf_get_route_realm_proto;
 	case BPF_FUNC_get_hash_recalc:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4272cc53d478..b97bc5abb3b8 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3719,6 +3719,22 @@ union bpf_attr {
  *		never return NULL.
  *	Return
  *		A pointer pointing to the kernel percpu variable on this cpu.
+ *
+ * long bpf_redirect_peer(u32 ifindex, u64 flags)
+ * 	Description
+ * 		Redirect the packet to another net device of index *ifindex*.
+ * 		This helper is somewhat similar to **bpf_redirect**\ (), except
+ * 		that the redirection happens to the *ifindex*' peer device and
+ * 		the netns switch takes place from ingress to ingress without
+ * 		going through the CPU's backlog queue.
+ *
+ * 		The *flags* argument is reserved and must be 0. The helper is
+ * 		currently only supported for tc BPF program types at the ingress
+ * 		hook and for veth device types. The peer device must reside in a
+ * 		different network namespace.
+ * 	Return
+ * 		The helper returns **TC_ACT_REDIRECT** on success or
+ * 		**TC_ACT_SHOT** on error.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3876,6 +3892,7 @@ union bpf_attr {
 	FN(redirect_neigh),		\
 	FN(bpf_per_cpu_ptr),            \
 	FN(bpf_this_cpu_ptr),		\
+	FN(redirect_peer),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 4a8f87e60f6db40e640f1db555d063b2c4dea5f1 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sun, 11 Oct 2020 01:40:03 +0200
Subject: bpf: Allow for map-in-map with dynamic inner array map entries

Recent work in f4d05259213f ("bpf: Add map_meta_equal map ops") and 134fede4eecf
("bpf: Relax max_entries check for most of the inner map types") added support
for dynamic inner max elements for most map-in-map types. Exceptions were maps
like array or prog array where the map_gen_lookup() callback uses the maps'
max_entries field as a constant when emitting instructions.

We recently implemented Maglev consistent hashing into Cilium's load balancer
which uses map-in-map with an outer map being hash and inner being array holding
the Maglev backend table for each service. It has been designed this way in
order to reduce overall memory consumption, given that the outer hash map
avoids preallocating a large, flat memory area for all services. Also, the
number of service mappings is not always known a priori.

The use case for dynamic inner array map entries is to further reduce memory
overhead. For example, some services might just have a small number of
backends while others could have a large number. Right now the Maglev backend
table for a small and a large number of backends would need to have the same
inner array map entries, which adds a lot of unneeded overhead.

Dynamic inner array map entries can be realized by avoiding the inlined code
generation for their lookup. The lookup will still be efficient since it will
be calling into array_map_lookup_elem() directly and thus avoiding retpoline.
The patch adds a BPF_F_INNER_MAP flag to map creation which therefore skips
inline code generation and relaxes array_map_meta_equal() check to ignore both
maps' max_entries. This also still allows to have faster lookups for map-in-map
when BPF_F_INNER_MAP is not specified and hence dynamic max_entries not needed.

Example code generation where inner map is dynamic sized array:

  # bpftool p d x i 125
  int handle__sys_enter(void * ctx):
  ; int handle__sys_enter(void *ctx)
     0: (b4) w1 = 0
  ; int key = 0;
     1: (63) *(u32 *)(r10 -4) = r1
     2: (bf) r2 = r10
  ;
     3: (07) r2 += -4
  ; inner_map = bpf_map_lookup_elem(&outer_arr_dyn, &key);
     4: (18) r1 = map[id:468]
     6: (07) r1 += 272
     7: (61) r0 = *(u32 *)(r2 +0)
     8: (35) if r0 >= 0x3 goto pc+5
     9: (67) r0 <<= 3
    10: (0f) r0 += r1
    11: (79) r0 = *(u64 *)(r0 +0)
    12: (15) if r0 == 0x0 goto pc+1
    13: (05) goto pc+1
    14: (b7) r0 = 0
    15: (b4) w6 = -1
  ; if (!inner_map)
    16: (15) if r0 == 0x0 goto pc+6
    17: (bf) r2 = r10
  ;
    18: (07) r2 += -4
  ; val = bpf_map_lookup_elem(inner_map, &key);
    19: (bf) r1 = r0                               | No inlining but instead
    20: (85) call array_map_lookup_elem#149280     | call to array_map_lookup_elem()
  ; return val ? *val : -1;                        | for inner array lookup.
    21: (15) if r0 == 0x0 goto pc+1
  ; return val ? *val : -1;
    22: (61) r6 = *(u32 *)(r0 +0)
  ; }
    23: (bc) w0 = w6
    24: (95) exit

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201010234006.7075-4-daniel@iogearbox.net
---
 include/linux/bpf.h            |  2 +-
 include/uapi/linux/bpf.h       |  3 +++
 kernel/bpf/arraymap.c          | 17 +++++++++++------
 kernel/bpf/hashtab.c           |  6 +++---
 kernel/bpf/verifier.c          |  6 ++++--
 net/xdp/xskmap.c               |  2 +-
 tools/include/uapi/linux/bpf.h |  3 +++
 7 files changed, 26 insertions(+), 13 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index dc63eeed4fd9..2b16bf48aab6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -82,7 +82,7 @@ struct bpf_map_ops {
 	void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
 				int fd);
 	void (*map_fd_put_ptr)(void *ptr);
-	u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
+	int (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
 	u32 (*map_fd_sys_lookup_elem)(void *ptr);
 	void (*map_seq_show_elem)(struct bpf_map *map, void *key,
 				  struct seq_file *m);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b97bc5abb3b8..bf5a99d803e4 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -435,6 +435,9 @@ enum {
 
 /* Share perf_event among processes */
 	BPF_F_PRESERVE_ELEMS	= (1U << 11),
+
+/* Create a map that is suitable to be an inner map with dynamic max entries */
+	BPF_F_INNER_MAP		= (1U << 12),
 };
 
 /* Flags for BPF_PROG_QUERY. */
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index bd777dd6f967..c6c81eceb68f 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -16,7 +16,7 @@
 
 #define ARRAY_CREATE_FLAG_MASK \
 	(BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK | \
-	 BPF_F_PRESERVE_ELEMS)
+	 BPF_F_PRESERVE_ELEMS | BPF_F_INNER_MAP)
 
 static void bpf_array_free_percpu(struct bpf_array *array)
 {
@@ -62,7 +62,7 @@ int array_map_alloc_check(union bpf_attr *attr)
 		return -EINVAL;
 
 	if (attr->map_type != BPF_MAP_TYPE_ARRAY &&
-	    attr->map_flags & BPF_F_MMAPABLE)
+	    attr->map_flags & (BPF_F_MMAPABLE | BPF_F_INNER_MAP))
 		return -EINVAL;
 
 	if (attr->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
@@ -214,7 +214,7 @@ static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm,
 }
 
 /* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
-static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
+static int array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	struct bpf_insn *insn = insn_buf;
@@ -223,6 +223,9 @@ static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 	const int map_ptr = BPF_REG_1;
 	const int index = BPF_REG_2;
 
+	if (map->map_flags & BPF_F_INNER_MAP)
+		return -EOPNOTSUPP;
+
 	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
 	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
 	if (!map->bypass_spec_v1) {
@@ -496,8 +499,10 @@ static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
 static bool array_map_meta_equal(const struct bpf_map *meta0,
 				 const struct bpf_map *meta1)
 {
-	return meta0->max_entries == meta1->max_entries &&
-		bpf_map_meta_equal(meta0, meta1);
+	if (!bpf_map_meta_equal(meta0, meta1))
+		return false;
+	return meta0->map_flags & BPF_F_INNER_MAP ? true :
+	       meta0->max_entries == meta1->max_entries;
 }
 
 struct bpf_iter_seq_array_map_info {
@@ -1251,7 +1256,7 @@ static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
 	return READ_ONCE(*inner_map);
 }
 
-static u32 array_of_map_gen_lookup(struct bpf_map *map,
+static int array_of_map_gen_lookup(struct bpf_map *map,
 				   struct bpf_insn *insn_buf)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 3395cf140d22..1815e97d4c9c 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -612,7 +612,7 @@ static void *htab_map_lookup_elem(struct bpf_map *map, void *key)
  * bpf_prog
  *   __htab_map_lookup_elem
  */
-static u32 htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
+static int htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 {
 	struct bpf_insn *insn = insn_buf;
 	const int ret = BPF_REG_0;
@@ -651,7 +651,7 @@ static void *htab_lru_map_lookup_elem_sys(struct bpf_map *map, void *key)
 	return __htab_lru_map_lookup_elem(map, key, false);
 }
 
-static u32 htab_lru_map_gen_lookup(struct bpf_map *map,
+static int htab_lru_map_gen_lookup(struct bpf_map *map,
 				   struct bpf_insn *insn_buf)
 {
 	struct bpf_insn *insn = insn_buf;
@@ -2070,7 +2070,7 @@ static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key)
 	return READ_ONCE(*inner_map);
 }
 
-static u32 htab_of_map_gen_lookup(struct bpf_map *map,
+static int htab_of_map_gen_lookup(struct bpf_map *map,
 				  struct bpf_insn *insn_buf)
 {
 	struct bpf_insn *insn = insn_buf;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f3e36eade3d4..fa5badc9279a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -11049,7 +11049,9 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 			if (insn->imm == BPF_FUNC_map_lookup_elem &&
 			    ops->map_gen_lookup) {
 				cnt = ops->map_gen_lookup(map_ptr, insn_buf);
-				if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
+				if (cnt == -EOPNOTSUPP)
+					goto patch_map_ops_generic;
+				if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
 					verbose(env, "bpf verifier is misconfigured\n");
 					return -EINVAL;
 				}
@@ -11079,7 +11081,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 				     (int (*)(struct bpf_map *map, void *value))NULL));
 			BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
 				     (int (*)(struct bpf_map *map, void *value))NULL));
-
+patch_map_ops_generic:
 			switch (insn->imm) {
 			case BPF_FUNC_map_lookup_elem:
 				insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) -
diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c
index 0c5df593bc56..49da2b8ace8b 100644
--- a/net/xdp/xskmap.c
+++ b/net/xdp/xskmap.c
@@ -132,7 +132,7 @@ static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 	return 0;
 }
 
-static u32 xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
+static int xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 {
 	const int ret = BPF_REG_0, mp = BPF_REG_1, index = BPF_REG_2;
 	struct bpf_insn *insn = insn_buf;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b97bc5abb3b8..bf5a99d803e4 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -435,6 +435,9 @@ enum {
 
 /* Share perf_event among processes */
 	BPF_F_PRESERVE_ELEMS	= (1U << 11),
+
+/* Create a map that is suitable to be an inner map with dynamic max entries */
+	BPF_F_INNER_MAP		= (1U << 12),
 };
 
 /* Flags for BPF_PROG_QUERY. */
-- 
cgit v1.2.3


From 60a3815da702fd9e4759945f26cce5c47d3967ad Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 8 Oct 2020 01:14:47 +0200
Subject: netfilter: add inet ingress support

This patch adds the NF_INET_INGRESS pseudohook for the NFPROTO_INET
family. It maps this new hook to the existing NFPROTO_NETDEV
and NF_NETDEV_INGRESS hook. The hook does not guarantee that packets are
inet only; users must filter out non-IP traffic explicitly.

This infrastructure makes it easier to support this new hook in nf_tables.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter.h |   1 +
 net/netfilter/core.c           | 103 ++++++++++++++++++++++++++++++++---------
 2 files changed, 83 insertions(+), 21 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h
index ca9e63d6e0e4..6a6179af0d7c 100644
--- a/include/uapi/linux/netfilter.h
+++ b/include/uapi/linux/netfilter.h
@@ -45,6 +45,7 @@ enum nf_inet_hooks {
 	NF_INET_FORWARD,
 	NF_INET_LOCAL_OUT,
 	NF_INET_POST_ROUTING,
+	NF_INET_INGRESS,
 	NF_INET_NUMHOOKS
 };
 
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index c82f779a587e..63d032191e62 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -281,6 +281,16 @@ nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum,
 		if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_bridge) <= hooknum))
 			return NULL;
 		return net->nf.hooks_bridge + hooknum;
+#endif
+#ifdef CONFIG_NETFILTER_INGRESS
+	case NFPROTO_INET:
+		if (WARN_ON_ONCE(hooknum != NF_INET_INGRESS))
+			return NULL;
+		if (!dev || dev_net(dev) != net) {
+			WARN_ON_ONCE(1);
+			return NULL;
+		}
+		return &dev->nf_hooks_ingress;
 #endif
 	case NFPROTO_IPV4:
 		if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv4) <= hooknum))
@@ -311,22 +321,56 @@ nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum,
 	return NULL;
 }
 
+static int nf_ingress_check(struct net *net, const struct nf_hook_ops *reg,
+			    int hooknum)
+{
+#ifndef CONFIG_NETFILTER_INGRESS
+	if (reg->hooknum == hooknum)
+		return -EOPNOTSUPP;
+#endif
+	if (reg->hooknum != hooknum ||
+	    !reg->dev || dev_net(reg->dev) != net)
+		return -EINVAL;
+
+	return 0;
+}
+
 static inline bool nf_ingress_hook(const struct nf_hook_ops *reg, int pf)
 {
-	return pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS;
+	if ((pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) ||
+	    (pf == NFPROTO_INET && reg->hooknum == NF_INET_INGRESS))
+		return true;
+
+	return false;
 }
 
 static void nf_static_key_inc(const struct nf_hook_ops *reg, int pf)
 {
 #ifdef CONFIG_JUMP_LABEL
-       static_key_slow_inc(&nf_hooks_needed[pf][reg->hooknum]);
+	int hooknum;
+
+	if (pf == NFPROTO_INET && reg->hooknum == NF_INET_INGRESS) {
+		pf = NFPROTO_NETDEV;
+		hooknum = NF_NETDEV_INGRESS;
+	} else {
+		hooknum = reg->hooknum;
+	}
+	static_key_slow_inc(&nf_hooks_needed[pf][hooknum]);
 #endif
 }
 
 static void nf_static_key_dec(const struct nf_hook_ops *reg, int pf)
 {
 #ifdef CONFIG_JUMP_LABEL
-       static_key_slow_dec(&nf_hooks_needed[pf][reg->hooknum]);
+	int hooknum;
+
+	if (pf == NFPROTO_INET && reg->hooknum == NF_INET_INGRESS) {
+		pf = NFPROTO_NETDEV;
+		hooknum = NF_NETDEV_INGRESS;
+	} else {
+		hooknum = reg->hooknum;
+	}
+	static_key_slow_dec(&nf_hooks_needed[pf][hooknum]);
 #endif
 }
 
@@ -335,15 +379,22 @@ static int __nf_register_net_hook(struct net *net, int pf,
 {
 	struct nf_hook_entries *p, *new_hooks;
 	struct nf_hook_entries __rcu **pp;
+	int err;
 
-	if (pf == NFPROTO_NETDEV) {
-#ifndef CONFIG_NETFILTER_INGRESS
-		if (reg->hooknum == NF_NETDEV_INGRESS)
-			return -EOPNOTSUPP;
-#endif
-		if (reg->hooknum != NF_NETDEV_INGRESS ||
-		    !reg->dev || dev_net(reg->dev) != net)
-			return -EINVAL;
+	switch (pf) {
+	case NFPROTO_NETDEV:
+		err = nf_ingress_check(net, reg, NF_NETDEV_INGRESS);
+		if (err < 0)
+			return err;
+		break;
+	case NFPROTO_INET:
+		if (reg->hooknum != NF_INET_INGRESS)
+			break;
+
+		err = nf_ingress_check(net, reg, NF_INET_INGRESS);
+		if (err < 0)
+			return err;
+		break;
 	}
 
 	pp = nf_hook_entry_head(net, pf, reg->hooknum, reg->dev);
@@ -441,8 +492,12 @@ static void __nf_unregister_net_hook(struct net *net, int pf,
 void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
 {
 	if (reg->pf == NFPROTO_INET) {
-		__nf_unregister_net_hook(net, NFPROTO_IPV4, reg);
-		__nf_unregister_net_hook(net, NFPROTO_IPV6, reg);
+		if (reg->hooknum == NF_INET_INGRESS) {
+			__nf_unregister_net_hook(net, NFPROTO_INET, reg);
+		} else {
+			__nf_unregister_net_hook(net, NFPROTO_IPV4, reg);
+			__nf_unregister_net_hook(net, NFPROTO_IPV6, reg);
+		}
 	} else {
 		__nf_unregister_net_hook(net, reg->pf, reg);
 	}
@@ -467,14 +522,20 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
 	int err;
 
 	if (reg->pf == NFPROTO_INET) {
-		err = __nf_register_net_hook(net, NFPROTO_IPV4, reg);
-		if (err < 0)
-			return err;
-
-		err = __nf_register_net_hook(net, NFPROTO_IPV6, reg);
-		if (err < 0) {
-			__nf_unregister_net_hook(net, NFPROTO_IPV4, reg);
-			return err;
+		if (reg->hooknum == NF_INET_INGRESS) {
+			err = __nf_register_net_hook(net, NFPROTO_INET, reg);
+			if (err < 0)
+				return err;
+		} else {
+			err = __nf_register_net_hook(net, NFPROTO_IPV4, reg);
+			if (err < 0)
+				return err;
+
+			err = __nf_register_net_hook(net, NFPROTO_IPV6, reg);
+			if (err < 0) {
+				__nf_unregister_net_hook(net, NFPROTO_IPV4, reg);
+				return err;
+			}
 		}
 	} else {
 		err = __nf_register_net_hook(net, reg->pf, reg);
-- 
cgit v1.2.3


From ac911bfeb34b5d79fb4e23a08b8db0b89c529b53 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Mon, 12 Oct 2020 09:43:53 +0200
Subject: can: isotp: implement cleanups / improvements from review

As pointed out by Jakub Kicinski here:
http://lore.kernel.org/r/20201009175751.5c54097f@kicinski-fedora-pc1c0hjn.dhcp.thefacebook.com
this patch addresses the remarked issues:

- remove empty line in comment
- remove default=y for CAN_ISOTP in Kconfig
- make use of pr_notice_once()
- use GFP_ATOMIC instead of gfp_any() in soft hrtimer context

The version strings in the CAN subsystem are removed by a separate patch.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Link: https://lore.kernel.org/r/20201012074354.25839-1-socketcan@hartkopp.net
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/uapi/linux/can/isotp.h |  1 -
 net/can/Kconfig                |  3 ++-
 net/can/isotp.c                | 14 +++++++-------
 3 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/can/isotp.h b/include/uapi/linux/can/isotp.h
index 553006509f4e..7793b26aa154 100644
--- a/include/uapi/linux/can/isotp.h
+++ b/include/uapi/linux/can/isotp.h
@@ -160,7 +160,6 @@ struct can_isotp_ll_options {
  * these default settings can be changed via sockopts.
  * For that reason the STmin value is intentionally _not_ checked for
  * consistency and copied directly into the flow control (FC) frame.
- *
  */
 
 #endif /* !_UAPI_CAN_ISOTP_H */
diff --git a/net/can/Kconfig b/net/can/Kconfig
index 021fe03a8ed6..224e5e0283a9 100644
--- a/net/can/Kconfig
+++ b/net/can/Kconfig
@@ -57,7 +57,6 @@ source "net/can/j1939/Kconfig"
 
 config CAN_ISOTP
 	tristate "ISO 15765-2:2016 CAN transport protocol"
-	default y
 	help
 	  CAN Transport Protocols offer support for segmented Point-to-Point
 	  communication between CAN nodes via two defined CAN Identifiers.
@@ -67,6 +66,8 @@ config CAN_ISOTP
 	  vehicle diagnosis (UDS, ISO 14229) or IP-over-CAN traffic.
 	  This protocol driver implements data transfers according to
 	  ISO 15765-2:2016 for 'classic' CAN and CAN FD frame types.
+	  If you want to perform automotive vehicle diagnostic services (UDS),
+	  say 'y'.
 
 source "drivers/net/can/Kconfig"
 
diff --git a/net/can/isotp.c b/net/can/isotp.c
index e6ff032b5426..ca63061bb932 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -222,8 +222,8 @@ static int isotp_send_fc(struct sock *sk, int ae, u8 flowstatus)
 
 	can_send_ret = can_send(nskb, 1);
 	if (can_send_ret)
-		printk_once(KERN_NOTICE "can-isotp: %s: can_send_ret %d\n",
-			    __func__, can_send_ret);
+		pr_notice_once("can-isotp: %s: can_send_ret %d\n",
+			       __func__, can_send_ret);
 
 	dev_put(dev);
 
@@ -769,7 +769,7 @@ static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer)
 
 isotp_tx_burst:
 		skb = alloc_skb(so->ll.mtu + sizeof(struct can_skb_priv),
-				gfp_any());
+				GFP_ATOMIC);
 		if (!skb) {
 			dev_put(dev);
 			break;
@@ -798,8 +798,8 @@ isotp_tx_burst:
 
 		can_send_ret = can_send(skb, 1);
 		if (can_send_ret)
-			printk_once(KERN_NOTICE "can-isotp: %s: can_send_ret %d\n",
-				    __func__, can_send_ret);
+			pr_notice_once("can-isotp: %s: can_send_ret %d\n",
+				       __func__, can_send_ret);
 
 		if (so->tx.idx >= so->tx.len) {
 			/* we are done */
@@ -942,8 +942,8 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 	err = can_send(skb, 1);
 	dev_put(dev);
 	if (err) {
-		printk_once(KERN_NOTICE "can-isotp: %s: can_send_ret %d\n",
-			    __func__, err);
+		pr_notice_once("can-isotp: %s: can_send_ret %d\n",
+			       __func__, err);
 		return err;
 	}
 
-- 
cgit v1.2.3


From d25e2e9388eda61b6e298585024ee3355f50c493 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 14 Oct 2020 21:34:32 +0200
Subject: netfilter: restore NF_INET_NUMHOOKS

This definition is used by the iptables legacy UAPI, restore it.

Fixes: d3519cb89f6d ("netfilter: nf_tables: add inet ingress support")
Reported-by: Jason A. Donenfeld <Jason@zx2c4.com>
Tested-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/netfilter/nf_tables.h | 4 +++-
 include/uapi/linux/netfilter.h    | 4 ++--
 net/netfilter/nf_tables_api.c     | 2 +-
 3 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 3965ce18226f..3f7e56b1171e 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -14,6 +14,8 @@
 #include <net/netlink.h>
 #include <net/flow_offload.h>
 
+#define NFT_MAX_HOOKS	(NF_INET_INGRESS + 1)
+
 struct module;
 
 #define NFT_JUMP_STACK_SIZE	16
@@ -979,7 +981,7 @@ struct nft_chain_type {
 	int				family;
 	struct module			*owner;
 	unsigned int			hook_mask;
-	nf_hookfn			*hooks[NF_MAX_HOOKS];
+	nf_hookfn			*hooks[NFT_MAX_HOOKS];
 	int				(*ops_register)(struct net *net, const struct nf_hook_ops *ops);
 	void				(*ops_unregister)(struct net *net, const struct nf_hook_ops *ops);
 };
diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h
index 6a6179af0d7c..ef9a44286e23 100644
--- a/include/uapi/linux/netfilter.h
+++ b/include/uapi/linux/netfilter.h
@@ -45,8 +45,8 @@ enum nf_inet_hooks {
 	NF_INET_FORWARD,
 	NF_INET_LOCAL_OUT,
 	NF_INET_POST_ROUTING,
-	NF_INET_INGRESS,
-	NF_INET_NUMHOOKS
+	NF_INET_NUMHOOKS,
+	NF_INET_INGRESS = NF_INET_NUMHOOKS,
 };
 
 enum nf_dev_hooks {
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index f22ad21d0230..7f1c184c00d2 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1864,7 +1864,7 @@ static int nft_chain_parse_hook(struct net *net,
 		if (IS_ERR(type))
 			return PTR_ERR(type);
 	}
-	if (hook->num > NF_MAX_HOOKS || !(type->hook_mask & (1 << hook->num)))
+	if (hook->num >= NFT_MAX_HOOKS || !(type->hook_mask & (1 << hook->num)))
 		return -EOPNOTSUPP;
 
 	if (type->type == NFT_CHAIN_T_NAT &&
-- 
cgit v1.2.3


From 346e320cb2103edef709c4466a29140c4a8e527a Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Thu, 15 Oct 2020 18:39:27 +0200
Subject: netfilter: nftables: allow re-computing sctp CRC-32C in 'payload'
 statements

nftables payload statements are used to mangle SCTP headers, but they can
only replace the Internet Checksum. As a consequence, nftables rules that
mangle sport/dport/vtag in SCTP headers potentially generate packets that
are discarded by the receiver, unless the CRC-32C is "offloaded" (e.g. the
rule mangles a skb having 'ip_summed' equal to 'CHECKSUM_PARTIAL').

Fix this by extending the uAPI definitions and the L4 checksum update
function, so that userspace programs (e.g. nft) can instruct the kernel
to compute CRC-32C in SCTP headers. Also ensure that LIBCRC32C is built
if NF_TABLES is 'y' or 'm' in the kernel build configuration.

Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/netfilter/nf_tables.h |  2 ++
 net/netfilter/Kconfig                    |  1 +
 net/netfilter/nft_payload.c              | 28 ++++++++++++++++++++++++++++
 3 files changed, 31 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 352ee51707a1..98272cb5f617 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -749,10 +749,12 @@ enum nft_payload_bases {
  *
  * @NFT_PAYLOAD_CSUM_NONE: no checksumming
  * @NFT_PAYLOAD_CSUM_INET: internet checksum (RFC 791)
+ * @NFT_PAYLOAD_CSUM_SCTP: CRC-32c, for use in SCTP header (RFC 3309)
  */
 enum nft_payload_csum_types {
 	NFT_PAYLOAD_CSUM_NONE,
 	NFT_PAYLOAD_CSUM_INET,
+	NFT_PAYLOAD_CSUM_SCTP,
 };
 
 enum nft_payload_csum_flags {
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 25313c29d799..52370211e46b 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -441,6 +441,7 @@ endif # NF_CONNTRACK
 
 config NF_TABLES
 	select NETFILTER_NETLINK
+	select LIBCRC32C
 	tristate "Netfilter nf_tables support"
 	help
 	  nftables is the new packet classification framework that intends to
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 7a2e59638499..dcd3c7b8a367 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -22,6 +22,7 @@
 #include <linux/icmpv6.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
+#include <net/sctp/checksum.h>
 
 static bool nft_payload_rebuild_vlan_hdr(const struct sk_buff *skb, int mac_off,
 					 struct vlan_ethhdr *veth)
@@ -484,6 +485,19 @@ static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt,
 	return 0;
 }
 
+static int nft_payload_csum_sctp(struct sk_buff *skb, int offset)
+{
+	struct sctphdr *sh;
+
+	if (skb_ensure_writable(skb, offset + sizeof(*sh)))
+		return -1;
+
+	sh = (struct sctphdr *)(skb->data + offset);
+	sh->checksum = sctp_compute_cksum(skb, offset);
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+	return 0;
+}
+
 static int nft_payload_l4csum_update(const struct nft_pktinfo *pkt,
 				     struct sk_buff *skb,
 				     __wsum fsum, __wsum tsum)
@@ -587,6 +601,13 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
 	    skb_store_bits(skb, offset, src, priv->len) < 0)
 		goto err;
 
+	if (priv->csum_type == NFT_PAYLOAD_CSUM_SCTP &&
+	    pkt->tprot == IPPROTO_SCTP &&
+	    skb->ip_summed != CHECKSUM_PARTIAL) {
+		if (nft_payload_csum_sctp(skb, pkt->xt.thoff))
+			goto err;
+	}
+
 	return;
 err:
 	regs->verdict.code = NFT_BREAK;
@@ -623,6 +644,13 @@ static int nft_payload_set_init(const struct nft_ctx *ctx,
 	case NFT_PAYLOAD_CSUM_NONE:
 	case NFT_PAYLOAD_CSUM_INET:
 		break;
+	case NFT_PAYLOAD_CSUM_SCTP:
+		if (priv->base != NFT_PAYLOAD_TRANSPORT_HEADER)
+			return -EINVAL;
+
+		if (priv->csum_offset != offsetof(struct sctphdr, checksum))
+			return -EINVAL;
+		break;
 	default:
 		return -EOPNOTSUPP;
 	}
-- 
cgit v1.2.3


From 8c39076c276be0b31982e44654e2c2357473258a Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <kolga@netapp.com>
Date: Fri, 16 Oct 2020 09:25:45 -0400
Subject: NFSv4.2: support EXCHGID4_FLAG_SUPP_FENCE_OPS 4.2 EXCHANGE_ID flag

RFC 7862 introduced a new flag that either client or server is
allowed to set: EXCHGID4_FLAG_SUPP_FENCE_OPS.

Client needs to update its bitmask to allow for this flag value.

v2: changed minor version argument to unsigned int

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
CC: <stable@vger.kernel.org>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/nfs4proc.c         | 9 ++++++---
 include/uapi/linux/nfs4.h | 3 +++
 2 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 2e33995691f5..9e0ca9b2b210 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -8133,9 +8133,11 @@ int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name,
  * both PNFS and NON_PNFS flags set, and not having one of NON_PNFS, PNFS, or
  * DS flags set.
  */
-static int nfs4_check_cl_exchange_flags(u32 flags)
+static int nfs4_check_cl_exchange_flags(u32 flags, u32 version)
 {
-	if (flags & ~EXCHGID4_FLAG_MASK_R)
+	if (version >= 2 && (flags & ~EXCHGID4_2_FLAG_MASK_R))
+		goto out_inval;
+	else if (version < 2 && (flags & ~EXCHGID4_FLAG_MASK_R))
 		goto out_inval;
 	if ((flags & EXCHGID4_FLAG_USE_PNFS_MDS) &&
 	    (flags & EXCHGID4_FLAG_USE_NON_PNFS))
@@ -8548,7 +8550,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cre
 	if (status  != 0)
 		goto out;
 
-	status = nfs4_check_cl_exchange_flags(resp->flags);
+	status = nfs4_check_cl_exchange_flags(resp->flags,
+			clp->cl_mvops->minor_version);
 	if (status  != 0)
 		goto out;
 
diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h
index bf197e99b98f..ed5415e0f1c1 100644
--- a/include/uapi/linux/nfs4.h
+++ b/include/uapi/linux/nfs4.h
@@ -139,6 +139,8 @@
 
 #define EXCHGID4_FLAG_UPD_CONFIRMED_REC_A	0x40000000
 #define EXCHGID4_FLAG_CONFIRMED_R		0x80000000
+
+#define EXCHGID4_FLAG_SUPP_FENCE_OPS		0x00000004
 /*
  * Since the validity of these bits depends on whether
  * they're set in the argument or response, have separate
@@ -146,6 +148,7 @@
  */
 #define EXCHGID4_FLAG_MASK_A			0x40070103
 #define EXCHGID4_FLAG_MASK_R			0x80070103
+#define EXCHGID4_2_FLAG_MASK_R			0x80070107
 
 #define SEQ4_STATUS_CB_PATH_DOWN		0x00000001
 #define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING	0x00000002
-- 
cgit v1.2.3


From c6e5f02b5281a3166a9b7b4d66830cc234421ba5 Mon Sep 17 00:00:00 2001
From: "Saheed O. Bolarinwa" <refactormyself@gmail.com>
Date: Thu, 15 Oct 2020 14:30:31 -0500
Subject: PCI/ASPM: Remove struct aspm_register_info.support

Previously we stored the "ASPM Support" field from the Link Capabilities
register in the struct aspm_register_info.

Read the Link Capabilities directly when needed and remove it from the
struct aspm_register_info.  No functional change intended.

[bhelgaas: remove pci_dev cached copy since LNKCAP isn't truly read-only,
add PCI_EXP_LNKCAP_ASPM_L0S & PCI_EXP_LNKCAP_ASPM_L1, check them directly
instead of adding aspm_support()]
Link: https://lore.kernel.org/r/20201015193039.12585-5-helgaas@kernel.org
Signed-off-by: Saheed O. Bolarinwa <refactormyself@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pcie/aspm.c       | 25 ++++++++++++++-----------
 include/uapi/linux/pci_regs.h |  2 ++
 2 files changed, 16 insertions(+), 11 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index 0725511cbeb5..82ce34e2ef53 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -381,7 +381,6 @@ static void encode_l12_threshold(u32 threshold_us, u32 *scale, u32 *value)
 }
 
 struct aspm_register_info {
-	u32 support:2;
 	u32 enabled:2;
 	u32 latency_encoding_l0s;
 	u32 latency_encoding_l1;
@@ -400,7 +399,6 @@ static void pcie_get_aspm_reg(struct pci_dev *pdev,
 	u32 reg32;
 
 	pcie_capability_read_dword(pdev, PCI_EXP_LNKCAP, &reg32);
-	info->support = (reg32 & PCI_EXP_LNKCAP_ASPMS) >> 10;
 	info->latency_encoding_l0s = (reg32 & PCI_EXP_LNKCAP_L0SEL) >> 12;
 	info->latency_encoding_l1  = (reg32 & PCI_EXP_LNKCAP_L1EL) >> 15;
 	pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &reg16);
@@ -550,6 +548,7 @@ static void aspm_calc_l1ss_info(struct pcie_link_state *link,
 static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
 {
 	struct pci_dev *child = link->downstream, *parent = link->pdev;
+	u32 parent_lnkcap, child_lnkcap;
 	struct pci_bus *linkbus = parent->subordinate;
 	struct aspm_register_info upreg, dwreg;
 
@@ -560,24 +559,26 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
 		return;
 	}
 
-	/* Get upstream/downstream components' register state */
-	pcie_get_aspm_reg(parent, &upreg);
-	pcie_get_aspm_reg(child, &dwreg);
-
 	/*
 	 * If ASPM not supported, don't mess with the clocks and link,
 	 * bail out now.
 	 */
-	if (!(upreg.support & dwreg.support))
+	pcie_capability_read_dword(parent, PCI_EXP_LNKCAP, &parent_lnkcap);
+	pcie_capability_read_dword(child, PCI_EXP_LNKCAP, &child_lnkcap);
+	if (!(parent_lnkcap & child_lnkcap & PCI_EXP_LNKCAP_ASPMS))
 		return;
 
 	/* Configure common clock before checking latencies */
 	pcie_aspm_configure_common_clock(link);
 
 	/*
-	 * Re-read upstream/downstream components' register state
-	 * after clock configuration
+	 * Re-read upstream/downstream components' register state after
+	 * clock configuration.  L0s & L1 exit latencies in the otherwise
+	 * read-only Link Capabilities may change depending on common clock
+	 * configuration (PCIe r5.0, sec 7.5.3.6).
 	 */
+	pcie_capability_read_dword(parent, PCI_EXP_LNKCAP, &parent_lnkcap);
+	pcie_capability_read_dword(child, PCI_EXP_LNKCAP, &child_lnkcap);
 	pcie_get_aspm_reg(parent, &upreg);
 	pcie_get_aspm_reg(child, &dwreg);
 
@@ -588,8 +589,9 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
 	 * given link unless components on both sides of the link each
 	 * support L0s.
 	 */
-	if (dwreg.support & upreg.support & PCIE_LINK_STATE_L0S)
+	if (parent_lnkcap & child_lnkcap & PCI_EXP_LNKCAP_ASPM_L0S)
 		link->aspm_support |= ASPM_STATE_L0S;
+
 	if (dwreg.enabled & PCIE_LINK_STATE_L0S)
 		link->aspm_enabled |= ASPM_STATE_L0S_UP;
 	if (upreg.enabled & PCIE_LINK_STATE_L0S)
@@ -598,8 +600,9 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
 	link->latency_dw.l0s = calc_l0s_latency(dwreg.latency_encoding_l0s);
 
 	/* Setup L1 state */
-	if (upreg.support & dwreg.support & PCIE_LINK_STATE_L1)
+	if (parent_lnkcap & child_lnkcap & PCI_EXP_LNKCAP_ASPM_L1)
 		link->aspm_support |= ASPM_STATE_L1;
+
 	if (upreg.enabled & dwreg.enabled & PCIE_LINK_STATE_L1)
 		link->aspm_enabled |= ASPM_STATE_L1;
 	link->latency_up.l1 = calc_l1_latency(upreg.latency_encoding_l1);
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index f9701410d3b5..06846ec2e071 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -532,6 +532,8 @@
 #define  PCI_EXP_LNKCAP_SLS_32_0GB 0x00000005 /* LNKCAP2 SLS Vector bit 4 */
 #define  PCI_EXP_LNKCAP_MLW	0x000003f0 /* Maximum Link Width */
 #define  PCI_EXP_LNKCAP_ASPMS	0x00000c00 /* ASPM Support */
+#define  PCI_EXP_LNKCAP_ASPM_L0S 0x00000400 /* ASPM L0s Support */
+#define  PCI_EXP_LNKCAP_ASPM_L1  0x00000800 /* ASPM L1 Support */
 #define  PCI_EXP_LNKCAP_L0SEL	0x00007000 /* L0s Exit Latency */
 #define  PCI_EXP_LNKCAP_L1EL	0x00038000 /* L1 Exit Latency */
 #define  PCI_EXP_LNKCAP_CLKPM	0x00040000 /* Clock Power Management */
-- 
cgit v1.2.3


From df8f10587d3d11b055d54138994a1a9a681da0c4 Mon Sep 17 00:00:00 2001
From: "Saheed O. Bolarinwa" <refactormyself@gmail.com>
Date: Thu, 15 Oct 2020 14:30:39 -0500
Subject: PCI/ASPM: Remove struct pcie_link_state.l1ss

Previously we computed L1.2 parameters in the enumeration path, saved them
in struct pcie_link_state.l1ss, and programmed them into the devices
whenever we enabled or disabled L1.2 on the link.  But these parameters are
constant and don't need to be updated when enabling/disabling L1.2.

Compute and program the L1.2 parameters once during enumeration and remove
the struct pcie_link_state.l1ss member.  No functional change intended.

[bhelgaas: rework to program L1.2 parameters during enumeration]
Link: https://lore.kernel.org/r/20201015193039.12585-13-helgaas@kernel.org
Signed-off-by: Saheed O. Bolarinwa <refactormyself@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pcie/aspm.c       | 84 +++++++++++++++++++++++++------------------
 include/uapi/linux/pci_regs.h |  1 +
 2 files changed, 50 insertions(+), 35 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index d76f23908d67..ac0557a305af 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -74,12 +74,6 @@ struct pcie_link_state {
 	 * has one slot under it, so at most there are 8 functions.
 	 */
 	struct aspm_latency acceptable[8];
-
-	/* L1 PM Substate info */
-	struct {
-		u32 ctl1;		/* value to be programmed in ctl1 */
-		u32 ctl2;		/* value to be programmed in ctl2 */
-	} l1ss;
 };
 
 static int aspm_disabled, aspm_force;
@@ -461,8 +455,9 @@ static void aspm_calc_l1ss_info(struct pcie_link_state *link,
 	struct pci_dev *child = link->downstream, *parent = link->pdev;
 	u32 val1, val2, scale1, scale2;
 	u32 t_common_mode, t_power_on, l1_2_threshold, scale, value;
-
-	link->l1ss.ctl1 = link->l1ss.ctl2 = 0;
+	u32 ctl1 = 0, ctl2 = 0;
+	u32 pctl1, pctl2, cctl1, cctl2;
+	u32 pl1_2_enables, cl1_2_enables;
 
 	if (!(link->aspm_support & ASPM_STATE_L1_2_MASK))
 		return;
@@ -480,10 +475,10 @@ static void aspm_calc_l1ss_info(struct pcie_link_state *link,
 
 	if (calc_l1ss_pwron(parent, scale1, val1) >
 	    calc_l1ss_pwron(child, scale2, val2)) {
-		link->l1ss.ctl2 |= scale1 | (val1 << 3);
+		ctl2 |= scale1 | (val1 << 3);
 		t_power_on = calc_l1ss_pwron(parent, scale1, val1);
 	} else {
-		link->l1ss.ctl2 |= scale2 | (val2 << 3);
+		ctl2 |= scale2 | (val2 << 3);
 		t_power_on = calc_l1ss_pwron(child, scale2, val2);
 	}
 
@@ -499,7 +494,50 @@ static void aspm_calc_l1ss_info(struct pcie_link_state *link,
 	 */
 	l1_2_threshold = 2 + 4 + t_common_mode + t_power_on;
 	encode_l12_threshold(l1_2_threshold, &scale, &value);
-	link->l1ss.ctl1 |= t_common_mode << 8 | scale << 29 | value << 16;
+	ctl1 |= t_common_mode << 8 | scale << 29 | value << 16;
+
+	pci_read_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, &pctl1);
+	pci_read_config_dword(parent, parent->l1ss + PCI_L1SS_CTL2, &pctl2);
+	pci_read_config_dword(child, child->l1ss + PCI_L1SS_CTL1, &cctl1);
+	pci_read_config_dword(child, child->l1ss + PCI_L1SS_CTL2, &cctl2);
+
+	if (ctl1 == pctl1 && ctl1 == cctl1 &&
+	    ctl2 == pctl2 && ctl2 == cctl2)
+		return;
+
+	/* Disable L1.2 while updating.  See PCIe r5.0, sec 5.5.4, 7.8.3.3 */
+	pl1_2_enables = pctl1 & PCI_L1SS_CTL1_L1_2_MASK;
+	cl1_2_enables = cctl1 & PCI_L1SS_CTL1_L1_2_MASK;
+
+	if (pl1_2_enables || cl1_2_enables) {
+		pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1,
+					PCI_L1SS_CTL1_L1_2_MASK, 0);
+		pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
+					PCI_L1SS_CTL1_L1_2_MASK, 0);
+	}
+
+	/* Program T_POWER_ON times in both ports */
+	pci_write_config_dword(parent, parent->l1ss + PCI_L1SS_CTL2, ctl2);
+	pci_write_config_dword(child, child->l1ss + PCI_L1SS_CTL2, ctl2);
+
+	/* Program Common_Mode_Restore_Time in upstream device */
+	pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
+				PCI_L1SS_CTL1_CM_RESTORE_TIME, ctl1);
+
+	/* Program LTR_L1.2_THRESHOLD time in both ports */
+	pci_clear_and_set_dword(parent,	parent->l1ss + PCI_L1SS_CTL1,
+				PCI_L1SS_CTL1_LTR_L12_TH_VALUE |
+				PCI_L1SS_CTL1_LTR_L12_TH_SCALE, ctl1);
+	pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1,
+				PCI_L1SS_CTL1_LTR_L12_TH_VALUE |
+				PCI_L1SS_CTL1_LTR_L12_TH_SCALE, ctl1);
+
+	if (pl1_2_enables || cl1_2_enables) {
+		pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1, 0,
+					pl1_2_enables);
+		pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1, 0,
+					cl1_2_enables);
+	}
 }
 
 static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
@@ -679,30 +717,6 @@ static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state)
 						   PCI_EXP_LNKCTL_ASPM_L1, 0);
 	}
 
-	if (enable_req & ASPM_STATE_L1_2_MASK) {
-
-		/* Program T_POWER_ON times in both ports */
-		pci_write_config_dword(parent, parent->l1ss + PCI_L1SS_CTL2,
-				       link->l1ss.ctl2);
-		pci_write_config_dword(child, child->l1ss + PCI_L1SS_CTL2,
-				       link->l1ss.ctl2);
-
-		/* Program Common_Mode_Restore_Time in upstream device */
-		pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
-					PCI_L1SS_CTL1_CM_RESTORE_TIME,
-					link->l1ss.ctl1);
-
-		/* Program LTR_L1.2_THRESHOLD time in both ports */
-		pci_clear_and_set_dword(parent,	parent->l1ss + PCI_L1SS_CTL1,
-					PCI_L1SS_CTL1_LTR_L12_TH_VALUE |
-					PCI_L1SS_CTL1_LTR_L12_TH_SCALE,
-					link->l1ss.ctl1);
-		pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1,
-					PCI_L1SS_CTL1_LTR_L12_TH_VALUE |
-					PCI_L1SS_CTL1_LTR_L12_TH_SCALE,
-					link->l1ss.ctl1);
-	}
-
 	val = 0;
 	if (state & ASPM_STATE_L1_1)
 		val |= PCI_L1SS_CTL1_ASPM_L1_1;
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 06846ec2e071..c7e0acba0e20 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -1058,6 +1058,7 @@
 #define  PCI_L1SS_CTL1_PCIPM_L1_1	0x00000002  /* PCI-PM L1.1 Enable */
 #define  PCI_L1SS_CTL1_ASPM_L1_2	0x00000004  /* ASPM L1.2 Enable */
 #define  PCI_L1SS_CTL1_ASPM_L1_1	0x00000008  /* ASPM L1.1 Enable */
+#define  PCI_L1SS_CTL1_L1_2_MASK	0x00000005
 #define  PCI_L1SS_CTL1_L1SS_MASK	0x0000000f
 #define  PCI_L1SS_CTL1_CM_RESTORE_TIME	0x0000ff00  /* Common_Mode_Restore_Time */
 #define  PCI_L1SS_CTL1_LTR_L12_TH_VALUE	0x03ff0000  /* LTR_L1.2_THRESHOLD_Value */
-- 
cgit v1.2.3


From e0d696d201dd5d31813787d9b61a42fc459eee89 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Thu, 15 Oct 2020 20:42:18 -0300
Subject: RDMA/rxe: Move the definitions for rxe_av.network_type to uAPI

RXE was wrongly using an internal kernel enum as part of its uAPI, split
this out into a dedicated uAPI enum just for RXE. It only uses the IPv4
and IPv6 values.

This was exposed by changing the internal kernel enum definition which
broke RXE.

Fixes: 1c15b4f2a42f ("RDMA/core: Modify enum ib_gid_type and enum rdma_network_type")
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_net.c | 8 ++++----
 include/uapi/rdma/rdma_user_rxe.h   | 6 ++++++
 2 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 31b93e7e1e2f..575e1a4ec821 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -133,14 +133,14 @@ static struct dst_entry *rxe_find_route(struct net_device *ndev,
 		if (dst)
 			dst_release(dst);
 
-		if (av->network_type == RDMA_NETWORK_IPV4) {
+		if (av->network_type == RXE_NETWORK_TYPE_IPV4) {
 			struct in_addr *saddr;
 			struct in_addr *daddr;
 
 			saddr = &av->sgid_addr._sockaddr_in.sin_addr;
 			daddr = &av->dgid_addr._sockaddr_in.sin_addr;
 			dst = rxe_find_route4(ndev, saddr, daddr);
-		} else if (av->network_type == RDMA_NETWORK_IPV6) {
+		} else if (av->network_type == RXE_NETWORK_TYPE_IPV6) {
 			struct in6_addr *saddr6;
 			struct in6_addr *daddr6;
 
@@ -442,7 +442,7 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
 	if (IS_ERR(attr))
 		return NULL;
 
-	if (av->network_type == RDMA_NETWORK_IPV4)
+	if (av->network_type == RXE_NETWORK_TYPE_IPV4)
 		hdr_len = ETH_HLEN + sizeof(struct udphdr) +
 			sizeof(struct iphdr);
 	else
@@ -469,7 +469,7 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
 	skb->dev	= ndev;
 	rcu_read_unlock();
 
-	if (av->network_type == RDMA_NETWORK_IPV4)
+	if (av->network_type == RXE_NETWORK_TYPE_IPV4)
 		skb->protocol = htons(ETH_P_IP);
 	else
 		skb->protocol = htons(ETH_P_IPV6);
diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h
index d8f2e0e46dab..e591d8c1f3cf 100644
--- a/include/uapi/rdma/rdma_user_rxe.h
+++ b/include/uapi/rdma/rdma_user_rxe.h
@@ -39,6 +39,11 @@
 #include <linux/in.h>
 #include <linux/in6.h>
 
+enum {
+	RXE_NETWORK_TYPE_IPV4 = 1,
+	RXE_NETWORK_TYPE_IPV6 = 2,
+};
+
 union rxe_gid {
 	__u8	raw[16];
 	struct {
@@ -57,6 +62,7 @@ struct rxe_global_route {
 
 struct rxe_av {
 	__u8			port_num;
+	/* From RXE_NETWORK_TYPE_* */
 	__u8			network_type;
 	__u8			dmac[6];
 	struct rxe_global_route	grh;
-- 
cgit v1.2.3


From ecb8ac8b1f146915aa6b96449b66dd48984caacc Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Sat, 17 Oct 2020 16:14:59 -0700
Subject: mm/madvise: introduce process_madvise() syscall: an external memory
 hinting API

There is usecase that System Management Software(SMS) want to give a
memory hint like MADV_[COLD|PAGEOUT] to other processes and in the
case of Android, it is the ActivityManagerService.

The information required to make the reclaim decision is not known to the
app.  Instead, it is known to the centralized userspace
daemon(ActivityManagerService), and that daemon must be able to initiate
reclaim on its own without any app involvement.

To solve the issue, this patch introduces a new syscall
process_madvise(2).  It uses pidfd of an external process to give the
hint.  It also supports vector address range because Android app has
thousands of vmas due to zygote so it's totally waste of CPU and power if
we should call the syscall one by one for each vma.(With testing 2000-vma
syscall vs 1-vector syscall, it showed 15% performance improvement.  I
think it would be bigger in real practice because the testing ran very
cache friendly environment).

Another potential use case for the vector range is to amortize the cost
of TLB shootdowns for multiple ranges when using MADV_DONTNEED; this could
benefit users like TCP receive zerocopy and malloc implementations.  In
future, we could find more usecases for other advises so let's make it
happens as API since we introduce a new syscall at this moment.  With
that, existing madvise(2) user could replace it with process_madvise(2)
with their own pid if they want to have batch address ranges support
feature.

Since it could affect other process's address range, only privileged
process(PTRACE_MODE_ATTACH_FSCREDS) or something else(e.g., being the same
UID) gives it the right to ptrace the process could use it successfully.
The flag argument is reserved for future use if we need to extend the API.

I think supporting all hints madvise has/will supported/support to
process_madvise is rather risky.  Because we are not sure all hints make
sense from external process and implementation for the hint may rely on
the caller being in the current context so it could be error-prone.  Thus,
I just limited hints as MADV_[COLD|PAGEOUT] in this patch.

If someone want to add other hints, we could hear the usecase and review
it for each hint.  It's safer for maintenance rather than introducing a
buggy syscall but hard to fix it later.

So finally, the API is as follows,

      ssize_t process_madvise(int pidfd, const struct iovec *iovec,
                unsigned long vlen, int advice, unsigned int flags);

    DESCRIPTION
      The process_madvise() system call is used to give advice or directions
      to the kernel about the address ranges from external process as well as
      local process. It provides the advice to address ranges of process
      described by iovec and vlen. The goal of such advice is to improve
      system or application performance.

      The pidfd selects the process referred to by the PID file descriptor
      specified in pidfd. (See pidfd_open(2) for further information)

      The pointer iovec points to an array of iovec structures, defined in
      <sys/uio.h> as:

        struct iovec {
            void *iov_base;         /* starting address */
            size_t iov_len;         /* number of bytes to be advised */
        };

      The iovec describes address ranges beginning at address(iov_base)
      and with size length of bytes(iov_len).

      The vlen represents the number of elements in iovec.

      The advice is indicated in the advice argument, which is one of the
      following at this moment if the target process specified by pidfd is
      external.

        MADV_COLD
        MADV_PAGEOUT

      Permission to provide a hint to external process is governed by a
      ptrace access mode PTRACE_MODE_ATTACH_FSCREDS check; see ptrace(2).

      The process_madvise supports every advice madvise(2) has if target
      process is in same thread group with calling process so user could
      use process_madvise(2) to extend existing madvise(2) to support
      vector address ranges.

    RETURN VALUE
      On success, process_madvise() returns the number of bytes advised.
      This return value may be less than the total number of requested
      bytes, if an error occurred. The caller should check return value
      to determine whether a partial advice occurred.

FAQ:

Q.1 - Why does any external entity have better knowledge?

Quote from Sandeep

"For Android, every application (including the special SystemServer)
are forked from Zygote.  The reason of course is to share as many
libraries and classes between the two as possible to benefit from the
preloading during boot.

After applications start, (almost) all of the APIs end up calling into
this SystemServer process over IPC (binder) and back to the
application.

In a fully running system, the SystemServer monitors every single
process periodically to calculate their PSS / RSS and also decides
which process is "important" to the user for interactivity.

So, because of how these processes start _and_ the fact that the
SystemServer is looping to monitor each process, it does tend to *know*
which address range of the application is not used / useful.

Besides, we can never rely on applications to clean things up
themselves.  We've had the "hey app1, the system is low on memory,
please trim your memory usage down" notifications for a long time[1].
They rely on applications honoring the broadcasts and very few do.

So, if we want to avoid the inevitable killing of the application and
restarting it, some way to be able to tell the OS about unimportant
memory in these applications will be useful.

- ssp

Q.2 - How to guarantee the race(i.e., object validation) between when
giving a hint from an external process and get the hint from the target
process?

process_madvise operates on the target process's address space as it
exists at the instant that process_madvise is called.  If the
target process can run between the time the process_madvise process
inspects the target process address space and the time that
process_madvise is actually called, process_madvise may operate on
memory regions that the calling process does not expect.  It's the
responsibility of the process calling process_madvise to close this
race condition.  For example, the calling process can suspend the
target process with ptrace, SIGSTOP, or the freezer cgroup so that it
doesn't have an opportunity to change its own address space before
process_madvise is called.  Another option is to operate on memory
regions that the caller knows a priori will be unchanged in the target
process.  Yet another option is to accept the race for certain
process_madvise calls after reasoning that mistargeting will do no
harm.  The suggested API itself does not provide synchronization.  It
also applies to other APIs like move_pages and process_vm_writev.

The race isn't really a problem though.  Why is it so wrong to require
that callers do their own synchronization in some manner?  Nobody
objects to write(2) merely because it's possible for two processes to
open the same file and clobber each other's writes --- instead, we tell
people to use flock or something.  Think about mmap.  It never
guarantees newly allocated address space is still valid when the user
tries to access it because other threads could unmap the memory right
before.  That's where we need synchronization by using other API or
design from userside.  It shouldn't be part of API itself.  If someone
needs more fine-grained synchronization rather than process level,
there were two ideas suggested - cookie[2] and anon-fd[3].  Both are
applicable via using last reserved argument of the API but I don't
think it's necessary right now since we have already ways to prevent
the race so don't want to add additional complexity with more
fine-grained optimization model.

To make the API extend, it reserved an unsigned long as last argument
so we could support it in future if someone really needs it.

Q.3 - Why doesn't ptrace work?

Injecting an madvise in the target process using ptrace would not work
for us because such injected madvise would have to be executed by the
target process, which means that process would have to be runnable and
that creates the risk of the abovementioned race and hinting a wrong
VMA.  Furthermore, we want to act the hint in caller's context, not the
callee's, because the callee is usually limited in cpuset/cgroups or
even freezed state so they can't act by themselves quick enough, which
causes more thrashing/kill.  It doesn't work if the target process are
ptraced(e.g., strace, debugger, minidump) because a process can have at
most one ptracer.

[1] https://developer.android.com/topic/performance/memory"

[2] process_getinfo for getting the cookie which is updated whenever
    vma of process address layout are changed - Daniel Colascione -
    https://lore.kernel.org/lkml/20190520035254.57579-1-minchan@kernel.org/T/#m7694416fd179b2066a2c62b5b139b14e3894e224

[3] anonymous fd which is used for the object(i.e., address range)
    validation - Michal Hocko -
    https://lore.kernel.org/lkml/20200120112722.GY18451@dhcp22.suse.cz/

[minchan@kernel.org: fix process_madvise build break for arm64]
  Link: http://lkml.kernel.org/r/20200303145756.GA219683@google.com
[minchan@kernel.org: fix build error for mips of process_madvise]
  Link: http://lkml.kernel.org/r/20200508052517.GA197378@google.com
[akpm@linux-foundation.org: fix patch ordering issue]
[akpm@linux-foundation.org: fix arm64 whoops]
[minchan@kernel.org: make process_madvise() vlen arg have type size_t, per Florian]
[akpm@linux-foundation.org: fix i386 build]
[sfr@canb.auug.org.au: fix syscall numbering]
  Link: https://lkml.kernel.org/r/20200905142639.49fc3f1a@canb.auug.org.au
[sfr@canb.auug.org.au: madvise.c needs compat.h]
  Link: https://lkml.kernel.org/r/20200908204547.285646b4@canb.auug.org.au
[minchan@kernel.org: fix mips build]
  Link: https://lkml.kernel.org/r/20200909173655.GC2435453@google.com
[yuehaibing@huawei.com: remove duplicate header which is included twice]
  Link: https://lkml.kernel.org/r/20200915121550.30584-1-yuehaibing@huawei.com
[minchan@kernel.org: do not use helper functions for process_madvise]
  Link: https://lkml.kernel.org/r/20200921175539.GB387368@google.com
[akpm@linux-foundation.org: pidfd_get_pid() gained an argument]
[sfr@canb.auug.org.au: fix up for "iov_iter: transparently handle compat iovecs in import_iovec"]
  Link: https://lkml.kernel.org/r/20200928212542.468e1fef@canb.auug.org.au

Signed-off-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Cc: Brian Geffon <bgeffon@google.com>
Cc: Christian Brauner <christian@brauner.io>
Cc: Daniel Colascione <dancol@google.com>
Cc: Jann Horn <jannh@google.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: John Dias <joaodias@google.com>
Cc: Kirill Tkhai <ktkhai@virtuozzo.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oleksandr Natalenko <oleksandr@redhat.com>
Cc: Sandeep Patil <sspatil@google.com>
Cc: SeongJae Park <sj38.park@gmail.com>
Cc: SeongJae Park <sjpark@amazon.de>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Sonny Rao <sonnyrao@google.com>
Cc: Tim Murray <timmurray@google.com>
Cc: Christian Brauner <christian.brauner@ubuntu.com>
Cc: Florian Weimer <fw@deneb.enyo.de>
Cc: <linux-man@vger.kernel.org>
Link: http://lkml.kernel.org/r/20200302193630.68771-3-minchan@kernel.org
Link: http://lkml.kernel.org/r/20200508183320.GA125527@google.com
Link: http://lkml.kernel.org/r/20200622192900.22757-4-minchan@kernel.org
Link: https://lkml.kernel.org/r/20200901000633.1920247-4-minchan@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/kernel/syscalls/syscall.tbl      |  1 +
 arch/arm/tools/syscall.tbl                  |  1 +
 arch/arm64/include/asm/unistd.h             |  2 +-
 arch/arm64/include/asm/unistd32.h           |  2 +
 arch/ia64/kernel/syscalls/syscall.tbl       |  1 +
 arch/m68k/kernel/syscalls/syscall.tbl       |  1 +
 arch/microblaze/kernel/syscalls/syscall.tbl |  1 +
 arch/mips/kernel/syscalls/syscall_n32.tbl   |  1 +
 arch/mips/kernel/syscalls/syscall_n64.tbl   |  1 +
 arch/mips/kernel/syscalls/syscall_o32.tbl   |  1 +
 arch/parisc/kernel/syscalls/syscall.tbl     |  1 +
 arch/powerpc/kernel/syscalls/syscall.tbl    |  1 +
 arch/s390/kernel/syscalls/syscall.tbl       |  1 +
 arch/sh/kernel/syscalls/syscall.tbl         |  1 +
 arch/sparc/kernel/syscalls/syscall.tbl      |  1 +
 arch/x86/entry/syscalls/syscall_32.tbl      |  1 +
 arch/x86/entry/syscalls/syscall_64.tbl      |  1 +
 arch/xtensa/kernel/syscalls/syscall.tbl     |  1 +
 include/linux/syscalls.h                    |  2 +
 include/uapi/asm-generic/unistd.h           |  4 +-
 kernel/sys_ni.c                             |  1 +
 mm/madvise.c                                | 93 ++++++++++++++++++++++++++++-
 22 files changed, 117 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index ec8bed9e7b75..ee7b01bb7346 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -479,3 +479,4 @@
 547	common	openat2				sys_openat2
 548	common	pidfd_getfd			sys_pidfd_getfd
 549	common	faccessat2			sys_faccessat2
+550	common	process_madvise			sys_process_madvise
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 171077cbf419..d056a548358e 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -453,3 +453,4 @@
 437	common	openat2				sys_openat2
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
+440	common	process_madvise			sys_process_madvise
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 3b859596840d..b3b2019f8d16 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -38,7 +38,7 @@
 #define __ARM_NR_compat_set_tls		(__ARM_NR_COMPAT_BASE + 5)
 #define __ARM_NR_COMPAT_END		(__ARM_NR_COMPAT_BASE + 0x800)
 
-#define __NR_compat_syscalls		440
+#define __NR_compat_syscalls		441
 #endif
 
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 2a3ad9b9accd..107f08e03b9f 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -887,6 +887,8 @@ __SYSCALL(__NR_openat2, sys_openat2)
 __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
 #define __NR_faccessat2 439
 __SYSCALL(__NR_faccessat2, sys_faccessat2)
+#define __NR_process_madvise 440
+__SYSCALL(__NR_process_madvise, sys_process_madvise)
 
 /*
  * Please add new compat syscalls above this comment and update
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index 4799c96c325f..b96ed8b8a508 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -360,3 +360,4 @@
 437	common	openat2				sys_openat2
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
+440	common	process_madvise			sys_process_madvise
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index 81fc799d8392..625fb6d32842 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -439,3 +439,4 @@
 437	common	openat2				sys_openat2
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
+440	common	process_madvise			sys_process_madvise
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index b4e263916f41..aae729c95cf9 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -445,3 +445,4 @@
 437	common	openat2				sys_openat2
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
+440	common	process_madvise			sys_process_madvise
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index cf72a0206a87..32817c954435 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -378,3 +378,4 @@
 437	n32	openat2				sys_openat2
 438	n32	pidfd_getfd			sys_pidfd_getfd
 439	n32	faccessat2			sys_faccessat2
+440	n32	process_madvise			sys_process_madvise
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index 557f9954a2b9..9e4ea3c31b1c 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -354,3 +354,4 @@
 437	n64	openat2				sys_openat2
 438	n64	pidfd_getfd			sys_pidfd_getfd
 439	n64	faccessat2			sys_faccessat2
+440	n64	process_madvise			sys_process_madvise
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index a17aab5abeb2..29f5f28cf5ce 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -427,3 +427,4 @@
 437	o32	openat2				sys_openat2
 438	o32	pidfd_getfd			sys_pidfd_getfd
 439	o32	faccessat2			sys_faccessat2
+440	o32	process_madvise			sys_process_madvise
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index ae3dab371f6f..38c63e5404bc 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -437,3 +437,4 @@
 437	common	openat2				sys_openat2
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
+440	common	process_madvise			sys_process_madvise
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 9d7fb4ced290..1275daec7fec 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -529,3 +529,4 @@
 437	common	openat2				sys_openat2
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
+440	common	process_madvise			sys_process_madvise
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 1c3b48165e86..28c168000483 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -442,3 +442,4 @@
 437  common	openat2			sys_openat2			sys_openat2
 438  common	pidfd_getfd		sys_pidfd_getfd			sys_pidfd_getfd
 439  common	faccessat2		sys_faccessat2			sys_faccessat2
+440  common	process_madvise		sys_process_madvise		sys_process_madvise
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index ae0a00beea5f..783738448ff5 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -442,3 +442,4 @@
 437	common	openat2				sys_openat2
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
+440	common	process_madvise			sys_process_madvise
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index 37ec52b34c73..78160260991b 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -485,3 +485,4 @@
 437	common	openat2			sys_openat2
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
+440	common	process_madvise			sys_process_madvise
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 9b6931f8d555..0d0667a9fbd7 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -444,3 +444,4 @@
 437	i386	openat2			sys_openat2
 438	i386	pidfd_getfd		sys_pidfd_getfd
 439	i386	faccessat2		sys_faccessat2
+440	i386	process_madvise		sys_process_madvise
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 347809649ba2..1f47e24fb65c 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -361,6 +361,7 @@
 437	common	openat2			sys_openat2
 438	common	pidfd_getfd		sys_pidfd_getfd
 439	common	faccessat2		sys_faccessat2
+440	common	process_madvise		sys_process_madvise
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 6276e3c2d3fc..b070f272995d 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -410,3 +410,4 @@
 437	common	openat2				sys_openat2
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
+440	common	process_madvise			sys_process_madvise
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 06db09875aa4..2eda7678fe1d 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -879,6 +879,8 @@ asmlinkage long sys_munlockall(void);
 asmlinkage long sys_mincore(unsigned long start, size_t len,
 				unsigned char __user * vec);
 asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior);
+asmlinkage long sys_process_madvise(int pidfd, const struct iovec __user *vec,
+			size_t vlen, int behavior, unsigned int flags);
 asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
 			unsigned long prot, unsigned long pgoff,
 			unsigned long flags);
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index f2b5d72a46c2..2056318988f7 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -857,9 +857,11 @@ __SYSCALL(__NR_openat2, sys_openat2)
 __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
 #define __NR_faccessat2 439
 __SYSCALL(__NR_faccessat2, sys_faccessat2)
+#define __NR_process_madvise 440
+__SYSCALL(__NR_process_madvise, sys_process_madvise)
 
 #undef __NR_syscalls
-#define __NR_syscalls 440
+#define __NR_syscalls 441
 
 /*
  * 32 bit systems traditionally used different
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index c925d1e1777e..f27ac94d5fa7 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -280,6 +280,7 @@ COND_SYSCALL(mlockall);
 COND_SYSCALL(munlockall);
 COND_SYSCALL(mincore);
 COND_SYSCALL(madvise);
+COND_SYSCALL(process_madvise);
 COND_SYSCALL(remap_file_pages);
 COND_SYSCALL(mbind);
 COND_SYSCALL_COMPAT(mbind);
diff --git a/mm/madvise.c b/mm/madvise.c
index d550ef045288..416a56b8e757 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -17,6 +17,8 @@
 #include <linux/falloc.h>
 #include <linux/fadvise.h>
 #include <linux/sched.h>
+#include <linux/sched/mm.h>
+#include <linux/uio.h>
 #include <linux/ksm.h>
 #include <linux/fs.h>
 #include <linux/file.h>
@@ -27,7 +29,6 @@
 #include <linux/swapops.h>
 #include <linux/shmem_fs.h>
 #include <linux/mmu_notifier.h>
-#include <linux/sched/mm.h>
 
 #include <asm/tlb.h>
 
@@ -988,6 +989,18 @@ madvise_behavior_valid(int behavior)
 	}
 }
 
+static bool
+process_madvise_behavior_valid(int behavior)
+{
+	switch (behavior) {
+	case MADV_COLD:
+	case MADV_PAGEOUT:
+		return true;
+	default:
+		return false;
+	}
+}
+
 /*
  * The madvise(2) system call.
  *
@@ -1035,6 +1048,11 @@ madvise_behavior_valid(int behavior)
  *  MADV_DONTDUMP - the application wants to prevent pages in the given range
  *		from being included in its core dump.
  *  MADV_DODUMP - cancel MADV_DONTDUMP: no longer exclude from core dump.
+ *  MADV_COLD - the application is not expected to use this memory soon,
+ *		deactivate pages in this range so that they can be reclaimed
+ *		easily if memory pressure happens.
+ *  MADV_PAGEOUT - the application is not expected to use this memory soon,
+ *		page out the pages in this range immediately.
  *
  * return values:
  *  zero    - success
@@ -1151,3 +1169,76 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
 {
 	return do_madvise(current->mm, start, len_in, behavior);
 }
+
+SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
+		size_t, vlen, int, behavior, unsigned int, flags)
+{
+	ssize_t ret;
+	struct iovec iovstack[UIO_FASTIOV], iovec;
+	struct iovec *iov = iovstack;
+	struct iov_iter iter;
+	struct pid *pid;
+	struct task_struct *task;
+	struct mm_struct *mm;
+	size_t total_len;
+	unsigned int f_flags;
+
+	/* Any non-zero flags value is rejected; nothing is held yet. */
+	if (flags != 0)
+		return -EINVAL;
+
+	ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
+	if (ret < 0)
+		goto out;
+
+	pid = pidfd_get_pid(pidfd, &f_flags);
+	if (IS_ERR(pid)) {
+		ret = PTR_ERR(pid);
+		goto free_iov;
+	}
+
+	task = get_pid_task(pid, PIDTYPE_PID);
+	if (!task) {
+		ret = -ESRCH;
+		goto put_pid;
+	}
+
+	if (task->mm != current->mm &&
+			!process_madvise_behavior_valid(behavior)) {
+		ret = -EINVAL;
+		goto release_task;
+	}
+
+	mm = mm_access(task, PTRACE_MODE_ATTACH_FSCREDS);
+	if (IS_ERR_OR_NULL(mm)) {
+		ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
+		goto release_task;
+	}
+
+	total_len = iov_iter_count(&iter);
+
+	/* Advise one range at a time, stopping at the first failure. */
+	while (iov_iter_count(&iter)) {
+		iovec = iov_iter_iovec(&iter);
+		ret = do_madvise(mm, (unsigned long)iovec.iov_base,
+					iovec.iov_len, behavior);
+		if (ret < 0)
+			break;
+		iov_iter_advance(&iter, iovec.iov_len);
+	}
+
+	if (ret == 0)
+		ret = total_len - iov_iter_count(&iter);
+
+	mmput(mm);
+
+	/* Success falls through: task, pid and iov must be released too. */
+release_task:
+	put_task_struct(task);
+put_pid:
+	put_pid(pid);
+free_iov:
+	kfree(iov);
+out:
+	return ret;
+}
-- 
cgit v1.2.3


From f3d301c1f2f5676465cdf3259737ea19cc82731f Mon Sep 17 00:00:00 2001
From: Al Grant <al.grant@foss.arm.com>
Date: Mon, 21 Sep 2020 21:46:37 +0100
Subject: perf: correct SNOOPX field offset

perf_event.h has macros that define the field offsets in the
data_src bitmask in perf records. The SNOOPX and REMOTE offsets
were both 37. These are distinct fields, and the bitfield layout
in perf_mem_data_src confirms that SNOOPX should be at offset 38.

Fixes: 52839e653b5629bd ("perf tools: Add support for printing new mem_info encodings")
Signed-off-by: Al Grant <al.grant@foss.arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Link: https://lkml.kernel.org/r/4ac9f5cc-4388-b34a-9999-418a4099415d@foss.arm.com
---
 include/uapi/linux/perf_event.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 077e7ee69e3d..b95d3c485d27 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -1196,7 +1196,7 @@ union perf_mem_data_src {
 
 #define PERF_MEM_SNOOPX_FWD	0x01 /* forward */
 /* 1 free */
-#define PERF_MEM_SNOOPX_SHIFT	37
+#define PERF_MEM_SNOOPX_SHIFT  38
 
 /* locked instruction */
 #define PERF_MEM_LOCK_NA	0x01 /* not available */
-- 
cgit v1.2.3


From 47170f89f76b28c55f359b4766da41d6b91d9ffe Mon Sep 17 00:00:00 2001
From: Matteo Franchin <matteo.franchin@arm.com>
Date: Mon, 12 Oct 2020 17:40:43 +0100
Subject: drm/fourcc: Add AXBXGXRX106106106106 format

Add ABGR format with 10-bit components packed in 64-bit per pixel.
This format can be used to handle
VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16 on little-endian
architectures.

Signed-off-by: Matteo Franchin <matteo.franchin@arm.com>
Reviewed-by: Brian Starkey <brian.starkey@arm.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Liviu Dudau <liviu.dudau@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201012164043.23630-1-matteo.franchin@arm.com
---
 drivers/gpu/drm/drm_fourcc.c  | 1 +
 include/uapi/drm/drm_fourcc.h | 6 ++++++
 2 files changed, 7 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/drm_fourcc.c b/drivers/gpu/drm/drm_fourcc.c
index 722c7ebe4e88..03262472059c 100644
--- a/drivers/gpu/drm/drm_fourcc.c
+++ b/drivers/gpu/drm/drm_fourcc.c
@@ -202,6 +202,7 @@ const struct drm_format_info *__drm_format_info(u32 format)
 		{ .format = DRM_FORMAT_XBGR16161616F,	.depth = 0,  .num_planes = 1, .cpp = { 8, 0, 0 }, .hsub = 1, .vsub = 1 },
 		{ .format = DRM_FORMAT_ARGB16161616F,	.depth = 0,  .num_planes = 1, .cpp = { 8, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true },
 		{ .format = DRM_FORMAT_ABGR16161616F,	.depth = 0,  .num_planes = 1, .cpp = { 8, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true },
+		{ .format = DRM_FORMAT_AXBXGXRX106106106106, .depth = 0, .num_planes = 1, .cpp = { 8, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true },
 		{ .format = DRM_FORMAT_RGB888_A8,	.depth = 32, .num_planes = 2, .cpp = { 3, 1, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true },
 		{ .format = DRM_FORMAT_BGR888_A8,	.depth = 32, .num_planes = 2, .cpp = { 3, 1, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true },
 		{ .format = DRM_FORMAT_XRGB8888_A8,	.depth = 32, .num_planes = 2, .cpp = { 4, 1, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true },
diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
index 6f0628eb13a6..d720f1e8ae5e 100644
--- a/include/uapi/drm/drm_fourcc.h
+++ b/include/uapi/drm/drm_fourcc.h
@@ -179,6 +179,12 @@ extern "C" {
 #define DRM_FORMAT_ARGB16161616F fourcc_code('A', 'R', '4', 'H') /* [63:0] A:R:G:B 16:16:16:16 little endian */
 #define DRM_FORMAT_ABGR16161616F fourcc_code('A', 'B', '4', 'H') /* [63:0] A:B:G:R 16:16:16:16 little endian */
 
+/*
+ * RGBA format with 10-bit components packed in 64-bit per pixel, with 6 bits
+ * of unused padding per component:
+ */
+#define DRM_FORMAT_AXBXGXRX106106106106 fourcc_code('A', 'B', '1', '0') /* [63:0] A:x:B:x:G:x:R:x 10:6:10:6:10:6:10:6 little endian */
+
 /* packed YCbCr */
 #define DRM_FORMAT_YUYV		fourcc_code('Y', 'U', 'Y', 'V') /* [31:0] Cr0:Y1:Cb0:Y0 8:8:8:8 little endian */
 #define DRM_FORMAT_YVYU		fourcc_code('Y', 'V', 'Y', 'U') /* [31:0] Cb0:Y1:Cr0:Y0 8:8:8:8 little endian */
-- 
cgit v1.2.3


From 66570e966dd9cb4fd57811d0056c6472a14a2c41 Mon Sep 17 00:00:00 2001
From: Oliver Upton <oupton@google.com>
Date: Tue, 18 Aug 2020 15:24:28 +0000
Subject: kvm: x86: only provide PV features if enabled in guest's CPUID

KVM unconditionally provides PV features to the guest, regardless of the
configured CPUID. An unwitting guest that doesn't check
KVM_CPUID_FEATURES before use could access paravirt features that
userspace did not intend to provide. Fix this by checking the guest's
CPUID before performing any paravirtual operations.

Introduce a capability, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, to gate the
aforementioned enforcement. Migrating a VM from a host w/o this patch to
a host with this patch could silently change the ABI exposed to the
guest, warranting that we default to the old behavior and opt-in for
the new one.

Reviewed-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Peter Shier <pshier@google.com>
Signed-off-by: Oliver Upton <oupton@google.com>
Change-Id: I202a0926f65035b872bfe8ad15307c026de59a98
Message-Id: <20200818152429.1923996-4-oupton@google.com>
Reviewed-by: Wanpeng Li <wanpengli@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virt/kvm/api.rst  | 11 +++++++
 arch/x86/include/asm/kvm_host.h | 15 +++++++++
 arch/x86/kvm/cpuid.c            |  7 +++++
 arch/x86/kvm/cpuid.h            | 10 ++++++
 arch/x86/kvm/x86.c              | 67 ++++++++++++++++++++++++++++++++++++++---
 include/uapi/linux/kvm.h        |  1 +
 6 files changed, 106 insertions(+), 5 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 9ece9a827a58..76317221d29f 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6380,3 +6380,14 @@ ranges that KVM should reject access to.
 In combination with KVM_CAP_X86_USER_SPACE_MSR, this allows user space to
 trap and emulate MSRs that are outside of the scope of KVM as well as
 limit the attack surface on KVM's MSR emulation code.
+
+
+8.26 KVM_CAP_ENFORCE_PV_FEATURE_CPUID
+-------------------------------------
+
+Architectures: x86
+
+When enabled, KVM will disable paravirtual features provided to the
+guest according to the bits in the KVM_CPUID_FEATURES CPUID leaf
+(0x40000001). Otherwise, a guest may use the paravirtual features
+regardless of what has actually been exposed through the CPUID leaf.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d0f77235da92..15e51343957e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -789,6 +789,21 @@ struct kvm_vcpu_arch {
 
 	/* AMD MSRC001_0015 Hardware Configuration */
 	u64 msr_hwcr;
+
+	/* pv related cpuid info */
+	struct {
+		/*
+		 * value of the eax register in the KVM_CPUID_FEATURES CPUID
+		 * leaf.
+		 */
+		u32 features;
+
+		/*
+		 * indicates whether pv emulation should be disabled if features
+		 * are not present in the guest's cpuid
+		 */
+		bool enforce;
+	} pv_cpuid;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 37c3668a774f..d253c023ee76 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -107,6 +107,13 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
 		(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
 		best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
 
+	/*
+	 * save the feature bitmap to avoid cpuid lookup for every PV
+	 * operation
+	 */
+	if (best)
+		vcpu->arch.pv_cpuid.features = best->eax;
+
 	if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
 		best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
 		if (best)
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 1d2c4f2e4bb6..bf8577947ed2 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -5,6 +5,7 @@
 #include "x86.h"
 #include <asm/cpu.h>
 #include <asm/processor.h>
+#include <uapi/asm/kvm_para.h>
 
 extern u32 kvm_cpu_caps[NCAPINTS] __read_mostly;
 void kvm_set_cpu_caps(void);
@@ -313,4 +314,13 @@ static inline bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)
 	return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu));
 }
 
+static __always_inline bool guest_pv_has(struct kvm_vcpu *vcpu,
+					 unsigned int kvm_feature)
+{
+	if (!vcpu->arch.pv_cpuid.enforce)
+		return true;
+
+	return vcpu->arch.pv_cpuid.features & (1u << kvm_feature);
+}
+
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b928e092da03..ca940de53e18 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2877,6 +2877,14 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
 	if (data & 0x30)
 		return 1;
 
+	if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_VMEXIT) &&
+	    (data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT))
+		return 1;
+
+	if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT) &&
+	    (data & KVM_ASYNC_PF_DELIVERY_AS_INT))
+		return 1;
+
 	if (!lapic_in_kernel(vcpu))
 		return data ? 1 : 0;
 
@@ -2954,10 +2962,12 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
 	 * Doing a TLB flush here, on the guest's behalf, can avoid
 	 * expensive IPIs.
 	 */
-	trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
-		st->preempted & KVM_VCPU_FLUSH_TLB);
-	if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
-		kvm_vcpu_flush_tlb_guest(vcpu);
+	if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
+		trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
+				       st->preempted & KVM_VCPU_FLUSH_TLB);
+		if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
+			kvm_vcpu_flush_tlb_guest(vcpu);
+	}
 
 	vcpu->arch.st.preempted = 0;
 
@@ -3118,30 +3128,54 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		vcpu->arch.smi_count = data;
 		break;
 	case MSR_KVM_WALL_CLOCK_NEW:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
+			return 1;
+
+		kvm_write_wall_clock(vcpu->kvm, data);
+		break;
 	case MSR_KVM_WALL_CLOCK:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
+			return 1;
+
 		kvm_write_wall_clock(vcpu->kvm, data);
 		break;
 	case MSR_KVM_SYSTEM_TIME_NEW:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
+			return 1;
+
 		kvm_write_system_time(vcpu, data, false, msr_info->host_initiated);
 		break;
 	case MSR_KVM_SYSTEM_TIME:
-		kvm_write_system_time(vcpu, data, true, msr_info->host_initiated);
+		if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
+			return 1;
+
+		kvm_write_system_time(vcpu, data, true,  msr_info->host_initiated);
 		break;
 	case MSR_KVM_ASYNC_PF_EN:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
+			return 1;
+
 		if (kvm_pv_enable_async_pf(vcpu, data))
 			return 1;
 		break;
 	case MSR_KVM_ASYNC_PF_INT:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
+			return 1;
+
 		if (kvm_pv_enable_async_pf_int(vcpu, data))
 			return 1;
 		break;
 	case MSR_KVM_ASYNC_PF_ACK:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
+			return 1;
 		if (data & 0x1) {
 			vcpu->arch.apf.pageready_pending = false;
 			kvm_check_async_pf_completion(vcpu);
 		}
 		break;
 	case MSR_KVM_STEAL_TIME:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_STEAL_TIME))
+			return 1;
 
 		if (unlikely(!sched_info_on()))
 			return 1;
@@ -3158,11 +3192,17 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
 		break;
 	case MSR_KVM_PV_EOI_EN:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI))
+			return 1;
+
 		if (kvm_lapic_enable_pv_eoi(vcpu, data, sizeof(u8)))
 			return 1;
 		break;
 
 	case MSR_KVM_POLL_CONTROL:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL))
+			return 1;
+
 		/* only enable bit supported */
 		if (data & (-1ULL << 1))
 			return 1;
@@ -3658,6 +3698,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_LAST_CPU:
 	case KVM_CAP_X86_USER_SPACE_MSR:
 	case KVM_CAP_X86_MSR_FILTER:
+	case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
 		r = 1;
 		break;
 	case KVM_CAP_SYNC_REGS:
@@ -4528,6 +4569,11 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 
 		return kvm_x86_ops.enable_direct_tlbflush(vcpu);
 
+	case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
+		vcpu->arch.pv_cpuid.enforce = cap->args[0];
+
+		return 0;
+
 	default:
 		return -EINVAL;
 	}
@@ -8000,11 +8046,16 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 		goto out;
 	}
 
+	ret = -KVM_ENOSYS;
+
 	switch (nr) {
 	case KVM_HC_VAPIC_POLL_IRQ:
 		ret = 0;
 		break;
 	case KVM_HC_KICK_CPU:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_PV_UNHALT))
+			break;
+
 		kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
 		kvm_sched_yield(vcpu->kvm, a1);
 		ret = 0;
@@ -8015,9 +8066,15 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 		break;
 #endif
 	case KVM_HC_SEND_IPI:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SEND_IPI))
+			break;
+
 		ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
 		break;
 	case KVM_HC_SCHED_YIELD:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SCHED_YIELD))
+			break;
+
 		kvm_sched_yield(vcpu->kvm, a0);
 		ret = 0;
 		break;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 58f43aa1fc21..ca41220b40b8 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1052,6 +1052,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_STEAL_TIME 187
 #define KVM_CAP_X86_USER_SPACE_MSR 188
 #define KVM_CAP_X86_MSR_FILTER 189
+#define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit v1.2.3


From ba452c9e996d8a4c347b32805f91abb70de5de7e Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@redhat.com>
Date: Tue, 20 Oct 2020 23:25:56 +0200
Subject: bpf: Fix bpf_redirect_neigh helper api to support supplying nexthop
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Based on the discussion in [0], update the bpf_redirect_neigh() helper to
accept an optional parameter specifying the nexthop information. This makes
it possible to combine bpf_fib_lookup() and bpf_redirect_neigh() without
incurring a duplicate FIB lookup - since the FIB lookup helper will return
the nexthop information even if no neighbour is present, this can simply
be passed on to bpf_redirect_neigh() if bpf_fib_lookup() returns
BPF_FIB_LKUP_RET_NO_NEIGH. Thus fix & extend it before helper API is frozen.

  [0] https://lore.kernel.org/bpf/393e17fc-d187-3a8d-2f0d-a627c7c63fca@iogearbox.net/

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/bpf/160322915615.32199.1187570224032024535.stgit@toke.dk
---
 include/linux/filter.h         |   9 +++
 include/uapi/linux/bpf.h       |  22 ++++--
 net/core/filter.c              | 158 ++++++++++++++++++++++++++---------------
 scripts/bpf_helpers_doc.py     |   1 +
 tools/include/uapi/linux/bpf.h |  22 ++++--
 5 files changed, 145 insertions(+), 67 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 20fc24c9779a..72d62cbc1578 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -607,12 +607,21 @@ struct bpf_skb_data_end {
 	void *data_end;
 };
 
+struct bpf_nh_params {
+	u32 nh_family;
+	union {
+		u32 ipv4_nh;
+		struct in6_addr ipv6_nh;
+	};
+};
+
 struct bpf_redirect_info {
 	u32 flags;
 	u32 tgt_index;
 	void *tgt_value;
 	struct bpf_map *map;
 	u32 kern_flags;
+	struct bpf_nh_params nh;
 };
 
 DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index bf5a99d803e4..e6ceac3f7d62 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3677,15 +3677,19 @@ union bpf_attr {
  * 	Return
  * 		The id is returned or 0 in case the id could not be retrieved.
  *
- * long bpf_redirect_neigh(u32 ifindex, u64 flags)
+ * long bpf_redirect_neigh(u32 ifindex, struct bpf_redir_neigh *params, int plen, u64 flags)
  * 	Description
  * 		Redirect the packet to another net device of index *ifindex*
  * 		and fill in L2 addresses from neighboring subsystem. This helper
  * 		is somewhat similar to **bpf_redirect**\ (), except that it
  * 		populates L2 addresses as well, meaning, internally, the helper
- * 		performs a FIB lookup based on the skb's networking header to
- * 		get the address of the next hop and then relies on the neighbor
- * 		lookup for the L2 address of the nexthop.
+ * 		relies on the neighbor lookup for the L2 address of the nexthop.
+ *
+ * 		The helper will perform a FIB lookup based on the skb's
+ * 		networking header to get the address of the next hop, unless
+ * 		this is supplied by the caller in the *params* argument. The
+ * 		*plen* argument indicates the len of *params* and should be set
+ * 		to 0 if *params* is NULL.
  *
  * 		The *flags* argument is reserved and must be 0. The helper is
  * 		currently only supported for tc BPF program types, and enabled
@@ -4906,6 +4910,16 @@ struct bpf_fib_lookup {
 	__u8	dmac[6];     /* ETH_ALEN */
 };
 
+struct bpf_redir_neigh {
+	/* network family for lookup (AF_INET, AF_INET6) */
+	__u32 nh_family;
+	/* network address of nexthop; skips fib lookup to find gateway */
+	union {
+		__be32		ipv4_nh;
+		__u32		ipv6_nh[4];  /* in6_addr; network order */
+	};
+};
+
 enum bpf_task_fd_type {
 	BPF_FD_TYPE_RAW_TRACEPOINT,	/* tp name */
 	BPF_FD_TYPE_TRACEPOINT,		/* tp name */
diff --git a/net/core/filter.c b/net/core/filter.c
index c5e2a1c5fd8d..6d0fa65a4a46 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2165,12 +2165,12 @@ static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
+static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
+			    struct net_device *dev, struct bpf_nh_params *nh)
 {
-	struct dst_entry *dst = skb_dst(skb);
-	struct net_device *dev = dst->dev;
 	u32 hh_len = LL_RESERVED_SPACE(dev);
 	const struct in6_addr *nexthop;
+	struct dst_entry *dst = NULL;
 	struct neighbour *neigh;
 
 	if (dev_xmit_recursion()) {
@@ -2196,8 +2196,13 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
 	}
 
 	rcu_read_lock_bh();
-	nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
-			      &ipv6_hdr(skb)->daddr);
+	if (!nh) {
+		dst = skb_dst(skb);
+		nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
+				      &ipv6_hdr(skb)->daddr);
+	} else {
+		nexthop = &nh->ipv6_nh;
+	}
 	neigh = ip_neigh_gw6(dev, nexthop);
 	if (likely(!IS_ERR(neigh))) {
 		int ret;
@@ -2210,36 +2215,43 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
 		return ret;
 	}
 	rcu_read_unlock_bh();
-	IP6_INC_STATS(dev_net(dst->dev),
-		      ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+	if (dst)
+		IP6_INC_STATS(dev_net(dst->dev),
+			      ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 out_drop:
 	kfree_skb(skb);
 	return -ENETDOWN;
 }
 
-static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
+static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
+				   struct bpf_nh_params *nh)
 {
 	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
 	struct net *net = dev_net(dev);
 	int err, ret = NET_XMIT_DROP;
-	struct dst_entry *dst;
-	struct flowi6 fl6 = {
-		.flowi6_flags	= FLOWI_FLAG_ANYSRC,
-		.flowi6_mark	= skb->mark,
-		.flowlabel	= ip6_flowinfo(ip6h),
-		.flowi6_oif	= dev->ifindex,
-		.flowi6_proto	= ip6h->nexthdr,
-		.daddr		= ip6h->daddr,
-		.saddr		= ip6h->saddr,
-	};
 
-	dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
-	if (IS_ERR(dst))
-		goto out_drop;
+	if (!nh) {
+		struct dst_entry *dst;
+		struct flowi6 fl6 = {
+			.flowi6_flags = FLOWI_FLAG_ANYSRC,
+			.flowi6_mark  = skb->mark,
+			.flowlabel    = ip6_flowinfo(ip6h),
+			.flowi6_oif   = dev->ifindex,
+			.flowi6_proto = ip6h->nexthdr,
+			.daddr	      = ip6h->daddr,
+			.saddr	      = ip6h->saddr,
+		};
+
+		dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
+		if (IS_ERR(dst))
+			goto out_drop;
 
-	skb_dst_set(skb, dst);
+		skb_dst_set(skb, dst);
+	} else if (nh->nh_family != AF_INET6) {
+		goto out_drop;
+	}
 
-	err = bpf_out_neigh_v6(net, skb);
+	err = bpf_out_neigh_v6(net, skb, dev, nh);
 	if (unlikely(net_xmit_eval(err)))
 		dev->stats.tx_errors++;
 	else
@@ -2252,7 +2264,8 @@ out_xmit:
 	return ret;
 }
 #else
-static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
+static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
+				   struct bpf_nh_params *nh)
 {
 	kfree_skb(skb);
 	return NET_XMIT_DROP;
@@ -2260,11 +2273,9 @@ static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
 #endif /* CONFIG_IPV6 */
 
 #if IS_ENABLED(CONFIG_INET)
-static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb)
+static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
+			    struct net_device *dev, struct bpf_nh_params *nh)
 {
-	struct dst_entry *dst = skb_dst(skb);
-	struct rtable *rt = container_of(dst, struct rtable, dst);
-	struct net_device *dev = dst->dev;
 	u32 hh_len = LL_RESERVED_SPACE(dev);
 	struct neighbour *neigh;
 	bool is_v6gw = false;
@@ -2292,7 +2303,21 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb)
 	}
 
 	rcu_read_lock_bh();
-	neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
+	if (!nh) {
+		struct dst_entry *dst = skb_dst(skb);
+		struct rtable *rt = container_of(dst, struct rtable, dst);
+
+		neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
+	} else if (nh->nh_family == AF_INET6) {
+		neigh = ip_neigh_gw6(dev, &nh->ipv6_nh);
+		is_v6gw = true;
+	} else if (nh->nh_family == AF_INET) {
+		neigh = ip_neigh_gw4(dev, nh->ipv4_nh);
+	} else {
+		rcu_read_unlock_bh();
+		goto out_drop;
+	}
+
 	if (likely(!IS_ERR(neigh))) {
 		int ret;
 
@@ -2309,33 +2334,37 @@ out_drop:
 	return -ENETDOWN;
 }
 
-static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
+static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
+				   struct bpf_nh_params *nh)
 {
 	const struct iphdr *ip4h = ip_hdr(skb);
 	struct net *net = dev_net(dev);
 	int err, ret = NET_XMIT_DROP;
-	struct rtable *rt;
-	struct flowi4 fl4 = {
-		.flowi4_flags	= FLOWI_FLAG_ANYSRC,
-		.flowi4_mark	= skb->mark,
-		.flowi4_tos	= RT_TOS(ip4h->tos),
-		.flowi4_oif	= dev->ifindex,
-		.flowi4_proto	= ip4h->protocol,
-		.daddr		= ip4h->daddr,
-		.saddr		= ip4h->saddr,
-	};
 
-	rt = ip_route_output_flow(net, &fl4, NULL);
-	if (IS_ERR(rt))
-		goto out_drop;
-	if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
-		ip_rt_put(rt);
-		goto out_drop;
-	}
+	if (!nh) {
+		struct flowi4 fl4 = {
+			.flowi4_flags = FLOWI_FLAG_ANYSRC,
+			.flowi4_mark  = skb->mark,
+			.flowi4_tos   = RT_TOS(ip4h->tos),
+			.flowi4_oif   = dev->ifindex,
+			.flowi4_proto = ip4h->protocol,
+			.daddr	      = ip4h->daddr,
+			.saddr	      = ip4h->saddr,
+		};
+		struct rtable *rt;
+
+		rt = ip_route_output_flow(net, &fl4, NULL);
+		if (IS_ERR(rt))
+			goto out_drop;
+		if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
+			ip_rt_put(rt);
+			goto out_drop;
+		}
 
-	skb_dst_set(skb, &rt->dst);
+		skb_dst_set(skb, &rt->dst);
+	}
 
-	err = bpf_out_neigh_v4(net, skb);
+	err = bpf_out_neigh_v4(net, skb, dev, nh);
 	if (unlikely(net_xmit_eval(err)))
 		dev->stats.tx_errors++;
 	else
@@ -2348,14 +2377,16 @@ out_xmit:
 	return ret;
 }
 #else
-static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
+static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
+				   struct bpf_nh_params *nh)
 {
 	kfree_skb(skb);
 	return NET_XMIT_DROP;
 }
 #endif /* CONFIG_INET */
 
-static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev)
+static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev,
+				struct bpf_nh_params *nh)
 {
 	struct ethhdr *ethh = eth_hdr(skb);
 
@@ -2370,9 +2401,9 @@ static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev)
 	skb_reset_network_header(skb);
 
 	if (skb->protocol == htons(ETH_P_IP))
-		return __bpf_redirect_neigh_v4(skb, dev);
+		return __bpf_redirect_neigh_v4(skb, dev, nh);
 	else if (skb->protocol == htons(ETH_P_IPV6))
-		return __bpf_redirect_neigh_v6(skb, dev);
+		return __bpf_redirect_neigh_v6(skb, dev, nh);
 out:
 	kfree_skb(skb);
 	return -ENOTSUPP;
@@ -2382,7 +2413,8 @@ out:
 enum {
 	BPF_F_NEIGH	= (1ULL << 1),
 	BPF_F_PEER	= (1ULL << 2),
-#define BPF_F_REDIRECT_INTERNAL	(BPF_F_NEIGH | BPF_F_PEER)
+	BPF_F_NEXTHOP	= (1ULL << 3),
+#define BPF_F_REDIRECT_INTERNAL	(BPF_F_NEIGH | BPF_F_PEER | BPF_F_NEXTHOP)
 };
 
 BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
@@ -2455,7 +2487,8 @@ int skb_do_redirect(struct sk_buff *skb)
 		return -EAGAIN;
 	}
 	return flags & BPF_F_NEIGH ?
-	       __bpf_redirect_neigh(skb, dev) :
+	       __bpf_redirect_neigh(skb, dev, flags & BPF_F_NEXTHOP ?
+				    &ri->nh : NULL) :
 	       __bpf_redirect(skb, dev, flags);
 out_drop:
 	kfree_skb(skb);
@@ -2504,16 +2537,21 @@ static const struct bpf_func_proto bpf_redirect_peer_proto = {
 	.arg2_type      = ARG_ANYTHING,
 };
 
-BPF_CALL_2(bpf_redirect_neigh, u32, ifindex, u64, flags)
+BPF_CALL_4(bpf_redirect_neigh, u32, ifindex, struct bpf_redir_neigh *, params,
+	   int, plen, u64, flags)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 
-	if (unlikely(flags))
+	if (unlikely((plen && plen < sizeof(*params)) || flags))
 		return TC_ACT_SHOT;
 
-	ri->flags = BPF_F_NEIGH;
+	ri->flags = BPF_F_NEIGH | (plen ? BPF_F_NEXTHOP : 0);
 	ri->tgt_index = ifindex;
 
+	BUILD_BUG_ON(sizeof(struct bpf_redir_neigh) != sizeof(struct bpf_nh_params));
+	if (plen)
+		memcpy(&ri->nh, params, sizeof(ri->nh));
+
 	return TC_ACT_REDIRECT;
 }
 
@@ -2522,7 +2560,9 @@ static const struct bpf_func_proto bpf_redirect_neigh_proto = {
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_ANYTHING,
-	.arg2_type	= ARG_ANYTHING,
+	.arg2_type      = ARG_PTR_TO_MEM_OR_NULL,
+	.arg3_type      = ARG_CONST_SIZE_OR_ZERO,
+	.arg4_type	= ARG_ANYTHING,
 };
 
 BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes)
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
index 7d86fdd190be..6769caae142f 100755
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
@@ -453,6 +453,7 @@ class PrinterHelpers(Printer):
             'struct bpf_perf_event_data',
             'struct bpf_perf_event_value',
             'struct bpf_pidns_info',
+            'struct bpf_redir_neigh',
             'struct bpf_sk_lookup',
             'struct bpf_sock',
             'struct bpf_sock_addr',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index bf5a99d803e4..e6ceac3f7d62 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3677,15 +3677,19 @@ union bpf_attr {
  * 	Return
  * 		The id is returned or 0 in case the id could not be retrieved.
  *
- * long bpf_redirect_neigh(u32 ifindex, u64 flags)
+ * long bpf_redirect_neigh(u32 ifindex, struct bpf_redir_neigh *params, int plen, u64 flags)
  * 	Description
  * 		Redirect the packet to another net device of index *ifindex*
  * 		and fill in L2 addresses from neighboring subsystem. This helper
  * 		is somewhat similar to **bpf_redirect**\ (), except that it
  * 		populates L2 addresses as well, meaning, internally, the helper
- * 		performs a FIB lookup based on the skb's networking header to
- * 		get the address of the next hop and then relies on the neighbor
- * 		lookup for the L2 address of the nexthop.
+ * 		relies on the neighbor lookup for the L2 address of the nexthop.
+ *
+ * 		The helper will perform a FIB lookup based on the skb's
+ * 		networking header to get the address of the next hop, unless
+ * 		this is supplied by the caller in the *params* argument. The
+ * 		*plen* argument indicates the len of *params* and should be set
+ * 		to 0 if *params* is NULL.
  *
  * 		The *flags* argument is reserved and must be 0. The helper is
  * 		currently only supported for tc BPF program types, and enabled
@@ -4906,6 +4910,16 @@ struct bpf_fib_lookup {
 	__u8	dmac[6];     /* ETH_ALEN */
 };
 
+struct bpf_redir_neigh {
+	/* network family for lookup (AF_INET, AF_INET6) */
+	__u32 nh_family;
+	/* network address of nexthop; skips fib lookup to find gateway */
+	union {
+		__be32		ipv4_nh;
+		__u32		ipv6_nh[4];  /* in6_addr; network order */
+	};
+};
+
 enum bpf_task_fd_type {
 	BPF_FD_TYPE_RAW_TRACEPOINT,	/* tp name */
 	BPF_FD_TYPE_TRACEPOINT,		/* tp name */
-- 
cgit v1.2.3


From 1b48dc03e575a872404f33b04cd237953c5d7498 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Fri, 23 Oct 2020 17:00:42 +0800
Subject: vhost: vdpa: report iova range

This patch introduces a new ioctl for vhost-vdpa devices that reports
the IOVA range supported by the device.

For a device that implements the get_iova_range() method, we fetch the
range from the vDPA device. If the device doesn't implement
get_iova_range() but depends on the platform IOMMU, we query the range
via DOMAIN_ATTR_GEOMETRY; otherwise [0, ULLONG_MAX] is assumed.

For safety, this patch also rejects any map request that is not within
the valid range.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Link: https://lore.kernel.org/r/20201023090043.14430-3-jasowang@redhat.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vhost/vdpa.c             | 41 ++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/vhost.h       |  4 ++++
 include/uapi/linux/vhost_types.h |  9 +++++++++
 3 files changed, 54 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index a2dbc85e0b0d..846de69d9c01 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -47,6 +47,7 @@ struct vhost_vdpa {
 	int minor;
 	struct eventfd_ctx *config_ctx;
 	int in_batch;
+	struct vdpa_iova_range range;
 };
 
 static DEFINE_IDA(vhost_vdpa_ida);
@@ -337,6 +338,16 @@ static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
 	return 0;
 }
 
+static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp)
+{
+	struct vhost_vdpa_iova_range range = {
+		.first = v->range.first,
+		.last = v->range.last,
+	};
+
+	return copy_to_user(argp, &range, sizeof(range));
+}
+
 static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
 				   void __user *argp)
 {
@@ -471,6 +482,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
 		features = VHOST_VDPA_BACKEND_FEATURES;
 		r = copy_to_user(featurep, &features, sizeof(features));
 		break;
+	case VHOST_VDPA_GET_IOVA_RANGE:
+		r = vhost_vdpa_get_iova_range(v, argp);
+		break;
 	default:
 		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
 		if (r == -ENOIOCTLCMD)
@@ -597,6 +611,10 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
 	long pinned;
 	int ret = 0;
 
+	if (msg->iova < v->range.first ||
+	    msg->iova + msg->size - 1 > v->range.last)
+		return -EINVAL;
+
 	if (vhost_iotlb_itree_first(iotlb, msg->iova,
 				    msg->iova + msg->size - 1))
 		return -EEXIST;
@@ -783,6 +801,27 @@ static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
 	v->domain = NULL;
 }
 
+static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v)
+{
+	struct vdpa_iova_range *range = &v->range;
+	struct iommu_domain_geometry geo;
+	struct vdpa_device *vdpa = v->vdpa;
+	const struct vdpa_config_ops *ops = vdpa->config;
+
+	if (ops->get_iova_range) {
+		*range = ops->get_iova_range(vdpa);
+	} else if (v->domain &&
+		   !iommu_domain_get_attr(v->domain,
+		   DOMAIN_ATTR_GEOMETRY, &geo) &&
+		   geo.force_aperture) {
+		range->first = geo.aperture_start;
+		range->last = geo.aperture_end;
+	} else {
+		range->first = 0;
+		range->last = ULLONG_MAX;
+	}
+}
+
 static int vhost_vdpa_open(struct inode *inode, struct file *filep)
 {
 	struct vhost_vdpa *v;
@@ -823,6 +862,8 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
 	if (r)
 		goto err_init_iotlb;
 
+	vhost_vdpa_set_iova_range(v);
+
 	filep->private_data = v;
 
 	return 0;
diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
index 75232185324a..c998860d7bbc 100644
--- a/include/uapi/linux/vhost.h
+++ b/include/uapi/linux/vhost.h
@@ -146,4 +146,8 @@
 
 /* Set event fd for config interrupt*/
 #define VHOST_VDPA_SET_CONFIG_CALL	_IOW(VHOST_VIRTIO, 0x77, int)
+
+/* Get the valid iova range */
+#define VHOST_VDPA_GET_IOVA_RANGE	_IOR(VHOST_VIRTIO, 0x78, \
+					     struct vhost_vdpa_iova_range)
 #endif
diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h
index 9a269a88a6ff..f7f6a3a28977 100644
--- a/include/uapi/linux/vhost_types.h
+++ b/include/uapi/linux/vhost_types.h
@@ -138,6 +138,15 @@ struct vhost_vdpa_config {
 	__u8 buf[0];
 };
 
+/* vhost vdpa IOVA range
+ * @first: First address that can be mapped by vhost-vDPA
+ * @last: Last address that can be mapped by vhost-vDPA
+ */
+struct vhost_vdpa_iova_range {
+	__u64 first;
+	__u64 last;
+};
+
 /* Feature bits */
 /* Log all write descriptors. Can be changed while device is active. */
 #define VHOST_F_LOG_ALL 26
-- 
cgit v1.2.3


From c45dd3bda1c809eb120452597097e14a96b58c1f Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Oct 2020 18:32:58 +0200
Subject: drm/amdgpu: fix some kernel-doc markups
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some functions have different names between their prototypes
and the kernel-doc markup.

Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c       | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 2 +-
 include/uapi/drm/amdgpu_drm.h                | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 27fbe361e300..5b162429920b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2166,7 +2166,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
 
 
 /**
- * amdgpu_vm_bo_insert_mapping - insert a new mapping
+ * amdgpu_vm_bo_insert_map - insert a new mapping
  *
  * @adev: amdgpu_device pointer
  * @bo_va: bo_va to store the address
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index dc73c0c2dbc8..a3dd909f78ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -531,7 +531,7 @@ error_free:
 }
 
 /**
- * amdgpu_vram_mgr_alloc_sgt - allocate and fill a sg table
+ * amdgpu_vram_mgr_free_sgt - free a sg table
  *
  * @adev: amdgpu device pointer
  * @sgt: sg table to free
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index f7d7bce7d3b0..7fb9c09ee93f 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -667,7 +667,7 @@ struct drm_amdgpu_cs_chunk_data {
 	};
 };
 
-/**
+/*
  *  Query h/w info: Flag that this is integrated (a.h.a. fusion) GPU
  *
  */
-- 
cgit v1.2.3


From f7b6603c666798a1f8379e692d11d500885f32d8 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Oct 2020 18:33:35 +0200
Subject: ALSA: fix kernel-doc markups

Kernel-doc markups should use this format:
        identifier - description

One ordinary comment is marked with kernel-doc notation even though it
is not a kernel-doc comment.

Some identifiers have different names between their prototypes
and the kernel-doc markup.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/535182d6f55d7a7de293dda9676df68f5f60afc6.1603469755.git.mchehab+huawei@kernel.org
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/core.h                  | 3 ++-
 include/sound/pcm.h                   | 4 ++--
 include/uapi/sound/compress_offload.h | 2 +-
 sound/core/control.c                  | 4 ++--
 sound/core/pcm_dmaengine.c            | 3 ++-
 sound/core/pcm_lib.c                  | 2 +-
 sound/core/pcm_native.c               | 4 ++--
 sound/soc/soc-core.c                  | 2 +-
 sound/soc/soc-dapm.c                  | 2 +-
 9 files changed, 14 insertions(+), 12 deletions(-)

(limited to 'include/uapi')

diff --git a/include/sound/core.h b/include/sound/core.h
index 381a010a1bd4..0462c577d7a3 100644
--- a/include/sound/core.h
+++ b/include/sound/core.h
@@ -332,7 +332,8 @@ void __snd_printk(unsigned int level, const char *file, int line,
 #define snd_BUG()		WARN(1, "BUG?\n")
 
 /**
- * Suppress high rates of output when CONFIG_SND_DEBUG is enabled.
+ * snd_printd_ratelimit - Suppress high rates of output when
+ * 			  CONFIG_SND_DEBUG is enabled.
  */
 #define snd_printd_ratelimit() printk_ratelimit()
 
diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index 2ba5df2c9e23..2336bf9243e1 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -1284,8 +1284,8 @@ snd_pcm_sgbuf_get_ptr(struct snd_pcm_substream *substream, unsigned int ofs)
 }
 
 /**
- * snd_pcm_sgbuf_chunk_size - Compute the max size that fits within the contig.
- * page from the given size
+ * snd_pcm_sgbuf_get_chunk_size - Compute the max size that fits within the
+ * contig. page from the given size
  * @substream: PCM substream
  * @ofs: byte offset
  * @size: byte size to examine
diff --git a/include/uapi/sound/compress_offload.h b/include/uapi/sound/compress_offload.h
index 7184265c0b0d..9555f31c8425 100644
--- a/include/uapi/sound/compress_offload.h
+++ b/include/uapi/sound/compress_offload.h
@@ -144,7 +144,7 @@ struct snd_compr_metadata {
 	 __u32 value[8];
 } __attribute__((packed, aligned(4)));
 
-/**
+/*
  * compress path ioctl definitions
  * SNDRV_COMPRESS_GET_CAPS: Query capability of DSP
  * SNDRV_COMPRESS_GET_CODEC_CAPS: Query capability of a codec
diff --git a/sound/core/control.c b/sound/core/control.c
index 421ddc76f264..4373de42a5a0 100644
--- a/sound/core/control.c
+++ b/sound/core/control.c
@@ -1925,8 +1925,8 @@ EXPORT_SYMBOL(snd_ctl_unregister_ioctl);
 
 #ifdef CONFIG_COMPAT
 /**
- * snd_ctl_unregister_ioctl - de-register the device-specific compat 32bit
- * control-ioctls
+ * snd_ctl_unregister_ioctl_compat - de-register the device-specific compat
+ * 32bit control-ioctls
  * @fcn: ioctl callback function to unregister
  */
 int snd_ctl_unregister_ioctl_compat(snd_kctl_ioctl_func_t fcn)
diff --git a/sound/core/pcm_dmaengine.c b/sound/core/pcm_dmaengine.c
index 4d059ff2b2e4..4d0e8fe535a1 100644
--- a/sound/core/pcm_dmaengine.c
+++ b/sound/core/pcm_dmaengine.c
@@ -356,7 +356,8 @@ int snd_dmaengine_pcm_close(struct snd_pcm_substream *substream)
 EXPORT_SYMBOL_GPL(snd_dmaengine_pcm_close);
 
 /**
- * snd_dmaengine_pcm_release_chan_close - Close a dmaengine based PCM substream and release channel
+ * snd_dmaengine_pcm_close_release_chan - Close a dmaengine based PCM
+ *					  substream and release channel
  * @substream: PCM substream
  *
  * Releases the DMA channel associated with the PCM substream.
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index d531e1bc2b81..bda3514c7b2d 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -490,7 +490,7 @@ void snd_pcm_set_ops(struct snd_pcm *pcm, int direction,
 EXPORT_SYMBOL(snd_pcm_set_ops);
 
 /**
- * snd_pcm_sync - set the PCM sync id
+ * snd_pcm_set_sync - set the PCM sync id
  * @substream: the pcm substream
  *
  * Sets the PCM sync identifier for the card.
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 9e0b2d73faf6..47b155a49226 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -112,7 +112,7 @@ void snd_pcm_stream_lock(struct snd_pcm_substream *substream)
 EXPORT_SYMBOL_GPL(snd_pcm_stream_lock);
 
 /**
- * snd_pcm_stream_lock - Unlock the PCM stream
+ * snd_pcm_stream_unlock - Unlock the PCM stream
  * @substream: PCM substream
  *
  * This unlocks the PCM stream that has been locked via snd_pcm_stream_lock().
@@ -595,7 +595,7 @@ static void snd_pcm_sync_stop(struct snd_pcm_substream *substream)
 }
 
 /**
- * snd_pcm_hw_param_choose - choose a configuration defined by @params
+ * snd_pcm_hw_params_choose - choose a configuration defined by @params
  * @pcm: PCM instance
  * @params: the hw_params instance
  *
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index ea3986a46c12..05a085f6dc7c 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -2341,7 +2341,7 @@ struct snd_soc_dai *snd_soc_register_dai(struct snd_soc_component *component,
 }
 
 /**
- * snd_soc_unregister_dai - Unregister DAIs from the ASoC core
+ * snd_soc_unregister_dais - Unregister DAIs from the ASoC core
  *
  * @component: The component for which the DAIs should be unregistered
  */
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 980f2c330b87..7f87b449f950 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -1276,7 +1276,7 @@ static int is_connected_input_ep(struct snd_soc_dapm_widget *widget,
 }
 
 /**
- * snd_soc_dapm_get_connected_widgets - query audio path and it's widgets.
+ * snd_soc_dapm_dai_get_connected_widgets - query audio path and its widgets.
  * @dai: the soc DAI.
  * @stream: stream direction.
  * @list: list of active widgets for this stream.
-- 
cgit v1.2.3


From 2a45a08a5bcfb97a211c5064e81cbbbabbc32e7a Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 20 May 2020 15:53:08 +0530
Subject: asm-generic/sembuf: Update architecture related information in
 comment

The structure came originally from x86_32 but is used by most of the
architectures now. Update the comment which says it is for x86 only.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/64efe033394b6f0dfef043a63fd8897a81ba6d16.1589970173.git.viresh.kumar@linaro.org
---
 include/uapi/asm-generic/sembuf.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/asm-generic/sembuf.h b/include/uapi/asm-generic/sembuf.h
index 0e709bd3d730..f54e48fc91ae 100644
--- a/include/uapi/asm-generic/sembuf.h
+++ b/include/uapi/asm-generic/sembuf.h
@@ -6,9 +6,9 @@
 #include <asm/ipcbuf.h>
 
 /*
- * The semid64_ds structure for x86 architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
+ * The semid64_ds structure for most architectures (though it came from x86_32
+ * originally). Note extra padding because this structure is passed back and
+ * forth between kernel and user space.
  *
  * semid64_ds was originally meant to be architecture specific, but
  * everyone just ended up making identical copies without specific
-- 
cgit v1.2.3


From 70bb9193728627e84e02eb0960b0aa138ae2cef5 Mon Sep 17 00:00:00 2001
From: Simon Ser <contact@emersion.fr>
Date: Mon, 26 Oct 2020 14:12:30 +0100
Subject: drm: deprecate DRM_FORMAT_MOD_NONE

DRM_FORMAT_MOD_NONE is in the list of vendors, which is pretty
confusing. We already have DRM_FORMAT_MOD_VENDOR_NONE. Move it down in
the list of format modifiers.

DRM_FORMAT_MOD_NONE is an alias for DRM_FORMAT_MOD_LINEAR, however the
name is confusing: NONE doesn't mean that the modifier is implicit,
instead it means that the layout is linear. Deprecate it.

Signed-off-by: Simon Ser <contact@emersion.fr>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Pekka Paalanen <pekka.paalanen@collabora.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/a2j8KTgc26k5QniSAhDSTgCw4XWZhmsNHwG8UVa6U@cp4-web-014.plabs.ch
---
 include/uapi/drm/drm_fourcc.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
index d720f1e8ae5e..9f7e19c9416c 100644
--- a/include/uapi/drm/drm_fourcc.h
+++ b/include/uapi/drm/drm_fourcc.h
@@ -350,7 +350,6 @@ extern "C" {
  */
 
 /* Vendor Ids: */
-#define DRM_FORMAT_MOD_NONE           0
 #define DRM_FORMAT_MOD_VENDOR_NONE    0
 #define DRM_FORMAT_MOD_VENDOR_INTEL   0x01
 #define DRM_FORMAT_MOD_VENDOR_AMD     0x02
@@ -422,6 +421,16 @@ extern "C" {
  */
 #define DRM_FORMAT_MOD_LINEAR	fourcc_mod_code(NONE, 0)
 
+/*
+ * Deprecated: use DRM_FORMAT_MOD_LINEAR instead
+ *
+ * The "none" format modifier doesn't actually mean that the modifier is
+ * implicit, instead it means that the layout is linear. Whether modifiers are
+ * used is out-of-band information carried in an API-specific way (e.g. in a
+ * flag for drm_mode_fb_cmd2).
+ */
+#define DRM_FORMAT_MOD_NONE	0
+
 /* Intel framebuffer modifiers */
 
 /*
-- 
cgit v1.2.3


From 874163aab75a6cd7422e71f1fbc6db12977fcf1d Mon Sep 17 00:00:00 2001
From: Shyam Sundar <ssundar@marvell.com>
Date: Wed, 21 Oct 2020 02:27:11 -0700
Subject: scsi: fc: Update formal FPIN descriptor definitions

Add Fabric Performance Impact Notification (FPIN) descriptor definitions
for the following FPINs:

 - Delivery Notification Descriptor

 - Peer Congestion Notification Descriptor

 - Congestion Notification Descriptor

Link: https://lore.kernel.org/r/20201021092715.22669-2-njavali@marvell.com
Reviewed-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Shyam Sundar <ssundar@marvell.com>
Signed-off-by: Nilesh Javali <njavali@marvell.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/uapi/scsi/fc/fc_els.h | 114 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 113 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/scsi/fc/fc_els.h b/include/uapi/scsi/fc/fc_els.h
index 8c704e510e39..91d4be987220 100644
--- a/include/uapi/scsi/fc/fc_els.h
+++ b/include/uapi/scsi/fc/fc_els.h
@@ -916,7 +916,9 @@ enum fc_els_clid_ic {
 	ELS_CLID_IC_LIP =	8,	/* receiving LIP */
 };
 
-
+/*
+ * Link Integrity event types
+ */
 enum fc_fpin_li_event_types {
 	FPIN_LI_UNKNOWN =		0x0,
 	FPIN_LI_LINK_FAILURE =		0x1,
@@ -943,6 +945,54 @@ enum fc_fpin_li_event_types {
 	{ FPIN_LI_DEVICE_SPEC,		"Device Specific" },		\
 }
 
+/*
+ * Delivery event types
+ */
+enum fc_fpin_deli_event_types {
+	FPIN_DELI_UNKNOWN =		0x0,
+	FPIN_DELI_TIMEOUT =		0x1,
+	FPIN_DELI_UNABLE_TO_ROUTE =	0x2,
+	FPIN_DELI_DEVICE_SPEC =		0xF,
+};
+
+/*
+ * Initializer useful for decoding table.
+ * Please keep this in sync with the above definitions.
+ */
+#define FC_FPIN_DELI_EVT_TYPES_INIT {					\
+	{ FPIN_DELI_UNKNOWN,		"Unknown" },			\
+	{ FPIN_DELI_TIMEOUT,		"Timeout" },			\
+	{ FPIN_DELI_UNABLE_TO_ROUTE,	"Unable to Route" },		\
+	{ FPIN_DELI_DEVICE_SPEC,	"Device Specific" },		\
+}
+
+/*
+ * Congestion event types
+ */
+enum fc_fpin_congn_event_types {
+	FPIN_CONGN_CLEAR =		0x0,
+	FPIN_CONGN_LOST_CREDIT =	0x1,
+	FPIN_CONGN_CREDIT_STALL =	0x2,
+	FPIN_CONGN_OVERSUBSCRIPTION =	0x3,
+	FPIN_CONGN_DEVICE_SPEC =	0xF,
+};
+
+/*
+ * Initializer useful for decoding table.
+ * Please keep this in sync with the above definitions.
+ */
+#define FC_FPIN_CONGN_EVT_TYPES_INIT {					\
+	{ FPIN_CONGN_CLEAR,		"Clear" },			\
+	{ FPIN_CONGN_LOST_CREDIT,	"Lost Credit" },		\
+	{ FPIN_CONGN_CREDIT_STALL,	"Credit Stall" },		\
+	{ FPIN_CONGN_OVERSUBSCRIPTION,	"Oversubscription" },		\
+	{ FPIN_CONGN_DEVICE_SPEC,	"Device Specific" },		\
+}
+
+enum fc_fpin_congn_severity_types {
+	FPIN_CONGN_SEVERITY_WARNING =	0xF1,
+	FPIN_CONGN_SEVERITY_ERROR =	0xF7,
+};
 
 /*
  * Link Integrity Notification Descriptor
@@ -974,6 +1024,68 @@ struct fc_fn_li_desc {
 					 */
 };
 
+/*
+ * Delivery Notification Descriptor
+ */
+struct fc_fn_deli_desc {
+	__be32		desc_tag;	/* Descriptor Tag (0x00020002) */
+	__be32		desc_len;	/* Length of Descriptor (in bytes).
+					 * Size of descriptor excluding
+					 * desc_tag and desc_len fields.
+					 */
+	__be64		detecting_wwpn;	/* Port Name that detected event */
+	__be64		attached_wwpn;	/* Port Name of device attached to
+					 * detecting Port Name
+					 */
+	__be32		deli_reason_code;/* see enum fc_fpin_deli_event_types */
+};
+
+/*
+ * Peer Congestion Notification Descriptor
+ */
+struct fc_fn_peer_congn_desc {
+	__be32		desc_tag;	/* Descriptor Tag (0x00020003) */
+	__be32		desc_len;	/* Length of Descriptor (in bytes).
+					 * Size of descriptor excluding
+					 * desc_tag and desc_len fields.
+					 */
+	__be64		detecting_wwpn;	/* Port Name that detected event */
+	__be64		attached_wwpn;	/* Port Name of device attached to
+					 * detecting Port Name
+					 */
+	__be16		event_type;	/* see enum fc_fpin_congn_event_types */
+	__be16		event_modifier;	/* Implementation specific value
+					 * describing the event type
+					 */
+	__be32		event_period;	/* duration (ms) of the detected
+					 * congestion event
+					 */
+	__be32		pname_count;	/* number of pname_list elements */
+	__be64		pname_list[0];	/* list of N_Port_Names accessible
+					 * through the attached port
+					 */
+};
+
+/*
+ * Congestion Notification Descriptor
+ */
+struct fc_fn_congn_desc {
+	__be32		desc_tag;	/* Descriptor Tag (0x00020004) */
+	__be32		desc_len;	/* Length of Descriptor (in bytes).
+					 * Size of descriptor excluding
+					 * desc_tag and desc_len fields.
+					 */
+	__be16		event_type;	/* see enum fc_fpin_congn_event_types */
+	__be16		event_modifier;	/* Implementation specific value
+					 * describing the event type
+					 */
+	__be32		event_period;	/* duration (ms) of the detected
+					 * congestion event
+					 */
+	__u8		severity;	/* see enum fc_fpin_congn_severity_types */
+	__u8		resv[3];	/* reserved - must be zero */
+};
+
 /*
  * ELS_FPIN - Fabric Performance Impact Notification
  */
-- 
cgit v1.2.3


From 26e990badde40b2fb824bfa3cb9d4288a79584bc Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Sat, 3 Oct 2020 20:20:06 -0300
Subject: RDMA: Check attr_mask during modify_qp

Each driver should check that it can support the provided attr_mask during
modify_qp. IB_USER_VERBS_EX_CMD_MODIFY_QP was being used to block
modify_qp_ex because the driver didn't check RATE_LIMIT.

Link: https://lore.kernel.org/r/6-v1-caa70ba3d1ab+1436e-ucmd_mask_jgg@nvidia.com
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/core/device.c             |  1 +
 drivers/infiniband/core/uverbs_cmd.c         |  8 ++------
 drivers/infiniband/hw/bnxt_re/ib_verbs.c     |  3 +++
 drivers/infiniband/hw/cxgb4/qp.c             |  3 +++
 drivers/infiniband/hw/efa/efa_verbs.c        |  3 +++
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c   |  2 ++
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c   |  3 +++
 drivers/infiniband/hw/i40iw/i40iw_verbs.c    |  3 +++
 drivers/infiniband/hw/mlx4/qp.c              |  3 +++
 drivers/infiniband/hw/mlx5/main.c            |  3 +--
 drivers/infiniband/hw/mlx5/qp.c              |  3 +++
 drivers/infiniband/hw/mthca/mthca_qp.c       |  3 +++
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.c  |  3 +++
 drivers/infiniband/hw/qedr/verbs.c           |  3 +++
 drivers/infiniband/hw/usnic/usnic_ib_verbs.c |  3 +++
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c |  3 +++
 drivers/infiniband/sw/rdmavt/qp.c            |  3 +++
 drivers/infiniband/sw/rxe/rxe_verbs.c        |  3 +++
 drivers/infiniband/sw/siw/siw_verbs.c        |  3 +++
 include/rdma/ib_verbs.h                      |  2 ++
 include/uapi/rdma/ib_user_verbs.h            | 14 --------------
 21 files changed, 53 insertions(+), 22 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index efcadbda4409..6d2603571771 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -639,6 +639,7 @@ struct ib_device *_ib_alloc_device(size_t size)
 		BIT_ULL(IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL) |
 		BIT_ULL(IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
 		BIT_ULL(IB_USER_VERBS_EX_CMD_MODIFY_CQ) |
+		BIT_ULL(IB_USER_VERBS_EX_CMD_MODIFY_QP) |
 		BIT_ULL(IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
 		BIT_ULL(IB_USER_VERBS_EX_CMD_QUERY_DEVICE);
 
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index f85a61175772..54c3eb463da8 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1906,8 +1906,7 @@ static int ib_uverbs_modify_qp(struct uverbs_attr_bundle *attrs)
 	if (ret)
 		return ret;
 
-	if (cmd.base.attr_mask &
-	    ~((IB_USER_LEGACY_LAST_QP_ATTR_MASK << 1) - 1))
+	if (cmd.base.attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
 		return -EOPNOTSUPP;
 
 	return modify_qp(attrs, &cmd);
@@ -1929,10 +1928,7 @@ static int ib_uverbs_ex_modify_qp(struct uverbs_attr_bundle *attrs)
 	 * Last bit is reserved for extending the attr_mask by
 	 * using another field.
 	 */
-	BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1ULL << 31));
-
-	if (cmd.base.attr_mask &
-	    ~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1))
+	if (cmd.base.attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT))
 		return -EOPNOTSUPP;
 
 	ret = modify_qp(attrs, &cmd);
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index f9c999d5ba28..f3ec6d3fba62 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -1829,6 +1829,9 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
 	unsigned int flags;
 	u8 nw_type;
 
+	if (qp_attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+		return -EOPNOTSUPP;
+
 	qp->qplib_qp.modify_flags = 0;
 	if (qp_attr_mask & IB_QP_STATE) {
 		curr_qp_state = __to_ib_qp_state(qp->qplib_qp.cur_qp_state);
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index d2b46c5c1645..79e69d449b07 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -2374,6 +2374,9 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 
 	pr_debug("ib_qp %p\n", ibqp);
 
+	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+		return -EOPNOTSUPP;
+
 	/* iwarp does not support the RTR state */
 	if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR))
 		attr_mask &= ~IB_QP_STATE;
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index 191e0843f090..e3d9a5a5f4d9 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -917,6 +917,9 @@ int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 	enum ib_qp_state new_state;
 	int err;
 
+	if (qp_attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+		return -EOPNOTSUPP;
+
 	if (udata->inlen &&
 	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
 		ibdev_dbg(&dev->ibdev,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index b3d5ba8ef439..f18380f827dd 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -3256,6 +3256,8 @@ static int hns_roce_v1_modify_qp(struct ib_qp *ibqp,
 				 enum ib_qp_state cur_state,
 				 enum ib_qp_state new_state)
 {
+	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+		return -EOPNOTSUPP;
 
 	if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI)
 		return hns_roce_v1_m_sqp(ibqp, attr, attr_mask, cur_state,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 6d30850696c5..a0b679254a8e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -4757,6 +4757,9 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
 	unsigned long rq_flag = 0;
 	int ret;
 
+	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+		return -EOPNOTSUPP;
+
 	/*
 	 * In v2 engine, software pass context and context mask to hardware
 	 * when modifying qp. If software need modify some fields in context,
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 76f7d8bb3b60..acc5e945d303 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -855,6 +855,9 @@ int i40iw_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	u32 err;
 	unsigned long flags;
 
+	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+		return -EOPNOTSUPP;
+
 	memset(&info, 0, sizeof(info));
 	ctx_info = &iwqp->ctx_info;
 	iwarp_info = &iwqp->iwarp_info;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 5cb8e602294c..8834629615bc 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -2787,6 +2787,9 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	struct mlx4_ib_qp *mqp = to_mqp(ibqp);
 	int ret;
 
+	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+		return -EOPNOTSUPP;
+
 	ret = _mlx4_ib_modify_qp(ibqp, attr, attr_mask, udata);
 
 	if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index ab469bc835dc..b9a12a1d1c5c 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -4144,8 +4144,7 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
 		(1ull << IB_USER_VERBS_CMD_DESTROY_AH);
 	dev->ib_dev.uverbs_ex_cmd_mask |=
 		(1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ)	|
-		(1ull << IB_USER_VERBS_EX_CMD_CREATE_QP)	|
-		(1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP);
+		(1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
 
 	if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) &&
 	    IS_ENABLED(CONFIG_MLX5_CORE_IPOIB))
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 600e056798c0..19361132336c 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -4247,6 +4247,9 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	int err = -EINVAL;
 	int port;
 
+	if (attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT))
+		return -EOPNOTSUPP;
+
 	if (ibqp->rwq_ind_tbl)
 		return -ENOSYS;
 
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 08a2a7afafd3..07cfc0934b17 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -863,6 +863,9 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
 	enum ib_qp_state cur_state, new_state;
 	int err = -EINVAL;
 
+	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+		return -EOPNOTSUPP;
+
 	mutex_lock(&qp->mutex);
 	if (attr_mask & IB_QP_CUR_STATE) {
 		cur_state = attr->cur_qp_state;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index b392e15d7592..244dd22d53ef 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -1391,6 +1391,9 @@ int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	struct ocrdma_dev *dev;
 	enum ib_qp_state old_qps, new_qps;
 
+	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+		return -EOPNOTSUPP;
+
 	qp = get_ocrdma_qp(ibqp);
 	dev = get_ocrdma_dev(ibqp->device);
 
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 29a96ff6fc66..34c07a18c2c2 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -2472,6 +2472,9 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		 "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask,
 		 attr->qp_state);
 
+	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+		return -EOPNOTSUPP;
+
 	old_qp_state = qedr_get_ibqp_state(qp->state);
 	if (attr_mask & IB_QP_STATE)
 		new_qp_state = attr->qp_state;
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index 9e961f8ffa10..a89d5816685a 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -557,6 +557,9 @@ int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	int status;
 	usnic_dbg("\n");
 
+	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+		return -EOPNOTSUPP;
+
 	qp_grp = to_uqp_grp(ibqp);
 
 	mutex_lock(&qp_grp->vf->pf->usdev_lock);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index 428256c55065..9fdec5b9553c 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -544,6 +544,9 @@ int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	enum ib_qp_state cur_state, next_state;
 	int ret;
 
+	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+		return -EOPNOTSUPP;
+
 	/* Sanity checking. Should need lock here */
 	mutex_lock(&qp->mutex);
 	cur_state = (attr_mask & IB_QP_CUR_STATE) ? attr->cur_qp_state :
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index ee48befc8978..7b93e7bb0072 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -1469,6 +1469,9 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	int pmtu = 0; /* for gcc warning only */
 	int opa_ah;
 
+	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+		return -EOPNOTSUPP;
+
 	spin_lock_irq(&qp->r_lock);
 	spin_lock(&qp->s_hlock);
 	spin_lock(&qp->s_lock);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index d40ebb2e0fce..dafcc0329148 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -436,6 +436,9 @@ static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	struct rxe_dev *rxe = to_rdev(ibqp->device);
 	struct rxe_qp *qp = to_rqp(ibqp);
 
+	if (mask & ~IB_QP_ATTR_STANDARD_BITS)
+		return -EOPNOTSUPP;
+
 	err = rxe_qp_chk_attr(rxe, qp, attr, mask);
 	if (err)
 		goto err1;
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 1c469f967ab9..947b8b1cbe9a 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -544,6 +544,9 @@ int siw_verbs_modify_qp(struct ib_qp *base_qp, struct ib_qp_attr *attr,
 	if (!attr_mask)
 		return 0;
 
+	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+		return -EOPNOTSUPP;
+
 	memset(&new_attrs, 0, sizeof(new_attrs));
 
 	if (attr_mask & IB_QP_ACCESS_FLAGS) {
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 9bf6c319a670..0f9ce27bedcb 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1234,6 +1234,8 @@ enum ib_qp_attr_mask {
 	IB_QP_RESERVED3			= (1<<23),
 	IB_QP_RESERVED4			= (1<<24),
 	IB_QP_RATE_LIMIT		= (1<<25),
+
+	IB_QP_ATTR_STANDARD_BITS = GENMASK(20, 0),
 };
 
 enum ib_qp_state {
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 456438c18c2c..7ee73a0652f1 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -596,20 +596,6 @@ enum {
 	IB_UVERBS_CREATE_QP_SUP_COMP_MASK = IB_UVERBS_CREATE_QP_MASK_IND_TABLE,
 };
 
-enum {
-	/*
-	 * This value is equal to IB_QP_DEST_QPN.
-	 */
-	IB_USER_LEGACY_LAST_QP_ATTR_MASK = 1ULL << 20,
-};
-
-enum {
-	/*
-	 * This value is equal to IB_QP_RATE_LIMIT.
-	 */
-	IB_USER_LAST_QP_ATTR_MASK = 1ULL << 25,
-};
-
 struct ib_uverbs_ex_create_qp {
 	__aligned_u64 user_handle;
 	__u32 pd_handle;
-- 
cgit v1.2.3


From 5760648e63e6c1006a3ed0bfc2167f623b8bcbcd Mon Sep 17 00:00:00 2001
From: Kent Gibson <warthog618@gmail.com>
Date: Mon, 5 Oct 2020 15:03:25 +0800
Subject: gpio: uapi: fix kernel-doc warnings

Fix kernel-doc warnings, specifically gpioline_info_changed.padding is
not documented and 'GPIO event types' describes defines, which are not
documented by kernel-doc.

Signed-off-by: Kent Gibson <warthog618@gmail.com>
Reviewed-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20201005070329.21055-2-warthog618@gmail.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/uapi/linux/gpio.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/gpio.h b/include/uapi/linux/gpio.h
index 07865c601099..b0d5e7a1c693 100644
--- a/include/uapi/linux/gpio.h
+++ b/include/uapi/linux/gpio.h
@@ -346,6 +346,7 @@ enum {
  * @timestamp: estimate of time of status change occurrence, in nanoseconds
  * @event_type: one of GPIOLINE_CHANGED_REQUESTED, GPIOLINE_CHANGED_RELEASED
  * and GPIOLINE_CHANGED_CONFIG
+ * @padding: reserved for future use
  *
  * Note: struct gpioline_info embedded here has 32-bit alignment on its own,
  * but it works fine with 64-bit alignment too. With its 72 byte size, we can
@@ -469,7 +470,7 @@ struct gpioevent_request {
 	int fd;
 };
 
-/**
+/*
  * GPIO event types
  */
 #define GPIOEVENT_EVENT_RISING_EDGE 0x01
-- 
cgit v1.2.3


From f20160217537e9006ce4a625da62b358416fc4ed Mon Sep 17 00:00:00 2001
From: Kent Gibson <warthog618@gmail.com>
Date: Mon, 5 Oct 2020 15:03:26 +0800
Subject: gpio: uapi: comment consistency

Make debounce_period_us field documentation consistent with other fields
in the union.

Signed-off-by: Kent Gibson <warthog618@gmail.com>
Reviewed-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20201005070329.21055-3-warthog618@gmail.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/uapi/linux/gpio.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/gpio.h b/include/uapi/linux/gpio.h
index b0d5e7a1c693..1fdb0e851f83 100644
--- a/include/uapi/linux/gpio.h
+++ b/include/uapi/linux/gpio.h
@@ -98,7 +98,7 @@ struct gpio_v2_line_values {
  * identifying which field of the attribute union is in use.
  * @GPIO_V2_LINE_ATTR_ID_FLAGS: flags field is in use
  * @GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES: values field is in use
- * @GPIO_V2_LINE_ATTR_ID_DEBOUNCE: debounce_period_us is in use
+ * @GPIO_V2_LINE_ATTR_ID_DEBOUNCE: debounce_period_us field is in use
  */
 enum gpio_v2_line_attr_id {
 	GPIO_V2_LINE_ATTR_ID_FLAGS		= 1,
-- 
cgit v1.2.3


From 2cc522d3931ba2aa744d09d41f874d61bf3a1851 Mon Sep 17 00:00:00 2001
From: Kent Gibson <warthog618@gmail.com>
Date: Mon, 5 Oct 2020 15:03:27 +0800
Subject: gpio: uapi: kernel-doc formatting improvements

Add kernel-doc formatting to all references to structs, enums, fields
and constants, and move deprecation warnings into the Note section of
the deprecated struct.

Replace 'OR:ed' with 'added', as the former looks odd.

Signed-off-by: Kent Gibson <warthog618@gmail.com>
Reviewed-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20201005070329.21055-4-warthog618@gmail.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/uapi/linux/gpio.h | 93 ++++++++++++++++++++++++-----------------------
 1 file changed, 47 insertions(+), 46 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/gpio.h b/include/uapi/linux/gpio.h
index 1fdb0e851f83..32dd18f238c3 100644
--- a/include/uapi/linux/gpio.h
+++ b/include/uapi/linux/gpio.h
@@ -110,17 +110,17 @@ enum gpio_v2_line_attr_id {
  * struct gpio_v2_line_attribute - a configurable attribute of a line
  * @id: attribute identifier with value from &enum gpio_v2_line_attr_id
  * @padding: reserved for future use and must be zero filled
- * @flags: if id is GPIO_V2_LINE_ATTR_ID_FLAGS, the flags for the GPIO
- * line, with values from enum gpio_v2_line_flag, such as
- * GPIO_V2_LINE_FLAG_ACTIVE_LOW, GPIO_V2_LINE_FLAG_OUTPUT etc, OR:ed
+ * @flags: if id is %GPIO_V2_LINE_ATTR_ID_FLAGS, the flags for the GPIO
+ * line, with values from &enum gpio_v2_line_flag, such as
+ * %GPIO_V2_LINE_FLAG_ACTIVE_LOW, %GPIO_V2_LINE_FLAG_OUTPUT etc, added
  * together.  This overrides the default flags contained in the &struct
  * gpio_v2_line_config for the associated line.
- * @values: if id is GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES, a bitmap
+ * @values: if id is %GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES, a bitmap
  * containing the values to which the lines will be set, with each bit
  * number corresponding to the index into &struct
  * gpio_v2_line_request.offsets.
- * @debounce_period_us: if id is GPIO_V2_LINE_ATTR_ID_DEBOUNCE, the desired
- * debounce period, in microseconds
+ * @debounce_period_us: if id is %GPIO_V2_LINE_ATTR_ID_DEBOUNCE, the
+ * desired debounce period, in microseconds
  */
 struct gpio_v2_line_attribute {
 	__u32 id;
@@ -147,12 +147,12 @@ struct gpio_v2_line_config_attribute {
 
 /**
  * struct gpio_v2_line_config - Configuration for GPIO lines
- * @flags: flags for the GPIO lines, with values from enum
- * gpio_v2_line_flag, such as GPIO_V2_LINE_FLAG_ACTIVE_LOW,
- * GPIO_V2_LINE_FLAG_OUTPUT etc, OR:ed together.  This is the default for
+ * @flags: flags for the GPIO lines, with values from &enum
+ * gpio_v2_line_flag, such as %GPIO_V2_LINE_FLAG_ACTIVE_LOW,
+ * %GPIO_V2_LINE_FLAG_OUTPUT etc, added together.  This is the default for
  * all requested lines but may be overridden for particular lines using
- * attrs.
- * @num_attrs: the number of attributes in attrs
+ * @attrs.
+ * @num_attrs: the number of attributes in @attrs
  * @padding: reserved for future use and must be zero filled
  * @attrs: the configuration attributes associated with the requested
  * lines.  Any attribute should only be associated with a particular line
@@ -175,17 +175,17 @@ struct gpio_v2_line_config {
  * "my-bitbanged-relay"
  * @config: requested configuration for the lines.
  * @num_lines: number of lines requested in this request, i.e. the number
- * of valid fields in the GPIO_V2_LINES_MAX sized arrays, set to 1 to
+ * of valid fields in the %GPIO_V2_LINES_MAX sized arrays, set to 1 to
  * request a single line
  * @event_buffer_size: a suggested minimum number of line events that the
  * kernel should buffer.  This is only relevant if edge detection is
  * enabled in the configuration. Note that this is only a suggested value
  * and the kernel may allocate a larger buffer or cap the size of the
  * buffer. If this field is zero then the buffer size defaults to a minimum
- * of num_lines*16.
+ * of @num_lines * 16.
  * @padding: reserved for future use and must be zero filled
  * @fd: if successful this field will contain a valid anonymous file handle
- * after a GPIO_GET_LINE_IOCTL operation, zero or negative value means
+ * after a %GPIO_GET_LINE_IOCTL operation, zero or negative value means
  * error
  */
 struct gpio_v2_line_request {
@@ -207,11 +207,12 @@ struct gpio_v2_line_request {
  * @consumer: a functional name for the consumer of this GPIO line as set
  * by whatever is using it, will be empty if there is no current user but
  * may also be empty if the consumer doesn't set this up
- * @flags: flags for the GPIO line, such as GPIO_V2_LINE_FLAG_ACTIVE_LOW,
- * GPIO_V2_LINE_FLAG_OUTPUT etc, OR:ed together
  * @offset: the local offset on this GPIO chip, fill this in when
  * requesting the line information from the kernel
- * @num_attrs: the number of attributes in attrs
+ * @num_attrs: the number of attributes in @attrs
+ * @flags: flags for the GPIO lines, with values from &enum
+ * gpio_v2_line_flag, such as %GPIO_V2_LINE_FLAG_ACTIVE_LOW,
+ * %GPIO_V2_LINE_FLAG_OUTPUT etc, added together.
  * @attrs: the configuration attributes associated with the line
  * @padding: reserved for future use
  */
@@ -244,7 +245,7 @@ enum gpio_v2_line_changed_type {
  * of a GPIO line
  * @info: updated line information
  * @timestamp_ns: estimate of time of status change occurrence, in nanoseconds
- * @event_type: the type of change with a value from enum
+ * @event_type: the type of change with a value from &enum
  * gpio_v2_line_changed_type
  * @padding: reserved for future use
  */
@@ -269,10 +270,10 @@ enum gpio_v2_line_event_id {
 /**
  * struct gpio_v2_line_event - The actual event being pushed to userspace
  * @timestamp_ns: best estimate of time of event occurrence, in nanoseconds.
- * The timestamp_ns is read from CLOCK_MONOTONIC and is intended to allow the
- * accurate measurement of the time between events.  It does not provide
+ * The @timestamp_ns is read from %CLOCK_MONOTONIC and is intended to allow
+ * the accurate measurement of the time between events. It does not provide
  * the wall-clock time.
- * @id: event identifier with value from enum gpio_v2_line_event_id
+ * @id: event identifier with value from &enum gpio_v2_line_event_id
  * @offset: the offset of the line that triggered the event
  * @seqno: the sequence number for this event in the sequence of events for
  * all the lines in this line request
@@ -319,8 +320,8 @@ struct gpio_v2_line_event {
  * whatever is using it, will be empty if there is no current user but may
  * also be empty if the consumer doesn't set this up
  *
- * This struct is part of ABI v1 and is deprecated.
- * Use struct gpio_v2_line_info instead.
+ * Note: This struct is part of ABI v1 and is deprecated.
+ * Use &struct gpio_v2_line_info instead.
  */
 struct gpioline_info {
 	__u32 line_offset;
@@ -344,18 +345,18 @@ enum {
  * of a GPIO line
  * @info: updated line information
  * @timestamp: estimate of time of status change occurrence, in nanoseconds
- * @event_type: one of GPIOLINE_CHANGED_REQUESTED, GPIOLINE_CHANGED_RELEASED
- * and GPIOLINE_CHANGED_CONFIG
+ * @event_type: one of %GPIOLINE_CHANGED_REQUESTED,
+ * %GPIOLINE_CHANGED_RELEASED and %GPIOLINE_CHANGED_CONFIG
  * @padding: reserved for future use
  *
- * Note: struct gpioline_info embedded here has 32-bit alignment on its own,
+ * The &struct gpioline_info embedded here has 32-bit alignment on its own,
  * but it works fine with 64-bit alignment too. With its 72 byte size, we can
  * guarantee there are no implicit holes between it and subsequent members.
  * The 20-byte padding at the end makes sure we don't add any implicit padding
  * at the end of the structure on 64-bit architectures.
  *
- * This struct is part of ABI v1 and is deprecated.
- * Use struct gpio_v2_line_info_changed instead.
+ * Note: This struct is part of ABI v1 and is deprecated.
+ * Use &struct gpio_v2_line_info_changed instead.
  */
 struct gpioline_info_changed {
 	struct gpioline_info info;
@@ -379,13 +380,13 @@ struct gpioline_info_changed {
  * @lineoffsets: an array of desired lines, specified by offset index for the
  * associated GPIO device
  * @flags: desired flags for the desired GPIO lines, such as
- * GPIOHANDLE_REQUEST_OUTPUT, GPIOHANDLE_REQUEST_ACTIVE_LOW etc, OR:ed
+ * %GPIOHANDLE_REQUEST_OUTPUT, %GPIOHANDLE_REQUEST_ACTIVE_LOW etc, added
  * together. Note that even if multiple lines are requested, the same flags
  * must be applicable to all of them, if you want lines with individual
  * flags set, request them one by one. It is possible to select
  * a batch of input or output lines, but they must all have the same
  * characteristics, i.e. all inputs or all outputs, all active low etc
- * @default_values: if the GPIOHANDLE_REQUEST_OUTPUT is set for a requested
+ * @default_values: if the %GPIOHANDLE_REQUEST_OUTPUT is set for a requested
  * line, this specifies the default output value, should be 0 (low) or
  * 1 (high), anything else than 0 or 1 will be interpreted as 1 (high)
  * @consumer_label: a desired consumer label for the selected GPIO line(s)
@@ -393,11 +394,11 @@ struct gpioline_info_changed {
  * @lines: number of lines requested in this request, i.e. the number of
  * valid fields in the above arrays, set to 1 to request a single line
  * @fd: if successful this field will contain a valid anonymous file handle
- * after a GPIO_GET_LINEHANDLE_IOCTL operation, zero or negative value
+ * after a %GPIO_GET_LINEHANDLE_IOCTL operation, zero or negative value
  * means error
  *
- * This struct is part of ABI v1 and is deprecated.
- * Use struct gpio_v2_line_request instead.
+ * Note: This struct is part of ABI v1 and is deprecated.
+ * Use &struct gpio_v2_line_request instead.
  */
 struct gpiohandle_request {
 	__u32 lineoffsets[GPIOHANDLES_MAX];
@@ -411,15 +412,15 @@ struct gpiohandle_request {
 /**
  * struct gpiohandle_config - Configuration for a GPIO handle request
  * @flags: updated flags for the requested GPIO lines, such as
- * GPIOHANDLE_REQUEST_OUTPUT, GPIOHANDLE_REQUEST_ACTIVE_LOW etc, OR:ed
+ * %GPIOHANDLE_REQUEST_OUTPUT, %GPIOHANDLE_REQUEST_ACTIVE_LOW etc, added
  * together
- * @default_values: if the GPIOHANDLE_REQUEST_OUTPUT is set in flags,
+ * @default_values: if the %GPIOHANDLE_REQUEST_OUTPUT is set in flags,
  * this specifies the default output value, should be 0 (low) or
  * 1 (high), anything else than 0 or 1 will be interpreted as 1 (high)
  * @padding: reserved for future use and should be zero filled
  *
- * This struct is part of ABI v1 and is deprecated.
- * Use struct gpio_v2_line_config instead.
+ * Note: This struct is part of ABI v1 and is deprecated.
+ * Use &struct gpio_v2_line_config instead.
  */
 struct gpiohandle_config {
 	__u32 flags;
@@ -433,8 +434,8 @@ struct gpiohandle_config {
  * state of a line, when setting the state of lines these should contain
  * the desired target state
  *
- * This struct is part of ABI v1 and is deprecated.
- * Use struct gpio_v2_line_values instead.
+ * Note: This struct is part of ABI v1 and is deprecated.
+ * Use &struct gpio_v2_line_values instead.
  */
 struct gpiohandle_data {
 	__u8 values[GPIOHANDLES_MAX];
@@ -450,17 +451,17 @@ struct gpiohandle_data {
  * @lineoffset: the desired line to subscribe to events from, specified by
  * offset index for the associated GPIO device
  * @handleflags: desired handle flags for the desired GPIO line, such as
- * GPIOHANDLE_REQUEST_ACTIVE_LOW or GPIOHANDLE_REQUEST_OPEN_DRAIN
+ * %GPIOHANDLE_REQUEST_ACTIVE_LOW or %GPIOHANDLE_REQUEST_OPEN_DRAIN
  * @eventflags: desired flags for the desired GPIO event line, such as
- * GPIOEVENT_REQUEST_RISING_EDGE or GPIOEVENT_REQUEST_FALLING_EDGE
+ * %GPIOEVENT_REQUEST_RISING_EDGE or %GPIOEVENT_REQUEST_FALLING_EDGE
  * @consumer_label: a desired consumer label for the selected GPIO line(s)
  * such as "my-listener"
  * @fd: if successful this field will contain a valid anonymous file handle
- * after a GPIO_GET_LINEEVENT_IOCTL operation, zero or negative value
+ * after a %GPIO_GET_LINEEVENT_IOCTL operation, zero or negative value
  * means error
  *
- * This struct is part of ABI v1 and is deprecated.
- * Use struct gpio_v2_line_request instead.
+ * Note: This struct is part of ABI v1 and is deprecated.
+ * Use &struct gpio_v2_line_request instead.
  */
 struct gpioevent_request {
 	__u32 lineoffset;
@@ -481,8 +482,8 @@ struct gpioevent_request {
  * @timestamp: best estimate of time of event occurrence, in nanoseconds
  * @id: event identifier
  *
- * This struct is part of ABI v1 and is deprecated.
- * Use struct gpio_v2_line_event instead.
+ * Note: This struct is part of ABI v1 and is deprecated.
+ * Use &struct gpio_v2_line_event instead.
  */
 struct gpioevent_data {
 	__u64 timestamp;
-- 
cgit v1.2.3


From c303c51c87a61ace7330b5e0217468b1b8f98a75 Mon Sep 17 00:00:00 2001
From: Kent Gibson <warthog618@gmail.com>
Date: Mon, 5 Oct 2020 15:03:28 +0800
Subject: gpio: uapi: remove whitespace

Remove leading whitespace in ABI v1 comment.

Signed-off-by: Kent Gibson <warthog618@gmail.com>
Reviewed-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20201005070329.21055-5-warthog618@gmail.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/uapi/linux/gpio.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/gpio.h b/include/uapi/linux/gpio.h
index 32dd18f238c3..ad3f56dd87ec 100644
--- a/include/uapi/linux/gpio.h
+++ b/include/uapi/linux/gpio.h
@@ -292,7 +292,7 @@ struct gpio_v2_line_event {
 };
 
 /*
- *  ABI v1
+ * ABI v1
  *
  * This version of the ABI is deprecated.
  * Use the latest version of the ABI, defined above, instead.
-- 
cgit v1.2.3


From 2f84a2de539cc4301a332c2c76473fc25baf21b7 Mon Sep 17 00:00:00 2001
From: Kent Gibson <warthog618@gmail.com>
Date: Mon, 5 Oct 2020 15:03:29 +0800
Subject: gpio: uapi: clarify the meaning of 'empty' char arrays

Clarify that a char array containing a string is considered 'empty' if
the first character is the null terminator. The remaining characters
are not relevant to this determination.

Signed-off-by: Kent Gibson <warthog618@gmail.com>
Reviewed-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20201005070329.21055-6-warthog618@gmail.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/uapi/linux/gpio.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/gpio.h b/include/uapi/linux/gpio.h
index ad3f56dd87ec..2072c260f5d0 100644
--- a/include/uapi/linux/gpio.h
+++ b/include/uapi/linux/gpio.h
@@ -26,7 +26,7 @@
  * struct gpiochip_info - Information about a certain GPIO chip
  * @name: the Linux kernel name of this GPIO chip
  * @label: a functional name for this GPIO chip, such as a product
- * number, may be empty
+ * number, may be empty (i.e. label[0] == '\0')
  * @lines: number of GPIO lines on this chip
  */
 struct gpiochip_info {
@@ -203,7 +203,7 @@ struct gpio_v2_line_request {
  * struct gpio_v2_line_info - Information about a certain GPIO line
  * @name: the name of this GPIO line, such as the output pin of the line on
  * the chip, a rail or a pin header name on a board, as specified by the
- * GPIO chip, may be empty
+ * GPIO chip, may be empty (i.e. name[0] == '\0')
  * @consumer: a functional name for the consumer of this GPIO line as set
  * by whatever is using it, will be empty if there is no current user but
  * may also be empty if the consumer doesn't set this up
@@ -315,7 +315,7 @@ struct gpio_v2_line_event {
  * @flags: various flags for this line
  * @name: the name of this GPIO line, such as the output pin of the line on the
  * chip, a rail or a pin header name on a board, as specified by the gpio
- * chip, may be empty
+ * chip, may be empty (i.e. name[0] == '\0')
  * @consumer: a functional name for the consumer of this GPIO line as set by
  * whatever is using it, will be empty if there is no current user but may
  * also be empty if the consumer doesn't set this up
-- 
cgit v1.2.3


From 80ade22c06ca115b81dd168e99479c8e09843513 Mon Sep 17 00:00:00 2001
From: Sudeep Dutt <sudeep.dutt@intel.com>
Date: Tue, 27 Oct 2020 20:14:15 -0700
Subject: misc: mic: remove the MIC drivers

This patch removes the MIC drivers from the kernel tree
since the corresponding devices have been discontinued.

Removing the dma and char-misc changes in one patch and
merging via the char-misc tree is best to avoid any
potential build breakage.

Cc: Nikhil Rao <nikhil.rao@intel.com>
Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Sudeep Dutt <sudeep.dutt@intel.com>
Acked-By: Vinod Koul <vkoul@kernel.org>
Reviewed-by: Sherry Sun <sherry.sun@nxp.com>
Link: https://lore.kernel.org/r/8c1443136563de34699d2c084df478181c205db4.1603854416.git.sudeep.dutt@intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/misc-devices/mic/index.rst         |   16 -
 Documentation/misc-devices/mic/mic_overview.rst  |   85 -
 Documentation/misc-devices/mic/scif_overview.rst |  108 --
 MAINTAINERS                                      |   16 -
 drivers/dma/Kconfig                              |   18 -
 drivers/dma/Makefile                             |    1 -
 drivers/dma/mic_x100_dma.c                       |  770 ---------
 drivers/dma/mic_x100_dma.h                       |  275 ---
 drivers/misc/Kconfig                             |    1 -
 drivers/misc/Makefile                            |    1 -
 drivers/misc/mic/Kconfig                         |  141 --
 drivers/misc/mic/Makefile                        |   12 -
 drivers/misc/mic/bus/Makefile                    |    9 -
 drivers/misc/mic/bus/cosm_bus.c                  |  130 --
 drivers/misc/mic/bus/cosm_bus.h                  |  125 --
 drivers/misc/mic/bus/mic_bus.c                   |  194 ---
 drivers/misc/mic/bus/scif_bus.c                  |  201 ---
 drivers/misc/mic/bus/scif_bus.h                  |  125 --
 drivers/misc/mic/bus/vop_bus.c                   |  194 ---
 drivers/misc/mic/bus/vop_bus.h                   |  129 --
 drivers/misc/mic/card/Makefile                   |   11 -
 drivers/misc/mic/card/mic_debugfs.c              |   85 -
 drivers/misc/mic/card/mic_device.c               |  417 -----
 drivers/misc/mic/card/mic_device.h               |  137 --
 drivers/misc/mic/card/mic_x100.c                 |  347 ----
 drivers/misc/mic/card/mic_x100.h                 |   37 -
 drivers/misc/mic/common/mic_dev.h                |   55 -
 drivers/misc/mic/cosm/Makefile                   |   11 -
 drivers/misc/mic/cosm/cosm_debugfs.c             |  116 --
 drivers/misc/mic/cosm/cosm_main.c                |  382 -----
 drivers/misc/mic/cosm/cosm_main.h                |   61 -
 drivers/misc/mic/cosm/cosm_scif_server.c         |  399 -----
 drivers/misc/mic/cosm/cosm_sysfs.c               |  449 -----
 drivers/misc/mic/cosm_client/Makefile            |    8 -
 drivers/misc/mic/cosm_client/cosm_scif_client.c  |  269 ---
 drivers/misc/mic/host/Makefile                   |   12 -
 drivers/misc/mic/host/mic_boot.c                 |  588 -------
 drivers/misc/mic/host/mic_debugfs.c              |  149 --
 drivers/misc/mic/host/mic_device.h               |  157 --
 drivers/misc/mic/host/mic_intr.c                 |  635 -------
 drivers/misc/mic/host/mic_intr.h                 |  137 --
 drivers/misc/mic/host/mic_main.c                 |  335 ----
 drivers/misc/mic/host/mic_smpt.c                 |  427 -----
 drivers/misc/mic/host/mic_smpt.h                 |   87 -
 drivers/misc/mic/host/mic_x100.c                 |  585 -------
 drivers/misc/mic/host/mic_x100.h                 |   77 -
 drivers/misc/mic/scif/Makefile                   |   21 -
 drivers/misc/mic/scif/scif_api.c                 | 1485 -----------------
 drivers/misc/mic/scif/scif_debugfs.c             |  116 --
 drivers/misc/mic/scif/scif_dma.c                 | 1940 ----------------------
 drivers/misc/mic/scif/scif_epd.c                 |  357 ----
 drivers/misc/mic/scif/scif_epd.h                 |  200 ---
 drivers/misc/mic/scif/scif_fd.c                  |  462 ------
 drivers/misc/mic/scif/scif_fence.c               |  783 ---------
 drivers/misc/mic/scif/scif_main.c                |  351 ----
 drivers/misc/mic/scif/scif_main.h                |  274 ---
 drivers/misc/mic/scif/scif_map.h                 |  127 --
 drivers/misc/mic/scif/scif_mmap.c                |  690 --------
 drivers/misc/mic/scif/scif_nm.c                  |  229 ---
 drivers/misc/mic/scif/scif_nodeqp.c              | 1349 ---------------
 drivers/misc/mic/scif/scif_nodeqp.h              |  221 ---
 drivers/misc/mic/scif/scif_peer_bus.c            |  175 --
 drivers/misc/mic/scif/scif_peer_bus.h            |   23 -
 drivers/misc/mic/scif/scif_ports.c               |  116 --
 drivers/misc/mic/scif/scif_rb.c                  |  240 ---
 drivers/misc/mic/scif/scif_rb.h                  |  100 --
 drivers/misc/mic/scif/scif_rma.c                 | 1760 --------------------
 drivers/misc/mic/scif/scif_rma.h                 |  477 ------
 drivers/misc/mic/scif/scif_rma_list.c            |  282 ----
 drivers/misc/mic/scif/scif_rma_list.h            |   48 -
 drivers/misc/mic/vop/Makefile                    |   10 -
 drivers/misc/mic/vop/vop_debugfs.c               |  184 --
 drivers/misc/mic/vop/vop_main.c                  |  784 ---------
 drivers/misc/mic/vop/vop_main.h                  |  158 --
 drivers/misc/mic/vop/vop_vringh.c                | 1166 -------------
 include/linux/mic_bus.h                          |  100 --
 include/linux/scif.h                             | 1339 ---------------
 include/uapi/linux/mic_common.h                  |  235 ---
 include/uapi/linux/mic_ioctl.h                   |   77 -
 samples/mic/mpssd/.gitignore                     |    2 -
 samples/mic/mpssd/Makefile                       |   28 -
 samples/mic/mpssd/micctrl                        |  162 --
 samples/mic/mpssd/mpss                           |  189 ---
 samples/mic/mpssd/mpssd.c                        | 1815 --------------------
 samples/mic/mpssd/mpssd.h                        |   89 -
 samples/mic/mpssd/sysfs.c                        |   91 -
 86 files changed, 26779 deletions(-)
 delete mode 100644 Documentation/misc-devices/mic/index.rst
 delete mode 100644 Documentation/misc-devices/mic/mic_overview.rst
 delete mode 100644 Documentation/misc-devices/mic/scif_overview.rst
 delete mode 100644 drivers/dma/mic_x100_dma.c
 delete mode 100644 drivers/dma/mic_x100_dma.h
 delete mode 100644 drivers/misc/mic/Kconfig
 delete mode 100644 drivers/misc/mic/Makefile
 delete mode 100644 drivers/misc/mic/bus/Makefile
 delete mode 100644 drivers/misc/mic/bus/cosm_bus.c
 delete mode 100644 drivers/misc/mic/bus/cosm_bus.h
 delete mode 100644 drivers/misc/mic/bus/mic_bus.c
 delete mode 100644 drivers/misc/mic/bus/scif_bus.c
 delete mode 100644 drivers/misc/mic/bus/scif_bus.h
 delete mode 100644 drivers/misc/mic/bus/vop_bus.c
 delete mode 100644 drivers/misc/mic/bus/vop_bus.h
 delete mode 100644 drivers/misc/mic/card/Makefile
 delete mode 100644 drivers/misc/mic/card/mic_debugfs.c
 delete mode 100644 drivers/misc/mic/card/mic_device.c
 delete mode 100644 drivers/misc/mic/card/mic_device.h
 delete mode 100644 drivers/misc/mic/card/mic_x100.c
 delete mode 100644 drivers/misc/mic/card/mic_x100.h
 delete mode 100644 drivers/misc/mic/common/mic_dev.h
 delete mode 100644 drivers/misc/mic/cosm/Makefile
 delete mode 100644 drivers/misc/mic/cosm/cosm_debugfs.c
 delete mode 100644 drivers/misc/mic/cosm/cosm_main.c
 delete mode 100644 drivers/misc/mic/cosm/cosm_main.h
 delete mode 100644 drivers/misc/mic/cosm/cosm_scif_server.c
 delete mode 100644 drivers/misc/mic/cosm/cosm_sysfs.c
 delete mode 100644 drivers/misc/mic/cosm_client/Makefile
 delete mode 100644 drivers/misc/mic/cosm_client/cosm_scif_client.c
 delete mode 100644 drivers/misc/mic/host/Makefile
 delete mode 100644 drivers/misc/mic/host/mic_boot.c
 delete mode 100644 drivers/misc/mic/host/mic_debugfs.c
 delete mode 100644 drivers/misc/mic/host/mic_device.h
 delete mode 100644 drivers/misc/mic/host/mic_intr.c
 delete mode 100644 drivers/misc/mic/host/mic_intr.h
 delete mode 100644 drivers/misc/mic/host/mic_main.c
 delete mode 100644 drivers/misc/mic/host/mic_smpt.c
 delete mode 100644 drivers/misc/mic/host/mic_smpt.h
 delete mode 100644 drivers/misc/mic/host/mic_x100.c
 delete mode 100644 drivers/misc/mic/host/mic_x100.h
 delete mode 100644 drivers/misc/mic/scif/Makefile
 delete mode 100644 drivers/misc/mic/scif/scif_api.c
 delete mode 100644 drivers/misc/mic/scif/scif_debugfs.c
 delete mode 100644 drivers/misc/mic/scif/scif_dma.c
 delete mode 100644 drivers/misc/mic/scif/scif_epd.c
 delete mode 100644 drivers/misc/mic/scif/scif_epd.h
 delete mode 100644 drivers/misc/mic/scif/scif_fd.c
 delete mode 100644 drivers/misc/mic/scif/scif_fence.c
 delete mode 100644 drivers/misc/mic/scif/scif_main.c
 delete mode 100644 drivers/misc/mic/scif/scif_main.h
 delete mode 100644 drivers/misc/mic/scif/scif_map.h
 delete mode 100644 drivers/misc/mic/scif/scif_mmap.c
 delete mode 100644 drivers/misc/mic/scif/scif_nm.c
 delete mode 100644 drivers/misc/mic/scif/scif_nodeqp.c
 delete mode 100644 drivers/misc/mic/scif/scif_nodeqp.h
 delete mode 100644 drivers/misc/mic/scif/scif_peer_bus.c
 delete mode 100644 drivers/misc/mic/scif/scif_peer_bus.h
 delete mode 100644 drivers/misc/mic/scif/scif_ports.c
 delete mode 100644 drivers/misc/mic/scif/scif_rb.c
 delete mode 100644 drivers/misc/mic/scif/scif_rb.h
 delete mode 100644 drivers/misc/mic/scif/scif_rma.c
 delete mode 100644 drivers/misc/mic/scif/scif_rma.h
 delete mode 100644 drivers/misc/mic/scif/scif_rma_list.c
 delete mode 100644 drivers/misc/mic/scif/scif_rma_list.h
 delete mode 100644 drivers/misc/mic/vop/Makefile
 delete mode 100644 drivers/misc/mic/vop/vop_debugfs.c
 delete mode 100644 drivers/misc/mic/vop/vop_main.c
 delete mode 100644 drivers/misc/mic/vop/vop_main.h
 delete mode 100644 drivers/misc/mic/vop/vop_vringh.c
 delete mode 100644 include/linux/mic_bus.h
 delete mode 100644 include/linux/scif.h
 delete mode 100644 include/uapi/linux/mic_common.h
 delete mode 100644 include/uapi/linux/mic_ioctl.h
 delete mode 100644 samples/mic/mpssd/.gitignore
 delete mode 100644 samples/mic/mpssd/Makefile
 delete mode 100755 samples/mic/mpssd/micctrl
 delete mode 100755 samples/mic/mpssd/mpss
 delete mode 100644 samples/mic/mpssd/mpssd.c
 delete mode 100644 samples/mic/mpssd/mpssd.h
 delete mode 100644 samples/mic/mpssd/sysfs.c

(limited to 'include/uapi')

diff --git a/Documentation/misc-devices/mic/index.rst b/Documentation/misc-devices/mic/index.rst
deleted file mode 100644
index 3a8d06367ef1..000000000000
--- a/Documentation/misc-devices/mic/index.rst
+++ /dev/null
@@ -1,16 +0,0 @@
-=============================================
-Intel Many Integrated Core (MIC) architecture
-=============================================
-
-.. toctree::
-    :maxdepth: 1
-
-    mic_overview
-    scif_overview
-
-.. only::  subproject and html
-
-   Indices
-   =======
-
-   * :ref:`genindex`
diff --git a/Documentation/misc-devices/mic/mic_overview.rst b/Documentation/misc-devices/mic/mic_overview.rst
deleted file mode 100644
index 17d956bdaf7c..000000000000
--- a/Documentation/misc-devices/mic/mic_overview.rst
+++ /dev/null
@@ -1,85 +0,0 @@
-======================================================
-Intel Many Integrated Core (MIC) architecture overview
-======================================================
-
-An Intel MIC X100 device is a PCIe form factor add-in coprocessor
-card based on the Intel Many Integrated Core (MIC) architecture
-that runs a Linux OS. It is a PCIe endpoint in a platform and therefore
-implements the three required standard address spaces i.e. configuration,
-memory and I/O. The host OS loads a device driver as is typical for
-PCIe devices. The card itself runs a bootstrap after reset that
-transfers control to the card OS downloaded from the host driver. The
-host driver supports OSPM suspend and resume operations. It shuts down
-the card during suspend and reboots the card OS during resume.
-The card OS as shipped by Intel is a Linux kernel with modifications
-for the X100 devices.
-
-Since it is a PCIe card, it does not have the ability to host hardware
-devices for networking, storage and console. We provide these devices
-on X100 coprocessors thus enabling a self-bootable equivalent
-environment for applications. A key benefit of our solution is that it
-leverages the standard virtio framework for network, disk and console
-devices, though in our case the virtio framework is used across a PCIe
-bus. A Virtio Over PCIe (VOP) driver allows creating user space
-backends or devices on the host which are used to probe virtio drivers
-for these devices on the MIC card. The existing VRINGH infrastructure
-in the kernel is used to access virtio rings from the host. The card
-VOP driver allows card virtio drivers to communicate with their user
-space backends on the host via a device page. Ring 3 apps on the host
-can add, remove and configure virtio devices. A thin MIC specific
-virtio_config_ops is implemented which is borrowed heavily from
-previous similar implementations in lguest and s390.
-
-MIC PCIe card has a dma controller with 8 channels. These channels are
-shared between the host s/w and the card s/w. 0 to 3 are used by host
-and 4 to 7 by card. As the dma device doesn't show up as PCIe device,
-a virtual bus called mic bus is created and virtual dma devices are
-created on it by the host/card drivers. On host the channels are private
-and used only by the host driver to transfer data for the virtio devices.
-
-The Symmetric Communication Interface (SCIF (pronounced as skiff)) is a
-low level communications API across PCIe currently implemented for MIC.
-More details are available at scif_overview.txt.
-
-The Coprocessor State Management (COSM) driver on the host allows for
-boot, shutdown and reset of Intel MIC devices. It communicates with a COSM
-"client" driver on the MIC cards over SCIF to perform these functions.
-
-Here is a block diagram of the various components described above. The
-virtio backends are situated on the host rather than the card given better
-single threaded performance for the host compared to MIC, the ability of
-the host to initiate DMA's to/from the card using the MIC DMA engine and
-the fact that the virtio block storage backend can only be on the host::
-
-               +----------+           |             +----------+
-               | Card OS  |           |             | Host OS  |
-               +----------+           |             +----------+
-                                      |
-        +-------+ +--------+ +------+ | +---------+  +--------+ +--------+
-        | Virtio| |Virtio  | |Virtio| | |Virtio   |  |Virtio  | |Virtio  |
-        | Net   | |Console | |Block | | |Net      |  |Console | |Block   |
-        | Driver| |Driver  | |Driver| | |backend  |  |backend | |backend |
-        +---+---+ +---+----+ +--+---+ | +---------+  +----+---+ +--------+
-            |         |         |     |      |            |         |
-            |         |         |     |User  |            |         |
-            |         |         |     |------|------------|--+------|-------
-            +---------+---------+     |Kernel                |
-                      |               |                      |
-  +---------+     +---+----+ +------+ | +------+ +------+ +--+---+  +-------+
-  |MIC DMA  |     |  VOP   | | SCIF | | | SCIF | | COSM | | VOP  |  |MIC DMA|
-  +---+-----+     +---+----+ +--+---+ | +--+---+ +--+---+ +------+  +----+--+
-      |               |         |     |    |        |                    |
-  +---+-----+     +---+----+ +--+---+ | +--+---+ +--+---+ +------+  +----+--+
-  |MIC      |     |  VOP   | |SCIF  | | |SCIF  | | COSM | | VOP  |  | MIC   |
-  |HW Bus   |     |  HW Bus| |HW Bus| | |HW Bus| | Bus  | |HW Bus|  |HW Bus |
-  +---------+     +--------+ +--+---+ | +--+---+ +------+ +------+  +-------+
-      |               |         |     |       |     |                    |
-      |   +-----------+--+      |     |       |    +---------------+     |
-      |   |Intel MIC     |      |     |       |    |Intel MIC      |     |
-      |   |Card Driver   |      |     |       |    |Host Driver    |     |
-      +---+--------------+------+     |       +----+---------------+-----+
-                 |                    |                   |
-             +-------------------------------------------------------------+
-             |                                                             |
-             |                    PCIe Bus                                 |
-             +-------------------------------------------------------------+
diff --git a/Documentation/misc-devices/mic/scif_overview.rst b/Documentation/misc-devices/mic/scif_overview.rst
deleted file mode 100644
index 4c8ad9e43706..000000000000
--- a/Documentation/misc-devices/mic/scif_overview.rst
+++ /dev/null
@@ -1,108 +0,0 @@
-========================================
-Symmetric Communication Interface (SCIF)
-========================================
-
-The Symmetric Communication Interface (SCIF (pronounced as skiff)) is a low
-level communications API across PCIe currently implemented for MIC. Currently
-SCIF provides inter-node communication within a single host platform, where a
-node is a MIC Coprocessor or Xeon based host. SCIF abstracts the details of
-communicating over the PCIe bus while providing an API that is symmetric
-across all the nodes in the PCIe network. An important design objective for SCIF
-is to deliver the maximum possible performance given the communication
-abilities of the hardware. SCIF has been used to implement an offload compiler
-runtime and OFED support for MPI implementations for MIC coprocessors.
-
-SCIF API Components
-===================
-
-The SCIF API has the following parts:
-
-1. Connection establishment using a client server model
-2. Byte stream messaging intended for short messages
-3. Node enumeration to determine online nodes
-4. Poll semantics for detection of incoming connections and messages
-5. Memory registration to pin down pages
-6. Remote memory mapping for low latency CPU accesses via mmap
-7. Remote DMA (RDMA) for high bandwidth DMA transfers
-8. Fence APIs for RDMA synchronization
-
-SCIF exposes the notion of a connection which can be used by peer processes on
-nodes in a SCIF PCIe "network" to share memory "windows" and to communicate. A
-process in a SCIF node initiates a SCIF connection to a peer process on a
-different node via a SCIF "endpoint". SCIF endpoints support messaging APIs
-which are similar to connection oriented socket APIs. Connected SCIF endpoints
-can also register local memory which is followed by data transfer using either
-DMA, CPU copies or remote memory mapping via mmap. SCIF supports both user and
-kernel mode clients which are functionally equivalent.
-
-SCIF Performance for MIC
-========================
-
-DMA bandwidth comparison between the TCP (over ethernet over PCIe) stack versus
-SCIF shows the performance advantages of SCIF for HPC applications and
-runtimes::
-
-             Comparison of TCP and SCIF based BW
-
-  Throughput (GB/sec)
-    8 +                                             PCIe Bandwidth ******
-      +                                                        TCP ######
-    7 +    **************************************             SCIF %%%%%%
-      |                       %%%%%%%%%%%%%%%%%%%
-    6 +                   %%%%
-      |                 %%
-      |               %%%
-    5 +              %%
-      |            %%
-    4 +           %%
-      |          %%
-    3 +         %%
-      |        %
-    2 +      %%
-      |     %%
-      |    %
-    1 +
-      +    ######################################
-    0 +++---+++--+--+-+--+--+-++-+--+-++-+--+-++-+-
-      1       10     100      1000   10000   100000
-                   Transfer Size (KBytes)
-
-SCIF allows memory sharing via mmap(..) between processes on different PCIe
-nodes and thus provides bare-metal PCIe latency. The round trip SCIF mmap
-latency from the host to an x100 MIC for an 8 byte message is 0.44 usecs.
-
-SCIF has a user space library which is a thin IOCTL wrapper providing a user
-space API similar to the kernel API in scif.h. The SCIF user space library
-is distributed @ https://software.intel.com/en-us/mic-developer
-
-Here is some pseudo code for an example of how two applications on two PCIe
-nodes would typically use the SCIF API::
-
-  Process A (on node A)			Process B (on node B)
-
-  /* get online node information */
-  scif_get_node_ids(..)			scif_get_node_ids(..)
-  scif_open(..)				scif_open(..)
-  scif_bind(..)				scif_bind(..)
-  scif_listen(..)
-  scif_accept(..)				scif_connect(..)
-  /* SCIF connection established */
-
-  /* Send and receive short messages */
-  scif_send(..)/scif_recv(..)		scif_send(..)/scif_recv(..)
-
-  /* Register memory */
-  scif_register(..)			scif_register(..)
-
-  /* RDMA */
-  scif_readfrom(..)/scif_writeto(..)	scif_readfrom(..)/scif_writeto(..)
-
-  /* Fence DMAs */
-  scif_fence_signal(..)			scif_fence_signal(..)
-
-  mmap(..)				mmap(..)
-
-  /* Access remote registered memory */
-
-  /* Close the endpoints */
-  scif_close(..)				scif_close(..)
diff --git a/MAINTAINERS b/MAINTAINERS
index e73636b75f29..9289a9b43a51 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8976,22 +8976,6 @@ S:	Supported
 W:	https://01.org/linux-acpi
 F:	drivers/platform/x86/intel_menlow.c
 
-INTEL MIC DRIVERS (mic)
-M:	Sudeep Dutt <sudeep.dutt@intel.com>
-M:	Ashutosh Dixit <ashutosh.dixit@intel.com>
-S:	Supported
-W:	https://github.com/sudeepdutt/mic
-W:	http://software.intel.com/en-us/mic-developer
-F:	Documentation/misc-devices/mic/
-F:	drivers/dma/mic_x100_dma.c
-F:	drivers/dma/mic_x100_dma.h
-F:	drivers/misc/mic/
-F:	include/linux/mic_bus.h
-F:	include/linux/scif.h
-F:	include/uapi/linux/mic_common.h
-F:	include/uapi/linux/mic_ioctl.h
-F:	include/uapi/linux/scif_ioctl.h
-
 INTEL P-Unit IPC DRIVER
 M:	Zha Qipeng <qipeng.zha@intel.com>
 L:	platform-driver-x86@vger.kernel.org
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 518a1437862a..90284ffda58a 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -318,24 +318,6 @@ config INTEL_IOP_ADMA
 	help
 	  Enable support for the Intel(R) IOP Series RAID engines.
 
-config INTEL_MIC_X100_DMA
-	tristate "Intel MIC X100 DMA Driver"
-	depends on 64BIT && X86 && INTEL_MIC_BUS
-	select DMA_ENGINE
-	help
-	  This enables DMA support for the Intel Many Integrated Core
-	  (MIC) family of PCIe form factor coprocessor X100 devices that
-	  run a 64 bit Linux OS. This driver will be used by both MIC
-	  host and card drivers.
-
-	  If you are building host kernel with a MIC device or a card
-	  kernel for a MIC device, then say M (recommended) or Y, else
-	  say N. If unsure say N.
-
-	  More information about the Intel MIC family as well as the Linux
-	  OS and tools for MIC to use with this driver are available from
-	  <http://software.intel.com/en-us/mic-developer>.
-
 config K3_DMA
 	tristate "Hisilicon K3 DMA support"
 	depends on ARCH_HI3xxx || ARCH_HISI || COMPILE_TEST
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index e60f81331d4c..948a8da05f8b 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -44,7 +44,6 @@ obj-$(CONFIG_INTEL_IDMA64) += idma64.o
 obj-$(CONFIG_INTEL_IOATDMA) += ioat/
 obj-$(CONFIG_INTEL_IDXD) += idxd/
 obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
-obj-$(CONFIG_INTEL_MIC_X100_DMA) += mic_x100_dma.o
 obj-$(CONFIG_K3_DMA) += k3dma.o
 obj-$(CONFIG_LPC18XX_DMAMUX) += lpc18xx-dmamux.o
 obj-$(CONFIG_MILBEAUT_HDMAC) += milbeaut-hdmac.o
diff --git a/drivers/dma/mic_x100_dma.c b/drivers/dma/mic_x100_dma.c
deleted file mode 100644
index fea8608a7810..000000000000
--- a/drivers/dma/mic_x100_dma.c
+++ /dev/null
@@ -1,770 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Intel MIC X100 DMA Driver.
- *
- * Adapted from IOAT dma driver.
- */
-#include <linux/module.h>
-#include <linux/io.h>
-#include <linux/seq_file.h>
-#include <linux/vmalloc.h>
-
-#include "mic_x100_dma.h"
-
-#define MIC_DMA_MAX_XFER_SIZE_CARD  (1 * 1024 * 1024 -\
-				       MIC_DMA_ALIGN_BYTES)
-#define MIC_DMA_MAX_XFER_SIZE_HOST  (1 * 1024 * 1024 >> 1)
-#define MIC_DMA_DESC_TYPE_SHIFT	60
-#define MIC_DMA_MEMCPY_LEN_SHIFT 46
-#define MIC_DMA_STAT_INTR_SHIFT 59
-
-/* high-water mark for pushing dma descriptors */
-static int mic_dma_pending_level = 4;
-
-/* Status descriptor is used to write a 64 bit value to a memory location */
-enum mic_dma_desc_format_type {
-	MIC_DMA_MEMCPY = 1,
-	MIC_DMA_STATUS,
-};
-
-static inline u32 mic_dma_hw_ring_inc(u32 val)
-{
-	return (val + 1) % MIC_DMA_DESC_RX_SIZE;
-}
-
-static inline u32 mic_dma_hw_ring_dec(u32 val)
-{
-	return val ? val - 1 : MIC_DMA_DESC_RX_SIZE - 1;
-}
-
-static inline void mic_dma_hw_ring_inc_head(struct mic_dma_chan *ch)
-{
-	ch->head = mic_dma_hw_ring_inc(ch->head);
-}
-
-/* Prepare a memcpy desc */
-static inline void mic_dma_memcpy_desc(struct mic_dma_desc *desc,
-	dma_addr_t src_phys, dma_addr_t dst_phys, u64 size)
-{
-	u64 qw0, qw1;
-
-	qw0 = src_phys;
-	qw0 |= (size >> MIC_DMA_ALIGN_SHIFT) << MIC_DMA_MEMCPY_LEN_SHIFT;
-	qw1 = MIC_DMA_MEMCPY;
-	qw1 <<= MIC_DMA_DESC_TYPE_SHIFT;
-	qw1 |= dst_phys;
-	desc->qw0 = qw0;
-	desc->qw1 = qw1;
-}
-
-/* Prepare a status desc. with @data to be written at @dst_phys */
-static inline void mic_dma_prep_status_desc(struct mic_dma_desc *desc, u64 data,
-	dma_addr_t dst_phys, bool generate_intr)
-{
-	u64 qw0, qw1;
-
-	qw0 = data;
-	qw1 = (u64) MIC_DMA_STATUS << MIC_DMA_DESC_TYPE_SHIFT | dst_phys;
-	if (generate_intr)
-		qw1 |= (1ULL << MIC_DMA_STAT_INTR_SHIFT);
-	desc->qw0 = qw0;
-	desc->qw1 = qw1;
-}
-
-static void mic_dma_cleanup(struct mic_dma_chan *ch)
-{
-	struct dma_async_tx_descriptor *tx;
-	u32 tail;
-	u32 last_tail;
-
-	spin_lock(&ch->cleanup_lock);
-	tail = mic_dma_read_cmp_cnt(ch);
-	/*
-	 * This is the barrier pair for smp_wmb() in fn.
-	 * mic_dma_tx_submit_unlock. It's required so that we read the
-	 * updated cookie value from tx->cookie.
-	 */
-	smp_rmb();
-	for (last_tail = ch->last_tail; tail != last_tail;) {
-		tx = &ch->tx_array[last_tail];
-		if (tx->cookie) {
-			dma_cookie_complete(tx);
-			dmaengine_desc_get_callback_invoke(tx, NULL);
-			tx->callback = NULL;
-		}
-		last_tail = mic_dma_hw_ring_inc(last_tail);
-	}
-	/* finish all completion callbacks before incrementing tail */
-	smp_mb();
-	ch->last_tail = last_tail;
-	spin_unlock(&ch->cleanup_lock);
-}
-
-static u32 mic_dma_ring_count(u32 head, u32 tail)
-{
-	u32 count;
-
-	if (head >= tail)
-		count = (tail - 0) + (MIC_DMA_DESC_RX_SIZE - head);
-	else
-		count = tail - head;
-	return count - 1;
-}
-
-/* Returns the num. of free descriptors on success, -ENOMEM on failure */
-static int mic_dma_avail_desc_ring_space(struct mic_dma_chan *ch, int required)
-{
-	struct device *dev = mic_dma_ch_to_device(ch);
-	u32 count;
-
-	count = mic_dma_ring_count(ch->head, ch->last_tail);
-	if (count < required) {
-		mic_dma_cleanup(ch);
-		count = mic_dma_ring_count(ch->head, ch->last_tail);
-	}
-
-	if (count < required) {
-		dev_dbg(dev, "Not enough desc space");
-		dev_dbg(dev, "%s %d required=%u, avail=%u\n",
-			__func__, __LINE__, required, count);
-		return -ENOMEM;
-	} else {
-		return count;
-	}
-}
-
-/* Program memcpy descriptors into the descriptor ring and update s/w head ptr*/
-static int mic_dma_prog_memcpy_desc(struct mic_dma_chan *ch, dma_addr_t src,
-				    dma_addr_t dst, size_t len)
-{
-	size_t current_transfer_len;
-	size_t max_xfer_size = to_mic_dma_dev(ch)->max_xfer_size;
-	/* 3 is added to make sure we have enough space for status desc */
-	int num_desc = len / max_xfer_size + 3;
-	int ret;
-
-	if (len % max_xfer_size)
-		num_desc++;
-
-	ret = mic_dma_avail_desc_ring_space(ch, num_desc);
-	if (ret < 0)
-		return ret;
-	do {
-		current_transfer_len = min(len, max_xfer_size);
-		mic_dma_memcpy_desc(&ch->desc_ring[ch->head],
-				    src, dst, current_transfer_len);
-		mic_dma_hw_ring_inc_head(ch);
-		len -= current_transfer_len;
-		dst = dst + current_transfer_len;
-		src = src + current_transfer_len;
-	} while (len > 0);
-	return 0;
-}
-
-/* It's a h/w quirk and h/w needs 2 status descriptors for every status desc */
-static void mic_dma_prog_intr(struct mic_dma_chan *ch)
-{
-	mic_dma_prep_status_desc(&ch->desc_ring[ch->head], 0,
-				 ch->status_dest_micpa, false);
-	mic_dma_hw_ring_inc_head(ch);
-	mic_dma_prep_status_desc(&ch->desc_ring[ch->head], 0,
-				 ch->status_dest_micpa, true);
-	mic_dma_hw_ring_inc_head(ch);
-}
-
-/* Wrapper function to program memcpy descriptors/status descriptors */
-static int mic_dma_do_dma(struct mic_dma_chan *ch, int flags, dma_addr_t src,
-			  dma_addr_t dst, size_t len)
-{
-	if (len && -ENOMEM == mic_dma_prog_memcpy_desc(ch, src, dst, len)) {
-		return -ENOMEM;
-	} else {
-		/* 3 is the maximum number of status descriptors */
-		int ret = mic_dma_avail_desc_ring_space(ch, 3);
-
-		if (ret < 0)
-			return ret;
-	}
-
-	/* Above mic_dma_prog_memcpy_desc() makes sure we have enough space */
-	if (flags & DMA_PREP_FENCE) {
-		mic_dma_prep_status_desc(&ch->desc_ring[ch->head], 0,
-					 ch->status_dest_micpa, false);
-		mic_dma_hw_ring_inc_head(ch);
-	}
-
-	if (flags & DMA_PREP_INTERRUPT)
-		mic_dma_prog_intr(ch);
-
-	return 0;
-}
-
-static inline void mic_dma_issue_pending(struct dma_chan *ch)
-{
-	struct mic_dma_chan *mic_ch = to_mic_dma_chan(ch);
-
-	spin_lock(&mic_ch->issue_lock);
-	/*
-	 * Write to head triggers h/w to act on the descriptors.
-	 * On MIC, writing the same head value twice causes
-	 * a h/w error. On second write, h/w assumes we filled
-	 * the entire ring & overwrote some of the descriptors.
-	 */
-	if (mic_ch->issued == mic_ch->submitted)
-		goto out;
-	mic_ch->issued = mic_ch->submitted;
-	/*
-	 * make descriptor updates visible before advancing head,
-	 * this is purposefully not smp_wmb() since we are also
-	 * publishing the descriptor updates to a dma device
-	 */
-	wmb();
-	mic_dma_write_reg(mic_ch, MIC_DMA_REG_DHPR, mic_ch->issued);
-out:
-	spin_unlock(&mic_ch->issue_lock);
-}
-
-static inline void mic_dma_update_pending(struct mic_dma_chan *ch)
-{
-	if (mic_dma_ring_count(ch->issued, ch->submitted)
-			> mic_dma_pending_level)
-		mic_dma_issue_pending(&ch->api_ch);
-}
-
-static dma_cookie_t mic_dma_tx_submit_unlock(struct dma_async_tx_descriptor *tx)
-{
-	struct mic_dma_chan *mic_ch = to_mic_dma_chan(tx->chan);
-	dma_cookie_t cookie;
-
-	dma_cookie_assign(tx);
-	cookie = tx->cookie;
-	/*
-	 * We need an smp write barrier here because another CPU might see
-	 * an update to submitted and update h/w head even before we
-	 * assigned a cookie to this tx.
-	 */
-	smp_wmb();
-	mic_ch->submitted = mic_ch->head;
-	spin_unlock(&mic_ch->prep_lock);
-	mic_dma_update_pending(mic_ch);
-	return cookie;
-}
-
-static inline struct dma_async_tx_descriptor *
-allocate_tx(struct mic_dma_chan *ch)
-{
-	u32 idx = mic_dma_hw_ring_dec(ch->head);
-	struct dma_async_tx_descriptor *tx = &ch->tx_array[idx];
-
-	dma_async_tx_descriptor_init(tx, &ch->api_ch);
-	tx->tx_submit = mic_dma_tx_submit_unlock;
-	return tx;
-}
-
-/* Program a status descriptor with dst as address and value to be written */
-static struct dma_async_tx_descriptor *
-mic_dma_prep_status_lock(struct dma_chan *ch, dma_addr_t dst, u64 src_val,
-			 unsigned long flags)
-{
-	struct mic_dma_chan *mic_ch = to_mic_dma_chan(ch);
-	int result;
-
-	spin_lock(&mic_ch->prep_lock);
-	result = mic_dma_avail_desc_ring_space(mic_ch, 4);
-	if (result < 0)
-		goto error;
-	mic_dma_prep_status_desc(&mic_ch->desc_ring[mic_ch->head], src_val, dst,
-				 false);
-	mic_dma_hw_ring_inc_head(mic_ch);
-	result = mic_dma_do_dma(mic_ch, flags, 0, 0, 0);
-	if (result < 0)
-		goto error;
-
-	return allocate_tx(mic_ch);
-error:
-	dev_err(mic_dma_ch_to_device(mic_ch),
-		"Error enqueueing dma status descriptor, error=%d\n", result);
-	spin_unlock(&mic_ch->prep_lock);
-	return NULL;
-}
-
-/*
- * Prepare a memcpy descriptor to be added to the ring.
- * Note that the temporary descriptor adds an extra overhead of copying the
- * descriptor to ring. So, we copy directly to the descriptor ring
- */
-static struct dma_async_tx_descriptor *
-mic_dma_prep_memcpy_lock(struct dma_chan *ch, dma_addr_t dma_dest,
-			 dma_addr_t dma_src, size_t len, unsigned long flags)
-{
-	struct mic_dma_chan *mic_ch = to_mic_dma_chan(ch);
-	struct device *dev = mic_dma_ch_to_device(mic_ch);
-	int result;
-
-	if (!len && !flags)
-		return NULL;
-
-	spin_lock(&mic_ch->prep_lock);
-	result = mic_dma_do_dma(mic_ch, flags, dma_src, dma_dest, len);
-	if (result >= 0)
-		return allocate_tx(mic_ch);
-	dev_err(dev, "Error enqueueing dma, error=%d\n", result);
-	spin_unlock(&mic_ch->prep_lock);
-	return NULL;
-}
-
-static struct dma_async_tx_descriptor *
-mic_dma_prep_interrupt_lock(struct dma_chan *ch, unsigned long flags)
-{
-	struct mic_dma_chan *mic_ch = to_mic_dma_chan(ch);
-	int ret;
-
-	spin_lock(&mic_ch->prep_lock);
-	ret = mic_dma_do_dma(mic_ch, flags, 0, 0, 0);
-	if (!ret)
-		return allocate_tx(mic_ch);
-	spin_unlock(&mic_ch->prep_lock);
-	return NULL;
-}
-
-/* Return the status of the transaction */
-static enum dma_status
-mic_dma_tx_status(struct dma_chan *ch, dma_cookie_t cookie,
-		  struct dma_tx_state *txstate)
-{
-	struct mic_dma_chan *mic_ch = to_mic_dma_chan(ch);
-
-	if (DMA_COMPLETE != dma_cookie_status(ch, cookie, txstate))
-		mic_dma_cleanup(mic_ch);
-
-	return dma_cookie_status(ch, cookie, txstate);
-}
-
-static irqreturn_t mic_dma_thread_fn(int irq, void *data)
-{
-	mic_dma_cleanup((struct mic_dma_chan *)data);
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t mic_dma_intr_handler(int irq, void *data)
-{
-	struct mic_dma_chan *ch = ((struct mic_dma_chan *)data);
-
-	mic_dma_ack_interrupt(ch);
-	return IRQ_WAKE_THREAD;
-}
-
-static int mic_dma_alloc_desc_ring(struct mic_dma_chan *ch)
-{
-	u64 desc_ring_size = MIC_DMA_DESC_RX_SIZE * sizeof(*ch->desc_ring);
-	struct device *dev = &to_mbus_device(ch)->dev;
-
-	desc_ring_size = ALIGN(desc_ring_size, MIC_DMA_ALIGN_BYTES);
-	ch->desc_ring = kzalloc(desc_ring_size, GFP_KERNEL);
-
-	if (!ch->desc_ring)
-		return -ENOMEM;
-
-	ch->desc_ring_micpa = dma_map_single(dev, ch->desc_ring,
-					     desc_ring_size, DMA_BIDIRECTIONAL);
-	if (dma_mapping_error(dev, ch->desc_ring_micpa))
-		goto map_error;
-
-	ch->tx_array = vzalloc(array_size(MIC_DMA_DESC_RX_SIZE,
-					  sizeof(*ch->tx_array)));
-	if (!ch->tx_array)
-		goto tx_error;
-	return 0;
-tx_error:
-	dma_unmap_single(dev, ch->desc_ring_micpa, desc_ring_size,
-			 DMA_BIDIRECTIONAL);
-map_error:
-	kfree(ch->desc_ring);
-	return -ENOMEM;
-}
-
-static void mic_dma_free_desc_ring(struct mic_dma_chan *ch)
-{
-	u64 desc_ring_size = MIC_DMA_DESC_RX_SIZE * sizeof(*ch->desc_ring);
-
-	vfree(ch->tx_array);
-	desc_ring_size = ALIGN(desc_ring_size, MIC_DMA_ALIGN_BYTES);
-	dma_unmap_single(&to_mbus_device(ch)->dev, ch->desc_ring_micpa,
-			 desc_ring_size, DMA_BIDIRECTIONAL);
-	kfree(ch->desc_ring);
-	ch->desc_ring = NULL;
-}
-
-static void mic_dma_free_status_dest(struct mic_dma_chan *ch)
-{
-	dma_unmap_single(&to_mbus_device(ch)->dev, ch->status_dest_micpa,
-			 L1_CACHE_BYTES, DMA_BIDIRECTIONAL);
-	kfree(ch->status_dest);
-}
-
-static int mic_dma_alloc_status_dest(struct mic_dma_chan *ch)
-{
-	struct device *dev = &to_mbus_device(ch)->dev;
-
-	ch->status_dest = kzalloc(L1_CACHE_BYTES, GFP_KERNEL);
-	if (!ch->status_dest)
-		return -ENOMEM;
-	ch->status_dest_micpa = dma_map_single(dev, ch->status_dest,
-					L1_CACHE_BYTES, DMA_BIDIRECTIONAL);
-	if (dma_mapping_error(dev, ch->status_dest_micpa)) {
-		kfree(ch->status_dest);
-		ch->status_dest = NULL;
-		return -ENOMEM;
-	}
-	return 0;
-}
-
-static int mic_dma_check_chan(struct mic_dma_chan *ch)
-{
-	if (mic_dma_read_reg(ch, MIC_DMA_REG_DCHERR) ||
-	    mic_dma_read_reg(ch, MIC_DMA_REG_DSTAT) & MIC_DMA_CHAN_QUIESCE) {
-		mic_dma_disable_chan(ch);
-		mic_dma_chan_mask_intr(ch);
-		dev_err(mic_dma_ch_to_device(ch),
-			"%s %d error setting up mic dma chan %d\n",
-			__func__, __LINE__, ch->ch_num);
-		return -EBUSY;
-	}
-	return 0;
-}
-
-static int mic_dma_chan_setup(struct mic_dma_chan *ch)
-{
-	if (MIC_DMA_CHAN_MIC == ch->owner)
-		mic_dma_chan_set_owner(ch);
-	mic_dma_disable_chan(ch);
-	mic_dma_chan_mask_intr(ch);
-	mic_dma_write_reg(ch, MIC_DMA_REG_DCHERRMSK, 0);
-	mic_dma_chan_set_desc_ring(ch);
-	ch->last_tail = mic_dma_read_reg(ch, MIC_DMA_REG_DTPR);
-	ch->head = ch->last_tail;
-	ch->issued = 0;
-	mic_dma_chan_unmask_intr(ch);
-	mic_dma_enable_chan(ch);
-	return mic_dma_check_chan(ch);
-}
-
-static void mic_dma_chan_destroy(struct mic_dma_chan *ch)
-{
-	mic_dma_disable_chan(ch);
-	mic_dma_chan_mask_intr(ch);
-}
-
-static int mic_dma_setup_irq(struct mic_dma_chan *ch)
-{
-	ch->cookie =
-		to_mbus_hw_ops(ch)->request_threaded_irq(to_mbus_device(ch),
-			mic_dma_intr_handler, mic_dma_thread_fn,
-			"mic dma_channel", ch, ch->ch_num);
-	return PTR_ERR_OR_ZERO(ch->cookie);
-}
-
-static inline void mic_dma_free_irq(struct mic_dma_chan *ch)
-{
-	to_mbus_hw_ops(ch)->free_irq(to_mbus_device(ch), ch->cookie, ch);
-}
-
-static int mic_dma_chan_init(struct mic_dma_chan *ch)
-{
-	int ret = mic_dma_alloc_desc_ring(ch);
-
-	if (ret)
-		goto ring_error;
-	ret = mic_dma_alloc_status_dest(ch);
-	if (ret)
-		goto status_error;
-	ret = mic_dma_chan_setup(ch);
-	if (ret)
-		goto chan_error;
-	return ret;
-chan_error:
-	mic_dma_free_status_dest(ch);
-status_error:
-	mic_dma_free_desc_ring(ch);
-ring_error:
-	return ret;
-}
-
-static int mic_dma_drain_chan(struct mic_dma_chan *ch)
-{
-	struct dma_async_tx_descriptor *tx;
-	int err = 0;
-	dma_cookie_t cookie;
-
-	tx = mic_dma_prep_memcpy_lock(&ch->api_ch, 0, 0, 0, DMA_PREP_FENCE);
-	if (!tx) {
-		err = -ENOMEM;
-		goto error;
-	}
-
-	cookie = tx->tx_submit(tx);
-	if (dma_submit_error(cookie))
-		err = -ENOMEM;
-	else
-		err = dma_sync_wait(&ch->api_ch, cookie);
-	if (err) {
-		dev_err(mic_dma_ch_to_device(ch), "%s %d TO chan 0x%x\n",
-			__func__, __LINE__, ch->ch_num);
-		err = -EIO;
-	}
-error:
-	mic_dma_cleanup(ch);
-	return err;
-}
-
-static inline void mic_dma_chan_uninit(struct mic_dma_chan *ch)
-{
-	mic_dma_chan_destroy(ch);
-	mic_dma_cleanup(ch);
-	mic_dma_free_status_dest(ch);
-	mic_dma_free_desc_ring(ch);
-}
-
-static int mic_dma_init(struct mic_dma_device *mic_dma_dev,
-			enum mic_dma_chan_owner owner)
-{
-	int i, first_chan = mic_dma_dev->start_ch;
-	struct mic_dma_chan *ch;
-	int ret;
-
-	for (i = first_chan; i < first_chan + MIC_DMA_NUM_CHAN; i++) {
-		ch = &mic_dma_dev->mic_ch[i];
-		ch->ch_num = i;
-		ch->owner = owner;
-		spin_lock_init(&ch->cleanup_lock);
-		spin_lock_init(&ch->prep_lock);
-		spin_lock_init(&ch->issue_lock);
-		ret = mic_dma_setup_irq(ch);
-		if (ret)
-			goto error;
-	}
-	return 0;
-error:
-	for (i = i - 1; i >= first_chan; i--)
-		mic_dma_free_irq(ch);
-	return ret;
-}
-
-static void mic_dma_uninit(struct mic_dma_device *mic_dma_dev)
-{
-	int i, first_chan = mic_dma_dev->start_ch;
-	struct mic_dma_chan *ch;
-
-	for (i = first_chan; i < first_chan + MIC_DMA_NUM_CHAN; i++) {
-		ch = &mic_dma_dev->mic_ch[i];
-		mic_dma_free_irq(ch);
-	}
-}
-
-static int mic_dma_alloc_chan_resources(struct dma_chan *ch)
-{
-	int ret = mic_dma_chan_init(to_mic_dma_chan(ch));
-	if (ret)
-		return ret;
-	return MIC_DMA_DESC_RX_SIZE;
-}
-
-static void mic_dma_free_chan_resources(struct dma_chan *ch)
-{
-	struct mic_dma_chan *mic_ch = to_mic_dma_chan(ch);
-	mic_dma_drain_chan(mic_ch);
-	mic_dma_chan_uninit(mic_ch);
-}
-
-/* Set the fn. handlers and register the dma device with dma api */
-static int mic_dma_register_dma_device(struct mic_dma_device *mic_dma_dev,
-				       enum mic_dma_chan_owner owner)
-{
-	int i, first_chan = mic_dma_dev->start_ch;
-
-	dma_cap_zero(mic_dma_dev->dma_dev.cap_mask);
-	/*
-	 * This dma engine is not capable of host memory to host memory
-	 * transfers
-	 */
-	dma_cap_set(DMA_MEMCPY, mic_dma_dev->dma_dev.cap_mask);
-
-	if (MIC_DMA_CHAN_HOST == owner)
-		dma_cap_set(DMA_PRIVATE, mic_dma_dev->dma_dev.cap_mask);
-	mic_dma_dev->dma_dev.device_alloc_chan_resources =
-		mic_dma_alloc_chan_resources;
-	mic_dma_dev->dma_dev.device_free_chan_resources =
-		mic_dma_free_chan_resources;
-	mic_dma_dev->dma_dev.device_tx_status = mic_dma_tx_status;
-	mic_dma_dev->dma_dev.device_prep_dma_memcpy = mic_dma_prep_memcpy_lock;
-	mic_dma_dev->dma_dev.device_prep_dma_imm_data =
-		mic_dma_prep_status_lock;
-	mic_dma_dev->dma_dev.device_prep_dma_interrupt =
-		mic_dma_prep_interrupt_lock;
-	mic_dma_dev->dma_dev.device_issue_pending = mic_dma_issue_pending;
-	mic_dma_dev->dma_dev.copy_align = MIC_DMA_ALIGN_SHIFT;
-	INIT_LIST_HEAD(&mic_dma_dev->dma_dev.channels);
-	for (i = first_chan; i < first_chan + MIC_DMA_NUM_CHAN; i++) {
-		mic_dma_dev->mic_ch[i].api_ch.device = &mic_dma_dev->dma_dev;
-		dma_cookie_init(&mic_dma_dev->mic_ch[i].api_ch);
-		list_add_tail(&mic_dma_dev->mic_ch[i].api_ch.device_node,
-			      &mic_dma_dev->dma_dev.channels);
-	}
-	return dmaenginem_async_device_register(&mic_dma_dev->dma_dev);
-}
-
-/*
- * Initializes dma channels and registers the dma device with the
- * dma engine api.
- */
-static struct mic_dma_device *mic_dma_dev_reg(struct mbus_device *mbdev,
-					      enum mic_dma_chan_owner owner)
-{
-	struct mic_dma_device *mic_dma_dev;
-	int ret;
-	struct device *dev = &mbdev->dev;
-
-	mic_dma_dev = devm_kzalloc(dev, sizeof(*mic_dma_dev), GFP_KERNEL);
-	if (!mic_dma_dev) {
-		ret = -ENOMEM;
-		goto alloc_error;
-	}
-	mic_dma_dev->mbdev = mbdev;
-	mic_dma_dev->dma_dev.dev = dev;
-	mic_dma_dev->mmio = mbdev->mmio_va;
-	if (MIC_DMA_CHAN_HOST == owner) {
-		mic_dma_dev->start_ch = 0;
-		mic_dma_dev->max_xfer_size = MIC_DMA_MAX_XFER_SIZE_HOST;
-	} else {
-		mic_dma_dev->start_ch = 4;
-		mic_dma_dev->max_xfer_size = MIC_DMA_MAX_XFER_SIZE_CARD;
-	}
-	ret = mic_dma_init(mic_dma_dev, owner);
-	if (ret)
-		goto init_error;
-	ret = mic_dma_register_dma_device(mic_dma_dev, owner);
-	if (ret)
-		goto reg_error;
-	return mic_dma_dev;
-reg_error:
-	mic_dma_uninit(mic_dma_dev);
-init_error:
-	mic_dma_dev = NULL;
-alloc_error:
-	dev_err(dev, "Error at %s %d ret=%d\n", __func__, __LINE__, ret);
-	return mic_dma_dev;
-}
-
-static void mic_dma_dev_unreg(struct mic_dma_device *mic_dma_dev)
-{
-	mic_dma_uninit(mic_dma_dev);
-}
-
-/* DEBUGFS CODE */
-static int mic_dma_reg_show(struct seq_file *s, void *pos)
-{
-	struct mic_dma_device *mic_dma_dev = s->private;
-	int i, chan_num, first_chan = mic_dma_dev->start_ch;
-	struct mic_dma_chan *ch;
-
-	seq_printf(s, "SBOX_DCR: %#x\n",
-		   mic_dma_mmio_read(&mic_dma_dev->mic_ch[first_chan],
-				     MIC_DMA_SBOX_BASE + MIC_DMA_SBOX_DCR));
-	seq_puts(s, "DMA Channel Registers\n");
-	seq_printf(s, "%-10s| %-10s %-10s %-10s %-10s %-10s",
-		   "Channel", "DCAR", "DTPR", "DHPR", "DRAR_HI", "DRAR_LO");
-	seq_printf(s, " %-11s %-14s %-10s\n", "DCHERR", "DCHERRMSK", "DSTAT");
-	for (i = first_chan; i < first_chan + MIC_DMA_NUM_CHAN; i++) {
-		ch = &mic_dma_dev->mic_ch[i];
-		chan_num = ch->ch_num;
-		seq_printf(s, "%-10i| %-#10x %-#10x %-#10x %-#10x",
-			   chan_num,
-			   mic_dma_read_reg(ch, MIC_DMA_REG_DCAR),
-			   mic_dma_read_reg(ch, MIC_DMA_REG_DTPR),
-			   mic_dma_read_reg(ch, MIC_DMA_REG_DHPR),
-			   mic_dma_read_reg(ch, MIC_DMA_REG_DRAR_HI));
-		seq_printf(s, " %-#10x %-#10x %-#14x %-#10x\n",
-			   mic_dma_read_reg(ch, MIC_DMA_REG_DRAR_LO),
-			   mic_dma_read_reg(ch, MIC_DMA_REG_DCHERR),
-			   mic_dma_read_reg(ch, MIC_DMA_REG_DCHERRMSK),
-			   mic_dma_read_reg(ch, MIC_DMA_REG_DSTAT));
-	}
-	return 0;
-}
-
-DEFINE_SHOW_ATTRIBUTE(mic_dma_reg);
-
-/* Debugfs parent dir */
-static struct dentry *mic_dma_dbg;
-
-static int mic_dma_driver_probe(struct mbus_device *mbdev)
-{
-	struct mic_dma_device *mic_dma_dev;
-	enum mic_dma_chan_owner owner;
-
-	if (MBUS_DEV_DMA_MIC == mbdev->id.device)
-		owner = MIC_DMA_CHAN_MIC;
-	else
-		owner = MIC_DMA_CHAN_HOST;
-
-	mic_dma_dev = mic_dma_dev_reg(mbdev, owner);
-	dev_set_drvdata(&mbdev->dev, mic_dma_dev);
-
-	if (mic_dma_dbg) {
-		mic_dma_dev->dbg_dir = debugfs_create_dir(dev_name(&mbdev->dev),
-							  mic_dma_dbg);
-		debugfs_create_file("mic_dma_reg", 0444, mic_dma_dev->dbg_dir,
-				    mic_dma_dev, &mic_dma_reg_fops);
-	}
-	return 0;
-}
-
-static void mic_dma_driver_remove(struct mbus_device *mbdev)
-{
-	struct mic_dma_device *mic_dma_dev;
-
-	mic_dma_dev = dev_get_drvdata(&mbdev->dev);
-	debugfs_remove_recursive(mic_dma_dev->dbg_dir);
-	mic_dma_dev_unreg(mic_dma_dev);
-}
-
-static struct mbus_device_id id_table[] = {
-	{MBUS_DEV_DMA_MIC, MBUS_DEV_ANY_ID},
-	{MBUS_DEV_DMA_HOST, MBUS_DEV_ANY_ID},
-	{0},
-};
-
-static struct mbus_driver mic_dma_driver = {
-	.driver.name =	KBUILD_MODNAME,
-	.driver.owner =	THIS_MODULE,
-	.id_table = id_table,
-	.probe = mic_dma_driver_probe,
-	.remove = mic_dma_driver_remove,
-};
-
-static int __init mic_x100_dma_init(void)
-{
-	int rc = mbus_register_driver(&mic_dma_driver);
-	if (rc)
-		return rc;
-	mic_dma_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL);
-	return 0;
-}
-
-static void __exit mic_x100_dma_exit(void)
-{
-	debugfs_remove_recursive(mic_dma_dbg);
-	mbus_unregister_driver(&mic_dma_driver);
-}
-
-module_init(mic_x100_dma_init);
-module_exit(mic_x100_dma_exit);
-
-MODULE_DEVICE_TABLE(mbus, id_table);
-MODULE_AUTHOR("Intel Corporation");
-MODULE_DESCRIPTION("Intel(R) MIC X100 DMA Driver");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/mic_x100_dma.h b/drivers/dma/mic_x100_dma.h
deleted file mode 100644
index 68ef43a91714..000000000000
--- a/drivers/dma/mic_x100_dma.h
+++ /dev/null
@@ -1,275 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Intel MIC X100 DMA Driver.
- *
- * Adapted from IOAT dma driver.
- */
-#ifndef _MIC_X100_DMA_H_
-#define _MIC_X100_DMA_H_
-
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/sched.h>
-#include <linux/debugfs.h>
-#include <linux/slab.h>
-#include <linux/interrupt.h>
-#include <linux/mic_bus.h>
-
-#include "dmaengine.h"
-
-/*
- * MIC has a total of 8 dma channels.
- * Four channels are assigned for host SW use & the remaining for MIC SW.
- * MIC DMA transfer size & addresses need to be 64 byte aligned.
- */
-#define MIC_DMA_MAX_NUM_CHAN	8
-#define MIC_DMA_NUM_CHAN	4
-#define MIC_DMA_ALIGN_SHIFT	DMAENGINE_ALIGN_64_BYTES
-#define MIC_DMA_ALIGN_BYTES	(1 << MIC_DMA_ALIGN_SHIFT)
-#define MIC_DMA_DESC_RX_SIZE	(128 * 1024 - 4)
-
-/*
- * Register descriptions
- * All the registers are 32 bit registers.
- * DCR is a global register and all others are per-channel.
- * DCR - bits 0, 2, 4, 6, 8, 10, 12, 14 - enable bits for channels 0 to 7
- *	 bits 1, 3, 5, 7, 9, 11, 13, 15 - owner bits for channels 0 to 7
- * DCAR - bit 24 & 25 interrupt masks for mic owned & host owned channels
- * DHPR - head of the descriptor ring updated by s/w
- * DTPR - tail of the descriptor ring updated by h/w
- * DRAR_LO - lower 32 bits of descriptor ring's mic address
- * DRAR_HI - 3:0 - remaining 4 bits of descriptor ring's mic address
- *	     20:4 descriptor ring size
- *	     25:21 mic smpt entry number
- * DSTAT - 16:0 h/w completion count; 31:28 dma engine status
- * DCHERR - this register is non-zero on error
- * DCHERRMSK - interrupt mask register
- */
-#define MIC_DMA_HW_CMP_CNT_MASK		0x1ffff
-#define MIC_DMA_CHAN_QUIESCE		0x20000000
-#define MIC_DMA_SBOX_BASE		0x00010000
-#define MIC_DMA_SBOX_DCR		0x0000A280
-#define MIC_DMA_SBOX_CH_BASE		0x0001A000
-#define MIC_DMA_SBOX_CHAN_OFF		0x40
-#define MIC_DMA_SBOX_DCAR_IM0		(0x1 << 24)
-#define MIC_DMA_SBOX_DCAR_IM1		(0x1 << 25)
-#define MIC_DMA_SBOX_DRARHI_SYS_MASK	(0x1 << 26)
-#define MIC_DMA_REG_DCAR		0
-#define MIC_DMA_REG_DHPR		4
-#define MIC_DMA_REG_DTPR		8
-#define MIC_DMA_REG_DRAR_LO		20
-#define MIC_DMA_REG_DRAR_HI		24
-#define MIC_DMA_REG_DSTAT		32
-#define MIC_DMA_REG_DCHERR		44
-#define MIC_DMA_REG_DCHERRMSK		48
-
-/* HW dma desc */
-struct mic_dma_desc {
-	u64 qw0;
-	u64 qw1;
-};
-
-enum mic_dma_chan_owner {
-	MIC_DMA_CHAN_MIC = 0,
-	MIC_DMA_CHAN_HOST
-};
-
-/*
- * mic_dma_chan - channel specific information
- * @ch_num: channel number
- * @owner: owner of this channel
- * @last_tail: cached value of descriptor ring tail
- * @head: index of next descriptor in desc_ring
- * @issued: hardware notification point
- * @submitted: index that will be used to submit descriptors to h/w
- * @api_ch: dma engine api channel
- * @desc_ring: dma descriptor ring
- * @desc_ring_micpa: mic physical address of desc_ring
- * @status_dest: destination for status (fence) descriptor
- * @status_dest_micpa: mic address for status_dest,
- *		       DMA controller uses this address
- * @tx_array: array of async_tx
- * @cleanup_lock: lock held when processing completed tx
- * @prep_lock: lock held in prep_memcpy & released in tx_submit
- * @issue_lock: lock used to synchronize writes to head
- * @cookie: mic_irq cookie used with mic irq request
- */
-struct mic_dma_chan {
-	int ch_num;
-	enum mic_dma_chan_owner owner;
-	u32 last_tail;
-	u32 head;
-	u32 issued;
-	u32 submitted;
-	struct dma_chan api_ch;
-	struct mic_dma_desc *desc_ring;
-	dma_addr_t desc_ring_micpa;
-	u64 *status_dest;
-	dma_addr_t status_dest_micpa;
-	struct dma_async_tx_descriptor *tx_array;
-	spinlock_t cleanup_lock;
-	spinlock_t prep_lock;
-	spinlock_t issue_lock;
-	struct mic_irq *cookie;
-};
-
-/*
- * struct mic_dma_device - per mic device
- * @mic_ch: dma channels
- * @dma_dev: underlying dma device
- * @mbdev: mic bus dma device
- * @mmio: virtual address of the mmio space
- * @dbg_dir: debugfs directory
- * @start_ch: first channel number that can be used
- * @max_xfer_size: maximum transfer size per dma descriptor
- */
-struct mic_dma_device {
-	struct mic_dma_chan mic_ch[MIC_DMA_MAX_NUM_CHAN];
-	struct dma_device dma_dev;
-	struct mbus_device *mbdev;
-	void __iomem *mmio;
-	struct dentry *dbg_dir;
-	int start_ch;
-	size_t max_xfer_size;
-};
-
-static inline struct mic_dma_chan *to_mic_dma_chan(struct dma_chan *ch)
-{
-	return container_of(ch, struct mic_dma_chan, api_ch);
-}
-
-static inline struct mic_dma_device *to_mic_dma_dev(struct mic_dma_chan *ch)
-{
-	return
-	container_of((const typeof(((struct mic_dma_device *)0)->mic_ch)*)
-		     (ch - ch->ch_num), struct mic_dma_device, mic_ch);
-}
-
-static inline struct mbus_device *to_mbus_device(struct mic_dma_chan *ch)
-{
-	return to_mic_dma_dev(ch)->mbdev;
-}
-
-static inline struct mbus_hw_ops *to_mbus_hw_ops(struct mic_dma_chan *ch)
-{
-	return to_mbus_device(ch)->hw_ops;
-}
-
-static inline struct device *mic_dma_ch_to_device(struct mic_dma_chan *ch)
-{
-	return to_mic_dma_dev(ch)->dma_dev.dev;
-}
-
-static inline void __iomem *mic_dma_chan_to_mmio(struct mic_dma_chan *ch)
-{
-	return to_mic_dma_dev(ch)->mmio;
-}
-
-static inline u32 mic_dma_read_reg(struct mic_dma_chan *ch, u32 reg)
-{
-	return ioread32(mic_dma_chan_to_mmio(ch) + MIC_DMA_SBOX_CH_BASE +
-			ch->ch_num * MIC_DMA_SBOX_CHAN_OFF + reg);
-}
-
-static inline void mic_dma_write_reg(struct mic_dma_chan *ch, u32 reg, u32 val)
-{
-	iowrite32(val, mic_dma_chan_to_mmio(ch) + MIC_DMA_SBOX_CH_BASE +
-		  ch->ch_num * MIC_DMA_SBOX_CHAN_OFF + reg);
-}
-
-static inline u32 mic_dma_mmio_read(struct mic_dma_chan *ch, u32 offset)
-{
-	return ioread32(mic_dma_chan_to_mmio(ch) + offset);
-}
-
-static inline void mic_dma_mmio_write(struct mic_dma_chan *ch, u32 val,
-				      u32 offset)
-{
-	iowrite32(val, mic_dma_chan_to_mmio(ch) + offset);
-}
-
-static inline u32 mic_dma_read_cmp_cnt(struct mic_dma_chan *ch)
-{
-	return mic_dma_read_reg(ch, MIC_DMA_REG_DSTAT) &
-	       MIC_DMA_HW_CMP_CNT_MASK;
-}
-
-static inline void mic_dma_chan_set_owner(struct mic_dma_chan *ch)
-{
-	u32 dcr = mic_dma_mmio_read(ch, MIC_DMA_SBOX_BASE + MIC_DMA_SBOX_DCR);
-	u32 chan_num = ch->ch_num;
-
-	dcr = (dcr & ~(0x1 << (chan_num * 2))) | (ch->owner << (chan_num * 2));
-	mic_dma_mmio_write(ch, dcr, MIC_DMA_SBOX_BASE + MIC_DMA_SBOX_DCR);
-}
-
-static inline void mic_dma_enable_chan(struct mic_dma_chan *ch)
-{
-	u32 dcr = mic_dma_mmio_read(ch, MIC_DMA_SBOX_BASE + MIC_DMA_SBOX_DCR);
-
-	dcr |= 2 << (ch->ch_num << 1);
-	mic_dma_mmio_write(ch, dcr, MIC_DMA_SBOX_BASE + MIC_DMA_SBOX_DCR);
-}
-
-static inline void mic_dma_disable_chan(struct mic_dma_chan *ch)
-{
-	u32 dcr = mic_dma_mmio_read(ch, MIC_DMA_SBOX_BASE + MIC_DMA_SBOX_DCR);
-
-	dcr &= ~(2 << (ch->ch_num << 1));
-	mic_dma_mmio_write(ch, dcr, MIC_DMA_SBOX_BASE + MIC_DMA_SBOX_DCR);
-}
-
-static void mic_dma_chan_set_desc_ring(struct mic_dma_chan *ch)
-{
-	u32 drar_hi;
-	dma_addr_t desc_ring_micpa = ch->desc_ring_micpa;
-
-	drar_hi = (MIC_DMA_DESC_RX_SIZE & 0x1ffff) << 4;
-	if (MIC_DMA_CHAN_MIC == ch->owner) {
-		drar_hi |= (desc_ring_micpa >> 32) & 0xf;
-	} else {
-		drar_hi |= MIC_DMA_SBOX_DRARHI_SYS_MASK;
-		drar_hi |= ((desc_ring_micpa >> 34)
-			    & 0x1f) << 21;
-		drar_hi |= (desc_ring_micpa >> 32) & 0x3;
-	}
-	mic_dma_write_reg(ch, MIC_DMA_REG_DRAR_LO, (u32) desc_ring_micpa);
-	mic_dma_write_reg(ch, MIC_DMA_REG_DRAR_HI, drar_hi);
-}
-
-static inline void mic_dma_chan_mask_intr(struct mic_dma_chan *ch)
-{
-	u32 dcar = mic_dma_read_reg(ch, MIC_DMA_REG_DCAR);
-
-	if (MIC_DMA_CHAN_MIC == ch->owner)
-		dcar |= MIC_DMA_SBOX_DCAR_IM0;
-	else
-		dcar |= MIC_DMA_SBOX_DCAR_IM1;
-	mic_dma_write_reg(ch, MIC_DMA_REG_DCAR, dcar);
-}
-
-static inline void mic_dma_chan_unmask_intr(struct mic_dma_chan *ch)
-{
-	u32 dcar = mic_dma_read_reg(ch, MIC_DMA_REG_DCAR);
-
-	if (MIC_DMA_CHAN_MIC == ch->owner)
-		dcar &= ~MIC_DMA_SBOX_DCAR_IM0;
-	else
-		dcar &= ~MIC_DMA_SBOX_DCAR_IM1;
-	mic_dma_write_reg(ch, MIC_DMA_REG_DCAR, dcar);
-}
-
-static void mic_dma_ack_interrupt(struct mic_dma_chan *ch)
-{
-	if (MIC_DMA_CHAN_MIC == ch->owner) {
-		/* HW errata */
-		mic_dma_chan_mask_intr(ch);
-		mic_dma_chan_unmask_intr(ch);
-	}
-	to_mbus_hw_ops(ch)->ack_interrupt(to_mbus_device(ch), ch->ch_num);
-}
-#endif
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index d5ce8082b0a0..fafa8b0d8099 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -474,7 +474,6 @@ source "drivers/misc/lis3lv02d/Kconfig"
 source "drivers/misc/altera-stapl/Kconfig"
 source "drivers/misc/mei/Kconfig"
 source "drivers/misc/vmw_vmci/Kconfig"
-source "drivers/misc/mic/Kconfig"
 source "drivers/misc/genwqe/Kconfig"
 source "drivers/misc/echo/Kconfig"
 source "drivers/misc/cxl/Kconfig"
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 2521359e8ef7..d23231e73330 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -46,7 +46,6 @@ obj-$(CONFIG_VMWARE_VMCI)	+= vmw_vmci/
 obj-$(CONFIG_LATTICE_ECP3_CONFIG)	+= lattice-ecp3-config.o
 obj-$(CONFIG_SRAM)		+= sram.o
 obj-$(CONFIG_SRAM_EXEC)		+= sram-exec.o
-obj-y				+= mic/
 obj-$(CONFIG_GENWQE)		+= genwqe/
 obj-$(CONFIG_ECHO)		+= echo/
 obj-$(CONFIG_CXL_BASE)		+= cxl/
diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig
deleted file mode 100644
index 8a7c2c5711d5..000000000000
--- a/drivers/misc/mic/Kconfig
+++ /dev/null
@@ -1,141 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-menu "Intel MIC & related support"
-
-config INTEL_MIC_BUS
-	tristate "Intel MIC Bus Driver"
-	depends on 64BIT && PCI && X86
-	select DMA_OPS
-	help
-	  This option is selected by any driver which registers a
-	  device or driver on the MIC Bus, such as CONFIG_INTEL_MIC_HOST,
-	  CONFIG_INTEL_MIC_CARD, CONFIG_INTEL_MIC_X100_DMA etc.
-
-	  If you are building a host/card kernel with an Intel MIC device
-	  then say M (recommended) or Y, else say N. If unsure say N.
-
-	  More information about the Intel MIC family as well as the Linux
-	  OS and tools for MIC to use with this driver are available from
-	  <http://software.intel.com/en-us/mic-developer>.
-
-config SCIF_BUS
-	tristate "SCIF Bus Driver"
-	depends on 64BIT && PCI && X86
-	select DMA_OPS
-	help
-	  This option is selected by any driver which registers a
-	  device or driver on the SCIF Bus, such as CONFIG_INTEL_MIC_HOST
-	  and CONFIG_INTEL_MIC_CARD.
-
-	  If you are building a host/card kernel with an Intel MIC device
-	  then say M (recommended) or Y, else say N. If unsure say N.
-
-	  More information about the Intel MIC family as well as the Linux
-	  OS and tools for MIC to use with this driver are available from
-	  <http://software.intel.com/en-us/mic-developer>.
-
-config VOP_BUS
-	tristate "VOP Bus Driver"
-	depends on HAS_DMA
-	select DMA_OPS
-	help
-	  This option is selected by any driver which registers a
-	  device or driver on the VOP Bus, such as CONFIG_INTEL_MIC_HOST
-	  and CONFIG_INTEL_MIC_CARD.
-
-	  If you are building a host/card kernel with an Intel MIC device
-	  then say M (recommended) or Y, else say N. If unsure say N.
-
-	  More information about the Intel MIC family as well as the Linux
-	  OS and tools for MIC to use with this driver are available from
-	  <http://software.intel.com/en-us/mic-developer>.
-
-config INTEL_MIC_HOST
-	tristate "Intel MIC Host Driver"
-	depends on 64BIT && PCI && X86
-	depends on INTEL_MIC_BUS && SCIF_BUS && MIC_COSM && VOP_BUS
-	select DMA_OPS
-	help
-	  This enables Host Driver support for the Intel Many Integrated
-	  Core (MIC) family of PCIe form factor coprocessor devices that
-	  run a 64 bit Linux OS. The driver manages card OS state and
-	  enables communication between host and card. Intel MIC X100
-	  devices are currently supported.
-
-	  If you are building a host kernel with an Intel MIC device then
-	  say M (recommended) or Y, else say N. If unsure say N.
-
-	  More information about the Intel MIC family as well as the Linux
-	  OS and tools for MIC to use with this driver are available from
-	  <http://software.intel.com/en-us/mic-developer>.
-
-config INTEL_MIC_CARD
-	tristate "Intel MIC Card Driver"
-	depends on 64BIT && X86
-	depends on INTEL_MIC_BUS && SCIF_BUS && MIC_COSM && VOP_BUS
-	select VIRTIO
-	help
-	  This enables card driver support for the Intel Many Integrated
-	  Core (MIC) device family. The card driver communicates shutdown/
-	  crash events to the host and allows registration/configuration of
-	  virtio devices. Intel MIC X100 devices are currently supported.
-
-	  If you are building a card kernel for an Intel MIC device then
-	  say M (recommended) or Y, else say N. If unsure say N.
-
-	  For more information see
-	  <http://software.intel.com/en-us/mic-developer>.
-
-config SCIF
-	tristate "SCIF Driver"
-	depends on 64BIT && PCI && X86 && SCIF_BUS && IOMMU_SUPPORT
-	select IOMMU_IOVA
-	help
-	  This enables SCIF Driver support for the Intel Many Integrated
-	  Core (MIC) family of PCIe form factor coprocessor devices that
-	  run a 64 bit Linux OS. The Symmetric Communication Interface
-	  (SCIF (pronounced as skiff)) is a low level communications API
-	  across PCIe currently implemented for MIC.
-
-	  If you are building a host kernel with an Intel MIC device then
-	  say M (recommended) or Y, else say N. If unsure say N.
-
-	  More information about the Intel MIC family as well as the Linux
-	  OS and tools for MIC to use with this driver are available from
-	  <http://software.intel.com/en-us/mic-developer>.
-
-config MIC_COSM
-	tristate "Intel MIC Coprocessor State Management (COSM) Drivers"
-	depends on 64BIT && PCI && X86 && SCIF
-	help
-	  This enables COSM driver support for the Intel Many
-	  Integrated Core (MIC) family of PCIe form factor coprocessor
-	  devices. COSM drivers implement functions such as boot,
-	  shutdown, reset and reboot of MIC devices.
-
-	  If you are building a host kernel with an Intel MIC device then
-	  say M (recommended) or Y, else say N. If unsure say N.
-
-	  More information about the Intel MIC family as well as the Linux
-	  OS and tools for MIC to use with this driver are available from
-	  <http://software.intel.com/en-us/mic-developer>.
-
-config VOP
-	tristate "VOP Driver"
-	depends on VOP_BUS
-	select VHOST_RING
-	select VIRTIO
-	help
-	  This enables VOP (Virtio over PCIe) Driver support for the Intel
-	  Many Integrated Core (MIC) family of PCIe form factor coprocessor
-	  devices. The VOP driver allows virtio drivers, e.g. net, console
-	  and block drivers, on the card connect to user space virtio
-	  devices on the host.
-
-	  If you are building a host kernel with an Intel MIC device then
-	  say M (recommended) or Y, else say N. If unsure say N.
-
-	  More information about the Intel MIC family as well as the Linux
-	  OS and tools for MIC to use with this driver are available from
-	  <http://software.intel.com/en-us/mic-developer>.
-
-endmenu
diff --git a/drivers/misc/mic/Makefile b/drivers/misc/mic/Makefile
deleted file mode 100644
index 1a43622b183f..000000000000
--- a/drivers/misc/mic/Makefile
+++ /dev/null
@@ -1,12 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile - Intel MIC Linux driver.
-# Copyright(c) 2013, Intel Corporation.
-#
-obj-$(CONFIG_INTEL_MIC_HOST) += host/
-obj-$(CONFIG_INTEL_MIC_CARD) += card/
-obj-y += bus/
-obj-$(CONFIG_SCIF) += scif/
-obj-$(CONFIG_MIC_COSM) += cosm/
-obj-$(CONFIG_MIC_COSM) += cosm_client/
-obj-$(CONFIG_VOP) += vop/
diff --git a/drivers/misc/mic/bus/Makefile b/drivers/misc/mic/bus/Makefile
deleted file mode 100644
index 0a6aa21b2f67..000000000000
--- a/drivers/misc/mic/bus/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Makefile - Intel MIC Linux driver.
-# Copyright(c) 2014, Intel Corporation.
-#
-obj-$(CONFIG_INTEL_MIC_BUS) += mic_bus.o
-obj-$(CONFIG_SCIF_BUS) += scif_bus.o
-obj-$(CONFIG_MIC_COSM) += cosm_bus.o
-obj-$(CONFIG_VOP_BUS) += vop_bus.o
diff --git a/drivers/misc/mic/bus/cosm_bus.c b/drivers/misc/mic/bus/cosm_bus.c
deleted file mode 100644
index 5f2141c71738..000000000000
--- a/drivers/misc/mic/bus/cosm_bus.c
+++ /dev/null
@@ -1,130 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * Intel MIC COSM Bus Driver
- */
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/idr.h>
-#include "cosm_bus.h"
-
-/* Unique numbering for cosm devices. */
-static DEFINE_IDA(cosm_index_ida);
-
-static int cosm_dev_probe(struct device *d)
-{
-	struct cosm_device *dev = dev_to_cosm(d);
-	struct cosm_driver *drv = drv_to_cosm(dev->dev.driver);
-
-	return drv->probe(dev);
-}
-
-static int cosm_dev_remove(struct device *d)
-{
-	struct cosm_device *dev = dev_to_cosm(d);
-	struct cosm_driver *drv = drv_to_cosm(dev->dev.driver);
-
-	drv->remove(dev);
-	return 0;
-}
-
-static struct bus_type cosm_bus = {
-	.name  = "cosm_bus",
-	.probe = cosm_dev_probe,
-	.remove = cosm_dev_remove,
-};
-
-int cosm_register_driver(struct cosm_driver *driver)
-{
-	driver->driver.bus = &cosm_bus;
-	return driver_register(&driver->driver);
-}
-EXPORT_SYMBOL_GPL(cosm_register_driver);
-
-void cosm_unregister_driver(struct cosm_driver *driver)
-{
-	driver_unregister(&driver->driver);
-}
-EXPORT_SYMBOL_GPL(cosm_unregister_driver);
-
-static inline void cosm_release_dev(struct device *d)
-{
-	struct cosm_device *cdev = dev_to_cosm(d);
-
-	kfree(cdev);
-}
-
-struct cosm_device *
-cosm_register_device(struct device *pdev, struct cosm_hw_ops *hw_ops)
-{
-	struct cosm_device *cdev;
-	int ret;
-
-	cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
-	if (!cdev)
-		return ERR_PTR(-ENOMEM);
-
-	cdev->dev.parent = pdev;
-	cdev->dev.release = cosm_release_dev;
-	cdev->hw_ops = hw_ops;
-	dev_set_drvdata(&cdev->dev, cdev);
-	cdev->dev.bus = &cosm_bus;
-
-	/* Assign a unique device index and hence name */
-	ret = ida_simple_get(&cosm_index_ida, 0, 0, GFP_KERNEL);
-	if (ret < 0)
-		goto free_cdev;
-
-	cdev->index = ret;
-	cdev->dev.id = ret;
-	dev_set_name(&cdev->dev, "cosm-dev%u", cdev->index);
-
-	ret = device_register(&cdev->dev);
-	if (ret)
-		goto ida_remove;
-	return cdev;
-ida_remove:
-	ida_simple_remove(&cosm_index_ida, cdev->index);
-free_cdev:
-	put_device(&cdev->dev);
-	return ERR_PTR(ret);
-}
-EXPORT_SYMBOL_GPL(cosm_register_device);
-
-void cosm_unregister_device(struct cosm_device *dev)
-{
-	int index = dev->index; /* save for after device release */
-
-	device_unregister(&dev->dev);
-	ida_simple_remove(&cosm_index_ida, index);
-}
-EXPORT_SYMBOL_GPL(cosm_unregister_device);
-
-struct cosm_device *cosm_find_cdev_by_id(int id)
-{
-	struct device *dev = subsys_find_device_by_id(&cosm_bus, id, NULL);
-
-	return dev ? container_of(dev, struct cosm_device, dev) : NULL;
-}
-EXPORT_SYMBOL_GPL(cosm_find_cdev_by_id);
-
-static int __init cosm_init(void)
-{
-	return bus_register(&cosm_bus);
-}
-
-static void __exit cosm_exit(void)
-{
-	bus_unregister(&cosm_bus);
-	ida_destroy(&cosm_index_ida);
-}
-
-core_initcall(cosm_init);
-module_exit(cosm_exit);
-
-MODULE_AUTHOR("Intel Corporation");
-MODULE_DESCRIPTION("Intel(R) MIC card OS state management bus driver");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/bus/cosm_bus.h b/drivers/misc/mic/bus/cosm_bus.h
deleted file mode 100644
index d50d7aea168d..000000000000
--- a/drivers/misc/mic/bus/cosm_bus.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * Intel MIC COSM Bus Driver
- */
-#ifndef _COSM_BUS_H_
-#define _COSM_BUS_H_
-
-#include <linux/scif.h>
-#include <linux/mic_common.h>
-#include "../common/mic_dev.h"
-
-/**
- * cosm_device - representation of a cosm device
- *
- * @attr_group: Pointer to list of sysfs attribute groups.
- * @sdev: Device for sysfs entries.
- * @state: MIC state.
- * @prev_state: MIC state previous to MIC_RESETTING
- * @shutdown_status: MIC status reported by card for shutdown/crashes.
- * @shutdown_status_int: Internal shutdown status maintained by the driver
- * @cosm_mutex: Mutex for synchronizing access to data structures.
- * @reset_trigger_work: Work for triggering reset requests.
- * @scif_work: Work for handling per device SCIF connections
- * @cmdline: Kernel command line.
- * @firmware: Firmware file name.
- * @ramdisk: Ramdisk file name.
- * @bootmode: Boot mode i.e. "linux" or "elf" for flash updates.
- * @log_buf_addr: Log buffer address for MIC.
- * @log_buf_len: Log buffer length address for MIC.
- * @state_sysfs: Sysfs dirent for notifying ring 3 about MIC state changes.
- * @hw_ops: the hardware bus ops for this device.
- * @dev: underlying device.
- * @index: unique position on the cosm bus
- * @dbg_dir: debug fs directory
- * @newepd: new endpoint from scif accept to be assigned to this cdev
- * @epd: SCIF endpoint for this cdev
- * @heartbeat_watchdog_enable: if heartbeat watchdog is enabled for this cdev
- * @sysfs_heartbeat_enable: sysfs setting for disabling heartbeat notification
- */
-struct cosm_device {
-	const struct attribute_group **attr_group;
-	struct device *sdev;
-	u8 state;
-	u8 prev_state;
-	u8 shutdown_status;
-	u8 shutdown_status_int;
-	struct mutex cosm_mutex;
-	struct work_struct reset_trigger_work;
-	struct work_struct scif_work;
-	char *cmdline;
-	char *firmware;
-	char *ramdisk;
-	char *bootmode;
-	void *log_buf_addr;
-	int *log_buf_len;
-	struct kernfs_node *state_sysfs;
-	struct cosm_hw_ops *hw_ops;
-	struct device dev;
-	int index;
-	struct dentry *dbg_dir;
-	scif_epd_t newepd;
-	scif_epd_t epd;
-	bool heartbeat_watchdog_enable;
-	bool sysfs_heartbeat_enable;
-};
-
-/**
- * cosm_driver - operations for a cosm driver
- *
- * @driver: underlying device driver (populate name and owner).
- * @probe: the function to call when a device is found.  Returns 0 or -errno.
- * @remove: the function to call when a device is removed.
- */
-struct cosm_driver {
-	struct device_driver driver;
-	int (*probe)(struct cosm_device *dev);
-	void (*remove)(struct cosm_device *dev);
-};
-
-/**
- * cosm_hw_ops - cosm bus ops
- *
- * @reset: trigger MIC reset
- * @force_reset: force MIC reset
- * @post_reset: inform MIC reset is complete
- * @ready: is MIC ready for OS download
- * @start: boot MIC
- * @stop: prepare MIC for reset
- * @family: return MIC HW family string
- * @stepping: return MIC HW stepping string
- * @aper: return MIC PCIe aperture
- */
-struct cosm_hw_ops {
-	void (*reset)(struct cosm_device *cdev);
-	void (*force_reset)(struct cosm_device *cdev);
-	void (*post_reset)(struct cosm_device *cdev, enum mic_states state);
-	bool (*ready)(struct cosm_device *cdev);
-	int (*start)(struct cosm_device *cdev, int id);
-	void (*stop)(struct cosm_device *cdev, bool force);
-	ssize_t (*family)(struct cosm_device *cdev, char *buf);
-	ssize_t (*stepping)(struct cosm_device *cdev, char *buf);
-	struct mic_mw *(*aper)(struct cosm_device *cdev);
-};
-
-struct cosm_device *
-cosm_register_device(struct device *pdev, struct cosm_hw_ops *hw_ops);
-void cosm_unregister_device(struct cosm_device *dev);
-int cosm_register_driver(struct cosm_driver *drv);
-void cosm_unregister_driver(struct cosm_driver *drv);
-struct cosm_device *cosm_find_cdev_by_id(int id);
-
-static inline struct cosm_device *dev_to_cosm(struct device *dev)
-{
-	return container_of(dev, struct cosm_device, dev);
-}
-
-static inline struct cosm_driver *drv_to_cosm(struct device_driver *drv)
-{
-	return container_of(drv, struct cosm_driver, driver);
-}
-#endif /* _COSM_BUS_H */
diff --git a/drivers/misc/mic/bus/mic_bus.c b/drivers/misc/mic/bus/mic_bus.c
deleted file mode 100644
index a08cb29692a8..000000000000
--- a/drivers/misc/mic/bus/mic_bus.c
+++ /dev/null
@@ -1,194 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Intel MIC Bus driver.
- *
- * This implementation is very similar to the the virtio bus driver
- * implementation @ drivers/virtio/virtio.c
- */
-#include <linux/dma-map-ops.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/idr.h>
-#include <linux/mic_bus.h>
-
-static ssize_t device_show(struct device *d,
-			   struct device_attribute *attr, char *buf)
-{
-	struct mbus_device *dev = dev_to_mbus(d);
-	return sprintf(buf, "0x%04x\n", dev->id.device);
-}
-static DEVICE_ATTR_RO(device);
-
-static ssize_t vendor_show(struct device *d,
-			   struct device_attribute *attr, char *buf)
-{
-	struct mbus_device *dev = dev_to_mbus(d);
-	return sprintf(buf, "0x%04x\n", dev->id.vendor);
-}
-static DEVICE_ATTR_RO(vendor);
-
-static ssize_t modalias_show(struct device *d,
-			     struct device_attribute *attr, char *buf)
-{
-	struct mbus_device *dev = dev_to_mbus(d);
-	return sprintf(buf, "mbus:d%08Xv%08X\n",
-		       dev->id.device, dev->id.vendor);
-}
-static DEVICE_ATTR_RO(modalias);
-
-static struct attribute *mbus_dev_attrs[] = {
-	&dev_attr_device.attr,
-	&dev_attr_vendor.attr,
-	&dev_attr_modalias.attr,
-	NULL,
-};
-ATTRIBUTE_GROUPS(mbus_dev);
-
-static inline int mbus_id_match(const struct mbus_device *dev,
-				const struct mbus_device_id *id)
-{
-	if (id->device != dev->id.device && id->device != MBUS_DEV_ANY_ID)
-		return 0;
-
-	return id->vendor == MBUS_DEV_ANY_ID || id->vendor == dev->id.vendor;
-}
-
-/*
- * This looks through all the IDs a driver claims to support.  If any of them
- * match, we return 1 and the kernel will call mbus_dev_probe().
- */
-static int mbus_dev_match(struct device *dv, struct device_driver *dr)
-{
-	unsigned int i;
-	struct mbus_device *dev = dev_to_mbus(dv);
-	const struct mbus_device_id *ids;
-
-	ids = drv_to_mbus(dr)->id_table;
-	for (i = 0; ids[i].device; i++)
-		if (mbus_id_match(dev, &ids[i]))
-			return 1;
-	return 0;
-}
-
-static int mbus_uevent(struct device *dv, struct kobj_uevent_env *env)
-{
-	struct mbus_device *dev = dev_to_mbus(dv);
-
-	return add_uevent_var(env, "MODALIAS=mbus:d%08Xv%08X",
-			      dev->id.device, dev->id.vendor);
-}
-
-static int mbus_dev_probe(struct device *d)
-{
-	int err;
-	struct mbus_device *dev = dev_to_mbus(d);
-	struct mbus_driver *drv = drv_to_mbus(dev->dev.driver);
-
-	err = drv->probe(dev);
-	if (!err)
-		if (drv->scan)
-			drv->scan(dev);
-	return err;
-}
-
-static int mbus_dev_remove(struct device *d)
-{
-	struct mbus_device *dev = dev_to_mbus(d);
-	struct mbus_driver *drv = drv_to_mbus(dev->dev.driver);
-
-	drv->remove(dev);
-	return 0;
-}
-
-static struct bus_type mic_bus = {
-	.name  = "mic_bus",
-	.match = mbus_dev_match,
-	.dev_groups = mbus_dev_groups,
-	.uevent = mbus_uevent,
-	.probe = mbus_dev_probe,
-	.remove = mbus_dev_remove,
-};
-
-int mbus_register_driver(struct mbus_driver *driver)
-{
-	driver->driver.bus = &mic_bus;
-	return driver_register(&driver->driver);
-}
-EXPORT_SYMBOL_GPL(mbus_register_driver);
-
-void mbus_unregister_driver(struct mbus_driver *driver)
-{
-	driver_unregister(&driver->driver);
-}
-EXPORT_SYMBOL_GPL(mbus_unregister_driver);
-
-static void mbus_release_dev(struct device *d)
-{
-	struct mbus_device *mbdev = dev_to_mbus(d);
-	kfree(mbdev);
-}
-
-struct mbus_device *
-mbus_register_device(struct device *pdev, int id, const struct dma_map_ops *dma_ops,
-		     struct mbus_hw_ops *hw_ops, int index,
-		     void __iomem *mmio_va)
-{
-	int ret;
-	struct mbus_device *mbdev;
-
-	mbdev = kzalloc(sizeof(*mbdev), GFP_KERNEL);
-	if (!mbdev)
-		return ERR_PTR(-ENOMEM);
-
-	mbdev->mmio_va = mmio_va;
-	mbdev->dev.parent = pdev;
-	mbdev->id.device = id;
-	mbdev->id.vendor = MBUS_DEV_ANY_ID;
-	mbdev->dev.dma_ops = dma_ops;
-	mbdev->dev.dma_mask = &mbdev->dev.coherent_dma_mask;
-	dma_set_mask(&mbdev->dev, DMA_BIT_MASK(64));
-	mbdev->dev.release = mbus_release_dev;
-	mbdev->hw_ops = hw_ops;
-	mbdev->dev.bus = &mic_bus;
-	mbdev->index = index;
-	dev_set_name(&mbdev->dev, "mbus-dev%u", mbdev->index);
-	/*
-	 * device_register() causes the bus infrastructure to look for a
-	 * matching driver.
-	 */
-	ret = device_register(&mbdev->dev);
-	if (ret)
-		goto free_mbdev;
-	return mbdev;
-free_mbdev:
-	put_device(&mbdev->dev);
-	return ERR_PTR(ret);
-}
-EXPORT_SYMBOL_GPL(mbus_register_device);
-
-void mbus_unregister_device(struct mbus_device *mbdev)
-{
-	device_unregister(&mbdev->dev);
-}
-EXPORT_SYMBOL_GPL(mbus_unregister_device);
-
-static int __init mbus_init(void)
-{
-	return bus_register(&mic_bus);
-}
-
-static void __exit mbus_exit(void)
-{
-	bus_unregister(&mic_bus);
-}
-
-core_initcall(mbus_init);
-module_exit(mbus_exit);
-
-MODULE_AUTHOR("Intel Corporation");
-MODULE_DESCRIPTION("Intel(R) MIC Bus driver");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/bus/scif_bus.c b/drivers/misc/mic/bus/scif_bus.c
deleted file mode 100644
index ad7c3604f151..000000000000
--- a/drivers/misc/mic/bus/scif_bus.c
+++ /dev/null
@@ -1,201 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Intel Symmetric Communications Interface Bus driver.
- */
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/idr.h>
-#include <linux/dma-map-ops.h>
-
-#include "scif_bus.h"
-
-static ssize_t device_show(struct device *d,
-			   struct device_attribute *attr, char *buf)
-{
-	struct scif_hw_dev *dev = dev_to_scif(d);
-
-	return sprintf(buf, "0x%04x\n", dev->id.device);
-}
-static DEVICE_ATTR_RO(device);
-
-static ssize_t vendor_show(struct device *d,
-			   struct device_attribute *attr, char *buf)
-{
-	struct scif_hw_dev *dev = dev_to_scif(d);
-
-	return sprintf(buf, "0x%04x\n", dev->id.vendor);
-}
-static DEVICE_ATTR_RO(vendor);
-
-static ssize_t modalias_show(struct device *d,
-			     struct device_attribute *attr, char *buf)
-{
-	struct scif_hw_dev *dev = dev_to_scif(d);
-
-	return sprintf(buf, "scif:d%08Xv%08X\n",
-		       dev->id.device, dev->id.vendor);
-}
-static DEVICE_ATTR_RO(modalias);
-
-static struct attribute *scif_dev_attrs[] = {
-	&dev_attr_device.attr,
-	&dev_attr_vendor.attr,
-	&dev_attr_modalias.attr,
-	NULL,
-};
-ATTRIBUTE_GROUPS(scif_dev);
-
-static inline int scif_id_match(const struct scif_hw_dev *dev,
-				const struct scif_hw_dev_id *id)
-{
-	if (id->device != dev->id.device && id->device != SCIF_DEV_ANY_ID)
-		return 0;
-
-	return id->vendor == SCIF_DEV_ANY_ID || id->vendor == dev->id.vendor;
-}
-
-/*
- * This looks through all the IDs a driver claims to support.  If any of them
- * match, we return 1 and the kernel will call scif_dev_probe().
- */
-static int scif_dev_match(struct device *dv, struct device_driver *dr)
-{
-	unsigned int i;
-	struct scif_hw_dev *dev = dev_to_scif(dv);
-	const struct scif_hw_dev_id *ids;
-
-	ids = drv_to_scif(dr)->id_table;
-	for (i = 0; ids[i].device; i++)
-		if (scif_id_match(dev, &ids[i]))
-			return 1;
-	return 0;
-}
-
-static int scif_uevent(struct device *dv, struct kobj_uevent_env *env)
-{
-	struct scif_hw_dev *dev = dev_to_scif(dv);
-
-	return add_uevent_var(env, "MODALIAS=scif:d%08Xv%08X",
-			      dev->id.device, dev->id.vendor);
-}
-
-static int scif_dev_probe(struct device *d)
-{
-	struct scif_hw_dev *dev = dev_to_scif(d);
-	struct scif_driver *drv = drv_to_scif(dev->dev.driver);
-
-	return drv->probe(dev);
-}
-
-static int scif_dev_remove(struct device *d)
-{
-	struct scif_hw_dev *dev = dev_to_scif(d);
-	struct scif_driver *drv = drv_to_scif(dev->dev.driver);
-
-	drv->remove(dev);
-	return 0;
-}
-
-static struct bus_type scif_bus = {
-	.name  = "scif_bus",
-	.match = scif_dev_match,
-	.dev_groups = scif_dev_groups,
-	.uevent = scif_uevent,
-	.probe = scif_dev_probe,
-	.remove = scif_dev_remove,
-};
-
-int scif_register_driver(struct scif_driver *driver)
-{
-	driver->driver.bus = &scif_bus;
-	return driver_register(&driver->driver);
-}
-EXPORT_SYMBOL_GPL(scif_register_driver);
-
-void scif_unregister_driver(struct scif_driver *driver)
-{
-	driver_unregister(&driver->driver);
-}
-EXPORT_SYMBOL_GPL(scif_unregister_driver);
-
-static void scif_release_dev(struct device *d)
-{
-	struct scif_hw_dev *sdev = dev_to_scif(d);
-
-	kfree(sdev);
-}
-
-struct scif_hw_dev *
-scif_register_device(struct device *pdev, int id, const struct dma_map_ops *dma_ops,
-		     struct scif_hw_ops *hw_ops, u8 dnode, u8 snode,
-		     struct mic_mw *mmio, struct mic_mw *aper, void *dp,
-		     void __iomem *rdp, struct dma_chan **chan, int num_chan,
-		     bool card_rel_da)
-{
-	int ret;
-	struct scif_hw_dev *sdev;
-
-	sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
-	if (!sdev)
-		return ERR_PTR(-ENOMEM);
-
-	sdev->dev.parent = pdev;
-	sdev->id.device = id;
-	sdev->id.vendor = SCIF_DEV_ANY_ID;
-	sdev->dev.dma_ops = dma_ops;
-	sdev->dev.release = scif_release_dev;
-	sdev->hw_ops = hw_ops;
-	sdev->dnode = dnode;
-	sdev->snode = snode;
-	dev_set_drvdata(&sdev->dev, sdev);
-	sdev->dev.bus = &scif_bus;
-	sdev->mmio = mmio;
-	sdev->aper = aper;
-	sdev->dp = dp;
-	sdev->rdp = rdp;
-	sdev->dev.dma_mask = &sdev->dev.coherent_dma_mask;
-	dma_set_mask(&sdev->dev, DMA_BIT_MASK(64));
-	sdev->dma_ch = chan;
-	sdev->num_dma_ch = num_chan;
-	sdev->card_rel_da = card_rel_da;
-	dev_set_name(&sdev->dev, "scif-dev%u", sdev->dnode);
-	/*
-	 * device_register() causes the bus infrastructure to look for a
-	 * matching driver.
-	 */
-	ret = device_register(&sdev->dev);
-	if (ret)
-		goto free_sdev;
-	return sdev;
-free_sdev:
-	put_device(&sdev->dev);
-	return ERR_PTR(ret);
-}
-EXPORT_SYMBOL_GPL(scif_register_device);
-
-void scif_unregister_device(struct scif_hw_dev *sdev)
-{
-	device_unregister(&sdev->dev);
-}
-EXPORT_SYMBOL_GPL(scif_unregister_device);
-
-static int __init scif_init(void)
-{
-	return bus_register(&scif_bus);
-}
-
-static void __exit scif_exit(void)
-{
-	bus_unregister(&scif_bus);
-}
-
-core_initcall(scif_init);
-module_exit(scif_exit);
-
-MODULE_AUTHOR("Intel Corporation");
-MODULE_DESCRIPTION("Intel(R) SCIF Bus driver");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/bus/scif_bus.h b/drivers/misc/mic/bus/scif_bus.h
deleted file mode 100644
index 4981eb56f879..000000000000
--- a/drivers/misc/mic/bus/scif_bus.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Intel Symmetric Communications Interface Bus driver.
- */
-#ifndef _SCIF_BUS_H_
-#define _SCIF_BUS_H_
-/*
- * Everything a scif driver needs to work with any particular scif
- * hardware abstraction layer.
- */
-#include <linux/dma-map-ops.h>
-
-#include <linux/mic_common.h>
-#include "../common/mic_dev.h"
-
-struct scif_hw_dev_id {
-	u32 device;
-	u32 vendor;
-};
-
-#define MIC_SCIF_DEV 1
-#define SCIF_DEV_ANY_ID 0xffffffff
-
-/**
- * scif_hw_dev - representation of a hardware device abstracted for scif
- * @hw_ops: the hardware ops supported by this device
- * @id: the device type identification (used to match it with a driver)
- * @mmio: MMIO memory window
- * @aper: Aperture memory window
- * @dev: underlying device
- * @dnode - The destination node which this device will communicate with.
- * @snode - The source node for this device.
- * @dp - Self device page
- * @rdp - Remote device page
- * @dma_ch - Array of DMA channels
- * @num_dma_ch - Number of DMA channels available
- * @card_rel_da - Set to true if DMA addresses programmed in the DMA engine
- *		are relative to the card point of view
- */
-struct scif_hw_dev {
-	struct scif_hw_ops *hw_ops;
-	struct scif_hw_dev_id id;
-	struct mic_mw *mmio;
-	struct mic_mw *aper;
-	struct device dev;
-	u8 dnode;
-	u8 snode;
-	void *dp;
-	void __iomem *rdp;
-	struct dma_chan **dma_ch;
-	int num_dma_ch;
-	bool card_rel_da;
-};
-
-/**
- * scif_driver - operations for a scif I/O driver
- * @driver: underlying device driver (populate name and owner).
- * @id_table: the ids serviced by this driver.
- * @probe: the function to call when a device is found.  Returns 0 or -errno.
- * @remove: the function to call when a device is removed.
- */
-struct scif_driver {
-	struct device_driver driver;
-	const struct scif_hw_dev_id *id_table;
-	int (*probe)(struct scif_hw_dev *dev);
-	void (*remove)(struct scif_hw_dev *dev);
-};
-
-/**
- * scif_hw_ops - Hardware operations for accessing a SCIF device on the SCIF bus.
- *
- * @next_db: Obtain the next available doorbell.
- * @request_irq: Request an interrupt on a particular doorbell.
- * @free_irq: Free an interrupt requested previously.
- * @ack_interrupt: acknowledge an interrupt in the ISR.
- * @send_intr: Send an interrupt to the remote node on a specified doorbell.
- * @send_p2p_intr: Send an interrupt to the peer node on a specified doorbell
- * which is specifically targeted for a peer to peer node.
- * @remap: Map a buffer with the specified physical address and length.
- * @unmap: Unmap a buffer previously mapped.
- */
-struct scif_hw_ops {
-	int (*next_db)(struct scif_hw_dev *sdev);
-	struct mic_irq * (*request_irq)(struct scif_hw_dev *sdev,
-					irqreturn_t (*func)(int irq,
-							    void *data),
-					const char *name, void *data,
-					int db);
-	void (*free_irq)(struct scif_hw_dev *sdev,
-			 struct mic_irq *cookie, void *data);
-	void (*ack_interrupt)(struct scif_hw_dev *sdev, int num);
-	void (*send_intr)(struct scif_hw_dev *sdev, int db);
-	void (*send_p2p_intr)(struct scif_hw_dev *sdev, int db,
-			      struct mic_mw *mw);
-	void __iomem * (*remap)(struct scif_hw_dev *sdev,
-				  phys_addr_t pa, size_t len);
-	void (*unmap)(struct scif_hw_dev *sdev, void __iomem *va);
-};
-
-int scif_register_driver(struct scif_driver *driver);
-void scif_unregister_driver(struct scif_driver *driver);
-struct scif_hw_dev *
-scif_register_device(struct device *pdev, int id,
-		     const struct dma_map_ops *dma_ops,
-		     struct scif_hw_ops *hw_ops, u8 dnode, u8 snode,
-		     struct mic_mw *mmio, struct mic_mw *aper,
-		     void *dp, void __iomem *rdp,
-		     struct dma_chan **chan, int num_chan,
-		     bool card_rel_da);
-void scif_unregister_device(struct scif_hw_dev *sdev);
-
-static inline struct scif_hw_dev *dev_to_scif(struct device *dev)
-{
-	return container_of(dev, struct scif_hw_dev, dev);
-}
-
-static inline struct scif_driver *drv_to_scif(struct device_driver *drv)
-{
-	return container_of(drv, struct scif_driver, driver);
-}
-#endif /* _SCIF_BUS_H */
diff --git a/drivers/misc/mic/bus/vop_bus.c b/drivers/misc/mic/bus/vop_bus.c
deleted file mode 100644
index 6935ddca1bd5..000000000000
--- a/drivers/misc/mic/bus/vop_bus.c
+++ /dev/null
@@ -1,194 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Intel Virtio Over PCIe (VOP) Bus driver.
- */
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/idr.h>
-#include <linux/dma-map-ops.h>
-
-#include "vop_bus.h"
-
-static ssize_t device_show(struct device *d,
-			   struct device_attribute *attr, char *buf)
-{
-	struct vop_device *dev = dev_to_vop(d);
-
-	return sprintf(buf, "0x%04x\n", dev->id.device);
-}
-static DEVICE_ATTR_RO(device);
-
-static ssize_t vendor_show(struct device *d,
-			   struct device_attribute *attr, char *buf)
-{
-	struct vop_device *dev = dev_to_vop(d);
-
-	return sprintf(buf, "0x%04x\n", dev->id.vendor);
-}
-static DEVICE_ATTR_RO(vendor);
-
-static ssize_t modalias_show(struct device *d,
-			     struct device_attribute *attr, char *buf)
-{
-	struct vop_device *dev = dev_to_vop(d);
-
-	return sprintf(buf, "vop:d%08Xv%08X\n",
-		       dev->id.device, dev->id.vendor);
-}
-static DEVICE_ATTR_RO(modalias);
-
-static struct attribute *vop_dev_attrs[] = {
-	&dev_attr_device.attr,
-	&dev_attr_vendor.attr,
-	&dev_attr_modalias.attr,
-	NULL,
-};
-ATTRIBUTE_GROUPS(vop_dev);
-
-static inline int vop_id_match(const struct vop_device *dev,
-			       const struct vop_device_id *id)
-{
-	if (id->device != dev->id.device && id->device != VOP_DEV_ANY_ID)
-		return 0;
-
-	return id->vendor == VOP_DEV_ANY_ID || id->vendor == dev->id.vendor;
-}
-
-/*
- * This looks through all the IDs a driver claims to support.  If any of them
- * match, we return 1 and the kernel will call vop_dev_probe().
- */
-static int vop_dev_match(struct device *dv, struct device_driver *dr)
-{
-	unsigned int i;
-	struct vop_device *dev = dev_to_vop(dv);
-	const struct vop_device_id *ids;
-
-	ids = drv_to_vop(dr)->id_table;
-	for (i = 0; ids[i].device; i++)
-		if (vop_id_match(dev, &ids[i]))
-			return 1;
-	return 0;
-}
-
-static int vop_uevent(struct device *dv, struct kobj_uevent_env *env)
-{
-	struct vop_device *dev = dev_to_vop(dv);
-
-	return add_uevent_var(env, "MODALIAS=vop:d%08Xv%08X",
-			      dev->id.device, dev->id.vendor);
-}
-
-static int vop_dev_probe(struct device *d)
-{
-	struct vop_device *dev = dev_to_vop(d);
-	struct vop_driver *drv = drv_to_vop(dev->dev.driver);
-
-	return drv->probe(dev);
-}
-
-static int vop_dev_remove(struct device *d)
-{
-	struct vop_device *dev = dev_to_vop(d);
-	struct vop_driver *drv = drv_to_vop(dev->dev.driver);
-
-	drv->remove(dev);
-	return 0;
-}
-
-static struct bus_type vop_bus = {
-	.name  = "vop_bus",
-	.match = vop_dev_match,
-	.dev_groups = vop_dev_groups,
-	.uevent = vop_uevent,
-	.probe = vop_dev_probe,
-	.remove = vop_dev_remove,
-};
-
-int vop_register_driver(struct vop_driver *driver)
-{
-	driver->driver.bus = &vop_bus;
-	return driver_register(&driver->driver);
-}
-EXPORT_SYMBOL_GPL(vop_register_driver);
-
-void vop_unregister_driver(struct vop_driver *driver)
-{
-	driver_unregister(&driver->driver);
-}
-EXPORT_SYMBOL_GPL(vop_unregister_driver);
-
-static void vop_release_dev(struct device *d)
-{
-	struct vop_device *dev = dev_to_vop(d);
-
-	kfree(dev);
-}
-
-struct vop_device *
-vop_register_device(struct device *pdev, int id,
-		    const struct dma_map_ops *dma_ops,
-		    struct vop_hw_ops *hw_ops, u8 dnode, struct mic_mw *aper,
-		    struct dma_chan *chan)
-{
-	int ret;
-	struct vop_device *vdev;
-
-	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
-	if (!vdev)
-		return ERR_PTR(-ENOMEM);
-
-	vdev->dev.parent = pdev;
-	vdev->id.device = id;
-	vdev->id.vendor = VOP_DEV_ANY_ID;
-	vdev->dev.dma_ops = dma_ops;
-	vdev->dev.dma_mask = &vdev->dev.coherent_dma_mask;
-	dma_set_mask(&vdev->dev, DMA_BIT_MASK(64));
-	vdev->dev.release = vop_release_dev;
-	vdev->hw_ops = hw_ops;
-	vdev->dev.bus = &vop_bus;
-	vdev->dnode = dnode;
-	vdev->aper = aper;
-	vdev->dma_ch = chan;
-	vdev->index = dnode - 1;
-	dev_set_name(&vdev->dev, "vop-dev%u", vdev->index);
-	/*
-	 * device_register() causes the bus infrastructure to look for a
-	 * matching driver.
-	 */
-	ret = device_register(&vdev->dev);
-	if (ret)
-		goto free_vdev;
-	return vdev;
-free_vdev:
-	put_device(&vdev->dev);
-	return ERR_PTR(ret);
-}
-EXPORT_SYMBOL_GPL(vop_register_device);
-
-void vop_unregister_device(struct vop_device *dev)
-{
-	device_unregister(&dev->dev);
-}
-EXPORT_SYMBOL_GPL(vop_unregister_device);
-
-static int __init vop_init(void)
-{
-	return bus_register(&vop_bus);
-}
-
-static void __exit vop_exit(void)
-{
-	bus_unregister(&vop_bus);
-}
-
-core_initcall(vop_init);
-module_exit(vop_exit);
-
-MODULE_AUTHOR("Intel Corporation");
-MODULE_DESCRIPTION("Intel(R) VOP Bus driver");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/bus/vop_bus.h b/drivers/misc/mic/bus/vop_bus.h
deleted file mode 100644
index 4fa02808c1e2..000000000000
--- a/drivers/misc/mic/bus/vop_bus.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Intel Virtio over PCIe Bus driver.
- */
-#ifndef _VOP_BUS_H_
-#define _VOP_BUS_H_
-/*
- * Everything a vop driver needs to work with any particular vop
- * implementation.
- */
-#include <linux/dmaengine.h>
-#include <linux/interrupt.h>
-
-#include "../common/mic_dev.h"
-
-struct vop_device_id {
-	u32 device;
-	u32 vendor;
-};
-
-#define VOP_DEV_TRNSP 1
-#define VOP_DEV_ANY_ID 0xffffffff
-/*
- * Size of the internal buffer used during DMA's as an intermediate buffer
- * for copy to/from user. Must be an integral number of pages.
- */
-#define VOP_INT_DMA_BUF_SIZE PAGE_ALIGN(64 * 1024ULL)
-
-/**
- * vop_device - representation of a device using vop
- * @hw_ops: the hardware ops supported by this device.
- * @id: the device type identification (used to match it with a driver).
- * @dev: underlying device.
- * @dnode - The destination node which this device will communicate with.
- * @aper: Aperture memory window
- * @dma_ch - DMA channel
- * @index: unique position on the vop bus
- */
-struct vop_device {
-	struct vop_hw_ops *hw_ops;
-	struct vop_device_id id;
-	struct device dev;
-	u8 dnode;
-	struct mic_mw *aper;
-	struct dma_chan *dma_ch;
-	int index;
-};
-
-/**
- * vop_driver - operations for a vop I/O driver
- * @driver: underlying device driver (populate name and owner).
- * @id_table: the ids serviced by this driver.
- * @probe: the function to call when a device is found.  Returns 0 or -errno.
- * @remove: the function to call when a device is removed.
- */
-struct vop_driver {
-	struct device_driver driver;
-	const struct vop_device_id *id_table;
-	int (*probe)(struct vop_device *dev);
-	void (*remove)(struct vop_device *dev);
-};
-
-/**
- * vop_hw_ops - Hardware operations for accessing a VOP device on the VOP bus.
- *
- * @next_db: Obtain the next available doorbell.
- * @request_irq: Request an interrupt on a particular doorbell.
- * @free_irq: Free an interrupt requested previously.
- * @ack_interrupt: acknowledge an interrupt in the ISR.
- * @get_remote_dp: Get access to the virtio device page used by the remote
- *                 node to add/remove/configure virtio devices.
- * @get_dp: Get access to the virtio device page used by the self
- *          node to add/remove/configure virtio devices.
- * @send_intr: Send an interrupt to the peer node on a specified doorbell.
- * @remap: Map a buffer with the specified DMA address and length.
- * @unmap: Unmap a buffer previously mapped.
- * @dma_filter: The DMA filter function to use for obtaining access to
- *		a DMA channel on the peer node.
- */
-struct vop_hw_ops {
-	int (*next_db)(struct vop_device *vpdev);
-	struct mic_irq *(*request_irq)(struct vop_device *vpdev,
-				       irqreturn_t (*func)(int irq, void *data),
-				       const char *name, void *data,
-				       int intr_src);
-	void (*free_irq)(struct vop_device *vpdev,
-			 struct mic_irq *cookie, void *data);
-	void (*ack_interrupt)(struct vop_device *vpdev, int num);
-	void __iomem * (*get_remote_dp)(struct vop_device *vpdev);
-	void * (*get_dp)(struct vop_device *vpdev);
-	void (*send_intr)(struct vop_device *vpdev, int db);
-	void __iomem * (*remap)(struct vop_device *vpdev,
-				  dma_addr_t pa, size_t len);
-	void (*unmap)(struct vop_device *vpdev, void __iomem *va);
-};
-
-struct vop_device *
-vop_register_device(struct device *pdev, int id,
-		    const struct dma_map_ops *dma_ops,
-		    struct vop_hw_ops *hw_ops, u8 dnode, struct mic_mw *aper,
-		    struct dma_chan *chan);
-void vop_unregister_device(struct vop_device *dev);
-int vop_register_driver(struct vop_driver *drv);
-void vop_unregister_driver(struct vop_driver *drv);
-
-/*
- * module_vop_driver() - Helper macro for drivers that don't do
- * anything special in module init/exit.  This eliminates a lot of
- * boilerplate.  Each module may only use this macro once, and
- * calling it replaces module_init() and module_exit()
- */
-#define module_vop_driver(__vop_driver) \
-	module_driver(__vop_driver, vop_register_driver, \
-			vop_unregister_driver)
-
-static inline struct vop_device *dev_to_vop(struct device *dev)
-{
-	return container_of(dev, struct vop_device, dev);
-}
-
-static inline struct vop_driver *drv_to_vop(struct device_driver *drv)
-{
-	return container_of(drv, struct vop_driver, driver);
-}
-#endif /* _VOP_BUS_H */
diff --git a/drivers/misc/mic/card/Makefile b/drivers/misc/mic/card/Makefile
deleted file mode 100644
index 921a7e7e0fbd..000000000000
--- a/drivers/misc/mic/card/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile - Intel MIC Linux driver.
-# Copyright(c) 2013, Intel Corporation.
-#
-ccflags-y += -DINTEL_MIC_CARD
-
-obj-$(CONFIG_INTEL_MIC_CARD) += mic_card.o
-mic_card-y += mic_x100.o
-mic_card-y += mic_device.o
-mic_card-y += mic_debugfs.o
diff --git a/drivers/misc/mic/card/mic_debugfs.c b/drivers/misc/mic/card/mic_debugfs.c
deleted file mode 100644
index 4c326e8f4d99..000000000000
--- a/drivers/misc/mic/card/mic_debugfs.c
+++ /dev/null
@@ -1,85 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * Disclaimer: The codes contained in these modules may be specific to
- * the Intel Software Development Platform codenamed: Knights Ferry, and
- * the Intel product codenamed: Knights Corner, and are not backward
- * compatible with other Intel products. Additionally, Intel will NOT
- * support the codes or instruction set in future products.
- *
- * Intel MIC Card driver.
- */
-#include <linux/debugfs.h>
-#include <linux/delay.h>
-#include <linux/seq_file.h>
-#include <linux/interrupt.h>
-#include <linux/device.h>
-
-#include "../common/mic_dev.h"
-#include "mic_device.h"
-
-/* Debugfs parent dir */
-static struct dentry *mic_dbg;
-
-/*
- * mic_intr_show - Send interrupts to host.
- */
-static int mic_intr_show(struct seq_file *s, void *unused)
-{
-	struct mic_driver *mdrv = s->private;
-	struct mic_device *mdev = &mdrv->mdev;
-
-	mic_send_intr(mdev, 0);
-	msleep(1000);
-	mic_send_intr(mdev, 1);
-	msleep(1000);
-	mic_send_intr(mdev, 2);
-	msleep(1000);
-	mic_send_intr(mdev, 3);
-	msleep(1000);
-
-	return 0;
-}
-
-DEFINE_SHOW_ATTRIBUTE(mic_intr);
-
-/*
- * mic_create_card_debug_dir - Initialize MIC debugfs entries.
- */
-void __init mic_create_card_debug_dir(struct mic_driver *mdrv)
-{
-	if (!mic_dbg)
-		return;
-
-	mdrv->dbg_dir = debugfs_create_dir(mdrv->name, mic_dbg);
-
-	debugfs_create_file("intr_test", 0444, mdrv->dbg_dir, mdrv,
-			    &mic_intr_fops);
-}
-
-/*
- * mic_delete_card_debug_dir - Uninitialize MIC debugfs entries.
- */
-void mic_delete_card_debug_dir(struct mic_driver *mdrv)
-{
-	debugfs_remove_recursive(mdrv->dbg_dir);
-}
-
-/*
- * mic_init_card_debugfs - Initialize global debugfs entry.
- */
-void __init mic_init_card_debugfs(void)
-{
-	mic_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL);
-}
-
-/*
- * mic_exit_card_debugfs - Uninitialize global debugfs entry
- */
-void mic_exit_card_debugfs(void)
-{
-	debugfs_remove(mic_dbg);
-}
diff --git a/drivers/misc/mic/card/mic_device.c b/drivers/misc/mic/card/mic_device.c
deleted file mode 100644
index a15606259bdc..000000000000
--- a/drivers/misc/mic/card/mic_device.c
+++ /dev/null
@@ -1,417 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * Disclaimer: The codes contained in these modules may be specific to
- * the Intel Software Development Platform codenamed: Knights Ferry, and
- * the Intel product codenamed: Knights Corner, and are not backward
- * compatible with other Intel products. Additionally, Intel will NOT
- * support the codes or instruction set in future products.
- *
- * Intel MIC Card driver.
- */
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include <linux/reboot.h>
-#include <linux/dmaengine.h>
-#include <linux/kmod.h>
-
-#include <linux/mic_common.h>
-#include "../common/mic_dev.h"
-#include "mic_device.h"
-
-static struct mic_driver *g_drv;
-
-static int __init mic_dp_init(void)
-{
-	struct mic_driver *mdrv = g_drv;
-	struct mic_device *mdev = &mdrv->mdev;
-	struct mic_bootparam __iomem *bootparam;
-	u64 lo, hi, dp_dma_addr;
-	u32 magic;
-
-	lo = mic_read_spad(&mdrv->mdev, MIC_DPLO_SPAD);
-	hi = mic_read_spad(&mdrv->mdev, MIC_DPHI_SPAD);
-
-	dp_dma_addr = lo | (hi << 32);
-	mdrv->dp = mic_card_map(mdev, dp_dma_addr, MIC_DP_SIZE);
-	if (!mdrv->dp) {
-		dev_err(mdrv->dev, "Cannot remap Aperture BAR\n");
-		return -ENOMEM;
-	}
-	bootparam = mdrv->dp;
-	magic = ioread32(&bootparam->magic);
-	if (MIC_MAGIC != magic) {
-		dev_err(mdrv->dev, "bootparam magic mismatch 0x%x\n", magic);
-		return -EIO;
-	}
-	return 0;
-}
-
-/* Uninitialize the device page */
-static void mic_dp_uninit(void)
-{
-	mic_card_unmap(&g_drv->mdev, g_drv->dp);
-}
-
-/**
- * mic_request_card_irq - request an irq.
- *
- * @handler: interrupt handler passed to request_threaded_irq.
- * @thread_fn: thread fn. passed to request_threaded_irq.
- * @name: The ASCII name of the callee requesting the irq.
- * @data: private data that is returned back when calling the
- * function handler.
- * @index: The doorbell index of the requester.
- *
- * returns: The cookie that is transparent to the caller. Passed
- * back when calling mic_free_irq. An appropriate error code
- * is returned on failure. Caller needs to use IS_ERR(return_val)
- * to check for failure and PTR_ERR(return_val) to obtained the
- * error code.
- *
- */
-struct mic_irq *
-mic_request_card_irq(irq_handler_t handler,
-		     irq_handler_t thread_fn, const char *name,
-		     void *data, int index)
-{
-	int rc = 0;
-	unsigned long cookie;
-	struct mic_driver *mdrv = g_drv;
-
-	rc  = request_threaded_irq(mic_db_to_irq(mdrv, index), handler,
-				   thread_fn, 0, name, data);
-	if (rc) {
-		dev_err(mdrv->dev, "request_threaded_irq failed rc = %d\n", rc);
-		goto err;
-	}
-	mdrv->irq_info.irq_usage_count[index]++;
-	cookie = index;
-	return (struct mic_irq *)cookie;
-err:
-	return ERR_PTR(rc);
-}
-
-/**
- * mic_free_card_irq - free irq.
- *
- * @cookie: cookie obtained during a successful call to mic_request_threaded_irq
- * @data: private data specified by the calling function during the
- * mic_request_threaded_irq
- *
- * returns: none.
- */
-void mic_free_card_irq(struct mic_irq *cookie, void *data)
-{
-	int index;
-	struct mic_driver *mdrv = g_drv;
-
-	index = (unsigned long)cookie & 0xFFFFU;
-	free_irq(mic_db_to_irq(mdrv, index), data);
-	mdrv->irq_info.irq_usage_count[index]--;
-}
-
-/**
- * mic_next_card_db - Get the doorbell with minimum usage count.
- *
- * Returns the irq index.
- */
-int mic_next_card_db(void)
-{
-	int i;
-	int index = 0;
-	struct mic_driver *mdrv = g_drv;
-
-	for (i = 0; i < mdrv->intr_info.num_intr; i++) {
-		if (mdrv->irq_info.irq_usage_count[i] <
-			mdrv->irq_info.irq_usage_count[index])
-			index = i;
-	}
-
-	return index;
-}
-
-/**
- * mic_init_irq - Initialize irq information.
- *
- * Returns 0 in success. Appropriate error code on failure.
- */
-static int mic_init_irq(void)
-{
-	struct mic_driver *mdrv = g_drv;
-
-	mdrv->irq_info.irq_usage_count = kzalloc((sizeof(u32) *
-			mdrv->intr_info.num_intr),
-			GFP_KERNEL);
-	if (!mdrv->irq_info.irq_usage_count)
-		return -ENOMEM;
-	return 0;
-}
-
-/**
- * mic_uninit_irq - Uninitialize irq information.
- *
- * None.
- */
-static void mic_uninit_irq(void)
-{
-	struct mic_driver *mdrv = g_drv;
-
-	kfree(mdrv->irq_info.irq_usage_count);
-}
-
-static inline struct mic_driver *scdev_to_mdrv(struct scif_hw_dev *scdev)
-{
-	return dev_get_drvdata(scdev->dev.parent);
-}
-
-static struct mic_irq *
-___mic_request_irq(struct scif_hw_dev *scdev,
-		   irqreturn_t (*func)(int irq, void *data),
-				       const char *name, void *data,
-				       int db)
-{
-	return mic_request_card_irq(func, NULL, name, data, db);
-}
-
-static void
-___mic_free_irq(struct scif_hw_dev *scdev,
-		struct mic_irq *cookie, void *data)
-{
-	return mic_free_card_irq(cookie, data);
-}
-
-static void ___mic_ack_interrupt(struct scif_hw_dev *scdev, int num)
-{
-	struct mic_driver *mdrv = scdev_to_mdrv(scdev);
-
-	mic_ack_interrupt(&mdrv->mdev);
-}
-
-static int ___mic_next_db(struct scif_hw_dev *scdev)
-{
-	return mic_next_card_db();
-}
-
-static void ___mic_send_intr(struct scif_hw_dev *scdev, int db)
-{
-	struct mic_driver *mdrv = scdev_to_mdrv(scdev);
-
-	mic_send_intr(&mdrv->mdev, db);
-}
-
-static void ___mic_send_p2p_intr(struct scif_hw_dev *scdev, int db,
-				 struct mic_mw *mw)
-{
-	mic_send_p2p_intr(db, mw);
-}
-
-static void __iomem *
-___mic_ioremap(struct scif_hw_dev *scdev,
-	       phys_addr_t pa, size_t len)
-{
-	struct mic_driver *mdrv = scdev_to_mdrv(scdev);
-
-	return mic_card_map(&mdrv->mdev, pa, len);
-}
-
-static void ___mic_iounmap(struct scif_hw_dev *scdev, void __iomem *va)
-{
-	struct mic_driver *mdrv = scdev_to_mdrv(scdev);
-
-	mic_card_unmap(&mdrv->mdev, va);
-}
-
-static struct scif_hw_ops scif_hw_ops = {
-	.request_irq = ___mic_request_irq,
-	.free_irq = ___mic_free_irq,
-	.ack_interrupt = ___mic_ack_interrupt,
-	.next_db = ___mic_next_db,
-	.send_intr = ___mic_send_intr,
-	.send_p2p_intr = ___mic_send_p2p_intr,
-	.remap = ___mic_ioremap,
-	.unmap = ___mic_iounmap,
-};
-
-static inline struct mic_driver *vpdev_to_mdrv(struct vop_device *vpdev)
-{
-	return dev_get_drvdata(vpdev->dev.parent);
-}
-
-static struct mic_irq *
-__mic_request_irq(struct vop_device *vpdev,
-		  irqreturn_t (*func)(int irq, void *data),
-		   const char *name, void *data, int intr_src)
-{
-	return mic_request_card_irq(func, NULL, name, data, intr_src);
-}
-
-static void __mic_free_irq(struct vop_device *vpdev,
-			   struct mic_irq *cookie, void *data)
-{
-	return mic_free_card_irq(cookie, data);
-}
-
-static void __mic_ack_interrupt(struct vop_device *vpdev, int num)
-{
-	struct mic_driver *mdrv = vpdev_to_mdrv(vpdev);
-
-	mic_ack_interrupt(&mdrv->mdev);
-}
-
-static int __mic_next_db(struct vop_device *vpdev)
-{
-	return mic_next_card_db();
-}
-
-static void __iomem *__mic_get_remote_dp(struct vop_device *vpdev)
-{
-	struct mic_driver *mdrv = vpdev_to_mdrv(vpdev);
-
-	return mdrv->dp;
-}
-
-static void __mic_send_intr(struct vop_device *vpdev, int db)
-{
-	struct mic_driver *mdrv = vpdev_to_mdrv(vpdev);
-
-	mic_send_intr(&mdrv->mdev, db);
-}
-
-static void __iomem *__mic_ioremap(struct vop_device *vpdev,
-				   dma_addr_t pa, size_t len)
-{
-	struct mic_driver *mdrv = vpdev_to_mdrv(vpdev);
-
-	return mic_card_map(&mdrv->mdev, pa, len);
-}
-
-static void __mic_iounmap(struct vop_device *vpdev, void __iomem *va)
-{
-	struct mic_driver *mdrv = vpdev_to_mdrv(vpdev);
-
-	mic_card_unmap(&mdrv->mdev, va);
-}
-
-static struct vop_hw_ops vop_hw_ops = {
-	.request_irq = __mic_request_irq,
-	.free_irq = __mic_free_irq,
-	.ack_interrupt = __mic_ack_interrupt,
-	.next_db = __mic_next_db,
-	.get_remote_dp = __mic_get_remote_dp,
-	.send_intr = __mic_send_intr,
-	.remap = __mic_ioremap,
-	.unmap = __mic_iounmap,
-};
-
-static int mic_request_dma_chans(struct mic_driver *mdrv)
-{
-	dma_cap_mask_t mask;
-	struct dma_chan *chan;
-
-	dma_cap_zero(mask);
-	dma_cap_set(DMA_MEMCPY, mask);
-
-	do {
-		chan = dma_request_channel(mask, NULL, NULL);
-		if (chan) {
-			mdrv->dma_ch[mdrv->num_dma_ch++] = chan;
-			if (mdrv->num_dma_ch >= MIC_MAX_DMA_CHAN)
-				break;
-		}
-	} while (chan);
-	dev_info(mdrv->dev, "DMA channels # %d\n", mdrv->num_dma_ch);
-	return mdrv->num_dma_ch;
-}
-
-static void mic_free_dma_chans(struct mic_driver *mdrv)
-{
-	int i = 0;
-
-	for (i = 0; i < mdrv->num_dma_ch; i++) {
-		dma_release_channel(mdrv->dma_ch[i]);
-		mdrv->dma_ch[i] = NULL;
-	}
-	mdrv->num_dma_ch = 0;
-}
-
-/*
- * mic_driver_init - MIC driver initialization tasks.
- *
- * Returns 0 in success. Appropriate error code on failure.
- */
-int __init mic_driver_init(struct mic_driver *mdrv)
-{
-	int rc;
-	struct mic_bootparam __iomem *bootparam;
-	u8 node_id;
-
-	g_drv = mdrv;
-	/* Unloading the card module is not supported. */
-	if (!try_module_get(mdrv->dev->driver->owner)) {
-		rc = -ENODEV;
-		goto done;
-	}
-	rc = mic_dp_init();
-	if (rc)
-		goto put;
-	rc = mic_init_irq();
-	if (rc)
-		goto dp_uninit;
-	if (!mic_request_dma_chans(mdrv)) {
-		rc = -ENODEV;
-		goto irq_uninit;
-	}
-	mdrv->vpdev = vop_register_device(mdrv->dev, VOP_DEV_TRNSP,
-					  NULL, &vop_hw_ops, 0,
-					  NULL, mdrv->dma_ch[0]);
-	if (IS_ERR(mdrv->vpdev)) {
-		rc = PTR_ERR(mdrv->vpdev);
-		goto dma_free;
-	}
-	bootparam = mdrv->dp;
-	node_id = ioread8(&bootparam->node_id);
-	mdrv->scdev = scif_register_device(mdrv->dev, MIC_SCIF_DEV,
-					   NULL, &scif_hw_ops,
-					   0, node_id, &mdrv->mdev.mmio, NULL,
-					   NULL, mdrv->dp, mdrv->dma_ch,
-					   mdrv->num_dma_ch, true);
-	if (IS_ERR(mdrv->scdev)) {
-		rc = PTR_ERR(mdrv->scdev);
-		goto vop_remove;
-	}
-	mic_create_card_debug_dir(mdrv);
-done:
-	return rc;
-vop_remove:
-	vop_unregister_device(mdrv->vpdev);
-dma_free:
-	mic_free_dma_chans(mdrv);
-irq_uninit:
-	mic_uninit_irq();
-dp_uninit:
-	mic_dp_uninit();
-put:
-	module_put(mdrv->dev->driver->owner);
-	return rc;
-}
-
-/*
- * mic_driver_uninit - MIC driver uninitialization tasks.
- *
- * Returns None
- */
-void mic_driver_uninit(struct mic_driver *mdrv)
-{
-	mic_delete_card_debug_dir(mdrv);
-	scif_unregister_device(mdrv->scdev);
-	vop_unregister_device(mdrv->vpdev);
-	mic_free_dma_chans(mdrv);
-	mic_uninit_irq();
-	mic_dp_uninit();
-	module_put(mdrv->dev->driver->owner);
-}
diff --git a/drivers/misc/mic/card/mic_device.h b/drivers/misc/mic/card/mic_device.h
deleted file mode 100644
index d6cc69a235a3..000000000000
--- a/drivers/misc/mic/card/mic_device.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * Disclaimer: The codes contained in these modules may be specific to
- * the Intel Software Development Platform codenamed: Knights Ferry, and
- * the Intel product codenamed: Knights Corner, and are not backward
- * compatible with other Intel products. Additionally, Intel will NOT
- * support the codes or instruction set in future products.
- *
- * Intel MIC Card driver.
- */
-#ifndef _MIC_CARD_DEVICE_H_
-#define _MIC_CARD_DEVICE_H_
-
-#include <linux/workqueue.h>
-#include <linux/io.h>
-#include <linux/interrupt.h>
-#include <linux/mic_bus.h>
-#include "../bus/scif_bus.h"
-#include "../bus/vop_bus.h"
-
-/**
- * struct mic_intr_info - Contains h/w specific interrupt sources info
- *
- * @num_intr: The number of irqs available
- */
-struct mic_intr_info {
-	u32 num_intr;
-};
-
-/**
- * struct mic_irq_info - OS specific irq information
- *
- * @irq_usage_count: usage count array tracking the number of sources
- * assigned for each irq.
- */
-struct mic_irq_info {
-	int *irq_usage_count;
-};
-
-/**
- * struct mic_device -  MIC device information.
- *
- * @mmio: MMIO bar information.
- */
-struct mic_device {
-	struct mic_mw mmio;
-};
-
-/**
- * struct mic_driver - MIC card driver information.
- *
- * @name: Name for MIC driver.
- * @dbg_dir: debugfs directory of this MIC device.
- * @dev: The device backing this MIC.
- * @dp: The pointer to the virtio device page.
- * @mdev: MIC device information for the host.
- * @hotplug_work: Hot plug work for adding/removing virtio devices.
- * @irq_info: The OS specific irq information
- * @intr_info: H/W specific interrupt information.
- * @dma_mbdev: dma device on the MIC virtual bus.
- * @dma_ch - Array of DMA channels
- * @num_dma_ch - Number of DMA channels available
- * @scdev: SCIF device on the SCIF virtual bus.
- * @vpdev: Virtio over PCIe device on the VOP virtual bus.
- */
-struct mic_driver {
-	char name[20];
-	struct dentry *dbg_dir;
-	struct device *dev;
-	void __iomem *dp;
-	struct mic_device mdev;
-	struct work_struct hotplug_work;
-	struct mic_irq_info irq_info;
-	struct mic_intr_info intr_info;
-	struct mbus_device *dma_mbdev;
-	struct dma_chan *dma_ch[MIC_MAX_DMA_CHAN];
-	int num_dma_ch;
-	struct scif_hw_dev *scdev;
-	struct vop_device *vpdev;
-};
-
-/**
- * struct mic_irq - opaque pointer used as cookie
- */
-struct mic_irq;
-
-/**
- * mic_mmio_read - read from an MMIO register.
- * @mw: MMIO register base virtual address.
- * @offset: register offset.
- *
- * RETURNS: register value.
- */
-static inline u32 mic_mmio_read(struct mic_mw *mw, u32 offset)
-{
-	return ioread32(mw->va + offset);
-}
-
-/**
- * mic_mmio_write - write to an MMIO register.
- * @mw: MMIO register base virtual address.
- * @val: the data value to put into the register
- * @offset: register offset.
- *
- * RETURNS: none.
- */
-static inline void
-mic_mmio_write(struct mic_mw *mw, u32 val, u32 offset)
-{
-	iowrite32(val, mw->va + offset);
-}
-
-int mic_driver_init(struct mic_driver *mdrv);
-void mic_driver_uninit(struct mic_driver *mdrv);
-int mic_next_card_db(void);
-struct mic_irq *
-mic_request_card_irq(irq_handler_t handler, irq_handler_t thread_fn,
-		     const char *name, void *data, int db);
-void mic_free_card_irq(struct mic_irq *cookie, void *data);
-u32 mic_read_spad(struct mic_device *mdev, unsigned int idx);
-void mic_send_intr(struct mic_device *mdev, int doorbell);
-void mic_send_p2p_intr(int doorbell, struct mic_mw *mw);
-int mic_db_to_irq(struct mic_driver *mdrv, int db);
-u32 mic_ack_interrupt(struct mic_device *mdev);
-void mic_hw_intr_init(struct mic_driver *mdrv);
-void __iomem *
-mic_card_map(struct mic_device *mdev, dma_addr_t addr, size_t size);
-void mic_card_unmap(struct mic_device *mdev, void __iomem *addr);
-void __init mic_create_card_debug_dir(struct mic_driver *mdrv);
-void mic_delete_card_debug_dir(struct mic_driver *mdrv);
-void __init mic_init_card_debugfs(void);
-void mic_exit_card_debugfs(void);
-#endif
diff --git a/drivers/misc/mic/card/mic_x100.c b/drivers/misc/mic/card/mic_x100.c
deleted file mode 100644
index c8bff2916d3d..000000000000
--- a/drivers/misc/mic/card/mic_x100.c
+++ /dev/null
@@ -1,347 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * Disclaimer: The codes contained in these modules may be specific to
- * the Intel Software Development Platform codenamed: Knights Ferry, and
- * the Intel product codenamed: Knights Corner, and are not backward
- * compatible with other Intel products. Additionally, Intel will NOT
- * support the codes or instruction set in future products.
- *
- * Intel MIC Card driver.
- */
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/platform_device.h>
-
-#include "../common/mic_dev.h"
-#include "mic_device.h"
-#include "mic_x100.h"
-
-static const char mic_driver_name[] = "mic";
-
-static struct mic_driver g_drv;
-
-/**
- * mic_read_spad - read from the scratchpad register
- * @mdev: pointer to mic_device instance
- * @idx: index to scratchpad register, 0 based
- *
- * This function allows reading of the 32bit scratchpad register.
- *
- * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
- */
-u32 mic_read_spad(struct mic_device *mdev, unsigned int idx)
-{
-	return mic_mmio_read(&mdev->mmio,
-		MIC_X100_SBOX_BASE_ADDRESS +
-		MIC_X100_SBOX_SPAD0 + idx * 4);
-}
-
-/**
- * __mic_send_intr - Send interrupt to Host.
- * @mdev: pointer to mic_device instance
- * @doorbell: Doorbell number.
- */
-void mic_send_intr(struct mic_device *mdev, int doorbell)
-{
-	struct mic_mw *mw = &mdev->mmio;
-
-	if (doorbell > MIC_X100_MAX_DOORBELL_IDX)
-		return;
-	/* Ensure that the interrupt is ordered w.r.t previous stores. */
-	wmb();
-	mic_mmio_write(mw, MIC_X100_SBOX_SDBIC0_DBREQ_BIT,
-		       MIC_X100_SBOX_BASE_ADDRESS +
-		       (MIC_X100_SBOX_SDBIC0 + (4 * doorbell)));
-}
-
-/*
- * mic_x100_send_sbox_intr - Send an MIC_X100_SBOX interrupt to MIC.
- */
-static void mic_x100_send_sbox_intr(struct mic_mw *mw, int doorbell)
-{
-	u64 apic_icr_offset = MIC_X100_SBOX_APICICR0 + doorbell * 8;
-	u32 apicicr_low = mic_mmio_read(mw, MIC_X100_SBOX_BASE_ADDRESS +
-					apic_icr_offset);
-
-	/* for MIC we need to make sure we "hit" the send_icr bit (13) */
-	apicicr_low = (apicicr_low | (1 << 13));
-	/*
-	 * Ensure that the interrupt is ordered w.r.t. previous stores
-	 * to main memory. Fence instructions are not implemented in X100
-	 * since execution is in order but a compiler barrier is still
-	 * required.
-	 */
-	wmb();
-	mic_mmio_write(mw, apicicr_low,
-		       MIC_X100_SBOX_BASE_ADDRESS + apic_icr_offset);
-}
-
-static void mic_x100_send_rdmasr_intr(struct mic_mw *mw, int doorbell)
-{
-	int rdmasr_offset = MIC_X100_SBOX_RDMASR0 + (doorbell << 2);
-	/*
-	 * Ensure that the interrupt is ordered w.r.t. previous stores
-	 * to main memory. Fence instructions are not implemented in X100
-	 * since execution is in order but a compiler barrier is still
-	 * required.
-	 */
-	wmb();
-	mic_mmio_write(mw, 0, MIC_X100_SBOX_BASE_ADDRESS + rdmasr_offset);
-}
-
-/**
- * mic_ack_interrupt - Device specific interrupt handling.
- * @mdev: pointer to mic_device instance
- *
- * Returns: bitmask of doorbell events triggered.
- */
-u32 mic_ack_interrupt(struct mic_device *mdev)
-{
-	return 0;
-}
-
-static inline int mic_get_sbox_irq(int db)
-{
-	return MIC_X100_IRQ_BASE + db;
-}
-
-static inline int mic_get_rdmasr_irq(int index)
-{
-	return  MIC_X100_RDMASR_IRQ_BASE + index;
-}
-
-void mic_send_p2p_intr(int db, struct mic_mw *mw)
-{
-	int rdmasr_index;
-
-	if (db < MIC_X100_NUM_SBOX_IRQ) {
-		mic_x100_send_sbox_intr(mw, db);
-	} else {
-		rdmasr_index = db - MIC_X100_NUM_SBOX_IRQ;
-		mic_x100_send_rdmasr_intr(mw, rdmasr_index);
-	}
-}
-
-/**
- * mic_hw_intr_init - Initialize h/w specific interrupt
- * information.
- * @mdrv: pointer to mic_driver
- */
-void mic_hw_intr_init(struct mic_driver *mdrv)
-{
-	mdrv->intr_info.num_intr = MIC_X100_NUM_SBOX_IRQ +
-				MIC_X100_NUM_RDMASR_IRQ;
-}
-
-/**
- * mic_db_to_irq - Retrieve irq number corresponding to a doorbell.
- * @mdrv: pointer to mic_driver
- * @db: The doorbell obtained for which the irq is needed. Doorbell
- * may correspond to an sbox doorbell or an rdmasr index.
- *
- * Returns the irq corresponding to the doorbell.
- */
-int mic_db_to_irq(struct mic_driver *mdrv, int db)
-{
-	int rdmasr_index;
-
-	/*
-	 * The total number of doorbell interrupts on the card are 16. Indices
-	 * 0-8 falls in the SBOX category and 8-15 fall in the RDMASR category.
-	 */
-	if (db < MIC_X100_NUM_SBOX_IRQ) {
-		return mic_get_sbox_irq(db);
-	} else {
-		rdmasr_index = db - MIC_X100_NUM_SBOX_IRQ;
-		return mic_get_rdmasr_irq(rdmasr_index);
-	}
-}
-
-/*
- * mic_card_map - Allocate virtual address for a remote memory region.
- * @mdev: pointer to mic_device instance.
- * @addr: Remote DMA address.
- * @size: Size of the region.
- *
- * Returns: Virtual address backing the remote memory region.
- */
-void __iomem *
-mic_card_map(struct mic_device *mdev, dma_addr_t addr, size_t size)
-{
-	return ioremap(addr, size);
-}
-
-/*
- * mic_card_unmap - Unmap the virtual address for a remote memory region.
- * @mdev: pointer to mic_device instance.
- * @addr: Virtual address for remote memory region.
- *
- * Returns: None.
- */
-void mic_card_unmap(struct mic_device *mdev, void __iomem *addr)
-{
-	iounmap(addr);
-}
-
-static inline struct mic_driver *mbdev_to_mdrv(struct mbus_device *mbdev)
-{
-	return dev_get_drvdata(mbdev->dev.parent);
-}
-
-static struct mic_irq *
-_mic_request_threaded_irq(struct mbus_device *mbdev,
-			  irq_handler_t handler, irq_handler_t thread_fn,
-			  const char *name, void *data, int intr_src)
-{
-	int rc = 0;
-	unsigned int irq = intr_src;
-	unsigned long cookie = irq;
-
-	rc  = request_threaded_irq(irq, handler, thread_fn, 0, name, data);
-	if (rc) {
-		dev_err(mbdev_to_mdrv(mbdev)->dev,
-			"request_threaded_irq failed rc = %d\n", rc);
-		return ERR_PTR(rc);
-	}
-	return (struct mic_irq *)cookie;
-}
-
-static void _mic_free_irq(struct mbus_device *mbdev,
-			  struct mic_irq *cookie, void *data)
-{
-	unsigned long irq = (unsigned long)cookie;
-	free_irq(irq, data);
-}
-
-static void _mic_ack_interrupt(struct mbus_device *mbdev, int num)
-{
-	mic_ack_interrupt(&mbdev_to_mdrv(mbdev)->mdev);
-}
-
-static struct mbus_hw_ops mbus_hw_ops = {
-	.request_threaded_irq = _mic_request_threaded_irq,
-	.free_irq = _mic_free_irq,
-	.ack_interrupt = _mic_ack_interrupt,
-};
-
-static int __init mic_probe(struct platform_device *pdev)
-{
-	struct mic_driver *mdrv = &g_drv;
-	struct mic_device *mdev = &mdrv->mdev;
-	int rc = 0;
-
-	mdrv->dev = &pdev->dev;
-	snprintf(mdrv->name, sizeof(mic_driver_name), mic_driver_name);
-
-	/* FIXME: use dma_set_mask_and_coherent() and check result */
-	dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
-
-	mdev->mmio.pa = MIC_X100_MMIO_BASE;
-	mdev->mmio.len = MIC_X100_MMIO_LEN;
-	mdev->mmio.va = devm_ioremap(&pdev->dev, MIC_X100_MMIO_BASE,
-				     MIC_X100_MMIO_LEN);
-	if (!mdev->mmio.va) {
-		dev_err(&pdev->dev, "Cannot remap MMIO BAR\n");
-		rc = -EIO;
-		goto done;
-	}
-	mic_hw_intr_init(mdrv);
-	platform_set_drvdata(pdev, mdrv);
-	mdrv->dma_mbdev = mbus_register_device(mdrv->dev, MBUS_DEV_DMA_MIC,
-					       NULL, &mbus_hw_ops, 0,
-					       mdrv->mdev.mmio.va);
-	if (IS_ERR(mdrv->dma_mbdev)) {
-		rc = PTR_ERR(mdrv->dma_mbdev);
-		dev_err(&pdev->dev, "mbus_add_device failed rc %d\n", rc);
-		goto done;
-	}
-	rc = mic_driver_init(mdrv);
-	if (rc) {
-		dev_err(&pdev->dev, "mic_driver_init failed rc %d\n", rc);
-		goto remove_dma;
-	}
-done:
-	return rc;
-remove_dma:
-	mbus_unregister_device(mdrv->dma_mbdev);
-	return rc;
-}
-
-static int mic_remove(struct platform_device *pdev)
-{
-	struct mic_driver *mdrv = &g_drv;
-
-	mic_driver_uninit(mdrv);
-	mbus_unregister_device(mdrv->dma_mbdev);
-	return 0;
-}
-
-static void mic_platform_shutdown(struct platform_device *pdev)
-{
-	mic_remove(pdev);
-}
-
-static struct platform_driver __refdata mic_platform_driver = {
-	.probe = mic_probe,
-	.remove = mic_remove,
-	.shutdown = mic_platform_shutdown,
-	.driver         = {
-		.name   = mic_driver_name,
-	},
-};
-
-static struct platform_device *mic_platform_dev;
-
-static int __init mic_init(void)
-{
-	int ret;
-	struct cpuinfo_x86 *c = &cpu_data(0);
-
-	if (!(c->x86 == 11 && c->x86_model == 1)) {
-		ret = -ENODEV;
-		pr_err("%s not running on X100 ret %d\n", __func__, ret);
-		goto done;
-	}
-
-	request_module("mic_x100_dma");
-	mic_init_card_debugfs();
-
-	mic_platform_dev = platform_device_register_simple(mic_driver_name,
-							   0, NULL, 0);
-	ret = PTR_ERR_OR_ZERO(mic_platform_dev);
-	if (ret) {
-		pr_err("platform_device_register_full ret %d\n", ret);
-		goto cleanup_debugfs;
-	}
-	ret = platform_driver_register(&mic_platform_driver);
-	if (ret) {
-		pr_err("platform_driver_register ret %d\n", ret);
-		goto device_unregister;
-	}
-	return ret;
-
-device_unregister:
-	platform_device_unregister(mic_platform_dev);
-cleanup_debugfs:
-	mic_exit_card_debugfs();
-done:
-	return ret;
-}
-
-static void __exit mic_exit(void)
-{
-	platform_driver_unregister(&mic_platform_driver);
-	platform_device_unregister(mic_platform_dev);
-	mic_exit_card_debugfs();
-}
-
-module_init(mic_init);
-module_exit(mic_exit);
-
-MODULE_AUTHOR("Intel Corporation");
-MODULE_DESCRIPTION("Intel(R) MIC X100 Card driver");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/card/mic_x100.h b/drivers/misc/mic/card/mic_x100.h
deleted file mode 100644
index 46644dde0c07..000000000000
--- a/drivers/misc/mic/card/mic_x100.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * Disclaimer: The codes contained in these modules may be specific to
- * the Intel Software Development Platform codenamed: Knights Ferry, and
- * the Intel product codenamed: Knights Corner, and are not backward
- * compatible with other Intel products. Additionally, Intel will NOT
- * support the codes or instruction set in future products.
- *
- * Intel MIC Card driver.
- */
-#ifndef _MIC_X100_CARD_H_
-#define _MIC_X100_CARD_H_
-
-#define MIC_X100_MMIO_BASE 0x08007C0000ULL
-#define MIC_X100_MMIO_LEN 0x00020000ULL
-#define MIC_X100_SBOX_BASE_ADDRESS 0x00010000ULL
-
-#define MIC_X100_SBOX_SPAD0 0x0000AB20
-#define MIC_X100_SBOX_SDBIC0 0x0000CC90
-#define MIC_X100_SBOX_SDBIC0_DBREQ_BIT 0x80000000
-#define MIC_X100_SBOX_RDMASR0	0x0000B180
-#define MIC_X100_SBOX_APICICR0 0x0000A9D0
-
-#define MIC_X100_MAX_DOORBELL_IDX 8
-
-#define MIC_X100_NUM_SBOX_IRQ 8
-#define MIC_X100_NUM_RDMASR_IRQ 8
-#define MIC_X100_SBOX_IRQ_BASE 0
-#define MIC_X100_RDMASR_IRQ_BASE 17
-
-#define MIC_X100_IRQ_BASE 26
-
-#endif
diff --git a/drivers/misc/mic/common/mic_dev.h b/drivers/misc/mic/common/mic_dev.h
deleted file mode 100644
index f94f08df0260..000000000000
--- a/drivers/misc/mic/common/mic_dev.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * Intel MIC driver.
- */
-#ifndef __MIC_DEV_H__
-#define __MIC_DEV_H__
-
-/* The maximum number of MIC devices supported in a single host system. */
-#define MIC_MAX_NUM_DEVS 128
-
-/**
- * enum mic_hw_family - The hardware family to which a device belongs.
- */
-enum mic_hw_family {
-	MIC_FAMILY_X100 = 0,
-	MIC_FAMILY_X200,
-	MIC_FAMILY_UNKNOWN,
-	MIC_FAMILY_LAST
-};
-
-/**
- * struct mic_mw - MIC memory window
- *
- * @pa: Base physical address.
- * @va: Base ioremap'd virtual address.
- * @len: Size of the memory window.
- */
-struct mic_mw {
-	phys_addr_t pa;
-	void __iomem *va;
-	resource_size_t len;
-};
-
-/*
- * Scratch pad register offsets used by the host to communicate
- * device page DMA address to the card.
- */
-#define MIC_DPLO_SPAD 14
-#define MIC_DPHI_SPAD 15
-
-/*
- * These values are supposed to be in the config_change field of the
- * device page when the host sends a config change interrupt to the card.
- */
-#define MIC_VIRTIO_PARAM_DEV_REMOVE 0x1
-#define MIC_VIRTIO_PARAM_CONFIG_CHANGED 0x2
-
-/* Maximum number of DMA channels */
-#define MIC_MAX_DMA_CHAN 4
-
-#endif
diff --git a/drivers/misc/mic/cosm/Makefile b/drivers/misc/mic/cosm/Makefile
deleted file mode 100644
index 97d74cb12030..000000000000
--- a/drivers/misc/mic/cosm/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile - Intel MIC Coprocessor State Management (COSM) Driver
-# Copyright(c) 2015, Intel Corporation.
-#
-obj-$(CONFIG_MIC_COSM) += mic_cosm.o
-
-mic_cosm-objs := cosm_main.o
-mic_cosm-objs += cosm_debugfs.o
-mic_cosm-objs += cosm_sysfs.o
-mic_cosm-objs += cosm_scif_server.o
diff --git a/drivers/misc/mic/cosm/cosm_debugfs.c b/drivers/misc/mic/cosm/cosm_debugfs.c
deleted file mode 100644
index cb55653cf1f9..000000000000
--- a/drivers/misc/mic/cosm/cosm_debugfs.c
+++ /dev/null
@@ -1,116 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * Intel MIC Coprocessor State Management (COSM) Driver
- */
-
-#include <linux/debugfs.h>
-#include <linux/slab.h>
-#include <linux/io.h>
-#include "cosm_main.h"
-
-/* Debugfs parent dir */
-static struct dentry *cosm_dbg;
-
-/*
- * log_buf_show - Display MIC kernel log buffer
- *
- * log_buf addr/len is read from System.map by user space
- * and populated in sysfs entries.
- */
-static int log_buf_show(struct seq_file *s, void *unused)
-{
-	void __iomem *log_buf_va;
-	int __iomem *log_buf_len_va;
-	struct cosm_device *cdev = s->private;
-	void *kva;
-	int size;
-	u64 aper_offset;
-
-	if (!cdev || !cdev->log_buf_addr || !cdev->log_buf_len)
-		goto done;
-
-	mutex_lock(&cdev->cosm_mutex);
-	switch (cdev->state) {
-	case MIC_BOOTING:
-	case MIC_ONLINE:
-	case MIC_SHUTTING_DOWN:
-		break;
-	default:
-		goto unlock;
-	}
-
-	/*
-	 * Card kernel will never be relocated and any kernel text/data mapping
-	 * can be translated to phys address by subtracting __START_KERNEL_map.
-	 */
-	aper_offset = (u64)cdev->log_buf_len - __START_KERNEL_map;
-	log_buf_len_va = cdev->hw_ops->aper(cdev)->va + aper_offset;
-	aper_offset = (u64)cdev->log_buf_addr - __START_KERNEL_map;
-	log_buf_va = cdev->hw_ops->aper(cdev)->va + aper_offset;
-
-	size = ioread32(log_buf_len_va);
-	kva = kmalloc(size, GFP_KERNEL);
-	if (!kva)
-		goto unlock;
-
-	memcpy_fromio(kva, log_buf_va, size);
-	seq_write(s, kva, size);
-	kfree(kva);
-unlock:
-	mutex_unlock(&cdev->cosm_mutex);
-done:
-	return 0;
-}
-
-DEFINE_SHOW_ATTRIBUTE(log_buf);
-
-/*
- * force_reset_show - Force MIC reset
- *
- * Invokes the force_reset COSM bus op instead of the standard reset
- * op in case a force reset of the MIC device is required
- */
-static int force_reset_show(struct seq_file *s, void *pos)
-{
-	struct cosm_device *cdev = s->private;
-
-	cosm_stop(cdev, true);
-	return 0;
-}
-
-DEFINE_SHOW_ATTRIBUTE(force_reset);
-
-void cosm_create_debug_dir(struct cosm_device *cdev)
-{
-	char name[16];
-
-	if (!cosm_dbg)
-		return;
-
-	scnprintf(name, sizeof(name), "mic%d", cdev->index);
-	cdev->dbg_dir = debugfs_create_dir(name, cosm_dbg);
-
-	debugfs_create_file("log_buf", 0444, cdev->dbg_dir, cdev,
-			    &log_buf_fops);
-	debugfs_create_file("force_reset", 0444, cdev->dbg_dir, cdev,
-			    &force_reset_fops);
-}
-
-void cosm_delete_debug_dir(struct cosm_device *cdev)
-{
-	debugfs_remove_recursive(cdev->dbg_dir);
-}
-
-void cosm_init_debugfs(void)
-{
-	cosm_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL);
-}
-
-void cosm_exit_debugfs(void)
-{
-	debugfs_remove(cosm_dbg);
-}
diff --git a/drivers/misc/mic/cosm/cosm_main.c b/drivers/misc/mic/cosm/cosm_main.c
deleted file mode 100644
index ebb0eac43754..000000000000
--- a/drivers/misc/mic/cosm/cosm_main.c
+++ /dev/null
@@ -1,382 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * Intel MIC Coprocessor State Management (COSM) Driver
- */
-
-#include <linux/module.h>
-#include <linux/delay.h>
-#include <linux/idr.h>
-#include <linux/slab.h>
-#include <linux/cred.h>
-#include "cosm_main.h"
-
-static const char cosm_driver_name[] = "mic";
-
-/* COSM ID allocator */
-static struct ida g_cosm_ida;
-/* Class of MIC devices for sysfs accessibility. */
-static struct class *g_cosm_class;
-/* Number of MIC devices */
-static atomic_t g_num_dev;
-
-/**
- * cosm_hw_reset - Issue a HW reset for the MIC device
- * @cdev: pointer to cosm_device instance
- * @force: force a MIC to reset even if it is already reset and ready
- */
-static void cosm_hw_reset(struct cosm_device *cdev, bool force)
-{
-	int i;
-
-#define MIC_RESET_TO (45)
-	if (force && cdev->hw_ops->force_reset)
-		cdev->hw_ops->force_reset(cdev);
-	else
-		cdev->hw_ops->reset(cdev);
-
-	for (i = 0; i < MIC_RESET_TO; i++) {
-		if (cdev->hw_ops->ready(cdev)) {
-			cosm_set_state(cdev, MIC_READY);
-			return;
-		}
-		/*
-		 * Resets typically take 10s of seconds to complete.
-		 * Since an MMIO read is required to check if the
-		 * firmware is ready or not, a 1 second delay works nicely.
-		 */
-		msleep(1000);
-	}
-	cosm_set_state(cdev, MIC_RESET_FAILED);
-}
-
-/**
- * cosm_start - Start the MIC
- * @cdev: pointer to cosm_device instance
- *
- * This function prepares an MIC for boot and initiates boot.
- * RETURNS: An appropriate -ERRNO error value on error, or 0 for success.
- */
-int cosm_start(struct cosm_device *cdev)
-{
-	const struct cred *orig_cred;
-	struct cred *override_cred;
-	int rc;
-
-	mutex_lock(&cdev->cosm_mutex);
-	if (!cdev->bootmode) {
-		dev_err(&cdev->dev, "%s %d bootmode not set\n",
-			__func__, __LINE__);
-		rc = -EINVAL;
-		goto unlock_ret;
-	}
-retry:
-	if (cdev->state != MIC_READY) {
-		dev_err(&cdev->dev, "%s %d MIC state not READY\n",
-			__func__, __LINE__);
-		rc = -EINVAL;
-		goto unlock_ret;
-	}
-	if (!cdev->hw_ops->ready(cdev)) {
-		cosm_hw_reset(cdev, false);
-		/*
-		 * The state will either be MIC_READY if the reset succeeded
-		 * or MIC_RESET_FAILED if the firmware reset failed.
-		 */
-		goto retry;
-	}
-
-	/*
-	 * Set credentials to root to allow non-root user to download initramsfs
-	 * with 600 permissions
-	 */
-	override_cred = prepare_creds();
-	if (!override_cred) {
-		dev_err(&cdev->dev, "%s %d prepare_creds failed\n",
-			__func__, __LINE__);
-		rc = -ENOMEM;
-		goto unlock_ret;
-	}
-	override_cred->fsuid = GLOBAL_ROOT_UID;
-	orig_cred = override_creds(override_cred);
-
-	rc = cdev->hw_ops->start(cdev, cdev->index);
-
-	revert_creds(orig_cred);
-	put_cred(override_cred);
-	if (rc)
-		goto unlock_ret;
-
-	/*
-	 * If linux is being booted, card is treated 'online' only
-	 * when the scif interface in the card is up. If anything else
-	 * is booted, we set card to 'online' immediately.
-	 */
-	if (!strcmp(cdev->bootmode, "linux"))
-		cosm_set_state(cdev, MIC_BOOTING);
-	else
-		cosm_set_state(cdev, MIC_ONLINE);
-unlock_ret:
-	mutex_unlock(&cdev->cosm_mutex);
-	if (rc)
-		dev_err(&cdev->dev, "cosm_start failed rc %d\n", rc);
-	return rc;
-}
-
-/**
- * cosm_stop - Prepare the MIC for reset and trigger reset
- * @cdev: pointer to cosm_device instance
- * @force: force a MIC to reset even if it is already reset and ready.
- *
- * RETURNS: None
- */
-void cosm_stop(struct cosm_device *cdev, bool force)
-{
-	mutex_lock(&cdev->cosm_mutex);
-	if (cdev->state != MIC_READY || force) {
-		/*
-		 * Don't call hw_ops if they have been called previously.
-		 * stop(..) calls device_unregister and will crash the system if
-		 * called multiple times.
-		 */
-		u8 state = cdev->state == MIC_RESETTING ?
-					cdev->prev_state : cdev->state;
-		bool call_hw_ops = state != MIC_RESET_FAILED &&
-					state != MIC_READY;
-
-		if (cdev->state != MIC_RESETTING)
-			cosm_set_state(cdev, MIC_RESETTING);
-		cdev->heartbeat_watchdog_enable = false;
-		if (call_hw_ops)
-			cdev->hw_ops->stop(cdev, force);
-		cosm_hw_reset(cdev, force);
-		cosm_set_shutdown_status(cdev, MIC_NOP);
-		if (call_hw_ops && cdev->hw_ops->post_reset)
-			cdev->hw_ops->post_reset(cdev, cdev->state);
-	}
-	mutex_unlock(&cdev->cosm_mutex);
-	flush_work(&cdev->scif_work);
-}
-
-/**
- * cosm_reset_trigger_work - Trigger MIC reset
- * @work: The work structure
- *
- * This work is scheduled whenever the host wants to reset the MIC.
- */
-static void cosm_reset_trigger_work(struct work_struct *work)
-{
-	struct cosm_device *cdev = container_of(work, struct cosm_device,
-						reset_trigger_work);
-	cosm_stop(cdev, false);
-}
-
-/**
- * cosm_reset - Schedule MIC reset
- * @cdev: pointer to cosm_device instance
- *
- * RETURNS: An -EINVAL if the card is already READY or 0 for success.
- */
-int cosm_reset(struct cosm_device *cdev)
-{
-	int rc = 0;
-
-	mutex_lock(&cdev->cosm_mutex);
-	if (cdev->state != MIC_READY) {
-		if (cdev->state != MIC_RESETTING) {
-			cdev->prev_state = cdev->state;
-			cosm_set_state(cdev, MIC_RESETTING);
-			schedule_work(&cdev->reset_trigger_work);
-		}
-	} else {
-		dev_err(&cdev->dev, "%s %d MIC is READY\n", __func__, __LINE__);
-		rc = -EINVAL;
-	}
-	mutex_unlock(&cdev->cosm_mutex);
-	return rc;
-}
-
-/**
- * cosm_shutdown - Initiate MIC shutdown.
- * @cdev: pointer to cosm_device instance
- *
- * RETURNS: None
- */
-int cosm_shutdown(struct cosm_device *cdev)
-{
-	struct cosm_msg msg = { .id = COSM_MSG_SHUTDOWN };
-	int rc = 0;
-
-	mutex_lock(&cdev->cosm_mutex);
-	if (cdev->state != MIC_ONLINE) {
-		rc = -EINVAL;
-		dev_err(&cdev->dev, "%s %d skipping shutdown in state: %s\n",
-			__func__, __LINE__, cosm_state_string[cdev->state]);
-		goto err;
-	}
-
-	if (!cdev->epd) {
-		rc = -ENOTCONN;
-		dev_err(&cdev->dev, "%s %d scif endpoint not connected rc %d\n",
-			__func__, __LINE__, rc);
-		goto err;
-	}
-
-	rc = scif_send(cdev->epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
-	if (rc < 0) {
-		dev_err(&cdev->dev, "%s %d scif_send failed rc %d\n",
-			__func__, __LINE__, rc);
-		goto err;
-	}
-	cdev->heartbeat_watchdog_enable = false;
-	cosm_set_state(cdev, MIC_SHUTTING_DOWN);
-	rc = 0;
-err:
-	mutex_unlock(&cdev->cosm_mutex);
-	return rc;
-}
-
-static int cosm_driver_probe(struct cosm_device *cdev)
-{
-	int rc;
-
-	/* Initialize SCIF server at first probe */
-	if (atomic_add_return(1, &g_num_dev) == 1) {
-		rc = cosm_scif_init();
-		if (rc)
-			goto scif_exit;
-	}
-	mutex_init(&cdev->cosm_mutex);
-	INIT_WORK(&cdev->reset_trigger_work, cosm_reset_trigger_work);
-	INIT_WORK(&cdev->scif_work, cosm_scif_work);
-	cdev->sysfs_heartbeat_enable = true;
-	cosm_sysfs_init(cdev);
-	cdev->sdev = device_create_with_groups(g_cosm_class, cdev->dev.parent,
-			       MKDEV(0, cdev->index), cdev, cdev->attr_group,
-			       "mic%d", cdev->index);
-	if (IS_ERR(cdev->sdev)) {
-		rc = PTR_ERR(cdev->sdev);
-		dev_err(&cdev->dev, "device_create_with_groups failed rc %d\n",
-			rc);
-		goto scif_exit;
-	}
-
-	cdev->state_sysfs = sysfs_get_dirent(cdev->sdev->kobj.sd,
-		"state");
-	if (!cdev->state_sysfs) {
-		rc = -ENODEV;
-		dev_err(&cdev->dev, "sysfs_get_dirent failed rc %d\n", rc);
-		goto destroy_device;
-	}
-	cosm_create_debug_dir(cdev);
-	return 0;
-destroy_device:
-	device_destroy(g_cosm_class, MKDEV(0, cdev->index));
-scif_exit:
-	if (atomic_dec_and_test(&g_num_dev))
-		cosm_scif_exit();
-	return rc;
-}
-
-static void cosm_driver_remove(struct cosm_device *cdev)
-{
-	cosm_delete_debug_dir(cdev);
-	sysfs_put(cdev->state_sysfs);
-	device_destroy(g_cosm_class, MKDEV(0, cdev->index));
-	flush_work(&cdev->reset_trigger_work);
-	cosm_stop(cdev, false);
-	if (atomic_dec_and_test(&g_num_dev))
-		cosm_scif_exit();
-
-	/* These sysfs entries might have allocated */
-	kfree(cdev->cmdline);
-	kfree(cdev->firmware);
-	kfree(cdev->ramdisk);
-	kfree(cdev->bootmode);
-}
-
-static int cosm_suspend(struct device *dev)
-{
-	struct cosm_device *cdev = dev_to_cosm(dev);
-
-	mutex_lock(&cdev->cosm_mutex);
-	switch (cdev->state) {
-	/**
-	 * Suspend/freeze hooks in userspace have already shutdown the card.
-	 * Card should be 'ready' in most cases. It is however possible that
-	 * some userspace application initiated a boot. In those cases, we
-	 * simply reset the card.
-	 */
-	case MIC_ONLINE:
-	case MIC_BOOTING:
-	case MIC_SHUTTING_DOWN:
-		mutex_unlock(&cdev->cosm_mutex);
-		cosm_stop(cdev, false);
-		break;
-	default:
-		mutex_unlock(&cdev->cosm_mutex);
-		break;
-	}
-	return 0;
-}
-
-static const struct dev_pm_ops cosm_pm_ops = {
-	.suspend = cosm_suspend,
-	.freeze = cosm_suspend
-};
-
-static struct cosm_driver cosm_driver = {
-	.driver = {
-		.name =  KBUILD_MODNAME,
-		.owner = THIS_MODULE,
-		.pm = &cosm_pm_ops,
-	},
-	.probe = cosm_driver_probe,
-	.remove = cosm_driver_remove
-};
-
-static int __init cosm_init(void)
-{
-	int ret;
-
-	cosm_init_debugfs();
-
-	g_cosm_class = class_create(THIS_MODULE, cosm_driver_name);
-	if (IS_ERR(g_cosm_class)) {
-		ret = PTR_ERR(g_cosm_class);
-		pr_err("class_create failed ret %d\n", ret);
-		goto cleanup_debugfs;
-	}
-
-	ida_init(&g_cosm_ida);
-	ret = cosm_register_driver(&cosm_driver);
-	if (ret) {
-		pr_err("cosm_register_driver failed ret %d\n", ret);
-		goto ida_destroy;
-	}
-	return 0;
-ida_destroy:
-	ida_destroy(&g_cosm_ida);
-	class_destroy(g_cosm_class);
-cleanup_debugfs:
-	cosm_exit_debugfs();
-	return ret;
-}
-
-static void __exit cosm_exit(void)
-{
-	cosm_unregister_driver(&cosm_driver);
-	ida_destroy(&g_cosm_ida);
-	class_destroy(g_cosm_class);
-	cosm_exit_debugfs();
-}
-
-module_init(cosm_init);
-module_exit(cosm_exit);
-
-MODULE_AUTHOR("Intel Corporation");
-MODULE_DESCRIPTION("Intel(R) MIC Coprocessor State Management (COSM) Driver");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/cosm/cosm_main.h b/drivers/misc/mic/cosm/cosm_main.h
deleted file mode 100644
index 5188ad245814..000000000000
--- a/drivers/misc/mic/cosm/cosm_main.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * Intel MIC Coprocessor State Management (COSM) Driver
- */
-#ifndef _COSM_COSM_H_
-#define _COSM_COSM_H_
-
-#include <linux/scif.h>
-#include "../bus/cosm_bus.h"
-
-#define COSM_HEARTBEAT_SEND_SEC 30
-#define SCIF_COSM_LISTEN_PORT  201
-
-/**
- * enum COSM msg id's
- * @COSM_MSG_SHUTDOWN: host->card trigger shutdown
- * @COSM_MSG_SYNC_TIME: host->card send host time to card to sync time
- * @COSM_MSG_HEARTBEAT: card->host heartbeat
- * @COSM_MSG_SHUTDOWN_STATUS: card->host with shutdown status as payload
- */
-enum cosm_msg_id {
-	COSM_MSG_SHUTDOWN,
-	COSM_MSG_SYNC_TIME,
-	COSM_MSG_HEARTBEAT,
-	COSM_MSG_SHUTDOWN_STATUS,
-};
-
-struct cosm_msg {
-	u64 id;
-	union {
-		u64 shutdown_status;
-		struct {
-			u64 tv_sec;
-			u64 tv_nsec;
-		} timespec;
-	};
-};
-
-extern const char * const cosm_state_string[];
-extern const char * const cosm_shutdown_status_string[];
-
-void cosm_sysfs_init(struct cosm_device *cdev);
-int cosm_start(struct cosm_device *cdev);
-void cosm_stop(struct cosm_device *cdev, bool force);
-int cosm_reset(struct cosm_device *cdev);
-int cosm_shutdown(struct cosm_device *cdev);
-void cosm_set_state(struct cosm_device *cdev, u8 state);
-void cosm_set_shutdown_status(struct cosm_device *cdev, u8 status);
-void cosm_init_debugfs(void);
-void cosm_exit_debugfs(void);
-void cosm_create_debug_dir(struct cosm_device *cdev);
-void cosm_delete_debug_dir(struct cosm_device *cdev);
-int cosm_scif_init(void);
-void cosm_scif_exit(void);
-void cosm_scif_work(struct work_struct *work);
-
-#endif
diff --git a/drivers/misc/mic/cosm/cosm_scif_server.c b/drivers/misc/mic/cosm/cosm_scif_server.c
deleted file mode 100644
index 7baec9fd8756..000000000000
--- a/drivers/misc/mic/cosm/cosm_scif_server.c
+++ /dev/null
@@ -1,399 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * Intel MIC Coprocessor State Management (COSM) Driver
- */
-#include <linux/kthread.h>
-#include <linux/sched/signal.h>
-
-#include "cosm_main.h"
-
-/*
- * The COSM driver uses SCIF to communicate between the management node and the
- * MIC cards. SCIF is used to (a) Send a shutdown command to the card (b)
- * receive a shutdown status back from the card upon completion of shutdown and
- * (c) receive periodic heartbeat messages from the card used to deduce if the
- * card has crashed.
- *
- * A COSM server consisting of a SCIF listening endpoint waits for incoming
- * connections from the card. Upon acceptance of the connection, a separate
- * work-item is scheduled to handle SCIF message processing for that card. The
- * life-time of this work-item is therefore the time from which the connection
- * from a card is accepted to the time at which the connection is closed. A new
- * work-item starts each time the card boots and is alive till the card (a)
- * shuts down (b) is reset (c) crashes (d) cosm_client driver on the card is
- * unloaded.
- *
- * From the point of view of COSM interactions with SCIF during card
- * shutdown, reset and crash are as follows:
- *
- * Card shutdown
- * -------------
- * 1. COSM client on the card invokes orderly_poweroff() in response to SHUTDOWN
- *    message from the host.
- * 2. Card driver shutdown callback invokes scif_unregister_device(..) resulting
- *    in scif_remove(..) getting called on the card
- * 3. scif_remove -> scif_stop -> scif_handle_remove_node ->
- *    scif_peer_unregister_device -> device_unregister for the host peer device
- * 4. During device_unregister remove(..) method of cosm_client is invoked which
- *    closes the COSM SCIF endpoint on the card. This results in a SCIF_DISCNCT
- *    message being sent to host SCIF. SCIF_DISCNCT message processing on the
- *    host SCIF sets the host COSM SCIF endpoint state to DISCONNECTED and wakes
- *    up the host COSM thread blocked in scif_poll(..) resulting in
- *    scif_poll(..)  returning EPOLLHUP.
- * 5. On the card, scif_peer_release_dev is next called which results in an
- *    SCIF_EXIT message being sent to the host and after receiving the
- *    SCIF_EXIT_ACK from the host the peer device teardown on the card is
- *    complete.
- * 6. As part of the SCIF_EXIT message processing on the host, host sends a
- *    SCIF_REMOVE_NODE to itself corresponding to the card being removed. This
- *    starts a similar SCIF peer device teardown sequence on the host
- *    corresponding to the card being shut down.
- *
- * Card reset
- * ----------
- * The case of interest here is when the card has not been previously shut down
- * since most of the steps below are skipped in that case:
-
- * 1. cosm_stop(..) invokes hw_ops->stop(..) method of the base PCIe driver
- *    which unregisters the SCIF HW device resulting in scif_remove(..) being
- *    called on the host.
- * 2. scif_remove(..) calls scif_disconnect_node(..) which results in a
- *    SCIF_EXIT message being sent to the card.
- * 3. The card executes scif_stop() as part of SCIF_EXIT message
- *    processing. This results in the COSM endpoint on the card being closed and
- *    the SCIF host peer device on the card getting unregistered similar to
- *    steps 3, 4 and 5 for the card shutdown case above. scif_poll(..) on the
- *    host returns EPOLLHUP as a result.
- * 4. On the host, card peer device unregister and SCIF HW remove(..) also
- *    subsequently complete.
- *
- * Card crash
- * ----------
- * If a reset is issued after the card has crashed, there is no SCIF_DISCNT
- * message from the card which would result in scif_poll(..) returning
- * EPOLLHUP. In this case when the host SCIF driver sends a SCIF_REMOVE_NODE
- * message to itself resulting in the card SCIF peer device being unregistered,
- * this results in a scif_peer_release_dev -> scif_cleanup_scifdev->
- * scif_invalidate_ep call sequence which sets the endpoint state to
- * DISCONNECTED and results in scif_poll(..) returning EPOLLHUP.
- */
-
-#define COSM_SCIF_BACKLOG 16
-#define COSM_HEARTBEAT_CHECK_DELTA_SEC 10
-#define COSM_HEARTBEAT_TIMEOUT_SEC \
-		(COSM_HEARTBEAT_SEND_SEC + COSM_HEARTBEAT_CHECK_DELTA_SEC)
-#define COSM_HEARTBEAT_TIMEOUT_MSEC (COSM_HEARTBEAT_TIMEOUT_SEC * MSEC_PER_SEC)
-
-static struct task_struct *server_thread;
-static scif_epd_t listen_epd;
-
-/* Publish MIC card's shutdown status to user space MIC daemon */
-static void cosm_update_mic_status(struct cosm_device *cdev)
-{
-	if (cdev->shutdown_status_int != MIC_NOP) {
-		cosm_set_shutdown_status(cdev, cdev->shutdown_status_int);
-		cdev->shutdown_status_int = MIC_NOP;
-	}
-}
-
-/* Store MIC card's shutdown status internally when it is received */
-static void cosm_shutdown_status_int(struct cosm_device *cdev,
-				     enum mic_status shutdown_status)
-{
-	switch (shutdown_status) {
-	case MIC_HALTED:
-	case MIC_POWER_OFF:
-	case MIC_RESTART:
-	case MIC_CRASHED:
-		break;
-	default:
-		dev_err(&cdev->dev, "%s %d Unexpected shutdown_status %d\n",
-			__func__, __LINE__, shutdown_status);
-		return;
-	};
-	cdev->shutdown_status_int = shutdown_status;
-	cdev->heartbeat_watchdog_enable = false;
-
-	if (cdev->state != MIC_SHUTTING_DOWN)
-		cosm_set_state(cdev, MIC_SHUTTING_DOWN);
-}
-
-/* Non-blocking recv. Read and process all available messages */
-static void cosm_scif_recv(struct cosm_device *cdev)
-{
-	struct cosm_msg msg;
-	int rc;
-
-	while (1) {
-		rc = scif_recv(cdev->epd, &msg, sizeof(msg), 0);
-		if (!rc) {
-			break;
-		} else if (rc < 0) {
-			dev_dbg(&cdev->dev, "%s: %d rc %d\n",
-				__func__, __LINE__, rc);
-			break;
-		}
-		dev_dbg(&cdev->dev, "%s: %d rc %d id 0x%llx\n",
-			__func__, __LINE__, rc, msg.id);
-
-		switch (msg.id) {
-		case COSM_MSG_SHUTDOWN_STATUS:
-			cosm_shutdown_status_int(cdev, msg.shutdown_status);
-			break;
-		case COSM_MSG_HEARTBEAT:
-			/* Nothing to do, heartbeat only unblocks scif_poll */
-			break;
-		default:
-			dev_err(&cdev->dev, "%s: %d unknown msg.id %lld\n",
-				__func__, __LINE__, msg.id);
-			break;
-		}
-	}
-}
-
-/* Publish crashed status for this MIC card */
-static void cosm_set_crashed(struct cosm_device *cdev)
-{
-	dev_err(&cdev->dev, "node alive timeout\n");
-	cosm_shutdown_status_int(cdev, MIC_CRASHED);
-	cosm_update_mic_status(cdev);
-}
-
-/* Send host time to the MIC card to sync system time between host and MIC */
-static void cosm_send_time(struct cosm_device *cdev)
-{
-	struct cosm_msg msg = { .id = COSM_MSG_SYNC_TIME };
-	struct timespec64 ts;
-	int rc;
-
-	ktime_get_real_ts64(&ts);
-	msg.timespec.tv_sec = ts.tv_sec;
-	msg.timespec.tv_nsec = ts.tv_nsec;
-
-	rc = scif_send(cdev->epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
-	if (rc < 0)
-		dev_err(&cdev->dev, "%s %d scif_send failed rc %d\n",
-			__func__, __LINE__, rc);
-}
-
-/*
- * Close this cosm_device's endpoint after its peer endpoint on the card has
- * been closed. In all cases except MIC card crash EPOLLHUP on the host is
- * triggered by the client's endpoint being closed.
- */
-static void cosm_scif_close(struct cosm_device *cdev)
-{
-	/*
-	 * Because SHUTDOWN_STATUS message is sent by the MIC cards in the
-	 * reboot notifier when shutdown is still not complete, we notify mpssd
-	 * to reset the card when SCIF endpoint is closed.
-	 */
-	cosm_update_mic_status(cdev);
-	scif_close(cdev->epd);
-	cdev->epd = NULL;
-	dev_dbg(&cdev->dev, "%s %d\n", __func__, __LINE__);
-}
-
-/*
- * Set card state to ONLINE when a new SCIF connection from a MIC card is
- * received. Normally the state is BOOTING when the connection comes in, but can
- * be ONLINE if cosm_client driver on the card was unloaded and then reloaded.
- */
-static int cosm_set_online(struct cosm_device *cdev)
-{
-	int rc = 0;
-
-	if (MIC_BOOTING == cdev->state || MIC_ONLINE == cdev->state) {
-		cdev->heartbeat_watchdog_enable = cdev->sysfs_heartbeat_enable;
-		cdev->epd = cdev->newepd;
-		if (cdev->state == MIC_BOOTING)
-			cosm_set_state(cdev, MIC_ONLINE);
-		cosm_send_time(cdev);
-		dev_dbg(&cdev->dev, "%s %d\n", __func__, __LINE__);
-	} else {
-		dev_warn(&cdev->dev, "%s %d not going online in state: %s\n",
-			 __func__, __LINE__, cosm_state_string[cdev->state]);
-		rc = -EINVAL;
-	}
-	/* Drop reference acquired by bus_find_device in the server thread */
-	put_device(&cdev->dev);
-	return rc;
-}
-
-/*
- * Work function for handling work for a SCIF connection from a particular MIC
- * card. It first sets the card state to ONLINE and then calls scif_poll to
- * block on activity such as incoming messages on the SCIF endpoint. When the
- * endpoint is closed, the work function exits, completing its life cycle, from
- * MIC card boot to card shutdown/reset/crash.
- */
-void cosm_scif_work(struct work_struct *work)
-{
-	struct cosm_device *cdev = container_of(work, struct cosm_device,
-						scif_work);
-	struct scif_pollepd pollepd;
-	int rc;
-
-	mutex_lock(&cdev->cosm_mutex);
-	if (cosm_set_online(cdev))
-		goto exit;
-
-	while (1) {
-		pollepd.epd = cdev->epd;
-		pollepd.events = EPOLLIN;
-
-		/* Drop the mutex before blocking in scif_poll(..) */
-		mutex_unlock(&cdev->cosm_mutex);
-		/* poll(..) with timeout on our endpoint */
-		rc = scif_poll(&pollepd, 1, COSM_HEARTBEAT_TIMEOUT_MSEC);
-		mutex_lock(&cdev->cosm_mutex);
-		if (rc < 0) {
-			dev_err(&cdev->dev, "%s %d scif_poll rc %d\n",
-				__func__, __LINE__, rc);
-			continue;
-		}
-
-		/* There is a message from the card */
-		if (pollepd.revents & EPOLLIN)
-			cosm_scif_recv(cdev);
-
-		/* The peer endpoint is closed or this endpoint disconnected */
-		if (pollepd.revents & EPOLLHUP) {
-			cosm_scif_close(cdev);
-			break;
-		}
-
-		/* Did we timeout from poll? */
-		if (!rc && cdev->heartbeat_watchdog_enable)
-			cosm_set_crashed(cdev);
-	}
-exit:
-	dev_dbg(&cdev->dev, "%s %d exiting\n", __func__, __LINE__);
-	mutex_unlock(&cdev->cosm_mutex);
-}
-
-/*
- * COSM SCIF server thread function. Accepts incoming SCIF connections from MIC
- * cards, finds the correct cosm_device to associate that connection with and
- * schedules individual work items for each MIC card.
- */
-static int cosm_scif_server(void *unused)
-{
-	struct cosm_device *cdev;
-	scif_epd_t newepd;
-	struct scif_port_id port_id;
-	int rc;
-
-	allow_signal(SIGKILL);
-
-	while (!kthread_should_stop()) {
-		rc = scif_accept(listen_epd, &port_id, &newepd,
-				 SCIF_ACCEPT_SYNC);
-		if (rc < 0) {
-			if (-ERESTARTSYS != rc)
-				pr_err("%s %d rc %d\n", __func__, __LINE__, rc);
-			continue;
-		}
-
-		/*
-		 * Associate the incoming connection with a particular
-		 * cosm_device, COSM device ID == SCIF node ID - 1
-		 */
-		cdev = cosm_find_cdev_by_id(port_id.node - 1);
-		if (!cdev)
-			continue;
-		cdev->newepd = newepd;
-		schedule_work(&cdev->scif_work);
-	}
-
-	pr_debug("%s %d Server thread stopped\n", __func__, __LINE__);
-	return 0;
-}
-
-static int cosm_scif_listen(void)
-{
-	int rc;
-
-	listen_epd = scif_open();
-	if (!listen_epd) {
-		pr_err("%s %d scif_open failed\n", __func__, __LINE__);
-		return -ENOMEM;
-	}
-
-	rc = scif_bind(listen_epd, SCIF_COSM_LISTEN_PORT);
-	if (rc < 0) {
-		pr_err("%s %d scif_bind failed rc %d\n",
-		       __func__, __LINE__, rc);
-		goto err;
-	}
-
-	rc = scif_listen(listen_epd, COSM_SCIF_BACKLOG);
-	if (rc < 0) {
-		pr_err("%s %d scif_listen rc %d\n", __func__, __LINE__, rc);
-		goto err;
-	}
-	pr_debug("%s %d listen_epd set up\n", __func__, __LINE__);
-	return 0;
-err:
-	scif_close(listen_epd);
-	listen_epd = NULL;
-	return rc;
-}
-
-static void cosm_scif_listen_exit(void)
-{
-	pr_debug("%s %d closing listen_epd\n", __func__, __LINE__);
-	if (listen_epd) {
-		scif_close(listen_epd);
-		listen_epd = NULL;
-	}
-}
-
-/*
- * Create a listening SCIF endpoint and a server kthread which accepts incoming
- * SCIF connections from MIC cards
- */
-int cosm_scif_init(void)
-{
-	int rc = cosm_scif_listen();
-
-	if (rc) {
-		pr_err("%s %d cosm_scif_listen rc %d\n",
-		       __func__, __LINE__, rc);
-		goto err;
-	}
-
-	server_thread = kthread_run(cosm_scif_server, NULL, "cosm_server");
-	if (IS_ERR(server_thread)) {
-		rc = PTR_ERR(server_thread);
-		pr_err("%s %d kthread_run rc %d\n", __func__, __LINE__, rc);
-		goto listen_exit;
-	}
-	return 0;
-listen_exit:
-	cosm_scif_listen_exit();
-err:
-	return rc;
-}
-
-/* Stop the running server thread and close the listening SCIF endpoint */
-void cosm_scif_exit(void)
-{
-	int rc;
-
-	if (!IS_ERR_OR_NULL(server_thread)) {
-		rc = send_sig(SIGKILL, server_thread, 0);
-		if (rc) {
-			pr_err("%s %d send_sig rc %d\n",
-			       __func__, __LINE__, rc);
-			return;
-		}
-		kthread_stop(server_thread);
-	}
-
-	cosm_scif_listen_exit();
-}
diff --git a/drivers/misc/mic/cosm/cosm_sysfs.c b/drivers/misc/mic/cosm/cosm_sysfs.c
deleted file mode 100644
index e6dac967c1af..000000000000
--- a/drivers/misc/mic/cosm/cosm_sysfs.c
+++ /dev/null
@@ -1,449 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * Intel MIC Coprocessor State Management (COSM) Driver
- */
-#include <linux/slab.h>
-#include "cosm_main.h"
-
-/*
- * A state-to-string lookup table, for exposing a human readable state
- * via sysfs. Always keep in sync with enum cosm_states
- */
-const char * const cosm_state_string[] = {
-	[MIC_READY] = "ready",
-	[MIC_BOOTING] = "booting",
-	[MIC_ONLINE] = "online",
-	[MIC_SHUTTING_DOWN] = "shutting_down",
-	[MIC_RESETTING] = "resetting",
-	[MIC_RESET_FAILED] = "reset_failed",
-};
-
-/*
- * A shutdown-status-to-string lookup table, for exposing a human
- * readable state via sysfs. Always keep in sync with enum cosm_shutdown_status
- */
-const char * const cosm_shutdown_status_string[] = {
-	[MIC_NOP] = "nop",
-	[MIC_CRASHED] = "crashed",
-	[MIC_HALTED] = "halted",
-	[MIC_POWER_OFF] = "poweroff",
-	[MIC_RESTART] = "restart",
-};
-
-void cosm_set_shutdown_status(struct cosm_device *cdev, u8 shutdown_status)
-{
-	dev_dbg(&cdev->dev, "Shutdown Status %s -> %s\n",
-		cosm_shutdown_status_string[cdev->shutdown_status],
-		cosm_shutdown_status_string[shutdown_status]);
-	cdev->shutdown_status = shutdown_status;
-}
-
-void cosm_set_state(struct cosm_device *cdev, u8 state)
-{
-	dev_dbg(&cdev->dev, "State %s -> %s\n",
-		cosm_state_string[cdev->state],
-		cosm_state_string[state]);
-	cdev->state = state;
-	sysfs_notify_dirent(cdev->state_sysfs);
-}
-
-static ssize_t
-family_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct cosm_device *cdev = dev_get_drvdata(dev);
-
-	if (!cdev)
-		return -EINVAL;
-
-	return cdev->hw_ops->family(cdev, buf);
-}
-static DEVICE_ATTR_RO(family);
-
-static ssize_t
-stepping_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct cosm_device *cdev = dev_get_drvdata(dev);
-
-	if (!cdev)
-		return -EINVAL;
-
-	return cdev->hw_ops->stepping(cdev, buf);
-}
-static DEVICE_ATTR_RO(stepping);
-
-static ssize_t
-state_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct cosm_device *cdev = dev_get_drvdata(dev);
-
-	if (!cdev || cdev->state >= MIC_LAST)
-		return -EINVAL;
-
-	return scnprintf(buf, PAGE_SIZE, "%s\n",
-		cosm_state_string[cdev->state]);
-}
-
-static ssize_t
-state_store(struct device *dev, struct device_attribute *attr,
-	    const char *buf, size_t count)
-{
-	struct cosm_device *cdev = dev_get_drvdata(dev);
-	int rc;
-
-	if (!cdev)
-		return -EINVAL;
-
-	if (sysfs_streq(buf, "boot")) {
-		rc = cosm_start(cdev);
-		goto done;
-	}
-	if (sysfs_streq(buf, "reset")) {
-		rc = cosm_reset(cdev);
-		goto done;
-	}
-
-	if (sysfs_streq(buf, "shutdown")) {
-		rc = cosm_shutdown(cdev);
-		goto done;
-	}
-	rc = -EINVAL;
-done:
-	if (rc)
-		count = rc;
-	return count;
-}
-static DEVICE_ATTR_RW(state);
-
-static ssize_t shutdown_status_show(struct device *dev,
-				    struct device_attribute *attr, char *buf)
-{
-	struct cosm_device *cdev = dev_get_drvdata(dev);
-
-	if (!cdev || cdev->shutdown_status >= MIC_STATUS_LAST)
-		return -EINVAL;
-
-	return scnprintf(buf, PAGE_SIZE, "%s\n",
-		cosm_shutdown_status_string[cdev->shutdown_status]);
-}
-static DEVICE_ATTR_RO(shutdown_status);
-
-static ssize_t
-heartbeat_enable_show(struct device *dev,
-		      struct device_attribute *attr, char *buf)
-{
-	struct cosm_device *cdev = dev_get_drvdata(dev);
-
-	if (!cdev)
-		return -EINVAL;
-
-	return scnprintf(buf, PAGE_SIZE, "%d\n", cdev->sysfs_heartbeat_enable);
-}
-
-static ssize_t
-heartbeat_enable_store(struct device *dev,
-		       struct device_attribute *attr,
-		       const char *buf, size_t count)
-{
-	struct cosm_device *cdev = dev_get_drvdata(dev);
-	int enable;
-	int ret;
-
-	if (!cdev)
-		return -EINVAL;
-
-	mutex_lock(&cdev->cosm_mutex);
-	ret = kstrtoint(buf, 10, &enable);
-	if (ret)
-		goto unlock;
-
-	cdev->sysfs_heartbeat_enable = enable;
-	/* if state is not online, cdev->heartbeat_watchdog_enable is 0 */
-	if (cdev->state == MIC_ONLINE)
-		cdev->heartbeat_watchdog_enable = enable;
-	ret = count;
-unlock:
-	mutex_unlock(&cdev->cosm_mutex);
-	return ret;
-}
-static DEVICE_ATTR_RW(heartbeat_enable);
-
-static ssize_t
-cmdline_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct cosm_device *cdev = dev_get_drvdata(dev);
-	char *cmdline;
-
-	if (!cdev)
-		return -EINVAL;
-
-	cmdline = cdev->cmdline;
-
-	if (cmdline)
-		return scnprintf(buf, PAGE_SIZE, "%s\n", cmdline);
-	return 0;
-}
-
-static ssize_t
-cmdline_store(struct device *dev, struct device_attribute *attr,
-	      const char *buf, size_t count)
-{
-	struct cosm_device *cdev = dev_get_drvdata(dev);
-
-	if (!cdev)
-		return -EINVAL;
-
-	mutex_lock(&cdev->cosm_mutex);
-	kfree(cdev->cmdline);
-
-	cdev->cmdline = kmalloc(count + 1, GFP_KERNEL);
-	if (!cdev->cmdline) {
-		count = -ENOMEM;
-		goto unlock;
-	}
-
-	strncpy(cdev->cmdline, buf, count);
-
-	if (cdev->cmdline[count - 1] == '\n')
-		cdev->cmdline[count - 1] = '\0';
-	else
-		cdev->cmdline[count] = '\0';
-unlock:
-	mutex_unlock(&cdev->cosm_mutex);
-	return count;
-}
-static DEVICE_ATTR_RW(cmdline);
-
-static ssize_t
-firmware_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct cosm_device *cdev = dev_get_drvdata(dev);
-	char *firmware;
-
-	if (!cdev)
-		return -EINVAL;
-
-	firmware = cdev->firmware;
-
-	if (firmware)
-		return scnprintf(buf, PAGE_SIZE, "%s\n", firmware);
-	return 0;
-}
-
-static ssize_t
-firmware_store(struct device *dev, struct device_attribute *attr,
-	       const char *buf, size_t count)
-{
-	struct cosm_device *cdev = dev_get_drvdata(dev);
-
-	if (!cdev)
-		return -EINVAL;
-
-	mutex_lock(&cdev->cosm_mutex);
-	kfree(cdev->firmware);
-
-	cdev->firmware = kmalloc(count + 1, GFP_KERNEL);
-	if (!cdev->firmware) {
-		count = -ENOMEM;
-		goto unlock;
-	}
-	strncpy(cdev->firmware, buf, count);
-
-	if (cdev->firmware[count - 1] == '\n')
-		cdev->firmware[count - 1] = '\0';
-	else
-		cdev->firmware[count] = '\0';
-unlock:
-	mutex_unlock(&cdev->cosm_mutex);
-	return count;
-}
-static DEVICE_ATTR_RW(firmware);
-
-static ssize_t
-ramdisk_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct cosm_device *cdev = dev_get_drvdata(dev);
-	char *ramdisk;
-
-	if (!cdev)
-		return -EINVAL;
-
-	ramdisk = cdev->ramdisk;
-
-	if (ramdisk)
-		return scnprintf(buf, PAGE_SIZE, "%s\n", ramdisk);
-	return 0;
-}
-
-static ssize_t
-ramdisk_store(struct device *dev, struct device_attribute *attr,
-	      const char *buf, size_t count)
-{
-	struct cosm_device *cdev = dev_get_drvdata(dev);
-
-	if (!cdev)
-		return -EINVAL;
-
-	mutex_lock(&cdev->cosm_mutex);
-	kfree(cdev->ramdisk);
-
-	cdev->ramdisk = kmalloc(count + 1, GFP_KERNEL);
-	if (!cdev->ramdisk) {
-		count = -ENOMEM;
-		goto unlock;
-	}
-
-	strncpy(cdev->ramdisk, buf, count);
-
-	if (cdev->ramdisk[count - 1] == '\n')
-		cdev->ramdisk[count - 1] = '\0';
-	else
-		cdev->ramdisk[count] = '\0';
-unlock:
-	mutex_unlock(&cdev->cosm_mutex);
-	return count;
-}
-static DEVICE_ATTR_RW(ramdisk);
-
-static ssize_t
-bootmode_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct cosm_device *cdev = dev_get_drvdata(dev);
-	char *bootmode;
-
-	if (!cdev)
-		return -EINVAL;
-
-	bootmode = cdev->bootmode;
-
-	if (bootmode)
-		return scnprintf(buf, PAGE_SIZE, "%s\n", bootmode);
-	return 0;
-}
-
-static ssize_t
-bootmode_store(struct device *dev, struct device_attribute *attr,
-	       const char *buf, size_t count)
-{
-	struct cosm_device *cdev = dev_get_drvdata(dev);
-
-	if (!cdev)
-		return -EINVAL;
-
-	if (!sysfs_streq(buf, "linux") && !sysfs_streq(buf, "flash"))
-		return -EINVAL;
-
-	mutex_lock(&cdev->cosm_mutex);
-	kfree(cdev->bootmode);
-
-	cdev->bootmode = kmalloc(count + 1, GFP_KERNEL);
-	if (!cdev->bootmode) {
-		count = -ENOMEM;
-		goto unlock;
-	}
-
-	strncpy(cdev->bootmode, buf, count);
-
-	if (cdev->bootmode[count - 1] == '\n')
-		cdev->bootmode[count - 1] = '\0';
-	else
-		cdev->bootmode[count] = '\0';
-unlock:
-	mutex_unlock(&cdev->cosm_mutex);
-	return count;
-}
-static DEVICE_ATTR_RW(bootmode);
-
-static ssize_t
-log_buf_addr_show(struct device *dev, struct device_attribute *attr,
-		  char *buf)
-{
-	struct cosm_device *cdev = dev_get_drvdata(dev);
-
-	if (!cdev)
-		return -EINVAL;
-
-	return scnprintf(buf, PAGE_SIZE, "%p\n", cdev->log_buf_addr);
-}
-
-static ssize_t
-log_buf_addr_store(struct device *dev, struct device_attribute *attr,
-		   const char *buf, size_t count)
-{
-	struct cosm_device *cdev = dev_get_drvdata(dev);
-	int ret;
-	unsigned long addr;
-
-	if (!cdev)
-		return -EINVAL;
-
-	ret = kstrtoul(buf, 16, &addr);
-	if (ret)
-		goto exit;
-
-	cdev->log_buf_addr = (void *)addr;
-	ret = count;
-exit:
-	return ret;
-}
-static DEVICE_ATTR_RW(log_buf_addr);
-
-static ssize_t
-log_buf_len_show(struct device *dev, struct device_attribute *attr,
-		 char *buf)
-{
-	struct cosm_device *cdev = dev_get_drvdata(dev);
-
-	if (!cdev)
-		return -EINVAL;
-
-	return scnprintf(buf, PAGE_SIZE, "%p\n", cdev->log_buf_len);
-}
-
-static ssize_t
-log_buf_len_store(struct device *dev, struct device_attribute *attr,
-		  const char *buf, size_t count)
-{
-	struct cosm_device *cdev = dev_get_drvdata(dev);
-	int ret;
-	unsigned long addr;
-
-	if (!cdev)
-		return -EINVAL;
-
-	ret = kstrtoul(buf, 16, &addr);
-	if (ret)
-		goto exit;
-
-	cdev->log_buf_len = (int *)addr;
-	ret = count;
-exit:
-	return ret;
-}
-static DEVICE_ATTR_RW(log_buf_len);
-
-static struct attribute *cosm_default_attrs[] = {
-	&dev_attr_family.attr,
-	&dev_attr_stepping.attr,
-	&dev_attr_state.attr,
-	&dev_attr_shutdown_status.attr,
-	&dev_attr_heartbeat_enable.attr,
-	&dev_attr_cmdline.attr,
-	&dev_attr_firmware.attr,
-	&dev_attr_ramdisk.attr,
-	&dev_attr_bootmode.attr,
-	&dev_attr_log_buf_addr.attr,
-	&dev_attr_log_buf_len.attr,
-
-	NULL
-};
-
-ATTRIBUTE_GROUPS(cosm_default);
-
-void cosm_sysfs_init(struct cosm_device *cdev)
-{
-	cdev->attr_group = cosm_default_groups;
-}
diff --git a/drivers/misc/mic/cosm_client/Makefile b/drivers/misc/mic/cosm_client/Makefile
deleted file mode 100644
index 5b62270bc2ab..000000000000
--- a/drivers/misc/mic/cosm_client/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Makefile - Intel MIC COSM Client Driver
-# Copyright(c) 2015, Intel Corporation.
-#
-obj-$(CONFIG_MIC_COSM) += cosm_client.o
-
-cosm_client-objs += cosm_scif_client.o
diff --git a/drivers/misc/mic/cosm_client/cosm_scif_client.c b/drivers/misc/mic/cosm_client/cosm_scif_client.c
deleted file mode 100644
index a03213dd9319..000000000000
--- a/drivers/misc/mic/cosm_client/cosm_scif_client.c
+++ /dev/null
@@ -1,269 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * Intel MIC COSM Client Driver
- */
-#include <linux/module.h>
-#include <linux/delay.h>
-#include <linux/reboot.h>
-#include <linux/kthread.h>
-#include <linux/sched/signal.h>
-
-#include "../cosm/cosm_main.h"
-
-#define COSM_SCIF_MAX_RETRIES 10
-#define COSM_HEARTBEAT_SEND_MSEC (COSM_HEARTBEAT_SEND_SEC * MSEC_PER_SEC)
-
-static struct task_struct *client_thread;
-static scif_epd_t client_epd;
-static struct scif_peer_dev *client_spdev;
-
-/*
- * Reboot notifier: receives shutdown status from the OS and communicates it
- * back to the COSM process on the host
- */
-static int cosm_reboot_event(struct notifier_block *this, unsigned long event,
-			     void *ptr)
-{
-	struct cosm_msg msg = { .id = COSM_MSG_SHUTDOWN_STATUS };
-	int rc;
-
-	event = (event == SYS_RESTART) ? SYSTEM_RESTART : event;
-	dev_info(&client_spdev->dev, "%s %d received event %ld\n",
-		 __func__, __LINE__, event);
-
-	msg.shutdown_status = event;
-	rc = scif_send(client_epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
-	if (rc < 0)
-		dev_err(&client_spdev->dev, "%s %d scif_send rc %d\n",
-			__func__, __LINE__, rc);
-
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block cosm_reboot = {
-	.notifier_call  = cosm_reboot_event,
-};
-
-/* Set system time from timespec value received from the host */
-static void cosm_set_time(struct cosm_msg *msg)
-{
-	struct timespec64 ts = {
-		.tv_sec = msg->timespec.tv_sec,
-		.tv_nsec = msg->timespec.tv_nsec,
-	};
-	int rc = do_settimeofday64(&ts);
-
-	if (rc)
-		dev_err(&client_spdev->dev, "%s: %d settimeofday rc %d\n",
-			__func__, __LINE__, rc);
-}
-
-/* COSM client receive message processing */
-static void cosm_client_recv(void)
-{
-	struct cosm_msg msg;
-	int rc;
-
-	while (1) {
-		rc = scif_recv(client_epd, &msg, sizeof(msg), 0);
-		if (!rc) {
-			return;
-		} else if (rc < 0) {
-			dev_err(&client_spdev->dev, "%s: %d rc %d\n",
-				__func__, __LINE__, rc);
-			return;
-		}
-
-		dev_dbg(&client_spdev->dev, "%s: %d rc %d id 0x%llx\n",
-			__func__, __LINE__, rc, msg.id);
-
-		switch (msg.id) {
-		case COSM_MSG_SYNC_TIME:
-			cosm_set_time(&msg);
-			break;
-		case COSM_MSG_SHUTDOWN:
-			orderly_poweroff(true);
-			break;
-		default:
-			dev_err(&client_spdev->dev, "%s: %d unknown id %lld\n",
-				__func__, __LINE__, msg.id);
-			break;
-		}
-	}
-}
-
-/* Initiate connection to the COSM server on the host */
-static int cosm_scif_connect(void)
-{
-	struct scif_port_id port_id;
-	int i, rc;
-
-	client_epd = scif_open();
-	if (!client_epd) {
-		dev_err(&client_spdev->dev, "%s %d scif_open failed\n",
-			__func__, __LINE__);
-		return -ENOMEM;
-	}
-
-	port_id.node = 0;
-	port_id.port = SCIF_COSM_LISTEN_PORT;
-
-	for (i = 0; i < COSM_SCIF_MAX_RETRIES; i++) {
-		rc = scif_connect(client_epd, &port_id);
-		if (rc < 0)
-			msleep(1000);
-		else
-			break;
-	}
-
-	if (rc < 0) {
-		dev_err(&client_spdev->dev, "%s %d scif_connect rc %d\n",
-			__func__, __LINE__, rc);
-		scif_close(client_epd);
-		client_epd = NULL;
-	}
-	return rc < 0 ? rc : 0;
-}
-
-/* Close host SCIF connection */
-static void cosm_scif_connect_exit(void)
-{
-	if (client_epd) {
-		scif_close(client_epd);
-		client_epd = NULL;
-	}
-}
-
-/*
- * COSM SCIF client thread function: waits for messages from the host and sends
- * a heartbeat to the host
- */
-static int cosm_scif_client(void *unused)
-{
-	struct cosm_msg msg = { .id = COSM_MSG_HEARTBEAT };
-	struct scif_pollepd pollepd;
-	int rc;
-
-	allow_signal(SIGKILL);
-
-	while (!kthread_should_stop()) {
-		pollepd.epd = client_epd;
-		pollepd.events = EPOLLIN;
-
-		rc = scif_poll(&pollepd, 1, COSM_HEARTBEAT_SEND_MSEC);
-		if (rc < 0) {
-			if (-EINTR != rc)
-				dev_err(&client_spdev->dev,
-					"%s %d scif_poll rc %d\n",
-					__func__, __LINE__, rc);
-			continue;
-		}
-
-		if (pollepd.revents & EPOLLIN)
-			cosm_client_recv();
-
-		msg.id = COSM_MSG_HEARTBEAT;
-		rc = scif_send(client_epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
-		if (rc < 0)
-			dev_err(&client_spdev->dev, "%s %d scif_send rc %d\n",
-				__func__, __LINE__, rc);
-	}
-
-	dev_dbg(&client_spdev->dev, "%s %d Client thread stopped\n",
-		__func__, __LINE__);
-	return 0;
-}
-
-static void cosm_scif_probe(struct scif_peer_dev *spdev)
-{
-	int rc;
-
-	dev_dbg(&spdev->dev, "%s %d: dnode %d\n",
-		__func__, __LINE__, spdev->dnode);
-
-	/* We are only interested in the host with spdev->dnode == 0 */
-	if (spdev->dnode)
-		return;
-
-	client_spdev = spdev;
-	rc = cosm_scif_connect();
-	if (rc)
-		goto exit;
-
-	rc = register_reboot_notifier(&cosm_reboot);
-	if (rc) {
-		dev_err(&spdev->dev,
-			"reboot notifier registration failed rc %d\n", rc);
-		goto connect_exit;
-	}
-
-	client_thread = kthread_run(cosm_scif_client, NULL, "cosm_client");
-	if (IS_ERR(client_thread)) {
-		rc = PTR_ERR(client_thread);
-		dev_err(&spdev->dev, "%s %d kthread_run rc %d\n",
-			__func__, __LINE__, rc);
-		goto unreg_reboot;
-	}
-	return;
-unreg_reboot:
-	unregister_reboot_notifier(&cosm_reboot);
-connect_exit:
-	cosm_scif_connect_exit();
-exit:
-	client_spdev = NULL;
-}
-
-static void cosm_scif_remove(struct scif_peer_dev *spdev)
-{
-	int rc;
-
-	dev_dbg(&spdev->dev, "%s %d: dnode %d\n",
-		__func__, __LINE__, spdev->dnode);
-
-	if (spdev->dnode)
-		return;
-
-	if (!IS_ERR_OR_NULL(client_thread)) {
-		rc = send_sig(SIGKILL, client_thread, 0);
-		if (rc) {
-			pr_err("%s %d send_sig rc %d\n",
-			       __func__, __LINE__, rc);
-			return;
-		}
-		kthread_stop(client_thread);
-	}
-	unregister_reboot_notifier(&cosm_reboot);
-	cosm_scif_connect_exit();
-	client_spdev = NULL;
-}
-
-static struct scif_client scif_client_cosm = {
-	.name = KBUILD_MODNAME,
-	.probe = cosm_scif_probe,
-	.remove = cosm_scif_remove,
-};
-
-static int __init cosm_client_init(void)
-{
-	int rc = scif_client_register(&scif_client_cosm);
-
-	if (rc)
-		pr_err("scif_client_register failed rc %d\n", rc);
-	return rc;
-}
-
-static void __exit cosm_client_exit(void)
-{
-	scif_client_unregister(&scif_client_cosm);
-}
-
-module_init(cosm_client_init);
-module_exit(cosm_client_exit);
-
-MODULE_AUTHOR("Intel Corporation");
-MODULE_DESCRIPTION("Intel(R) MIC card OS state management client driver");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/host/Makefile b/drivers/misc/mic/host/Makefile
deleted file mode 100644
index 25f153367980..000000000000
--- a/drivers/misc/mic/host/Makefile
+++ /dev/null
@@ -1,12 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile - Intel MIC Linux driver.
-# Copyright(c) 2013, Intel Corporation.
-#
-obj-$(CONFIG_INTEL_MIC_HOST) += mic_host.o
-mic_host-objs := mic_main.o
-mic_host-objs += mic_x100.o
-mic_host-objs += mic_smpt.o
-mic_host-objs += mic_intr.o
-mic_host-objs += mic_boot.o
-mic_host-objs += mic_debugfs.o
diff --git a/drivers/misc/mic/host/mic_boot.c b/drivers/misc/mic/host/mic_boot.c
deleted file mode 100644
index 8cb85b8b3e19..000000000000
--- a/drivers/misc/mic/host/mic_boot.c
+++ /dev/null
@@ -1,588 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * Intel MIC Host driver.
- */
-#include <linux/delay.h>
-#include <linux/firmware.h>
-#include <linux/pci.h>
-#include <linux/kmod.h>
-#include <linux/dma-map-ops.h>
-#include <linux/mic_common.h>
-#include <linux/mic_bus.h>
-#include "../bus/scif_bus.h"
-#include "../bus/vop_bus.h"
-#include "../common/mic_dev.h"
-#include "mic_device.h"
-#include "mic_smpt.h"
-
-static inline struct mic_device *vpdev_to_mdev(struct device *dev)
-{
-	return dev_get_drvdata(dev->parent);
-}
-
-static dma_addr_t
-_mic_dma_map_page(struct device *dev, struct page *page,
-		  unsigned long offset, size_t size,
-		  enum dma_data_direction dir, unsigned long attrs)
-{
-	void *va = phys_to_virt(page_to_phys(page)) + offset;
-	struct mic_device *mdev = vpdev_to_mdev(dev);
-
-	return mic_map_single(mdev, va, size);
-}
-
-static void _mic_dma_unmap_page(struct device *dev, dma_addr_t dma_addr,
-				size_t size, enum dma_data_direction dir,
-				unsigned long attrs)
-{
-	struct mic_device *mdev = vpdev_to_mdev(dev);
-
-	mic_unmap_single(mdev, dma_addr, size);
-}
-
-static const struct dma_map_ops _mic_dma_ops = {
-	.map_page = _mic_dma_map_page,
-	.unmap_page = _mic_dma_unmap_page,
-};
-
-static struct mic_irq *
-__mic_request_irq(struct vop_device *vpdev,
-		  irqreturn_t (*func)(int irq, void *data),
-		  const char *name, void *data, int intr_src)
-{
-	struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev);
-
-	return mic_request_threaded_irq(mdev, func, NULL, name, data,
-					intr_src, MIC_INTR_DB);
-}
-
-static void __mic_free_irq(struct vop_device *vpdev,
-			   struct mic_irq *cookie, void *data)
-{
-	struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev);
-
-	mic_free_irq(mdev, cookie, data);
-}
-
-static void __mic_ack_interrupt(struct vop_device *vpdev, int num)
-{
-	struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev);
-
-	mdev->ops->intr_workarounds(mdev);
-}
-
-static int __mic_next_db(struct vop_device *vpdev)
-{
-	struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev);
-
-	return mic_next_db(mdev);
-}
-
-static void *__mic_get_dp(struct vop_device *vpdev)
-{
-	struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev);
-
-	return mdev->dp;
-}
-
-static void __iomem *__mic_get_remote_dp(struct vop_device *vpdev)
-{
-	return NULL;
-}
-
-static void __mic_send_intr(struct vop_device *vpdev, int db)
-{
-	struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev);
-
-	mdev->ops->send_intr(mdev, db);
-}
-
-static void __iomem *__mic_ioremap(struct vop_device *vpdev,
-				   dma_addr_t pa, size_t len)
-{
-	struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev);
-
-	return mdev->aper.va + pa;
-}
-
-static void __mic_iounmap(struct vop_device *vpdev, void __iomem *va)
-{
-	/* nothing to do */
-}
-
-static struct vop_hw_ops vop_hw_ops = {
-	.request_irq = __mic_request_irq,
-	.free_irq = __mic_free_irq,
-	.ack_interrupt = __mic_ack_interrupt,
-	.next_db = __mic_next_db,
-	.get_dp = __mic_get_dp,
-	.get_remote_dp = __mic_get_remote_dp,
-	.send_intr = __mic_send_intr,
-	.remap = __mic_ioremap,
-	.unmap = __mic_iounmap,
-};
-
-static inline struct mic_device *scdev_to_mdev(struct scif_hw_dev *scdev)
-{
-	return dev_get_drvdata(scdev->dev.parent);
-}
-
-static void *__mic_dma_alloc(struct device *dev, size_t size,
-			     dma_addr_t *dma_handle, gfp_t gfp,
-			     unsigned long attrs)
-{
-	struct scif_hw_dev *scdev = dev_get_drvdata(dev);
-	struct mic_device *mdev = scdev_to_mdev(scdev);
-	dma_addr_t tmp;
-	void *va = kzalloc(size, gfp);
-
-	if (va) {
-		tmp = mic_map_single(mdev, va, size);
-		if (dma_mapping_error(dev, tmp)) {
-			kfree(va);
-			va = NULL;
-		} else {
-			*dma_handle = tmp;
-		}
-	}
-	return va;
-}
-
-static void __mic_dma_free(struct device *dev, size_t size, void *vaddr,
-			   dma_addr_t dma_handle, unsigned long attrs)
-{
-	struct scif_hw_dev *scdev = dev_get_drvdata(dev);
-	struct mic_device *mdev = scdev_to_mdev(scdev);
-
-	mic_unmap_single(mdev, dma_handle, size);
-	kfree(vaddr);
-}
-
-static dma_addr_t
-__mic_dma_map_page(struct device *dev, struct page *page, unsigned long offset,
-		   size_t size, enum dma_data_direction dir,
-		   unsigned long attrs)
-{
-	void *va = phys_to_virt(page_to_phys(page)) + offset;
-	struct scif_hw_dev *scdev = dev_get_drvdata(dev);
-	struct mic_device *mdev = scdev_to_mdev(scdev);
-
-	return mic_map_single(mdev, va, size);
-}
-
-static void
-__mic_dma_unmap_page(struct device *dev, dma_addr_t dma_addr,
-		     size_t size, enum dma_data_direction dir,
-		     unsigned long attrs)
-{
-	struct scif_hw_dev *scdev = dev_get_drvdata(dev);
-	struct mic_device *mdev = scdev_to_mdev(scdev);
-
-	mic_unmap_single(mdev, dma_addr, size);
-}
-
-static int __mic_dma_map_sg(struct device *dev, struct scatterlist *sg,
-			    int nents, enum dma_data_direction dir,
-			    unsigned long attrs)
-{
-	struct scif_hw_dev *scdev = dev_get_drvdata(dev);
-	struct mic_device *mdev = scdev_to_mdev(scdev);
-	struct scatterlist *s;
-	int i, j, ret;
-	dma_addr_t da;
-
-	ret = dma_map_sg(&mdev->pdev->dev, sg, nents, dir);
-	if (ret <= 0)
-		return 0;
-
-	for_each_sg(sg, s, nents, i) {
-		da = mic_map(mdev, sg_dma_address(s) + s->offset, s->length);
-		if (!da)
-			goto err;
-		sg_dma_address(s) = da;
-	}
-	return nents;
-err:
-	for_each_sg(sg, s, i, j) {
-		mic_unmap(mdev, sg_dma_address(s), s->length);
-		sg_dma_address(s) = mic_to_dma_addr(mdev, sg_dma_address(s));
-	}
-	dma_unmap_sg(&mdev->pdev->dev, sg, nents, dir);
-	return 0;
-}
-
-static void __mic_dma_unmap_sg(struct device *dev,
-			       struct scatterlist *sg, int nents,
-			       enum dma_data_direction dir,
-			       unsigned long attrs)
-{
-	struct scif_hw_dev *scdev = dev_get_drvdata(dev);
-	struct mic_device *mdev = scdev_to_mdev(scdev);
-	struct scatterlist *s;
-	dma_addr_t da;
-	int i;
-
-	for_each_sg(sg, s, nents, i) {
-		da = mic_to_dma_addr(mdev, sg_dma_address(s));
-		mic_unmap(mdev, sg_dma_address(s), s->length);
-		sg_dma_address(s) = da;
-	}
-	dma_unmap_sg(&mdev->pdev->dev, sg, nents, dir);
-}
-
-static const struct dma_map_ops __mic_dma_ops = {
-	.alloc = __mic_dma_alloc,
-	.free = __mic_dma_free,
-	.map_page = __mic_dma_map_page,
-	.unmap_page = __mic_dma_unmap_page,
-	.map_sg = __mic_dma_map_sg,
-	.unmap_sg = __mic_dma_unmap_sg,
-};
-
-static struct mic_irq *
-___mic_request_irq(struct scif_hw_dev *scdev,
-		   irqreturn_t (*func)(int irq, void *data),
-				       const char *name,
-				       void *data, int db)
-{
-	struct mic_device *mdev = scdev_to_mdev(scdev);
-
-	return mic_request_threaded_irq(mdev, func, NULL, name, data,
-					db, MIC_INTR_DB);
-}
-
-static void
-___mic_free_irq(struct scif_hw_dev *scdev,
-		struct mic_irq *cookie, void *data)
-{
-	struct mic_device *mdev = scdev_to_mdev(scdev);
-
-	mic_free_irq(mdev, cookie, data);
-}
-
-static void ___mic_ack_interrupt(struct scif_hw_dev *scdev, int num)
-{
-	struct mic_device *mdev = scdev_to_mdev(scdev);
-
-	mdev->ops->intr_workarounds(mdev);
-}
-
-static int ___mic_next_db(struct scif_hw_dev *scdev)
-{
-	struct mic_device *mdev = scdev_to_mdev(scdev);
-
-	return mic_next_db(mdev);
-}
-
-static void ___mic_send_intr(struct scif_hw_dev *scdev, int db)
-{
-	struct mic_device *mdev = scdev_to_mdev(scdev);
-
-	mdev->ops->send_intr(mdev, db);
-}
-
-static void __iomem *___mic_ioremap(struct scif_hw_dev *scdev,
-				    phys_addr_t pa, size_t len)
-{
-	struct mic_device *mdev = scdev_to_mdev(scdev);
-
-	return mdev->aper.va + pa;
-}
-
-static void ___mic_iounmap(struct scif_hw_dev *scdev, void __iomem *va)
-{
-	/* nothing to do */
-}
-
-static struct scif_hw_ops scif_hw_ops = {
-	.request_irq = ___mic_request_irq,
-	.free_irq = ___mic_free_irq,
-	.ack_interrupt = ___mic_ack_interrupt,
-	.next_db = ___mic_next_db,
-	.send_intr = ___mic_send_intr,
-	.remap = ___mic_ioremap,
-	.unmap = ___mic_iounmap,
-};
-
-static inline struct mic_device *mbdev_to_mdev(struct mbus_device *mbdev)
-{
-	return dev_get_drvdata(mbdev->dev.parent);
-}
-
-static dma_addr_t
-mic_dma_map_page(struct device *dev, struct page *page,
-		 unsigned long offset, size_t size, enum dma_data_direction dir,
-		 unsigned long attrs)
-{
-	void *va = phys_to_virt(page_to_phys(page)) + offset;
-	struct mic_device *mdev = dev_get_drvdata(dev->parent);
-
-	return mic_map_single(mdev, va, size);
-}
-
-static void
-mic_dma_unmap_page(struct device *dev, dma_addr_t dma_addr,
-		   size_t size, enum dma_data_direction dir,
-		   unsigned long attrs)
-{
-	struct mic_device *mdev = dev_get_drvdata(dev->parent);
-	mic_unmap_single(mdev, dma_addr, size);
-}
-
-static const struct dma_map_ops mic_dma_ops = {
-	.map_page = mic_dma_map_page,
-	.unmap_page = mic_dma_unmap_page,
-};
-
-static struct mic_irq *
-_mic_request_threaded_irq(struct mbus_device *mbdev,
-			  irq_handler_t handler, irq_handler_t thread_fn,
-			  const char *name, void *data, int intr_src)
-{
-	return mic_request_threaded_irq(mbdev_to_mdev(mbdev), handler,
-					thread_fn, name, data,
-					intr_src, MIC_INTR_DMA);
-}
-
-static void _mic_free_irq(struct mbus_device *mbdev,
-			  struct mic_irq *cookie, void *data)
-{
-	mic_free_irq(mbdev_to_mdev(mbdev), cookie, data);
-}
-
-static void _mic_ack_interrupt(struct mbus_device *mbdev, int num)
-{
-	struct mic_device *mdev = mbdev_to_mdev(mbdev);
-	mdev->ops->intr_workarounds(mdev);
-}
-
-static struct mbus_hw_ops mbus_hw_ops = {
-	.request_threaded_irq = _mic_request_threaded_irq,
-	.free_irq = _mic_free_irq,
-	.ack_interrupt = _mic_ack_interrupt,
-};
-
-/* Initialize the MIC bootparams */
-void mic_bootparam_init(struct mic_device *mdev)
-{
-	struct mic_bootparam *bootparam = mdev->dp;
-
-	bootparam->magic = cpu_to_le32(MIC_MAGIC);
-	bootparam->h2c_config_db = -1;
-	bootparam->node_id = mdev->id + 1;
-	bootparam->scif_host_dma_addr = 0x0;
-	bootparam->scif_card_dma_addr = 0x0;
-	bootparam->c2h_scif_db = -1;
-	bootparam->h2c_scif_db = -1;
-}
-
-static inline struct mic_device *cosmdev_to_mdev(struct cosm_device *cdev)
-{
-	return dev_get_drvdata(cdev->dev.parent);
-}
-
-static void _mic_reset(struct cosm_device *cdev)
-{
-	struct mic_device *mdev = cosmdev_to_mdev(cdev);
-
-	mdev->ops->reset_fw_ready(mdev);
-	mdev->ops->reset(mdev);
-}
-
-static bool _mic_ready(struct cosm_device *cdev)
-{
-	struct mic_device *mdev = cosmdev_to_mdev(cdev);
-
-	return mdev->ops->is_fw_ready(mdev);
-}
-
-/**
- * mic_request_dma_chans - Request DMA channels
- * @mdev: pointer to mic_device instance
- *
- * returns number of DMA channels acquired
- */
-static int mic_request_dma_chans(struct mic_device *mdev)
-{
-	dma_cap_mask_t mask;
-	struct dma_chan *chan;
-
-	dma_cap_zero(mask);
-	dma_cap_set(DMA_MEMCPY, mask);
-
-	do {
-		chan = dma_request_channel(mask, mdev->ops->dma_filter,
-					   &mdev->pdev->dev);
-		if (chan) {
-			mdev->dma_ch[mdev->num_dma_ch++] = chan;
-			if (mdev->num_dma_ch >= MIC_MAX_DMA_CHAN)
-				break;
-		}
-	} while (chan);
-	dev_info(&mdev->pdev->dev, "DMA channels # %d\n", mdev->num_dma_ch);
-	return mdev->num_dma_ch;
-}
-
-/**
- * mic_free_dma_chans - release DMA channels
- * @mdev: pointer to mic_device instance
- *
- * returns none
- */
-static void mic_free_dma_chans(struct mic_device *mdev)
-{
-	int i = 0;
-
-	for (i = 0; i < mdev->num_dma_ch; i++) {
-		dma_release_channel(mdev->dma_ch[i]);
-		mdev->dma_ch[i] = NULL;
-	}
-	mdev->num_dma_ch = 0;
-}
-
-/**
- * _mic_start - Start the MIC.
- * @cdev: pointer to cosm_device instance
- * @id: MIC device id/index provided by COSM used in other drivers like SCIF
- *
- * This function prepares an MIC for boot and initiates boot.
- * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
- *
- * For all cosm_hw_ops the caller holds a mutex to ensure serialization.
- */
-static int _mic_start(struct cosm_device *cdev, int id)
-{
-	struct mic_device *mdev = cosmdev_to_mdev(cdev);
-	int rc;
-
-	mic_bootparam_init(mdev);
-	mdev->dma_mbdev = mbus_register_device(&mdev->pdev->dev,
-					       MBUS_DEV_DMA_HOST, &mic_dma_ops,
-					       &mbus_hw_ops, id, mdev->mmio.va);
-	if (IS_ERR(mdev->dma_mbdev)) {
-		rc = PTR_ERR(mdev->dma_mbdev);
-		goto unlock_ret;
-	}
-	if (!mic_request_dma_chans(mdev)) {
-		rc = -ENODEV;
-		goto dma_remove;
-	}
-	mdev->scdev = scif_register_device(&mdev->pdev->dev, MIC_SCIF_DEV,
-					   &__mic_dma_ops, &scif_hw_ops,
-					   id + 1, 0, &mdev->mmio,
-					   &mdev->aper, mdev->dp, NULL,
-					   mdev->dma_ch, mdev->num_dma_ch,
-					   true);
-	if (IS_ERR(mdev->scdev)) {
-		rc = PTR_ERR(mdev->scdev);
-		goto dma_free;
-	}
-
-	mdev->vpdev = vop_register_device(&mdev->pdev->dev,
-					  VOP_DEV_TRNSP, &_mic_dma_ops,
-					  &vop_hw_ops, id + 1, &mdev->aper,
-					  mdev->dma_ch[0]);
-	if (IS_ERR(mdev->vpdev)) {
-		rc = PTR_ERR(mdev->vpdev);
-		goto scif_remove;
-	}
-
-	rc = mdev->ops->load_mic_fw(mdev, NULL);
-	if (rc)
-		goto vop_remove;
-	mic_smpt_restore(mdev);
-	mic_intr_restore(mdev);
-	mdev->intr_ops->enable_interrupts(mdev);
-	mdev->ops->write_spad(mdev, MIC_DPLO_SPAD, mdev->dp_dma_addr);
-	mdev->ops->write_spad(mdev, MIC_DPHI_SPAD, mdev->dp_dma_addr >> 32);
-	mdev->ops->send_firmware_intr(mdev);
-	goto unlock_ret;
-vop_remove:
-	vop_unregister_device(mdev->vpdev);
-scif_remove:
-	scif_unregister_device(mdev->scdev);
-dma_free:
-	mic_free_dma_chans(mdev);
-dma_remove:
-	mbus_unregister_device(mdev->dma_mbdev);
-unlock_ret:
-	return rc;
-}
-
-/**
- * _mic_stop - Prepare the MIC for reset and trigger reset.
- * @cdev: pointer to cosm_device instance
- * @force: force a MIC to reset even if it is already offline.
- *
- * RETURNS: None.
- */
-static void _mic_stop(struct cosm_device *cdev, bool force)
-{
-	struct mic_device *mdev = cosmdev_to_mdev(cdev);
-
-	/*
-	 * Since SCIF handles card shutdown and reset (using COSM), it will
-	 * will be the first to be registered and the last to be
-	 * unregistered.
-	 */
-	vop_unregister_device(mdev->vpdev);
-	scif_unregister_device(mdev->scdev);
-	mic_free_dma_chans(mdev);
-	mbus_unregister_device(mdev->dma_mbdev);
-	mic_bootparam_init(mdev);
-}
-
-static ssize_t _mic_family(struct cosm_device *cdev, char *buf)
-{
-	struct mic_device *mdev = cosmdev_to_mdev(cdev);
-	static const char *family[MIC_FAMILY_LAST] = { "x100", "Unknown" };
-
-	return scnprintf(buf, PAGE_SIZE, "%s\n", family[mdev->family]);
-}
-
-static ssize_t _mic_stepping(struct cosm_device *cdev, char *buf)
-{
-	struct mic_device *mdev = cosmdev_to_mdev(cdev);
-	const char *string = "??";
-
-	switch (mdev->stepping) {
-	case MIC_A0_STEP:
-		string = "A0";
-		break;
-	case MIC_B0_STEP:
-		string = "B0";
-		break;
-	case MIC_B1_STEP:
-		string = "B1";
-		break;
-	case MIC_C0_STEP:
-		string = "C0";
-		break;
-	default:
-		break;
-	}
-	return scnprintf(buf, PAGE_SIZE, "%s\n", string);
-}
-
-static struct mic_mw *_mic_aper(struct cosm_device *cdev)
-{
-	struct mic_device *mdev = cosmdev_to_mdev(cdev);
-
-	return &mdev->aper;
-}
-
-struct cosm_hw_ops cosm_hw_ops = {
-	.reset = _mic_reset,
-	.force_reset = _mic_reset,
-	.post_reset = NULL,
-	.ready = _mic_ready,
-	.start = _mic_start,
-	.stop = _mic_stop,
-	.family = _mic_family,
-	.stepping = _mic_stepping,
-	.aper = _mic_aper,
-};
diff --git a/drivers/misc/mic/host/mic_debugfs.c b/drivers/misc/mic/host/mic_debugfs.c
deleted file mode 100644
index ffda740e20d5..000000000000
--- a/drivers/misc/mic/host/mic_debugfs.c
+++ /dev/null
@@ -1,149 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * Intel MIC Host driver.
- */
-#include <linux/debugfs.h>
-#include <linux/pci.h>
-#include <linux/seq_file.h>
-
-#include <linux/mic_common.h>
-#include "../common/mic_dev.h"
-#include "mic_device.h"
-#include "mic_smpt.h"
-
-/* Debugfs parent dir */
-static struct dentry *mic_dbg;
-
-static int mic_smpt_show(struct seq_file *s, void *pos)
-{
-	int i;
-	struct mic_device *mdev = s->private;
-	unsigned long flags;
-
-	seq_printf(s, "MIC %-2d |%-10s| %-14s %-10s\n",
-		   mdev->id, "SMPT entry", "SW DMA addr", "RefCount");
-	seq_puts(s, "====================================================\n");
-
-	if (mdev->smpt) {
-		struct mic_smpt_info *smpt_info = mdev->smpt;
-		spin_lock_irqsave(&smpt_info->smpt_lock, flags);
-		for (i = 0; i < smpt_info->info.num_reg; i++) {
-			seq_printf(s, "%9s|%-10d| %-#14llx %-10lld\n",
-				   " ",  i, smpt_info->entry[i].dma_addr,
-				   smpt_info->entry[i].ref_count);
-		}
-		spin_unlock_irqrestore(&smpt_info->smpt_lock, flags);
-	}
-	seq_puts(s, "====================================================\n");
-	return 0;
-}
-
-DEFINE_SHOW_ATTRIBUTE(mic_smpt);
-
-static int mic_post_code_show(struct seq_file *s, void *pos)
-{
-	struct mic_device *mdev = s->private;
-	u32 reg = mdev->ops->get_postcode(mdev);
-
-	seq_printf(s, "%c%c", reg & 0xff, (reg >> 8) & 0xff);
-	return 0;
-}
-
-DEFINE_SHOW_ATTRIBUTE(mic_post_code);
-
-static int mic_msi_irq_info_show(struct seq_file *s, void *pos)
-{
-	struct mic_device *mdev  = s->private;
-	int reg;
-	int i, j;
-	u16 entry;
-	u16 vector;
-	struct pci_dev *pdev = mdev->pdev;
-
-	if (pci_dev_msi_enabled(pdev)) {
-		for (i = 0; i < mdev->irq_info.num_vectors; i++) {
-			if (pdev->msix_enabled) {
-				entry = mdev->irq_info.msix_entries[i].entry;
-				vector = mdev->irq_info.msix_entries[i].vector;
-			} else {
-				entry = 0;
-				vector = pdev->irq;
-			}
-
-			reg = mdev->intr_ops->read_msi_to_src_map(mdev, entry);
-
-			seq_printf(s, "%s %-10d %s %-10d MXAR[%d]: %08X\n",
-				   "IRQ:", vector, "Entry:", entry, i, reg);
-
-			seq_printf(s, "%-10s", "offset:");
-			for (j = (MIC_NUM_OFFSETS - 1); j >= 0; j--)
-				seq_printf(s, "%4d ", j);
-			seq_puts(s, "\n");
-
-
-			seq_printf(s, "%-10s", "count:");
-			for (j = (MIC_NUM_OFFSETS - 1); j >= 0; j--)
-				seq_printf(s, "%4d ",
-					   (mdev->irq_info.mic_msi_map[i] &
-					   BIT(j)) ? 1 : 0);
-			seq_puts(s, "\n\n");
-		}
-	} else {
-		seq_puts(s, "MSI/MSIx interrupts not enabled\n");
-	}
-
-	return 0;
-}
-
-DEFINE_SHOW_ATTRIBUTE(mic_msi_irq_info);
-
-/*
- * mic_create_debug_dir - Initialize MIC debugfs entries.
- */
-void mic_create_debug_dir(struct mic_device *mdev)
-{
-	char name[16];
-
-	if (!mic_dbg)
-		return;
-
-	scnprintf(name, sizeof(name), "mic%d", mdev->id);
-	mdev->dbg_dir = debugfs_create_dir(name, mic_dbg);
-
-	debugfs_create_file("smpt", 0444, mdev->dbg_dir, mdev,
-			    &mic_smpt_fops);
-
-	debugfs_create_file("post_code", 0444, mdev->dbg_dir, mdev,
-			    &mic_post_code_fops);
-
-	debugfs_create_file("msi_irq_info", 0444, mdev->dbg_dir, mdev,
-			    &mic_msi_irq_info_fops);
-}
-
-/*
- * mic_delete_debug_dir - Uninitialize MIC debugfs entries.
- */
-void mic_delete_debug_dir(struct mic_device *mdev)
-{
-	debugfs_remove_recursive(mdev->dbg_dir);
-}
-
-/*
- * mic_init_debugfs - Initialize global debugfs entry.
- */
-void __init mic_init_debugfs(void)
-{
-	mic_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL);
-}
-
-/*
- * mic_exit_debugfs - Uninitialize global debugfs entry
- */
-void mic_exit_debugfs(void)
-{
-	debugfs_remove(mic_dbg);
-}
diff --git a/drivers/misc/mic/host/mic_device.h b/drivers/misc/mic/host/mic_device.h
deleted file mode 100644
index 41bcd308ae59..000000000000
--- a/drivers/misc/mic/host/mic_device.h
+++ /dev/null
@@ -1,157 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * Intel MIC Host driver.
- */
-#ifndef _MIC_DEVICE_H_
-#define _MIC_DEVICE_H_
-
-#include <linux/cdev.h>
-#include <linux/idr.h>
-#include <linux/notifier.h>
-#include <linux/irqreturn.h>
-#include <linux/dmaengine.h>
-#include <linux/miscdevice.h>
-#include <linux/mic_bus.h>
-#include "../bus/scif_bus.h"
-#include "../bus/vop_bus.h"
-#include "../bus/cosm_bus.h"
-#include "mic_intr.h"
-
-/**
- * enum mic_stepping - MIC stepping ids.
- */
-enum mic_stepping {
-	MIC_A0_STEP = 0x0,
-	MIC_B0_STEP = 0x10,
-	MIC_B1_STEP = 0x11,
-	MIC_C0_STEP = 0x20,
-};
-
-extern struct cosm_hw_ops cosm_hw_ops;
-
-/**
- * struct mic_device -  MIC device information for each card.
- *
- * @mmio: MMIO bar information.
- * @aper: Aperture bar information.
- * @family: The MIC family to which this device belongs.
- * @ops: MIC HW specific operations.
- * @id: The unique device id for this MIC device.
- * @stepping: Stepping ID.
- * @pdev: Underlying PCI device.
- * @mic_mutex: Mutex for synchronizing access to mic_device.
- * @intr_ops: HW specific interrupt operations.
- * @smpt_ops: Hardware specific SMPT operations.
- * @smpt: MIC SMPT information.
- * @intr_info: H/W specific interrupt information.
- * @irq_info: The OS specific irq information
- * @dbg_dir: debugfs directory of this MIC device.
- * @bootaddr: MIC boot address.
- * @dp: virtio device page
- * @dp_dma_addr: virtio device page DMA address.
- * @dma_mbdev: MIC BUS DMA device.
- * @dma_ch - Array of DMA channels
- * @num_dma_ch - Number of DMA channels available
- * @scdev: SCIF device on the SCIF virtual bus.
- * @vpdev: Virtio over PCIe device on the VOP virtual bus.
- * @cosm_dev: COSM device
- */
-struct mic_device {
-	struct mic_mw mmio;
-	struct mic_mw aper;
-	enum mic_hw_family family;
-	struct mic_hw_ops *ops;
-	int id;
-	enum mic_stepping stepping;
-	struct pci_dev *pdev;
-	struct mutex mic_mutex;
-	struct mic_hw_intr_ops *intr_ops;
-	struct mic_smpt_ops *smpt_ops;
-	struct mic_smpt_info *smpt;
-	struct mic_intr_info *intr_info;
-	struct mic_irq_info irq_info;
-	struct dentry *dbg_dir;
-	u32 bootaddr;
-	void *dp;
-	dma_addr_t dp_dma_addr;
-	struct mbus_device *dma_mbdev;
-	struct dma_chan *dma_ch[MIC_MAX_DMA_CHAN];
-	int num_dma_ch;
-	struct scif_hw_dev *scdev;
-	struct vop_device *vpdev;
-	struct cosm_device *cosm_dev;
-};
-
-/**
- * struct mic_hw_ops - MIC HW specific operations.
- * @aper_bar: Aperture bar resource number.
- * @mmio_bar: MMIO bar resource number.
- * @read_spad: Read from scratch pad register.
- * @write_spad: Write to scratch pad register.
- * @send_intr: Send an interrupt for a particular doorbell on the card.
- * @ack_interrupt: Hardware specific operations to ack the h/w on
- * receipt of an interrupt.
- * @intr_workarounds: Hardware specific workarounds needed after
- * handling an interrupt.
- * @reset: Reset the remote processor.
- * @reset_fw_ready: Reset firmware ready field.
- * @is_fw_ready: Check if firmware is ready for OS download.
- * @send_firmware_intr: Send an interrupt to the card firmware.
- * @load_mic_fw: Load firmware segments required to boot the card
- * into card memory. This includes the kernel, command line, ramdisk etc.
- * @get_postcode: Get post code status from firmware.
- * @dma_filter: DMA filter function to be used.
- */
-struct mic_hw_ops {
-	u8 aper_bar;
-	u8 mmio_bar;
-	u32 (*read_spad)(struct mic_device *mdev, unsigned int idx);
-	void (*write_spad)(struct mic_device *mdev, unsigned int idx, u32 val);
-	void (*send_intr)(struct mic_device *mdev, int doorbell);
-	u32 (*ack_interrupt)(struct mic_device *mdev);
-	void (*intr_workarounds)(struct mic_device *mdev);
-	void (*reset)(struct mic_device *mdev);
-	void (*reset_fw_ready)(struct mic_device *mdev);
-	bool (*is_fw_ready)(struct mic_device *mdev);
-	void (*send_firmware_intr)(struct mic_device *mdev);
-	int (*load_mic_fw)(struct mic_device *mdev, const char *buf);
-	u32 (*get_postcode)(struct mic_device *mdev);
-	bool (*dma_filter)(struct dma_chan *chan, void *param);
-};
-
-/**
- * mic_mmio_read - read from an MMIO register.
- * @mw: MMIO register base virtual address.
- * @offset: register offset.
- *
- * RETURNS: register value.
- */
-static inline u32 mic_mmio_read(struct mic_mw *mw, u32 offset)
-{
-	return ioread32(mw->va + offset);
-}
-
-/**
- * mic_mmio_write - write to an MMIO register.
- * @mw: MMIO register base virtual address.
- * @val: the data value to put into the register
- * @offset: register offset.
- *
- * RETURNS: none.
- */
-static inline void
-mic_mmio_write(struct mic_mw *mw, u32 val, u32 offset)
-{
-	iowrite32(val, mw->va + offset);
-}
-
-void mic_bootparam_init(struct mic_device *mdev);
-void mic_create_debug_dir(struct mic_device *dev);
-void mic_delete_debug_dir(struct mic_device *dev);
-void __init mic_init_debugfs(void);
-void mic_exit_debugfs(void);
-#endif
diff --git a/drivers/misc/mic/host/mic_intr.c b/drivers/misc/mic/host/mic_intr.c
deleted file mode 100644
index 85b3221b5d40..000000000000
--- a/drivers/misc/mic/host/mic_intr.c
+++ /dev/null
@@ -1,635 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * Intel MIC Host driver.
- */
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-
-#include "../common/mic_dev.h"
-#include "mic_device.h"
-
-static irqreturn_t mic_thread_fn(int irq, void *dev)
-{
-	struct mic_device *mdev = dev;
-	struct mic_intr_info *intr_info = mdev->intr_info;
-	struct mic_irq_info *irq_info = &mdev->irq_info;
-	struct mic_intr_cb *intr_cb;
-	struct pci_dev *pdev = mdev->pdev;
-	int i;
-
-	spin_lock(&irq_info->mic_thread_lock);
-	for (i = intr_info->intr_start_idx[MIC_INTR_DB];
-			i < intr_info->intr_len[MIC_INTR_DB]; i++)
-		if (test_and_clear_bit(i, &irq_info->mask)) {
-			list_for_each_entry(intr_cb, &irq_info->cb_list[i],
-					    list)
-				if (intr_cb->thread_fn)
-					intr_cb->thread_fn(pdev->irq,
-							 intr_cb->data);
-		}
-	spin_unlock(&irq_info->mic_thread_lock);
-	return IRQ_HANDLED;
-}
-/**
- * mic_interrupt - Generic interrupt handler for
- * MSI and INTx based interrupts.
- * @irq:  interrupt to handle (unused)
- * @dev: pointer to the mic_device instance
- */
-static irqreturn_t mic_interrupt(int irq, void *dev)
-{
-	struct mic_device *mdev = dev;
-	struct mic_intr_info *intr_info = mdev->intr_info;
-	struct mic_irq_info *irq_info = &mdev->irq_info;
-	struct mic_intr_cb *intr_cb;
-	struct pci_dev *pdev = mdev->pdev;
-	u32 mask;
-	int i;
-
-	mask = mdev->ops->ack_interrupt(mdev);
-	if (!mask)
-		return IRQ_NONE;
-
-	spin_lock(&irq_info->mic_intr_lock);
-	for (i = intr_info->intr_start_idx[MIC_INTR_DB];
-			i < intr_info->intr_len[MIC_INTR_DB]; i++)
-		if (mask & BIT(i)) {
-			list_for_each_entry(intr_cb, &irq_info->cb_list[i],
-					    list)
-				if (intr_cb->handler)
-					intr_cb->handler(pdev->irq,
-							 intr_cb->data);
-			set_bit(i, &irq_info->mask);
-		}
-	spin_unlock(&irq_info->mic_intr_lock);
-	return IRQ_WAKE_THREAD;
-}
-
-/* Return the interrupt offset from the index. Index is 0 based. */
-static u16 mic_map_src_to_offset(struct mic_device *mdev,
-				 int intr_src, enum mic_intr_type type)
-{
-	if (type >= MIC_NUM_INTR_TYPES)
-		return MIC_NUM_OFFSETS;
-	if (intr_src >= mdev->intr_info->intr_len[type])
-		return MIC_NUM_OFFSETS;
-
-	return mdev->intr_info->intr_start_idx[type] + intr_src;
-}
-
-/* Return next available msix_entry. */
-static struct msix_entry *mic_get_available_vector(struct mic_device *mdev)
-{
-	int i;
-	struct mic_irq_info *info = &mdev->irq_info;
-
-	for (i = 0; i < info->num_vectors; i++)
-		if (!info->mic_msi_map[i])
-			return &info->msix_entries[i];
-	return NULL;
-}
-
-/**
- * mic_register_intr_callback - Register a callback handler for the
- * given source id.
- *
- * @mdev: pointer to the mic_device instance
- * @idx: The source id to be registered.
- * @handler: The function to be called when the source id receives
- * the interrupt.
- * @thread_fn: thread fn. corresponding to the handler
- * @data: Private data of the requester.
- * Return the callback structure that was registered or an
- * appropriate error on failure.
- */
-static struct mic_intr_cb *mic_register_intr_callback(struct mic_device *mdev,
-			u8 idx, irq_handler_t handler, irq_handler_t thread_fn,
-			void *data)
-{
-	struct mic_intr_cb *intr_cb;
-	unsigned long flags;
-	int rc;
-	intr_cb = kmalloc(sizeof(*intr_cb), GFP_KERNEL);
-
-	if (!intr_cb)
-		return ERR_PTR(-ENOMEM);
-
-	intr_cb->handler = handler;
-	intr_cb->thread_fn = thread_fn;
-	intr_cb->data = data;
-	intr_cb->cb_id = ida_simple_get(&mdev->irq_info.cb_ida,
-		0, 0, GFP_KERNEL);
-	if (intr_cb->cb_id < 0) {
-		rc = intr_cb->cb_id;
-		goto ida_fail;
-	}
-
-	spin_lock(&mdev->irq_info.mic_thread_lock);
-	spin_lock_irqsave(&mdev->irq_info.mic_intr_lock, flags);
-	list_add_tail(&intr_cb->list, &mdev->irq_info.cb_list[idx]);
-	spin_unlock_irqrestore(&mdev->irq_info.mic_intr_lock, flags);
-	spin_unlock(&mdev->irq_info.mic_thread_lock);
-
-	return intr_cb;
-ida_fail:
-	kfree(intr_cb);
-	return ERR_PTR(rc);
-}
-
-/**
- * mic_unregister_intr_callback - Unregister the callback handler
- * identified by its callback id.
- *
- * @mdev: pointer to the mic_device instance
- * @idx: The callback structure id to be unregistered.
- * Return the source id that was unregistered or MIC_NUM_OFFSETS if no
- * such callback handler was found.
- */
-static u8 mic_unregister_intr_callback(struct mic_device *mdev, u32 idx)
-{
-	struct list_head *pos, *tmp;
-	struct mic_intr_cb *intr_cb;
-	unsigned long flags;
-	int i;
-
-	spin_lock(&mdev->irq_info.mic_thread_lock);
-	spin_lock_irqsave(&mdev->irq_info.mic_intr_lock, flags);
-	for (i = 0;  i < MIC_NUM_OFFSETS; i++) {
-		list_for_each_safe(pos, tmp, &mdev->irq_info.cb_list[i]) {
-			intr_cb = list_entry(pos, struct mic_intr_cb, list);
-			if (intr_cb->cb_id == idx) {
-				list_del(pos);
-				ida_simple_remove(&mdev->irq_info.cb_ida,
-						  intr_cb->cb_id);
-				kfree(intr_cb);
-				spin_unlock_irqrestore(
-					&mdev->irq_info.mic_intr_lock, flags);
-				spin_unlock(&mdev->irq_info.mic_thread_lock);
-				return i;
-			}
-		}
-	}
-	spin_unlock_irqrestore(&mdev->irq_info.mic_intr_lock, flags);
-	spin_unlock(&mdev->irq_info.mic_thread_lock);
-	return MIC_NUM_OFFSETS;
-}
-
-/**
- * mic_setup_msix - Initializes MSIx interrupts.
- *
- * @mdev: pointer to mic_device instance
- * @pdev: PCI device structure
- *
- * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
- */
-static int mic_setup_msix(struct mic_device *mdev, struct pci_dev *pdev)
-{
-	int rc, i;
-	int entry_size = sizeof(*mdev->irq_info.msix_entries);
-
-	mdev->irq_info.msix_entries = kmalloc_array(MIC_MIN_MSIX,
-						    entry_size, GFP_KERNEL);
-	if (!mdev->irq_info.msix_entries) {
-		rc = -ENOMEM;
-		goto err_nomem1;
-	}
-
-	for (i = 0; i < MIC_MIN_MSIX; i++)
-		mdev->irq_info.msix_entries[i].entry = i;
-
-	rc = pci_enable_msix_exact(pdev, mdev->irq_info.msix_entries,
-				   MIC_MIN_MSIX);
-	if (rc) {
-		dev_dbg(&pdev->dev, "Error enabling MSIx. rc = %d\n", rc);
-		goto err_enable_msix;
-	}
-
-	mdev->irq_info.num_vectors = MIC_MIN_MSIX;
-	mdev->irq_info.mic_msi_map = kzalloc((sizeof(u32) *
-		mdev->irq_info.num_vectors), GFP_KERNEL);
-
-	if (!mdev->irq_info.mic_msi_map) {
-		rc = -ENOMEM;
-		goto err_nomem2;
-	}
-
-	dev_dbg(&mdev->pdev->dev,
-		"%d MSIx irqs setup\n", mdev->irq_info.num_vectors);
-	return 0;
-err_nomem2:
-	pci_disable_msix(pdev);
-err_enable_msix:
-	kfree(mdev->irq_info.msix_entries);
-err_nomem1:
-	mdev->irq_info.num_vectors = 0;
-	return rc;
-}
-
-/**
- * mic_setup_callbacks - Initialize data structures needed
- * to handle callbacks.
- *
- * @mdev: pointer to mic_device instance
- */
-static int mic_setup_callbacks(struct mic_device *mdev)
-{
-	int i;
-
-	mdev->irq_info.cb_list = kmalloc_array(MIC_NUM_OFFSETS,
-					       sizeof(*mdev->irq_info.cb_list),
-					       GFP_KERNEL);
-	if (!mdev->irq_info.cb_list)
-		return -ENOMEM;
-
-	for (i = 0; i < MIC_NUM_OFFSETS; i++)
-		INIT_LIST_HEAD(&mdev->irq_info.cb_list[i]);
-	ida_init(&mdev->irq_info.cb_ida);
-	spin_lock_init(&mdev->irq_info.mic_intr_lock);
-	spin_lock_init(&mdev->irq_info.mic_thread_lock);
-	return 0;
-}
-
-/**
- * mic_release_callbacks - Uninitialize data structures needed
- * to handle callbacks.
- *
- * @mdev: pointer to mic_device instance
- */
-static void mic_release_callbacks(struct mic_device *mdev)
-{
-	unsigned long flags;
-	struct list_head *pos, *tmp;
-	struct mic_intr_cb *intr_cb;
-	int i;
-
-	spin_lock(&mdev->irq_info.mic_thread_lock);
-	spin_lock_irqsave(&mdev->irq_info.mic_intr_lock, flags);
-	for (i = 0; i < MIC_NUM_OFFSETS; i++) {
-		if (list_empty(&mdev->irq_info.cb_list[i]))
-			break;
-
-		list_for_each_safe(pos, tmp, &mdev->irq_info.cb_list[i]) {
-			intr_cb = list_entry(pos, struct mic_intr_cb, list);
-			list_del(pos);
-			ida_simple_remove(&mdev->irq_info.cb_ida,
-					  intr_cb->cb_id);
-			kfree(intr_cb);
-		}
-	}
-	spin_unlock_irqrestore(&mdev->irq_info.mic_intr_lock, flags);
-	spin_unlock(&mdev->irq_info.mic_thread_lock);
-	ida_destroy(&mdev->irq_info.cb_ida);
-	kfree(mdev->irq_info.cb_list);
-}
-
-/**
- * mic_setup_msi - Initializes MSI interrupts.
- *
- * @mdev: pointer to mic_device instance
- * @pdev: PCI device structure
- *
- * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
- */
-static int mic_setup_msi(struct mic_device *mdev, struct pci_dev *pdev)
-{
-	int rc;
-
-	rc = pci_enable_msi(pdev);
-	if (rc) {
-		dev_dbg(&pdev->dev, "Error enabling MSI. rc = %d\n", rc);
-		return rc;
-	}
-
-	mdev->irq_info.num_vectors = 1;
-	mdev->irq_info.mic_msi_map = kzalloc((sizeof(u32) *
-		mdev->irq_info.num_vectors), GFP_KERNEL);
-
-	if (!mdev->irq_info.mic_msi_map) {
-		rc = -ENOMEM;
-		goto err_nomem1;
-	}
-
-	rc = mic_setup_callbacks(mdev);
-	if (rc) {
-		dev_err(&pdev->dev, "Error setting up callbacks\n");
-		goto err_nomem2;
-	}
-
-	rc = request_threaded_irq(pdev->irq, mic_interrupt, mic_thread_fn,
-				  0, "mic-msi", mdev);
-	if (rc) {
-		dev_err(&pdev->dev, "Error allocating MSI interrupt\n");
-		goto err_irq_req_fail;
-	}
-
-	dev_dbg(&pdev->dev, "%d MSI irqs setup\n", mdev->irq_info.num_vectors);
-	return 0;
-err_irq_req_fail:
-	mic_release_callbacks(mdev);
-err_nomem2:
-	kfree(mdev->irq_info.mic_msi_map);
-err_nomem1:
-	pci_disable_msi(pdev);
-	mdev->irq_info.num_vectors = 0;
-	return rc;
-}
-
-/**
- * mic_setup_intx - Initializes legacy interrupts.
- *
- * @mdev: pointer to mic_device instance
- * @pdev: PCI device structure
- *
- * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
- */
-static int mic_setup_intx(struct mic_device *mdev, struct pci_dev *pdev)
-{
-	int rc;
-
-	/* Enable intx */
-	pci_intx(pdev, 1);
-	rc = mic_setup_callbacks(mdev);
-	if (rc) {
-		dev_err(&pdev->dev, "Error setting up callbacks\n");
-		goto err_nomem;
-	}
-
-	rc = request_threaded_irq(pdev->irq, mic_interrupt, mic_thread_fn,
-				  IRQF_SHARED, "mic-intx", mdev);
-	if (rc)
-		goto err;
-
-	dev_dbg(&pdev->dev, "intx irq setup\n");
-	return 0;
-err:
-	mic_release_callbacks(mdev);
-err_nomem:
-	return rc;
-}
-
-/**
- * mic_next_db - Retrieve the next doorbell interrupt source id.
- * The id is picked sequentially from the available pool of
- * doorlbell ids.
- *
- * @mdev: pointer to the mic_device instance.
- *
- * Returns the next doorbell interrupt source.
- */
-int mic_next_db(struct mic_device *mdev)
-{
-	int next_db;
-
-	next_db = mdev->irq_info.next_avail_src %
-		mdev->intr_info->intr_len[MIC_INTR_DB];
-	mdev->irq_info.next_avail_src++;
-	return next_db;
-}
-
-#define COOKIE_ID_SHIFT 16
-#define GET_ENTRY(cookie) ((cookie) & 0xFFFF)
-#define GET_OFFSET(cookie) ((cookie) >> COOKIE_ID_SHIFT)
-#define MK_COOKIE(x, y) ((x) | (y) << COOKIE_ID_SHIFT)
-
-/**
- * mic_request_threaded_irq - request an irq. mic_mutex needs
- * to be held before calling this function.
- *
- * @mdev: pointer to mic_device instance
- * @handler: The callback function that handles the interrupt.
- * The function needs to call ack_interrupts
- * (mdev->ops->ack_interrupt(mdev)) when handling the interrupts.
- * @thread_fn: thread fn required by request_threaded_irq.
- * @name: The ASCII name of the callee requesting the irq.
- * @data: private data that is returned back when calling the
- * function handler.
- * @intr_src: The source id of the requester. Its the doorbell id
- * for Doorbell interrupts and DMA channel id for DMA interrupts.
- * @type: The type of interrupt. Values defined in mic_intr_type
- *
- * returns: The cookie that is transparent to the caller. Passed
- * back when calling mic_free_irq. An appropriate error code
- * is returned on failure. Caller needs to use IS_ERR(return_val)
- * to check for failure and PTR_ERR(return_val) to obtained the
- * error code.
- *
- */
-struct mic_irq *
-mic_request_threaded_irq(struct mic_device *mdev,
-			 irq_handler_t handler, irq_handler_t thread_fn,
-			 const char *name, void *data, int intr_src,
-			 enum mic_intr_type type)
-{
-	u16 offset;
-	int rc = 0;
-	struct msix_entry *msix = NULL;
-	unsigned long cookie = 0;
-	u16 entry;
-	struct mic_intr_cb *intr_cb;
-	struct pci_dev *pdev = mdev->pdev;
-
-	offset = mic_map_src_to_offset(mdev, intr_src, type);
-	if (offset >= MIC_NUM_OFFSETS) {
-		dev_err(&mdev->pdev->dev,
-			"Error mapping index %d to a valid source id.\n",
-			intr_src);
-		rc = -EINVAL;
-		goto err;
-	}
-
-	if (mdev->irq_info.num_vectors > 1) {
-		msix = mic_get_available_vector(mdev);
-		if (!msix) {
-			dev_err(&mdev->pdev->dev,
-				"No MSIx vectors available for use.\n");
-			rc = -ENOSPC;
-			goto err;
-		}
-
-		rc = request_threaded_irq(msix->vector, handler, thread_fn,
-					  0, name, data);
-		if (rc) {
-			dev_dbg(&mdev->pdev->dev,
-				"request irq failed rc = %d\n", rc);
-			goto err;
-		}
-		entry = msix->entry;
-		mdev->irq_info.mic_msi_map[entry] |= BIT(offset);
-		mdev->intr_ops->program_msi_to_src_map(mdev,
-				entry, offset, true);
-		cookie = MK_COOKIE(entry, offset);
-		dev_dbg(&mdev->pdev->dev, "irq: %d assigned for src: %d\n",
-			msix->vector, intr_src);
-	} else {
-		intr_cb = mic_register_intr_callback(mdev, offset, handler,
-						     thread_fn, data);
-		if (IS_ERR(intr_cb)) {
-			dev_err(&mdev->pdev->dev,
-				"No available callback entries for use\n");
-			rc = PTR_ERR(intr_cb);
-			goto err;
-		}
-
-		entry = 0;
-		if (pci_dev_msi_enabled(pdev)) {
-			mdev->irq_info.mic_msi_map[entry] |= (1 << offset);
-			mdev->intr_ops->program_msi_to_src_map(mdev,
-				entry, offset, true);
-		}
-		cookie = MK_COOKIE(entry, intr_cb->cb_id);
-		dev_dbg(&mdev->pdev->dev, "callback %d registered for src: %d\n",
-			intr_cb->cb_id, intr_src);
-	}
-	return (struct mic_irq *)cookie;
-err:
-	return ERR_PTR(rc);
-}
-
-/**
- * mic_free_irq - free irq. mic_mutex
- *  needs to be held before calling this function.
- *
- * @mdev: pointer to mic_device instance
- * @cookie: cookie obtained during a successful call to mic_request_threaded_irq
- * @data: private data specified by the calling function during the
- * mic_request_threaded_irq
- *
- * returns: none.
- */
-void mic_free_irq(struct mic_device *mdev,
-		  struct mic_irq *cookie, void *data)
-{
-	u32 offset;
-	u32 entry;
-	u8 src_id;
-	unsigned int irq;
-	struct pci_dev *pdev = mdev->pdev;
-
-	entry = GET_ENTRY((unsigned long)cookie);
-	offset = GET_OFFSET((unsigned long)cookie);
-	if (mdev->irq_info.num_vectors > 1) {
-		if (entry >= mdev->irq_info.num_vectors) {
-			dev_warn(&mdev->pdev->dev,
-				 "entry %d should be < num_irq %d\n",
-				entry, mdev->irq_info.num_vectors);
-			return;
-		}
-		irq = mdev->irq_info.msix_entries[entry].vector;
-		free_irq(irq, data);
-		mdev->irq_info.mic_msi_map[entry] &= ~(BIT(offset));
-		mdev->intr_ops->program_msi_to_src_map(mdev,
-			entry, offset, false);
-
-		dev_dbg(&mdev->pdev->dev, "irq: %d freed\n", irq);
-	} else {
-		irq = pdev->irq;
-		src_id = mic_unregister_intr_callback(mdev, offset);
-		if (src_id >= MIC_NUM_OFFSETS) {
-			dev_warn(&mdev->pdev->dev, "Error unregistering callback\n");
-			return;
-		}
-		if (pci_dev_msi_enabled(pdev)) {
-			mdev->irq_info.mic_msi_map[entry] &= ~(BIT(src_id));
-			mdev->intr_ops->program_msi_to_src_map(mdev,
-				entry, src_id, false);
-		}
-		dev_dbg(&mdev->pdev->dev, "callback %d unregistered for src: %d\n",
-			offset, src_id);
-	}
-}
-
-/**
- * mic_setup_interrupts - Initializes interrupts.
- *
- * @mdev: pointer to mic_device instance
- * @pdev: PCI device structure
- *
- * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
- */
-int mic_setup_interrupts(struct mic_device *mdev, struct pci_dev *pdev)
-{
-	int rc;
-
-	rc = mic_setup_msix(mdev, pdev);
-	if (!rc)
-		goto done;
-
-	rc = mic_setup_msi(mdev, pdev);
-	if (!rc)
-		goto done;
-
-	rc = mic_setup_intx(mdev, pdev);
-	if (rc) {
-		dev_err(&mdev->pdev->dev, "no usable interrupts\n");
-		return rc;
-	}
-done:
-	mdev->intr_ops->enable_interrupts(mdev);
-	return 0;
-}
-
-/**
- * mic_free_interrupts - Frees interrupts setup by mic_setup_interrupts
- *
- * @mdev: pointer to mic_device instance
- * @pdev: PCI device structure
- *
- * returns none.
- */
-void mic_free_interrupts(struct mic_device *mdev, struct pci_dev *pdev)
-{
-	int i;
-
-	mdev->intr_ops->disable_interrupts(mdev);
-	if (mdev->irq_info.num_vectors > 1) {
-		for (i = 0; i < mdev->irq_info.num_vectors; i++) {
-			if (mdev->irq_info.mic_msi_map[i])
-				dev_warn(&pdev->dev, "irq %d may still be in use.\n",
-					 mdev->irq_info.msix_entries[i].vector);
-		}
-		kfree(mdev->irq_info.mic_msi_map);
-		kfree(mdev->irq_info.msix_entries);
-		pci_disable_msix(pdev);
-	} else {
-		if (pci_dev_msi_enabled(pdev)) {
-			free_irq(pdev->irq, mdev);
-			kfree(mdev->irq_info.mic_msi_map);
-			pci_disable_msi(pdev);
-		} else {
-			free_irq(pdev->irq, mdev);
-		}
-		mic_release_callbacks(mdev);
-	}
-}
-
-/**
- * mic_intr_restore - Restore MIC interrupt registers.
- *
- * @mdev: pointer to mic_device instance.
- *
- * Restore the interrupt registers to values previously
- * stored in the SW data structures. mic_mutex needs to
- * be held before calling this function.
- *
- * returns None.
- */
-void mic_intr_restore(struct mic_device *mdev)
-{
-	int entry, offset;
-	struct pci_dev *pdev = mdev->pdev;
-
-	if (!pci_dev_msi_enabled(pdev))
-		return;
-
-	for (entry = 0; entry < mdev->irq_info.num_vectors; entry++) {
-		for (offset = 0; offset < MIC_NUM_OFFSETS; offset++) {
-			if (mdev->irq_info.mic_msi_map[entry] & BIT(offset))
-				mdev->intr_ops->program_msi_to_src_map(mdev,
-					entry, offset, true);
-		}
-	}
-}
diff --git a/drivers/misc/mic/host/mic_intr.h b/drivers/misc/mic/host/mic_intr.h
deleted file mode 100644
index b14ba818006f..000000000000
--- a/drivers/misc/mic/host/mic_intr.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * Intel MIC Host driver.
- */
-#ifndef _MIC_INTR_H_
-#define _MIC_INTR_H_
-
-#include <linux/bitops.h>
-#include <linux/interrupt.h>
-/*
- * The minimum number of msix vectors required for normal operation.
- * 3 for virtio network, console and block devices.
- * 1 for card shutdown notifications.
- * 4 for host owned DMA channels.
- * 1 for SCIF
- */
-#define MIC_MIN_MSIX 9
-#define MIC_NUM_OFFSETS 32
-
-/**
- * mic_intr_source - The type of source that will generate
- * the interrupt.The number of types needs to be in sync with
- * MIC_NUM_INTR_TYPES
- *
- * MIC_INTR_DB: The source is a doorbell
- * MIC_INTR_DMA: The source is a DMA channel
- * MIC_INTR_ERR: The source is an error interrupt e.g. SBOX ERR
- * MIC_NUM_INTR_TYPES: Total number of interrupt sources.
- */
-enum mic_intr_type {
-	MIC_INTR_DB = 0,
-	MIC_INTR_DMA,
-	MIC_INTR_ERR,
-	MIC_NUM_INTR_TYPES
-};
-
-/**
- * struct mic_intr_info - Contains h/w specific interrupt sources
- * information.
- *
- * @intr_start_idx: Contains the starting indexes of the
- * interrupt types.
- * @intr_len: Contains the length of the interrupt types.
- */
-struct mic_intr_info {
-	u16 intr_start_idx[MIC_NUM_INTR_TYPES];
-	u16 intr_len[MIC_NUM_INTR_TYPES];
-};
-
-/**
- * struct mic_irq_info - OS specific irq information
- *
- * @next_avail_src: next available doorbell that can be assigned.
- * @msix_entries: msix entries allocated while setting up MSI-x
- * @mic_msi_map: The MSI/MSI-x mapping information.
- * @num_vectors: The number of MSI/MSI-x vectors that have been allocated.
- * @cb_ida: callback ID allocator to track the callbacks registered.
- * @mic_intr_lock: spinlock to protect the interrupt callback list.
- * @mic_thread_lock: spinlock to protect the thread callback list.
- *		   This lock is used to protect against thread_fn while
- *		   mic_intr_lock is used to protect against interrupt handler.
- * @cb_list: Array of callback lists one for each source.
- * @mask: Mask used by the main thread fn to call the underlying thread fns.
- */
-struct mic_irq_info {
-	int next_avail_src;
-	struct msix_entry *msix_entries;
-	u32 *mic_msi_map;
-	u16 num_vectors;
-	struct ida cb_ida;
-	spinlock_t mic_intr_lock;
-	spinlock_t mic_thread_lock;
-	struct list_head *cb_list;
-	unsigned long mask;
-};
-
-/**
- * struct mic_intr_cb - Interrupt callback structure.
- *
- * @handler: The callback function
- * @thread_fn: The thread_fn.
- * @data: Private data of the requester.
- * @cb_id: The callback id. Identifies this callback.
- * @list: list head pointing to the next callback structure.
- */
-struct mic_intr_cb {
-	irq_handler_t handler;
-	irq_handler_t thread_fn;
-	void *data;
-	int cb_id;
-	struct list_head list;
-};
-
-/**
- * struct mic_irq - opaque pointer used as cookie
- */
-struct mic_irq;
-
-/* Forward declaration */
-struct mic_device;
-
-/**
- * struct mic_hw_intr_ops: MIC HW specific interrupt operations
- * @intr_init: Initialize H/W specific interrupt information.
- * @enable_interrupts: Enable interrupts from the hardware.
- * @disable_interrupts: Disable interrupts from the hardware.
- * @program_msi_to_src_map: Update MSI mapping registers with
- * irq information.
- * @read_msi_to_src_map: Read MSI mapping registers containing
- * irq information.
- */
-struct mic_hw_intr_ops {
-	void (*intr_init)(struct mic_device *mdev);
-	void (*enable_interrupts)(struct mic_device *mdev);
-	void (*disable_interrupts)(struct mic_device *mdev);
-	void (*program_msi_to_src_map) (struct mic_device *mdev,
-			int idx, int intr_src, bool set);
-	u32 (*read_msi_to_src_map) (struct mic_device *mdev,
-			int idx);
-};
-
-int mic_next_db(struct mic_device *mdev);
-struct mic_irq *
-mic_request_threaded_irq(struct mic_device *mdev,
-			 irq_handler_t handler, irq_handler_t thread_fn,
-			 const char *name, void *data, int intr_src,
-			 enum mic_intr_type type);
-void mic_free_irq(struct mic_device *mdev,
-		struct mic_irq *cookie, void *data);
-int mic_setup_interrupts(struct mic_device *mdev, struct pci_dev *pdev);
-void mic_free_interrupts(struct mic_device *mdev, struct pci_dev *pdev);
-void mic_intr_restore(struct mic_device *mdev);
-#endif
diff --git a/drivers/misc/mic/host/mic_main.c b/drivers/misc/mic/host/mic_main.c
deleted file mode 100644
index ea4608527ea0..000000000000
--- a/drivers/misc/mic/host/mic_main.c
+++ /dev/null
@@ -1,335 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * Intel MIC Host driver.
- */
-#include <linux/fs.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/poll.h>
-
-#include <linux/mic_common.h>
-#include "../common/mic_dev.h"
-#include "mic_device.h"
-#include "mic_x100.h"
-#include "mic_smpt.h"
-
-static const char mic_driver_name[] = "mic";
-
-static const struct pci_device_id mic_pci_tbl[] = {
-	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2250)},
-	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2251)},
-	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2252)},
-	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2253)},
-	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2254)},
-	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2255)},
-	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2256)},
-	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2257)},
-	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2258)},
-	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2259)},
-	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_225a)},
-	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_225b)},
-	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_225c)},
-	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_225d)},
-	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_225e)},
-
-	/* required last entry */
-	{ 0, }
-};
-
-MODULE_DEVICE_TABLE(pci, mic_pci_tbl);
-
-/* ID allocator for MIC devices */
-static struct ida g_mic_ida;
-
-/* Initialize the device page */
-static int mic_dp_init(struct mic_device *mdev)
-{
-	mdev->dp = kzalloc(MIC_DP_SIZE, GFP_KERNEL);
-	if (!mdev->dp)
-		return -ENOMEM;
-
-	mdev->dp_dma_addr = mic_map_single(mdev,
-		mdev->dp, MIC_DP_SIZE);
-	if (mic_map_error(mdev->dp_dma_addr)) {
-		kfree(mdev->dp);
-		dev_err(&mdev->pdev->dev, "%s %d err %d\n",
-			__func__, __LINE__, -ENOMEM);
-		return -ENOMEM;
-	}
-	mdev->ops->write_spad(mdev, MIC_DPLO_SPAD, mdev->dp_dma_addr);
-	mdev->ops->write_spad(mdev, MIC_DPHI_SPAD, mdev->dp_dma_addr >> 32);
-	return 0;
-}
-
-/* Uninitialize the device page */
-static void mic_dp_uninit(struct mic_device *mdev)
-{
-	mic_unmap_single(mdev, mdev->dp_dma_addr, MIC_DP_SIZE);
-	kfree(mdev->dp);
-}
-
-/**
- * mic_ops_init: Initialize HW specific operation tables.
- *
- * @mdev: pointer to mic_device instance
- *
- * returns none.
- */
-static void mic_ops_init(struct mic_device *mdev)
-{
-	switch (mdev->family) {
-	case MIC_FAMILY_X100:
-		mdev->ops = &mic_x100_ops;
-		mdev->intr_ops = &mic_x100_intr_ops;
-		mdev->smpt_ops = &mic_x100_smpt_ops;
-		break;
-	default:
-		break;
-	}
-}
-
-/**
- * mic_get_family - Determine hardware family to which this MIC belongs.
- *
- * @pdev: The pci device structure
- *
- * returns family.
- */
-static enum mic_hw_family mic_get_family(struct pci_dev *pdev)
-{
-	enum mic_hw_family family;
-
-	switch (pdev->device) {
-	case MIC_X100_PCI_DEVICE_2250:
-	case MIC_X100_PCI_DEVICE_2251:
-	case MIC_X100_PCI_DEVICE_2252:
-	case MIC_X100_PCI_DEVICE_2253:
-	case MIC_X100_PCI_DEVICE_2254:
-	case MIC_X100_PCI_DEVICE_2255:
-	case MIC_X100_PCI_DEVICE_2256:
-	case MIC_X100_PCI_DEVICE_2257:
-	case MIC_X100_PCI_DEVICE_2258:
-	case MIC_X100_PCI_DEVICE_2259:
-	case MIC_X100_PCI_DEVICE_225a:
-	case MIC_X100_PCI_DEVICE_225b:
-	case MIC_X100_PCI_DEVICE_225c:
-	case MIC_X100_PCI_DEVICE_225d:
-	case MIC_X100_PCI_DEVICE_225e:
-		family = MIC_FAMILY_X100;
-		break;
-	default:
-		family = MIC_FAMILY_UNKNOWN;
-		break;
-	}
-	return family;
-}
-
-/**
- * mic_device_init - Allocates and initializes the MIC device structure
- *
- * @mdev: pointer to mic_device instance
- * @pdev: The pci device structure
- *
- * returns none.
- */
-static void
-mic_device_init(struct mic_device *mdev, struct pci_dev *pdev)
-{
-	mdev->pdev = pdev;
-	mdev->family = mic_get_family(pdev);
-	mdev->stepping = pdev->revision;
-	mic_ops_init(mdev);
-	mutex_init(&mdev->mic_mutex);
-	mdev->irq_info.next_avail_src = 0;
-}
-
-/**
- * mic_probe - Device Initialization Routine
- *
- * @pdev: PCI device structure
- * @ent: entry in mic_pci_tbl
- *
- * returns 0 on success, < 0 on failure.
- */
-static int mic_probe(struct pci_dev *pdev,
-		     const struct pci_device_id *ent)
-{
-	int rc;
-	struct mic_device *mdev;
-
-	mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
-	if (!mdev) {
-		rc = -ENOMEM;
-		goto mdev_alloc_fail;
-	}
-	mdev->id = ida_simple_get(&g_mic_ida, 0, MIC_MAX_NUM_DEVS, GFP_KERNEL);
-	if (mdev->id < 0) {
-		rc = mdev->id;
-		dev_err(&pdev->dev, "ida_simple_get failed rc %d\n", rc);
-		goto ida_fail;
-	}
-
-	mic_device_init(mdev, pdev);
-
-	rc = pci_enable_device(pdev);
-	if (rc) {
-		dev_err(&pdev->dev, "failed to enable pci device.\n");
-		goto ida_remove;
-	}
-
-	pci_set_master(pdev);
-
-	rc = pci_request_regions(pdev, mic_driver_name);
-	if (rc) {
-		dev_err(&pdev->dev, "failed to get pci regions.\n");
-		goto disable_device;
-	}
-
-	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
-	if (rc) {
-		dev_err(&pdev->dev, "Cannot set DMA mask\n");
-		goto release_regions;
-	}
-
-	mdev->mmio.pa = pci_resource_start(pdev, mdev->ops->mmio_bar);
-	mdev->mmio.len = pci_resource_len(pdev, mdev->ops->mmio_bar);
-	mdev->mmio.va = pci_ioremap_bar(pdev, mdev->ops->mmio_bar);
-	if (!mdev->mmio.va) {
-		dev_err(&pdev->dev, "Cannot remap MMIO BAR\n");
-		rc = -EIO;
-		goto release_regions;
-	}
-
-	mdev->aper.pa = pci_resource_start(pdev, mdev->ops->aper_bar);
-	mdev->aper.len = pci_resource_len(pdev, mdev->ops->aper_bar);
-	mdev->aper.va = ioremap_wc(mdev->aper.pa, mdev->aper.len);
-	if (!mdev->aper.va) {
-		dev_err(&pdev->dev, "Cannot remap Aperture BAR\n");
-		rc = -EIO;
-		goto unmap_mmio;
-	}
-
-	mdev->intr_ops->intr_init(mdev);
-	rc = mic_setup_interrupts(mdev, pdev);
-	if (rc) {
-		dev_err(&pdev->dev, "mic_setup_interrupts failed %d\n", rc);
-		goto unmap_aper;
-	}
-	rc = mic_smpt_init(mdev);
-	if (rc) {
-		dev_err(&pdev->dev, "smpt_init failed %d\n", rc);
-		goto free_interrupts;
-	}
-
-	pci_set_drvdata(pdev, mdev);
-
-	rc = mic_dp_init(mdev);
-	if (rc) {
-		dev_err(&pdev->dev, "mic_dp_init failed rc %d\n", rc);
-		goto smpt_uninit;
-	}
-	mic_bootparam_init(mdev);
-	mic_create_debug_dir(mdev);
-
-	mdev->cosm_dev = cosm_register_device(&mdev->pdev->dev, &cosm_hw_ops);
-	if (IS_ERR(mdev->cosm_dev)) {
-		rc = PTR_ERR(mdev->cosm_dev);
-		dev_err(&pdev->dev, "cosm_add_device failed rc %d\n", rc);
-		goto cleanup_debug_dir;
-	}
-	return 0;
-cleanup_debug_dir:
-	mic_delete_debug_dir(mdev);
-	mic_dp_uninit(mdev);
-smpt_uninit:
-	mic_smpt_uninit(mdev);
-free_interrupts:
-	mic_free_interrupts(mdev, pdev);
-unmap_aper:
-	iounmap(mdev->aper.va);
-unmap_mmio:
-	iounmap(mdev->mmio.va);
-release_regions:
-	pci_release_regions(pdev);
-disable_device:
-	pci_disable_device(pdev);
-ida_remove:
-	ida_simple_remove(&g_mic_ida, mdev->id);
-ida_fail:
-	kfree(mdev);
-mdev_alloc_fail:
-	dev_err(&pdev->dev, "Probe failed rc %d\n", rc);
-	return rc;
-}
-
-/**
- * mic_remove - Device Removal Routine
- * mic_remove is called by the PCI subsystem to alert the driver
- * that it should release a PCI device.
- *
- * @pdev: PCI device structure
- */
-static void mic_remove(struct pci_dev *pdev)
-{
-	struct mic_device *mdev;
-
-	mdev = pci_get_drvdata(pdev);
-	if (!mdev)
-		return;
-
-	cosm_unregister_device(mdev->cosm_dev);
-	mic_delete_debug_dir(mdev);
-	mic_dp_uninit(mdev);
-	mic_smpt_uninit(mdev);
-	mic_free_interrupts(mdev, pdev);
-	iounmap(mdev->aper.va);
-	iounmap(mdev->mmio.va);
-	pci_release_regions(pdev);
-	pci_disable_device(pdev);
-	ida_simple_remove(&g_mic_ida, mdev->id);
-	kfree(mdev);
-}
-
-static struct pci_driver mic_driver = {
-	.name = mic_driver_name,
-	.id_table = mic_pci_tbl,
-	.probe = mic_probe,
-	.remove = mic_remove
-};
-
-static int __init mic_init(void)
-{
-	int ret;
-
-	request_module("mic_x100_dma");
-	mic_init_debugfs();
-	ida_init(&g_mic_ida);
-	ret = pci_register_driver(&mic_driver);
-	if (ret) {
-		pr_err("pci_register_driver failed ret %d\n", ret);
-		goto cleanup_debugfs;
-	}
-	return 0;
-cleanup_debugfs:
-	ida_destroy(&g_mic_ida);
-	mic_exit_debugfs();
-	return ret;
-}
-
-static void __exit mic_exit(void)
-{
-	pci_unregister_driver(&mic_driver);
-	ida_destroy(&g_mic_ida);
-	mic_exit_debugfs();
-}
-
-module_init(mic_init);
-module_exit(mic_exit);
-
-MODULE_AUTHOR("Intel Corporation");
-MODULE_DESCRIPTION("Intel(R) MIC X100 Host driver");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/host/mic_smpt.c b/drivers/misc/mic/host/mic_smpt.c
deleted file mode 100644
index 50d1bebecd54..000000000000
--- a/drivers/misc/mic/host/mic_smpt.c
+++ /dev/null
@@ -1,427 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * Intel MIC Host driver.
- */
-#include <linux/pci.h>
-
-#include "../common/mic_dev.h"
-#include "mic_device.h"
-#include "mic_smpt.h"
-
-static inline u64 mic_system_page_mask(struct mic_device *mdev)
-{
-	return (1ULL << mdev->smpt->info.page_shift) - 1ULL;
-}
-
-static inline u8 mic_sys_addr_to_smpt(struct mic_device *mdev, dma_addr_t pa)
-{
-	return (pa - mdev->smpt->info.base) >> mdev->smpt->info.page_shift;
-}
-
-static inline u64 mic_smpt_to_pa(struct mic_device *mdev, u8 index)
-{
-	return mdev->smpt->info.base + (index * mdev->smpt->info.page_size);
-}
-
-static inline u64 mic_smpt_offset(struct mic_device *mdev, dma_addr_t pa)
-{
-	return pa & mic_system_page_mask(mdev);
-}
-
-static inline u64 mic_smpt_align_low(struct mic_device *mdev, dma_addr_t pa)
-{
-	return ALIGN(pa - mic_system_page_mask(mdev),
-		mdev->smpt->info.page_size);
-}
-
-static inline u64 mic_smpt_align_high(struct mic_device *mdev, dma_addr_t pa)
-{
-	return ALIGN(pa, mdev->smpt->info.page_size);
-}
-
-/* Total Cumulative system memory accessible by MIC across all SMPT entries */
-static inline u64 mic_max_system_memory(struct mic_device *mdev)
-{
-	return mdev->smpt->info.num_reg * mdev->smpt->info.page_size;
-}
-
-/* Maximum system memory address accessible by MIC */
-static inline u64 mic_max_system_addr(struct mic_device *mdev)
-{
-	return mdev->smpt->info.base + mic_max_system_memory(mdev) - 1ULL;
-}
-
-/* Check if the DMA address is a MIC system memory address */
-static inline bool
-mic_is_system_addr(struct mic_device *mdev, dma_addr_t pa)
-{
-	return pa >= mdev->smpt->info.base && pa <= mic_max_system_addr(mdev);
-}
-
-/* Populate an SMPT entry and update the reference counts. */
-static void mic_add_smpt_entry(int spt, s64 *ref, u64 addr,
-			       int entries, struct mic_device *mdev)
-{
-	struct mic_smpt_info *smpt_info = mdev->smpt;
-	int i;
-
-	for (i = spt; i < spt + entries; i++,
-		addr += smpt_info->info.page_size) {
-		if (!smpt_info->entry[i].ref_count &&
-		    (smpt_info->entry[i].dma_addr != addr)) {
-			mdev->smpt_ops->set(mdev, addr, i);
-			smpt_info->entry[i].dma_addr = addr;
-		}
-		smpt_info->entry[i].ref_count += ref[i - spt];
-	}
-}
-
-/*
- * Find an available MIC address in MIC SMPT address space
- * for a given DMA address and size.
- */
-static dma_addr_t mic_smpt_op(struct mic_device *mdev, u64 dma_addr,
-			      int entries, s64 *ref, size_t size)
-{
-	int spt;
-	int ae = 0;
-	int i;
-	unsigned long flags;
-	dma_addr_t mic_addr = 0;
-	dma_addr_t addr = dma_addr;
-	struct mic_smpt_info *smpt_info = mdev->smpt;
-
-	spin_lock_irqsave(&smpt_info->smpt_lock, flags);
-
-	/* find existing entries */
-	for (i = 0; i < smpt_info->info.num_reg; i++) {
-		if (smpt_info->entry[i].dma_addr == addr) {
-			ae++;
-			addr += smpt_info->info.page_size;
-		} else if (ae) /* cannot find contiguous entries */
-			goto not_found;
-
-		if (ae == entries)
-			goto found;
-	}
-
-	/* find free entry */
-	for (ae = 0, i = 0; i < smpt_info->info.num_reg; i++) {
-		ae = (smpt_info->entry[i].ref_count == 0) ? ae + 1 : 0;
-		if (ae == entries)
-			goto found;
-	}
-
-not_found:
-	spin_unlock_irqrestore(&smpt_info->smpt_lock, flags);
-	return mic_addr;
-
-found:
-	spt = i - entries + 1;
-	mic_addr = mic_smpt_to_pa(mdev, spt);
-	mic_add_smpt_entry(spt, ref, dma_addr, entries, mdev);
-	smpt_info->map_count++;
-	smpt_info->ref_count += (s64)size;
-	spin_unlock_irqrestore(&smpt_info->smpt_lock, flags);
-	return mic_addr;
-}
-
-/*
- * Returns number of smpt entries needed for dma_addr to dma_addr + size
- * also returns the reference count array for each of those entries
- * and the starting smpt address
- */
-static int mic_get_smpt_ref_count(struct mic_device *mdev, dma_addr_t dma_addr,
-				  size_t size, s64 *ref,  u64 *smpt_start)
-{
-	u64 start =  dma_addr;
-	u64 end = dma_addr + size;
-	int i = 0;
-
-	while (start < end) {
-		ref[i++] = min(mic_smpt_align_high(mdev, start + 1),
-			end) - start;
-		start = mic_smpt_align_high(mdev, start + 1);
-	}
-
-	if (smpt_start)
-		*smpt_start = mic_smpt_align_low(mdev, dma_addr);
-
-	return i;
-}
-
-/*
- * mic_to_dma_addr - Converts a MIC address to a DMA address.
- *
- * @mdev: pointer to mic_device instance.
- * @mic_addr: MIC address.
- *
- * returns a DMA address.
- */
-dma_addr_t mic_to_dma_addr(struct mic_device *mdev, dma_addr_t mic_addr)
-{
-	struct mic_smpt_info *smpt_info = mdev->smpt;
-	int spt;
-	dma_addr_t dma_addr;
-
-	if (!mic_is_system_addr(mdev, mic_addr)) {
-		dev_err(&mdev->pdev->dev,
-			"mic_addr is invalid. mic_addr = 0x%llx\n", mic_addr);
-		return -EINVAL;
-	}
-	spt = mic_sys_addr_to_smpt(mdev, mic_addr);
-	dma_addr = smpt_info->entry[spt].dma_addr +
-		mic_smpt_offset(mdev, mic_addr);
-	return dma_addr;
-}
-
-/**
- * mic_map - Maps a DMA address to a MIC physical address.
- *
- * @mdev: pointer to mic_device instance.
- * @dma_addr: DMA address.
- * @size: Size of the region to be mapped.
- *
- * This API converts the DMA address provided to a DMA address understood
- * by MIC. Caller should check for errors by calling mic_map_error(..).
- *
- * returns DMA address as required by MIC.
- */
-dma_addr_t mic_map(struct mic_device *mdev, dma_addr_t dma_addr, size_t size)
-{
-	dma_addr_t mic_addr = 0;
-	int num_entries;
-	s64 *ref;
-	u64 smpt_start;
-
-	if (!size || size > mic_max_system_memory(mdev))
-		return mic_addr;
-
-	ref = kmalloc_array(mdev->smpt->info.num_reg, sizeof(s64), GFP_ATOMIC);
-	if (!ref)
-		return mic_addr;
-
-	num_entries = mic_get_smpt_ref_count(mdev, dma_addr, size,
-					     ref, &smpt_start);
-
-	/* Set the smpt table appropriately and get 16G aligned mic address */
-	mic_addr = mic_smpt_op(mdev, smpt_start, num_entries, ref, size);
-
-	kfree(ref);
-
-	/*
-	 * If mic_addr is zero then its an error case
-	 * since mic_addr can never be zero.
-	 * else generate mic_addr by adding the 16G offset in dma_addr
-	 */
-	if (!mic_addr && MIC_FAMILY_X100 == mdev->family) {
-		dev_err(&mdev->pdev->dev,
-			"mic_map failed dma_addr 0x%llx size 0x%lx\n",
-			dma_addr, size);
-		return mic_addr;
-	} else {
-		return mic_addr + mic_smpt_offset(mdev, dma_addr);
-	}
-}
-
-/**
- * mic_unmap - Unmaps a MIC physical address.
- *
- * @mdev: pointer to mic_device instance.
- * @mic_addr: MIC physical address.
- * @size: Size of the region to be unmapped.
- *
- * This API unmaps the mappings created by mic_map(..).
- *
- * returns None.
- */
-void mic_unmap(struct mic_device *mdev, dma_addr_t mic_addr, size_t size)
-{
-	struct mic_smpt_info *smpt_info = mdev->smpt;
-	s64 *ref;
-	int num_smpt;
-	int spt;
-	int i;
-	unsigned long flags;
-
-	if (!size)
-		return;
-
-	if (!mic_is_system_addr(mdev, mic_addr)) {
-		dev_err(&mdev->pdev->dev,
-			"invalid address: 0x%llx\n", mic_addr);
-		return;
-	}
-
-	spt = mic_sys_addr_to_smpt(mdev, mic_addr);
-	ref = kmalloc_array(mdev->smpt->info.num_reg, sizeof(s64), GFP_ATOMIC);
-	if (!ref)
-		return;
-
-	/* Get number of smpt entries to be mapped, ref count array */
-	num_smpt = mic_get_smpt_ref_count(mdev, mic_addr, size, ref, NULL);
-
-	spin_lock_irqsave(&smpt_info->smpt_lock, flags);
-	smpt_info->unmap_count++;
-	smpt_info->ref_count -= (s64)size;
-
-	for (i = spt; i < spt + num_smpt; i++) {
-		smpt_info->entry[i].ref_count -= ref[i - spt];
-		if (smpt_info->entry[i].ref_count < 0)
-			dev_warn(&mdev->pdev->dev,
-				 "ref count for entry %d is negative\n", i);
-	}
-	spin_unlock_irqrestore(&smpt_info->smpt_lock, flags);
-	kfree(ref);
-}
-
-/**
- * mic_map_single - Maps a virtual address to a MIC physical address.
- *
- * @mdev: pointer to mic_device instance.
- * @va: Kernel direct mapped virtual address.
- * @size: Size of the region to be mapped.
- *
- * This API calls pci_map_single(..) for the direct mapped virtual address
- * and then converts the DMA address provided to a DMA address understood
- * by MIC. Caller should check for errors by calling mic_map_error(..).
- *
- * returns DMA address as required by MIC.
- */
-dma_addr_t mic_map_single(struct mic_device *mdev, void *va, size_t size)
-{
-	dma_addr_t mic_addr = 0;
-	struct pci_dev *pdev = mdev->pdev;
-	dma_addr_t dma_addr =
-		pci_map_single(pdev, va, size, PCI_DMA_BIDIRECTIONAL);
-
-	if (!pci_dma_mapping_error(pdev, dma_addr)) {
-		mic_addr = mic_map(mdev, dma_addr, size);
-		if (!mic_addr) {
-			dev_err(&mdev->pdev->dev,
-				"mic_map failed dma_addr 0x%llx size 0x%lx\n",
-				dma_addr, size);
-			pci_unmap_single(pdev, dma_addr,
-					 size, PCI_DMA_BIDIRECTIONAL);
-		}
-	}
-	return mic_addr;
-}
-
-/**
- * mic_unmap_single - Unmaps a MIC physical address.
- *
- * @mdev: pointer to mic_device instance.
- * @mic_addr: MIC physical address.
- * @size: Size of the region to be unmapped.
- *
- * This API unmaps the mappings created by mic_map_single(..).
- *
- * returns None.
- */
-void
-mic_unmap_single(struct mic_device *mdev, dma_addr_t mic_addr, size_t size)
-{
-	struct pci_dev *pdev = mdev->pdev;
-	dma_addr_t dma_addr = mic_to_dma_addr(mdev, mic_addr);
-	mic_unmap(mdev, mic_addr, size);
-	pci_unmap_single(pdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL);
-}
-
-/**
- * mic_smpt_init - Initialize MIC System Memory Page Tables.
- *
- * @mdev: pointer to mic_device instance.
- *
- * returns 0 for success and -errno for error.
- */
-int mic_smpt_init(struct mic_device *mdev)
-{
-	int i, err = 0;
-	dma_addr_t dma_addr;
-	struct mic_smpt_info *smpt_info;
-
-	mdev->smpt = kmalloc(sizeof(*mdev->smpt), GFP_KERNEL);
-	if (!mdev->smpt)
-		return -ENOMEM;
-
-	smpt_info = mdev->smpt;
-	mdev->smpt_ops->init(mdev);
-	smpt_info->entry = kmalloc_array(smpt_info->info.num_reg,
-					 sizeof(*smpt_info->entry), GFP_KERNEL);
-	if (!smpt_info->entry) {
-		err = -ENOMEM;
-		goto free_smpt;
-	}
-	spin_lock_init(&smpt_info->smpt_lock);
-	for (i = 0; i < smpt_info->info.num_reg; i++) {
-		dma_addr = i * smpt_info->info.page_size;
-		smpt_info->entry[i].dma_addr = dma_addr;
-		smpt_info->entry[i].ref_count = 0;
-		mdev->smpt_ops->set(mdev, dma_addr, i);
-	}
-	smpt_info->ref_count = 0;
-	smpt_info->map_count = 0;
-	smpt_info->unmap_count = 0;
-	return 0;
-free_smpt:
-	kfree(smpt_info);
-	return err;
-}
-
-/**
- * mic_smpt_uninit - UnInitialize MIC System Memory Page Tables.
- *
- * @mdev: pointer to mic_device instance.
- *
- * returns None.
- */
-void mic_smpt_uninit(struct mic_device *mdev)
-{
-	struct mic_smpt_info *smpt_info = mdev->smpt;
-	int i;
-
-	dev_dbg(&mdev->pdev->dev,
-		"nodeid %d SMPT ref count %lld map %lld unmap %lld\n",
-		mdev->id, smpt_info->ref_count,
-		smpt_info->map_count, smpt_info->unmap_count);
-
-	for (i = 0; i < smpt_info->info.num_reg; i++) {
-		dev_dbg(&mdev->pdev->dev,
-			"SMPT entry[%d] dma_addr = 0x%llx ref_count = %lld\n",
-			i, smpt_info->entry[i].dma_addr,
-			smpt_info->entry[i].ref_count);
-		if (smpt_info->entry[i].ref_count)
-			dev_warn(&mdev->pdev->dev,
-				 "ref count for entry %d is not zero\n", i);
-	}
-	kfree(smpt_info->entry);
-	kfree(smpt_info);
-}
-
-/**
- * mic_smpt_restore - Restore MIC System Memory Page Tables.
- *
- * @mdev: pointer to mic_device instance.
- *
- * Restore the SMPT registers to values previously stored in the
- * SW data structures. Some MIC steppings lose register state
- * across resets and this API should be called for performing
- * a restore operation if required.
- *
- * returns None.
- */
-void mic_smpt_restore(struct mic_device *mdev)
-{
-	int i;
-	dma_addr_t dma_addr;
-
-	for (i = 0; i < mdev->smpt->info.num_reg; i++) {
-		dma_addr = mdev->smpt->entry[i].dma_addr;
-		mdev->smpt_ops->set(mdev, dma_addr, i);
-	}
-}
diff --git a/drivers/misc/mic/host/mic_smpt.h b/drivers/misc/mic/host/mic_smpt.h
deleted file mode 100644
index 3b1ec14a9d81..000000000000
--- a/drivers/misc/mic/host/mic_smpt.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * Intel MIC Host driver.
- */
-#ifndef MIC_SMPT_H
-#define MIC_SMPT_H
-/**
- * struct mic_smpt_ops - MIC HW specific SMPT operations.
- * @init: Initialize hardware specific SMPT information in mic_smpt_hw_info.
- * @set: Set the value for a particular SMPT entry.
- */
-struct mic_smpt_ops {
-	void (*init)(struct mic_device *mdev);
-	void (*set)(struct mic_device *mdev, dma_addr_t dma_addr, u8 index);
-};
-
-/**
- * struct mic_smpt - MIC SMPT entry information.
- * @dma_addr: Base DMA address for this SMPT entry.
- * @ref_count: Number of active mappings for this SMPT entry in bytes.
- */
-struct mic_smpt {
-	dma_addr_t dma_addr;
-	s64 ref_count;
-};
-
-/**
- * struct mic_smpt_hw_info - MIC SMPT hardware specific information.
- * @num_reg: Number of SMPT registers.
- * @page_shift: System memory page shift.
- * @page_size: System memory page size.
- * @base: System address base.
- */
-struct mic_smpt_hw_info {
-	u8 num_reg;
-	u8 page_shift;
-	u64 page_size;
-	u64 base;
-};
-
-/**
- * struct mic_smpt_info - MIC SMPT information.
- * @entry: Array of SMPT entries.
- * @smpt_lock: Spin lock protecting access to SMPT data structures.
- * @info: Hardware specific SMPT information.
- * @ref_count: Number of active SMPT mappings (for debug).
- * @map_count: Number of SMPT mappings created (for debug).
- * @unmap_count: Number of SMPT mappings destroyed (for debug).
- */
-struct mic_smpt_info {
-	struct mic_smpt *entry;
-	spinlock_t smpt_lock;
-	struct mic_smpt_hw_info info;
-	s64 ref_count;
-	s64 map_count;
-	s64 unmap_count;
-};
-
-dma_addr_t mic_map_single(struct mic_device *mdev, void *va, size_t size);
-void mic_unmap_single(struct mic_device *mdev,
-	dma_addr_t mic_addr, size_t size);
-dma_addr_t mic_map(struct mic_device *mdev,
-	dma_addr_t dma_addr, size_t size);
-void mic_unmap(struct mic_device *mdev, dma_addr_t mic_addr, size_t size);
-dma_addr_t mic_to_dma_addr(struct mic_device *mdev, dma_addr_t mic_addr);
-
-/**
- * mic_map_error - Check a MIC address for errors.
- *
- * @mdev: pointer to mic_device instance.
- *
- * returns Whether there was an error during mic_map..(..) APIs.
- */
-static inline bool mic_map_error(dma_addr_t mic_addr)
-{
-	return !mic_addr;
-}
-
-int mic_smpt_init(struct mic_device *mdev);
-void mic_smpt_uninit(struct mic_device *mdev);
-void mic_smpt_restore(struct mic_device *mdev);
-
-#endif
diff --git a/drivers/misc/mic/host/mic_x100.c b/drivers/misc/mic/host/mic_x100.c
deleted file mode 100644
index f5536c1ad607..000000000000
--- a/drivers/misc/mic/host/mic_x100.c
+++ /dev/null
@@ -1,585 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * Intel MIC Host driver.
- */
-#include <linux/fs.h>
-#include <linux/pci.h>
-#include <linux/sched.h>
-#include <linux/firmware.h>
-#include <linux/delay.h>
-
-#include "../common/mic_dev.h"
-#include "mic_device.h"
-#include "mic_x100.h"
-#include "mic_smpt.h"
-
-static const u16 mic_x100_intr_init[] = {
-		MIC_X100_DOORBELL_IDX_START,
-		MIC_X100_DMA_IDX_START,
-		MIC_X100_ERR_IDX_START,
-		MIC_X100_NUM_DOORBELL,
-		MIC_X100_NUM_DMA,
-		MIC_X100_NUM_ERR,
-};
-
-/**
- * mic_x100_write_spad - write to the scratchpad register
- * @mdev: pointer to mic_device instance
- * @idx: index to the scratchpad register, 0 based
- * @val: the data value to put into the register
- *
- * This function allows writing of a 32bit value to the indexed scratchpad
- * register.
- *
- * RETURNS: none.
- */
-static void
-mic_x100_write_spad(struct mic_device *mdev, unsigned int idx, u32 val)
-{
-	dev_dbg(&mdev->pdev->dev, "Writing 0x%x to scratch pad index %d\n",
-		val, idx);
-	mic_mmio_write(&mdev->mmio, val,
-		       MIC_X100_SBOX_BASE_ADDRESS +
-		       MIC_X100_SBOX_SPAD0 + idx * 4);
-}
-
-/**
- * mic_x100_read_spad - read from the scratchpad register
- * @mdev: pointer to mic_device instance
- * @idx: index to scratchpad register, 0 based
- *
- * This function allows reading of the 32bit scratchpad register.
- *
- * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
- */
-static u32
-mic_x100_read_spad(struct mic_device *mdev, unsigned int idx)
-{
-	u32 val = mic_mmio_read(&mdev->mmio,
-		MIC_X100_SBOX_BASE_ADDRESS +
-		MIC_X100_SBOX_SPAD0 + idx * 4);
-
-	dev_dbg(&mdev->pdev->dev,
-		"Reading 0x%x from scratch pad index %d\n", val, idx);
-	return val;
-}
-
-/**
- * mic_x100_enable_interrupts - Enable interrupts.
- * @mdev: pointer to mic_device instance
- */
-static void mic_x100_enable_interrupts(struct mic_device *mdev)
-{
-	u32 reg;
-	struct mic_mw *mw = &mdev->mmio;
-	u32 sice0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SICE0;
-	u32 siac0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SIAC0;
-
-	reg = mic_mmio_read(mw, sice0);
-	reg |= MIC_X100_SBOX_DBR_BITS(0xf) | MIC_X100_SBOX_DMA_BITS(0xff);
-	mic_mmio_write(mw, reg, sice0);
-
-	/*
-	 * Enable auto-clear when enabling interrupts. Applicable only for
-	 * MSI-x. Legacy and MSI mode cannot have auto-clear enabled.
-	 */
-	if (mdev->irq_info.num_vectors > 1) {
-		reg = mic_mmio_read(mw, siac0);
-		reg |= MIC_X100_SBOX_DBR_BITS(0xf) |
-			MIC_X100_SBOX_DMA_BITS(0xff);
-		mic_mmio_write(mw, reg, siac0);
-	}
-}
-
-/**
- * mic_x100_disable_interrupts - Disable interrupts.
- * @mdev: pointer to mic_device instance
- */
-static void mic_x100_disable_interrupts(struct mic_device *mdev)
-{
-	u32 reg;
-	struct mic_mw *mw = &mdev->mmio;
-	u32 sice0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SICE0;
-	u32 siac0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SIAC0;
-	u32 sicc0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SICC0;
-
-	reg = mic_mmio_read(mw, sice0);
-	mic_mmio_write(mw, reg, sicc0);
-
-	if (mdev->irq_info.num_vectors > 1) {
-		reg = mic_mmio_read(mw, siac0);
-		reg &= ~(MIC_X100_SBOX_DBR_BITS(0xf) |
-			MIC_X100_SBOX_DMA_BITS(0xff));
-		mic_mmio_write(mw, reg, siac0);
-	}
-}
-
-/**
- * mic_x100_send_sbox_intr - Send an MIC_X100_SBOX interrupt to MIC.
- * @mdev: pointer to mic_device instance
- * @doorbell: doorbell number
- */
-static void mic_x100_send_sbox_intr(struct mic_device *mdev,
-				    int doorbell)
-{
-	struct mic_mw *mw = &mdev->mmio;
-	u64 apic_icr_offset = MIC_X100_SBOX_APICICR0 + doorbell * 8;
-	u32 apicicr_low = mic_mmio_read(mw, MIC_X100_SBOX_BASE_ADDRESS +
-					apic_icr_offset);
-
-	/* for MIC we need to make sure we "hit" the send_icr bit (13) */
-	apicicr_low = (apicicr_low | (1 << 13));
-
-	/* Ensure that the interrupt is ordered w.r.t. previous stores. */
-	wmb();
-	mic_mmio_write(mw, apicicr_low,
-		       MIC_X100_SBOX_BASE_ADDRESS + apic_icr_offset);
-}
-
-/**
- * mic_x100_send_rdmasr_intr - Send an RDMASR interrupt to MIC.
- * @mdev: pointer to mic_device instance
- * @doorbell: doorbell number
- */
-static void mic_x100_send_rdmasr_intr(struct mic_device *mdev,
-				      int doorbell)
-{
-	int rdmasr_offset = MIC_X100_SBOX_RDMASR0 + (doorbell << 2);
-	/* Ensure that the interrupt is ordered w.r.t. previous stores. */
-	wmb();
-	mic_mmio_write(&mdev->mmio, 0,
-		       MIC_X100_SBOX_BASE_ADDRESS + rdmasr_offset);
-}
-
-/**
- * __mic_x100_send_intr - Send interrupt to MIC.
- * @mdev: pointer to mic_device instance
- * @doorbell: doorbell number.
- */
-static void mic_x100_send_intr(struct mic_device *mdev, int doorbell)
-{
-	int rdmasr_db;
-	if (doorbell < MIC_X100_NUM_SBOX_IRQ) {
-		mic_x100_send_sbox_intr(mdev, doorbell);
-	} else {
-		rdmasr_db = doorbell - MIC_X100_NUM_SBOX_IRQ;
-		mic_x100_send_rdmasr_intr(mdev, rdmasr_db);
-	}
-}
-
-/**
- * mic_x100_ack_interrupt - Read the interrupt sources register and
- * clear it. This function will be called in the MSI/INTx case.
- * @mdev: Pointer to mic_device instance.
- *
- * Returns: bitmask of interrupt sources triggered.
- */
-static u32 mic_x100_ack_interrupt(struct mic_device *mdev)
-{
-	u32 sicr0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SICR0;
-	u32 reg = mic_mmio_read(&mdev->mmio, sicr0);
-	mic_mmio_write(&mdev->mmio, reg, sicr0);
-	return reg;
-}
-
-/**
- * mic_x100_intr_workarounds - These hardware specific workarounds are
- * to be invoked everytime an interrupt is handled.
- * @mdev: Pointer to mic_device instance.
- *
- * Returns: none
- */
-static void mic_x100_intr_workarounds(struct mic_device *mdev)
-{
-	struct mic_mw *mw = &mdev->mmio;
-
-	/* Clear pending bit array. */
-	if (MIC_A0_STEP == mdev->stepping)
-		mic_mmio_write(mw, 1, MIC_X100_SBOX_BASE_ADDRESS +
-			MIC_X100_SBOX_MSIXPBACR);
-
-	if (mdev->stepping >= MIC_B0_STEP)
-		mdev->intr_ops->enable_interrupts(mdev);
-}
-
-/**
- * mic_x100_hw_intr_init - Initialize h/w specific interrupt
- * information.
- * @mdev: pointer to mic_device instance
- */
-static void mic_x100_hw_intr_init(struct mic_device *mdev)
-{
-	mdev->intr_info = (struct mic_intr_info *)mic_x100_intr_init;
-}
-
-/**
- * mic_x100_read_msi_to_src_map - read from the MSI mapping registers
- * @mdev: pointer to mic_device instance
- * @idx: index to the mapping register, 0 based
- *
- * This function allows reading of the 32bit MSI mapping register.
- *
- * RETURNS: The value in the register.
- */
-static u32
-mic_x100_read_msi_to_src_map(struct mic_device *mdev, int idx)
-{
-	return mic_mmio_read(&mdev->mmio,
-		MIC_X100_SBOX_BASE_ADDRESS +
-		MIC_X100_SBOX_MXAR0 + idx * 4);
-}
-
-/**
- * mic_x100_program_msi_to_src_map - program the MSI mapping registers
- * @mdev: pointer to mic_device instance
- * @idx: index to the mapping register, 0 based
- * @offset: The bit offset in the register that needs to be updated.
- * @set: boolean specifying if the bit in the specified offset needs
- * to be set or cleared.
- *
- * RETURNS: None.
- */
-static void
-mic_x100_program_msi_to_src_map(struct mic_device *mdev,
-				int idx, int offset, bool set)
-{
-	unsigned long reg;
-	struct mic_mw *mw = &mdev->mmio;
-	u32 mxar = MIC_X100_SBOX_BASE_ADDRESS +
-		MIC_X100_SBOX_MXAR0 + idx * 4;
-
-	reg = mic_mmio_read(mw, mxar);
-	if (set)
-		__set_bit(offset, &reg);
-	else
-		__clear_bit(offset, &reg);
-	mic_mmio_write(mw, reg, mxar);
-}
-
-/*
- * mic_x100_reset_fw_ready - Reset Firmware ready status field.
- * @mdev: pointer to mic_device instance
- */
-static void mic_x100_reset_fw_ready(struct mic_device *mdev)
-{
-	mdev->ops->write_spad(mdev, MIC_X100_DOWNLOAD_INFO, 0);
-}
-
-/*
- * mic_x100_is_fw_ready - Check if firmware is ready.
- * @mdev: pointer to mic_device instance
- */
-static bool mic_x100_is_fw_ready(struct mic_device *mdev)
-{
-	u32 scratch2 = mdev->ops->read_spad(mdev, MIC_X100_DOWNLOAD_INFO);
-	return MIC_X100_SPAD2_DOWNLOAD_STATUS(scratch2) ? true : false;
-}
-
-/**
- * mic_x100_get_apic_id - Get bootstrap APIC ID.
- * @mdev: pointer to mic_device instance
- */
-static u32 mic_x100_get_apic_id(struct mic_device *mdev)
-{
-	u32 scratch2 = 0;
-
-	scratch2 = mdev->ops->read_spad(mdev, MIC_X100_DOWNLOAD_INFO);
-	return MIC_X100_SPAD2_APIC_ID(scratch2);
-}
-
-/**
- * mic_x100_send_firmware_intr - Send an interrupt to the firmware on MIC.
- * @mdev: pointer to mic_device instance
- */
-static void mic_x100_send_firmware_intr(struct mic_device *mdev)
-{
-	u32 apicicr_low;
-	u64 apic_icr_offset = MIC_X100_SBOX_APICICR7;
-	int vector = MIC_X100_BSP_INTERRUPT_VECTOR;
-	struct mic_mw *mw = &mdev->mmio;
-
-	/*
-	 * For MIC we need to make sure we "hit"
-	 * the send_icr bit (13).
-	 */
-	apicicr_low = (vector | (1 << 13));
-
-	mic_mmio_write(mw, mic_x100_get_apic_id(mdev),
-		       MIC_X100_SBOX_BASE_ADDRESS + apic_icr_offset + 4);
-
-	/* Ensure that the interrupt is ordered w.r.t. previous stores. */
-	wmb();
-	mic_mmio_write(mw, apicicr_low,
-		       MIC_X100_SBOX_BASE_ADDRESS + apic_icr_offset);
-}
-
-/**
- * mic_x100_hw_reset - Reset the MIC device.
- * @mdev: pointer to mic_device instance
- */
-static void mic_x100_hw_reset(struct mic_device *mdev)
-{
-	u32 reset_reg;
-	u32 rgcr = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_RGCR;
-	struct mic_mw *mw = &mdev->mmio;
-
-	/* Ensure that the reset is ordered w.r.t. previous loads and stores */
-	mb();
-	/* Trigger reset */
-	reset_reg = mic_mmio_read(mw, rgcr);
-	reset_reg |= 0x1;
-	mic_mmio_write(mw, reset_reg, rgcr);
-	/*
-	 * It seems we really want to delay at least 1 second
-	 * after touching reset to prevent a lot of problems.
-	 */
-	msleep(1000);
-}
-
-/**
- * mic_x100_load_command_line - Load command line to MIC.
- * @mdev: pointer to mic_device instance
- * @fw: the firmware image
- *
- * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
- */
-static int
-mic_x100_load_command_line(struct mic_device *mdev, const struct firmware *fw)
-{
-	u32 len = 0;
-	u32 boot_mem;
-	char *buf;
-	void __iomem *cmd_line_va = mdev->aper.va + mdev->bootaddr + fw->size;
-#define CMDLINE_SIZE 2048
-
-	boot_mem = mdev->aper.len >> 20;
-	buf = kzalloc(CMDLINE_SIZE, GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-
-	len += scnprintf(buf, CMDLINE_SIZE - len,
-		" mem=%dM", boot_mem);
-	if (mdev->cosm_dev->cmdline)
-		scnprintf(buf + len, CMDLINE_SIZE - len, " %s",
-			 mdev->cosm_dev->cmdline);
-	memcpy_toio(cmd_line_va, buf, strlen(buf) + 1);
-	kfree(buf);
-	return 0;
-}
-
-/**
- * mic_x100_load_ramdisk - Load ramdisk to MIC.
- * @mdev: pointer to mic_device instance
- *
- * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
- */
-static int
-mic_x100_load_ramdisk(struct mic_device *mdev)
-{
-	const struct firmware *fw;
-	int rc;
-	struct boot_params __iomem *bp = mdev->aper.va + mdev->bootaddr;
-
-	rc = request_firmware(&fw, mdev->cosm_dev->ramdisk, &mdev->pdev->dev);
-	if (rc < 0) {
-		dev_err(&mdev->pdev->dev,
-			"ramdisk request_firmware failed: %d %s\n",
-			rc, mdev->cosm_dev->ramdisk);
-		goto error;
-	}
-	/*
-	 * Typically the bootaddr for card OS is 64M
-	 * so copy over the ramdisk @ 128M.
-	 */
-	memcpy_toio(mdev->aper.va + (mdev->bootaddr << 1), fw->data, fw->size);
-	iowrite32(mdev->bootaddr << 1, &bp->hdr.ramdisk_image);
-	iowrite32(fw->size, &bp->hdr.ramdisk_size);
-	release_firmware(fw);
-error:
-	return rc;
-}
-
-/**
- * mic_x100_get_boot_addr - Get MIC boot address.
- * @mdev: pointer to mic_device instance
- *
- * This function is called during firmware load to determine
- * the address at which the OS should be downloaded in card
- * memory i.e. GDDR.
- * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
- */
-static int
-mic_x100_get_boot_addr(struct mic_device *mdev)
-{
-	u32 scratch2, boot_addr;
-	int rc = 0;
-
-	scratch2 = mdev->ops->read_spad(mdev, MIC_X100_DOWNLOAD_INFO);
-	boot_addr = MIC_X100_SPAD2_DOWNLOAD_ADDR(scratch2);
-	dev_dbg(&mdev->pdev->dev, "%s %d boot_addr 0x%x\n",
-		__func__, __LINE__, boot_addr);
-	if (boot_addr > (1 << 31)) {
-		dev_err(&mdev->pdev->dev,
-			"incorrect bootaddr 0x%x\n",
-			boot_addr);
-		rc = -EINVAL;
-		goto error;
-	}
-	mdev->bootaddr = boot_addr;
-error:
-	return rc;
-}
-
-/**
- * mic_x100_load_firmware - Load firmware to MIC.
- * @mdev: pointer to mic_device instance
- * @buf: buffer containing boot string including firmware/ramdisk path.
- *
- * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
- */
-static int
-mic_x100_load_firmware(struct mic_device *mdev, const char *buf)
-{
-	int rc;
-	const struct firmware *fw;
-
-	rc = mic_x100_get_boot_addr(mdev);
-	if (rc)
-		return rc;
-	/* load OS */
-	rc = request_firmware(&fw, mdev->cosm_dev->firmware, &mdev->pdev->dev);
-	if (rc < 0) {
-		dev_err(&mdev->pdev->dev,
-			"ramdisk request_firmware failed: %d %s\n",
-			rc, mdev->cosm_dev->firmware);
-		return rc;
-	}
-	if (mdev->bootaddr > mdev->aper.len - fw->size) {
-		rc = -EINVAL;
-		dev_err(&mdev->pdev->dev, "%s %d rc %d bootaddr 0x%x\n",
-			__func__, __LINE__, rc, mdev->bootaddr);
-		goto error;
-	}
-	memcpy_toio(mdev->aper.va + mdev->bootaddr, fw->data, fw->size);
-	mdev->ops->write_spad(mdev, MIC_X100_FW_SIZE, fw->size);
-	if (!strcmp(mdev->cosm_dev->bootmode, "flash")) {
-		rc = -EINVAL;
-		dev_err(&mdev->pdev->dev, "%s %d rc %d\n",
-			__func__, __LINE__, rc);
-		goto error;
-	}
-	/* load command line */
-	rc = mic_x100_load_command_line(mdev, fw);
-	if (rc) {
-		dev_err(&mdev->pdev->dev, "%s %d rc %d\n",
-			__func__, __LINE__, rc);
-		goto error;
-	}
-	release_firmware(fw);
-	/* load ramdisk */
-	if (mdev->cosm_dev->ramdisk)
-		rc = mic_x100_load_ramdisk(mdev);
-
-	return rc;
-
-error:
-	release_firmware(fw);
-	return rc;
-}
-
-/**
- * mic_x100_get_postcode - Get postcode status from firmware.
- * @mdev: pointer to mic_device instance
- *
- * RETURNS: postcode.
- */
-static u32 mic_x100_get_postcode(struct mic_device *mdev)
-{
-	return mic_mmio_read(&mdev->mmio, MIC_X100_POSTCODE);
-}
-
-/**
- * mic_x100_smpt_set - Update an SMPT entry with a DMA address.
- * @mdev: pointer to mic_device instance
- * @dma_addr: DMA address to use
- * @index: entry to write to
- *
- * RETURNS: none.
- */
-static void
-mic_x100_smpt_set(struct mic_device *mdev, dma_addr_t dma_addr, u8 index)
-{
-#define SNOOP_ON	(0 << 0)
-#define SNOOP_OFF	(1 << 0)
-/*
- * Sbox Smpt Reg Bits:
- * Bits	31:2	Host address
- * Bits	1	RSVD
- * Bits	0	No snoop
- */
-#define BUILD_SMPT(NO_SNOOP, HOST_ADDR)  \
-	(u32)(((HOST_ADDR) << 2) | ((NO_SNOOP) & 0x01))
-
-	uint32_t smpt_reg_val = BUILD_SMPT(SNOOP_ON,
-			dma_addr >> mdev->smpt->info.page_shift);
-	mic_mmio_write(&mdev->mmio, smpt_reg_val,
-		       MIC_X100_SBOX_BASE_ADDRESS +
-		       MIC_X100_SBOX_SMPT00 + (4 * index));
-}
-
-/**
- * mic_x100_smpt_hw_init - Initialize SMPT X100 specific fields.
- * @mdev: pointer to mic_device instance
- *
- * RETURNS: none.
- */
-static void mic_x100_smpt_hw_init(struct mic_device *mdev)
-{
-	struct mic_smpt_hw_info *info = &mdev->smpt->info;
-
-	info->num_reg = 32;
-	info->page_shift = 34;
-	info->page_size = (1ULL << info->page_shift);
-	info->base = 0x8000000000ULL;
-}
-
-struct mic_smpt_ops mic_x100_smpt_ops = {
-	.init = mic_x100_smpt_hw_init,
-	.set = mic_x100_smpt_set,
-};
-
-static bool mic_x100_dma_filter(struct dma_chan *chan, void *param)
-{
-	if (chan->device->dev->parent == (struct device *)param)
-		return true;
-	return false;
-}
-
-struct mic_hw_ops mic_x100_ops = {
-	.aper_bar = MIC_X100_APER_BAR,
-	.mmio_bar = MIC_X100_MMIO_BAR,
-	.read_spad = mic_x100_read_spad,
-	.write_spad = mic_x100_write_spad,
-	.send_intr = mic_x100_send_intr,
-	.ack_interrupt = mic_x100_ack_interrupt,
-	.intr_workarounds = mic_x100_intr_workarounds,
-	.reset = mic_x100_hw_reset,
-	.reset_fw_ready = mic_x100_reset_fw_ready,
-	.is_fw_ready = mic_x100_is_fw_ready,
-	.send_firmware_intr = mic_x100_send_firmware_intr,
-	.load_mic_fw = mic_x100_load_firmware,
-	.get_postcode = mic_x100_get_postcode,
-	.dma_filter = mic_x100_dma_filter,
-};
-
-struct mic_hw_intr_ops mic_x100_intr_ops = {
-	.intr_init = mic_x100_hw_intr_init,
-	.enable_interrupts = mic_x100_enable_interrupts,
-	.disable_interrupts = mic_x100_disable_interrupts,
-	.program_msi_to_src_map = mic_x100_program_msi_to_src_map,
-	.read_msi_to_src_map = mic_x100_read_msi_to_src_map,
-};
diff --git a/drivers/misc/mic/host/mic_x100.h b/drivers/misc/mic/host/mic_x100.h
deleted file mode 100644
index aebcaed6fa72..000000000000
--- a/drivers/misc/mic/host/mic_x100.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * Intel MIC Host driver.
- */
-#ifndef _MIC_X100_HW_H_
-#define _MIC_X100_HW_H_
-
-#define MIC_X100_PCI_DEVICE_2250 0x2250
-#define MIC_X100_PCI_DEVICE_2251 0x2251
-#define MIC_X100_PCI_DEVICE_2252 0x2252
-#define MIC_X100_PCI_DEVICE_2253 0x2253
-#define MIC_X100_PCI_DEVICE_2254 0x2254
-#define MIC_X100_PCI_DEVICE_2255 0x2255
-#define MIC_X100_PCI_DEVICE_2256 0x2256
-#define MIC_X100_PCI_DEVICE_2257 0x2257
-#define MIC_X100_PCI_DEVICE_2258 0x2258
-#define MIC_X100_PCI_DEVICE_2259 0x2259
-#define MIC_X100_PCI_DEVICE_225a 0x225a
-#define MIC_X100_PCI_DEVICE_225b 0x225b
-#define MIC_X100_PCI_DEVICE_225c 0x225c
-#define MIC_X100_PCI_DEVICE_225d 0x225d
-#define MIC_X100_PCI_DEVICE_225e 0x225e
-
-#define MIC_X100_APER_BAR 0
-#define MIC_X100_MMIO_BAR 4
-
-#define MIC_X100_SBOX_BASE_ADDRESS 0x00010000
-#define MIC_X100_SBOX_SPAD0 0x0000AB20
-#define MIC_X100_SBOX_SICR0_DBR(x) ((x) & 0xf)
-#define MIC_X100_SBOX_SICR0_DMA(x) (((x) >> 8) & 0xff)
-#define MIC_X100_SBOX_SICE0_DBR(x) ((x) & 0xf)
-#define MIC_X100_SBOX_DBR_BITS(x) ((x) & 0xf)
-#define MIC_X100_SBOX_SICE0_DMA(x) (((x) >> 8) & 0xff)
-#define MIC_X100_SBOX_DMA_BITS(x) (((x) & 0xff) << 8)
-
-#define MIC_X100_SBOX_APICICR0 0x0000A9D0
-#define MIC_X100_SBOX_SICR0 0x00009004
-#define MIC_X100_SBOX_SICE0 0x0000900C
-#define MIC_X100_SBOX_SICC0 0x00009010
-#define MIC_X100_SBOX_SIAC0 0x00009014
-#define MIC_X100_SBOX_MSIXPBACR 0x00009084
-#define MIC_X100_SBOX_MXAR0 0x00009044
-#define MIC_X100_SBOX_SMPT00 0x00003100
-#define MIC_X100_SBOX_RDMASR0 0x0000B180
-
-#define MIC_X100_DOORBELL_IDX_START 0
-#define MIC_X100_NUM_DOORBELL 4
-#define MIC_X100_DMA_IDX_START 8
-#define MIC_X100_NUM_DMA 8
-#define MIC_X100_ERR_IDX_START 30
-#define MIC_X100_NUM_ERR 1
-
-#define MIC_X100_NUM_SBOX_IRQ 8
-#define MIC_X100_NUM_RDMASR_IRQ 8
-#define MIC_X100_RDMASR_IRQ_BASE 17
-#define MIC_X100_SPAD2_DOWNLOAD_STATUS(x) ((x) & 0x1)
-#define MIC_X100_SPAD2_APIC_ID(x)	(((x) >> 1) & 0x1ff)
-#define MIC_X100_SPAD2_DOWNLOAD_ADDR(x) ((x) & 0xfffff000)
-#define MIC_X100_SBOX_APICICR7 0x0000AA08
-#define MIC_X100_SBOX_RGCR 0x00004010
-#define MIC_X100_SBOX_SDBIC0 0x0000CC90
-#define MIC_X100_DOWNLOAD_INFO 2
-#define MIC_X100_FW_SIZE 5
-#define MIC_X100_POSTCODE 0x242c
-
-/* Host->Card(bootstrap) Interrupt Vector */
-#define MIC_X100_BSP_INTERRUPT_VECTOR 229
-
-extern struct mic_hw_ops mic_x100_ops;
-extern struct mic_smpt_ops mic_x100_smpt_ops;
-extern struct mic_hw_intr_ops mic_x100_intr_ops;
-
-#endif
diff --git a/drivers/misc/mic/scif/Makefile b/drivers/misc/mic/scif/Makefile
deleted file mode 100644
index ff372555d118..000000000000
--- a/drivers/misc/mic/scif/Makefile
+++ /dev/null
@@ -1,21 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile - SCIF driver.
-# Copyright(c) 2014, Intel Corporation.
-#
-obj-$(CONFIG_SCIF) += scif.o
-scif-objs := scif_main.o
-scif-objs += scif_peer_bus.o
-scif-objs += scif_ports.o
-scif-objs += scif_debugfs.o
-scif-objs += scif_fd.o
-scif-objs += scif_api.o
-scif-objs += scif_epd.o
-scif-objs += scif_rb.o
-scif-objs += scif_nodeqp.o
-scif-objs += scif_nm.o
-scif-objs += scif_dma.o
-scif-objs += scif_fence.o
-scif-objs += scif_mmap.o
-scif-objs += scif_rma.o
-scif-objs += scif_rma_list.o
diff --git a/drivers/misc/mic/scif/scif_api.c b/drivers/misc/mic/scif/scif_api.c
deleted file mode 100644
index 304d6c833712..000000000000
--- a/drivers/misc/mic/scif/scif_api.c
+++ /dev/null
@@ -1,1485 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Intel SCIF driver.
- */
-#include <linux/scif.h>
-#include "scif_main.h"
-#include "scif_map.h"
-
-static const char * const scif_ep_states[] = {
-	"Unbound",
-	"Bound",
-	"Listening",
-	"Connected",
-	"Connecting",
-	"Mapping",
-	"Closing",
-	"Close Listening",
-	"Disconnected",
-	"Zombie"};
-
-enum conn_async_state {
-	ASYNC_CONN_IDLE = 1,	/* ep setup for async connect */
-	ASYNC_CONN_INPROGRESS,	/* async connect in progress */
-	ASYNC_CONN_FLUSH_WORK	/* async work flush in progress  */
-};
-
-/*
- * File operations for anonymous inode file associated with a SCIF endpoint,
- * used in kernel mode SCIF poll. Kernel mode SCIF poll calls portions of the
- * poll API in the kernel and these take in a struct file *. Since a struct
- * file is not available to kernel mode SCIF, it uses an anonymous file for
- * this purpose.
- */
-const struct file_operations scif_anon_fops = {
-	.owner = THIS_MODULE,
-};
-
-scif_epd_t scif_open(void)
-{
-	struct scif_endpt *ep;
-	int err;
-
-	might_sleep();
-	ep = kzalloc(sizeof(*ep), GFP_KERNEL);
-	if (!ep)
-		goto err_ep_alloc;
-
-	ep->qp_info.qp = kzalloc(sizeof(*ep->qp_info.qp), GFP_KERNEL);
-	if (!ep->qp_info.qp)
-		goto err_qp_alloc;
-
-	err = scif_anon_inode_getfile(ep);
-	if (err)
-		goto err_anon_inode;
-
-	spin_lock_init(&ep->lock);
-	mutex_init(&ep->sendlock);
-	mutex_init(&ep->recvlock);
-
-	scif_rma_ep_init(ep);
-	ep->state = SCIFEP_UNBOUND;
-	dev_dbg(scif_info.mdev.this_device,
-		"SCIFAPI open: ep %p success\n", ep);
-	return ep;
-
-err_anon_inode:
-	kfree(ep->qp_info.qp);
-err_qp_alloc:
-	kfree(ep);
-err_ep_alloc:
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(scif_open);
-
-/*
- * scif_disconnect_ep - Disconnects the endpoint if found
- * @epd: The end point returned from scif_open()
- */
-static struct scif_endpt *scif_disconnect_ep(struct scif_endpt *ep)
-{
-	struct scifmsg msg;
-	struct scif_endpt *fep = NULL;
-	struct scif_endpt *tmpep;
-	struct list_head *pos, *tmpq;
-	int err;
-
-	/*
-	 * Wake up any threads blocked in send()/recv() before closing
-	 * out the connection. Grabbing and releasing the send/recv lock
-	 * will ensure that any blocked senders/receivers have exited for
-	 * Ring 0 endpoints. It is a Ring 0 bug to call send/recv after
-	 * close. Ring 3 endpoints are not affected since close will not
-	 * be called while there are IOCTLs executing.
-	 */
-	wake_up_interruptible(&ep->sendwq);
-	wake_up_interruptible(&ep->recvwq);
-	mutex_lock(&ep->sendlock);
-	mutex_unlock(&ep->sendlock);
-	mutex_lock(&ep->recvlock);
-	mutex_unlock(&ep->recvlock);
-
-	/* Remove from the connected list */
-	mutex_lock(&scif_info.connlock);
-	list_for_each_safe(pos, tmpq, &scif_info.connected) {
-		tmpep = list_entry(pos, struct scif_endpt, list);
-		if (tmpep == ep) {
-			list_del(pos);
-			fep = tmpep;
-			spin_lock(&ep->lock);
-			break;
-		}
-	}
-
-	if (!fep) {
-		/*
-		 * The other side has completed the disconnect before
-		 * the end point can be removed from the list. Therefore
-		 * the ep lock is not locked, traverse the disconnected
-		 * list to find the endpoint and release the conn lock.
-		 */
-		list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
-			tmpep = list_entry(pos, struct scif_endpt, list);
-			if (tmpep == ep) {
-				list_del(pos);
-				break;
-			}
-		}
-		mutex_unlock(&scif_info.connlock);
-		return NULL;
-	}
-
-	init_completion(&ep->discon);
-	msg.uop = SCIF_DISCNCT;
-	msg.src = ep->port;
-	msg.dst = ep->peer;
-	msg.payload[0] = (u64)ep;
-	msg.payload[1] = ep->remote_ep;
-
-	err = scif_nodeqp_send(ep->remote_dev, &msg);
-	spin_unlock(&ep->lock);
-	mutex_unlock(&scif_info.connlock);
-
-	if (!err)
-		/* Wait for the remote node to respond with SCIF_DISCNT_ACK */
-		wait_for_completion_timeout(&ep->discon,
-					    SCIF_NODE_ALIVE_TIMEOUT);
-	return ep;
-}
-
-int scif_close(scif_epd_t epd)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)epd;
-	struct scif_endpt *tmpep;
-	struct list_head *pos, *tmpq;
-	enum scif_epd_state oldstate;
-	bool flush_conn;
-
-	dev_dbg(scif_info.mdev.this_device, "SCIFAPI close: ep %p %s\n",
-		ep, scif_ep_states[ep->state]);
-	might_sleep();
-	spin_lock(&ep->lock);
-	flush_conn = (ep->conn_async_state == ASYNC_CONN_INPROGRESS);
-	spin_unlock(&ep->lock);
-
-	if (flush_conn)
-		flush_work(&scif_info.conn_work);
-
-	spin_lock(&ep->lock);
-	oldstate = ep->state;
-
-	ep->state = SCIFEP_CLOSING;
-
-	switch (oldstate) {
-	case SCIFEP_ZOMBIE:
-		dev_err(scif_info.mdev.this_device,
-			"SCIFAPI close: zombie state unexpected\n");
-		fallthrough;
-	case SCIFEP_DISCONNECTED:
-		spin_unlock(&ep->lock);
-		scif_unregister_all_windows(epd);
-		/* Remove from the disconnected list */
-		mutex_lock(&scif_info.connlock);
-		list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
-			tmpep = list_entry(pos, struct scif_endpt, list);
-			if (tmpep == ep) {
-				list_del(pos);
-				break;
-			}
-		}
-		mutex_unlock(&scif_info.connlock);
-		break;
-	case SCIFEP_UNBOUND:
-	case SCIFEP_BOUND:
-	case SCIFEP_CONNECTING:
-		spin_unlock(&ep->lock);
-		break;
-	case SCIFEP_MAPPING:
-	case SCIFEP_CONNECTED:
-	case SCIFEP_CLOSING:
-	{
-		spin_unlock(&ep->lock);
-		scif_unregister_all_windows(epd);
-		scif_disconnect_ep(ep);
-		break;
-	}
-	case SCIFEP_LISTENING:
-	case SCIFEP_CLLISTEN:
-	{
-		struct scif_conreq *conreq;
-		struct scifmsg msg;
-		struct scif_endpt *aep;
-
-		spin_unlock(&ep->lock);
-		mutex_lock(&scif_info.eplock);
-
-		/* remove from listen list */
-		list_for_each_safe(pos, tmpq, &scif_info.listen) {
-			tmpep = list_entry(pos, struct scif_endpt, list);
-			if (tmpep == ep)
-				list_del(pos);
-		}
-		/* Remove any dangling accepts */
-		while (ep->acceptcnt) {
-			aep = list_first_entry(&ep->li_accept,
-					       struct scif_endpt, liacceptlist);
-			list_del(&aep->liacceptlist);
-			scif_put_port(aep->port.port);
-			list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
-				tmpep = list_entry(pos, struct scif_endpt,
-						   miacceptlist);
-				if (tmpep == aep) {
-					list_del(pos);
-					break;
-				}
-			}
-			mutex_unlock(&scif_info.eplock);
-			mutex_lock(&scif_info.connlock);
-			list_for_each_safe(pos, tmpq, &scif_info.connected) {
-				tmpep = list_entry(pos,
-						   struct scif_endpt, list);
-				if (tmpep == aep) {
-					list_del(pos);
-					break;
-				}
-			}
-			list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
-				tmpep = list_entry(pos,
-						   struct scif_endpt, list);
-				if (tmpep == aep) {
-					list_del(pos);
-					break;
-				}
-			}
-			mutex_unlock(&scif_info.connlock);
-			scif_teardown_ep(aep);
-			mutex_lock(&scif_info.eplock);
-			scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD);
-			ep->acceptcnt--;
-		}
-
-		spin_lock(&ep->lock);
-		mutex_unlock(&scif_info.eplock);
-
-		/* Remove and reject any pending connection requests. */
-		while (ep->conreqcnt) {
-			conreq = list_first_entry(&ep->conlist,
-						  struct scif_conreq, list);
-			list_del(&conreq->list);
-
-			msg.uop = SCIF_CNCT_REJ;
-			msg.dst.node = conreq->msg.src.node;
-			msg.dst.port = conreq->msg.src.port;
-			msg.payload[0] = conreq->msg.payload[0];
-			msg.payload[1] = conreq->msg.payload[1];
-			/*
-			 * No Error Handling on purpose for scif_nodeqp_send().
-			 * If the remote node is lost we still want free the
-			 * connection requests on the self node.
-			 */
-			scif_nodeqp_send(&scif_dev[conreq->msg.src.node],
-					 &msg);
-			ep->conreqcnt--;
-			kfree(conreq);
-		}
-
-		spin_unlock(&ep->lock);
-		/* If a kSCIF accept is waiting wake it up */
-		wake_up_interruptible(&ep->conwq);
-		break;
-	}
-	}
-	scif_put_port(ep->port.port);
-	scif_anon_inode_fput(ep);
-	scif_teardown_ep(ep);
-	scif_add_epd_to_zombie_list(ep, !SCIF_EPLOCK_HELD);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(scif_close);
-
-/**
- * scif_flush() - Wakes up any blocking accepts. The endpoint will no longer
- *			accept new connections.
- * @epd: The end point returned from scif_open()
- */
-int __scif_flush(scif_epd_t epd)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)epd;
-
-	switch (ep->state) {
-	case SCIFEP_LISTENING:
-	{
-		ep->state = SCIFEP_CLLISTEN;
-
-		/* If an accept is waiting wake it up */
-		wake_up_interruptible(&ep->conwq);
-		break;
-	}
-	default:
-		break;
-	}
-	return 0;
-}
-
-int scif_bind(scif_epd_t epd, u16 pn)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)epd;
-	int ret = 0;
-	int tmp;
-
-	dev_dbg(scif_info.mdev.this_device,
-		"SCIFAPI bind: ep %p %s requested port number %d\n",
-		ep, scif_ep_states[ep->state], pn);
-	if (pn) {
-		/*
-		 * Similar to IETF RFC 1700, SCIF ports below
-		 * SCIF_ADMIN_PORT_END can only be bound by system (or root)
-		 * processes or by processes executed by privileged users.
-		 */
-		if (pn < SCIF_ADMIN_PORT_END && !capable(CAP_SYS_ADMIN)) {
-			ret = -EACCES;
-			goto scif_bind_admin_exit;
-		}
-	}
-
-	spin_lock(&ep->lock);
-	if (ep->state == SCIFEP_BOUND) {
-		ret = -EINVAL;
-		goto scif_bind_exit;
-	} else if (ep->state != SCIFEP_UNBOUND) {
-		ret = -EISCONN;
-		goto scif_bind_exit;
-	}
-
-	if (pn) {
-		tmp = scif_rsrv_port(pn);
-		if (tmp != pn) {
-			ret = -EINVAL;
-			goto scif_bind_exit;
-		}
-	} else {
-		ret = scif_get_new_port();
-		if (ret < 0)
-			goto scif_bind_exit;
-		pn = ret;
-	}
-
-	ep->state = SCIFEP_BOUND;
-	ep->port.node = scif_info.nodeid;
-	ep->port.port = pn;
-	ep->conn_async_state = ASYNC_CONN_IDLE;
-	ret = pn;
-	dev_dbg(scif_info.mdev.this_device,
-		"SCIFAPI bind: bound to port number %d\n", pn);
-scif_bind_exit:
-	spin_unlock(&ep->lock);
-scif_bind_admin_exit:
-	return ret;
-}
-EXPORT_SYMBOL_GPL(scif_bind);
-
-int scif_listen(scif_epd_t epd, int backlog)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)epd;
-
-	dev_dbg(scif_info.mdev.this_device,
-		"SCIFAPI listen: ep %p %s\n", ep, scif_ep_states[ep->state]);
-	spin_lock(&ep->lock);
-	switch (ep->state) {
-	case SCIFEP_ZOMBIE:
-	case SCIFEP_CLOSING:
-	case SCIFEP_CLLISTEN:
-	case SCIFEP_UNBOUND:
-	case SCIFEP_DISCONNECTED:
-		spin_unlock(&ep->lock);
-		return -EINVAL;
-	case SCIFEP_LISTENING:
-	case SCIFEP_CONNECTED:
-	case SCIFEP_CONNECTING:
-	case SCIFEP_MAPPING:
-		spin_unlock(&ep->lock);
-		return -EISCONN;
-	case SCIFEP_BOUND:
-		break;
-	}
-
-	ep->state = SCIFEP_LISTENING;
-	ep->backlog = backlog;
-
-	ep->conreqcnt = 0;
-	ep->acceptcnt = 0;
-	INIT_LIST_HEAD(&ep->conlist);
-	init_waitqueue_head(&ep->conwq);
-	INIT_LIST_HEAD(&ep->li_accept);
-	spin_unlock(&ep->lock);
-
-	/*
-	 * Listen status is complete so delete the qp information not needed
-	 * on a listen before placing on the list of listening ep's
-	 */
-	scif_teardown_ep(ep);
-	ep->qp_info.qp = NULL;
-
-	mutex_lock(&scif_info.eplock);
-	list_add_tail(&ep->list, &scif_info.listen);
-	mutex_unlock(&scif_info.eplock);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(scif_listen);
-
-/*
- ************************************************************************
- * SCIF connection flow:
- *
- * 1) A SCIF listening endpoint can call scif_accept(..) to wait for SCIF
- *	connections via a SCIF_CNCT_REQ message
- * 2) A SCIF endpoint can initiate a SCIF connection by calling
- *	scif_connect(..) which calls scif_setup_qp_connect(..) which
- *	allocates the local qp for the endpoint ring buffer and then sends
- *	a SCIF_CNCT_REQ to the remote node and waits for a SCIF_CNCT_GNT or
- *	a SCIF_CNCT_REJ message
- * 3) The peer node handles a SCIF_CNCT_REQ via scif_cnctreq_resp(..) which
- *	wakes up any threads blocked in step 1 or sends a SCIF_CNCT_REJ
- *	message otherwise
- * 4) A thread blocked waiting for incoming connections allocates its local
- *	endpoint QP and ring buffer following which it sends a SCIF_CNCT_GNT
- *	and waits for a SCIF_CNCT_GNT(N)ACK. If the allocation fails then
- *	the node sends a SCIF_CNCT_REJ message
- * 5) Upon receipt of a SCIF_CNCT_GNT or a SCIF_CNCT_REJ message the
- *	connecting endpoint is woken up as part of handling
- *	scif_cnctgnt_resp(..) following which it maps the remote endpoints'
- *	QP, updates its outbound QP and sends a SCIF_CNCT_GNTACK message on
- *	success or a SCIF_CNCT_GNTNACK message on failure and completes
- *	the scif_connect(..) API
- * 6) Upon receipt of a SCIF_CNCT_GNT(N)ACK the accepting endpoint blocked
- *	in step 4 is woken up and completes the scif_accept(..) API
- * 7) The SCIF connection is now established between the two SCIF endpoints.
- */
-static int scif_conn_func(struct scif_endpt *ep)
-{
-	int err = 0;
-	struct scifmsg msg;
-	struct device *spdev;
-
-	err = scif_reserve_dma_chan(ep);
-	if (err) {
-		dev_err(&ep->remote_dev->sdev->dev,
-			"%s %d err %d\n", __func__, __LINE__, err);
-		ep->state = SCIFEP_BOUND;
-		goto connect_error_simple;
-	}
-	/* Initiate the first part of the endpoint QP setup */
-	err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset,
-				    SCIF_ENDPT_QP_SIZE, ep->remote_dev);
-	if (err) {
-		dev_err(&ep->remote_dev->sdev->dev,
-			"%s err %d qp_offset 0x%llx\n",
-			__func__, err, ep->qp_info.qp_offset);
-		ep->state = SCIFEP_BOUND;
-		goto connect_error_simple;
-	}
-
-	spdev = scif_get_peer_dev(ep->remote_dev);
-	if (IS_ERR(spdev)) {
-		err = PTR_ERR(spdev);
-		goto cleanup_qp;
-	}
-	/* Format connect message and send it */
-	msg.src = ep->port;
-	msg.dst = ep->conn_port;
-	msg.uop = SCIF_CNCT_REQ;
-	msg.payload[0] = (u64)ep;
-	msg.payload[1] = ep->qp_info.qp_offset;
-	err = _scif_nodeqp_send(ep->remote_dev, &msg);
-	if (err)
-		goto connect_error_dec;
-	scif_put_peer_dev(spdev);
-	/*
-	 * Wait for the remote node to respond with SCIF_CNCT_GNT or
-	 * SCIF_CNCT_REJ message.
-	 */
-	err = wait_event_timeout(ep->conwq, ep->state != SCIFEP_CONNECTING,
-				 SCIF_NODE_ALIVE_TIMEOUT);
-	if (!err) {
-		dev_err(&ep->remote_dev->sdev->dev,
-			"%s %d timeout\n", __func__, __LINE__);
-		ep->state = SCIFEP_BOUND;
-	}
-	spdev = scif_get_peer_dev(ep->remote_dev);
-	if (IS_ERR(spdev)) {
-		err = PTR_ERR(spdev);
-		goto cleanup_qp;
-	}
-	if (ep->state == SCIFEP_MAPPING) {
-		err = scif_setup_qp_connect_response(ep->remote_dev,
-						     ep->qp_info.qp,
-						     ep->qp_info.gnt_pld);
-		/*
-		 * If the resource to map the queue are not available then
-		 * we need to tell the other side to terminate the accept
-		 */
-		if (err) {
-			dev_err(&ep->remote_dev->sdev->dev,
-				"%s %d err %d\n", __func__, __LINE__, err);
-			msg.uop = SCIF_CNCT_GNTNACK;
-			msg.payload[0] = ep->remote_ep;
-			_scif_nodeqp_send(ep->remote_dev, &msg);
-			ep->state = SCIFEP_BOUND;
-			goto connect_error_dec;
-		}
-
-		msg.uop = SCIF_CNCT_GNTACK;
-		msg.payload[0] = ep->remote_ep;
-		err = _scif_nodeqp_send(ep->remote_dev, &msg);
-		if (err) {
-			ep->state = SCIFEP_BOUND;
-			goto connect_error_dec;
-		}
-		ep->state = SCIFEP_CONNECTED;
-		mutex_lock(&scif_info.connlock);
-		list_add_tail(&ep->list, &scif_info.connected);
-		mutex_unlock(&scif_info.connlock);
-		dev_dbg(&ep->remote_dev->sdev->dev,
-			"SCIFAPI connect: ep %p connected\n", ep);
-	} else if (ep->state == SCIFEP_BOUND) {
-		dev_dbg(&ep->remote_dev->sdev->dev,
-			"SCIFAPI connect: ep %p connection refused\n", ep);
-		err = -ECONNREFUSED;
-		goto connect_error_dec;
-	}
-	scif_put_peer_dev(spdev);
-	return err;
-connect_error_dec:
-	scif_put_peer_dev(spdev);
-cleanup_qp:
-	scif_cleanup_ep_qp(ep);
-connect_error_simple:
-	return err;
-}
-
-/*
- * scif_conn_handler:
- *
- * Workqueue handler for servicing non-blocking SCIF connect
- *
- */
-void scif_conn_handler(struct work_struct *work)
-{
-	struct scif_endpt *ep;
-
-	do {
-		ep = NULL;
-		spin_lock(&scif_info.nb_connect_lock);
-		if (!list_empty(&scif_info.nb_connect_list)) {
-			ep = list_first_entry(&scif_info.nb_connect_list,
-					      struct scif_endpt, conn_list);
-			list_del(&ep->conn_list);
-		}
-		spin_unlock(&scif_info.nb_connect_lock);
-		if (ep) {
-			ep->conn_err = scif_conn_func(ep);
-			wake_up_interruptible(&ep->conn_pend_wq);
-		}
-	} while (ep);
-}
-
-int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)epd;
-	int err = 0;
-	struct scif_dev *remote_dev;
-	struct device *spdev;
-
-	dev_dbg(scif_info.mdev.this_device, "SCIFAPI connect: ep %p %s\n", ep,
-		scif_ep_states[ep->state]);
-
-	if (!scif_dev || dst->node > scif_info.maxid)
-		return -ENODEV;
-
-	might_sleep();
-
-	remote_dev = &scif_dev[dst->node];
-	spdev = scif_get_peer_dev(remote_dev);
-	if (IS_ERR(spdev)) {
-		err = PTR_ERR(spdev);
-		return err;
-	}
-
-	spin_lock(&ep->lock);
-	switch (ep->state) {
-	case SCIFEP_ZOMBIE:
-	case SCIFEP_CLOSING:
-		err = -EINVAL;
-		break;
-	case SCIFEP_DISCONNECTED:
-		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
-			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
-		else
-			err = -EINVAL;
-		break;
-	case SCIFEP_LISTENING:
-	case SCIFEP_CLLISTEN:
-		err = -EOPNOTSUPP;
-		break;
-	case SCIFEP_CONNECTING:
-	case SCIFEP_MAPPING:
-		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
-			err = -EINPROGRESS;
-		else
-			err = -EISCONN;
-		break;
-	case SCIFEP_CONNECTED:
-		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
-			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
-		else
-			err = -EISCONN;
-		break;
-	case SCIFEP_UNBOUND:
-		err = scif_get_new_port();
-		if (err < 0)
-			break;
-		ep->port.port = err;
-		ep->port.node = scif_info.nodeid;
-		ep->conn_async_state = ASYNC_CONN_IDLE;
-		fallthrough;
-	case SCIFEP_BOUND:
-		/*
-		 * If a non-blocking connect has been already initiated
-		 * (conn_async_state is either ASYNC_CONN_INPROGRESS or
-		 * ASYNC_CONN_FLUSH_WORK), the end point could end up in
-		 * SCIF_BOUND due an error in the connection process
-		 * (e.g., connection refused) If conn_async_state is
-		 * ASYNC_CONN_INPROGRESS - transition to ASYNC_CONN_FLUSH_WORK
-		 * so that the error status can be collected. If the state is
-		 * already ASYNC_CONN_FLUSH_WORK - then set the error to
-		 * EINPROGRESS since some other thread is waiting to collect
-		 * error status.
-		 */
-		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
-			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
-		} else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
-			err = -EINPROGRESS;
-		} else {
-			ep->conn_port = *dst;
-			init_waitqueue_head(&ep->sendwq);
-			init_waitqueue_head(&ep->recvwq);
-			init_waitqueue_head(&ep->conwq);
-			ep->conn_async_state = 0;
-
-			if (unlikely(non_block))
-				ep->conn_async_state = ASYNC_CONN_INPROGRESS;
-		}
-		break;
-	}
-
-	if (err || ep->conn_async_state == ASYNC_CONN_FLUSH_WORK)
-			goto connect_simple_unlock1;
-
-	ep->state = SCIFEP_CONNECTING;
-	ep->remote_dev = &scif_dev[dst->node];
-	ep->qp_info.qp->magic = SCIFEP_MAGIC;
-	if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
-		init_waitqueue_head(&ep->conn_pend_wq);
-		spin_lock(&scif_info.nb_connect_lock);
-		list_add_tail(&ep->conn_list, &scif_info.nb_connect_list);
-		spin_unlock(&scif_info.nb_connect_lock);
-		err = -EINPROGRESS;
-		schedule_work(&scif_info.conn_work);
-	}
-connect_simple_unlock1:
-	spin_unlock(&ep->lock);
-	scif_put_peer_dev(spdev);
-	if (err) {
-		return err;
-	} else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
-		flush_work(&scif_info.conn_work);
-		err = ep->conn_err;
-		spin_lock(&ep->lock);
-		ep->conn_async_state = ASYNC_CONN_IDLE;
-		spin_unlock(&ep->lock);
-	} else {
-		err = scif_conn_func(ep);
-	}
-	return err;
-}
-
-int scif_connect(scif_epd_t epd, struct scif_port_id *dst)
-{
-	return __scif_connect(epd, dst, false);
-}
-EXPORT_SYMBOL_GPL(scif_connect);
-
-/*
- * scif_accept() - Accept a connection request from the remote node
- *
- * The function accepts a connection request from the remote node.  Successful
- * complete is indicate by a new end point being created and passed back
- * to the caller for future reference.
- *
- * Upon successful complete a zero will be returned and the peer information
- * will be filled in.
- *
- * If the end point is not in the listening state -EINVAL will be returned.
- *
- * If during the connection sequence resource allocation fails the -ENOMEM
- * will be returned.
- *
- * If the function is called with the ASYNC flag set and no connection requests
- * are pending it will return -EAGAIN.
- *
- * If the remote side is not sending any connection requests the caller may
- * terminate this function with a signal.  If so a -EINTR will be returned.
- */
-int scif_accept(scif_epd_t epd, struct scif_port_id *peer,
-		scif_epd_t *newepd, int flags)
-{
-	struct scif_endpt *lep = (struct scif_endpt *)epd;
-	struct scif_endpt *cep;
-	struct scif_conreq *conreq;
-	struct scifmsg msg;
-	int err;
-	struct device *spdev;
-
-	dev_dbg(scif_info.mdev.this_device,
-		"SCIFAPI accept: ep %p %s\n", lep, scif_ep_states[lep->state]);
-
-	if (flags & ~SCIF_ACCEPT_SYNC)
-		return -EINVAL;
-
-	if (!peer || !newepd)
-		return -EINVAL;
-
-	might_sleep();
-	spin_lock(&lep->lock);
-	if (lep->state != SCIFEP_LISTENING) {
-		spin_unlock(&lep->lock);
-		return -EINVAL;
-	}
-
-	if (!lep->conreqcnt && !(flags & SCIF_ACCEPT_SYNC)) {
-		/* No connection request present and we do not want to wait */
-		spin_unlock(&lep->lock);
-		return -EAGAIN;
-	}
-
-	lep->files = current->files;
-retry_connection:
-	spin_unlock(&lep->lock);
-	/* Wait for the remote node to send us a SCIF_CNCT_REQ */
-	err = wait_event_interruptible(lep->conwq,
-				       (lep->conreqcnt ||
-				       (lep->state != SCIFEP_LISTENING)));
-	if (err)
-		return err;
-
-	if (lep->state != SCIFEP_LISTENING)
-		return -EINTR;
-
-	spin_lock(&lep->lock);
-
-	if (!lep->conreqcnt)
-		goto retry_connection;
-
-	/* Get the first connect request off the list */
-	conreq = list_first_entry(&lep->conlist, struct scif_conreq, list);
-	list_del(&conreq->list);
-	lep->conreqcnt--;
-	spin_unlock(&lep->lock);
-
-	/* Fill in the peer information */
-	peer->node = conreq->msg.src.node;
-	peer->port = conreq->msg.src.port;
-
-	cep = kzalloc(sizeof(*cep), GFP_KERNEL);
-	if (!cep) {
-		err = -ENOMEM;
-		goto scif_accept_error_epalloc;
-	}
-	spin_lock_init(&cep->lock);
-	mutex_init(&cep->sendlock);
-	mutex_init(&cep->recvlock);
-	cep->state = SCIFEP_CONNECTING;
-	cep->remote_dev = &scif_dev[peer->node];
-	cep->remote_ep = conreq->msg.payload[0];
-
-	scif_rma_ep_init(cep);
-
-	err = scif_reserve_dma_chan(cep);
-	if (err) {
-		dev_err(scif_info.mdev.this_device,
-			"%s %d err %d\n", __func__, __LINE__, err);
-		goto scif_accept_error_qpalloc;
-	}
-
-	cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL);
-	if (!cep->qp_info.qp) {
-		err = -ENOMEM;
-		goto scif_accept_error_qpalloc;
-	}
-
-	err = scif_anon_inode_getfile(cep);
-	if (err)
-		goto scif_accept_error_anon_inode;
-
-	cep->qp_info.qp->magic = SCIFEP_MAGIC;
-	spdev = scif_get_peer_dev(cep->remote_dev);
-	if (IS_ERR(spdev)) {
-		err = PTR_ERR(spdev);
-		goto scif_accept_error_map;
-	}
-	err = scif_setup_qp_accept(cep->qp_info.qp, &cep->qp_info.qp_offset,
-				   conreq->msg.payload[1], SCIF_ENDPT_QP_SIZE,
-				   cep->remote_dev);
-	if (err) {
-		dev_dbg(&cep->remote_dev->sdev->dev,
-			"SCIFAPI accept: ep %p new %p scif_setup_qp_accept %d qp_offset 0x%llx\n",
-			lep, cep, err, cep->qp_info.qp_offset);
-		scif_put_peer_dev(spdev);
-		goto scif_accept_error_map;
-	}
-
-	cep->port.node = lep->port.node;
-	cep->port.port = lep->port.port;
-	cep->peer.node = peer->node;
-	cep->peer.port = peer->port;
-	init_waitqueue_head(&cep->sendwq);
-	init_waitqueue_head(&cep->recvwq);
-	init_waitqueue_head(&cep->conwq);
-
-	msg.uop = SCIF_CNCT_GNT;
-	msg.src = cep->port;
-	msg.payload[0] = cep->remote_ep;
-	msg.payload[1] = cep->qp_info.qp_offset;
-	msg.payload[2] = (u64)cep;
-
-	err = _scif_nodeqp_send(cep->remote_dev, &msg);
-	scif_put_peer_dev(spdev);
-	if (err)
-		goto scif_accept_error_map;
-retry:
-	/* Wait for the remote node to respond with SCIF_CNCT_GNT(N)ACK */
-	err = wait_event_timeout(cep->conwq, cep->state != SCIFEP_CONNECTING,
-				 SCIF_NODE_ACCEPT_TIMEOUT);
-	if (!err && scifdev_alive(cep))
-		goto retry;
-	err = !err ? -ENODEV : 0;
-	if (err)
-		goto scif_accept_error_map;
-	kfree(conreq);
-
-	spin_lock(&cep->lock);
-
-	if (cep->state == SCIFEP_CLOSING) {
-		/*
-		 * Remote failed to allocate resources and NAKed the grant.
-		 * There is at this point nothing referencing the new end point.
-		 */
-		spin_unlock(&cep->lock);
-		scif_teardown_ep(cep);
-		kfree(cep);
-
-		/* If call with sync flag then go back and wait. */
-		if (flags & SCIF_ACCEPT_SYNC) {
-			spin_lock(&lep->lock);
-			goto retry_connection;
-		}
-		return -EAGAIN;
-	}
-
-	scif_get_port(cep->port.port);
-	*newepd = (scif_epd_t)cep;
-	spin_unlock(&cep->lock);
-	return 0;
-scif_accept_error_map:
-	scif_anon_inode_fput(cep);
-scif_accept_error_anon_inode:
-	scif_teardown_ep(cep);
-scif_accept_error_qpalloc:
-	kfree(cep);
-scif_accept_error_epalloc:
-	msg.uop = SCIF_CNCT_REJ;
-	msg.dst.node = conreq->msg.src.node;
-	msg.dst.port = conreq->msg.src.port;
-	msg.payload[0] = conreq->msg.payload[0];
-	msg.payload[1] = conreq->msg.payload[1];
-	scif_nodeqp_send(&scif_dev[conreq->msg.src.node], &msg);
-	kfree(conreq);
-	return err;
-}
-EXPORT_SYMBOL_GPL(scif_accept);
-
-/*
- * scif_msg_param_check:
- * @epd: The end point returned from scif_open()
- * @len: Length to receive
- * @flags: blocking or non blocking
- *
- * Validate parameters for messaging APIs scif_send(..)/scif_recv(..).
- */
-static inline int scif_msg_param_check(scif_epd_t epd, int len, int flags)
-{
-	int ret = -EINVAL;
-
-	if (len < 0)
-		goto err_ret;
-	if (flags && (!(flags & SCIF_RECV_BLOCK)))
-		goto err_ret;
-	ret = 0;
-err_ret:
-	return ret;
-}
-
-static int _scif_send(scif_epd_t epd, void *msg, int len, int flags)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)epd;
-	struct scifmsg notif_msg;
-	int curr_xfer_len = 0, sent_len = 0, write_count;
-	int ret = 0;
-	struct scif_qp *qp = ep->qp_info.qp;
-
-	if (flags & SCIF_SEND_BLOCK)
-		might_sleep();
-
-	spin_lock(&ep->lock);
-	while (sent_len != len && SCIFEP_CONNECTED == ep->state) {
-		write_count = scif_rb_space(&qp->outbound_q);
-		if (write_count) {
-			/* Best effort to send as much data as possible */
-			curr_xfer_len = min(len - sent_len, write_count);
-			ret = scif_rb_write(&qp->outbound_q, msg,
-					    curr_xfer_len);
-			if (ret < 0)
-				break;
-			/* Success. Update write pointer */
-			scif_rb_commit(&qp->outbound_q);
-			/*
-			 * Send a notification to the peer about the
-			 * produced data message.
-			 */
-			notif_msg.src = ep->port;
-			notif_msg.uop = SCIF_CLIENT_SENT;
-			notif_msg.payload[0] = ep->remote_ep;
-			ret = _scif_nodeqp_send(ep->remote_dev, &notif_msg);
-			if (ret)
-				break;
-			sent_len += curr_xfer_len;
-			msg = msg + curr_xfer_len;
-			continue;
-		}
-		curr_xfer_len = min(len - sent_len, SCIF_ENDPT_QP_SIZE - 1);
-		/* Not enough RB space. return for the Non Blocking case */
-		if (!(flags & SCIF_SEND_BLOCK))
-			break;
-
-		spin_unlock(&ep->lock);
-		/* Wait for a SCIF_CLIENT_RCVD message in the Blocking case */
-		ret =
-		wait_event_interruptible(ep->sendwq,
-					 (SCIFEP_CONNECTED != ep->state) ||
-					 (scif_rb_space(&qp->outbound_q) >=
-					 curr_xfer_len));
-		spin_lock(&ep->lock);
-		if (ret)
-			break;
-	}
-	if (sent_len)
-		ret = sent_len;
-	else if (!ret && SCIFEP_CONNECTED != ep->state)
-		ret = SCIFEP_DISCONNECTED == ep->state ?
-			-ECONNRESET : -ENOTCONN;
-	spin_unlock(&ep->lock);
-	return ret;
-}
-
-static int _scif_recv(scif_epd_t epd, void *msg, int len, int flags)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)epd;
-	struct scifmsg notif_msg;
-	int curr_recv_len = 0, remaining_len = len, read_count;
-	int ret = 0;
-	struct scif_qp *qp = ep->qp_info.qp;
-
-	if (flags & SCIF_RECV_BLOCK)
-		might_sleep();
-	spin_lock(&ep->lock);
-	while (remaining_len && (SCIFEP_CONNECTED == ep->state ||
-				 SCIFEP_DISCONNECTED == ep->state)) {
-		read_count = scif_rb_count(&qp->inbound_q, remaining_len);
-		if (read_count) {
-			/*
-			 * Best effort to recv as much data as there
-			 * are bytes to read in the RB particularly
-			 * important for the Non Blocking case.
-			 */
-			curr_recv_len = min(remaining_len, read_count);
-			scif_rb_get_next(&qp->inbound_q, msg, curr_recv_len);
-			if (ep->state == SCIFEP_CONNECTED) {
-				/*
-				 * Update the read pointer only if the endpoint
-				 * is still connected else the read pointer
-				 * might no longer exist since the peer has
-				 * freed resources!
-				 */
-				scif_rb_update_read_ptr(&qp->inbound_q);
-				/*
-				 * Send a notification to the peer about the
-				 * consumed data message only if the EP is in
-				 * SCIFEP_CONNECTED state.
-				 */
-				notif_msg.src = ep->port;
-				notif_msg.uop = SCIF_CLIENT_RCVD;
-				notif_msg.payload[0] = ep->remote_ep;
-				ret = _scif_nodeqp_send(ep->remote_dev,
-							&notif_msg);
-				if (ret)
-					break;
-			}
-			remaining_len -= curr_recv_len;
-			msg = msg + curr_recv_len;
-			continue;
-		}
-		/*
-		 * Bail out now if the EP is in SCIFEP_DISCONNECTED state else
-		 * we will keep looping forever.
-		 */
-		if (ep->state == SCIFEP_DISCONNECTED)
-			break;
-		/*
-		 * Return in the Non Blocking case if there is no data
-		 * to read in this iteration.
-		 */
-		if (!(flags & SCIF_RECV_BLOCK))
-			break;
-		curr_recv_len = min(remaining_len, SCIF_ENDPT_QP_SIZE - 1);
-		spin_unlock(&ep->lock);
-		/*
-		 * Wait for a SCIF_CLIENT_SEND message in the blocking case
-		 * or until other side disconnects.
-		 */
-		ret =
-		wait_event_interruptible(ep->recvwq,
-					 SCIFEP_CONNECTED != ep->state ||
-					 scif_rb_count(&qp->inbound_q,
-						       curr_recv_len)
-					 >= curr_recv_len);
-		spin_lock(&ep->lock);
-		if (ret)
-			break;
-	}
-	if (len - remaining_len)
-		ret = len - remaining_len;
-	else if (!ret && ep->state != SCIFEP_CONNECTED)
-		ret = ep->state == SCIFEP_DISCONNECTED ?
-			-ECONNRESET : -ENOTCONN;
-	spin_unlock(&ep->lock);
-	return ret;
-}
-
-/**
- * scif_user_send() - Send data to connection queue
- * @epd: The end point returned from scif_open()
- * @msg: Address to place data
- * @len: Length to receive
- * @flags: blocking or non blocking
- *
- * This function is called from the driver IOCTL entry point
- * only and is a wrapper for _scif_send().
- */
-int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)epd;
-	int err = 0;
-	int sent_len = 0;
-	char *tmp;
-	int loop_len;
-	int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));
-
-	dev_dbg(scif_info.mdev.this_device,
-		"SCIFAPI send (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
-	if (!len)
-		return 0;
-
-	err = scif_msg_param_check(epd, len, flags);
-	if (err)
-		goto send_err;
-
-	tmp = kmalloc(chunk_len, GFP_KERNEL);
-	if (!tmp) {
-		err = -ENOMEM;
-		goto send_err;
-	}
-	/*
-	 * Grabbing the lock before breaking up the transfer in
-	 * multiple chunks is required to ensure that messages do
-	 * not get fragmented and reordered.
-	 */
-	mutex_lock(&ep->sendlock);
-	while (sent_len != len) {
-		loop_len = len - sent_len;
-		loop_len = min(chunk_len, loop_len);
-		if (copy_from_user(tmp, msg, loop_len)) {
-			err = -EFAULT;
-			goto send_free_err;
-		}
-		err = _scif_send(epd, tmp, loop_len, flags);
-		if (err < 0)
-			goto send_free_err;
-		sent_len += err;
-		msg += err;
-		if (err != loop_len)
-			goto send_free_err;
-	}
-send_free_err:
-	mutex_unlock(&ep->sendlock);
-	kfree(tmp);
-send_err:
-	return err < 0 ? err : sent_len;
-}
-
-/**
- * scif_user_recv() - Receive data from connection queue
- * @epd: The end point returned from scif_open()
- * @msg: Address to place data
- * @len: Length to receive
- * @flags: blocking or non blocking
- *
- * This function is called from the driver IOCTL entry point
- * only and is a wrapper for _scif_recv().
- */
-int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)epd;
-	int err = 0;
-	int recv_len = 0;
-	char *tmp;
-	int loop_len;
-	int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));
-
-	dev_dbg(scif_info.mdev.this_device,
-		"SCIFAPI recv (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
-	if (!len)
-		return 0;
-
-	err = scif_msg_param_check(epd, len, flags);
-	if (err)
-		goto recv_err;
-
-	tmp = kmalloc(chunk_len, GFP_KERNEL);
-	if (!tmp) {
-		err = -ENOMEM;
-		goto recv_err;
-	}
-	/*
-	 * Grabbing the lock before breaking up the transfer in
-	 * multiple chunks is required to ensure that messages do
-	 * not get fragmented and reordered.
-	 */
-	mutex_lock(&ep->recvlock);
-	while (recv_len != len) {
-		loop_len = len - recv_len;
-		loop_len = min(chunk_len, loop_len);
-		err = _scif_recv(epd, tmp, loop_len, flags);
-		if (err < 0)
-			goto recv_free_err;
-		if (copy_to_user(msg, tmp, err)) {
-			err = -EFAULT;
-			goto recv_free_err;
-		}
-		recv_len += err;
-		msg += err;
-		if (err != loop_len)
-			goto recv_free_err;
-	}
-recv_free_err:
-	mutex_unlock(&ep->recvlock);
-	kfree(tmp);
-recv_err:
-	return err < 0 ? err : recv_len;
-}
-
-/**
- * scif_send() - Send data to connection queue
- * @epd: The end point returned from scif_open()
- * @msg: Address to place data
- * @len: Length to receive
- * @flags: blocking or non blocking
- *
- * This function is called from the kernel mode only and is
- * a wrapper for _scif_send().
- */
-int scif_send(scif_epd_t epd, void *msg, int len, int flags)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)epd;
-	int ret;
-
-	dev_dbg(scif_info.mdev.this_device,
-		"SCIFAPI send (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
-	if (!len)
-		return 0;
-
-	ret = scif_msg_param_check(epd, len, flags);
-	if (ret)
-		return ret;
-	if (!ep->remote_dev)
-		return -ENOTCONN;
-	/*
-	 * Grab the mutex lock in the blocking case only
-	 * to ensure messages do not get fragmented/reordered.
-	 * The non blocking mode is protected using spin locks
-	 * in _scif_send().
-	 */
-	if (flags & SCIF_SEND_BLOCK)
-		mutex_lock(&ep->sendlock);
-
-	ret = _scif_send(epd, msg, len, flags);
-
-	if (flags & SCIF_SEND_BLOCK)
-		mutex_unlock(&ep->sendlock);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(scif_send);
-
-/**
- * scif_recv() - Receive data from connection queue
- * @epd: The end point returned from scif_open()
- * @msg: Address to place data
- * @len: Length to receive
- * @flags: blocking or non blocking
- *
- * This function is called from the kernel mode only and is
- * a wrapper for _scif_recv().
- */
-int scif_recv(scif_epd_t epd, void *msg, int len, int flags)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)epd;
-	int ret;
-
-	dev_dbg(scif_info.mdev.this_device,
-		"SCIFAPI recv (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
-	if (!len)
-		return 0;
-
-	ret = scif_msg_param_check(epd, len, flags);
-	if (ret)
-		return ret;
-	/*
-	 * Grab the mutex lock in the blocking case only
-	 * to ensure messages do not get fragmented/reordered.
-	 * The non blocking mode is protected using spin locks
-	 * in _scif_send().
-	 */
-	if (flags & SCIF_RECV_BLOCK)
-		mutex_lock(&ep->recvlock);
-
-	ret = _scif_recv(epd, msg, len, flags);
-
-	if (flags & SCIF_RECV_BLOCK)
-		mutex_unlock(&ep->recvlock);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(scif_recv);
-
-static inline void _scif_poll_wait(struct file *f, wait_queue_head_t *wq,
-				   poll_table *p, struct scif_endpt *ep)
-{
-	/*
-	 * Because poll_wait makes a GFP_KERNEL allocation, give up the lock
-	 * and regrab it afterwards. Because the endpoint state might have
-	 * changed while the lock was given up, the state must be checked
-	 * again after re-acquiring the lock. The code in __scif_pollfd(..)
-	 * does this.
-	 */
-	spin_unlock(&ep->lock);
-	poll_wait(f, wq, p);
-	spin_lock(&ep->lock);
-}
-
-__poll_t
-__scif_pollfd(struct file *f, poll_table *wait, struct scif_endpt *ep)
-{
-	__poll_t mask = 0;
-
-	dev_dbg(scif_info.mdev.this_device,
-		"SCIFAPI pollfd: ep %p %s\n", ep, scif_ep_states[ep->state]);
-
-	spin_lock(&ep->lock);
-
-	/* Endpoint is waiting for a non-blocking connect to complete */
-	if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
-		_scif_poll_wait(f, &ep->conn_pend_wq, wait, ep);
-		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
-			if (ep->state == SCIFEP_CONNECTED ||
-			    ep->state == SCIFEP_DISCONNECTED ||
-			    ep->conn_err)
-				mask |= EPOLLOUT;
-			goto exit;
-		}
-	}
-
-	/* Endpoint is listening for incoming connection requests */
-	if (ep->state == SCIFEP_LISTENING) {
-		_scif_poll_wait(f, &ep->conwq, wait, ep);
-		if (ep->state == SCIFEP_LISTENING) {
-			if (ep->conreqcnt)
-				mask |= EPOLLIN;
-			goto exit;
-		}
-	}
-
-	/* Endpoint is connected or disconnected */
-	if (ep->state == SCIFEP_CONNECTED || ep->state == SCIFEP_DISCONNECTED) {
-		if (poll_requested_events(wait) & EPOLLIN)
-			_scif_poll_wait(f, &ep->recvwq, wait, ep);
-		if (poll_requested_events(wait) & EPOLLOUT)
-			_scif_poll_wait(f, &ep->sendwq, wait, ep);
-		if (ep->state == SCIFEP_CONNECTED ||
-		    ep->state == SCIFEP_DISCONNECTED) {
-			/* Data can be read without blocking */
-			if (scif_rb_count(&ep->qp_info.qp->inbound_q, 1))
-				mask |= EPOLLIN;
-			/* Data can be written without blocking */
-			if (scif_rb_space(&ep->qp_info.qp->outbound_q))
-				mask |= EPOLLOUT;
-			/* Return EPOLLHUP if endpoint is disconnected */
-			if (ep->state == SCIFEP_DISCONNECTED)
-				mask |= EPOLLHUP;
-			goto exit;
-		}
-	}
-
-	/* Return EPOLLERR if the endpoint is in none of the above states */
-	mask |= EPOLLERR;
-exit:
-	spin_unlock(&ep->lock);
-	return mask;
-}
-
-/**
- * scif_poll() - Kernel mode SCIF poll
- * @ufds: Array of scif_pollepd structures containing the end points
- *	  and events to poll on
- * @nfds: Size of the ufds array
- * @timeout_msecs: Timeout in msecs, -ve implies infinite timeout
- *
- * The code flow in this function is based on do_poll(..) in select.c
- *
- * Returns the number of endpoints which have pending events or 0 in
- * the event of a timeout. If a signal is used for wake up, -EINTR is
- * returned.
- */
-int
-scif_poll(struct scif_pollepd *ufds, unsigned int nfds, long timeout_msecs)
-{
-	struct poll_wqueues table;
-	poll_table *pt;
-	int i, count = 0, timed_out = timeout_msecs == 0;
-	__poll_t mask;
-	u64 timeout = timeout_msecs < 0 ? MAX_SCHEDULE_TIMEOUT
-		: msecs_to_jiffies(timeout_msecs);
-
-	poll_initwait(&table);
-	pt = &table.pt;
-	while (1) {
-		for (i = 0; i < nfds; i++) {
-			pt->_key = ufds[i].events | EPOLLERR | EPOLLHUP;
-			mask = __scif_pollfd(ufds[i].epd->anon,
-					     pt, ufds[i].epd);
-			mask &= ufds[i].events | EPOLLERR | EPOLLHUP;
-			if (mask) {
-				count++;
-				pt->_qproc = NULL;
-			}
-			ufds[i].revents = mask;
-		}
-		pt->_qproc = NULL;
-		if (!count) {
-			count = table.error;
-			if (signal_pending(current))
-				count = -EINTR;
-		}
-		if (count || timed_out)
-			break;
-
-		if (!schedule_timeout_interruptible(timeout))
-			timed_out = 1;
-	}
-	poll_freewait(&table);
-	return count;
-}
-EXPORT_SYMBOL_GPL(scif_poll);
-
-int scif_get_node_ids(u16 *nodes, int len, u16 *self)
-{
-	int online = 0;
-	int offset = 0;
-	int node;
-
-	if (!scif_is_mgmt_node())
-		scif_get_node_info();
-
-	*self = scif_info.nodeid;
-	mutex_lock(&scif_info.conflock);
-	len = min_t(int, len, scif_info.total);
-	for (node = 0; node <= scif_info.maxid; node++) {
-		if (_scifdev_alive(&scif_dev[node])) {
-			online++;
-			if (offset < len)
-				nodes[offset++] = node;
-		}
-	}
-	dev_dbg(scif_info.mdev.this_device,
-		"SCIFAPI get_node_ids total %d online %d filled in %d nodes\n",
-		scif_info.total, online, offset);
-	mutex_unlock(&scif_info.conflock);
-
-	return online;
-}
-EXPORT_SYMBOL_GPL(scif_get_node_ids);
-
-static int scif_add_client_dev(struct device *dev, struct subsys_interface *si)
-{
-	struct scif_client *client =
-		container_of(si, struct scif_client, si);
-	struct scif_peer_dev *spdev =
-		container_of(dev, struct scif_peer_dev, dev);
-
-	if (client->probe)
-		client->probe(spdev);
-	return 0;
-}
-
-static void scif_remove_client_dev(struct device *dev,
-				   struct subsys_interface *si)
-{
-	struct scif_client *client =
-		container_of(si, struct scif_client, si);
-	struct scif_peer_dev *spdev =
-		container_of(dev, struct scif_peer_dev, dev);
-
-	if (client->remove)
-		client->remove(spdev);
-}
-
-void scif_client_unregister(struct scif_client *client)
-{
-	subsys_interface_unregister(&client->si);
-}
-EXPORT_SYMBOL_GPL(scif_client_unregister);
-
-int scif_client_register(struct scif_client *client)
-{
-	struct subsys_interface *si = &client->si;
-
-	si->name = client->name;
-	si->subsys = &scif_peer_bus;
-	si->add_dev = scif_add_client_dev;
-	si->remove_dev = scif_remove_client_dev;
-
-	return subsys_interface_register(&client->si);
-}
-EXPORT_SYMBOL_GPL(scif_client_register);
diff --git a/drivers/misc/mic/scif/scif_debugfs.c b/drivers/misc/mic/scif/scif_debugfs.c
deleted file mode 100644
index 8fe38e7ca6e6..000000000000
--- a/drivers/misc/mic/scif/scif_debugfs.c
+++ /dev/null
@@ -1,116 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Intel SCIF driver.
- */
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-
-#include "../common/mic_dev.h"
-#include "scif_main.h"
-
-/* Debugfs parent dir */
-static struct dentry *scif_dbg;
-
-static int scif_dev_show(struct seq_file *s, void *unused)
-{
-	int node;
-
-	seq_printf(s, "Total Nodes %d Self Node Id %d Maxid %d\n",
-		   scif_info.total, scif_info.nodeid,
-		   scif_info.maxid);
-
-	if (!scif_dev)
-		return 0;
-
-	seq_printf(s, "%-16s\t%-16s\n", "node_id", "state");
-
-	for (node = 0; node <= scif_info.maxid; node++)
-		seq_printf(s, "%-16d\t%-16s\n", scif_dev[node].node,
-			   _scifdev_alive(&scif_dev[node]) ?
-			   "Running" : "Offline");
-	return 0;
-}
-
-DEFINE_SHOW_ATTRIBUTE(scif_dev);
-
-static void scif_display_window(struct scif_window *window, struct seq_file *s)
-{
-	int j;
-	struct scatterlist *sg;
-	scif_pinned_pages_t pin = window->pinned_pages;
-
-	seq_printf(s, "window %p type %d temp %d offset 0x%llx ",
-		   window, window->type, window->temp, window->offset);
-	seq_printf(s, "nr_pages 0x%llx nr_contig_chunks 0x%x prot %d ",
-		   window->nr_pages, window->nr_contig_chunks, window->prot);
-	seq_printf(s, "ref_count %d magic 0x%llx peer_window 0x%llx ",
-		   window->ref_count, window->magic, window->peer_window);
-	seq_printf(s, "unreg_state 0x%x va_for_temp 0x%lx\n",
-		   window->unreg_state, window->va_for_temp);
-
-	for (j = 0; j < window->nr_contig_chunks; j++)
-		seq_printf(s, "page[%d] dma_addr 0x%llx num_pages 0x%llx\n", j,
-			   window->dma_addr[j], window->num_pages[j]);
-
-	if (window->type == SCIF_WINDOW_SELF && pin)
-		for (j = 0; j < window->nr_pages; j++)
-			seq_printf(s, "page[%d] = pinned_pages %p address %p\n",
-				   j, pin->pages[j],
-				   page_address(pin->pages[j]));
-
-	if (window->st)
-		for_each_sg(window->st->sgl, sg, window->st->nents, j)
-			seq_printf(s, "sg[%d] dma addr 0x%llx length 0x%x\n",
-				   j, sg_dma_address(sg), sg_dma_len(sg));
-}
-
-static void scif_display_all_windows(struct list_head *head, struct seq_file *s)
-{
-	struct list_head *item;
-	struct scif_window *window;
-
-	list_for_each(item, head) {
-		window = list_entry(item, struct scif_window, list);
-		scif_display_window(window, s);
-	}
-}
-
-static int scif_rma_show(struct seq_file *s, void *unused)
-{
-	struct scif_endpt *ep;
-	struct list_head *pos;
-
-	mutex_lock(&scif_info.connlock);
-	list_for_each(pos, &scif_info.connected) {
-		ep = list_entry(pos, struct scif_endpt, list);
-		seq_printf(s, "ep %p self windows\n", ep);
-		mutex_lock(&ep->rma_info.rma_lock);
-		scif_display_all_windows(&ep->rma_info.reg_list, s);
-		seq_printf(s, "ep %p remote windows\n", ep);
-		scif_display_all_windows(&ep->rma_info.remote_reg_list, s);
-		mutex_unlock(&ep->rma_info.rma_lock);
-	}
-	mutex_unlock(&scif_info.connlock);
-	return 0;
-}
-
-DEFINE_SHOW_ATTRIBUTE(scif_rma);
-
-void __init scif_init_debugfs(void)
-{
-	scif_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL);
-
-	debugfs_create_file("scif_dev", 0444, scif_dbg, NULL, &scif_dev_fops);
-	debugfs_create_file("scif_rma", 0444, scif_dbg, NULL, &scif_rma_fops);
-	debugfs_create_u8("en_msg_log", 0666, scif_dbg, &scif_info.en_msg_log);
-	debugfs_create_u8("p2p_enable", 0666, scif_dbg, &scif_info.p2p_enable);
-}
-
-void scif_exit_debugfs(void)
-{
-	debugfs_remove_recursive(scif_dbg);
-}
diff --git a/drivers/misc/mic/scif/scif_dma.c b/drivers/misc/mic/scif/scif_dma.c
deleted file mode 100644
index 401b98e5ad79..000000000000
--- a/drivers/misc/mic/scif/scif_dma.c
+++ /dev/null
@@ -1,1940 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * Intel SCIF driver.
- */
-#include "scif_main.h"
-#include "scif_map.h"
-
-/*
- * struct scif_dma_comp_cb - SCIF DMA completion callback
- *
- * @dma_completion_func: DMA completion callback
- * @cb_cookie: DMA completion callback cookie
- * @temp_buf: Temporary buffer
- * @temp_buf_to_free: Temporary buffer to be freed
- * @is_cache: Is a kmem_cache allocated buffer
- * @dst_offset: Destination registration offset
- * @dst_window: Destination registration window
- * @len: Length of the temp buffer
- * @temp_phys: DMA address of the temp buffer
- * @sdev: The SCIF device
- * @header_padding: padding for cache line alignment
- */
-struct scif_dma_comp_cb {
-	void (*dma_completion_func)(void *cookie);
-	void *cb_cookie;
-	u8 *temp_buf;
-	u8 *temp_buf_to_free;
-	bool is_cache;
-	s64 dst_offset;
-	struct scif_window *dst_window;
-	size_t len;
-	dma_addr_t temp_phys;
-	struct scif_dev *sdev;
-	int header_padding;
-};
-
-/**
- * struct scif_copy_work - Work for DMA copy
- *
- * @src_offset: Starting source offset
- * @dst_offset: Starting destination offset
- * @src_window: Starting src registered window
- * @dst_window: Starting dst registered window
- * @loopback: true if this is a loopback DMA transfer
- * @len: Length of the transfer
- * @comp_cb: DMA copy completion callback
- * @remote_dev: The remote SCIF peer device
- * @fence_type: polling or interrupt based
- * @ordered: is this a tail byte ordered DMA transfer
- */
-struct scif_copy_work {
-	s64 src_offset;
-	s64 dst_offset;
-	struct scif_window *src_window;
-	struct scif_window *dst_window;
-	int loopback;
-	size_t len;
-	struct scif_dma_comp_cb   *comp_cb;
-	struct scif_dev	*remote_dev;
-	int fence_type;
-	bool ordered;
-};
-
-/**
- * scif_reserve_dma_chan:
- * @ep: Endpoint Descriptor.
- *
- * This routine reserves a DMA channel for a particular
- * endpoint. All DMA transfers for an endpoint are always
- * programmed on the same DMA channel.
- */
-int scif_reserve_dma_chan(struct scif_endpt *ep)
-{
-	int err = 0;
-	struct scif_dev *scifdev;
-	struct scif_hw_dev *sdev;
-	struct dma_chan *chan;
-
-	/* Loopback DMAs are not supported on the management node */
-	if (!scif_info.nodeid && scifdev_self(ep->remote_dev))
-		return 0;
-	if (scif_info.nodeid)
-		scifdev = &scif_dev[0];
-	else
-		scifdev = ep->remote_dev;
-	sdev = scifdev->sdev;
-	if (!sdev->num_dma_ch)
-		return -ENODEV;
-	chan = sdev->dma_ch[scifdev->dma_ch_idx];
-	scifdev->dma_ch_idx = (scifdev->dma_ch_idx + 1) % sdev->num_dma_ch;
-	mutex_lock(&ep->rma_info.rma_lock);
-	ep->rma_info.dma_chan = chan;
-	mutex_unlock(&ep->rma_info.rma_lock);
-	return err;
-}
-
-#ifdef CONFIG_MMU_NOTIFIER
-/*
- * scif_rma_destroy_tcw:
- *
- * This routine destroys temporary cached windows
- */
-static
-void __scif_rma_destroy_tcw(struct scif_mmu_notif *mmn,
-			    u64 start, u64 len)
-{
-	struct list_head *item, *tmp;
-	struct scif_window *window;
-	u64 start_va, end_va;
-	u64 end = start + len;
-
-	if (end <= start)
-		return;
-
-	list_for_each_safe(item, tmp, &mmn->tc_reg_list) {
-		window = list_entry(item, struct scif_window, list);
-		if (!len)
-			break;
-		start_va = window->va_for_temp;
-		end_va = start_va + (window->nr_pages << PAGE_SHIFT);
-		if (start < start_va && end <= start_va)
-			break;
-		if (start >= end_va)
-			continue;
-		__scif_rma_destroy_tcw_helper(window);
-	}
-}
-
-static void scif_rma_destroy_tcw(struct scif_mmu_notif *mmn, u64 start, u64 len)
-{
-	struct scif_endpt *ep = mmn->ep;
-
-	spin_lock(&ep->rma_info.tc_lock);
-	__scif_rma_destroy_tcw(mmn, start, len);
-	spin_unlock(&ep->rma_info.tc_lock);
-}
-
-static void scif_rma_destroy_tcw_ep(struct scif_endpt *ep)
-{
-	struct list_head *item, *tmp;
-	struct scif_mmu_notif *mmn;
-
-	list_for_each_safe(item, tmp, &ep->rma_info.mmn_list) {
-		mmn = list_entry(item, struct scif_mmu_notif, list);
-		scif_rma_destroy_tcw(mmn, 0, ULONG_MAX);
-	}
-}
-
-static void __scif_rma_destroy_tcw_ep(struct scif_endpt *ep)
-{
-	struct list_head *item, *tmp;
-	struct scif_mmu_notif *mmn;
-
-	spin_lock(&ep->rma_info.tc_lock);
-	list_for_each_safe(item, tmp, &ep->rma_info.mmn_list) {
-		mmn = list_entry(item, struct scif_mmu_notif, list);
-		__scif_rma_destroy_tcw(mmn, 0, ULONG_MAX);
-	}
-	spin_unlock(&ep->rma_info.tc_lock);
-}
-
-static bool scif_rma_tc_can_cache(struct scif_endpt *ep, size_t cur_bytes)
-{
-	if ((cur_bytes >> PAGE_SHIFT) > scif_info.rma_tc_limit)
-		return false;
-	if ((atomic_read(&ep->rma_info.tcw_total_pages)
-			+ (cur_bytes >> PAGE_SHIFT)) >
-			scif_info.rma_tc_limit) {
-		dev_info(scif_info.mdev.this_device,
-			 "%s %d total=%d, current=%zu reached max\n",
-			 __func__, __LINE__,
-			 atomic_read(&ep->rma_info.tcw_total_pages),
-			 (1 + (cur_bytes >> PAGE_SHIFT)));
-		scif_rma_destroy_tcw_invalid();
-		__scif_rma_destroy_tcw_ep(ep);
-	}
-	return true;
-}
-
-static void scif_mmu_notifier_release(struct mmu_notifier *mn,
-				      struct mm_struct *mm)
-{
-	struct scif_mmu_notif	*mmn;
-
-	mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
-	scif_rma_destroy_tcw(mmn, 0, ULONG_MAX);
-	schedule_work(&scif_info.misc_work);
-}
-
-static int scif_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
-					const struct mmu_notifier_range *range)
-{
-	struct scif_mmu_notif	*mmn;
-
-	mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
-	scif_rma_destroy_tcw(mmn, range->start, range->end - range->start);
-
-	return 0;
-}
-
-static void scif_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
-			const struct mmu_notifier_range *range)
-{
-	/*
-	 * Nothing to do here, everything needed was done in
-	 * invalidate_range_start.
-	 */
-}
-
-static const struct mmu_notifier_ops scif_mmu_notifier_ops = {
-	.release = scif_mmu_notifier_release,
-	.clear_flush_young = NULL,
-	.invalidate_range_start = scif_mmu_notifier_invalidate_range_start,
-	.invalidate_range_end = scif_mmu_notifier_invalidate_range_end};
-
-static void scif_ep_unregister_mmu_notifier(struct scif_endpt *ep)
-{
-	struct scif_endpt_rma_info *rma = &ep->rma_info;
-	struct scif_mmu_notif *mmn = NULL;
-	struct list_head *item, *tmp;
-
-	mutex_lock(&ep->rma_info.mmn_lock);
-	list_for_each_safe(item, tmp, &rma->mmn_list) {
-		mmn = list_entry(item, struct scif_mmu_notif, list);
-		mmu_notifier_unregister(&mmn->ep_mmu_notifier, mmn->mm);
-		list_del(item);
-		kfree(mmn);
-	}
-	mutex_unlock(&ep->rma_info.mmn_lock);
-}
-
-static void scif_init_mmu_notifier(struct scif_mmu_notif *mmn,
-				   struct mm_struct *mm, struct scif_endpt *ep)
-{
-	mmn->ep = ep;
-	mmn->mm = mm;
-	mmn->ep_mmu_notifier.ops = &scif_mmu_notifier_ops;
-	INIT_LIST_HEAD(&mmn->list);
-	INIT_LIST_HEAD(&mmn->tc_reg_list);
-}
-
-static struct scif_mmu_notif *
-scif_find_mmu_notifier(struct mm_struct *mm, struct scif_endpt_rma_info *rma)
-{
-	struct scif_mmu_notif *mmn;
-
-	list_for_each_entry(mmn, &rma->mmn_list, list)
-		if (mmn->mm == mm)
-			return mmn;
-	return NULL;
-}
-
-static struct scif_mmu_notif *
-scif_add_mmu_notifier(struct mm_struct *mm, struct scif_endpt *ep)
-{
-	struct scif_mmu_notif *mmn
-		 = kzalloc(sizeof(*mmn), GFP_KERNEL);
-
-	if (!mmn)
-		return ERR_PTR(-ENOMEM);
-
-	scif_init_mmu_notifier(mmn, current->mm, ep);
-	if (mmu_notifier_register(&mmn->ep_mmu_notifier, current->mm)) {
-		kfree(mmn);
-		return ERR_PTR(-EBUSY);
-	}
-	list_add(&mmn->list, &ep->rma_info.mmn_list);
-	return mmn;
-}
-
-/*
- * Called from the misc thread to destroy temporary cached windows and
- * unregister the MMU notifier for the SCIF endpoint.
- */
-void scif_mmu_notif_handler(struct work_struct *work)
-{
-	struct list_head *pos, *tmpq;
-	struct scif_endpt *ep;
-restart:
-	scif_rma_destroy_tcw_invalid();
-	spin_lock(&scif_info.rmalock);
-	list_for_each_safe(pos, tmpq, &scif_info.mmu_notif_cleanup) {
-		ep = list_entry(pos, struct scif_endpt, mmu_list);
-		list_del(&ep->mmu_list);
-		spin_unlock(&scif_info.rmalock);
-		scif_rma_destroy_tcw_ep(ep);
-		scif_ep_unregister_mmu_notifier(ep);
-		goto restart;
-	}
-	spin_unlock(&scif_info.rmalock);
-}
-
-static bool scif_is_set_reg_cache(int flags)
-{
-	return !!(flags & SCIF_RMA_USECACHE);
-}
-#else
-static struct scif_mmu_notif *
-scif_find_mmu_notifier(struct mm_struct *mm,
-		       struct scif_endpt_rma_info *rma)
-{
-	return NULL;
-}
-
-static struct scif_mmu_notif *
-scif_add_mmu_notifier(struct mm_struct *mm, struct scif_endpt *ep)
-{
-	return NULL;
-}
-
-void scif_mmu_notif_handler(struct work_struct *work)
-{
-}
-
-static bool scif_is_set_reg_cache(int flags)
-{
-	return false;
-}
-
-static bool scif_rma_tc_can_cache(struct scif_endpt *ep, size_t cur_bytes)
-{
-	return false;
-}
-#endif
-
-/**
- * scif_register_temp:
- * @epd: End Point Descriptor.
- * @addr: virtual address to/from which to copy
- * @len: length of range to copy
- * @prot: read/write protection
- * @out_offset: computed offset returned by reference.
- * @out_window: allocated registered window returned by reference.
- *
- * Create a temporary registered window. The peer will not know about this
- * window. This API is used for scif_vreadfrom()/scif_vwriteto() API's.
- */
-static int
-scif_register_temp(scif_epd_t epd, unsigned long addr, size_t len, int prot,
-		   off_t *out_offset, struct scif_window **out_window)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)epd;
-	int err;
-	scif_pinned_pages_t pinned_pages;
-	size_t aligned_len;
-
-	aligned_len = ALIGN(len, PAGE_SIZE);
-
-	err = __scif_pin_pages((void *)(addr & PAGE_MASK),
-			       aligned_len, &prot, 0, &pinned_pages);
-	if (err)
-		return err;
-
-	pinned_pages->prot = prot;
-
-	/* Compute the offset for this registration */
-	err = scif_get_window_offset(ep, 0, 0,
-				     aligned_len >> PAGE_SHIFT,
-				     (s64 *)out_offset);
-	if (err)
-		goto error_unpin;
-
-	/* Allocate and prepare self registration window */
-	*out_window = scif_create_window(ep, aligned_len >> PAGE_SHIFT,
-					*out_offset, true);
-	if (!*out_window) {
-		scif_free_window_offset(ep, NULL, *out_offset);
-		err = -ENOMEM;
-		goto error_unpin;
-	}
-
-	(*out_window)->pinned_pages = pinned_pages;
-	(*out_window)->nr_pages = pinned_pages->nr_pages;
-	(*out_window)->prot = pinned_pages->prot;
-
-	(*out_window)->va_for_temp = addr & PAGE_MASK;
-	err = scif_map_window(ep->remote_dev, *out_window);
-	if (err) {
-		/* Something went wrong! Rollback */
-		scif_destroy_window(ep, *out_window);
-		*out_window = NULL;
-	} else {
-		*out_offset |= (addr - (*out_window)->va_for_temp);
-	}
-	return err;
-error_unpin:
-	if (err)
-		dev_err(&ep->remote_dev->sdev->dev,
-			"%s %d err %d\n", __func__, __LINE__, err);
-	scif_unpin_pages(pinned_pages);
-	return err;
-}
-
-#define SCIF_DMA_TO (3 * HZ)
-
-/*
- * scif_sync_dma - Program a DMA without an interrupt descriptor
- *
- * @dev - The address of the pointer to the device instance used
- * for DMA registration.
- * @chan - DMA channel to be used.
- * @sync_wait: Wait for DMA to complete?
- *
- * Return 0 on success and -errno on error.
- */
-static int scif_sync_dma(struct scif_hw_dev *sdev, struct dma_chan *chan,
-			 bool sync_wait)
-{
-	int err = 0;
-	struct dma_async_tx_descriptor *tx = NULL;
-	enum dma_ctrl_flags flags = DMA_PREP_FENCE;
-	dma_cookie_t cookie;
-	struct dma_device *ddev;
-
-	if (!chan) {
-		err = -EIO;
-		dev_err(&sdev->dev, "%s %d err %d\n",
-			__func__, __LINE__, err);
-		return err;
-	}
-	ddev = chan->device;
-
-	tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, flags);
-	if (!tx) {
-		err = -ENOMEM;
-		dev_err(&sdev->dev, "%s %d err %d\n",
-			__func__, __LINE__, err);
-		goto release;
-	}
-	cookie = tx->tx_submit(tx);
-
-	if (dma_submit_error(cookie)) {
-		err = -ENOMEM;
-		dev_err(&sdev->dev, "%s %d err %d\n",
-			__func__, __LINE__, err);
-		goto release;
-	}
-	if (!sync_wait) {
-		dma_async_issue_pending(chan);
-	} else {
-		if (dma_sync_wait(chan, cookie) == DMA_COMPLETE) {
-			err = 0;
-		} else {
-			err = -EIO;
-			dev_err(&sdev->dev, "%s %d err %d\n",
-				__func__, __LINE__, err);
-		}
-	}
-release:
-	return err;
-}
-
-static void scif_dma_callback(void *arg)
-{
-	struct completion *done = (struct completion *)arg;
-
-	complete(done);
-}
-
-#define SCIF_DMA_SYNC_WAIT true
-#define SCIF_DMA_POLL BIT(0)
-#define SCIF_DMA_INTR BIT(1)
-
-/*
- * scif_async_dma - Program a DMA with an interrupt descriptor
- *
- * @dev - The address of the pointer to the device instance used
- * for DMA registration.
- * @chan - DMA channel to be used.
- * Return 0 on success and -errno on error.
- */
-static int scif_async_dma(struct scif_hw_dev *sdev, struct dma_chan *chan)
-{
-	int err = 0;
-	struct dma_device *ddev;
-	struct dma_async_tx_descriptor *tx = NULL;
-	enum dma_ctrl_flags flags = DMA_PREP_INTERRUPT | DMA_PREP_FENCE;
-	DECLARE_COMPLETION_ONSTACK(done_wait);
-	dma_cookie_t cookie;
-	enum dma_status status;
-
-	if (!chan) {
-		err = -EIO;
-		dev_err(&sdev->dev, "%s %d err %d\n",
-			__func__, __LINE__, err);
-		return err;
-	}
-	ddev = chan->device;
-
-	tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, flags);
-	if (!tx) {
-		err = -ENOMEM;
-		dev_err(&sdev->dev, "%s %d err %d\n",
-			__func__, __LINE__, err);
-		goto release;
-	}
-	reinit_completion(&done_wait);
-	tx->callback = scif_dma_callback;
-	tx->callback_param = &done_wait;
-	cookie = tx->tx_submit(tx);
-
-	if (dma_submit_error(cookie)) {
-		err = -ENOMEM;
-		dev_err(&sdev->dev, "%s %d err %d\n",
-			__func__, __LINE__, err);
-		goto release;
-	}
-	dma_async_issue_pending(chan);
-
-	err = wait_for_completion_timeout(&done_wait, SCIF_DMA_TO);
-	if (!err) {
-		err = -EIO;
-		dev_err(&sdev->dev, "%s %d err %d\n",
-			__func__, __LINE__, err);
-		goto release;
-	}
-	err = 0;
-	status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
-	if (status != DMA_COMPLETE) {
-		err = -EIO;
-		dev_err(&sdev->dev, "%s %d err %d\n",
-			__func__, __LINE__, err);
-		goto release;
-	}
-release:
-	return err;
-}
-
-/*
- * scif_drain_dma_poll - Drain all outstanding DMA operations for a particular
- * DMA channel via polling.
- *
- * @sdev - The SCIF device
- * @chan - DMA channel
- * Return 0 on success and -errno on error.
- */
-static int scif_drain_dma_poll(struct scif_hw_dev *sdev, struct dma_chan *chan)
-{
-	if (!chan)
-		return -EINVAL;
-	return scif_sync_dma(sdev, chan, SCIF_DMA_SYNC_WAIT);
-}
-
-/*
- * scif_drain_dma_intr - Drain all outstanding DMA operations for a particular
- * DMA channel via interrupt based blocking wait.
- *
- * @sdev - The SCIF device
- * @chan - DMA channel
- * Return 0 on success and -errno on error.
- */
-int scif_drain_dma_intr(struct scif_hw_dev *sdev, struct dma_chan *chan)
-{
-	if (!chan)
-		return -EINVAL;
-	return scif_async_dma(sdev, chan);
-}
-
-/**
- * scif_rma_destroy_windows:
- *
- * This routine destroys all windows queued for cleanup on scif_info.rma.
- * Each window is unlinked while scif_info.rmalock is held; the lock is then
- * released before the DMA drain and the destroy call, and the list walk is
- * restarted from the head. Annotated with might_sleep().
- */
-void scif_rma_destroy_windows(void)
-{
-	struct list_head *item, *tmp;
-	struct scif_window *window;
-	struct scif_endpt *ep;
-	struct dma_chan *chan;
-
-	might_sleep();
-restart:
-	spin_lock(&scif_info.rmalock);
-	list_for_each_safe(item, tmp, &scif_info.rma) {
-		window = list_entry(item, struct scif_window,
-				    list);
-		ep = (struct scif_endpt *)window->ep;
-		chan = ep->rma_info.dma_chan;
-
-		list_del_init(&window->list);
-		spin_unlock(&scif_info.rmalock);
-		/*
-		 * No DMA channel, a peer that is no longer alive, or a drain
-		 * that returned 0 all allow the window to be torn down.
-		 * NOTE(review): when the drain fails the window has already
-		 * been unlinked above and is not put back on any list here.
-		 */
-		if (!chan || !scifdev_alive(ep) ||
-		    !scif_drain_dma_intr(ep->remote_dev->sdev,
-					 ep->rma_info.dma_chan))
-			/* Mark the unregistration of this window as finished */
-			window->unreg_state = OP_COMPLETED;
-		else
-			dev_warn(&ep->remote_dev->sdev->dev,
-				 "DMA engine hung?\n");
-		if (window->unreg_state == OP_COMPLETED) {
-			if (window->type == SCIF_WINDOW_SELF)
-				scif_destroy_window(ep, window);
-			else
-				scif_destroy_remote_window(window);
-			atomic_dec(&ep->rma_info.tw_refcount);
-		}
-		/* The spinlock was dropped, so walk the list again from the head */
-		goto restart;
-	}
-	spin_unlock(&scif_info.rmalock);
-}
-
-/**
- * scif_rma_destroy_tcw_invalid:
- *
- * This routine destroys temporary cached registered windows
- * which have been queued for cleanup on scif_info.rma_tc.
- * Each window is unlinked while scif_info.rmalock is held; the drain and
- * destroy then run with the endpoint's rma_lock mutex held instead, and the
- * list walk is restarted from the head. Annotated with might_sleep().
- */
-void scif_rma_destroy_tcw_invalid(void)
-{
-	struct list_head *item, *tmp;
-	struct scif_window *window;
-	struct scif_endpt *ep;
-	struct dma_chan *chan;
-
-	might_sleep();
-restart:
-	spin_lock(&scif_info.rmalock);
-	list_for_each_safe(item, tmp, &scif_info.rma_tc) {
-		window = list_entry(item, struct scif_window, list);
-		ep = (struct scif_endpt *)window->ep;
-		chan = ep->rma_info.dma_chan;
-		list_del_init(&window->list);
-		spin_unlock(&scif_info.rmalock);
-		mutex_lock(&ep->rma_info.rma_lock);
-		/*
-		 * No DMA channel, a peer that is no longer alive, or a drain
-		 * that returned 0 all allow the window to be destroyed and
-		 * its pages/refcount to be removed from the endpoint totals.
-		 */
-		if (!chan || !scifdev_alive(ep) ||
-		    !scif_drain_dma_intr(ep->remote_dev->sdev,
-					 ep->rma_info.dma_chan)) {
-			atomic_sub(window->nr_pages,
-				   &ep->rma_info.tcw_total_pages);
-			scif_destroy_window(ep, window);
-			atomic_dec(&ep->rma_info.tcw_refcount);
-		} else {
-			dev_warn(&ep->remote_dev->sdev->dev,
-				 "DMA engine hung?\n");
-		}
-		mutex_unlock(&ep->rma_info.rma_lock);
-		/* The spinlock was dropped, so walk the list again from the head */
-		goto restart;
-	}
-	spin_unlock(&scif_info.rmalock);
-}
-
-/*
- * Translate an offset inside a window into a kernel virtual address.
- * Only SCIF_WINDOW_SELF windows are translated, using their pinned pages;
- * every other window type yields NULL. @len is not used.
- */
-static inline
-void *_get_local_va(off_t off, struct scif_window *window, size_t len)
-{
-	struct page **pages;
-	off_t page_off;
-	int page_nr;
-
-	if (window->type != SCIF_WINDOW_SELF)
-		return NULL;
-
-	page_nr = (off - window->offset) >> PAGE_SHIFT;
-	page_off = off & ~PAGE_MASK;
-	pages = window->pinned_pages->pages;
-	return page_address(pages[page_nr]) + page_off;
-}
-
-/*
- * Look up the DMA address backing @off in @window and hand it to
- * scif_ioremap() to obtain a CPU mapping of @len bytes.
- */
-static inline
-void *ioremap_remote(off_t off, struct scif_window *window,
-		     size_t len, struct scif_dev *dev,
-		     struct scif_window_iter *iter)
-{
-	dma_addr_t phys = scif_off_to_dma_addr(window, off, NULL, iter);
-	bool strip_aper_base;
-
-	/*
-	 * When the DMA address is not card relative it has to become an
-	 * offset into the bar: the aperture base is already part of it and
-	 * scif_ioremap is going to add the base once more, so take it out.
-	 */
-	strip_aper_base = !scifdev_self(dev) &&
-			  window->type == SCIF_WINDOW_PEER &&
-			  dev->sdev->aper && !dev->sdev->card_rel_da;
-	if (strip_aper_base)
-		phys -= dev->sdev->aper->pa;
-
-	return scif_ioremap(phys, len, dev);
-}
-
-/*
- * Undo a mapping of @size bytes at @virt by calling scif_iounmap() for the
- * remote device recorded in @work.
- */
-static inline void
-iounmap_remote(void *virt, size_t size, struct scif_copy_work *work)
-{
-	scif_iounmap(virt, size, work->remote_dev);
-}
-
-/*
- * Copy @count bytes to I/O memory so that the final byte is stored last.
- * This takes care of the ordering issue caused by
- * 1. Hardware:  Only in the case of cpu copy from mgmt node to card
- * because of WC memory.
- * 2. Software: If memcpy reorders copy instructions for optimization.
- * This could happen at both mgmt node and card.
- */
-static inline void
-scif_ordered_memcpy_toio(char *dst, const char *src, size_t count)
-{
-	size_t last;
-
-	if (!count)
-		return;
-
-	/* Everything except the final byte goes through memcpy_toio() */
-	last = count - 1;
-	memcpy_toio((void __iomem __force *)dst, src, last);
-	/* Order the last byte with the previous stores */
-	wmb();
-	dst[last] = src[last];
-}
-
-/*
- * Copy @count bytes to I/O memory, using the last-byte-ordered variant
- * when @ordered is set and a plain memcpy_toio() otherwise.
- */
-static inline void scif_unaligned_cpy_toio(char *dst, const char *src,
-					   size_t count, bool ordered)
-{
-	if (!ordered) {
-		memcpy_toio((void __iomem __force *)dst, src, count);
-		return;
-	}
-	scif_ordered_memcpy_toio(dst, src, count);
-}
-
-/*
- * Copy @count bytes from I/O memory so that the final byte is read last,
- * after a read barrier that follows the bulk copy.
- */
-static inline
-void scif_ordered_memcpy_fromio(char *dst, const char *src, size_t count)
-{
-	size_t last;
-
-	if (!count)
-		return;
-
-	/* Everything except the final byte goes through memcpy_fromio() */
-	last = count - 1;
-	memcpy_fromio(dst, (void __iomem __force *)src, last);
-	/* Order the last byte with the previous loads */
-	rmb();
-	dst[last] = src[last];
-}
-
-/*
- * Copy @count bytes from I/O memory, using the last-byte-ordered variant
- * when @ordered is set and a plain memcpy_fromio() otherwise.
- */
-static inline void scif_unaligned_cpy_fromio(char *dst, const char *src,
-					     size_t count, bool ordered)
-{
-	if (!ordered) {
-		memcpy_fromio(dst, (void __iomem __force *)src, count);
-		return;
-	}
-	scif_ordered_memcpy_fromio(dst, src, count);
-}
-
-#define SCIF_RMA_ERROR_CODE (~(dma_addr_t)0x0)
-
-/*
- * scif_off_to_dma_addr:
- * Obtain the dma_addr given the window and the offset.
- * @window: Registered window.
- * @off: Window offset.
- * @nr_bytes: Optional. Returns the number of contiguous bytes from @off up
- * to the end of the dma_addr array entry that contains it.
- * @iter: Optional. On entry supplies the chunk index and chunk start offset
- * at which the search begins; on a successful chunk lookup it is updated to
- * the index/start offset of the chunk that was found.
- * The nr_bytes provides the caller an estimate of the maximum possible
- * DMA xfer possible while the iterator provides faster lookups
- * for the next iteration.
- * Returns the DMA address, or SCIF_RMA_ERROR_CODE when @off is not covered
- * by any chunk that was searched.
- */
-dma_addr_t scif_off_to_dma_addr(struct scif_window *window, s64 off,
-				size_t *nr_bytes, struct scif_window_iter *iter)
-{
-	int i, page_nr;
-	s64 start, end;
-	off_t page_off;
-
-	/* One chunk per page: index the dma_addr array directly by page */
-	if (window->nr_pages == window->nr_contig_chunks) {
-		page_nr = (off - window->offset) >> PAGE_SHIFT;
-		page_off = off & ~PAGE_MASK;
-
-		if (nr_bytes)
-			*nr_bytes = PAGE_SIZE - page_off;
-		return window->dma_addr[page_nr] | page_off;
-	}
-	/* Otherwise walk the chunks, resuming from the iterator if given */
-	if (iter) {
-		i = iter->index;
-		start = iter->offset;
-	} else {
-		i =  0;
-		start =  window->offset;
-	}
-	for (; i < window->nr_contig_chunks; i++) {
-		end = start + (window->num_pages[i] << PAGE_SHIFT);
-		if (off >= start && off < end) {
-			if (iter) {
-				iter->index = i;
-				iter->offset = start;
-			}
-			if (nr_bytes)
-				*nr_bytes = end - off;
-			return (window->dma_addr[i] + (off - start));
-		}
-		start += (window->num_pages[i] << PAGE_SHIFT);
-	}
-	dev_err(scif_info.mdev.this_device,
-		"%s %d BUG. Addr not found? window %p off 0x%llx\n",
-		__func__, __LINE__, window, off);
-	return SCIF_RMA_ERROR_CODE;
-}
-
-/*
- * Copy between rma window and temporary buffer
- *
- * @offset: starting offset inside @window
- * @window: first window of the range; following windows are reached through
- * the window list when the offset hits the end of the current one
- * @temp: temporary buffer
- * @rem_len: number of bytes to copy
- * @to_temp: true copies window -> @temp, false copies @temp -> window
- *
- * The copy is done one page (or part of a page) at a time. It stops silently
- * as soon as _get_local_va() returns NULL for a window.
- */
-static void scif_rma_local_cpu_copy(s64 offset, struct scif_window *window,
-				    u8 *temp, size_t rem_len, bool to_temp)
-{
-	void *window_virt;
-	size_t loop_len;
-	int offset_in_page;
-	s64 end_offset;
-
-	/* First piece: from @offset up to the end of its page at most */
-	offset_in_page = offset & ~PAGE_MASK;
-	loop_len = PAGE_SIZE - offset_in_page;
-
-	if (rem_len < loop_len)
-		loop_len = rem_len;
-
-	window_virt = _get_local_va(offset, window, loop_len);
-	if (!window_virt)
-		return;
-	if (to_temp)
-		memcpy(temp, window_virt, loop_len);
-	else
-		memcpy(window_virt, temp, loop_len);
-
-	offset += loop_len;
-	temp += loop_len;
-	rem_len -= loop_len;
-
-	end_offset = window->offset +
-		(window->nr_pages << PAGE_SHIFT);
-	/* Remaining pieces start on a page boundary */
-	while (rem_len) {
-		if (offset == end_offset) {
-			/* Current window exhausted: continue in the next one */
-			window = list_next_entry(window, list);
-			end_offset = window->offset +
-				(window->nr_pages << PAGE_SHIFT);
-		}
-		loop_len = min(PAGE_SIZE, rem_len);
-		window_virt = _get_local_va(offset, window, loop_len);
-		if (!window_virt)
-			return;
-		if (to_temp)
-			memcpy(temp, window_virt, loop_len);
-		else
-			memcpy(window_virt, temp, loop_len);
-		offset	+= loop_len;
-		temp	+= loop_len;
-		rem_len	-= loop_len;
-	}
-}
-
-/**
- * scif_rma_completion_cb:
- * @data: RMA cookie
- *
- * RMA interrupt completion callback. Copies the temporary buffer into the
- * destination window when one was recorded, then unmaps and frees the
- * temporary buffer. The scif_dma_comp_cb structure itself is not freed here.
- */
-static void scif_rma_completion_cb(void *data)
-{
-	struct scif_dma_comp_cb *comp_cb = data;
-
-	/*
-	 * A destination window is only recorded when the data still has to be
-	 * moved out of the temporary buffer; skip the header padding in it.
-	 */
-	if (comp_cb->dst_window)
-		scif_rma_local_cpu_copy(comp_cb->dst_offset,
-					comp_cb->dst_window,
-					comp_cb->temp_buf +
-					comp_cb->header_padding,
-					comp_cb->len, false);
-	scif_unmap_single(comp_cb->temp_phys, comp_cb->sdev,
-			  SCIF_KMEM_UNALIGNED_BUF_SIZE);
-	/* Release the buffer with the allocator it came from */
-	if (comp_cb->is_cache)
-		kmem_cache_free(unaligned_cache,
-				comp_cb->temp_buf_to_free);
-	else
-		kfree(comp_cb->temp_buf_to_free);
-}
-
-/*
- * Copies between temporary buffer and offsets provided in work
- *
- * @work: copy request; the window/offset on the non-temporary side is taken
- * from the destination fields when @src_local is true and from the source
- * fields otherwise
- * @temp: CPU address of the temporary buffer
- * @chan: DMA channel used for the cache line aligned body
- * @src_local: true when the temporary buffer is the source of the transfer
- *
- * The transfer is split into an unaligned head (CPU copy through a remote
- * mapping), a cache line aligned body (DMA) and an unaligned tail (CPU copy).
- * A final interrupt descriptor is queued whose callback is
- * scif_rma_completion_cb(). Returns 0 on success and -errno on error.
- */
-static int
-scif_rma_list_dma_copy_unaligned(struct scif_copy_work *work,
-				 u8 *temp, struct dma_chan *chan,
-				 bool src_local)
-{
-	struct scif_dma_comp_cb *comp_cb = work->comp_cb;
-	dma_addr_t window_dma_addr, temp_dma_addr;
-	dma_addr_t temp_phys = comp_cb->temp_phys;
-	size_t loop_len, nr_contig_bytes = 0, remaining_len = work->len;
-	int offset_in_ca, ret = 0;
-	s64 end_offset, offset;
-	struct scif_window *window;
-	void *window_virt_addr;
-	size_t tail_len;
-	struct dma_async_tx_descriptor *tx;
-	struct dma_device *dev = chan->device;
-	dma_cookie_t cookie;
-
-	if (src_local) {
-		offset = work->dst_offset;
-		window = work->dst_window;
-	} else {
-		offset = work->src_offset;
-		window = work->src_window;
-	}
-
-	/* Head: CPU copy up to the next cache line boundary */
-	offset_in_ca = offset & (L1_CACHE_BYTES - 1);
-	if (offset_in_ca) {
-		loop_len = L1_CACHE_BYTES - offset_in_ca;
-		loop_len = min(loop_len, remaining_len);
-		window_virt_addr = ioremap_remote(offset, window,
-						  loop_len,
-						  work->remote_dev,
-						  NULL);
-		if (!window_virt_addr)
-			return -ENOMEM;
-		/* Ordering only matters if the head is the whole transfer */
-		if (src_local)
-			scif_unaligned_cpy_toio(window_virt_addr, temp,
-						loop_len,
-						work->ordered &&
-						!(remaining_len - loop_len));
-		else
-			scif_unaligned_cpy_fromio(temp, window_virt_addr,
-						  loop_len, work->ordered &&
-						  !(remaining_len - loop_len));
-		iounmap_remote(window_virt_addr, loop_len, work);
-
-		offset += loop_len;
-		temp += loop_len;
-		temp_phys += loop_len;
-		remaining_len -= loop_len;
-	}
-
-	offset_in_ca = offset & ~PAGE_MASK;
-	end_offset = window->offset +
-		(window->nr_pages << PAGE_SHIFT);
-
-	/* Body: whole cache lines via DMA; the remainder becomes the tail */
-	tail_len = remaining_len & (L1_CACHE_BYTES - 1);
-	remaining_len -= tail_len;
-	while (remaining_len) {
-		if (offset == end_offset) {
-			window = list_next_entry(window, list);
-			end_offset = window->offset +
-				(window->nr_pages << PAGE_SHIFT);
-		}
-		if (scif_is_mgmt_node())
-			temp_dma_addr = temp_phys;
-		else
-			/* Fix if we ever enable IOMMU on the card */
-			temp_dma_addr = (dma_addr_t)virt_to_phys(temp);
-		window_dma_addr = scif_off_to_dma_addr(window, offset,
-						       &nr_contig_bytes,
-						       NULL);
-		loop_len = min(nr_contig_bytes, remaining_len);
-		if (src_local) {
-			if (work->ordered && !tail_len &&
-			    !(remaining_len - loop_len) &&
-			    loop_len != L1_CACHE_BYTES) {
-				/*
-				 * Break up the last chunk of the transfer into
-				 * two steps. if there is no tail to guarantee
-				 * DMA ordering. SCIF_DMA_POLLING inserts
-				 * a status update descriptor in step 1 which
-				 * acts as a double sided synchronization fence
-				 * for the DMA engine to ensure that the last
-				 * cache line in step 2 is updated last.
-				 */
-				/* Step 1) DMA: Body Length - L1_CACHE_BYTES. */
-				tx =
-				dev->device_prep_dma_memcpy(chan,
-							    window_dma_addr,
-							    temp_dma_addr,
-							    loop_len -
-							    L1_CACHE_BYTES,
-							    DMA_PREP_FENCE);
-				if (!tx) {
-					ret = -ENOMEM;
-					goto err;
-				}
-				cookie = tx->tx_submit(tx);
-				if (dma_submit_error(cookie)) {
-					ret = -ENOMEM;
-					goto err;
-				}
-				dma_async_issue_pending(chan);
-				offset += (loop_len - L1_CACHE_BYTES);
-				temp_dma_addr += (loop_len - L1_CACHE_BYTES);
-				window_dma_addr += (loop_len - L1_CACHE_BYTES);
-				remaining_len -= (loop_len - L1_CACHE_BYTES);
-				loop_len = remaining_len;
-
-				/* Step 2) DMA: L1_CACHE_BYTES */
-				tx =
-				dev->device_prep_dma_memcpy(chan,
-							    window_dma_addr,
-							    temp_dma_addr,
-							    loop_len, 0);
-				if (!tx) {
-					ret = -ENOMEM;
-					goto err;
-				}
-				cookie = tx->tx_submit(tx);
-				if (dma_submit_error(cookie)) {
-					ret = -ENOMEM;
-					goto err;
-				}
-				dma_async_issue_pending(chan);
-			} else {
-				tx =
-				dev->device_prep_dma_memcpy(chan,
-							    window_dma_addr,
-							    temp_dma_addr,
-							    loop_len, 0);
-				if (!tx) {
-					ret = -ENOMEM;
-					goto err;
-				}
-				cookie = tx->tx_submit(tx);
-				if (dma_submit_error(cookie)) {
-					ret = -ENOMEM;
-					goto err;
-				}
-				dma_async_issue_pending(chan);
-			}
-		} else {
-			tx = dev->device_prep_dma_memcpy(chan, temp_dma_addr,
-					window_dma_addr, loop_len, 0);
-			if (!tx) {
-				ret = -ENOMEM;
-				goto err;
-			}
-			cookie = tx->tx_submit(tx);
-			if (dma_submit_error(cookie)) {
-				ret = -ENOMEM;
-				goto err;
-			}
-			dma_async_issue_pending(chan);
-		}
-		offset += loop_len;
-		temp += loop_len;
-		temp_phys += loop_len;
-		remaining_len -= loop_len;
-		offset_in_ca = 0;
-	}
-	/* Tail: CPU copy of the bytes past the last whole cache line */
-	if (tail_len) {
-		if (offset == end_offset) {
-			window = list_next_entry(window, list);
-			end_offset = window->offset +
-				(window->nr_pages << PAGE_SHIFT);
-		}
-		window_virt_addr = ioremap_remote(offset, window, tail_len,
-						  work->remote_dev,
-						  NULL);
-		if (!window_virt_addr)
-			return -ENOMEM;
-		/*
-		 * The CPU copy for the tail bytes must be initiated only once
-		 * previous DMA transfers for this endpoint have completed
-		 * to guarantee ordering.
-		 */
-		if (work->ordered) {
-			struct scif_dev *rdev = work->remote_dev;
-
-			ret = scif_drain_dma_intr(rdev->sdev, chan);
-			if (ret) {
-				/* Do not leak the tail mapping on failure */
-				iounmap_remote(window_virt_addr, tail_len,
-					       work);
-				return ret;
-			}
-		}
-		if (src_local)
-			scif_unaligned_cpy_toio(window_virt_addr, temp,
-						tail_len, work->ordered);
-		else
-			scif_unaligned_cpy_fromio(temp, window_virt_addr,
-						  tail_len, work->ordered);
-		iounmap_remote(window_virt_addr, tail_len, work);
-	}
-	/* Zero length descriptor whose only job is to raise the callback */
-	tx = dev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_INTERRUPT);
-	if (!tx) {
-		ret = -ENOMEM;
-		return ret;
-	}
-	tx->callback = &scif_rma_completion_cb;
-	tx->callback_param = comp_cb;
-	cookie = tx->tx_submit(tx);
-
-	if (dma_submit_error(cookie)) {
-		ret = -ENOMEM;
-		return ret;
-	}
-	dma_async_issue_pending(chan);
-	return 0;
-err:
-	dev_err(scif_info.mdev.this_device,
-		"%s %d Desc Prog Failed ret %d\n",
-		__func__, __LINE__, ret);
-	return ret;
-}
-
-/*
- * _scif_rma_list_dma_copy_aligned:
- *
- * Traverse all the windows and perform DMA copy.
- *
- * @work: copy request (source/destination windows, offsets, length, ordered)
- * @chan: DMA channel to program
- *
- * The whole transfer is programmed as DMA descriptors with no CPU copied
- * head or tail. Returns 0 on success and -ENOMEM when a descriptor cannot be
- * prepared or submitted.
- */
-static int _scif_rma_list_dma_copy_aligned(struct scif_copy_work *work,
-					   struct dma_chan *chan)
-{
-	dma_addr_t src_dma_addr, dst_dma_addr;
-	size_t loop_len, remaining_len, src_contig_bytes = 0;
-	size_t dst_contig_bytes = 0;
-	struct scif_window_iter src_win_iter;
-	struct scif_window_iter dst_win_iter;
-	s64 end_src_offset, end_dst_offset;
-	struct scif_window *src_window = work->src_window;
-	struct scif_window *dst_window = work->dst_window;
-	s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
-	int ret = 0;
-	struct dma_async_tx_descriptor *tx;
-	struct dma_device *dev = chan->device;
-	dma_cookie_t cookie;
-
-	remaining_len = work->len;
-
-	scif_init_window_iter(src_window, &src_win_iter);
-	scif_init_window_iter(dst_window, &dst_win_iter);
-	end_src_offset = src_window->offset +
-		(src_window->nr_pages << PAGE_SHIFT);
-	end_dst_offset = dst_window->offset +
-		(dst_window->nr_pages << PAGE_SHIFT);
-	while (remaining_len) {
-		/* Step to the next window on either side once one is used up */
-		if (src_offset == end_src_offset) {
-			src_window = list_next_entry(src_window, list);
-			end_src_offset = src_window->offset +
-				(src_window->nr_pages << PAGE_SHIFT);
-			scif_init_window_iter(src_window, &src_win_iter);
-		}
-		if (dst_offset == end_dst_offset) {
-			dst_window = list_next_entry(dst_window, list);
-			end_dst_offset = dst_window->offset +
-				(dst_window->nr_pages << PAGE_SHIFT);
-			scif_init_window_iter(dst_window, &dst_win_iter);
-		}
-
-		/* compute dma addresses for transfer */
-		src_dma_addr = scif_off_to_dma_addr(src_window, src_offset,
-						    &src_contig_bytes,
-						    &src_win_iter);
-		dst_dma_addr = scif_off_to_dma_addr(dst_window, dst_offset,
-						    &dst_contig_bytes,
-						    &dst_win_iter);
-		/* Limited by the contiguous run on both sides and what is left */
-		loop_len = min(src_contig_bytes, dst_contig_bytes);
-		loop_len = min(loop_len, remaining_len);
-		if (work->ordered && !(remaining_len - loop_len)) {
-			/*
-			 * Break up the last chunk of the transfer into two
-			 * steps to ensure that the last byte in step 2 is
-			 * updated last.
-			 * NOTE(review): when loop_len is 1 the step 1
-			 * descriptor is prepared with a length of 0.
-			 */
-			/* Step 1) DMA: Body Length - 1 */
-			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
-							 src_dma_addr,
-							 loop_len - 1,
-							 DMA_PREP_FENCE);
-			if (!tx) {
-				ret = -ENOMEM;
-				goto err;
-			}
-			cookie = tx->tx_submit(tx);
-			if (dma_submit_error(cookie)) {
-				ret = -ENOMEM;
-				goto err;
-			}
-			src_offset += (loop_len - 1);
-			dst_offset += (loop_len - 1);
-			src_dma_addr += (loop_len - 1);
-			dst_dma_addr += (loop_len - 1);
-			remaining_len -= (loop_len - 1);
-			loop_len = remaining_len;
-
-			/* Step 2) DMA: 1 BYTES */
-			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
-					src_dma_addr, loop_len, 0);
-			if (!tx) {
-				ret = -ENOMEM;
-				goto err;
-			}
-			cookie = tx->tx_submit(tx);
-			if (dma_submit_error(cookie)) {
-				ret = -ENOMEM;
-				goto err;
-			}
-			dma_async_issue_pending(chan);
-		} else {
-			/*
-			 * NOTE(review): descriptors submitted on this path are
-			 * not followed by dma_async_issue_pending() in this
-			 * function - presumably the caller or a later
-			 * descriptor kicks the channel; confirm.
-			 */
-			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
-					src_dma_addr, loop_len, 0);
-			if (!tx) {
-				ret = -ENOMEM;
-				goto err;
-			}
-			cookie = tx->tx_submit(tx);
-			if (dma_submit_error(cookie)) {
-				ret = -ENOMEM;
-				goto err;
-			}
-		}
-		src_offset += loop_len;
-		dst_offset += loop_len;
-		remaining_len -= loop_len;
-	}
-	return ret;
-err:
-	dev_err(scif_info.mdev.this_device,
-		"%s %d Desc Prog Failed ret %d\n",
-		__func__, __LINE__, ret);
-	return ret;
-}
-
-/*
- * scif_rma_list_dma_copy_aligned:
- *
- * Traverse all the windows and perform DMA copy.
- *
- * @work: copy request (source/destination windows, offsets, length, ordered)
- * @chan: DMA channel to program
- *
- * The transfer is split into a head up to the next cache line boundary of
- * the source offset (CPU copy), a body of whole cache lines (DMA) and a tail
- * of the remaining bytes (CPU copy). Returns 0 on success and -errno on
- * error.
- */
-static int scif_rma_list_dma_copy_aligned(struct scif_copy_work *work,
-					  struct dma_chan *chan)
-{
-	dma_addr_t src_dma_addr, dst_dma_addr;
-	size_t loop_len, remaining_len, tail_len, src_contig_bytes = 0;
-	size_t dst_contig_bytes = 0;
-	int src_cache_off;
-	s64 end_src_offset, end_dst_offset;
-	struct scif_window_iter src_win_iter;
-	struct scif_window_iter dst_win_iter;
-	void *src_virt, *dst_virt;
-	struct scif_window *src_window = work->src_window;
-	struct scif_window *dst_window = work->dst_window;
-	s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
-	int ret = 0;
-	struct dma_async_tx_descriptor *tx;
-	struct dma_device *dev = chan->device;
-	dma_cookie_t cookie;
-
-	remaining_len = work->len;
-	scif_init_window_iter(src_window, &src_win_iter);
-	scif_init_window_iter(dst_window, &dst_win_iter);
-
-	src_cache_off = src_offset & (L1_CACHE_BYTES - 1);
-	if (src_cache_off != 0) {
-		/* Head */
-		loop_len = L1_CACHE_BYTES - src_cache_off;
-		loop_len = min(loop_len, remaining_len);
-		/*
-		 * NOTE(review): these two DMA addresses are not read again
-		 * before being overwritten; the head is copied by the CPU.
-		 */
-		src_dma_addr = __scif_off_to_dma_addr(src_window, src_offset);
-		dst_dma_addr = __scif_off_to_dma_addr(dst_window, dst_offset);
-		/* Self windows use pinned pages, other windows are remapped */
-		if (src_window->type == SCIF_WINDOW_SELF)
-			src_virt = _get_local_va(src_offset, src_window,
-						 loop_len);
-		else
-			src_virt = ioremap_remote(src_offset, src_window,
-						  loop_len,
-						  work->remote_dev, NULL);
-		if (!src_virt)
-			return -ENOMEM;
-		if (dst_window->type == SCIF_WINDOW_SELF)
-			dst_virt = _get_local_va(dst_offset, dst_window,
-						 loop_len);
-		else
-			dst_virt = ioremap_remote(dst_offset, dst_window,
-						  loop_len,
-						  work->remote_dev, NULL);
-		if (!dst_virt) {
-			if (src_window->type != SCIF_WINDOW_SELF)
-				iounmap_remote(src_virt, loop_len, work);
-			return -ENOMEM;
-		}
-		/* Ordering only matters if the head is the whole transfer */
-		if (src_window->type == SCIF_WINDOW_SELF)
-			scif_unaligned_cpy_toio(dst_virt, src_virt, loop_len,
-						remaining_len == loop_len ?
-						work->ordered : false);
-		else
-			scif_unaligned_cpy_fromio(dst_virt, src_virt, loop_len,
-						  remaining_len == loop_len ?
-						  work->ordered : false);
-		if (src_window->type != SCIF_WINDOW_SELF)
-			iounmap_remote(src_virt, loop_len, work);
-		if (dst_window->type != SCIF_WINDOW_SELF)
-			iounmap_remote(dst_virt, loop_len, work);
-		src_offset += loop_len;
-		dst_offset += loop_len;
-		remaining_len -= loop_len;
-	}
-
-	end_src_offset = src_window->offset +
-		(src_window->nr_pages << PAGE_SHIFT);
-	end_dst_offset = dst_window->offset +
-		(dst_window->nr_pages << PAGE_SHIFT);
-	/* Body: whole cache lines via DMA; the remainder becomes the tail */
-	tail_len = remaining_len & (L1_CACHE_BYTES - 1);
-	remaining_len -= tail_len;
-	while (remaining_len) {
-		if (src_offset == end_src_offset) {
-			src_window = list_next_entry(src_window, list);
-			end_src_offset = src_window->offset +
-				(src_window->nr_pages << PAGE_SHIFT);
-			scif_init_window_iter(src_window, &src_win_iter);
-		}
-		if (dst_offset == end_dst_offset) {
-			dst_window = list_next_entry(dst_window, list);
-			end_dst_offset = dst_window->offset +
-				(dst_window->nr_pages << PAGE_SHIFT);
-			scif_init_window_iter(dst_window, &dst_win_iter);
-		}
-
-		/* compute dma addresses for transfer */
-		src_dma_addr = scif_off_to_dma_addr(src_window, src_offset,
-						    &src_contig_bytes,
-						    &src_win_iter);
-		dst_dma_addr = scif_off_to_dma_addr(dst_window, dst_offset,
-						    &dst_contig_bytes,
-						    &dst_win_iter);
-		loop_len = min(src_contig_bytes, dst_contig_bytes);
-		loop_len = min(loop_len, remaining_len);
-		if (work->ordered && !tail_len &&
-		    !(remaining_len - loop_len)) {
-			/*
-			 * Break up the last chunk of the transfer into two
-			 * steps. if there is no tail to guarantee DMA ordering.
-			 * Passing SCIF_DMA_POLLING inserts a status update
-			 * descriptor in step 1 which acts as a double sided
-			 * synchronization fence for the DMA engine to ensure
-			 * that the last cache line in step 2 is updated last.
-			 * NOTE(review): the flag actually passed to step 1
-			 * below is DMA_PREP_FENCE.
-			 */
-			/* Step 1) DMA: Body Length - L1_CACHE_BYTES. */
-			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
-							 src_dma_addr,
-							 loop_len -
-							 L1_CACHE_BYTES,
-							 DMA_PREP_FENCE);
-			if (!tx) {
-				ret = -ENOMEM;
-				goto err;
-			}
-			cookie = tx->tx_submit(tx);
-			if (dma_submit_error(cookie)) {
-				ret = -ENOMEM;
-				goto err;
-			}
-			dma_async_issue_pending(chan);
-			src_offset += (loop_len - L1_CACHE_BYTES);
-			dst_offset += (loop_len - L1_CACHE_BYTES);
-			src_dma_addr += (loop_len - L1_CACHE_BYTES);
-			dst_dma_addr += (loop_len - L1_CACHE_BYTES);
-			remaining_len -= (loop_len - L1_CACHE_BYTES);
-			loop_len = remaining_len;
-
-			/* Step 2) DMA: L1_CACHE_BYTES */
-			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
-							 src_dma_addr,
-							 loop_len, 0);
-			if (!tx) {
-				ret = -ENOMEM;
-				goto err;
-			}
-			cookie = tx->tx_submit(tx);
-			if (dma_submit_error(cookie)) {
-				ret = -ENOMEM;
-				goto err;
-			}
-			dma_async_issue_pending(chan);
-		} else {
-			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
-							 src_dma_addr,
-							 loop_len, 0);
-			if (!tx) {
-				ret = -ENOMEM;
-				goto err;
-			}
-			cookie = tx->tx_submit(tx);
-			if (dma_submit_error(cookie)) {
-				ret = -ENOMEM;
-				goto err;
-			}
-			dma_async_issue_pending(chan);
-		}
-		src_offset += loop_len;
-		dst_offset += loop_len;
-		remaining_len -= loop_len;
-	}
-	/* Tail: CPU copy of the bytes past the last whole cache line */
-	remaining_len = tail_len;
-	if (remaining_len) {
-		loop_len = remaining_len;
-		if (src_offset == end_src_offset)
-			src_window = list_next_entry(src_window, list);
-		if (dst_offset == end_dst_offset)
-			dst_window = list_next_entry(dst_window, list);
-
-		/*
-		 * NOTE(review): as in the head, these DMA addresses are not
-		 * read afterwards; the tail is copied by the CPU.
-		 */
-		src_dma_addr = __scif_off_to_dma_addr(src_window, src_offset);
-		dst_dma_addr = __scif_off_to_dma_addr(dst_window, dst_offset);
-		/*
-		 * The CPU copy for the tail bytes must be initiated only once
-		 * previous DMA transfers for this endpoint have completed to
-		 * guarantee ordering.
-		 */
-		if (work->ordered) {
-			struct scif_dev *rdev = work->remote_dev;
-
-			ret = scif_drain_dma_poll(rdev->sdev, chan);
-			if (ret)
-				return ret;
-		}
-		if (src_window->type == SCIF_WINDOW_SELF)
-			src_virt = _get_local_va(src_offset, src_window,
-						 loop_len);
-		else
-			src_virt = ioremap_remote(src_offset, src_window,
-						  loop_len,
-						  work->remote_dev, NULL);
-		if (!src_virt)
-			return -ENOMEM;
-
-		if (dst_window->type == SCIF_WINDOW_SELF)
-			dst_virt = _get_local_va(dst_offset, dst_window,
-						 loop_len);
-		else
-			dst_virt = ioremap_remote(dst_offset, dst_window,
-						  loop_len,
-						  work->remote_dev, NULL);
-		if (!dst_virt) {
-			if (src_window->type != SCIF_WINDOW_SELF)
-				iounmap_remote(src_virt, loop_len, work);
-			return -ENOMEM;
-		}
-
-		if (src_window->type == SCIF_WINDOW_SELF)
-			scif_unaligned_cpy_toio(dst_virt, src_virt, loop_len,
-						work->ordered);
-		else
-			scif_unaligned_cpy_fromio(dst_virt, src_virt,
-						  loop_len, work->ordered);
-		if (src_window->type != SCIF_WINDOW_SELF)
-			iounmap_remote(src_virt, loop_len, work);
-
-		if (dst_window->type != SCIF_WINDOW_SELF)
-			iounmap_remote(dst_virt, loop_len, work);
-		remaining_len -= loop_len;
-	}
-	return ret;
-err:
-	dev_err(scif_info.mdev.this_device,
-		"%s %d Desc Prog Failed ret %d\n",
-		__func__, __LINE__, ret);
-	return ret;
-}
-
-/*
- * scif_rma_list_cpu_copy:
- *
- * Traverse all the windows and perform CPU copy.
- *
- * @work: copy request (source/destination windows, offsets, length, loopback)
- *
- * Each iteration copies at most up to the nearer page boundary of the source
- * and destination offsets. Returns 0 on success and -ENOMEM when either side
- * cannot be mapped.
- */
-static int scif_rma_list_cpu_copy(struct scif_copy_work *work)
-{
-	void *src_virt, *dst_virt;
-	size_t loop_len, remaining_len;
-	int src_page_off, dst_page_off;
-	s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
-	struct scif_window *src_window = work->src_window;
-	struct scif_window *dst_window = work->dst_window;
-	s64 end_src_offset, end_dst_offset;
-	int ret = 0;
-	struct scif_window_iter src_win_iter;
-	struct scif_window_iter dst_win_iter;
-
-	remaining_len = work->len;
-
-	scif_init_window_iter(src_window, &src_win_iter);
-	scif_init_window_iter(dst_window, &dst_win_iter);
-	while (remaining_len) {
-		src_page_off = src_offset & ~PAGE_MASK;
-		dst_page_off = dst_offset & ~PAGE_MASK;
-		/* Stay within the current page on both sides */
-		loop_len = min(PAGE_SIZE -
-			       max(src_page_off, dst_page_off),
-			       remaining_len);
-
-		/* Self windows use pinned pages, other windows are remapped */
-		if (src_window->type == SCIF_WINDOW_SELF)
-			src_virt = _get_local_va(src_offset, src_window,
-						 loop_len);
-		else
-			src_virt = ioremap_remote(src_offset, src_window,
-						  loop_len,
-						  work->remote_dev,
-						  &src_win_iter);
-		if (!src_virt) {
-			ret = -ENOMEM;
-			goto error;
-		}
-
-		if (dst_window->type == SCIF_WINDOW_SELF)
-			dst_virt = _get_local_va(dst_offset, dst_window,
-						 loop_len);
-		else
-			dst_virt = ioremap_remote(dst_offset, dst_window,
-						  loop_len,
-						  work->remote_dev,
-						  &dst_win_iter);
-		if (!dst_virt) {
-			if (src_window->type == SCIF_WINDOW_PEER)
-				iounmap_remote(src_virt, loop_len, work);
-			ret = -ENOMEM;
-			goto error;
-		}
-
-		/* Loopback uses a plain memcpy, otherwise the I/O accessors */
-		if (work->loopback) {
-			memcpy(dst_virt, src_virt, loop_len);
-		} else {
-			if (src_window->type == SCIF_WINDOW_SELF)
-				memcpy_toio((void __iomem __force *)dst_virt,
-					    src_virt, loop_len);
-			else
-				memcpy_fromio(dst_virt,
-					      (void __iomem __force *)src_virt,
-					      loop_len);
-		}
-		if (src_window->type == SCIF_WINDOW_PEER)
-			iounmap_remote(src_virt, loop_len, work);
-
-		if (dst_window->type == SCIF_WINDOW_PEER)
-			iounmap_remote(dst_virt, loop_len, work);
-
-		src_offset += loop_len;
-		dst_offset += loop_len;
-		remaining_len -= loop_len;
-		/* Only look for the next window while data is still left */
-		if (remaining_len) {
-			end_src_offset = src_window->offset +
-				(src_window->nr_pages << PAGE_SHIFT);
-			end_dst_offset = dst_window->offset +
-				(dst_window->nr_pages << PAGE_SHIFT);
-			if (src_offset == end_src_offset) {
-				src_window = list_next_entry(src_window, list);
-				scif_init_window_iter(src_window,
-						      &src_win_iter);
-			}
-			if (dst_offset == end_dst_offset) {
-				dst_window = list_next_entry(dst_window, list);
-				scif_init_window_iter(dst_window,
-						      &dst_win_iter);
-			}
-		}
-	}
-error:
-	return ret;
-}
-
-/*
- * scif_rma_list_dma_copy_wrapper:
- *
- * Pick the copy routine for a request and, for the unaligned case, set up
- * the temporary bounce buffer and its completion callback data.
- *
- * @epd: end point descriptor (not used here)
- * @work: copy request
- * @chan: DMA channel
- * @loffset: local offset (not used here)
- *
- * - If is_dma_copy_aligned(chan->device, 1, 1, 1) holds, everything is
- *   programmed as DMA by _scif_rma_list_dma_copy_aligned().
- * - If source and destination share the same offset within a cache line,
- *   scif_rma_list_dma_copy_aligned() is used.
- * - Loopback requests fall back to scif_rma_list_cpu_copy().
- * - Otherwise the data goes through a temporary buffer via
- *   scif_rma_list_dma_copy_unaligned().
- *
- * Returns the selected routine's result for the first three cases; for the
- * temporary buffer case returns 0 on success and -ENOMEM on any failure.
- */
-static int scif_rma_list_dma_copy_wrapper(struct scif_endpt *epd,
-					  struct scif_copy_work *work,
-					  struct dma_chan *chan, off_t loffset)
-{
-	int src_cache_off, dst_cache_off;
-	s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
-	u8 *temp = NULL;
-	bool src_local = true;
-	struct scif_dma_comp_cb *comp_cb;
-	int err;
-
-	if (is_dma_copy_aligned(chan->device, 1, 1, 1))
-		return _scif_rma_list_dma_copy_aligned(work, chan);
-
-	src_cache_off = src_offset & (L1_CACHE_BYTES - 1);
-	dst_cache_off = dst_offset & (L1_CACHE_BYTES - 1);
-
-	if (dst_cache_off == src_cache_off)
-		return scif_rma_list_dma_copy_aligned(work, chan);
-
-	if (work->loopback)
-		return scif_rma_list_cpu_copy(work);
-	src_local = work->src_window->type == SCIF_WINDOW_SELF;
-
-	/* Allocate dma_completion cb */
-	comp_cb = kzalloc(sizeof(*comp_cb), GFP_KERNEL);
-	if (!comp_cb)
-		goto error;
-
-	work->comp_cb = comp_cb;
-	comp_cb->cb_cookie = comp_cb;
-	comp_cb->dma_completion_func = &scif_rma_completion_cb;
-
-	if (work->len + (L1_CACHE_BYTES << 1) < SCIF_KMEM_UNALIGNED_BUF_SIZE) {
-		comp_cb->is_cache = false;
-		/* Allocate padding bytes to align to a cache line */
-		temp = kmalloc(work->len + (L1_CACHE_BYTES << 1),
-			       GFP_KERNEL);
-		if (!temp)
-			goto free_comp_cb;
-		comp_cb->temp_buf_to_free = temp;
-		/* kmalloc(..) does not guarantee cache line alignment */
-		if (!IS_ALIGNED((u64)temp, L1_CACHE_BYTES))
-			temp = PTR_ALIGN(temp, L1_CACHE_BYTES);
-	} else {
-		comp_cb->is_cache = true;
-		temp = kmem_cache_alloc(unaligned_cache, GFP_KERNEL);
-		if (!temp)
-			goto free_comp_cb;
-		comp_cb->temp_buf_to_free = temp;
-	}
-
-	if (src_local) {
-		/* Give the buffer the same cache line offset as the target */
-		temp += dst_cache_off;
-		scif_rma_local_cpu_copy(work->src_offset, work->src_window,
-					temp, work->len, true);
-	} else {
-		/*
-		 * Read whole cache lines from the remote side; the completion
-		 * callback copies the original range into the local window,
-		 * skipping the header padding.
-		 */
-		comp_cb->dst_window = work->dst_window;
-		comp_cb->dst_offset = work->dst_offset;
-		work->src_offset = work->src_offset - src_cache_off;
-		comp_cb->len = work->len;
-		work->len = ALIGN(work->len + src_cache_off, L1_CACHE_BYTES);
-		comp_cb->header_padding = src_cache_off;
-	}
-	comp_cb->temp_buf = temp;
-
-	err = scif_map_single(&comp_cb->temp_phys, temp,
-			      work->remote_dev, SCIF_KMEM_UNALIGNED_BUF_SIZE);
-	if (err)
-		goto free_temp_buf;
-	comp_cb->sdev = work->remote_dev;
-	if (scif_rma_list_dma_copy_unaligned(work, temp, chan, src_local) < 0)
-		goto unmap_temp_buf;
-	if (!src_local)
-		work->fence_type = SCIF_DMA_INTR;
-	return 0;
-unmap_temp_buf:
-	/*
-	 * The completion callback will not run for a failed submission, so
-	 * the mapping created above has to be undone here before the buffer
-	 * is freed.
-	 */
-	scif_unmap_single(comp_cb->temp_phys, comp_cb->sdev,
-			  SCIF_KMEM_UNALIGNED_BUF_SIZE);
-free_temp_buf:
-	if (comp_cb->is_cache)
-		kmem_cache_free(unaligned_cache, comp_cb->temp_buf_to_free);
-	else
-		kfree(comp_cb->temp_buf_to_free);
-free_comp_cb:
-	kfree(comp_cb);
-error:
-	return -ENOMEM;
-}
-
-/**
- * scif_rma_copy:
- * @epd: end point descriptor.
- * @loffset: offset in local registered address space to/from which to copy
- * @addr: user virtual address to/from which to copy
- * @len: length of range to copy
- * @roffset: offset in remote registered address space to/from which to copy
- * @flags: flags
- * @dir: LOCAL->REMOTE or vice versa.
- * @last_chunk: true if this is the last chunk of a larger transfer
- *
- * Validate parameters, check if src/dst registered ranges requested for copy
- * are valid and initiate either CPU or DMA copy.
- */
-static int scif_rma_copy(scif_epd_t epd, off_t loffset, unsigned long addr,
-			 size_t len, off_t roffset, int flags,
-			 enum scif_rma_dir dir, bool last_chunk)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)epd;
-	struct scif_rma_req remote_req;
-	struct scif_rma_req req;
-	struct scif_window *local_window = NULL;
-	struct scif_window *remote_window = NULL;
-	struct scif_copy_work copy_work;
-	bool loopback;
-	int err = 0;
-	struct dma_chan *chan;
-	struct scif_mmu_notif *mmn = NULL;
-	bool cache = false;
-	struct device *spdev;
-
-	err = scif_verify_epd(ep);
-	if (err)
-		return err;
-
-	if (flags && !(flags & (SCIF_RMA_USECPU | SCIF_RMA_USECACHE |
-				SCIF_RMA_SYNC | SCIF_RMA_ORDERED)))
-		return -EINVAL;
-
-	loopback = scifdev_self(ep->remote_dev) ? true : false;
-	copy_work.fence_type = ((flags & SCIF_RMA_SYNC) && last_chunk) ?
-				SCIF_DMA_POLL : 0;
-	copy_work.ordered = !!((flags & SCIF_RMA_ORDERED) && last_chunk);
-
-	/* Use CPU for Mgmt node <-> Mgmt node copies */
-	if (loopback && scif_is_mgmt_node()) {
-		flags |= SCIF_RMA_USECPU;
-		copy_work.fence_type = 0x0;
-	}
-
-	cache = scif_is_set_reg_cache(flags);
-
-	remote_req.out_window = &remote_window;
-	remote_req.offset = roffset;
-	remote_req.nr_bytes = len;
-	/*
-	 * If transfer is from local to remote then the remote window
-	 * must be writeable and vice versa.
-	 */
-	remote_req.prot = dir == SCIF_LOCAL_TO_REMOTE ? VM_WRITE : VM_READ;
-	remote_req.type = SCIF_WINDOW_PARTIAL;
-	remote_req.head = &ep->rma_info.remote_reg_list;
-
-	spdev = scif_get_peer_dev(ep->remote_dev);
-	if (IS_ERR(spdev)) {
-		err = PTR_ERR(spdev);
-		return err;
-	}
-
-	if (addr && cache) {
-		mutex_lock(&ep->rma_info.mmn_lock);
-		mmn = scif_find_mmu_notifier(current->mm, &ep->rma_info);
-		if (!mmn)
-			mmn = scif_add_mmu_notifier(current->mm, ep);
-		mutex_unlock(&ep->rma_info.mmn_lock);
-		if (IS_ERR(mmn)) {
-			scif_put_peer_dev(spdev);
-			return PTR_ERR(mmn);
-		}
-		cache = cache && !scif_rma_tc_can_cache(ep, len);
-	}
-	mutex_lock(&ep->rma_info.rma_lock);
-	if (addr) {
-		req.out_window = &local_window;
-		req.nr_bytes = ALIGN(len + (addr & ~PAGE_MASK),
-				     PAGE_SIZE);
-		req.va_for_temp = addr & PAGE_MASK;
-		req.prot = (dir == SCIF_LOCAL_TO_REMOTE ?
-			    VM_READ : VM_WRITE | VM_READ);
-		/* Does a valid local window exist? */
-		if (mmn) {
-			spin_lock(&ep->rma_info.tc_lock);
-			req.head = &mmn->tc_reg_list;
-			err = scif_query_tcw(ep, &req);
-			spin_unlock(&ep->rma_info.tc_lock);
-		}
-		if (!mmn || err) {
-			err = scif_register_temp(epd, req.va_for_temp,
-						 req.nr_bytes, req.prot,
-						 &loffset, &local_window);
-			if (err) {
-				mutex_unlock(&ep->rma_info.rma_lock);
-				goto error;
-			}
-			if (!cache)
-				goto skip_cache;
-			atomic_inc(&ep->rma_info.tcw_refcount);
-			atomic_add_return(local_window->nr_pages,
-					  &ep->rma_info.tcw_total_pages);
-			if (mmn) {
-				spin_lock(&ep->rma_info.tc_lock);
-				scif_insert_tcw(local_window,
-						&mmn->tc_reg_list);
-				spin_unlock(&ep->rma_info.tc_lock);
-			}
-		}
-skip_cache:
-		loffset = local_window->offset +
-				(addr - local_window->va_for_temp);
-	} else {
-		req.out_window = &local_window;
-		req.offset = loffset;
-		/*
-		 * If transfer is from local to remote then the self window
-		 * must be readable and vice versa.
-		 */
-		req.prot = dir == SCIF_LOCAL_TO_REMOTE ? VM_READ : VM_WRITE;
-		req.nr_bytes = len;
-		req.type = SCIF_WINDOW_PARTIAL;
-		req.head = &ep->rma_info.reg_list;
-		/* Does a valid local window exist? */
-		err = scif_query_window(&req);
-		if (err) {
-			mutex_unlock(&ep->rma_info.rma_lock);
-			goto error;
-		}
-	}
-
-	/* Does a valid remote window exist? */
-	err = scif_query_window(&remote_req);
-	if (err) {
-		mutex_unlock(&ep->rma_info.rma_lock);
-		goto error;
-	}
-
-	/*
-	 * Prepare copy_work for submitting work to the DMA kernel thread
-	 * or CPU copy routine.
-	 */
-	copy_work.len = len;
-	copy_work.loopback = loopback;
-	copy_work.remote_dev = ep->remote_dev;
-	if (dir == SCIF_LOCAL_TO_REMOTE) {
-		copy_work.src_offset = loffset;
-		copy_work.src_window = local_window;
-		copy_work.dst_offset = roffset;
-		copy_work.dst_window = remote_window;
-	} else {
-		copy_work.src_offset = roffset;
-		copy_work.src_window = remote_window;
-		copy_work.dst_offset = loffset;
-		copy_work.dst_window = local_window;
-	}
-
-	if (flags & SCIF_RMA_USECPU) {
-		scif_rma_list_cpu_copy(&copy_work);
-	} else {
-		chan = ep->rma_info.dma_chan;
-		err = scif_rma_list_dma_copy_wrapper(epd, &copy_work,
-						     chan, loffset);
-	}
-	if (addr && !cache)
-		atomic_inc(&ep->rma_info.tw_refcount);
-
-	mutex_unlock(&ep->rma_info.rma_lock);
-
-	if (last_chunk) {
-		struct scif_dev *rdev = ep->remote_dev;
-
-		if (copy_work.fence_type == SCIF_DMA_POLL)
-			err = scif_drain_dma_poll(rdev->sdev,
-						  ep->rma_info.dma_chan);
-		else if (copy_work.fence_type == SCIF_DMA_INTR)
-			err = scif_drain_dma_intr(rdev->sdev,
-						  ep->rma_info.dma_chan);
-	}
-
-	if (addr && !cache)
-		scif_queue_for_cleanup(local_window, &scif_info.rma);
-	scif_put_peer_dev(spdev);
-	return err;
-error:
-	if (err) {
-		if (addr && local_window && !cache)
-			scif_destroy_window(ep, local_window);
-		dev_err(scif_info.mdev.this_device,
-			"%s %d err %d len 0x%lx\n",
-			__func__, __LINE__, err, len);
-	}
-	scif_put_peer_dev(spdev);
-	return err;
-}
-
-int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len,
-		  off_t roffset, int flags)
-{
-	int err;
-
-	dev_dbg(scif_info.mdev.this_device,
-		"SCIFAPI readfrom: ep %p loffset 0x%lx len 0x%lx offset 0x%lx flags 0x%x\n",
-		epd, loffset, len, roffset, flags);
-	if (scif_unaligned(loffset, roffset)) {
-		while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
-			err = scif_rma_copy(epd, loffset, 0x0,
-					    SCIF_MAX_UNALIGNED_BUF_SIZE,
-					    roffset, flags,
-					    SCIF_REMOTE_TO_LOCAL, false);
-			if (err)
-				goto readfrom_err;
-			loffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
-			roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
-			len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
-		}
-	}
-	err = scif_rma_copy(epd, loffset, 0x0, len,
-			    roffset, flags, SCIF_REMOTE_TO_LOCAL, true);
-readfrom_err:
-	return err;
-}
-EXPORT_SYMBOL_GPL(scif_readfrom);
-
-int scif_writeto(scif_epd_t epd, off_t loffset, size_t len,
-		 off_t roffset, int flags)
-{
-	int err;
-
-	dev_dbg(scif_info.mdev.this_device,
-		"SCIFAPI writeto: ep %p loffset 0x%lx len 0x%lx roffset 0x%lx flags 0x%x\n",
-		epd, loffset, len, roffset, flags);
-	if (scif_unaligned(loffset, roffset)) {
-		while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
-			err = scif_rma_copy(epd, loffset, 0x0,
-					    SCIF_MAX_UNALIGNED_BUF_SIZE,
-					    roffset, flags,
-					    SCIF_LOCAL_TO_REMOTE, false);
-			if (err)
-				goto writeto_err;
-			loffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
-			roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
-			len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
-		}
-	}
-	err = scif_rma_copy(epd, loffset, 0x0, len,
-			    roffset, flags, SCIF_LOCAL_TO_REMOTE, true);
-writeto_err:
-	return err;
-}
-EXPORT_SYMBOL_GPL(scif_writeto);
-
-int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len,
-		   off_t roffset, int flags)
-{
-	int err;
-
-	dev_dbg(scif_info.mdev.this_device,
-		"SCIFAPI vreadfrom: ep %p addr %p len 0x%lx roffset 0x%lx flags 0x%x\n",
-		epd, addr, len, roffset, flags);
-	if (scif_unaligned((off_t __force)addr, roffset)) {
-		if (len > SCIF_MAX_UNALIGNED_BUF_SIZE)
-			flags &= ~SCIF_RMA_USECACHE;
-
-		while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
-			err = scif_rma_copy(epd, 0, (u64)addr,
-					    SCIF_MAX_UNALIGNED_BUF_SIZE,
-					    roffset, flags,
-					    SCIF_REMOTE_TO_LOCAL, false);
-			if (err)
-				goto vreadfrom_err;
-			addr += SCIF_MAX_UNALIGNED_BUF_SIZE;
-			roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
-			len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
-		}
-	}
-	err = scif_rma_copy(epd, 0, (u64)addr, len,
-			    roffset, flags, SCIF_REMOTE_TO_LOCAL, true);
-vreadfrom_err:
-	return err;
-}
-EXPORT_SYMBOL_GPL(scif_vreadfrom);
-
-int scif_vwriteto(scif_epd_t epd, void *addr, size_t len,
-		  off_t roffset, int flags)
-{
-	int err;
-
-	dev_dbg(scif_info.mdev.this_device,
-		"SCIFAPI vwriteto: ep %p addr %p len 0x%lx roffset 0x%lx flags 0x%x\n",
-		epd, addr, len, roffset, flags);
-	if (scif_unaligned((off_t __force)addr, roffset)) {
-		if (len > SCIF_MAX_UNALIGNED_BUF_SIZE)
-			flags &= ~SCIF_RMA_USECACHE;
-
-		while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
-			err = scif_rma_copy(epd, 0, (u64)addr,
-					    SCIF_MAX_UNALIGNED_BUF_SIZE,
-					    roffset, flags,
-					    SCIF_LOCAL_TO_REMOTE, false);
-			if (err)
-				goto vwriteto_err;
-			addr += SCIF_MAX_UNALIGNED_BUF_SIZE;
-			roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
-			len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
-		}
-	}
-	err = scif_rma_copy(epd, 0, (u64)addr, len,
-			    roffset, flags, SCIF_LOCAL_TO_REMOTE, true);
-vwriteto_err:
-	return err;
-}
-EXPORT_SYMBOL_GPL(scif_vwriteto);
diff --git a/drivers/misc/mic/scif/scif_epd.c b/drivers/misc/mic/scif/scif_epd.c
deleted file mode 100644
index 426687f6696b..000000000000
--- a/drivers/misc/mic/scif/scif_epd.c
+++ /dev/null
@@ -1,357 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Intel SCIF driver.
- */
-#include "scif_main.h"
-#include "scif_map.h"
-
-void scif_cleanup_ep_qp(struct scif_endpt *ep)
-{
-	struct scif_qp *qp = ep->qp_info.qp;
-
-	if (qp->outbound_q.rb_base) {
-		scif_iounmap((void *)qp->outbound_q.rb_base,
-			     qp->outbound_q.size, ep->remote_dev);
-		qp->outbound_q.rb_base = NULL;
-	}
-	if (qp->remote_qp) {
-		scif_iounmap((void *)qp->remote_qp,
-			     sizeof(struct scif_qp), ep->remote_dev);
-		qp->remote_qp = NULL;
-	}
-	if (qp->local_qp) {
-		scif_unmap_single(qp->local_qp, ep->remote_dev,
-				  sizeof(struct scif_qp));
-		qp->local_qp = 0x0;
-	}
-	if (qp->local_buf) {
-		scif_unmap_single(qp->local_buf, ep->remote_dev,
-				  SCIF_ENDPT_QP_SIZE);
-		qp->local_buf = 0;
-	}
-}
-
-void scif_teardown_ep(void *endpt)
-{
-	struct scif_endpt *ep = endpt;
-	struct scif_qp *qp = ep->qp_info.qp;
-
-	if (qp) {
-		spin_lock(&ep->lock);
-		scif_cleanup_ep_qp(ep);
-		spin_unlock(&ep->lock);
-		kfree(qp->inbound_q.rb_base);
-		kfree(qp);
-	}
-}
-
-/*
- * Enqueue the endpoint to the zombie list for cleanup.
- * The endpoint should not be accessed once this API returns.
- */
-void scif_add_epd_to_zombie_list(struct scif_endpt *ep, bool eplock_held)
-{
-	if (!eplock_held)
-		mutex_lock(&scif_info.eplock);
-	spin_lock(&ep->lock);
-	ep->state = SCIFEP_ZOMBIE;
-	spin_unlock(&ep->lock);
-	list_add_tail(&ep->list, &scif_info.zombie);
-	scif_info.nr_zombies++;
-	if (!eplock_held)
-		mutex_unlock(&scif_info.eplock);
-	schedule_work(&scif_info.misc_work);
-}
-
-static struct scif_endpt *scif_find_listen_ep(u16 port)
-{
-	struct scif_endpt *ep = NULL;
-	struct list_head *pos, *tmpq;
-
-	mutex_lock(&scif_info.eplock);
-	list_for_each_safe(pos, tmpq, &scif_info.listen) {
-		ep = list_entry(pos, struct scif_endpt, list);
-		if (ep->port.port == port) {
-			mutex_unlock(&scif_info.eplock);
-			return ep;
-		}
-	}
-	mutex_unlock(&scif_info.eplock);
-	return NULL;
-}
-
-void scif_cleanup_zombie_epd(void)
-{
-	struct list_head *pos, *tmpq;
-	struct scif_endpt *ep;
-
-	mutex_lock(&scif_info.eplock);
-	list_for_each_safe(pos, tmpq, &scif_info.zombie) {
-		ep = list_entry(pos, struct scif_endpt, list);
-		if (scif_rma_ep_can_uninit(ep)) {
-			list_del(pos);
-			scif_info.nr_zombies--;
-			put_iova_domain(&ep->rma_info.iovad);
-			kfree(ep);
-		}
-	}
-	mutex_unlock(&scif_info.eplock);
-}
-
-/**
- * scif_cnctreq() - Respond to SCIF_CNCT_REQ interrupt message
- * @scifdev:    SCIF device
- * @msg:        Interrupt message
- *
- * This message is initiated by the remote node to request a connection
- * to the local node.  This function looks for an end point in the
- * listen state on the requested port id.
- *
- * If it finds a listening port it places the connect request on the
- * listening end points queue and wakes up any pending accept calls.
- *
- * If it does not find a listening end point it sends a connection
- * reject message to the remote node.
- */
-void scif_cnctreq(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	struct scif_endpt *ep = NULL;
-	struct scif_conreq *conreq;
-
-	conreq = kmalloc(sizeof(*conreq), GFP_KERNEL);
-	if (!conreq)
-		/* Lack of resources so reject the request. */
-		goto conreq_sendrej;
-
-	ep = scif_find_listen_ep(msg->dst.port);
-	if (!ep)
-		/*  Send reject due to no listening ports */
-		goto conreq_sendrej_free;
-	else
-		spin_lock(&ep->lock);
-
-	if (ep->backlog <= ep->conreqcnt) {
-		/*  Send reject due to too many pending requests */
-		spin_unlock(&ep->lock);
-		goto conreq_sendrej_free;
-	}
-
-	conreq->msg = *msg;
-	list_add_tail(&conreq->list, &ep->conlist);
-	ep->conreqcnt++;
-	wake_up_interruptible(&ep->conwq);
-	spin_unlock(&ep->lock);
-	return;
-
-conreq_sendrej_free:
-	kfree(conreq);
-conreq_sendrej:
-	msg->uop = SCIF_CNCT_REJ;
-	scif_nodeqp_send(&scif_dev[msg->src.node], msg);
-}
-
-/**
- * scif_cnctgnt() - Respond to SCIF_CNCT_GNT interrupt message
- * @scifdev:    SCIF device
- * @msg:        Interrupt message
- *
- * An accept() on the remote node has occurred and sent this message
- * to indicate success.  Place the end point in the MAPPING state and
- * save the remote nodes memory information.  Then wake up the connect
- * request so it can finish.
- */
-void scif_cnctgnt(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
-
-	spin_lock(&ep->lock);
-	if (SCIFEP_CONNECTING == ep->state) {
-		ep->peer.node = msg->src.node;
-		ep->peer.port = msg->src.port;
-		ep->qp_info.gnt_pld = msg->payload[1];
-		ep->remote_ep = msg->payload[2];
-		ep->state = SCIFEP_MAPPING;
-
-		wake_up(&ep->conwq);
-	}
-	spin_unlock(&ep->lock);
-}
-
-/**
- * scif_cnctgnt_ack() - Respond to SCIF_CNCT_GNTACK interrupt message
- * @scifdev:    SCIF device
- * @msg:        Interrupt message
- *
- * The remote connection request has finished mapping the local memory.
- * Place the connection in the connected state and wake up the pending
- * accept() call.
- */
-void scif_cnctgnt_ack(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
-
-	mutex_lock(&scif_info.connlock);
-	spin_lock(&ep->lock);
-	/* New ep is now connected with all resources set. */
-	ep->state = SCIFEP_CONNECTED;
-	list_add_tail(&ep->list, &scif_info.connected);
-	wake_up(&ep->conwq);
-	spin_unlock(&ep->lock);
-	mutex_unlock(&scif_info.connlock);
-}
-
-/**
- * scif_cnctgnt_nack() - Respond to SCIF_CNCT_GNTNACK interrupt message
- * @scifdev:    SCIF device
- * @msg:        Interrupt message
- *
- * The remote connection request failed to map the local memory it was sent.
- * Place the end point in the CLOSING state to indicate it and wake up
- * the pending accept();
- */
-void scif_cnctgnt_nack(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
-
-	spin_lock(&ep->lock);
-	ep->state = SCIFEP_CLOSING;
-	wake_up(&ep->conwq);
-	spin_unlock(&ep->lock);
-}
-
-/**
- * scif_cnctrej() - Respond to SCIF_CNCT_REJ interrupt message
- * @scifdev:    SCIF device
- * @msg:        Interrupt message
- *
- * The remote end has rejected the connection request.  Set the end
- * point back to the bound state and wake up the pending connect().
- */
-void scif_cnctrej(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
-
-	spin_lock(&ep->lock);
-	if (SCIFEP_CONNECTING == ep->state) {
-		ep->state = SCIFEP_BOUND;
-		wake_up(&ep->conwq);
-	}
-	spin_unlock(&ep->lock);
-}
-
-/**
- * scif_discnct() - Respond to SCIF_DISCNCT interrupt message
- * @scifdev:    SCIF device
- * @msg:        Interrupt message
- *
- * The remote node has indicated close() has been called on its end
- * point.  Remove the local end point from the connected list, set its
- * state to disconnected and ensure accesses to the remote node are
- * shutdown.
- *
- * When all accesses to the remote end have completed then send a
- * DISCNT_ACK to indicate it can remove its resources and complete
- * the close routine.
- */
-void scif_discnct(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	struct scif_endpt *ep = NULL;
-	struct scif_endpt *tmpep;
-	struct list_head *pos, *tmpq;
-
-	mutex_lock(&scif_info.connlock);
-	list_for_each_safe(pos, tmpq, &scif_info.connected) {
-		tmpep = list_entry(pos, struct scif_endpt, list);
-		/*
-		 * The local ep may have sent a disconnect and and been closed
-		 * due to a message response time out. It may have been
-		 * allocated again and formed a new connection so we want to
-		 * check if the remote ep matches
-		 */
-		if (((u64)tmpep == msg->payload[1]) &&
-		    ((u64)tmpep->remote_ep == msg->payload[0])) {
-			list_del(pos);
-			ep = tmpep;
-			spin_lock(&ep->lock);
-			break;
-		}
-	}
-
-	/*
-	 * If the terminated end is not found then this side started closing
-	 * before the other side sent the disconnect.  If so the ep will no
-	 * longer be on the connected list.  Regardless the other side
-	 * needs to be acked to let it know close is complete.
-	 */
-	if (!ep) {
-		mutex_unlock(&scif_info.connlock);
-		goto discnct_ack;
-	}
-
-	ep->state = SCIFEP_DISCONNECTED;
-	list_add_tail(&ep->list, &scif_info.disconnected);
-
-	wake_up_interruptible(&ep->sendwq);
-	wake_up_interruptible(&ep->recvwq);
-	spin_unlock(&ep->lock);
-	mutex_unlock(&scif_info.connlock);
-
-discnct_ack:
-	msg->uop = SCIF_DISCNT_ACK;
-	scif_nodeqp_send(&scif_dev[msg->src.node], msg);
-}
-
-/**
- * scif_discnct_ack() - Respond to SCIF_DISCNT_ACK interrupt message
- * @scifdev:    SCIF device
- * @msg:        Interrupt message
- *
- * Remote side has indicated it has not more references to local resources
- */
-void scif_discnt_ack(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
-
-	spin_lock(&ep->lock);
-	ep->state = SCIFEP_DISCONNECTED;
-	spin_unlock(&ep->lock);
-	complete(&ep->discon);
-}
-
-/**
- * scif_clientsend() - Respond to SCIF_CLIENT_SEND interrupt message
- * @scifdev:    SCIF device
- * @msg:        Interrupt message
- *
- * Remote side is confirming send or receive interrupt handling is complete.
- */
-void scif_clientsend(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
-
-	spin_lock(&ep->lock);
-	if (SCIFEP_CONNECTED == ep->state)
-		wake_up_interruptible(&ep->recvwq);
-	spin_unlock(&ep->lock);
-}
-
-/**
- * scif_clientrcvd() - Respond to SCIF_CLIENT_RCVD interrupt message
- * @scifdev:    SCIF device
- * @msg:        Interrupt message
- *
- * Remote side is confirming send or receive interrupt handling is complete.
- */
-void scif_clientrcvd(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
-
-	spin_lock(&ep->lock);
-	if (SCIFEP_CONNECTED == ep->state)
-		wake_up_interruptible(&ep->sendwq);
-	spin_unlock(&ep->lock);
-}
diff --git a/drivers/misc/mic/scif/scif_epd.h b/drivers/misc/mic/scif/scif_epd.h
deleted file mode 100644
index 0b9dfe1cc06c..000000000000
--- a/drivers/misc/mic/scif/scif_epd.h
+++ /dev/null
@@ -1,200 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Intel SCIF driver.
- */
-#ifndef SCIF_EPD_H
-#define SCIF_EPD_H
-
-#include <linux/delay.h>
-#include <linux/scif.h>
-#include <linux/scif_ioctl.h>
-
-#define SCIF_EPLOCK_HELD true
-
-enum scif_epd_state {
-	SCIFEP_UNBOUND,
-	SCIFEP_BOUND,
-	SCIFEP_LISTENING,
-	SCIFEP_CONNECTED,
-	SCIFEP_CONNECTING,
-	SCIFEP_MAPPING,
-	SCIFEP_CLOSING,
-	SCIFEP_CLLISTEN,
-	SCIFEP_DISCONNECTED,
-	SCIFEP_ZOMBIE
-};
-
-/*
- * struct scif_conreq - Data structure added to the connection list.
- *
- * @msg: connection request message received
- * @list: link to list of connection requests
- */
-struct scif_conreq {
-	struct scifmsg msg;
-	struct list_head list;
-};
-
-/* Size of the RB for the Endpoint QP */
-#define SCIF_ENDPT_QP_SIZE 0x1000
-
-/*
- * scif_endpt_qp_info - SCIF endpoint queue pair
- *
- * @qp - Qpair for this endpoint
- * @qp_offset - DMA address of the QP
- * @gnt_pld - Payload in a SCIF_CNCT_GNT message containing the
- * physical address of the remote_qp.
- */
-struct scif_endpt_qp_info {
-	struct scif_qp *qp;
-	dma_addr_t qp_offset;
-	dma_addr_t gnt_pld;
-};
-
-/*
- * struct scif_endpt - The SCIF endpoint data structure
- *
- * @state: end point state
- * @lock: lock synchronizing access to endpoint fields like state etc
- * @port: self port information
- * @peer: peer port information
- * @backlog: maximum pending connection requests
- * @qp_info: Endpoint QP information for SCIF messaging
- * @remote_dev: scifdev used by this endpt to communicate with remote node.
- * @remote_ep: remote endpoint
- * @conreqcnt: Keep track of number of connection requests.
- * @files: Open file information used to match the id passed in with
- *         the flush routine.
- * @conlist: list of connection requests
- * @conwq: waitqueue for connection processing
- * @discon: completion used during disconnection
- * @sendwq: waitqueue used during sending messages
- * @recvwq: waitqueue used during message receipt
- * @sendlock: Synchronize ordering of messages sent
- * @recvlock: Synchronize ordering of messages received
- * @list: link to list of various endpoints like connected, listening etc
- * @li_accept: pending ACCEPTREG
- * @acceptcnt: pending ACCEPTREG cnt
- * @liacceptlist: link to listen accept
- * @miacceptlist: link to uaccept
- * @listenep: associated listen ep
- * @conn_work: Non blocking connect work
- * @conn_port: Connection port
- * @conn_err: Errors during connection
- * @conn_async_state: Async connection
- * @conn_pend_wq: Used by poll while waiting for incoming connections
- * @conn_list: List of async connection requests
- * @rma_info: Information for triggering SCIF RMA and DMA operations
- * @mmu_list: link to list of MMU notifier cleanup work
- * @anon: anonymous file for use in kernel mode scif poll
- */
-struct scif_endpt {
-	enum scif_epd_state state;
-	spinlock_t lock;
-	struct scif_port_id port;
-	struct scif_port_id peer;
-	int backlog;
-	struct scif_endpt_qp_info qp_info;
-	struct scif_dev *remote_dev;
-	u64 remote_ep;
-	int conreqcnt;
-	struct files_struct *files;
-	struct list_head conlist;
-	wait_queue_head_t conwq;
-	struct completion discon;
-	wait_queue_head_t sendwq;
-	wait_queue_head_t recvwq;
-	struct mutex sendlock;
-	struct mutex recvlock;
-	struct list_head list;
-	struct list_head li_accept;
-	int acceptcnt;
-	struct list_head liacceptlist;
-	struct list_head miacceptlist;
-	struct scif_endpt *listenep;
-	struct scif_port_id conn_port;
-	int conn_err;
-	int conn_async_state;
-	wait_queue_head_t conn_pend_wq;
-	struct list_head conn_list;
-	struct scif_endpt_rma_info rma_info;
-	struct list_head mmu_list;
-	struct file *anon;
-};
-
-static inline int scifdev_alive(struct scif_endpt *ep)
-{
-	return _scifdev_alive(ep->remote_dev);
-}
-
-/*
- * scif_verify_epd:
- * ep: SCIF endpoint
- *
- * Checks several generic error conditions and returns the
- * appropriate error.
- */
-static inline int scif_verify_epd(struct scif_endpt *ep)
-{
-	if (ep->state == SCIFEP_DISCONNECTED)
-		return -ECONNRESET;
-
-	if (ep->state != SCIFEP_CONNECTED)
-		return -ENOTCONN;
-
-	if (!scifdev_alive(ep))
-		return -ENODEV;
-
-	return 0;
-}
-
-static inline int scif_anon_inode_getfile(scif_epd_t epd)
-{
-	epd->anon = anon_inode_getfile("scif", &scif_anon_fops, NULL, 0);
-
-	return PTR_ERR_OR_ZERO(epd->anon);
-}
-
-static inline void scif_anon_inode_fput(scif_epd_t epd)
-{
-	if (epd->anon) {
-		fput(epd->anon);
-		epd->anon = NULL;
-	}
-}
-
-void scif_cleanup_zombie_epd(void);
-void scif_teardown_ep(void *endpt);
-void scif_cleanup_ep_qp(struct scif_endpt *ep);
-void scif_add_epd_to_zombie_list(struct scif_endpt *ep, bool eplock_held);
-void scif_get_node_info(void);
-void scif_send_acks(struct scif_dev *dev);
-void scif_conn_handler(struct work_struct *work);
-int scif_rsrv_port(u16 port);
-void scif_get_port(u16 port);
-int scif_get_new_port(void);
-void scif_put_port(u16 port);
-int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags);
-int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags);
-void scif_cnctreq(struct scif_dev *scifdev, struct scifmsg *msg);
-void scif_cnctgnt(struct scif_dev *scifdev, struct scifmsg *msg);
-void scif_cnctgnt_ack(struct scif_dev *scifdev, struct scifmsg *msg);
-void scif_cnctgnt_nack(struct scif_dev *scifdev, struct scifmsg *msg);
-void scif_cnctrej(struct scif_dev *scifdev, struct scifmsg *msg);
-void scif_discnct(struct scif_dev *scifdev, struct scifmsg *msg);
-void scif_discnt_ack(struct scif_dev *scifdev, struct scifmsg *msg);
-void scif_clientsend(struct scif_dev *scifdev, struct scifmsg *msg);
-void scif_clientrcvd(struct scif_dev *scifdev, struct scifmsg *msg);
-int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block);
-int __scif_flush(scif_epd_t epd);
-int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd);
-__poll_t __scif_pollfd(struct file *f, poll_table *wait,
-			   struct scif_endpt *ep);
-int __scif_pin_pages(void *addr, size_t len, int *out_prot,
-		     int map_flags, scif_pinned_pages_t *pages);
-#endif /* SCIF_EPD_H */
diff --git a/drivers/misc/mic/scif/scif_fd.c b/drivers/misc/mic/scif/scif_fd.c
deleted file mode 100644
index 3f08646cd78a..000000000000
--- a/drivers/misc/mic/scif/scif_fd.c
+++ /dev/null
@@ -1,462 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Intel SCIF driver.
- */
-#include "scif_main.h"
-
-static int scif_fdopen(struct inode *inode, struct file *f)
-{
-	struct scif_endpt *priv = scif_open();
-
-	if (!priv)
-		return -ENOMEM;
-	f->private_data = priv;
-	return 0;
-}
-
-static int scif_fdclose(struct inode *inode, struct file *f)
-{
-	struct scif_endpt *priv = f->private_data;
-
-	return scif_close(priv);
-}
-
-static int scif_fdmmap(struct file *f, struct vm_area_struct *vma)
-{
-	struct scif_endpt *priv = f->private_data;
-
-	return scif_mmap(vma, priv);
-}
-
-static __poll_t scif_fdpoll(struct file *f, poll_table *wait)
-{
-	struct scif_endpt *priv = f->private_data;
-
-	return __scif_pollfd(f, wait, priv);
-}
-
-static int scif_fdflush(struct file *f, fl_owner_t id)
-{
-	struct scif_endpt *ep = f->private_data;
-
-	spin_lock(&ep->lock);
-	/*
-	 * The listening endpoint stashes the open file information before
-	 * waiting for incoming connections. The release callback would never be
-	 * called if the application closed the endpoint, while waiting for
-	 * incoming connections from a separate thread since the file descriptor
-	 * reference count is bumped up in the accept IOCTL. Call the flush
-	 * routine if the id matches the endpoint open file information so that
-	 * the listening endpoint can be woken up and the fd released.
-	 */
-	if (ep->files == id)
-		__scif_flush(ep);
-	spin_unlock(&ep->lock);
-	return 0;
-}
-
-static __always_inline void scif_err_debug(int err, const char *str)
-{
-	/*
-	 * ENOTCONN is a common uninteresting error which is
-	 * flooding debug messages to the console unnecessarily.
-	 */
-	if (err < 0 && err != -ENOTCONN)
-		dev_dbg(scif_info.mdev.this_device, "%s err %d\n", str, err);
-}
-
-static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg)
-{
-	struct scif_endpt *priv = f->private_data;
-	void __user *argp = (void __user *)arg;
-	int err = 0;
-	struct scifioctl_msg request;
-	bool non_block = false;
-
-	non_block = !!(f->f_flags & O_NONBLOCK);
-
-	switch (cmd) {
-	case SCIF_BIND:
-	{
-		int pn;
-
-		if (copy_from_user(&pn, argp, sizeof(pn)))
-			return -EFAULT;
-
-		pn = scif_bind(priv, pn);
-		if (pn < 0)
-			return pn;
-
-		if (copy_to_user(argp, &pn, sizeof(pn)))
-			return -EFAULT;
-
-		return 0;
-	}
-	case SCIF_LISTEN:
-		return scif_listen(priv, arg);
-	case SCIF_CONNECT:
-	{
-		struct scifioctl_connect req;
-		struct scif_endpt *ep = (struct scif_endpt *)priv;
-
-		if (copy_from_user(&req, argp, sizeof(req)))
-			return -EFAULT;
-
-		err = __scif_connect(priv, &req.peer, non_block);
-		if (err < 0)
-			return err;
-
-		req.self.node = ep->port.node;
-		req.self.port = ep->port.port;
-
-		if (copy_to_user(argp, &req, sizeof(req)))
-			return -EFAULT;
-
-		return 0;
-	}
-	/*
-	 * Accept is done in two halves.  The request ioctl does the basic
-	 * functionality of accepting the request and returning the information
-	 * about it including the internal ID of the end point.  The register
-	 * is done with the internal ID on a new file descriptor opened by the
-	 * requesting process.
-	 */
-	case SCIF_ACCEPTREQ:
-	{
-		struct scifioctl_accept request;
-		scif_epd_t *ep = (scif_epd_t *)&request.endpt;
-
-		if (copy_from_user(&request, argp, sizeof(request)))
-			return -EFAULT;
-
-		err = scif_accept(priv, &request.peer, ep, request.flags);
-		if (err < 0)
-			return err;
-
-		if (copy_to_user(argp, &request, sizeof(request))) {
-			scif_close(*ep);
-			return -EFAULT;
-		}
-		/*
-		 * Add to the list of user mode eps where the second half
-		 * of the accept is not yet completed.
-		 */
-		mutex_lock(&scif_info.eplock);
-		list_add_tail(&((*ep)->miacceptlist), &scif_info.uaccept);
-		list_add_tail(&((*ep)->liacceptlist), &priv->li_accept);
-		(*ep)->listenep = priv;
-		priv->acceptcnt++;
-		mutex_unlock(&scif_info.eplock);
-
-		return 0;
-	}
-	case SCIF_ACCEPTREG:
-	{
-		struct scif_endpt *priv = f->private_data;
-		struct scif_endpt *newep;
-		struct scif_endpt *lisep;
-		struct scif_endpt *fep = NULL;
-		struct scif_endpt *tmpep;
-		struct list_head *pos, *tmpq;
-
-		/* Finally replace the pointer to the accepted endpoint */
-		if (copy_from_user(&newep, argp, sizeof(void *)))
-			return -EFAULT;
-
-		/* Remove form the user accept queue */
-		mutex_lock(&scif_info.eplock);
-		list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
-			tmpep = list_entry(pos,
-					   struct scif_endpt, miacceptlist);
-			if (tmpep == newep) {
-				list_del(pos);
-				fep = tmpep;
-				break;
-			}
-		}
-
-		if (!fep) {
-			mutex_unlock(&scif_info.eplock);
-			return -ENOENT;
-		}
-
-		lisep = newep->listenep;
-		list_for_each_safe(pos, tmpq, &lisep->li_accept) {
-			tmpep = list_entry(pos,
-					   struct scif_endpt, liacceptlist);
-			if (tmpep == newep) {
-				list_del(pos);
-				lisep->acceptcnt--;
-				break;
-			}
-		}
-
-		mutex_unlock(&scif_info.eplock);
-
-		/* Free the resources automatically created from the open. */
-		scif_anon_inode_fput(priv);
-		scif_teardown_ep(priv);
-		scif_add_epd_to_zombie_list(priv, !SCIF_EPLOCK_HELD);
-		f->private_data = newep;
-		return 0;
-	}
-	case SCIF_SEND:
-	{
-		struct scif_endpt *priv = f->private_data;
-
-		if (copy_from_user(&request, argp,
-				   sizeof(struct scifioctl_msg))) {
-			err = -EFAULT;
-			goto send_err;
-		}
-		err = scif_user_send(priv, (void __user *)request.msg,
-				     request.len, request.flags);
-		if (err < 0)
-			goto send_err;
-		if (copy_to_user(&
-				 ((struct scifioctl_msg __user *)argp)->out_len,
-				 &err, sizeof(err))) {
-			err = -EFAULT;
-			goto send_err;
-		}
-		err = 0;
-send_err:
-		scif_err_debug(err, "scif_send");
-		return err;
-	}
-	case SCIF_RECV:
-	{
-		struct scif_endpt *priv = f->private_data;
-
-		if (copy_from_user(&request, argp,
-				   sizeof(struct scifioctl_msg))) {
-			err = -EFAULT;
-			goto recv_err;
-		}
-
-		err = scif_user_recv(priv, (void __user *)request.msg,
-				     request.len, request.flags);
-		if (err < 0)
-			goto recv_err;
-
-		if (copy_to_user(&
-				 ((struct scifioctl_msg __user *)argp)->out_len,
-			&err, sizeof(err))) {
-			err = -EFAULT;
-			goto recv_err;
-		}
-		err = 0;
-recv_err:
-		scif_err_debug(err, "scif_recv");
-		return err;
-	}
-	case SCIF_GET_NODEIDS:
-	{
-		struct scifioctl_node_ids node_ids;
-		int entries;
-		u16 *nodes;
-		void __user *unodes, *uself;
-		u16 self;
-
-		if (copy_from_user(&node_ids, argp, sizeof(node_ids))) {
-			err = -EFAULT;
-			goto getnodes_err2;
-		}
-
-		entries = min_t(int, scif_info.maxid, node_ids.len);
-		nodes = kmalloc_array(entries, sizeof(u16), GFP_KERNEL);
-		if (entries && !nodes) {
-			err = -ENOMEM;
-			goto getnodes_err2;
-		}
-		node_ids.len = scif_get_node_ids(nodes, entries, &self);
-
-		unodes = (void __user *)node_ids.nodes;
-		if (copy_to_user(unodes, nodes, sizeof(u16) * entries)) {
-			err = -EFAULT;
-			goto getnodes_err1;
-		}
-
-		uself = (void __user *)node_ids.self;
-		if (copy_to_user(uself, &self, sizeof(u16))) {
-			err = -EFAULT;
-			goto getnodes_err1;
-		}
-
-		if (copy_to_user(argp, &node_ids, sizeof(node_ids))) {
-			err = -EFAULT;
-			goto getnodes_err1;
-		}
-getnodes_err1:
-		kfree(nodes);
-getnodes_err2:
-		return err;
-	}
-	case SCIF_REG:
-	{
-		struct scif_endpt *priv = f->private_data;
-		struct scifioctl_reg reg;
-		off_t ret;
-
-		if (copy_from_user(&reg, argp, sizeof(reg))) {
-			err = -EFAULT;
-			goto reg_err;
-		}
-		if (reg.flags & SCIF_MAP_KERNEL) {
-			err = -EINVAL;
-			goto reg_err;
-		}
-		ret = scif_register(priv, (void *)reg.addr, reg.len,
-				    reg.offset, reg.prot, reg.flags);
-		if (ret < 0) {
-			err = (int)ret;
-			goto reg_err;
-		}
-
-		if (copy_to_user(&((struct scifioctl_reg __user *)argp)
-				 ->out_offset, &ret, sizeof(reg.out_offset))) {
-			err = -EFAULT;
-			goto reg_err;
-		}
-		err = 0;
-reg_err:
-		scif_err_debug(err, "scif_register");
-		return err;
-	}
-	case SCIF_UNREG:
-	{
-		struct scif_endpt *priv = f->private_data;
-		struct scifioctl_unreg unreg;
-
-		if (copy_from_user(&unreg, argp, sizeof(unreg))) {
-			err = -EFAULT;
-			goto unreg_err;
-		}
-		err = scif_unregister(priv, unreg.offset, unreg.len);
-unreg_err:
-		scif_err_debug(err, "scif_unregister");
-		return err;
-	}
-	case SCIF_READFROM:
-	{
-		struct scif_endpt *priv = f->private_data;
-		struct scifioctl_copy copy;
-
-		if (copy_from_user(&copy, argp, sizeof(copy))) {
-			err = -EFAULT;
-			goto readfrom_err;
-		}
-		err = scif_readfrom(priv, copy.loffset, copy.len, copy.roffset,
-				    copy.flags);
-readfrom_err:
-		scif_err_debug(err, "scif_readfrom");
-		return err;
-	}
-	case SCIF_WRITETO:
-	{
-		struct scif_endpt *priv = f->private_data;
-		struct scifioctl_copy copy;
-
-		if (copy_from_user(&copy, argp, sizeof(copy))) {
-			err = -EFAULT;
-			goto writeto_err;
-		}
-		err = scif_writeto(priv, copy.loffset, copy.len, copy.roffset,
-				   copy.flags);
-writeto_err:
-		scif_err_debug(err, "scif_writeto");
-		return err;
-	}
-	case SCIF_VREADFROM:
-	{
-		struct scif_endpt *priv = f->private_data;
-		struct scifioctl_copy copy;
-
-		if (copy_from_user(&copy, argp, sizeof(copy))) {
-			err = -EFAULT;
-			goto vreadfrom_err;
-		}
-		err = scif_vreadfrom(priv, (void __force *)copy.addr, copy.len,
-				     copy.roffset, copy.flags);
-vreadfrom_err:
-		scif_err_debug(err, "scif_vreadfrom");
-		return err;
-	}
-	case SCIF_VWRITETO:
-	{
-		struct scif_endpt *priv = f->private_data;
-		struct scifioctl_copy copy;
-
-		if (copy_from_user(&copy, argp, sizeof(copy))) {
-			err = -EFAULT;
-			goto vwriteto_err;
-		}
-		err = scif_vwriteto(priv, (void __force *)copy.addr, copy.len,
-				    copy.roffset, copy.flags);
-vwriteto_err:
-		scif_err_debug(err, "scif_vwriteto");
-		return err;
-	}
-	case SCIF_FENCE_MARK:
-	{
-		struct scif_endpt *priv = f->private_data;
-		struct scifioctl_fence_mark mark;
-		int tmp_mark = 0;
-
-		if (copy_from_user(&mark, argp, sizeof(mark))) {
-			err = -EFAULT;
-			goto fence_mark_err;
-		}
-		err = scif_fence_mark(priv, mark.flags, &tmp_mark);
-		if (err)
-			goto fence_mark_err;
-		if (copy_to_user((void __user *)mark.mark, &tmp_mark,
-				 sizeof(tmp_mark))) {
-			err = -EFAULT;
-			goto fence_mark_err;
-		}
-fence_mark_err:
-		scif_err_debug(err, "scif_fence_mark");
-		return err;
-	}
-	case SCIF_FENCE_WAIT:
-	{
-		struct scif_endpt *priv = f->private_data;
-
-		err = scif_fence_wait(priv, arg);
-		scif_err_debug(err, "scif_fence_wait");
-		return err;
-	}
-	case SCIF_FENCE_SIGNAL:
-	{
-		struct scif_endpt *priv = f->private_data;
-		struct scifioctl_fence_signal signal;
-
-		if (copy_from_user(&signal, argp, sizeof(signal))) {
-			err = -EFAULT;
-			goto fence_signal_err;
-		}
-
-		err = scif_fence_signal(priv, signal.loff, signal.lval,
-					signal.roff, signal.rval, signal.flags);
-fence_signal_err:
-		scif_err_debug(err, "scif_fence_signal");
-		return err;
-	}
-	}
-	return -EINVAL;
-}
-
-const struct file_operations scif_fops = {
-	.open = scif_fdopen,
-	.release = scif_fdclose,
-	.unlocked_ioctl = scif_fdioctl,
-	.mmap = scif_fdmmap,
-	.poll = scif_fdpoll,
-	.flush = scif_fdflush,
-	.owner = THIS_MODULE,
-};
diff --git a/drivers/misc/mic/scif/scif_fence.c b/drivers/misc/mic/scif/scif_fence.c
deleted file mode 100644
index 4fedf6183951..000000000000
--- a/drivers/misc/mic/scif/scif_fence.c
+++ /dev/null
@@ -1,783 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * Intel SCIF driver.
- */
-
-#include "scif_main.h"
-
-/**
- * scif_recv_mark: Handle SCIF_MARK request
- * @scifdev:	SCIF device
- * @msg:	Interrupt message
- *
- * The peer has requested a mark.
- */
-void scif_recv_mark(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
-	int mark = 0;
-	int err;
-
-	err = _scif_fence_mark(ep, &mark);
-	if (err)
-		msg->uop = SCIF_MARK_NACK;
-	else
-		msg->uop = SCIF_MARK_ACK;
-	msg->payload[0] = ep->remote_ep;
-	msg->payload[2] = mark;
-	scif_nodeqp_send(ep->remote_dev, msg);
-}
-
-/**
- * scif_recv_mark_resp: Handle SCIF_MARK_(N)ACK messages.
- * @scifdev:	SCIF device
- * @msg:	Interrupt message
- *
- * The peer has responded to a SCIF_MARK message.
- */
-void scif_recv_mark_resp(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
-	struct scif_fence_info *fence_req =
-		(struct scif_fence_info *)msg->payload[1];
-
-	mutex_lock(&ep->rma_info.rma_lock);
-	if (msg->uop == SCIF_MARK_ACK) {
-		fence_req->state = OP_COMPLETED;
-		fence_req->dma_mark = (int)msg->payload[2];
-	} else {
-		fence_req->state = OP_FAILED;
-	}
-	mutex_unlock(&ep->rma_info.rma_lock);
-	complete(&fence_req->comp);
-}
-
-/**
- * scif_recv_wait: Handle SCIF_WAIT request
- * @scifdev:	SCIF device
- * @msg:	Interrupt message
- *
- * The peer has requested waiting on a fence.
- */
-void scif_recv_wait(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
-	struct scif_remote_fence_info *fence;
-
-	/*
-	 * Allocate structure for remote fence information and
-	 * send a NACK if the allocation failed. The peer will
-	 * return ENOMEM upon receiving a NACK.
-	 */
-	fence = kmalloc(sizeof(*fence), GFP_KERNEL);
-	if (!fence) {
-		msg->payload[0] = ep->remote_ep;
-		msg->uop = SCIF_WAIT_NACK;
-		scif_nodeqp_send(ep->remote_dev, msg);
-		return;
-	}
-
-	/* Prepare the fence request */
-	memcpy(&fence->msg, msg, sizeof(struct scifmsg));
-	INIT_LIST_HEAD(&fence->list);
-
-	/* Insert to the global remote fence request list */
-	mutex_lock(&scif_info.fencelock);
-	atomic_inc(&ep->rma_info.fence_refcount);
-	list_add_tail(&fence->list, &scif_info.fence);
-	mutex_unlock(&scif_info.fencelock);
-
-	schedule_work(&scif_info.misc_work);
-}
-
-/**
- * scif_recv_wait_resp: Handle SCIF_WAIT_(N)ACK messages.
- * @scifdev:	SCIF device
- * @msg:	Interrupt message
- *
- * The peer has responded to a SCIF_WAIT message.
- */
-void scif_recv_wait_resp(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
-	struct scif_fence_info *fence_req =
-		(struct scif_fence_info *)msg->payload[1];
-
-	mutex_lock(&ep->rma_info.rma_lock);
-	if (msg->uop == SCIF_WAIT_ACK)
-		fence_req->state = OP_COMPLETED;
-	else
-		fence_req->state = OP_FAILED;
-	mutex_unlock(&ep->rma_info.rma_lock);
-	complete(&fence_req->comp);
-}
-
-/**
- * scif_recv_sig_local: Handle SCIF_SIG_LOCAL request
- * @scifdev:	SCIF device
- * @msg:	Interrupt message
- *
- * The peer has requested a signal on a local offset.
- */
-void scif_recv_sig_local(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
-	int err;
-
-	err = scif_prog_signal(ep, msg->payload[1], msg->payload[2],
-			       SCIF_WINDOW_SELF);
-	if (err)
-		msg->uop = SCIF_SIG_NACK;
-	else
-		msg->uop = SCIF_SIG_ACK;
-	msg->payload[0] = ep->remote_ep;
-	scif_nodeqp_send(ep->remote_dev, msg);
-}
-
-/**
- * scif_recv_sig_remote: Handle SCIF_SIGNAL_REMOTE request
- * @scifdev:	SCIF device
- * @msg:	Interrupt message
- *
- * The peer has requested a signal on a remote offset.
- */
-void scif_recv_sig_remote(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
-	int err;
-
-	err = scif_prog_signal(ep, msg->payload[1], msg->payload[2],
-			       SCIF_WINDOW_PEER);
-	if (err)
-		msg->uop = SCIF_SIG_NACK;
-	else
-		msg->uop = SCIF_SIG_ACK;
-	msg->payload[0] = ep->remote_ep;
-	scif_nodeqp_send(ep->remote_dev, msg);
-}
-
-/**
- * scif_recv_sig_resp: Handle SCIF_SIG_(N)ACK messages.
- * @scifdev:	SCIF device
- * @msg:	Interrupt message
- *
- * The peer has responded to a signal request.
- */
-void scif_recv_sig_resp(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
-	struct scif_fence_info *fence_req =
-		(struct scif_fence_info *)msg->payload[3];
-
-	mutex_lock(&ep->rma_info.rma_lock);
-	if (msg->uop == SCIF_SIG_ACK)
-		fence_req->state = OP_COMPLETED;
-	else
-		fence_req->state = OP_FAILED;
-	mutex_unlock(&ep->rma_info.rma_lock);
-	complete(&fence_req->comp);
-}
-
-static inline void *scif_get_local_va(off_t off, struct scif_window *window)
-{
-	struct page **pages = window->pinned_pages->pages;
-	int page_nr = (off - window->offset) >> PAGE_SHIFT;
-	off_t page_off = off & ~PAGE_MASK;
-
-	return page_address(pages[page_nr]) + page_off;
-}
-
-static void scif_prog_signal_cb(void *arg)
-{
-	struct scif_cb_arg *cb_arg = arg;
-
-	dma_pool_free(cb_arg->ep->remote_dev->signal_pool, cb_arg->status,
-		      cb_arg->src_dma_addr);
-	kfree(cb_arg);
-}
-
-static int _scif_prog_signal(scif_epd_t epd, dma_addr_t dst, u64 val)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)epd;
-	struct dma_chan *chan = ep->rma_info.dma_chan;
-	struct dma_device *ddev = chan->device;
-	bool x100 = !is_dma_copy_aligned(chan->device, 1, 1, 1);
-	struct dma_async_tx_descriptor *tx;
-	struct scif_status *status = NULL;
-	struct scif_cb_arg *cb_arg = NULL;
-	dma_addr_t src;
-	dma_cookie_t cookie;
-	int err;
-
-	tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_FENCE);
-	if (!tx) {
-		err = -ENOMEM;
-		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
-			__func__, __LINE__, err);
-		goto alloc_fail;
-	}
-	cookie = tx->tx_submit(tx);
-	if (dma_submit_error(cookie)) {
-		err = (int)cookie;
-		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
-			__func__, __LINE__, err);
-		goto alloc_fail;
-	}
-	dma_async_issue_pending(chan);
-	if (x100) {
-		/*
-		 * For X100 use the status descriptor to write the value to
-		 * the destination.
-		 */
-		tx = ddev->device_prep_dma_imm_data(chan, dst, val, 0);
-	} else {
-		status = dma_pool_alloc(ep->remote_dev->signal_pool, GFP_KERNEL,
-					&src);
-		if (!status) {
-			err = -ENOMEM;
-			dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
-				__func__, __LINE__, err);
-			goto alloc_fail;
-		}
-		status->val = val;
-		status->src_dma_addr = src;
-		status->ep = ep;
-		src += offsetof(struct scif_status, val);
-		tx = ddev->device_prep_dma_memcpy(chan, dst, src, sizeof(val),
-						  DMA_PREP_INTERRUPT);
-	}
-	if (!tx) {
-		err = -ENOMEM;
-		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
-			__func__, __LINE__, err);
-		goto dma_fail;
-	}
-	if (!x100) {
-		cb_arg = kmalloc(sizeof(*cb_arg), GFP_KERNEL);
-		if (!cb_arg) {
-			err = -ENOMEM;
-			goto dma_fail;
-		}
-		cb_arg->src_dma_addr = src;
-		cb_arg->status = status;
-		cb_arg->ep = ep;
-		tx->callback = scif_prog_signal_cb;
-		tx->callback_param = cb_arg;
-	}
-	cookie = tx->tx_submit(tx);
-	if (dma_submit_error(cookie)) {
-		err = -EIO;
-		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
-			__func__, __LINE__, err);
-		goto dma_fail;
-	}
-	dma_async_issue_pending(chan);
-	return 0;
-dma_fail:
-	if (!x100) {
-		dma_pool_free(ep->remote_dev->signal_pool, status,
-			      src - offsetof(struct scif_status, val));
-		kfree(cb_arg);
-	}
-alloc_fail:
-	return err;
-}
-
-/**
- * scif_prog_signal:
- * @epd: Endpoint Descriptor
- * @offset: registered address to write @val to
- * @val: Value to be written at @offset
- * @type: Type of the window.
- *
- * Arrange to write a value to the registered offset after ensuring that the
- * offset provided is indeed valid.
- */
-int scif_prog_signal(scif_epd_t epd, off_t offset, u64 val,
-		     enum scif_window_type type)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)epd;
-	struct scif_window *window = NULL;
-	struct scif_rma_req req;
-	dma_addr_t dst_dma_addr;
-	int err;
-
-	mutex_lock(&ep->rma_info.rma_lock);
-	req.out_window = &window;
-	req.offset = offset;
-	req.nr_bytes = sizeof(u64);
-	req.prot = SCIF_PROT_WRITE;
-	req.type = SCIF_WINDOW_SINGLE;
-	if (type == SCIF_WINDOW_SELF)
-		req.head = &ep->rma_info.reg_list;
-	else
-		req.head = &ep->rma_info.remote_reg_list;
-	/* Does a valid window exist? */
-	err = scif_query_window(&req);
-	if (err) {
-		dev_err(scif_info.mdev.this_device,
-			"%s %d err %d\n", __func__, __LINE__, err);
-		goto unlock_ret;
-	}
-
-	if (scif_is_mgmt_node() && scifdev_self(ep->remote_dev)) {
-		u64 *dst_virt;
-
-		if (type == SCIF_WINDOW_SELF)
-			dst_virt = scif_get_local_va(offset, window);
-		else
-			dst_virt =
-			scif_get_local_va(offset, (struct scif_window *)
-					  window->peer_window);
-		*dst_virt = val;
-	} else {
-		dst_dma_addr = __scif_off_to_dma_addr(window, offset);
-		err = _scif_prog_signal(epd, dst_dma_addr, val);
-	}
-unlock_ret:
-	mutex_unlock(&ep->rma_info.rma_lock);
-	return err;
-}
-
-static int _scif_fence_wait(scif_epd_t epd, int mark)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)epd;
-	dma_cookie_t cookie = mark & ~SCIF_REMOTE_FENCE;
-	int err;
-
-	/* Wait for DMA callback in scif_fence_mark_cb(..) */
-	err = wait_event_interruptible_timeout(ep->rma_info.markwq,
-					       dma_async_is_tx_complete(
-					       ep->rma_info.dma_chan,
-					       cookie, NULL, NULL) ==
-					       DMA_COMPLETE,
-					       SCIF_NODE_ALIVE_TIMEOUT);
-	if (!err)
-		err = -ETIMEDOUT;
-	else if (err > 0)
-		err = 0;
-	return err;
-}
-
-/**
- * scif_rma_handle_remote_fences:
- *
- * This routine services remote fence requests.
- */
-void scif_rma_handle_remote_fences(void)
-{
-	struct list_head *item, *tmp;
-	struct scif_remote_fence_info *fence;
-	struct scif_endpt *ep;
-	int mark, err;
-
-	might_sleep();
-	mutex_lock(&scif_info.fencelock);
-	list_for_each_safe(item, tmp, &scif_info.fence) {
-		fence = list_entry(item, struct scif_remote_fence_info,
-				   list);
-		/* Remove fence from global list */
-		list_del(&fence->list);
-
-		/* Initiate the fence operation */
-		ep = (struct scif_endpt *)fence->msg.payload[0];
-		mark = fence->msg.payload[2];
-		err = _scif_fence_wait(ep, mark);
-		if (err)
-			fence->msg.uop = SCIF_WAIT_NACK;
-		else
-			fence->msg.uop = SCIF_WAIT_ACK;
-		fence->msg.payload[0] = ep->remote_ep;
-		scif_nodeqp_send(ep->remote_dev, &fence->msg);
-		kfree(fence);
-		if (!atomic_sub_return(1, &ep->rma_info.fence_refcount))
-			schedule_work(&scif_info.misc_work);
-	}
-	mutex_unlock(&scif_info.fencelock);
-}
-
-static int _scif_send_fence(scif_epd_t epd, int uop, int mark, int *out_mark)
-{
-	int err;
-	struct scifmsg msg;
-	struct scif_fence_info *fence_req;
-	struct scif_endpt *ep = (struct scif_endpt *)epd;
-
-	fence_req = kmalloc(sizeof(*fence_req), GFP_KERNEL);
-	if (!fence_req) {
-		err = -ENOMEM;
-		goto error;
-	}
-
-	fence_req->state = OP_IN_PROGRESS;
-	init_completion(&fence_req->comp);
-
-	msg.src = ep->port;
-	msg.uop = uop;
-	msg.payload[0] = ep->remote_ep;
-	msg.payload[1] = (u64)fence_req;
-	if (uop == SCIF_WAIT)
-		msg.payload[2] = mark;
-	spin_lock(&ep->lock);
-	if (ep->state == SCIFEP_CONNECTED)
-		err = scif_nodeqp_send(ep->remote_dev, &msg);
-	else
-		err = -ENOTCONN;
-	spin_unlock(&ep->lock);
-	if (err)
-		goto error_free;
-retry:
-	/* Wait for a SCIF_WAIT_(N)ACK message */
-	err = wait_for_completion_timeout(&fence_req->comp,
-					  SCIF_NODE_ALIVE_TIMEOUT);
-	if (!err && scifdev_alive(ep))
-		goto retry;
-	if (!err)
-		err = -ENODEV;
-	if (err > 0)
-		err = 0;
-	mutex_lock(&ep->rma_info.rma_lock);
-	if (err < 0) {
-		if (fence_req->state == OP_IN_PROGRESS)
-			fence_req->state = OP_FAILED;
-	}
-	if (fence_req->state == OP_FAILED && !err)
-		err = -ENOMEM;
-	if (uop == SCIF_MARK && fence_req->state == OP_COMPLETED)
-		*out_mark = SCIF_REMOTE_FENCE | fence_req->dma_mark;
-	mutex_unlock(&ep->rma_info.rma_lock);
-error_free:
-	kfree(fence_req);
-error:
-	return err;
-}
-
-/**
- * scif_send_fence_mark:
- * @epd: end point descriptor.
- * @out_mark: Output DMA mark reported by peer.
- *
- * Send a remote fence mark request.
- */
-static int scif_send_fence_mark(scif_epd_t epd, int *out_mark)
-{
-	return _scif_send_fence(epd, SCIF_MARK, 0, out_mark);
-}
-
-/**
- * scif_send_fence_wait:
- * @epd: end point descriptor.
- * @mark: DMA mark to wait for.
- *
- * Send a remote fence wait request.
- */
-static int scif_send_fence_wait(scif_epd_t epd, int mark)
-{
-	return _scif_send_fence(epd, SCIF_WAIT, mark, NULL);
-}
-
-static int _scif_send_fence_signal_wait(struct scif_endpt *ep,
-					struct scif_fence_info *fence_req)
-{
-	int err;
-
-retry:
-	/* Wait for a SCIF_SIG_(N)ACK message */
-	err = wait_for_completion_timeout(&fence_req->comp,
-					  SCIF_NODE_ALIVE_TIMEOUT);
-	if (!err && scifdev_alive(ep))
-		goto retry;
-	if (!err)
-		err = -ENODEV;
-	if (err > 0)
-		err = 0;
-	if (err < 0) {
-		mutex_lock(&ep->rma_info.rma_lock);
-		if (fence_req->state == OP_IN_PROGRESS)
-			fence_req->state = OP_FAILED;
-		mutex_unlock(&ep->rma_info.rma_lock);
-	}
-	if (fence_req->state == OP_FAILED && !err)
-		err = -ENXIO;
-	return err;
-}
-
-/**
- * scif_send_fence_signal:
- * @epd: endpoint descriptor
- * @loff: local offset
- * @lval: local value to write to loffset
- * @roff: remote offset
- * @rval: remote value to write to roffset
- * @flags: flags
- *
- * Sends a remote fence signal request
- */
-static int scif_send_fence_signal(scif_epd_t epd, off_t roff, u64 rval,
-				  off_t loff, u64 lval, int flags)
-{
-	int err = 0;
-	struct scifmsg msg;
-	struct scif_fence_info *fence_req;
-	struct scif_endpt *ep = (struct scif_endpt *)epd;
-
-	fence_req = kmalloc(sizeof(*fence_req), GFP_KERNEL);
-	if (!fence_req) {
-		err = -ENOMEM;
-		goto error;
-	}
-
-	fence_req->state = OP_IN_PROGRESS;
-	init_completion(&fence_req->comp);
-	msg.src = ep->port;
-	if (flags & SCIF_SIGNAL_LOCAL) {
-		msg.uop = SCIF_SIG_LOCAL;
-		msg.payload[0] = ep->remote_ep;
-		msg.payload[1] = roff;
-		msg.payload[2] = rval;
-		msg.payload[3] = (u64)fence_req;
-		spin_lock(&ep->lock);
-		if (ep->state == SCIFEP_CONNECTED)
-			err = scif_nodeqp_send(ep->remote_dev, &msg);
-		else
-			err = -ENOTCONN;
-		spin_unlock(&ep->lock);
-		if (err)
-			goto error_free;
-		err = _scif_send_fence_signal_wait(ep, fence_req);
-		if (err)
-			goto error_free;
-	}
-	fence_req->state = OP_IN_PROGRESS;
-
-	if (flags & SCIF_SIGNAL_REMOTE) {
-		msg.uop = SCIF_SIG_REMOTE;
-		msg.payload[0] = ep->remote_ep;
-		msg.payload[1] = loff;
-		msg.payload[2] = lval;
-		msg.payload[3] = (u64)fence_req;
-		spin_lock(&ep->lock);
-		if (ep->state == SCIFEP_CONNECTED)
-			err = scif_nodeqp_send(ep->remote_dev, &msg);
-		else
-			err = -ENOTCONN;
-		spin_unlock(&ep->lock);
-		if (err)
-			goto error_free;
-		err = _scif_send_fence_signal_wait(ep, fence_req);
-	}
-error_free:
-	kfree(fence_req);
-error:
-	return err;
-}
-
-static void scif_fence_mark_cb(void *arg)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)arg;
-
-	wake_up_interruptible(&ep->rma_info.markwq);
-	atomic_dec(&ep->rma_info.fence_refcount);
-}
-
-/**
- * _scif_fence_mark:
- * @epd: endpoint descriptor
- * @mark: DMA mark to set-up
- *
- * Set up a mark for this endpoint and return the value of the mark.
- */
-int _scif_fence_mark(scif_epd_t epd, int *mark)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)epd;
-	struct dma_chan *chan = ep->rma_info.dma_chan;
-	struct dma_device *ddev = chan->device;
-	struct dma_async_tx_descriptor *tx;
-	dma_cookie_t cookie;
-	int err;
-
-	tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_FENCE);
-	if (!tx) {
-		err = -ENOMEM;
-		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
-			__func__, __LINE__, err);
-		return err;
-	}
-	cookie = tx->tx_submit(tx);
-	if (dma_submit_error(cookie)) {
-		err = (int)cookie;
-		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
-			__func__, __LINE__, err);
-		return err;
-	}
-	dma_async_issue_pending(chan);
-	tx = ddev->device_prep_dma_interrupt(chan, DMA_PREP_INTERRUPT);
-	if (!tx) {
-		err = -ENOMEM;
-		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
-			__func__, __LINE__, err);
-		return err;
-	}
-	tx->callback = scif_fence_mark_cb;
-	tx->callback_param = ep;
-	*mark = cookie = tx->tx_submit(tx);
-	if (dma_submit_error(cookie)) {
-		err = (int)cookie;
-		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
-			__func__, __LINE__, err);
-		return err;
-	}
-	atomic_inc(&ep->rma_info.fence_refcount);
-	dma_async_issue_pending(chan);
-	return 0;
-}
-
-#define SCIF_LOOPB_MAGIC_MARK 0xdead
-
-int scif_fence_mark(scif_epd_t epd, int flags, int *mark)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)epd;
-	int err = 0;
-
-	dev_dbg(scif_info.mdev.this_device,
-		"SCIFAPI fence_mark: ep %p flags 0x%x mark 0x%x\n",
-		ep, flags, *mark);
-	err = scif_verify_epd(ep);
-	if (err)
-		return err;
-
-	/* Invalid flags? */
-	if (flags & ~(SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER))
-		return -EINVAL;
-
-	/* At least one of init self or peer RMA should be set */
-	if (!(flags & (SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER)))
-		return -EINVAL;
-
-	/* Exactly one of init self or peer RMA should be set but not both */
-	if ((flags & SCIF_FENCE_INIT_SELF) && (flags & SCIF_FENCE_INIT_PEER))
-		return -EINVAL;
-
-	/*
-	 * Management node loopback does not need to use DMA.
-	 * Return a valid mark to be symmetric.
-	 */
-	if (scifdev_self(ep->remote_dev) && scif_is_mgmt_node()) {
-		*mark = SCIF_LOOPB_MAGIC_MARK;
-		return 0;
-	}
-
-	if (flags & SCIF_FENCE_INIT_SELF)
-		err = _scif_fence_mark(epd, mark);
-	else
-		err = scif_send_fence_mark(ep, mark);
-
-	if (err)
-		dev_err(scif_info.mdev.this_device,
-			"%s %d err %d\n", __func__, __LINE__, err);
-	dev_dbg(scif_info.mdev.this_device,
-		"SCIFAPI fence_mark: ep %p flags 0x%x mark 0x%x err %d\n",
-		ep, flags, *mark, err);
-	return err;
-}
-EXPORT_SYMBOL_GPL(scif_fence_mark);
-
-int scif_fence_wait(scif_epd_t epd, int mark)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)epd;
-	int err = 0;
-
-	dev_dbg(scif_info.mdev.this_device,
-		"SCIFAPI fence_wait: ep %p mark 0x%x\n",
-		ep, mark);
-	err = scif_verify_epd(ep);
-	if (err)
-		return err;
-	/*
-	 * Management node loopback does not need to use DMA.
-	 * The only valid mark provided is 0 so simply
-	 * return success if the mark is valid.
-	 */
-	if (scifdev_self(ep->remote_dev) && scif_is_mgmt_node()) {
-		if (mark == SCIF_LOOPB_MAGIC_MARK)
-			return 0;
-		else
-			return -EINVAL;
-	}
-	if (mark & SCIF_REMOTE_FENCE)
-		err = scif_send_fence_wait(epd, mark);
-	else
-		err = _scif_fence_wait(epd, mark);
-	if (err < 0)
-		dev_err(scif_info.mdev.this_device,
-			"%s %d err %d\n", __func__, __LINE__, err);
-	return err;
-}
-EXPORT_SYMBOL_GPL(scif_fence_wait);
-
-int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval,
-		      off_t roff, u64 rval, int flags)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)epd;
-	int err = 0;
-
-	dev_dbg(scif_info.mdev.this_device,
-		"SCIFAPI fence_signal: ep %p loff 0x%lx lval 0x%llx roff 0x%lx rval 0x%llx flags 0x%x\n",
-		ep, loff, lval, roff, rval, flags);
-	err = scif_verify_epd(ep);
-	if (err)
-		return err;
-
-	/* Invalid flags? */
-	if (flags & ~(SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER |
-			SCIF_SIGNAL_LOCAL | SCIF_SIGNAL_REMOTE))
-		return -EINVAL;
-
-	/* At least one of init self or peer RMA should be set */
-	if (!(flags & (SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER)))
-		return -EINVAL;
-
-	/* Exactly one of init self or peer RMA should be set but not both */
-	if ((flags & SCIF_FENCE_INIT_SELF) && (flags & SCIF_FENCE_INIT_PEER))
-		return -EINVAL;
-
-	/* At least one of SCIF_SIGNAL_LOCAL or SCIF_SIGNAL_REMOTE required */
-	if (!(flags & (SCIF_SIGNAL_LOCAL | SCIF_SIGNAL_REMOTE)))
-		return -EINVAL;
-
-	/* Only Dword offsets allowed */
-	if ((flags & SCIF_SIGNAL_LOCAL) && (loff & (sizeof(u32) - 1)))
-		return -EINVAL;
-
-	/* Only Dword aligned offsets allowed */
-	if ((flags & SCIF_SIGNAL_REMOTE) && (roff & (sizeof(u32) - 1)))
-		return -EINVAL;
-
-	if (flags & SCIF_FENCE_INIT_PEER) {
-		err = scif_send_fence_signal(epd, roff, rval, loff,
-					     lval, flags);
-	} else {
-		/* Local Signal in Local RAS */
-		if (flags & SCIF_SIGNAL_LOCAL) {
-			err = scif_prog_signal(epd, loff, lval,
-					       SCIF_WINDOW_SELF);
-			if (err)
-				goto error_ret;
-		}
-
-		/* Signal in Remote RAS */
-		if (flags & SCIF_SIGNAL_REMOTE)
-			err = scif_prog_signal(epd, roff,
-					       rval, SCIF_WINDOW_PEER);
-	}
-error_ret:
-	if (err)
-		dev_err(scif_info.mdev.this_device,
-			"%s %d err %d\n", __func__, __LINE__, err);
-	return err;
-}
-EXPORT_SYMBOL_GPL(scif_fence_signal);
diff --git a/drivers/misc/mic/scif/scif_main.c b/drivers/misc/mic/scif/scif_main.c
deleted file mode 100644
index e2278bf9f11d..000000000000
--- a/drivers/misc/mic/scif/scif_main.c
+++ /dev/null
@@ -1,351 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Intel SCIF driver.
- */
-#include <linux/module.h>
-#include <linux/idr.h>
-
-#include <linux/mic_common.h>
-#include "../common/mic_dev.h"
-#include "../bus/scif_bus.h"
-#include "scif_peer_bus.h"
-#include "scif_main.h"
-#include "scif_map.h"
-
-struct scif_info scif_info = {
-	.mdev = {
-		.minor = MISC_DYNAMIC_MINOR,
-		.name = "scif",
-		.fops = &scif_fops,
-	}
-};
-
-struct scif_dev *scif_dev;
-struct kmem_cache *unaligned_cache;
-static atomic_t g_loopb_cnt;
-
-/* Runs in the context of intr_wq */
-static void scif_intr_bh_handler(struct work_struct *work)
-{
-	struct scif_dev *scifdev =
-			container_of(work, struct scif_dev, intr_bh);
-
-	if (scifdev_self(scifdev))
-		scif_loopb_msg_handler(scifdev, scifdev->qpairs);
-	else
-		scif_nodeqp_intrhandler(scifdev, scifdev->qpairs);
-}
-
-int scif_setup_intr_wq(struct scif_dev *scifdev)
-{
-	if (!scifdev->intr_wq) {
-		snprintf(scifdev->intr_wqname, sizeof(scifdev->intr_wqname),
-			 "SCIF INTR %d", scifdev->node);
-		scifdev->intr_wq =
-			alloc_ordered_workqueue(scifdev->intr_wqname, 0);
-		if (!scifdev->intr_wq)
-			return -ENOMEM;
-		INIT_WORK(&scifdev->intr_bh, scif_intr_bh_handler);
-	}
-	return 0;
-}
-
-void scif_destroy_intr_wq(struct scif_dev *scifdev)
-{
-	if (scifdev->intr_wq) {
-		destroy_workqueue(scifdev->intr_wq);
-		scifdev->intr_wq = NULL;
-	}
-}
-
-irqreturn_t scif_intr_handler(int irq, void *data)
-{
-	struct scif_dev *scifdev = data;
-	struct scif_hw_dev *sdev = scifdev->sdev;
-
-	sdev->hw_ops->ack_interrupt(sdev, scifdev->db);
-	queue_work(scifdev->intr_wq, &scifdev->intr_bh);
-	return IRQ_HANDLED;
-}
-
-static void scif_qp_setup_handler(struct work_struct *work)
-{
-	struct scif_dev *scifdev = container_of(work, struct scif_dev,
-						qp_dwork.work);
-	struct scif_hw_dev *sdev = scifdev->sdev;
-	dma_addr_t da = 0;
-	int err;
-
-	if (scif_is_mgmt_node()) {
-		struct mic_bootparam *bp = sdev->dp;
-
-		da = bp->scif_card_dma_addr;
-		scifdev->rdb = bp->h2c_scif_db;
-	} else {
-		struct mic_bootparam __iomem *bp = sdev->rdp;
-
-		da = readq(&bp->scif_host_dma_addr);
-		scifdev->rdb = ioread8(&bp->c2h_scif_db);
-	}
-	if (da) {
-		err = scif_qp_response(da, scifdev);
-		if (err)
-			dev_err(&scifdev->sdev->dev,
-				"scif_qp_response err %d\n", err);
-	} else {
-		schedule_delayed_work(&scifdev->qp_dwork,
-				      msecs_to_jiffies(1000));
-	}
-}
-
-static int scif_setup_scifdev(void)
-{
-	/* We support a maximum of 129 SCIF nodes including the mgmt node */
-#define MAX_SCIF_NODES 129
-	int i;
-	u8 num_nodes = MAX_SCIF_NODES;
-
-	scif_dev = kcalloc(num_nodes, sizeof(*scif_dev), GFP_KERNEL);
-	if (!scif_dev)
-		return -ENOMEM;
-	for (i = 0; i < num_nodes; i++) {
-		struct scif_dev *scifdev = &scif_dev[i];
-
-		scifdev->node = i;
-		scifdev->exit = OP_IDLE;
-		init_waitqueue_head(&scifdev->disconn_wq);
-		mutex_init(&scifdev->lock);
-		INIT_WORK(&scifdev->peer_add_work, scif_add_peer_device);
-		INIT_DELAYED_WORK(&scifdev->p2p_dwork,
-				  scif_poll_qp_state);
-		INIT_DELAYED_WORK(&scifdev->qp_dwork,
-				  scif_qp_setup_handler);
-		INIT_LIST_HEAD(&scifdev->p2p);
-		RCU_INIT_POINTER(scifdev->spdev, NULL);
-	}
-	return 0;
-}
-
-static void scif_destroy_scifdev(void)
-{
-	kfree(scif_dev);
-	scif_dev = NULL;
-}
-
-static int scif_probe(struct scif_hw_dev *sdev)
-{
-	struct scif_dev *scifdev = &scif_dev[sdev->dnode];
-	int rc;
-
-	dev_set_drvdata(&sdev->dev, sdev);
-	scifdev->sdev = sdev;
-
-	if (1 == atomic_add_return(1, &g_loopb_cnt)) {
-		struct scif_dev *loopb_dev = &scif_dev[sdev->snode];
-
-		loopb_dev->sdev = sdev;
-		rc = scif_setup_loopback_qp(loopb_dev);
-		if (rc)
-			goto exit;
-	}
-
-	rc = scif_setup_intr_wq(scifdev);
-	if (rc)
-		goto destroy_loopb;
-	rc = scif_setup_qp(scifdev);
-	if (rc)
-		goto destroy_intr;
-	scifdev->db = sdev->hw_ops->next_db(sdev);
-	scifdev->cookie = sdev->hw_ops->request_irq(sdev, scif_intr_handler,
-						    "SCIF_INTR", scifdev,
-						    scifdev->db);
-	if (IS_ERR(scifdev->cookie)) {
-		rc = PTR_ERR(scifdev->cookie);
-		goto free_qp;
-	}
-	if (scif_is_mgmt_node()) {
-		struct mic_bootparam *bp = sdev->dp;
-
-		bp->c2h_scif_db = scifdev->db;
-		bp->scif_host_dma_addr = scifdev->qp_dma_addr;
-	} else {
-		struct mic_bootparam __iomem *bp = sdev->rdp;
-
-		iowrite8(scifdev->db, &bp->h2c_scif_db);
-		writeq(scifdev->qp_dma_addr, &bp->scif_card_dma_addr);
-	}
-	schedule_delayed_work(&scifdev->qp_dwork,
-			      msecs_to_jiffies(1000));
-	return rc;
-free_qp:
-	scif_free_qp(scifdev);
-destroy_intr:
-	scif_destroy_intr_wq(scifdev);
-destroy_loopb:
-	if (atomic_dec_and_test(&g_loopb_cnt))
-		scif_destroy_loopback_qp(&scif_dev[sdev->snode]);
-exit:
-	return rc;
-}
-
-void scif_stop(struct scif_dev *scifdev)
-{
-	struct scif_dev *dev;
-	int i;
-
-	for (i = scif_info.maxid; i >= 0; i--) {
-		dev = &scif_dev[i];
-		if (scifdev_self(dev))
-			continue;
-		scif_handle_remove_node(i);
-	}
-}
-
-static void scif_remove(struct scif_hw_dev *sdev)
-{
-	struct scif_dev *scifdev = &scif_dev[sdev->dnode];
-
-	if (scif_is_mgmt_node()) {
-		struct mic_bootparam *bp = sdev->dp;
-
-		bp->c2h_scif_db = -1;
-		bp->scif_host_dma_addr = 0x0;
-	} else {
-		struct mic_bootparam __iomem *bp = sdev->rdp;
-
-		iowrite8(-1, &bp->h2c_scif_db);
-		writeq(0x0, &bp->scif_card_dma_addr);
-	}
-	if (scif_is_mgmt_node()) {
-		scif_disconnect_node(scifdev->node, true);
-	} else {
-		scif_info.card_initiated_exit = true;
-		scif_stop(scifdev);
-	}
-	if (atomic_dec_and_test(&g_loopb_cnt))
-		scif_destroy_loopback_qp(&scif_dev[sdev->snode]);
-	if (scifdev->cookie) {
-		sdev->hw_ops->free_irq(sdev, scifdev->cookie, scifdev);
-		scifdev->cookie = NULL;
-	}
-	scif_destroy_intr_wq(scifdev);
-	cancel_delayed_work(&scifdev->qp_dwork);
-	scif_free_qp(scifdev);
-	scifdev->rdb = -1;
-	scifdev->sdev = NULL;
-}
-
-static struct scif_hw_dev_id id_table[] = {
-	{ MIC_SCIF_DEV, SCIF_DEV_ANY_ID },
-	{ 0 },
-};
-
-static struct scif_driver scif_driver = {
-	.driver.name =	KBUILD_MODNAME,
-	.driver.owner =	THIS_MODULE,
-	.id_table = id_table,
-	.probe = scif_probe,
-	.remove = scif_remove,
-};
-
-static int _scif_init(void)
-{
-	int rc;
-
-	mutex_init(&scif_info.eplock);
-	spin_lock_init(&scif_info.rmalock);
-	spin_lock_init(&scif_info.nb_connect_lock);
-	spin_lock_init(&scif_info.port_lock);
-	mutex_init(&scif_info.conflock);
-	mutex_init(&scif_info.connlock);
-	mutex_init(&scif_info.fencelock);
-	INIT_LIST_HEAD(&scif_info.uaccept);
-	INIT_LIST_HEAD(&scif_info.listen);
-	INIT_LIST_HEAD(&scif_info.zombie);
-	INIT_LIST_HEAD(&scif_info.connected);
-	INIT_LIST_HEAD(&scif_info.disconnected);
-	INIT_LIST_HEAD(&scif_info.rma);
-	INIT_LIST_HEAD(&scif_info.rma_tc);
-	INIT_LIST_HEAD(&scif_info.mmu_notif_cleanup);
-	INIT_LIST_HEAD(&scif_info.fence);
-	INIT_LIST_HEAD(&scif_info.nb_connect_list);
-	init_waitqueue_head(&scif_info.exitwq);
-	scif_info.rma_tc_limit = SCIF_RMA_TEMP_CACHE_LIMIT;
-	scif_info.en_msg_log = 0;
-	scif_info.p2p_enable = 1;
-	rc = scif_setup_scifdev();
-	if (rc)
-		goto error;
-	unaligned_cache = kmem_cache_create("Unaligned_DMA",
-					    SCIF_KMEM_UNALIGNED_BUF_SIZE,
-					    0, SLAB_HWCACHE_ALIGN, NULL);
-	if (!unaligned_cache) {
-		rc = -ENOMEM;
-		goto free_sdev;
-	}
-	INIT_WORK(&scif_info.misc_work, scif_misc_handler);
-	INIT_WORK(&scif_info.mmu_notif_work, scif_mmu_notif_handler);
-	INIT_WORK(&scif_info.conn_work, scif_conn_handler);
-	idr_init(&scif_ports);
-	return 0;
-free_sdev:
-	scif_destroy_scifdev();
-error:
-	return rc;
-}
-
-static void _scif_exit(void)
-{
-	idr_destroy(&scif_ports);
-	kmem_cache_destroy(unaligned_cache);
-	scif_destroy_scifdev();
-}
-
-static int __init scif_init(void)
-{
-	struct miscdevice *mdev = &scif_info.mdev;
-	int rc;
-
-	_scif_init();
-	iova_cache_get();
-	rc = scif_peer_bus_init();
-	if (rc)
-		goto exit;
-	rc = scif_register_driver(&scif_driver);
-	if (rc)
-		goto peer_bus_exit;
-	rc = misc_register(mdev);
-	if (rc)
-		goto unreg_scif;
-	scif_init_debugfs();
-	return 0;
-unreg_scif:
-	scif_unregister_driver(&scif_driver);
-peer_bus_exit:
-	scif_peer_bus_exit();
-exit:
-	_scif_exit();
-	return rc;
-}
-
-static void __exit scif_exit(void)
-{
-	scif_exit_debugfs();
-	misc_deregister(&scif_info.mdev);
-	scif_unregister_driver(&scif_driver);
-	scif_peer_bus_exit();
-	iova_cache_put();
-	_scif_exit();
-}
-
-module_init(scif_init);
-module_exit(scif_exit);
-
-MODULE_DEVICE_TABLE(scif, id_table);
-MODULE_AUTHOR("Intel Corporation");
-MODULE_DESCRIPTION("Intel(R) SCIF driver");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/scif/scif_main.h b/drivers/misc/mic/scif/scif_main.h
deleted file mode 100644
index bb3ab97d5b35..000000000000
--- a/drivers/misc/mic/scif/scif_main.h
+++ /dev/null
@@ -1,274 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Intel SCIF driver.
- */
-#ifndef SCIF_MAIN_H
-#define SCIF_MAIN_H
-
-#include <linux/sched/signal.h>
-#include <linux/pci.h>
-#include <linux/miscdevice.h>
-#include <linux/dmaengine.h>
-#include <linux/iova.h>
-#include <linux/anon_inodes.h>
-#include <linux/file.h>
-#include <linux/vmalloc.h>
-#include <linux/scif.h>
-#include "../common/mic_dev.h"
-
-#define SCIF_MGMT_NODE 0
-#define SCIF_DEFAULT_WATCHDOG_TO 30
-#define SCIF_NODE_ACCEPT_TIMEOUT (3 * HZ)
-#define SCIF_NODE_ALIVE_TIMEOUT (SCIF_DEFAULT_WATCHDOG_TO * HZ)
-#define SCIF_RMA_TEMP_CACHE_LIMIT 0x20000
-
-/*
- * Generic state used for certain node QP message exchanges
- * like Unregister, Alloc etc.
- */
-enum scif_msg_state {
-	OP_IDLE = 1,
-	OP_IN_PROGRESS,
-	OP_COMPLETED,
-	OP_FAILED
-};
-
-/*
- * struct scif_info - Global SCIF information
- *
- * @nodeid: Node ID this node is to others
- * @maxid: Max known node ID
- * @total: Total number of SCIF nodes
- * @nr_zombies: number of zombie endpoints
- * @eplock: Lock to synchronize listening, zombie endpoint lists
- * @connlock: Lock to synchronize connected and disconnected lists
- * @nb_connect_lock: Synchronize non blocking connect operations
- * @port_lock: Synchronize access to SCIF ports
- * @uaccept: List of user acceptreq waiting for acceptreg
- * @listen: List of listening end points
- * @zombie: List of zombie end points with pending RMA's
- * @connected: List of end points in connected state
- * @disconnected: List of end points in disconnected state
- * @nb_connect_list: List for non blocking connections
- * @misc_work: miscellaneous SCIF tasks
- * @conflock: Lock to synchronize SCIF node configuration changes
- * @en_msg_log: Enable debug message logging
- * @p2p_enable: Enable P2P SCIF network
- * @mdev: The MISC device
- * @conn_work: Work for workqueue handling all connections
- * @exitwq: Wait queue for waiting for an EXIT node QP message response
- * @loopb_dev: Dummy SCIF device used for loopback
- * @loopb_wq: Workqueue used for handling loopback messages
- * @loopb_wqname[16]: Name of loopback workqueue
- * @loopb_work: Used for submitting work to loopb_wq
- * @loopb_recv_q: List of messages received on the loopb_wq
- * @card_initiated_exit: set when the card has initiated the exit
- * @rmalock: Synchronize access to RMA operations
- * @fencelock: Synchronize access to list of remote fences requested.
- * @rma: List of temporary registered windows to be destroyed.
- * @rma_tc: List of temporary registered & cached Windows to be destroyed
- * @fence: List of remote fence requests
- * @mmu_notif_work: Work for registration caching MMU notifier workqueue
- * @mmu_notif_cleanup: List of temporary cached windows for reg cache
- * @rma_tc_limit: RMA temporary cache limit
- */
-struct scif_info {
-	u8 nodeid;
-	u8 maxid;
-	u8 total;
-	u32 nr_zombies;
-	struct mutex eplock;
-	struct mutex connlock;
-	spinlock_t nb_connect_lock;
-	spinlock_t port_lock;
-	struct list_head uaccept;
-	struct list_head listen;
-	struct list_head zombie;
-	struct list_head connected;
-	struct list_head disconnected;
-	struct list_head nb_connect_list;
-	struct work_struct misc_work;
-	struct mutex conflock;
-	u8 en_msg_log;
-	u8 p2p_enable;
-	struct miscdevice mdev;
-	struct work_struct conn_work;
-	wait_queue_head_t exitwq;
-	struct scif_dev *loopb_dev;
-	struct workqueue_struct *loopb_wq;
-	char loopb_wqname[16];
-	struct work_struct loopb_work;
-	struct list_head loopb_recv_q;
-	bool card_initiated_exit;
-	spinlock_t rmalock;
-	struct mutex fencelock;
-	struct list_head rma;
-	struct list_head rma_tc;
-	struct list_head fence;
-	struct work_struct mmu_notif_work;
-	struct list_head mmu_notif_cleanup;
-	unsigned long rma_tc_limit;
-};
-
-/*
- * struct scif_p2p_info - SCIF mapping information used for P2P
- *
- * @ppi_peer_id - SCIF peer node id
- * @ppi_sg - Scatter list for bar information (One for mmio and one for aper)
- * @sg_nentries - Number of entries in the scatterlist
- * @ppi_da: DMA address for MMIO and APER bars
- * @ppi_len: Length of MMIO and APER bars
- * @ppi_list: Link in list of mapping information
- */
-struct scif_p2p_info {
-	u8 ppi_peer_id;
-	struct scatterlist *ppi_sg[2];
-	u64 sg_nentries[2];
-	dma_addr_t ppi_da[2];
-	u64 ppi_len[2];
-#define SCIF_PPI_MMIO 0
-#define SCIF_PPI_APER 1
-	struct list_head ppi_list;
-};
-
-/*
- * struct scif_dev - SCIF remote device specific fields
- *
- * @node: Node id
- * @p2p: List of P2P mapping information
- * @qpairs: The node queue pair for exchanging control messages
- * @intr_wq: Workqueue for handling Node QP messages
- * @intr_wqname: Name of node QP workqueue for handling interrupts
- * @intr_bh: Used for submitting work to intr_wq
- * @lock: Lock used for synchronizing access to the scif device
- * @sdev: SCIF hardware device on the SCIF hardware bus
- * @db: doorbell the peer will trigger to generate an interrupt on self
- * @rdb: Doorbell to trigger on the peer to generate an interrupt on the peer
- * @cookie: Cookie received while registering the interrupt handler
- * @peer_add_work: Work for handling device_add for peer devices
- * @p2p_dwork: Delayed work to enable polling for P2P state
- * @qp_dwork: Delayed work for enabling polling for remote QP information
- * @p2p_retry: Number of times to retry polling of P2P state
- * @base_addr: P2P aperture bar base address
- * @mic_mw mmio: The peer MMIO information used for P2P
- * @spdev: SCIF peer device on the SCIF peer bus
- * @node_remove_ack_pending: True if a node_remove_ack is pending
- * @exit_ack_pending: true if an exit_ack is pending
- * @disconn_wq: Used while waiting for a node remove response
- * @disconn_rescnt: Keeps track of number of node remove requests sent
- * @exit: Status of exit message
- * @qp_dma_addr: Queue pair DMA address passed to the peer
- * @dma_ch_idx: Round robin index for DMA channels
- * @signal_pool: DMA pool used for scheduling scif_fence_signal DMA's
-*/
-struct scif_dev {
-	u8 node;
-	struct list_head p2p;
-	struct scif_qp *qpairs;
-	struct workqueue_struct *intr_wq;
-	char intr_wqname[16];
-	struct work_struct intr_bh;
-	struct mutex lock;
-	struct scif_hw_dev *sdev;
-	int db;
-	int rdb;
-	struct mic_irq *cookie;
-	struct work_struct peer_add_work;
-	struct delayed_work p2p_dwork;
-	struct delayed_work qp_dwork;
-	int p2p_retry;
-	dma_addr_t base_addr;
-	struct mic_mw mmio;
-	struct scif_peer_dev __rcu *spdev;
-	bool node_remove_ack_pending;
-	bool exit_ack_pending;
-	wait_queue_head_t disconn_wq;
-	atomic_t disconn_rescnt;
-	enum scif_msg_state exit;
-	dma_addr_t qp_dma_addr;
-	int dma_ch_idx;
-	struct dma_pool *signal_pool;
-};
-
-extern bool scif_reg_cache_enable;
-extern bool scif_ulimit_check;
-extern struct scif_info scif_info;
-extern struct idr scif_ports;
-extern struct bus_type scif_peer_bus;
-extern struct scif_dev *scif_dev;
-extern const struct file_operations scif_fops;
-extern const struct file_operations scif_anon_fops;
-
-/* Size of the RB for the Node QP */
-#define SCIF_NODE_QP_SIZE 0x10000
-
-#include "scif_nodeqp.h"
-#include "scif_rma.h"
-#include "scif_rma_list.h"
-
-/*
- * scifdev_self:
- * @dev: The remote SCIF Device
- *
- * Returns true if the SCIF Device passed is the self aka Loopback SCIF device.
- */
-static inline int scifdev_self(struct scif_dev *dev)
-{
-	return dev->node == scif_info.nodeid;
-}
-
-static inline bool scif_is_mgmt_node(void)
-{
-	return !scif_info.nodeid;
-}
-
-/*
- * scifdev_is_p2p:
- * @dev: The remote SCIF Device
- *
- * Returns true if the SCIF Device is a MIC Peer to Peer SCIF device.
- */
-static inline bool scifdev_is_p2p(struct scif_dev *dev)
-{
-	if (scif_is_mgmt_node())
-		return false;
-	else
-		return dev != &scif_dev[SCIF_MGMT_NODE] &&
-			!scifdev_self(dev);
-}
-
-/*
- * scifdev_alive:
- * @scifdev: The remote SCIF Device
- *
- * Returns true if the remote SCIF Device is running or sleeping for
- * this endpoint.
- */
-static inline int _scifdev_alive(struct scif_dev *scifdev)
-{
-	struct scif_peer_dev *spdev;
-
-	rcu_read_lock();
-	spdev = rcu_dereference(scifdev->spdev);
-	rcu_read_unlock();
-	return !!spdev;
-}
-
-#include "scif_epd.h"
-
-void __init scif_init_debugfs(void);
-void scif_exit_debugfs(void);
-int scif_setup_intr_wq(struct scif_dev *scifdev);
-void scif_destroy_intr_wq(struct scif_dev *scifdev);
-void scif_cleanup_scifdev(struct scif_dev *dev);
-void scif_handle_remove_node(int node);
-void scif_disconnect_node(u32 node_id, bool mgmt_initiated);
-void scif_free_qp(struct scif_dev *dev);
-void scif_misc_handler(struct work_struct *work);
-void scif_stop(struct scif_dev *scifdev);
-irqreturn_t scif_intr_handler(int irq, void *data);
-#endif /* SCIF_MAIN_H */
diff --git a/drivers/misc/mic/scif/scif_map.h b/drivers/misc/mic/scif/scif_map.h
deleted file mode 100644
index 96b760819bfc..000000000000
--- a/drivers/misc/mic/scif/scif_map.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Intel SCIF driver.
- */
-#ifndef SCIF_MAP_H
-#define SCIF_MAP_H
-
-#include "../bus/scif_bus.h"
-
-static __always_inline void *
-scif_alloc_coherent(dma_addr_t *dma_handle,
-		    struct scif_dev *scifdev, size_t size,
-		    gfp_t gfp)
-{
-	void *va;
-
-	if (scifdev_self(scifdev)) {
-		va = kmalloc(size, gfp);
-		if (va)
-			*dma_handle = virt_to_phys(va);
-	} else {
-		va = dma_alloc_coherent(&scifdev->sdev->dev,
-					size, dma_handle, gfp);
-		if (va && scifdev_is_p2p(scifdev))
-			*dma_handle = *dma_handle + scifdev->base_addr;
-	}
-	return va;
-}
-
-static __always_inline void
-scif_free_coherent(void *va, dma_addr_t local,
-		   struct scif_dev *scifdev, size_t size)
-{
-	if (scifdev_self(scifdev)) {
-		kfree(va);
-	} else {
-		if (scifdev_is_p2p(scifdev) && local > scifdev->base_addr)
-			local = local - scifdev->base_addr;
-		dma_free_coherent(&scifdev->sdev->dev,
-				  size, va, local);
-	}
-}
-
-static __always_inline int
-scif_map_single(dma_addr_t *dma_handle,
-		void *local, struct scif_dev *scifdev, size_t size)
-{
-	int err = 0;
-
-	if (scifdev_self(scifdev)) {
-		*dma_handle = virt_to_phys((local));
-	} else {
-		*dma_handle = dma_map_single(&scifdev->sdev->dev,
-					     local, size, DMA_BIDIRECTIONAL);
-		if (dma_mapping_error(&scifdev->sdev->dev, *dma_handle))
-			err = -ENOMEM;
-		else if (scifdev_is_p2p(scifdev))
-			*dma_handle = *dma_handle + scifdev->base_addr;
-	}
-	if (err)
-		*dma_handle = 0;
-	return err;
-}
-
-static __always_inline void
-scif_unmap_single(dma_addr_t local, struct scif_dev *scifdev,
-		  size_t size)
-{
-	if (!scifdev_self(scifdev)) {
-		if (scifdev_is_p2p(scifdev))
-			local = local - scifdev->base_addr;
-		dma_unmap_single(&scifdev->sdev->dev, local,
-				 size, DMA_BIDIRECTIONAL);
-	}
-}
-
-static __always_inline void *
-scif_ioremap(dma_addr_t phys, size_t size, struct scif_dev *scifdev)
-{
-	void *out_virt;
-	struct scif_hw_dev *sdev = scifdev->sdev;
-
-	if (scifdev_self(scifdev))
-		out_virt = phys_to_virt(phys);
-	else
-		out_virt = (void __force *)
-			   sdev->hw_ops->remap(sdev, phys, size);
-	return out_virt;
-}
-
-static __always_inline void
-scif_iounmap(void *virt, size_t len, struct scif_dev *scifdev)
-{
-	if (!scifdev_self(scifdev)) {
-		struct scif_hw_dev *sdev = scifdev->sdev;
-
-		sdev->hw_ops->unmap(sdev, (void __force __iomem *)virt);
-	}
-}
-
-static __always_inline int
-scif_map_page(dma_addr_t *dma_handle, struct page *page,
-	      struct scif_dev *scifdev)
-{
-	int err = 0;
-
-	if (scifdev_self(scifdev)) {
-		*dma_handle = page_to_phys(page);
-	} else {
-		struct scif_hw_dev *sdev = scifdev->sdev;
-		*dma_handle = dma_map_page(&sdev->dev,
-					   page, 0x0, PAGE_SIZE,
-					   DMA_BIDIRECTIONAL);
-		if (dma_mapping_error(&sdev->dev, *dma_handle))
-			err = -ENOMEM;
-		else if (scifdev_is_p2p(scifdev))
-			*dma_handle = *dma_handle + scifdev->base_addr;
-	}
-	if (err)
-		*dma_handle = 0;
-	return err;
-}
-#endif  /* SCIF_MAP_H */
diff --git a/drivers/misc/mic/scif/scif_mmap.c b/drivers/misc/mic/scif/scif_mmap.c
deleted file mode 100644
index a151d416f39c..000000000000
--- a/drivers/misc/mic/scif/scif_mmap.c
+++ /dev/null
@@ -1,690 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * Intel SCIF driver.
- */
-#include "scif_main.h"
-
-/*
- * struct scif_vma_info - Information about a remote memory mapping
- *			  created via scif_mmap(..)
- * @vma: VM area struct
- * @list: link to list of active vmas
- */
-struct scif_vma_info {
-	struct vm_area_struct *vma;
-	struct list_head list;
-};
-
-void scif_recv_munmap(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	struct scif_rma_req req;
-	struct scif_window *window = NULL;
-	struct scif_window *recv_window =
-		(struct scif_window *)msg->payload[0];
-	struct scif_endpt *ep;
-
-	ep = (struct scif_endpt *)recv_window->ep;
-	req.out_window = &window;
-	req.offset = recv_window->offset;
-	req.prot = recv_window->prot;
-	req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
-	req.type = SCIF_WINDOW_FULL;
-	req.head = &ep->rma_info.reg_list;
-	msg->payload[0] = ep->remote_ep;
-
-	mutex_lock(&ep->rma_info.rma_lock);
-	/* Does a valid window exist? */
-	if (scif_query_window(&req)) {
-		dev_err(&scifdev->sdev->dev,
-			"%s %d -ENXIO\n", __func__, __LINE__);
-		msg->uop = SCIF_UNREGISTER_ACK;
-		goto error;
-	}
-
-	scif_put_window(window, window->nr_pages);
-
-	if (!window->ref_count) {
-		atomic_inc(&ep->rma_info.tw_refcount);
-		ep->rma_info.async_list_del = 1;
-		list_del_init(&window->list);
-		scif_free_window_offset(ep, window, window->offset);
-	}
-error:
-	mutex_unlock(&ep->rma_info.rma_lock);
-	if (window && !window->ref_count)
-		scif_queue_for_cleanup(window, &scif_info.rma);
-}
-
-/*
- * Remove valid remote memory mappings created via scif_mmap(..) from the
- * process address space since the remote node is lost
- */
-static void __scif_zap_mmaps(struct scif_endpt *ep)
-{
-	struct list_head *item;
-	struct scif_vma_info *info;
-	struct vm_area_struct *vma;
-	unsigned long size;
-
-	spin_lock(&ep->lock);
-	list_for_each(item, &ep->rma_info.vma_list) {
-		info = list_entry(item, struct scif_vma_info, list);
-		vma = info->vma;
-		size = vma->vm_end - vma->vm_start;
-		zap_vma_ptes(vma, vma->vm_start, size);
-		dev_dbg(scif_info.mdev.this_device,
-			"%s ep %p zap vma %p size 0x%lx\n",
-			__func__, ep, info->vma, size);
-	}
-	spin_unlock(&ep->lock);
-}
-
-/*
- * Traverse the list of endpoints for a particular remote node and
- * zap valid remote memory mappings since the remote node is lost
- */
-static void _scif_zap_mmaps(int node, struct list_head *head)
-{
-	struct scif_endpt *ep;
-	struct list_head *item;
-
-	mutex_lock(&scif_info.connlock);
-	list_for_each(item, head) {
-		ep = list_entry(item, struct scif_endpt, list);
-		if (ep->remote_dev->node == node)
-			__scif_zap_mmaps(ep);
-	}
-	mutex_unlock(&scif_info.connlock);
-}
-
-/*
- * Wrapper for removing remote memory mappings for a particular node. This API
- * is called by peer nodes as part of handling a lost node.
- */
-void scif_zap_mmaps(int node)
-{
-	_scif_zap_mmaps(node, &scif_info.connected);
-	_scif_zap_mmaps(node, &scif_info.disconnected);
-}
-
-/*
- * This API is only called while handling a lost node:
- * a) Remote node is dead.
- * b) Remote memory mappings have been zapped
- * So we can traverse the remote_reg_list without any locks. Since
- * the window has not yet been unregistered we can drop the ref count
- * and queue it to the cleanup thread.
- */
-static void __scif_cleanup_rma_for_zombies(struct scif_endpt *ep)
-{
-	struct list_head *pos, *tmp;
-	struct scif_window *window;
-
-	list_for_each_safe(pos, tmp, &ep->rma_info.remote_reg_list) {
-		window = list_entry(pos, struct scif_window, list);
-		if (window->ref_count)
-			scif_put_window(window, window->nr_pages);
-		else
-			dev_err(scif_info.mdev.this_device,
-				"%s %d unexpected\n",
-				__func__, __LINE__);
-		if (!window->ref_count) {
-			atomic_inc(&ep->rma_info.tw_refcount);
-			list_del_init(&window->list);
-			scif_queue_for_cleanup(window, &scif_info.rma);
-		}
-	}
-}
-
-/* Cleanup remote registration lists for zombie endpoints */
-void scif_cleanup_rma_for_zombies(int node)
-{
-	struct scif_endpt *ep;
-	struct list_head *item;
-
-	mutex_lock(&scif_info.eplock);
-	list_for_each(item, &scif_info.zombie) {
-		ep = list_entry(item, struct scif_endpt, list);
-		if (ep->remote_dev && ep->remote_dev->node == node)
-			__scif_cleanup_rma_for_zombies(ep);
-	}
-	mutex_unlock(&scif_info.eplock);
-	flush_work(&scif_info.misc_work);
-}
-
-/* Insert the VMA into the per endpoint VMA list */
-static int scif_insert_vma(struct scif_endpt *ep, struct vm_area_struct *vma)
-{
-	struct scif_vma_info *info;
-	int err = 0;
-
-	info = kzalloc(sizeof(*info), GFP_KERNEL);
-	if (!info) {
-		err = -ENOMEM;
-		goto done;
-	}
-	info->vma = vma;
-	spin_lock(&ep->lock);
-	list_add_tail(&info->list, &ep->rma_info.vma_list);
-	spin_unlock(&ep->lock);
-done:
-	return err;
-}
-
-/* Delete the VMA from the per endpoint VMA list */
-static void scif_delete_vma(struct scif_endpt *ep, struct vm_area_struct *vma)
-{
-	struct list_head *item;
-	struct scif_vma_info *info;
-
-	spin_lock(&ep->lock);
-	list_for_each(item, &ep->rma_info.vma_list) {
-		info = list_entry(item, struct scif_vma_info, list);
-		if (info->vma == vma) {
-			list_del(&info->list);
-			kfree(info);
-			break;
-		}
-	}
-	spin_unlock(&ep->lock);
-}
-
-static phys_addr_t scif_get_phys(phys_addr_t phys, struct scif_endpt *ep)
-{
-	struct scif_dev *scifdev = (struct scif_dev *)ep->remote_dev;
-	struct scif_hw_dev *sdev = scifdev->sdev;
-	phys_addr_t out_phys, apt_base = 0;
-
-	/*
-	 * If the DMA address is card relative then we need to add the
-	 * aperture base for mmap to work correctly
-	 */
-	if (!scifdev_self(scifdev) && sdev->aper && sdev->card_rel_da)
-		apt_base = sdev->aper->pa;
-	out_phys = apt_base + phys;
-	return out_phys;
-}
-
-int scif_get_pages(scif_epd_t epd, off_t offset, size_t len,
-		   struct scif_range **pages)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)epd;
-	struct scif_rma_req req;
-	struct scif_window *window = NULL;
-	int nr_pages, err, i;
-
-	dev_dbg(scif_info.mdev.this_device,
-		"SCIFAPI get_pinned_pages: ep %p offset 0x%lx len 0x%lx\n",
-		ep, offset, len);
-	err = scif_verify_epd(ep);
-	if (err)
-		return err;
-
-	if (!len || (offset < 0) ||
-	    (offset + len < offset) ||
-	    (ALIGN(offset, PAGE_SIZE) != offset) ||
-	    (ALIGN(len, PAGE_SIZE) != len))
-		return -EINVAL;
-
-	nr_pages = len >> PAGE_SHIFT;
-
-	req.out_window = &window;
-	req.offset = offset;
-	req.prot = 0;
-	req.nr_bytes = len;
-	req.type = SCIF_WINDOW_SINGLE;
-	req.head = &ep->rma_info.remote_reg_list;
-
-	mutex_lock(&ep->rma_info.rma_lock);
-	/* Does a valid window exist? */
-	err = scif_query_window(&req);
-	if (err) {
-		dev_err(&ep->remote_dev->sdev->dev,
-			"%s %d err %d\n", __func__, __LINE__, err);
-		goto error;
-	}
-
-	/* Allocate scif_range */
-	*pages = kzalloc(sizeof(**pages), GFP_KERNEL);
-	if (!*pages) {
-		err = -ENOMEM;
-		goto error;
-	}
-
-	/* Allocate phys addr array */
-	(*pages)->phys_addr = scif_zalloc(nr_pages * sizeof(dma_addr_t));
-	if (!((*pages)->phys_addr)) {
-		err = -ENOMEM;
-		goto error;
-	}
-
-	if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev)) {
-		/* Allocate virtual address array */
-		((*pages)->va = scif_zalloc(nr_pages * sizeof(void *)));
-		if (!(*pages)->va) {
-			err = -ENOMEM;
-			goto error;
-		}
-	}
-	/* Populate the values */
-	(*pages)->cookie = window;
-	(*pages)->nr_pages = nr_pages;
-	(*pages)->prot_flags = window->prot;
-
-	for (i = 0; i < nr_pages; i++) {
-		(*pages)->phys_addr[i] =
-			__scif_off_to_dma_addr(window, offset +
-					       (i * PAGE_SIZE));
-		(*pages)->phys_addr[i] = scif_get_phys((*pages)->phys_addr[i],
-							ep);
-		if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev))
-			(*pages)->va[i] =
-				ep->remote_dev->sdev->aper->va +
-				(*pages)->phys_addr[i] -
-				ep->remote_dev->sdev->aper->pa;
-	}
-
-	scif_get_window(window, nr_pages);
-error:
-	mutex_unlock(&ep->rma_info.rma_lock);
-	if (err) {
-		if (*pages) {
-			scif_free((*pages)->phys_addr,
-				  nr_pages * sizeof(dma_addr_t));
-			scif_free((*pages)->va,
-				  nr_pages * sizeof(void *));
-			kfree(*pages);
-			*pages = NULL;
-		}
-		dev_err(&ep->remote_dev->sdev->dev,
-			"%s %d err %d\n", __func__, __LINE__, err);
-	}
-	return err;
-}
-EXPORT_SYMBOL_GPL(scif_get_pages);
-
-int scif_put_pages(struct scif_range *pages)
-{
-	struct scif_endpt *ep;
-	struct scif_window *window;
-	struct scifmsg msg;
-
-	if (!pages || !pages->cookie)
-		return -EINVAL;
-
-	window = pages->cookie;
-
-	if (!window || window->magic != SCIFEP_MAGIC)
-		return -EINVAL;
-
-	ep = (struct scif_endpt *)window->ep;
-	/*
-	 * If the state is SCIFEP_CONNECTED or SCIFEP_DISCONNECTED then the
-	 * callee should be allowed to release references to the pages,
-	 * else the endpoint was not connected in the first place,
-	 * hence the ENOTCONN.
-	 */
-	if (ep->state != SCIFEP_CONNECTED && ep->state != SCIFEP_DISCONNECTED)
-		return -ENOTCONN;
-
-	mutex_lock(&ep->rma_info.rma_lock);
-
-	scif_put_window(window, pages->nr_pages);
-
-	/* Initiate window destruction if ref count is zero */
-	if (!window->ref_count) {
-		list_del(&window->list);
-		mutex_unlock(&ep->rma_info.rma_lock);
-		scif_drain_dma_intr(ep->remote_dev->sdev,
-				    ep->rma_info.dma_chan);
-		/* Inform the peer about this window being destroyed. */
-		msg.uop = SCIF_MUNMAP;
-		msg.src = ep->port;
-		msg.payload[0] = window->peer_window;
-		/* No error handling for notification messages */
-		scif_nodeqp_send(ep->remote_dev, &msg);
-		/* Destroy this window from the peer's registered AS */
-		scif_destroy_remote_window(window);
-	} else {
-		mutex_unlock(&ep->rma_info.rma_lock);
-	}
-
-	scif_free(pages->phys_addr, pages->nr_pages * sizeof(dma_addr_t));
-	scif_free(pages->va, pages->nr_pages * sizeof(void *));
-	kfree(pages);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(scif_put_pages);
-
-/*
- * scif_rma_list_mmap:
- *
- * Traverse the remote registration list starting from start_window:
- * 1) Create VtoP mappings via remap_pfn_range(..)
- * 2) Once step 1) and 2) complete successfully then traverse the range of
- *    windows again and bump the reference count.
- * RMA lock must be held.
- */
-static int scif_rma_list_mmap(struct scif_window *start_window, s64 offset,
-			      int nr_pages, struct vm_area_struct *vma)
-{
-	s64 end_offset, loop_offset = offset;
-	struct scif_window *window = start_window;
-	int loop_nr_pages, nr_pages_left = nr_pages;
-	struct scif_endpt *ep = (struct scif_endpt *)start_window->ep;
-	struct list_head *head = &ep->rma_info.remote_reg_list;
-	int i, err = 0;
-	dma_addr_t phys_addr;
-	struct scif_window_iter src_win_iter;
-	size_t contig_bytes = 0;
-
-	might_sleep();
-	list_for_each_entry_from(window, head, list) {
-		end_offset = window->offset +
-			(window->nr_pages << PAGE_SHIFT);
-		loop_nr_pages = min_t(int,
-				      (end_offset - loop_offset) >> PAGE_SHIFT,
-				      nr_pages_left);
-		scif_init_window_iter(window, &src_win_iter);
-		for (i = 0; i < loop_nr_pages; i++) {
-			phys_addr = scif_off_to_dma_addr(window, loop_offset,
-							 &contig_bytes,
-							 &src_win_iter);
-			phys_addr = scif_get_phys(phys_addr, ep);
-			err = remap_pfn_range(vma,
-					      vma->vm_start +
-					      loop_offset - offset,
-					      phys_addr >> PAGE_SHIFT,
-					      PAGE_SIZE,
-					      vma->vm_page_prot);
-			if (err)
-				goto error;
-			loop_offset += PAGE_SIZE;
-		}
-		nr_pages_left -= loop_nr_pages;
-		if (!nr_pages_left)
-			break;
-	}
-	/*
-	 * No more failures expected. Bump up the ref count for all
-	 * the windows. Another traversal from start_window required
-	 * for handling errors encountered across windows during
-	 * remap_pfn_range(..).
-	 */
-	loop_offset = offset;
-	nr_pages_left = nr_pages;
-	window = start_window;
-	head = &ep->rma_info.remote_reg_list;
-	list_for_each_entry_from(window, head, list) {
-		end_offset = window->offset +
-			(window->nr_pages << PAGE_SHIFT);
-		loop_nr_pages = min_t(int,
-				      (end_offset - loop_offset) >> PAGE_SHIFT,
-				      nr_pages_left);
-		scif_get_window(window, loop_nr_pages);
-		nr_pages_left -= loop_nr_pages;
-		loop_offset += (loop_nr_pages << PAGE_SHIFT);
-		if (!nr_pages_left)
-			break;
-	}
-error:
-	if (err)
-		dev_err(scif_info.mdev.this_device,
-			"%s %d err %d\n", __func__, __LINE__, err);
-	return err;
-}
-
-/*
- * scif_rma_list_munmap:
- *
- * Traverse the remote registration list starting from window:
- * 1) Decrement ref count.
- * 2) If the ref count drops to zero then send a SCIF_MUNMAP message to peer.
- * RMA lock must be held.
- */
-static void scif_rma_list_munmap(struct scif_window *start_window,
-				 s64 offset, int nr_pages)
-{
-	struct scifmsg msg;
-	s64 loop_offset = offset, end_offset;
-	int loop_nr_pages, nr_pages_left = nr_pages;
-	struct scif_endpt *ep = (struct scif_endpt *)start_window->ep;
-	struct list_head *head = &ep->rma_info.remote_reg_list;
-	struct scif_window *window = start_window, *_window;
-
-	msg.uop = SCIF_MUNMAP;
-	msg.src = ep->port;
-	loop_offset = offset;
-	nr_pages_left = nr_pages;
-	list_for_each_entry_safe_from(window, _window, head, list) {
-		end_offset = window->offset +
-			(window->nr_pages << PAGE_SHIFT);
-		loop_nr_pages = min_t(int,
-				      (end_offset - loop_offset) >> PAGE_SHIFT,
-				      nr_pages_left);
-		scif_put_window(window, loop_nr_pages);
-		if (!window->ref_count) {
-			struct scif_dev *rdev = ep->remote_dev;
-
-			scif_drain_dma_intr(rdev->sdev,
-					    ep->rma_info.dma_chan);
-			/* Inform the peer about this munmap */
-			msg.payload[0] = window->peer_window;
-			/* No error handling for Notification messages. */
-			scif_nodeqp_send(ep->remote_dev, &msg);
-			list_del(&window->list);
-			/* Destroy this window from the peer's registered AS */
-			scif_destroy_remote_window(window);
-		}
-		nr_pages_left -= loop_nr_pages;
-		loop_offset += (loop_nr_pages << PAGE_SHIFT);
-		if (!nr_pages_left)
-			break;
-	}
-}
-
-/*
- * The private data field of each VMA used to mmap a remote window
- * points to an instance of struct vma_pvt
- */
-struct vma_pvt {
-	struct scif_endpt *ep;	/* End point for remote window */
-	s64 offset;		/* offset within remote window */
-	bool valid_offset;	/* offset is valid only if the original
-				 * mmap request was for a single page
-				 * else the offset within the vma is
-				 * the correct offset
-				 */
-	struct kref ref;
-};
-
-static void vma_pvt_release(struct kref *ref)
-{
-	struct vma_pvt *vmapvt = container_of(ref, struct vma_pvt, ref);
-
-	kfree(vmapvt);
-}
-
-/**
- * scif_vma_open - VMA open driver callback
- * @vma: VMM memory area.
- * The open method is called by the kernel to allow the subsystem implementing
- * the VMA to initialize the area. This method is invoked any time a new
- * reference to the VMA is made (when a process forks, for example).
- * The one exception happens when the VMA is first created by mmap;
- * in this case, the driver's mmap method is called instead.
- * This function is also invoked when an existing VMA is split by the kernel
- * due to a call to munmap on a subset of the VMA resulting in two VMAs.
- * The kernel invokes this function only on one of the two VMAs.
- */
-static void scif_vma_open(struct vm_area_struct *vma)
-{
-	struct vma_pvt *vmapvt = vma->vm_private_data;
-
-	dev_dbg(scif_info.mdev.this_device,
-		"SCIFAPI vma open: vma_start 0x%lx vma_end 0x%lx\n",
-		vma->vm_start, vma->vm_end);
-	scif_insert_vma(vmapvt->ep, vma);
-	kref_get(&vmapvt->ref);
-}
-
-/**
- * scif_munmap - VMA close driver callback.
- * @vma: VMM memory area.
- * When an area is destroyed, the kernel calls its close operation.
- * Note that there's no usage count associated with VMA's; the area
- * is opened and closed exactly once by each process that uses it.
- */
-static void scif_munmap(struct vm_area_struct *vma)
-{
-	struct scif_endpt *ep;
-	struct vma_pvt *vmapvt = vma->vm_private_data;
-	int nr_pages = vma_pages(vma);
-	s64 offset;
-	struct scif_rma_req req;
-	struct scif_window *window = NULL;
-	int err;
-
-	might_sleep();
-	dev_dbg(scif_info.mdev.this_device,
-		"SCIFAPI munmap: vma_start 0x%lx vma_end 0x%lx\n",
-		vma->vm_start, vma->vm_end);
-	ep = vmapvt->ep;
-	offset = vmapvt->valid_offset ? vmapvt->offset :
-		(vma->vm_pgoff) << PAGE_SHIFT;
-	dev_dbg(scif_info.mdev.this_device,
-		"SCIFAPI munmap: ep %p nr_pages 0x%x offset 0x%llx\n",
-		ep, nr_pages, offset);
-	req.out_window = &window;
-	req.offset = offset;
-	req.nr_bytes = vma->vm_end - vma->vm_start;
-	req.prot = vma->vm_flags & (VM_READ | VM_WRITE);
-	req.type = SCIF_WINDOW_PARTIAL;
-	req.head = &ep->rma_info.remote_reg_list;
-
-	mutex_lock(&ep->rma_info.rma_lock);
-
-	err = scif_query_window(&req);
-	if (err)
-		dev_err(scif_info.mdev.this_device,
-			"%s %d err %d\n", __func__, __LINE__, err);
-	else
-		scif_rma_list_munmap(window, offset, nr_pages);
-
-	mutex_unlock(&ep->rma_info.rma_lock);
-	/*
-	 * The kernel probably zeroes these out but we still want
-	 * to clean up our own mess just in case.
-	 */
-	vma->vm_ops = NULL;
-	vma->vm_private_data = NULL;
-	kref_put(&vmapvt->ref, vma_pvt_release);
-	scif_delete_vma(ep, vma);
-}
-
-static const struct vm_operations_struct scif_vm_ops = {
-	.open = scif_vma_open,
-	.close = scif_munmap,
-};
-
-/**
- * scif_mmap - Map pages in virtual address space to a remote window.
- * @vma: VMM memory area.
- * @epd: endpoint descriptor
- *
- * Return: Upon successful completion, scif_mmap() returns zero
- * else an apt error is returned as documented in scif.h
- */
-int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd)
-{
-	struct scif_rma_req req;
-	struct scif_window *window = NULL;
-	struct scif_endpt *ep = (struct scif_endpt *)epd;
-	s64 start_offset = vma->vm_pgoff << PAGE_SHIFT;
-	int nr_pages = vma_pages(vma);
-	int err;
-	struct vma_pvt *vmapvt;
-
-	dev_dbg(scif_info.mdev.this_device,
-		"SCIFAPI mmap: ep %p start_offset 0x%llx nr_pages 0x%x\n",
-		ep, start_offset, nr_pages);
-	err = scif_verify_epd(ep);
-	if (err)
-		return err;
-
-	might_sleep();
-
-	err = scif_insert_vma(ep, vma);
-	if (err)
-		return err;
-
-	vmapvt = kzalloc(sizeof(*vmapvt), GFP_KERNEL);
-	if (!vmapvt) {
-		scif_delete_vma(ep, vma);
-		return -ENOMEM;
-	}
-
-	vmapvt->ep = ep;
-	kref_init(&vmapvt->ref);
-
-	req.out_window = &window;
-	req.offset = start_offset;
-	req.nr_bytes = vma->vm_end - vma->vm_start;
-	req.prot = vma->vm_flags & (VM_READ | VM_WRITE);
-	req.type = SCIF_WINDOW_PARTIAL;
-	req.head = &ep->rma_info.remote_reg_list;
-
-	mutex_lock(&ep->rma_info.rma_lock);
-	/* Does a valid window exist? */
-	err = scif_query_window(&req);
-	if (err) {
-		dev_err(&ep->remote_dev->sdev->dev,
-			"%s %d err %d\n", __func__, __LINE__, err);
-		goto error_unlock;
-	}
-
-	/* Default prot for loopback */
-	if (!scifdev_self(ep->remote_dev))
-		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
-
-	/*
-	 * VM_DONTCOPY - Do not copy this vma on fork
-	 * VM_DONTEXPAND - Cannot expand with mremap()
-	 * VM_RESERVED - Count as reserved_vm like IO
-	 * VM_PFNMAP - Page-ranges managed without "struct page"
-	 * VM_IO - Memory mapped I/O or similar
-	 *
-	 * We do not want to copy this VMA automatically on a fork(),
-	 * expand this VMA due to mremap() or swap out these pages since
-	 * the VMA is actually backed by physical pages in the remote
-	 * node's physical memory and not via a struct page.
-	 */
-	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
-
-	if (!scifdev_self(ep->remote_dev))
-		vma->vm_flags |= VM_IO | VM_PFNMAP;
-
-	/* Map this range of windows */
-	err = scif_rma_list_mmap(window, start_offset, nr_pages, vma);
-	if (err) {
-		dev_err(&ep->remote_dev->sdev->dev,
-			"%s %d err %d\n", __func__, __LINE__, err);
-		goto error_unlock;
-	}
-	/* Set up the driver call back */
-	vma->vm_ops = &scif_vm_ops;
-	vma->vm_private_data = vmapvt;
-error_unlock:
-	mutex_unlock(&ep->rma_info.rma_lock);
-	if (err) {
-		kfree(vmapvt);
-		dev_err(&ep->remote_dev->sdev->dev,
-			"%s %d err %d\n", __func__, __LINE__, err);
-		scif_delete_vma(ep, vma);
-	}
-	return err;
-}
diff --git a/drivers/misc/mic/scif/scif_nm.c b/drivers/misc/mic/scif/scif_nm.c
deleted file mode 100644
index c4d9422082b7..000000000000
--- a/drivers/misc/mic/scif/scif_nm.c
+++ /dev/null
@@ -1,229 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Intel SCIF driver.
- */
-#include "scif_peer_bus.h"
-
-#include "scif_main.h"
-#include "scif_map.h"
-
-/**
- * scif_invalidate_ep() - Set state for all connected endpoints
- * to disconnected and wake up all send/recv waitqueues
- *
- * @node: Node to invalidate
- */
-static void scif_invalidate_ep(int node)
-{
-	struct scif_endpt *ep;
-	struct list_head *pos, *tmpq;
-
-	flush_work(&scif_info.conn_work);
-	mutex_lock(&scif_info.connlock);
-	list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
-		ep = list_entry(pos, struct scif_endpt, list);
-		if (ep->remote_dev->node == node) {
-			scif_unmap_all_windows(ep);
-			spin_lock(&ep->lock);
-			scif_cleanup_ep_qp(ep);
-			spin_unlock(&ep->lock);
-		}
-	}
-	list_for_each_safe(pos, tmpq, &scif_info.connected) {
-		ep = list_entry(pos, struct scif_endpt, list);
-		if (ep->remote_dev->node == node) {
-			list_del(pos);
-			spin_lock(&ep->lock);
-			ep->state = SCIFEP_DISCONNECTED;
-			list_add_tail(&ep->list, &scif_info.disconnected);
-			scif_cleanup_ep_qp(ep);
-			wake_up_interruptible(&ep->sendwq);
-			wake_up_interruptible(&ep->recvwq);
-			spin_unlock(&ep->lock);
-			scif_unmap_all_windows(ep);
-		}
-	}
-	mutex_unlock(&scif_info.connlock);
-}
-
-void scif_free_qp(struct scif_dev *scifdev)
-{
-	struct scif_qp *qp = scifdev->qpairs;
-
-	if (!qp)
-		return;
-	scif_unmap_single(qp->local_buf, scifdev, qp->inbound_q.size);
-	kfree(qp->inbound_q.rb_base);
-	scif_unmap_single(qp->local_qp, scifdev, sizeof(struct scif_qp));
-	kfree(scifdev->qpairs);
-	scifdev->qpairs = NULL;
-}
-
-static void scif_cleanup_qp(struct scif_dev *dev)
-{
-	struct scif_qp *qp = &dev->qpairs[0];
-
-	if (!qp)
-		return;
-	scif_iounmap((void *)qp->remote_qp, sizeof(struct scif_qp), dev);
-	scif_iounmap((void *)qp->outbound_q.rb_base,
-		     sizeof(struct scif_qp), dev);
-	qp->remote_qp = NULL;
-	qp->local_write = 0;
-	qp->inbound_q.current_write_offset = 0;
-	qp->inbound_q.current_read_offset = 0;
-	if (scifdev_is_p2p(dev))
-		scif_free_qp(dev);
-}
-
-void scif_send_acks(struct scif_dev *dev)
-{
-	struct scifmsg msg;
-
-	if (dev->node_remove_ack_pending) {
-		msg.uop = SCIF_NODE_REMOVE_ACK;
-		msg.src.node = scif_info.nodeid;
-		msg.dst.node = SCIF_MGMT_NODE;
-		msg.payload[0] = dev->node;
-		scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], &msg);
-		dev->node_remove_ack_pending = false;
-	}
-	if (dev->exit_ack_pending) {
-		msg.uop = SCIF_EXIT_ACK;
-		msg.src.node = scif_info.nodeid;
-		msg.dst.node = dev->node;
-		scif_nodeqp_send(dev, &msg);
-		dev->exit_ack_pending = false;
-	}
-}
-
-/**
- * scif_cleanup_scifdev - Uninitialize SCIF data structures for remote
- *                        SCIF device.
- * @dev: Remote SCIF device.
- */
-void scif_cleanup_scifdev(struct scif_dev *dev)
-{
-	struct scif_hw_dev *sdev = dev->sdev;
-
-	if (!dev->sdev)
-		return;
-	if (scifdev_is_p2p(dev)) {
-		if (dev->cookie) {
-			sdev->hw_ops->free_irq(sdev, dev->cookie, dev);
-			dev->cookie = NULL;
-		}
-		scif_destroy_intr_wq(dev);
-	}
-	flush_work(&scif_info.misc_work);
-	scif_destroy_p2p(dev);
-	scif_invalidate_ep(dev->node);
-	scif_zap_mmaps(dev->node);
-	scif_cleanup_rma_for_zombies(dev->node);
-	flush_work(&scif_info.misc_work);
-	scif_send_acks(dev);
-	if (!dev->node && scif_info.card_initiated_exit) {
-		/*
-		 * Send an SCIF_EXIT message which is the last message from MIC
-		 * to the Host and wait for a SCIF_EXIT_ACK
-		 */
-		scif_send_exit(dev);
-		scif_info.card_initiated_exit = false;
-	}
-	scif_cleanup_qp(dev);
-}
-
-/**
- * scif_remove_node
- *
- * @node: Node to remove
- */
-void scif_handle_remove_node(int node)
-{
-	struct scif_dev *scifdev = &scif_dev[node];
-
-	if (scif_peer_unregister_device(scifdev))
-		scif_send_acks(scifdev);
-}
-
-static int scif_send_rmnode_msg(int node, int remove_node)
-{
-	struct scifmsg notif_msg;
-	struct scif_dev *dev = &scif_dev[node];
-
-	notif_msg.uop = SCIF_NODE_REMOVE;
-	notif_msg.src.node = scif_info.nodeid;
-	notif_msg.dst.node = node;
-	notif_msg.payload[0] = remove_node;
-	return scif_nodeqp_send(dev, &notif_msg);
-}
-
-/**
- * scif_node_disconnect
- *
- * @node_id: source node id [in]
- * @mgmt_initiated: Disconnection initiated from the mgmt node
- *
- * Disconnect a node from the scif network.
- */
-void scif_disconnect_node(u32 node_id, bool mgmt_initiated)
-{
-	int ret;
-	int msg_cnt = 0;
-	u32 i = 0;
-	struct scif_dev *scifdev = &scif_dev[node_id];
-
-	if (!node_id)
-		return;
-
-	atomic_set(&scifdev->disconn_rescnt, 0);
-
-	/* Destroy p2p network */
-	for (i = 1; i <= scif_info.maxid; i++) {
-		if (i == node_id)
-			continue;
-		ret = scif_send_rmnode_msg(i, node_id);
-		if (!ret)
-			msg_cnt++;
-	}
-	/* Wait for the remote nodes to respond with SCIF_NODE_REMOVE_ACK */
-	ret = wait_event_timeout(scifdev->disconn_wq,
-				 (atomic_read(&scifdev->disconn_rescnt)
-				 == msg_cnt), SCIF_NODE_ALIVE_TIMEOUT);
-	/* Tell the card to clean up */
-	if (mgmt_initiated && _scifdev_alive(scifdev))
-		/*
-		 * Send an SCIF_EXIT message which is the last message from Host
-		 * to the MIC and wait for a SCIF_EXIT_ACK
-		 */
-		scif_send_exit(scifdev);
-	atomic_set(&scifdev->disconn_rescnt, 0);
-	/* Tell the mgmt node to clean up */
-	ret = scif_send_rmnode_msg(SCIF_MGMT_NODE, node_id);
-	if (!ret)
-		/* Wait for mgmt node to respond with SCIF_NODE_REMOVE_ACK */
-		wait_event_timeout(scifdev->disconn_wq,
-				   (atomic_read(&scifdev->disconn_rescnt) == 1),
-				   SCIF_NODE_ALIVE_TIMEOUT);
-}
-
-void scif_get_node_info(void)
-{
-	struct scifmsg msg;
-	DECLARE_COMPLETION_ONSTACK(node_info);
-
-	msg.uop = SCIF_GET_NODE_INFO;
-	msg.src.node = scif_info.nodeid;
-	msg.dst.node = SCIF_MGMT_NODE;
-	msg.payload[3] = (u64)&node_info;
-
-	if ((scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], &msg)))
-		return;
-
-	/* Wait for a response with SCIF_GET_NODE_INFO */
-	wait_for_completion(&node_info);
-}
diff --git a/drivers/misc/mic/scif/scif_nodeqp.c b/drivers/misc/mic/scif/scif_nodeqp.c
deleted file mode 100644
index 384ce08fa98a..000000000000
--- a/drivers/misc/mic/scif/scif_nodeqp.c
+++ /dev/null
@@ -1,1349 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Intel SCIF driver.
- */
-#include "../bus/scif_bus.h"
-#include "scif_peer_bus.h"
-#include "scif_main.h"
-#include "scif_nodeqp.h"
-#include "scif_map.h"
-
-/*
- ************************************************************************
- * SCIF node Queue Pair (QP) setup flow:
- *
- * 1) SCIF driver gets probed with a scif_hw_dev via the scif_hw_bus
- * 2) scif_setup_qp(..) allocates the local qp and calls
- *	scif_setup_qp_connect(..) which allocates and maps the local
- *	buffer for the inbound QP
- * 3) The local node updates the device page with the DMA address of the QP
- * 4) A delayed work is scheduled (qp_dwork) which periodically reads if
- *	the peer node has updated its QP DMA address
- * 5) Once a valid non zero address is found in the QP DMA address field
- *	in the device page, the local node maps the remote node's QP,
- *	updates its outbound QP and sends a SCIF_INIT message to the peer
- * 6) The SCIF_INIT message is received by the peer node QP interrupt bottom
- *	half handler by calling scif_init(..)
- * 7) scif_init(..) registers a new SCIF peer node by calling
- *	scif_peer_register_device(..) which signifies the addition of a new
- *	SCIF node
- * 8) On the mgmt node, P2P network setup/teardown is initiated if all the
- *	remote nodes are online via scif_p2p_setup(..)
- * 9) For P2P setup, the host maps the remote nodes' aperture and memory
- *	bars and sends a SCIF_NODE_ADD message to both nodes
- * 10) As part of scif_nodeadd, both nodes set up their local inbound
- *	QPs and send a SCIF_NODE_ADD_ACK to the mgmt node
- * 11) As part of scif_node_add_ack(..) the mgmt node forwards the
- *	SCIF_NODE_ADD_ACK to the remote nodes
- * 12) As part of scif_node_add_ack(..) the remote nodes update their
- *	outbound QPs, make sure they can access memory on the remote node
- *	and then add a new SCIF peer node by calling
- *	scif_peer_register_device(..) which signifies the addition of a new
- *	SCIF node.
- * 13) The SCIF network is now established across all nodes.
- *
- ************************************************************************
- * SCIF node QP teardown flow (initiated by non mgmt node):
- *
- * 1) SCIF driver gets a remove callback with a scif_hw_dev via the scif_hw_bus
- * 2) The device page QP DMA address field is updated with 0x0
- * 3) A non mgmt node now cleans up all local data structures and sends a
- *	SCIF_EXIT message to the peer and waits for a SCIF_EXIT_ACK
- * 4) As part of scif_exit(..) handling scif_disconnect_node(..) is called
- * 5) scif_disconnect_node(..) sends a SCIF_NODE_REMOVE message to all the
- *	peers and waits for a SCIF_NODE_REMOVE_ACK
- * 6) As part of scif_node_remove(..) a remote node unregisters the peer
- *	node from the SCIF network and sends a SCIF_NODE_REMOVE_ACK
- * 7) When the mgmt node has received all the SCIF_NODE_REMOVE_ACKs
- *	it sends itself a node remove message whose handling cleans up local
- *	data structures and unregisters the peer node from the SCIF network
- * 8) The mgmt node sends a SCIF_EXIT_ACK
- * 9) Upon receipt of the SCIF_EXIT_ACK the node initiating the teardown
- *	completes the SCIF remove routine
- * 10) The SCIF network is now torn down for the node initiating the
- *	teardown sequence
- *
- ************************************************************************
- * SCIF node QP teardown flow (initiated by mgmt node):
- *
- * 1) SCIF driver gets a remove callback with a scif_hw_dev via the scif_hw_bus
- * 2) The device page QP DMA address field is updated with 0x0
- * 3) The mgmt node calls scif_disconnect_node(..)
- * 4) scif_disconnect_node(..) sends a SCIF_NODE_REMOVE message to all the peers
- *	and waits for a SCIF_NODE_REMOVE_ACK
- * 5) As part of scif_node_remove(..) a remote node unregisters the peer
- *	node from the SCIF network and sends a SCIF_NODE_REMOVE_ACK
- * 6) When the mgmt node has received all the SCIF_NODE_REMOVE_ACKs
- *	it unregisters the peer node from the SCIF network
- * 7) The mgmt node sends a SCIF_EXIT message and waits for a SCIF_EXIT_ACK.
- * 8) A non mgmt node upon receipt of a SCIF_EXIT message calls scif_stop(..)
- *	which would clean up local data structures for all SCIF nodes and
- *	then send a SCIF_EXIT_ACK back to the mgmt node
- * 9) Upon receipt of the SCIF_EXIT_ACK the the mgmt node sends itself a node
- *	remove message whose handling cleans up local data structures and
- *	destroys any P2P mappings.
- * 10) The SCIF hardware device for which a remove callback was received is now
- *	disconnected from the SCIF network.
- */
-/*
- * Initializes "local" data structures for the QP. Allocates the QP
- * ring buffer (rb) and initializes the "in bound" queue.
- */
-int scif_setup_qp_connect(struct scif_qp *qp, dma_addr_t *qp_offset,
-			  int local_size, struct scif_dev *scifdev)
-{
-	void *local_q = qp->inbound_q.rb_base;
-	int err = 0;
-	u32 tmp_rd = 0;
-
-	spin_lock_init(&qp->send_lock);
-	spin_lock_init(&qp->recv_lock);
-
-	/* Allocate rb only if not already allocated */
-	if (!local_q) {
-		local_q = kzalloc(local_size, GFP_KERNEL);
-		if (!local_q) {
-			err = -ENOMEM;
-			return err;
-		}
-	}
-
-	err = scif_map_single(&qp->local_buf, local_q, scifdev, local_size);
-	if (err)
-		goto kfree;
-	/*
-	 * To setup the inbound_q, the buffer lives locally, the read pointer
-	 * is remote and the write pointer is local.
-	 */
-	scif_rb_init(&qp->inbound_q,
-		     &tmp_rd,
-		     &qp->local_write,
-		     local_q, get_count_order(local_size));
-	/*
-	 * The read pointer is NULL initially and it is unsafe to use the ring
-	 * buffer til this changes!
-	 */
-	qp->inbound_q.read_ptr = NULL;
-	err = scif_map_single(qp_offset, qp,
-			      scifdev, sizeof(struct scif_qp));
-	if (err)
-		goto unmap;
-	qp->local_qp = *qp_offset;
-	return err;
-unmap:
-	scif_unmap_single(qp->local_buf, scifdev, local_size);
-	qp->local_buf = 0;
-kfree:
-	kfree(local_q);
-	return err;
-}
-
-/* When the other side has already done it's allocation, this is called */
-int scif_setup_qp_accept(struct scif_qp *qp, dma_addr_t *qp_offset,
-			 dma_addr_t phys, int local_size,
-			 struct scif_dev *scifdev)
-{
-	void *local_q;
-	void *remote_q;
-	struct scif_qp *remote_qp;
-	int remote_size;
-	int err = 0;
-
-	spin_lock_init(&qp->send_lock);
-	spin_lock_init(&qp->recv_lock);
-	/* Start by figuring out where we need to point */
-	remote_qp = scif_ioremap(phys, sizeof(struct scif_qp), scifdev);
-	if (!remote_qp)
-		return -EIO;
-	qp->remote_qp = remote_qp;
-	if (qp->remote_qp->magic != SCIFEP_MAGIC) {
-		err = -EIO;
-		goto iounmap;
-	}
-	qp->remote_buf = remote_qp->local_buf;
-	remote_size = qp->remote_qp->inbound_q.size;
-	remote_q = scif_ioremap(qp->remote_buf, remote_size, scifdev);
-	if (!remote_q) {
-		err = -EIO;
-		goto iounmap;
-	}
-	qp->remote_qp->local_write = 0;
-	/*
-	 * To setup the outbound_q, the buffer lives in remote memory,
-	 * the read pointer is local, the write pointer is remote
-	 */
-	scif_rb_init(&qp->outbound_q,
-		     &qp->local_read,
-		     &qp->remote_qp->local_write,
-		     remote_q,
-		     get_count_order(remote_size));
-	local_q = kzalloc(local_size, GFP_KERNEL);
-	if (!local_q) {
-		err = -ENOMEM;
-		goto iounmap_1;
-	}
-	err = scif_map_single(&qp->local_buf, local_q, scifdev, local_size);
-	if (err)
-		goto kfree;
-	qp->remote_qp->local_read = 0;
-	/*
-	 * To setup the inbound_q, the buffer lives locally, the read pointer
-	 * is remote and the write pointer is local
-	 */
-	scif_rb_init(&qp->inbound_q,
-		     &qp->remote_qp->local_read,
-		     &qp->local_write,
-		     local_q, get_count_order(local_size));
-	err = scif_map_single(qp_offset, qp, scifdev,
-			      sizeof(struct scif_qp));
-	if (err)
-		goto unmap;
-	qp->local_qp = *qp_offset;
-	return err;
-unmap:
-	scif_unmap_single(qp->local_buf, scifdev, local_size);
-	qp->local_buf = 0;
-kfree:
-	kfree(local_q);
-iounmap_1:
-	scif_iounmap(remote_q, remote_size, scifdev);
-	qp->outbound_q.rb_base = NULL;
-iounmap:
-	scif_iounmap(qp->remote_qp, sizeof(struct scif_qp), scifdev);
-	qp->remote_qp = NULL;
-	return err;
-}
-
-int scif_setup_qp_connect_response(struct scif_dev *scifdev,
-				   struct scif_qp *qp, u64 payload)
-{
-	int err = 0;
-	void *r_buf;
-	int remote_size;
-	phys_addr_t tmp_phys;
-
-	qp->remote_qp = scif_ioremap(payload, sizeof(struct scif_qp), scifdev);
-
-	if (!qp->remote_qp) {
-		err = -ENOMEM;
-		goto error;
-	}
-
-	if (qp->remote_qp->magic != SCIFEP_MAGIC) {
-		dev_err(&scifdev->sdev->dev,
-			"SCIFEP_MAGIC mismatch between self %d remote %d\n",
-			scif_dev[scif_info.nodeid].node, scifdev->node);
-		err = -ENODEV;
-		goto error;
-	}
-
-	tmp_phys = qp->remote_qp->local_buf;
-	remote_size = qp->remote_qp->inbound_q.size;
-	r_buf = scif_ioremap(tmp_phys, remote_size, scifdev);
-
-	if (!r_buf)
-		return -EIO;
-
-	qp->local_read = 0;
-	scif_rb_init(&qp->outbound_q,
-		     &qp->local_read,
-		     &qp->remote_qp->local_write,
-		     r_buf,
-		     get_count_order(remote_size));
-	/*
-	 * Because the node QP may already be processing an INIT message, set
-	 * the read pointer so the cached read offset isn't lost
-	 */
-	qp->remote_qp->local_read = qp->inbound_q.current_read_offset;
-	/*
-	 * resetup the inbound_q now that we know where the
-	 * inbound_read really is.
-	 */
-	scif_rb_init(&qp->inbound_q,
-		     &qp->remote_qp->local_read,
-		     &qp->local_write,
-		     qp->inbound_q.rb_base,
-		     get_count_order(qp->inbound_q.size));
-error:
-	return err;
-}
-
-static __always_inline void
-scif_send_msg_intr(struct scif_dev *scifdev)
-{
-	struct scif_hw_dev *sdev = scifdev->sdev;
-
-	if (scifdev_is_p2p(scifdev))
-		sdev->hw_ops->send_p2p_intr(sdev, scifdev->rdb, &scifdev->mmio);
-	else
-		sdev->hw_ops->send_intr(sdev, scifdev->rdb);
-}
-
-int scif_qp_response(phys_addr_t phys, struct scif_dev *scifdev)
-{
-	int err = 0;
-	struct scifmsg msg;
-
-	err = scif_setup_qp_connect_response(scifdev, scifdev->qpairs, phys);
-	if (!err) {
-		/*
-		 * Now that everything is setup and mapped, we're ready
-		 * to tell the peer about our queue's location
-		 */
-		msg.uop = SCIF_INIT;
-		msg.dst.node = scifdev->node;
-		err = scif_nodeqp_send(scifdev, &msg);
-	}
-	return err;
-}
-
-void scif_send_exit(struct scif_dev *scifdev)
-{
-	struct scifmsg msg;
-	int ret;
-
-	scifdev->exit = OP_IN_PROGRESS;
-	msg.uop = SCIF_EXIT;
-	msg.src.node = scif_info.nodeid;
-	msg.dst.node = scifdev->node;
-	ret = scif_nodeqp_send(scifdev, &msg);
-	if (ret)
-		goto done;
-	/* Wait for a SCIF_EXIT_ACK message */
-	wait_event_timeout(scif_info.exitwq, scifdev->exit == OP_COMPLETED,
-			   SCIF_NODE_ALIVE_TIMEOUT);
-done:
-	scifdev->exit = OP_IDLE;
-}
-
-int scif_setup_qp(struct scif_dev *scifdev)
-{
-	int err = 0;
-	int local_size;
-	struct scif_qp *qp;
-
-	local_size = SCIF_NODE_QP_SIZE;
-
-	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
-	if (!qp) {
-		err = -ENOMEM;
-		return err;
-	}
-	qp->magic = SCIFEP_MAGIC;
-	scifdev->qpairs = qp;
-	err = scif_setup_qp_connect(qp, &scifdev->qp_dma_addr,
-				    local_size, scifdev);
-	if (err)
-		goto free_qp;
-	/*
-	 * We're as setup as we can be. The inbound_q is setup, w/o a usable
-	 * outbound q.  When we get a message, the read_ptr will be updated,
-	 * and we will pull the message.
-	 */
-	return err;
-free_qp:
-	kfree(scifdev->qpairs);
-	scifdev->qpairs = NULL;
-	return err;
-}
-
-static void scif_p2p_freesg(struct scatterlist *sg)
-{
-	kfree(sg);
-}
-
-static struct scatterlist *
-scif_p2p_setsg(phys_addr_t pa, int page_size, int page_cnt)
-{
-	struct scatterlist *sg;
-	struct page *page;
-	int i;
-
-	sg = kmalloc_array(page_cnt, sizeof(struct scatterlist), GFP_KERNEL);
-	if (!sg)
-		return NULL;
-	sg_init_table(sg, page_cnt);
-	for (i = 0; i < page_cnt; i++) {
-		page = pfn_to_page(pa >> PAGE_SHIFT);
-		sg_set_page(&sg[i], page, page_size, 0);
-		pa += page_size;
-	}
-	return sg;
-}
-
-/* Init p2p mappings required to access peerdev from scifdev */
-static struct scif_p2p_info *
-scif_init_p2p_info(struct scif_dev *scifdev, struct scif_dev *peerdev)
-{
-	struct scif_p2p_info *p2p;
-	int num_mmio_pages, num_aper_pages, sg_page_shift, err, num_aper_chunks;
-	struct scif_hw_dev *psdev = peerdev->sdev;
-	struct scif_hw_dev *sdev = scifdev->sdev;
-
-	num_mmio_pages = psdev->mmio->len >> PAGE_SHIFT;
-	num_aper_pages = psdev->aper->len >> PAGE_SHIFT;
-
-	p2p = kzalloc(sizeof(*p2p), GFP_KERNEL);
-	if (!p2p)
-		return NULL;
-	p2p->ppi_sg[SCIF_PPI_MMIO] = scif_p2p_setsg(psdev->mmio->pa,
-						    PAGE_SIZE, num_mmio_pages);
-	if (!p2p->ppi_sg[SCIF_PPI_MMIO])
-		goto free_p2p;
-	p2p->sg_nentries[SCIF_PPI_MMIO] = num_mmio_pages;
-	sg_page_shift = get_order(min(psdev->aper->len, (u64)(1 << 30)));
-	num_aper_chunks = num_aper_pages >> (sg_page_shift - PAGE_SHIFT);
-	p2p->ppi_sg[SCIF_PPI_APER] = scif_p2p_setsg(psdev->aper->pa,
-						    1 << sg_page_shift,
-						    num_aper_chunks);
-	p2p->sg_nentries[SCIF_PPI_APER] = num_aper_chunks;
-	err = dma_map_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO],
-			 num_mmio_pages, PCI_DMA_BIDIRECTIONAL);
-	if (err != num_mmio_pages)
-		goto scif_p2p_free;
-	err = dma_map_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_APER],
-			 num_aper_chunks, PCI_DMA_BIDIRECTIONAL);
-	if (err != num_aper_chunks)
-		goto dma_unmap;
-	p2p->ppi_da[SCIF_PPI_MMIO] = sg_dma_address(p2p->ppi_sg[SCIF_PPI_MMIO]);
-	p2p->ppi_da[SCIF_PPI_APER] = sg_dma_address(p2p->ppi_sg[SCIF_PPI_APER]);
-	p2p->ppi_len[SCIF_PPI_MMIO] = num_mmio_pages;
-	p2p->ppi_len[SCIF_PPI_APER] = num_aper_pages;
-	p2p->ppi_peer_id = peerdev->node;
-	return p2p;
-dma_unmap:
-	dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO],
-		     p2p->sg_nentries[SCIF_PPI_MMIO], DMA_BIDIRECTIONAL);
-scif_p2p_free:
-	scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]);
-	scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]);
-free_p2p:
-	kfree(p2p);
-	return NULL;
-}
-
-/* Uninitialize and release resources from a p2p mapping */
-static void scif_deinit_p2p_info(struct scif_dev *scifdev,
-				 struct scif_p2p_info *p2p)
-{
-	struct scif_hw_dev *sdev = scifdev->sdev;
-
-	dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO],
-		     p2p->sg_nentries[SCIF_PPI_MMIO], DMA_BIDIRECTIONAL);
-	dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_APER],
-		     p2p->sg_nentries[SCIF_PPI_APER], DMA_BIDIRECTIONAL);
-	scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]);
-	scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]);
-	kfree(p2p);
-}
-
-/**
- * scif_node_connect: Respond to SCIF_NODE_CONNECT interrupt message
- * @scifdev: SCIF device
- * @dst: Destination node
- *
- * Connect the src and dst node by setting up the p2p connection
- * between them. Management node here acts like a proxy.
- */
-static void scif_node_connect(struct scif_dev *scifdev, int dst)
-{
-	struct scif_dev *dev_j = scifdev;
-	struct scif_dev *dev_i = NULL;
-	struct scif_p2p_info *p2p_ij = NULL;    /* bus addr for j from i */
-	struct scif_p2p_info *p2p_ji = NULL;    /* bus addr for i from j */
-	struct scif_p2p_info *p2p;
-	struct list_head *pos, *tmp;
-	struct scifmsg msg;
-	int err;
-	u64 tmppayload;
-
-	if (dst < 1 || dst > scif_info.maxid)
-		return;
-
-	dev_i = &scif_dev[dst];
-
-	if (!_scifdev_alive(dev_i))
-		return;
-	/*
-	 * If the p2p connection is already setup or in the process of setting
-	 * up then just ignore this request. The requested node will get
-	 * informed by SCIF_NODE_ADD_ACK or SCIF_NODE_ADD_NACK
-	 */
-	if (!list_empty(&dev_i->p2p)) {
-		list_for_each_safe(pos, tmp, &dev_i->p2p) {
-			p2p = list_entry(pos, struct scif_p2p_info, ppi_list);
-			if (p2p->ppi_peer_id == dev_j->node)
-				return;
-		}
-	}
-	p2p_ij = scif_init_p2p_info(dev_i, dev_j);
-	if (!p2p_ij)
-		return;
-	p2p_ji = scif_init_p2p_info(dev_j, dev_i);
-	if (!p2p_ji) {
-		scif_deinit_p2p_info(dev_i, p2p_ij);
-		return;
-	}
-	list_add_tail(&p2p_ij->ppi_list, &dev_i->p2p);
-	list_add_tail(&p2p_ji->ppi_list, &dev_j->p2p);
-
-	/*
-	 * Send a SCIF_NODE_ADD to dev_i, pass it its bus address
-	 * as seen from dev_j
-	 */
-	msg.uop = SCIF_NODE_ADD;
-	msg.src.node = dev_j->node;
-	msg.dst.node = dev_i->node;
-
-	msg.payload[0] = p2p_ji->ppi_da[SCIF_PPI_APER];
-	msg.payload[1] = p2p_ij->ppi_da[SCIF_PPI_MMIO];
-	msg.payload[2] = p2p_ij->ppi_da[SCIF_PPI_APER];
-	msg.payload[3] = p2p_ij->ppi_len[SCIF_PPI_APER] << PAGE_SHIFT;
-
-	err = scif_nodeqp_send(dev_i,  &msg);
-	if (err) {
-		dev_err(&scifdev->sdev->dev,
-			"%s %d error %d\n", __func__, __LINE__, err);
-		return;
-	}
-
-	/* Same as above but to dev_j */
-	msg.uop = SCIF_NODE_ADD;
-	msg.src.node = dev_i->node;
-	msg.dst.node = dev_j->node;
-
-	tmppayload = msg.payload[0];
-	msg.payload[0] = msg.payload[2];
-	msg.payload[2] = tmppayload;
-	msg.payload[1] = p2p_ji->ppi_da[SCIF_PPI_MMIO];
-	msg.payload[3] = p2p_ji->ppi_len[SCIF_PPI_APER] << PAGE_SHIFT;
-
-	scif_nodeqp_send(dev_j, &msg);
-}
-
-static void scif_p2p_setup(void)
-{
-	int i, j;
-
-	if (!scif_info.p2p_enable)
-		return;
-
-	for (i = 1; i <= scif_info.maxid; i++)
-		if (!_scifdev_alive(&scif_dev[i]))
-			return;
-
-	for (i = 1; i <= scif_info.maxid; i++) {
-		for (j = 1; j <= scif_info.maxid; j++) {
-			struct scif_dev *scifdev = &scif_dev[i];
-
-			if (i == j)
-				continue;
-			scif_node_connect(scifdev, j);
-		}
-	}
-}
-
-static char *message_types[] = {"BAD",
-				"INIT",
-				"EXIT",
-				"SCIF_EXIT_ACK",
-				"SCIF_NODE_ADD",
-				"SCIF_NODE_ADD_ACK",
-				"SCIF_NODE_ADD_NACK",
-				"REMOVE_NODE",
-				"REMOVE_NODE_ACK",
-				"CNCT_REQ",
-				"CNCT_GNT",
-				"CNCT_GNTACK",
-				"CNCT_GNTNACK",
-				"CNCT_REJ",
-				"DISCNCT",
-				"DISCNT_ACK",
-				"CLIENT_SENT",
-				"CLIENT_RCVD",
-				"SCIF_GET_NODE_INFO",
-				"REGISTER",
-				"REGISTER_ACK",
-				"REGISTER_NACK",
-				"UNREGISTER",
-				"UNREGISTER_ACK",
-				"UNREGISTER_NACK",
-				"ALLOC_REQ",
-				"ALLOC_GNT",
-				"ALLOC_REJ",
-				"FREE_PHYS",
-				"FREE_VIRT",
-				"MUNMAP",
-				"MARK",
-				"MARK_ACK",
-				"MARK_NACK",
-				"WAIT",
-				"WAIT_ACK",
-				"WAIT_NACK",
-				"SIGNAL_LOCAL",
-				"SIGNAL_REMOTE",
-				"SIG_ACK",
-				"SIG_NACK"};
-
-static void
-scif_display_message(struct scif_dev *scifdev, struct scifmsg *msg,
-		     const char *label)
-{
-	if (!scif_info.en_msg_log)
-		return;
-	if (msg->uop > SCIF_MAX_MSG) {
-		dev_err(&scifdev->sdev->dev,
-			"%s: unknown msg type %d\n", label, msg->uop);
-		return;
-	}
-	dev_info(&scifdev->sdev->dev,
-		 "%s: msg type %s, src %d:%d, dest %d:%d payload 0x%llx:0x%llx:0x%llx:0x%llx\n",
-		 label, message_types[msg->uop], msg->src.node, msg->src.port,
-		 msg->dst.node, msg->dst.port, msg->payload[0], msg->payload[1],
-		 msg->payload[2], msg->payload[3]);
-}
-
-int _scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	struct scif_qp *qp = scifdev->qpairs;
-	int err = -ENOMEM, loop_cnt = 0;
-
-	scif_display_message(scifdev, msg, "Sent");
-	if (!qp) {
-		err = -EINVAL;
-		goto error;
-	}
-	spin_lock(&qp->send_lock);
-
-	while ((err = scif_rb_write(&qp->outbound_q,
-				    msg, sizeof(struct scifmsg)))) {
-		mdelay(1);
-#define SCIF_NODEQP_SEND_TO_MSEC (3 * 1000)
-		if (loop_cnt++ > (SCIF_NODEQP_SEND_TO_MSEC)) {
-			err = -ENODEV;
-			break;
-		}
-	}
-	if (!err)
-		scif_rb_commit(&qp->outbound_q);
-	spin_unlock(&qp->send_lock);
-	if (!err) {
-		if (scifdev_self(scifdev))
-			/*
-			 * For loopback we need to emulate an interrupt by
-			 * queuing work for the queue handling real node
-			 * Qp interrupts.
-			 */
-			queue_work(scifdev->intr_wq, &scifdev->intr_bh);
-		else
-			scif_send_msg_intr(scifdev);
-	}
-error:
-	if (err)
-		dev_dbg(&scifdev->sdev->dev,
-			"%s %d error %d uop %d\n",
-			 __func__, __LINE__, err, msg->uop);
-	return err;
-}
-
-/**
- * scif_nodeqp_send - Send a message on the node queue pair
- * @scifdev: Scif Device.
- * @msg: The message to be sent.
- */
-int scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	int err;
-	struct device *spdev = NULL;
-
-	if (msg->uop > SCIF_EXIT_ACK) {
-		/* Don't send messages once the exit flow has begun */
-		if (OP_IDLE != scifdev->exit)
-			return -ENODEV;
-		spdev = scif_get_peer_dev(scifdev);
-		if (IS_ERR(spdev)) {
-			err = PTR_ERR(spdev);
-			return err;
-		}
-	}
-	err = _scif_nodeqp_send(scifdev, msg);
-	if (msg->uop > SCIF_EXIT_ACK)
-		scif_put_peer_dev(spdev);
-	return err;
-}
-
-/*
- * scif_misc_handler:
- *
- * Work queue handler for servicing miscellaneous SCIF tasks.
- * Examples include:
- * 1) Remote fence requests.
- * 2) Destruction of temporary registered windows
- *    created during scif_vreadfrom()/scif_vwriteto().
- * 3) Cleanup of zombie endpoints.
- */
-void scif_misc_handler(struct work_struct *work)
-{
-	scif_rma_handle_remote_fences();
-	scif_rma_destroy_windows();
-	scif_rma_destroy_tcw_invalid();
-	scif_cleanup_zombie_epd();
-}
-
-/**
- * scif_init() - Respond to SCIF_INIT interrupt message
- * @scifdev:    Remote SCIF device node
- * @msg:        Interrupt message
- */
-static __always_inline void
-scif_init(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	/*
-	 * Allow the thread waiting for device page updates for the peer QP DMA
-	 * address to complete initializing the inbound_q.
-	 */
-	flush_delayed_work(&scifdev->qp_dwork);
-
-	scif_peer_register_device(scifdev);
-
-	if (scif_is_mgmt_node()) {
-		mutex_lock(&scif_info.conflock);
-		scif_p2p_setup();
-		mutex_unlock(&scif_info.conflock);
-	}
-}
-
-/**
- * scif_exit() - Respond to SCIF_EXIT interrupt message
- * @scifdev:    Remote SCIF device node
- * @unused:     Interrupt message (unused)
- *
- * This function stops the SCIF interface for the node which sent
- * the SCIF_EXIT message and starts waiting for that node to
- * resetup the queue pair again.
- */
-static __always_inline void
-scif_exit(struct scif_dev *scifdev, struct scifmsg *unused)
-{
-	scifdev->exit_ack_pending = true;
-	if (scif_is_mgmt_node())
-		scif_disconnect_node(scifdev->node, false);
-	else
-		scif_stop(scifdev);
-	schedule_delayed_work(&scifdev->qp_dwork,
-			      msecs_to_jiffies(1000));
-}
-
-/**
- * scif_exitack() - Respond to SCIF_EXIT_ACK interrupt message
- * @scifdev:    Remote SCIF device node
- * @unused:     Interrupt message (unused)
- *
- */
-static __always_inline void
-scif_exit_ack(struct scif_dev *scifdev, struct scifmsg *unused)
-{
-	scifdev->exit = OP_COMPLETED;
-	wake_up(&scif_info.exitwq);
-}
-
-/**
- * scif_node_add() - Respond to SCIF_NODE_ADD interrupt message
- * @scifdev:    Remote SCIF device node
- * @msg:        Interrupt message
- *
- * When the mgmt node driver has finished initializing a MIC node queue pair it
- * marks the node as online. It then looks for all currently online MIC cards
- * and send a SCIF_NODE_ADD message to identify the ID of the new card for
- * peer to peer initialization
- *
- * The local node allocates its incoming queue and sends its address in the
- * SCIF_NODE_ADD_ACK message back to the mgmt node, the mgmt node "reflects"
- * this message to the new node
- */
-static __always_inline void
-scif_node_add(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	struct scif_dev *newdev;
-	dma_addr_t qp_offset;
-	int qp_connect;
-	struct scif_hw_dev *sdev;
-
-	dev_dbg(&scifdev->sdev->dev,
-		"Scifdev %d:%d received NODE_ADD msg for node %d\n",
-		scifdev->node, msg->dst.node, msg->src.node);
-	dev_dbg(&scifdev->sdev->dev,
-		"Remote address for this node's aperture %llx\n",
-		msg->payload[0]);
-	newdev = &scif_dev[msg->src.node];
-	newdev->node = msg->src.node;
-	newdev->sdev = scif_dev[SCIF_MGMT_NODE].sdev;
-	sdev = newdev->sdev;
-
-	if (scif_setup_intr_wq(newdev)) {
-		dev_err(&scifdev->sdev->dev,
-			"failed to setup interrupts for %d\n", msg->src.node);
-		goto interrupt_setup_error;
-	}
-	newdev->mmio.va = ioremap(msg->payload[1], sdev->mmio->len);
-	if (!newdev->mmio.va) {
-		dev_err(&scifdev->sdev->dev,
-			"failed to map mmio for %d\n", msg->src.node);
-		goto mmio_map_error;
-	}
-	newdev->qpairs = kzalloc(sizeof(*newdev->qpairs), GFP_KERNEL);
-	if (!newdev->qpairs)
-		goto qp_alloc_error;
-	/*
-	 * Set the base address of the remote node's memory since it gets
-	 * added to qp_offset
-	 */
-	newdev->base_addr = msg->payload[0];
-
-	qp_connect = scif_setup_qp_connect(newdev->qpairs, &qp_offset,
-					   SCIF_NODE_QP_SIZE, newdev);
-	if (qp_connect) {
-		dev_err(&scifdev->sdev->dev,
-			"failed to setup qp_connect %d\n", qp_connect);
-		goto qp_connect_error;
-	}
-
-	newdev->db = sdev->hw_ops->next_db(sdev);
-	newdev->cookie = sdev->hw_ops->request_irq(sdev, scif_intr_handler,
-						   "SCIF_INTR", newdev,
-						   newdev->db);
-	if (IS_ERR(newdev->cookie))
-		goto qp_connect_error;
-	newdev->qpairs->magic = SCIFEP_MAGIC;
-	newdev->qpairs->qp_state = SCIF_QP_OFFLINE;
-
-	msg->uop = SCIF_NODE_ADD_ACK;
-	msg->dst.node = msg->src.node;
-	msg->src.node = scif_info.nodeid;
-	msg->payload[0] = qp_offset;
-	msg->payload[2] = newdev->db;
-	scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], msg);
-	return;
-qp_connect_error:
-	kfree(newdev->qpairs);
-	newdev->qpairs = NULL;
-qp_alloc_error:
-	iounmap(newdev->mmio.va);
-	newdev->mmio.va = NULL;
-mmio_map_error:
-interrupt_setup_error:
-	dev_err(&scifdev->sdev->dev,
-		"node add failed for node %d\n", msg->src.node);
-	msg->uop = SCIF_NODE_ADD_NACK;
-	msg->dst.node = msg->src.node;
-	msg->src.node = scif_info.nodeid;
-	scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], msg);
-}
-
-void scif_poll_qp_state(struct work_struct *work)
-{
-#define SCIF_NODE_QP_RETRY 100
-#define SCIF_NODE_QP_TIMEOUT 100
-	struct scif_dev *peerdev = container_of(work, struct scif_dev,
-							p2p_dwork.work);
-	struct scif_qp *qp = &peerdev->qpairs[0];
-
-	if (qp->qp_state != SCIF_QP_ONLINE ||
-	    qp->remote_qp->qp_state != SCIF_QP_ONLINE) {
-		if (peerdev->p2p_retry++ == SCIF_NODE_QP_RETRY) {
-			dev_err(&peerdev->sdev->dev,
-				"Warning: QP check timeout with state %d\n",
-				qp->qp_state);
-			goto timeout;
-		}
-		schedule_delayed_work(&peerdev->p2p_dwork,
-				      msecs_to_jiffies(SCIF_NODE_QP_TIMEOUT));
-		return;
-	}
-	return;
-timeout:
-	dev_err(&peerdev->sdev->dev,
-		"%s %d remote node %d offline,  state = 0x%x\n",
-		__func__, __LINE__, peerdev->node, qp->qp_state);
-	qp->remote_qp->qp_state = SCIF_QP_OFFLINE;
-	scif_peer_unregister_device(peerdev);
-	scif_cleanup_scifdev(peerdev);
-}
-
-/**
- * scif_node_add_ack() - Respond to SCIF_NODE_ADD_ACK interrupt message
- * @scifdev:    Remote SCIF device node
- * @msg:        Interrupt message
- *
- * After a MIC node receives the SCIF_NODE_ADD_ACK message it send this
- * message to the mgmt node to confirm the sequence is finished.
- *
- */
-static __always_inline void
-scif_node_add_ack(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	struct scif_dev *peerdev;
-	struct scif_qp *qp;
-	struct scif_dev *dst_dev = &scif_dev[msg->dst.node];
-
-	dev_dbg(&scifdev->sdev->dev,
-		"Scifdev %d received SCIF_NODE_ADD_ACK msg src %d dst %d\n",
-		scifdev->node, msg->src.node, msg->dst.node);
-	dev_dbg(&scifdev->sdev->dev,
-		"payload %llx %llx %llx %llx\n", msg->payload[0],
-		msg->payload[1], msg->payload[2], msg->payload[3]);
-	if (scif_is_mgmt_node()) {
-		/*
-		 * the lock serializes with scif_qp_response_ack. The mgmt node
-		 * is forwarding the NODE_ADD_ACK message from src to dst we
-		 * need to make sure that the dst has already received a
-		 * NODE_ADD for src and setup its end of the qp to dst
-		 */
-		mutex_lock(&scif_info.conflock);
-		msg->payload[1] = scif_info.maxid;
-		scif_nodeqp_send(dst_dev, msg);
-		mutex_unlock(&scif_info.conflock);
-		return;
-	}
-	peerdev = &scif_dev[msg->src.node];
-	peerdev->sdev = scif_dev[SCIF_MGMT_NODE].sdev;
-	peerdev->node = msg->src.node;
-
-	qp = &peerdev->qpairs[0];
-
-	if ((scif_setup_qp_connect_response(peerdev, &peerdev->qpairs[0],
-					    msg->payload[0])))
-		goto local_error;
-	peerdev->rdb = msg->payload[2];
-	qp->remote_qp->qp_state = SCIF_QP_ONLINE;
-
-	scif_peer_register_device(peerdev);
-
-	schedule_delayed_work(&peerdev->p2p_dwork, 0);
-	return;
-local_error:
-	scif_cleanup_scifdev(peerdev);
-}
-
-/**
- * scif_node_add_nack: Respond to SCIF_NODE_ADD_NACK interrupt message
- * @scifdev:    Remote SCIF device node
- * @msg:        Interrupt message
- *
- * SCIF_NODE_ADD failed, so inform the waiting wq.
- */
-static __always_inline void
-scif_node_add_nack(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	if (scif_is_mgmt_node()) {
-		struct scif_dev *dst_dev = &scif_dev[msg->dst.node];
-
-		dev_dbg(&scifdev->sdev->dev,
-			"SCIF_NODE_ADD_NACK received from %d\n", scifdev->node);
-		scif_nodeqp_send(dst_dev, msg);
-	}
-}
-
-/**
- * scif_node_remove: Handle SCIF_NODE_REMOVE message
- * @scifdev:    Remote SCIF device node
- * @msg: Interrupt message
- *
- * Handle node removal.
- */
-static __always_inline void
-scif_node_remove(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	int node = msg->payload[0];
-	struct scif_dev *scdev = &scif_dev[node];
-
-	scdev->node_remove_ack_pending = true;
-	scif_handle_remove_node(node);
-}
-
-/**
- * scif_node_remove_ack: Handle SCIF_NODE_REMOVE_ACK message
- * @scifdev:    Remote SCIF device node
- * @msg: Interrupt message
- *
- * The peer has acked a SCIF_NODE_REMOVE message.
- */
-static __always_inline void
-scif_node_remove_ack(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	struct scif_dev *sdev = &scif_dev[msg->payload[0]];
-
-	atomic_inc(&sdev->disconn_rescnt);
-	wake_up(&sdev->disconn_wq);
-}
-
-/**
- * scif_get_node_info: Respond to SCIF_GET_NODE_INFO interrupt message
- * @scifdev:    Remote SCIF device node
- * @msg:        Interrupt message
- *
- * Retrieve node info i.e maxid and total from the mgmt node.
- */
-static __always_inline void
-scif_get_node_info_resp(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	if (scif_is_mgmt_node()) {
-		swap(msg->dst.node, msg->src.node);
-		mutex_lock(&scif_info.conflock);
-		msg->payload[1] = scif_info.maxid;
-		msg->payload[2] = scif_info.total;
-		mutex_unlock(&scif_info.conflock);
-		scif_nodeqp_send(scifdev, msg);
-	} else {
-		struct completion *node_info =
-			(struct completion *)msg->payload[3];
-
-		mutex_lock(&scif_info.conflock);
-		scif_info.maxid = msg->payload[1];
-		scif_info.total = msg->payload[2];
-		complete_all(node_info);
-		mutex_unlock(&scif_info.conflock);
-	}
-}
-
-static void
-scif_msg_unknown(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	/* Bogus Node Qp Message? */
-	dev_err(&scifdev->sdev->dev,
-		"Unknown message 0x%xn scifdev->node 0x%x\n",
-		msg->uop, scifdev->node);
-}
-
-static void (*scif_intr_func[SCIF_MAX_MSG + 1])
-	    (struct scif_dev *, struct scifmsg *msg) = {
-	scif_msg_unknown,	/* Error */
-	scif_init,		/* SCIF_INIT */
-	scif_exit,		/* SCIF_EXIT */
-	scif_exit_ack,		/* SCIF_EXIT_ACK */
-	scif_node_add,		/* SCIF_NODE_ADD */
-	scif_node_add_ack,	/* SCIF_NODE_ADD_ACK */
-	scif_node_add_nack,	/* SCIF_NODE_ADD_NACK */
-	scif_node_remove,	/* SCIF_NODE_REMOVE */
-	scif_node_remove_ack,	/* SCIF_NODE_REMOVE_ACK */
-	scif_cnctreq,		/* SCIF_CNCT_REQ */
-	scif_cnctgnt,		/* SCIF_CNCT_GNT */
-	scif_cnctgnt_ack,	/* SCIF_CNCT_GNTACK */
-	scif_cnctgnt_nack,	/* SCIF_CNCT_GNTNACK */
-	scif_cnctrej,		/* SCIF_CNCT_REJ */
-	scif_discnct,		/* SCIF_DISCNCT */
-	scif_discnt_ack,	/* SCIF_DISCNT_ACK */
-	scif_clientsend,	/* SCIF_CLIENT_SENT */
-	scif_clientrcvd,	/* SCIF_CLIENT_RCVD */
-	scif_get_node_info_resp,/* SCIF_GET_NODE_INFO */
-	scif_recv_reg,		/* SCIF_REGISTER */
-	scif_recv_reg_ack,	/* SCIF_REGISTER_ACK */
-	scif_recv_reg_nack,	/* SCIF_REGISTER_NACK */
-	scif_recv_unreg,	/* SCIF_UNREGISTER */
-	scif_recv_unreg_ack,	/* SCIF_UNREGISTER_ACK */
-	scif_recv_unreg_nack,	/* SCIF_UNREGISTER_NACK */
-	scif_alloc_req,		/* SCIF_ALLOC_REQ */
-	scif_alloc_gnt_rej,	/* SCIF_ALLOC_GNT */
-	scif_alloc_gnt_rej,	/* SCIF_ALLOC_REJ */
-	scif_free_virt,		/* SCIF_FREE_VIRT */
-	scif_recv_munmap,	/* SCIF_MUNMAP */
-	scif_recv_mark,		/* SCIF_MARK */
-	scif_recv_mark_resp,	/* SCIF_MARK_ACK */
-	scif_recv_mark_resp,	/* SCIF_MARK_NACK */
-	scif_recv_wait,		/* SCIF_WAIT */
-	scif_recv_wait_resp,	/* SCIF_WAIT_ACK */
-	scif_recv_wait_resp,	/* SCIF_WAIT_NACK */
-	scif_recv_sig_local,	/* SCIF_SIG_LOCAL */
-	scif_recv_sig_remote,	/* SCIF_SIG_REMOTE */
-	scif_recv_sig_resp,	/* SCIF_SIG_ACK */
-	scif_recv_sig_resp,	/* SCIF_SIG_NACK */
-};
-
-static int scif_max_msg_id = SCIF_MAX_MSG;
-/**
- * scif_nodeqp_msg_handler() - Common handler for node messages
- * @scifdev: Remote device to respond to
- * @qp: Remote memory pointer
- * @msg: The message to be handled.
- *
- * This routine calls the appropriate routine to handle a Node Qp
- * message receipt
- */
-static void
-scif_nodeqp_msg_handler(struct scif_dev *scifdev,
-			struct scif_qp *qp, struct scifmsg *msg)
-{
-	scif_display_message(scifdev, msg, "Rcvd");
-
-	if (msg->uop > (u32)scif_max_msg_id) {
-		/* Bogus Node Qp Message? */
-		dev_err(&scifdev->sdev->dev,
-			"Unknown message 0x%xn scifdev->node 0x%x\n",
-			msg->uop, scifdev->node);
-		return;
-	}
-
-	scif_intr_func[msg->uop](scifdev, msg);
-}
-
-/**
- * scif_nodeqp_intrhandler() - Interrupt handler for node messages
- * @scifdev:    Remote device to respond to
- * @qp:         Remote memory pointer
- *
- * This routine is triggered by the interrupt mechanism.  It reads
- * messages from the node queue RB and calls the Node QP Message handling
- * routine.
- */
-void scif_nodeqp_intrhandler(struct scif_dev *scifdev, struct scif_qp *qp)
-{
-	struct scifmsg msg;
-	int read_size;
-
-	do {
-		read_size = scif_rb_get_next(&qp->inbound_q, &msg, sizeof(msg));
-		if (!read_size)
-			break;
-		scif_nodeqp_msg_handler(scifdev, qp, &msg);
-		/*
-		 * The node queue pair is unmapped so skip the read pointer
-		 * update after receipt of a SCIF_EXIT_ACK
-		 */
-		if (SCIF_EXIT_ACK == msg.uop)
-			break;
-		scif_rb_update_read_ptr(&qp->inbound_q);
-	} while (1);
-}
-
-/**
- * scif_loopb_wq_handler - Loopback Workqueue Handler.
- * @unused: loop back work (unused)
- *
- * This work queue routine is invoked by the loopback work queue handler.
- * It grabs the recv lock, dequeues any available messages from the head
- * of the loopback message list, calls the node QP message handler,
- * waits for it to return, then frees up this message and dequeues more
- * elements of the list if available.
- */
-static void scif_loopb_wq_handler(struct work_struct *unused)
-{
-	struct scif_dev *scifdev = scif_info.loopb_dev;
-	struct scif_qp *qp = scifdev->qpairs;
-	struct scif_loopb_msg *msg;
-
-	do {
-		msg = NULL;
-		spin_lock(&qp->recv_lock);
-		if (!list_empty(&scif_info.loopb_recv_q)) {
-			msg = list_first_entry(&scif_info.loopb_recv_q,
-					       struct scif_loopb_msg,
-					       list);
-			list_del(&msg->list);
-		}
-		spin_unlock(&qp->recv_lock);
-
-		if (msg) {
-			scif_nodeqp_msg_handler(scifdev, qp, &msg->msg);
-			kfree(msg);
-		}
-	} while (msg);
-}
-
-/**
- * scif_loopb_msg_handler() - Workqueue handler for loopback messages.
- * @scifdev: SCIF device
- * @qp: Queue pair.
- *
- * This work queue routine is triggered when a loopback message is received.
- *
- * We need special handling for receiving Node Qp messages on a loopback SCIF
- * device via two workqueues for receiving messages.
- *
- * The reason we need the extra workqueue which is not required with *normal*
- * non-loopback SCIF devices is the potential classic deadlock described below:
- *
- * Thread A tries to send a message on a loopback SCIF device and blocks since
- * there is no space in the RB while it has the send_lock held or another
- * lock called lock X for example.
- *
- * Thread B: The Loopback Node QP message receive workqueue receives the message
- * and tries to send a message (eg an ACK) to the loopback SCIF device. It tries
- * to grab the send lock again or lock X and deadlocks with Thread A. The RB
- * cannot be drained any further due to this classic deadlock.
- *
- * In order to avoid deadlocks as mentioned above we have an extra level of
- * indirection achieved by having two workqueues.
- * 1) The first workqueue whose handler is scif_loopb_msg_handler reads
- * messages from the Node QP RB, adds them to a list and queues work for the
- * second workqueue.
- *
- * 2) The second workqueue whose handler is scif_loopb_wq_handler dequeues
- * messages from the list, handles them, frees up the memory and dequeues
- * more elements from the list if possible.
- */
-int
-scif_loopb_msg_handler(struct scif_dev *scifdev, struct scif_qp *qp)
-{
-	int read_size;
-	struct scif_loopb_msg *msg;
-
-	do {
-		msg = kmalloc(sizeof(*msg), GFP_KERNEL);
-		if (!msg)
-			return -ENOMEM;
-		read_size = scif_rb_get_next(&qp->inbound_q, &msg->msg,
-					     sizeof(struct scifmsg));
-		if (read_size != sizeof(struct scifmsg)) {
-			kfree(msg);
-			scif_rb_update_read_ptr(&qp->inbound_q);
-			break;
-		}
-		spin_lock(&qp->recv_lock);
-		list_add_tail(&msg->list, &scif_info.loopb_recv_q);
-		spin_unlock(&qp->recv_lock);
-		queue_work(scif_info.loopb_wq, &scif_info.loopb_work);
-		scif_rb_update_read_ptr(&qp->inbound_q);
-	} while (read_size == sizeof(struct scifmsg));
-	return read_size;
-}
-
-/**
- * scif_setup_loopback_qp - One time setup work for Loopback Node Qp.
- * @scifdev: SCIF device
- *
- * Sets up the required loopback workqueues, queue pairs and ring buffers
- */
-int scif_setup_loopback_qp(struct scif_dev *scifdev)
-{
-	int err = 0;
-	void *local_q;
-	struct scif_qp *qp;
-
-	err = scif_setup_intr_wq(scifdev);
-	if (err)
-		goto exit;
-	INIT_LIST_HEAD(&scif_info.loopb_recv_q);
-	snprintf(scif_info.loopb_wqname, sizeof(scif_info.loopb_wqname),
-		 "SCIF LOOPB %d", scifdev->node);
-	scif_info.loopb_wq =
-		alloc_ordered_workqueue(scif_info.loopb_wqname, 0);
-	if (!scif_info.loopb_wq) {
-		err = -ENOMEM;
-		goto destroy_intr;
-	}
-	INIT_WORK(&scif_info.loopb_work, scif_loopb_wq_handler);
-	/* Allocate Self Qpair */
-	scifdev->qpairs = kzalloc(sizeof(*scifdev->qpairs), GFP_KERNEL);
-	if (!scifdev->qpairs) {
-		err = -ENOMEM;
-		goto destroy_loopb_wq;
-	}
-
-	qp = scifdev->qpairs;
-	qp->magic = SCIFEP_MAGIC;
-	spin_lock_init(&qp->send_lock);
-	spin_lock_init(&qp->recv_lock);
-
-	local_q = kzalloc(SCIF_NODE_QP_SIZE, GFP_KERNEL);
-	if (!local_q) {
-		err = -ENOMEM;
-		goto free_qpairs;
-	}
-	/*
-	 * For loopback the inbound_q and outbound_q are essentially the same
-	 * since the Node sends a message on the loopback interface to the
-	 * outbound_q which is then received on the inbound_q.
-	 */
-	scif_rb_init(&qp->outbound_q,
-		     &qp->local_read,
-		     &qp->local_write,
-		     local_q, get_count_order(SCIF_NODE_QP_SIZE));
-
-	scif_rb_init(&qp->inbound_q,
-		     &qp->local_read,
-		     &qp->local_write,
-		     local_q, get_count_order(SCIF_NODE_QP_SIZE));
-	scif_info.nodeid = scifdev->node;
-
-	scif_peer_register_device(scifdev);
-
-	scif_info.loopb_dev = scifdev;
-	return err;
-free_qpairs:
-	kfree(scifdev->qpairs);
-destroy_loopb_wq:
-	destroy_workqueue(scif_info.loopb_wq);
-destroy_intr:
-	scif_destroy_intr_wq(scifdev);
-exit:
-	return err;
-}
-
-/**
- * scif_destroy_loopback_qp - One time uninit work for Loopback Node Qp
- * @scifdev: SCIF device
- *
- * Destroys the workqueues and frees up the Ring Buffer and Queue Pair memory.
- */
-int scif_destroy_loopback_qp(struct scif_dev *scifdev)
-{
-	scif_peer_unregister_device(scifdev);
-	destroy_workqueue(scif_info.loopb_wq);
-	scif_destroy_intr_wq(scifdev);
-	kfree(scifdev->qpairs->outbound_q.rb_base);
-	kfree(scifdev->qpairs);
-	scifdev->sdev = NULL;
-	scif_info.loopb_dev = NULL;
-	return 0;
-}
-
-void scif_destroy_p2p(struct scif_dev *scifdev)
-{
-	struct scif_dev *peer_dev;
-	struct scif_p2p_info *p2p;
-	struct list_head *pos, *tmp;
-	int bd;
-
-	mutex_lock(&scif_info.conflock);
-	/* Free P2P mappings in the given node for all its peer nodes */
-	list_for_each_safe(pos, tmp, &scifdev->p2p) {
-		p2p = list_entry(pos, struct scif_p2p_info, ppi_list);
-		dma_unmap_sg(&scifdev->sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO],
-			     p2p->sg_nentries[SCIF_PPI_MMIO],
-			     DMA_BIDIRECTIONAL);
-		dma_unmap_sg(&scifdev->sdev->dev, p2p->ppi_sg[SCIF_PPI_APER],
-			     p2p->sg_nentries[SCIF_PPI_APER],
-			     DMA_BIDIRECTIONAL);
-		scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]);
-		scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]);
-		list_del(pos);
-		kfree(p2p);
-	}
-
-	/* Free P2P mapping created in the peer nodes for the given node */
-	for (bd = SCIF_MGMT_NODE + 1; bd <= scif_info.maxid; bd++) {
-		peer_dev = &scif_dev[bd];
-		list_for_each_safe(pos, tmp, &peer_dev->p2p) {
-			p2p = list_entry(pos, struct scif_p2p_info, ppi_list);
-			if (p2p->ppi_peer_id == scifdev->node) {
-				dma_unmap_sg(&peer_dev->sdev->dev,
-					     p2p->ppi_sg[SCIF_PPI_MMIO],
-					     p2p->sg_nentries[SCIF_PPI_MMIO],
-					     DMA_BIDIRECTIONAL);
-				dma_unmap_sg(&peer_dev->sdev->dev,
-					     p2p->ppi_sg[SCIF_PPI_APER],
-					     p2p->sg_nentries[SCIF_PPI_APER],
-					     DMA_BIDIRECTIONAL);
-				scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]);
-				scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]);
-				list_del(pos);
-				kfree(p2p);
-			}
-		}
-	}
-	mutex_unlock(&scif_info.conflock);
-}
diff --git a/drivers/misc/mic/scif/scif_nodeqp.h b/drivers/misc/mic/scif/scif_nodeqp.h
deleted file mode 100644
index 95896273138e..000000000000
--- a/drivers/misc/mic/scif/scif_nodeqp.h
+++ /dev/null
@@ -1,221 +0,0 @@
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- *   notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- *   notice, this list of conditions and the following disclaimer in
- *   the documentation and/or other materials provided with the
- *   distribution.
- * * Neither the name of Intel Corporation nor the names of its
- *   contributors may be used to endorse or promote products derived
- *   from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Intel SCIF driver.
- *
- */
-#ifndef SCIF_NODEQP
-#define SCIF_NODEQP
-
-#include "scif_rb.h"
-#include "scif_peer_bus.h"
-
-#define SCIF_INIT 1  /* First message sent to the peer node for discovery */
-#define SCIF_EXIT 2  /* Last message from the peer informing intent to exit */
-#define SCIF_EXIT_ACK 3 /* Response to SCIF_EXIT message */
-#define SCIF_NODE_ADD 4  /* Tell Online nodes a new node exits */
-#define SCIF_NODE_ADD_ACK 5  /* Confirm to mgmt node sequence is finished */
-#define SCIF_NODE_ADD_NACK 6 /* SCIF_NODE_ADD failed */
-#define SCIF_NODE_REMOVE 7 /* Request to deactivate a SCIF node */
-#define SCIF_NODE_REMOVE_ACK 8 /* Response to a SCIF_NODE_REMOVE message */
-#define SCIF_CNCT_REQ 9  /* Phys addr of Request connection to a port */
-#define SCIF_CNCT_GNT 10  /* Phys addr of new Grant connection request */
-#define SCIF_CNCT_GNTACK 11  /* Error type Reject a connection request */
-#define SCIF_CNCT_GNTNACK 12  /* Error type Reject a connection request */
-#define SCIF_CNCT_REJ 13  /* Error type Reject a connection request */
-#define SCIF_DISCNCT 14 /* Notify peer that connection is being terminated */
-#define SCIF_DISCNT_ACK 15 /* Notify peer that connection is being terminated */
-#define SCIF_CLIENT_SENT 16 /* Notify the peer that data has been written */
-#define SCIF_CLIENT_RCVD 17 /* Notify the peer that data has been read */
-#define SCIF_GET_NODE_INFO 18 /* Get current node mask from the mgmt node*/
-#define SCIF_REGISTER 19 /* Tell peer about a new registered window */
-#define SCIF_REGISTER_ACK 20 /* Notify peer about unregistration success */
-#define SCIF_REGISTER_NACK 21 /* Notify peer about registration success */
-#define SCIF_UNREGISTER 22 /* Tell peer about unregistering a window */
-#define SCIF_UNREGISTER_ACK 23 /* Notify peer about registration failure */
-#define SCIF_UNREGISTER_NACK 24 /* Notify peer about unregistration failure */
-#define SCIF_ALLOC_REQ 25 /* Request a mapped buffer */
-#define SCIF_ALLOC_GNT 26 /* Notify peer about allocation success */
-#define SCIF_ALLOC_REJ 27 /* Notify peer about allocation failure */
-#define SCIF_FREE_VIRT 28 /* Free previously allocated virtual memory */
-#define SCIF_MUNMAP 29 /* Acknowledgment for a SCIF_MMAP request */
-#define SCIF_MARK 30 /* SCIF Remote Fence Mark Request */
-#define SCIF_MARK_ACK 31 /* SCIF Remote Fence Mark Success */
-#define SCIF_MARK_NACK 32 /* SCIF Remote Fence Mark Failure */
-#define SCIF_WAIT 33 /* SCIF Remote Fence Wait Request */
-#define SCIF_WAIT_ACK 34 /* SCIF Remote Fence Wait Success */
-#define SCIF_WAIT_NACK 35 /* SCIF Remote Fence Wait Failure */
-#define SCIF_SIG_LOCAL 36 /* SCIF Remote Fence Local Signal Request */
-#define SCIF_SIG_REMOTE 37 /* SCIF Remote Fence Remote Signal Request */
-#define SCIF_SIG_ACK 38 /* SCIF Remote Fence Remote Signal Success */
-#define SCIF_SIG_NACK 39 /* SCIF Remote Fence Remote Signal Failure */
-#define SCIF_MAX_MSG SCIF_SIG_NACK
-
-/*
- * struct scifmsg - Node QP message format
- *
- * @src: Source information
- * @dst: Destination information
- * @uop: The message opcode
- * @payload: Unique payload format for each message
- */
-struct scifmsg {
-	struct scif_port_id src;
-	struct scif_port_id dst;
-	u32 uop;
-	u64 payload[4];
-} __packed;
-
-/*
- * struct scif_allocmsg - Used with SCIF_ALLOC_REQ to request
- * the remote note to allocate memory
- *
- * phys_addr: Physical address of the buffer
- * vaddr: Virtual address of the buffer
- * size: Size of the buffer
- * state: Current state
- * allocwq: wait queue for status
- */
-struct scif_allocmsg {
-	dma_addr_t phys_addr;
-	unsigned long vaddr;
-	size_t size;
-	enum scif_msg_state state;
-	wait_queue_head_t allocwq;
-};
-
-/*
- * struct scif_qp - Node Queue Pair
- *
- * Interesting structure -- a little difficult because we can only
- * write across the PCIe, so any r/w pointer we need to read is
- * local. We only need to read the read pointer on the inbound_q
- * and read the write pointer in the outbound_q
- *
- * @magic: Magic value to ensure the peer sees the QP correctly
- * @outbound_q: The outbound ring buffer for sending messages
- * @inbound_q: The inbound ring buffer for receiving messages
- * @local_write: Local write index
- * @local_read: Local read index
- * @remote_qp: The remote queue pair
- * @local_buf: DMA address of local ring buffer
- * @local_qp: DMA address of the local queue pair data structure
- * @remote_buf: DMA address of remote ring buffer
- * @qp_state: QP state i.e. online or offline used for P2P
- * @send_lock: synchronize access to outbound queue
- * @recv_lock: Synchronize access to inbound queue
- */
-struct scif_qp {
-	u64 magic;
-#define SCIFEP_MAGIC 0x5c1f000000005c1fULL
-	struct scif_rb outbound_q;
-	struct scif_rb inbound_q;
-
-	u32 local_write __aligned(64);
-	u32 local_read __aligned(64);
-	struct scif_qp *remote_qp;
-	dma_addr_t local_buf;
-	dma_addr_t local_qp;
-	dma_addr_t remote_buf;
-	u32 qp_state;
-#define SCIF_QP_OFFLINE 0xdead
-#define SCIF_QP_ONLINE 0xc0de
-	spinlock_t send_lock;
-	spinlock_t recv_lock;
-};
-
-/*
- * struct scif_loopb_msg - An element in the loopback Node QP message list.
- *
- * @msg - The SCIF node QP message
- * @list - link in the list of messages
- */
-struct scif_loopb_msg {
-	struct scifmsg msg;
-	struct list_head list;
-};
-
-int scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg);
-int _scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg);
-void scif_nodeqp_intrhandler(struct scif_dev *scifdev, struct scif_qp *qp);
-int scif_loopb_msg_handler(struct scif_dev *scifdev, struct scif_qp *qp);
-int scif_setup_qp(struct scif_dev *scifdev);
-int scif_qp_response(phys_addr_t phys, struct scif_dev *dev);
-int scif_setup_qp_connect(struct scif_qp *qp, dma_addr_t *qp_offset,
-			  int local_size, struct scif_dev *scifdev);
-int scif_setup_qp_accept(struct scif_qp *qp, dma_addr_t *qp_offset,
-			 dma_addr_t phys, int local_size,
-			 struct scif_dev *scifdev);
-int scif_setup_qp_connect_response(struct scif_dev *scifdev,
-				   struct scif_qp *qp, u64 payload);
-int scif_setup_loopback_qp(struct scif_dev *scifdev);
-int scif_destroy_loopback_qp(struct scif_dev *scifdev);
-void scif_poll_qp_state(struct work_struct *work);
-void scif_destroy_p2p(struct scif_dev *scifdev);
-void scif_send_exit(struct scif_dev *scifdev);
-static inline struct device *scif_get_peer_dev(struct scif_dev *scifdev)
-{
-	struct scif_peer_dev *spdev;
-	struct device *spdev_ret;
-
-	rcu_read_lock();
-	spdev = rcu_dereference(scifdev->spdev);
-	if (spdev)
-		spdev_ret = get_device(&spdev->dev);
-	else
-		spdev_ret = ERR_PTR(-ENODEV);
-	rcu_read_unlock();
-	return spdev_ret;
-}
-
-static inline void scif_put_peer_dev(struct device *dev)
-{
-	put_device(dev);
-}
-#endif  /* SCIF_NODEQP */
diff --git a/drivers/misc/mic/scif/scif_peer_bus.c b/drivers/misc/mic/scif/scif_peer_bus.c
deleted file mode 100644
index 6d608308bb60..000000000000
--- a/drivers/misc/mic/scif/scif_peer_bus.c
+++ /dev/null
@@ -1,175 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Intel SCIF driver.
- */
-#include "scif_main.h"
-#include "../bus/scif_bus.h"
-#include "scif_peer_bus.h"
-
-static inline struct scif_peer_dev *
-dev_to_scif_peer(struct device *dev)
-{
-	return container_of(dev, struct scif_peer_dev, dev);
-}
-
-struct bus_type scif_peer_bus = {
-	.name  = "scif_peer_bus",
-};
-
-static void scif_peer_release_dev(struct device *d)
-{
-	struct scif_peer_dev *sdev = dev_to_scif_peer(d);
-	struct scif_dev *scifdev = &scif_dev[sdev->dnode];
-
-	scif_cleanup_scifdev(scifdev);
-	kfree(sdev);
-}
-
-static int scif_peer_initialize_device(struct scif_dev *scifdev)
-{
-	struct scif_peer_dev *spdev;
-	int ret;
-
-	spdev = kzalloc(sizeof(*spdev), GFP_KERNEL);
-	if (!spdev) {
-		ret = -ENOMEM;
-		goto err;
-	}
-
-	spdev->dev.parent = scifdev->sdev->dev.parent;
-	spdev->dev.release = scif_peer_release_dev;
-	spdev->dnode = scifdev->node;
-	spdev->dev.bus = &scif_peer_bus;
-	dev_set_name(&spdev->dev, "scif_peer-dev%u", spdev->dnode);
-
-	device_initialize(&spdev->dev);
-	get_device(&spdev->dev);
-	rcu_assign_pointer(scifdev->spdev, spdev);
-
-	mutex_lock(&scif_info.conflock);
-	scif_info.total++;
-	scif_info.maxid = max_t(u32, spdev->dnode, scif_info.maxid);
-	mutex_unlock(&scif_info.conflock);
-	return 0;
-err:
-	dev_err(&scifdev->sdev->dev,
-		"dnode %d: initialize_device rc %d\n", scifdev->node, ret);
-	return ret;
-}
-
-static int scif_peer_add_device(struct scif_dev *scifdev)
-{
-	struct scif_peer_dev *spdev = rcu_dereference(scifdev->spdev);
-	char pool_name[16];
-	int ret;
-
-	ret = device_add(&spdev->dev);
-	put_device(&spdev->dev);
-	if (ret) {
-		dev_err(&scifdev->sdev->dev,
-			"dnode %d: peer device_add failed\n", scifdev->node);
-		goto put_spdev;
-	}
-
-	scnprintf(pool_name, sizeof(pool_name), "scif-%d", spdev->dnode);
-	scifdev->signal_pool = dmam_pool_create(pool_name, &scifdev->sdev->dev,
-						sizeof(struct scif_status), 1,
-						0);
-	if (!scifdev->signal_pool) {
-		dev_err(&scifdev->sdev->dev,
-			"dnode %d: dmam_pool_create failed\n", scifdev->node);
-		ret = -ENOMEM;
-		goto del_spdev;
-	}
-	dev_dbg(&spdev->dev, "Added peer dnode %d\n", spdev->dnode);
-	return 0;
-del_spdev:
-	device_del(&spdev->dev);
-put_spdev:
-	RCU_INIT_POINTER(scifdev->spdev, NULL);
-	synchronize_rcu();
-	put_device(&spdev->dev);
-
-	mutex_lock(&scif_info.conflock);
-	scif_info.total--;
-	mutex_unlock(&scif_info.conflock);
-	return ret;
-}
-
-void scif_add_peer_device(struct work_struct *work)
-{
-	struct scif_dev *scifdev = container_of(work, struct scif_dev,
-						peer_add_work);
-
-	scif_peer_add_device(scifdev);
-}
-
-/*
- * Peer device registration is split into a device_initialize and a device_add.
- * The reason for doing this is as follows: First, peer device registration
- * itself cannot be done in the message processing thread and must be delegated
- * to another workqueue, otherwise if SCIF client probe, called during peer
- * device registration, calls scif_connect(..), it will block the message
- * processing thread causing a deadlock. Next, device_initialize is done in the
- * "top-half" message processing thread and device_add in the "bottom-half"
- * workqueue. If this is not done, SCIF_CNCT_REQ message processing executing
- * concurrently with SCIF_INIT message processing is unable to get a reference
- * on the peer device, thereby failing the connect request.
- */
-void scif_peer_register_device(struct scif_dev *scifdev)
-{
-	int ret;
-
-	mutex_lock(&scifdev->lock);
-	ret = scif_peer_initialize_device(scifdev);
-	if (ret)
-		goto exit;
-	schedule_work(&scifdev->peer_add_work);
-exit:
-	mutex_unlock(&scifdev->lock);
-}
-
-int scif_peer_unregister_device(struct scif_dev *scifdev)
-{
-	struct scif_peer_dev *spdev;
-
-	mutex_lock(&scifdev->lock);
-	/* Flush work to ensure device register is complete */
-	flush_work(&scifdev->peer_add_work);
-
-	/*
-	 * Continue holding scifdev->lock since theoretically unregister_device
-	 * can be called simultaneously from multiple threads
-	 */
-	spdev = rcu_dereference(scifdev->spdev);
-	if (!spdev) {
-		mutex_unlock(&scifdev->lock);
-		return -ENODEV;
-	}
-
-	RCU_INIT_POINTER(scifdev->spdev, NULL);
-	synchronize_rcu();
-	mutex_unlock(&scifdev->lock);
-
-	dev_dbg(&spdev->dev, "Removing peer dnode %d\n", spdev->dnode);
-	device_unregister(&spdev->dev);
-
-	mutex_lock(&scif_info.conflock);
-	scif_info.total--;
-	mutex_unlock(&scif_info.conflock);
-	return 0;
-}
-
-int scif_peer_bus_init(void)
-{
-	return bus_register(&scif_peer_bus);
-}
-
-void scif_peer_bus_exit(void)
-{
-	bus_unregister(&scif_peer_bus);
-}
diff --git a/drivers/misc/mic/scif/scif_peer_bus.h b/drivers/misc/mic/scif/scif_peer_bus.h
deleted file mode 100644
index 2ea4c51c18c1..000000000000
--- a/drivers/misc/mic/scif/scif_peer_bus.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Intel SCIF driver.
- */
-#ifndef _SCIF_PEER_BUS_H_
-#define _SCIF_PEER_BUS_H_
-
-#include <linux/device.h>
-#include <linux/mic_common.h>
-#include <linux/scif.h>
-
-struct scif_dev;
-
-void scif_add_peer_device(struct work_struct *work);
-void scif_peer_register_device(struct scif_dev *sdev);
-int scif_peer_unregister_device(struct scif_dev *scifdev);
-int scif_peer_bus_init(void);
-void scif_peer_bus_exit(void);
-#endif /* _SCIF_PEER_BUS_H */
diff --git a/drivers/misc/mic/scif/scif_ports.c b/drivers/misc/mic/scif/scif_ports.c
deleted file mode 100644
index 4bdb5ef9a139..000000000000
--- a/drivers/misc/mic/scif/scif_ports.c
+++ /dev/null
@@ -1,116 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Intel SCIF driver.
- */
-#include <linux/idr.h>
-
-#include "scif_main.h"
-
-#define SCIF_PORT_COUNT	0x10000	/* Ports available */
-
-struct idr scif_ports;
-
-/**
- * struct scif_port - SCIF port information
- *
- * @ref_cnt:  Reference count since there can be multiple endpoints
- *	      created via scif_accept(..) simultaneously using a port.
- */
-struct scif_port {
-	int ref_cnt;
-};
-
-/**
- * __scif_get_port - Reserve a specified port # for SCIF and add it
- * to the global list.
- * @start: lowest port # to be reserved (inclusive).
- * @end:   highest port # to be reserved (exclusive).
- *
- * @return : Allocated SCIF port #, or -ENOSPC if port unavailable.
- *		On memory allocation failure, returns -ENOMEM.
- */
-static int __scif_get_port(int start, int end)
-{
-	int id;
-	struct scif_port *port = kzalloc(sizeof(*port), GFP_ATOMIC);
-
-	if (!port)
-		return -ENOMEM;
-	spin_lock(&scif_info.port_lock);
-	id = idr_alloc(&scif_ports, port, start, end, GFP_ATOMIC);
-	if (id >= 0)
-		port->ref_cnt++;
-	spin_unlock(&scif_info.port_lock);
-	return id;
-}
-
-/**
- * scif_rsrv_port - Reserve a specified port # for SCIF.
- * @port : port # to be reserved.
- *
- * @return : Allocated SCIF port #, or -ENOSPC if port unavailable.
- *		On memory allocation failure, returns -ENOMEM.
- */
-int scif_rsrv_port(u16 port)
-{
-	return __scif_get_port(port, port + 1);
-}
-
-/**
- * scif_get_new_port - Get and reserve any port # for SCIF in the range
- *			SCIF_PORT_RSVD + 1 to SCIF_PORT_COUNT - 1.
- *
- * @return : Allocated SCIF port #, or -ENOSPC if no ports available.
- *		On memory allocation failure, returns -ENOMEM.
- */
-int scif_get_new_port(void)
-{
-	return __scif_get_port(SCIF_PORT_RSVD + 1, SCIF_PORT_COUNT);
-}
-
-/**
- * scif_get_port - Increment the reference count for a SCIF port
- * @id : SCIF port
- *
- * @return : None
- */
-void scif_get_port(u16 id)
-{
-	struct scif_port *port;
-
-	if (!id)
-		return;
-	spin_lock(&scif_info.port_lock);
-	port = idr_find(&scif_ports, id);
-	if (port)
-		port->ref_cnt++;
-	spin_unlock(&scif_info.port_lock);
-}
-
-/**
- * scif_put_port - Release a reserved SCIF port
- * @id : SCIF port to be released.
- *
- * @return : None
- */
-void scif_put_port(u16 id)
-{
-	struct scif_port *port;
-
-	if (!id)
-		return;
-	spin_lock(&scif_info.port_lock);
-	port = idr_find(&scif_ports, id);
-	if (port) {
-		port->ref_cnt--;
-		if (!port->ref_cnt) {
-			idr_remove(&scif_ports, id);
-			kfree(port);
-		}
-	}
-	spin_unlock(&scif_info.port_lock);
-}
diff --git a/drivers/misc/mic/scif/scif_rb.c b/drivers/misc/mic/scif/scif_rb.c
deleted file mode 100644
index e425882ae06d..000000000000
--- a/drivers/misc/mic/scif/scif_rb.c
+++ /dev/null
@@ -1,240 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Intel SCIF driver.
- */
-#include <linux/circ_buf.h>
-#include <linux/types.h>
-#include <linux/io.h>
-#include <linux/errno.h>
-
-#include "scif_rb.h"
-
-#define scif_rb_ring_cnt(head, tail, size) CIRC_CNT(head, tail, size)
-#define scif_rb_ring_space(head, tail, size) CIRC_SPACE(head, tail, size)
-
-/**
- * scif_rb_init - Initializes the ring buffer
- * @rb: ring buffer
- * @read_ptr: A pointer to the read offset
- * @write_ptr: A pointer to the write offset
- * @rb_base: A pointer to the base of the ring buffer
- * @size: The size of the ring buffer in powers of two
- */
-void scif_rb_init(struct scif_rb *rb, u32 *read_ptr, u32 *write_ptr,
-		  void *rb_base, u8 size)
-{
-	rb->rb_base = rb_base;
-	rb->size = (1 << size);
-	rb->read_ptr = read_ptr;
-	rb->write_ptr = write_ptr;
-	rb->current_read_offset = *read_ptr;
-	rb->current_write_offset = *write_ptr;
-}
-
-/* Copies a message to the ring buffer -- handles the wrap around case */
-static void memcpy_torb(struct scif_rb *rb, void *header,
-			void *msg, u32 size)
-{
-	u32 size1, size2;
-
-	if (header + size >= rb->rb_base + rb->size) {
-		/* Need to call two copies if it wraps around */
-		size1 = (u32)(rb->rb_base + rb->size - header);
-		size2 = size - size1;
-		memcpy_toio((void __iomem __force *)header, msg, size1);
-		memcpy_toio((void __iomem __force *)rb->rb_base,
-			    msg + size1, size2);
-	} else {
-		memcpy_toio((void __iomem __force *)header, msg, size);
-	}
-}
-
-/* Copies a message from the ring buffer -- handles the wrap around case */
-static void memcpy_fromrb(struct scif_rb *rb, void *header,
-			  void *msg, u32 size)
-{
-	u32 size1, size2;
-
-	if (header + size >= rb->rb_base + rb->size) {
-		/* Need to call two copies if it wraps around */
-		size1 = (u32)(rb->rb_base + rb->size - header);
-		size2 = size - size1;
-		memcpy_fromio(msg, (void __iomem __force *)header, size1);
-		memcpy_fromio(msg + size1,
-			      (void __iomem __force *)rb->rb_base, size2);
-	} else {
-		memcpy_fromio(msg, (void __iomem __force *)header, size);
-	}
-}
-
-/**
- * scif_rb_space - Query space available for writing to the RB
- * @rb: ring buffer
- *
- * Return: size available for writing to RB in bytes.
- */
-u32 scif_rb_space(struct scif_rb *rb)
-{
-	rb->current_read_offset = *rb->read_ptr;
-	/*
-	 * Update from the HW read pointer only once the peer has exposed the
-	 * new empty slot. This barrier is paired with the memory barrier
-	 * scif_rb_update_read_ptr()
-	 */
-	mb();
-	return scif_rb_ring_space(rb->current_write_offset,
-				  rb->current_read_offset, rb->size);
-}
-
-/**
- * scif_rb_write - Write a message to the RB
- * @rb: ring buffer
- * @msg: buffer to send the message.  Must be at least size bytes long
- * @size: the size (in bytes) to be copied to the RB
- *
- * This API does not block if there isn't enough space in the RB.
- * Returns: 0 on success or -ENOMEM on failure
- */
-int scif_rb_write(struct scif_rb *rb, void *msg, u32 size)
-{
-	void *header;
-
-	if (scif_rb_space(rb) < size)
-		return -ENOMEM;
-	header = rb->rb_base + rb->current_write_offset;
-	memcpy_torb(rb, header, msg, size);
-	/*
-	 * Wait until scif_rb_commit(). Update the local ring
-	 * buffer data, not the shared data until commit.
-	 */
-	rb->current_write_offset =
-		(rb->current_write_offset + size) & (rb->size - 1);
-	return 0;
-}
-
-/**
- * scif_rb_commit - To submit the message to let the peer fetch it
- * @rb: ring buffer
- */
-void scif_rb_commit(struct scif_rb *rb)
-{
-	/*
-	 * We must ensure ordering between the all the data committed
-	 * previously before we expose the new message to the peer by
-	 * updating the write_ptr. This write barrier is paired with
-	 * the read barrier in scif_rb_count(..)
-	 */
-	wmb();
-	WRITE_ONCE(*rb->write_ptr, rb->current_write_offset);
-#ifdef CONFIG_INTEL_MIC_CARD
-	/*
-	 * X100 Si bug: For the case where a Core is performing an EXT_WR
-	 * followed by a Doorbell Write, the Core must perform two EXT_WR to the
-	 * same address with the same data before it does the Doorbell Write.
-	 * This way, if ordering is violated for the Interrupt Message, it will
-	 * fall just behind the first Posted associated with the first EXT_WR.
-	 */
-	WRITE_ONCE(*rb->write_ptr, rb->current_write_offset);
-#endif
-}
-
-/**
- * scif_rb_get - To get next message from the ring buffer
- * @rb: ring buffer
- * @size: Number of bytes to be read
- *
- * Return: NULL if no bytes to be read from the ring buffer, otherwise the
- *	pointer to the next byte
- */
-static void *scif_rb_get(struct scif_rb *rb, u32 size)
-{
-	void *header = NULL;
-
-	if (scif_rb_count(rb, size) >= size)
-		header = rb->rb_base + rb->current_read_offset;
-	return header;
-}
-
-/*
- * scif_rb_get_next - Read from ring buffer.
- * @rb: ring buffer
- * @msg: buffer to hold the message.  Must be at least size bytes long
- * @size: Number of bytes to be read
- *
- * Return: number of bytes read if available bytes are >= size, otherwise
- * returns zero.
- */
-u32 scif_rb_get_next(struct scif_rb *rb, void *msg, u32 size)
-{
-	void *header = NULL;
-	int read_size = 0;
-
-	header = scif_rb_get(rb, size);
-	if (header) {
-		u32 next_cmd_offset =
-			(rb->current_read_offset + size) & (rb->size - 1);
-
-		read_size = size;
-		rb->current_read_offset = next_cmd_offset;
-		memcpy_fromrb(rb, header, msg, size);
-	}
-	return read_size;
-}
-
-/**
- * scif_rb_update_read_ptr
- * @rb: ring buffer
- */
-void scif_rb_update_read_ptr(struct scif_rb *rb)
-{
-	u32 new_offset;
-
-	new_offset = rb->current_read_offset;
-	/*
-	 * We must ensure ordering between the all the data committed or read
-	 * previously before we expose the empty slot to the peer by updating
-	 * the read_ptr. This barrier is paired with the memory barrier in
-	 * scif_rb_space(..)
-	 */
-	mb();
-	WRITE_ONCE(*rb->read_ptr, new_offset);
-#ifdef CONFIG_INTEL_MIC_CARD
-	/*
-	 * X100 Si Bug: For the case where a Core is performing an EXT_WR
-	 * followed by a Doorbell Write, the Core must perform two EXT_WR to the
-	 * same address with the same data before it does the Doorbell Write.
-	 * This way, if ordering is violated for the Interrupt Message, it will
-	 * fall just behind the first Posted associated with the first EXT_WR.
-	 */
-	WRITE_ONCE(*rb->read_ptr, new_offset);
-#endif
-}
-
-/**
- * scif_rb_count
- * @rb: ring buffer
- * @size: Number of bytes expected to be read
- *
- * Return: number of bytes that can be read from the RB
- */
-u32 scif_rb_count(struct scif_rb *rb, u32 size)
-{
-	if (scif_rb_ring_cnt(rb->current_write_offset,
-			     rb->current_read_offset,
-			     rb->size) < size) {
-		rb->current_write_offset = *rb->write_ptr;
-		/*
-		 * Update from the HW write pointer if empty only once the peer
-		 * has exposed the new message. This read barrier is paired
-		 * with the write barrier in scif_rb_commit(..)
-		 */
-		smp_rmb();
-	}
-	return scif_rb_ring_cnt(rb->current_write_offset,
-				rb->current_read_offset,
-				rb->size);
-}
diff --git a/drivers/misc/mic/scif/scif_rb.h b/drivers/misc/mic/scif/scif_rb.h
deleted file mode 100644
index 166dffe3093d..000000000000
--- a/drivers/misc/mic/scif/scif_rb.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- *   notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- *   notice, this list of conditions and the following disclaimer in
- *   the documentation and/or other materials provided with the
- *   distribution.
- * * Neither the name of Intel Corporation nor the names of its
- *   contributors may be used to endorse or promote products derived
- *   from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Intel SCIF driver.
- */
-#ifndef SCIF_RB_H
-#define SCIF_RB_H
-/*
- * This file describes a general purpose, byte based ring buffer. Writers to the
- * ring buffer need to synchronize using a lock. The same is true for readers,
- * although in practice, the ring buffer has a single reader. It is lockless
- * between producer and consumer so it can handle being used across the PCIe
- * bus. The ring buffer ensures that there are no reads across the PCIe bus for
- * performance reasons. Two of these are used to form a single bidirectional
- * queue-pair across PCIe.
- */
-/*
- * struct scif_rb - SCIF Ring Buffer
- *
- * @rb_base: The base of the memory used for storing RB messages
- * @read_ptr: Pointer to the read offset
- * @write_ptr: Pointer to the write offset
- * @size: Size of the memory in rb_base
- * @current_read_offset: Cached read offset for performance
- * @current_write_offset: Cached write offset for performance
- */
-struct scif_rb {
-	void *rb_base;
-	u32 *read_ptr;
-	u32 *write_ptr;
-	u32 size;
-	u32 current_read_offset;
-	u32 current_write_offset;
-};
-
-/* methods used by both */
-void scif_rb_init(struct scif_rb *rb, u32 *read_ptr, u32 *write_ptr,
-		  void *rb_base, u8 size);
-/* writer only methods */
-/* write a new command, then scif_rb_commit() */
-int scif_rb_write(struct scif_rb *rb, void *msg, u32 size);
-/* after write(), then scif_rb_commit() */
-void scif_rb_commit(struct scif_rb *rb);
-/* query space available for writing to a RB. */
-u32 scif_rb_space(struct scif_rb *rb);
-
-/* reader only methods */
-/* read a new message from the ring buffer of size bytes */
-u32 scif_rb_get_next(struct scif_rb *rb, void *msg, u32 size);
-/* update the read pointer so that the space can be reused */
-void scif_rb_update_read_ptr(struct scif_rb *rb);
-/* count the number of bytes that can be read */
-u32 scif_rb_count(struct scif_rb *rb, u32 size);
-#endif
diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c
deleted file mode 100644
index 18fb9d8b8a4b..000000000000
--- a/drivers/misc/mic/scif/scif_rma.c
+++ /dev/null
@@ -1,1760 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * Intel SCIF driver.
- */
-#include <linux/intel-iommu.h>
-#include <linux/pagemap.h>
-#include <linux/sched/mm.h>
-#include <linux/sched/signal.h>
-
-#include "scif_main.h"
-#include "scif_map.h"
-
-/* Used to skip ulimit checks for registrations with SCIF_MAP_KERNEL flag */
-#define SCIF_MAP_ULIMIT 0x40
-
-bool scif_ulimit_check = 1;
-
-/**
- * scif_rma_ep_init:
- * @ep: end point
- *
- * Initialize RMA per EP data structures.
- */
-void scif_rma_ep_init(struct scif_endpt *ep)
-{
-	struct scif_endpt_rma_info *rma = &ep->rma_info;
-
-	mutex_init(&rma->rma_lock);
-	init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN);
-	spin_lock_init(&rma->tc_lock);
-	mutex_init(&rma->mmn_lock);
-	INIT_LIST_HEAD(&rma->reg_list);
-	INIT_LIST_HEAD(&rma->remote_reg_list);
-	atomic_set(&rma->tw_refcount, 0);
-	atomic_set(&rma->tcw_refcount, 0);
-	atomic_set(&rma->tcw_total_pages, 0);
-	atomic_set(&rma->fence_refcount, 0);
-
-	rma->async_list_del = 0;
-	rma->dma_chan = NULL;
-	INIT_LIST_HEAD(&rma->mmn_list);
-	INIT_LIST_HEAD(&rma->vma_list);
-	init_waitqueue_head(&rma->markwq);
-}
-
-/**
- * scif_rma_ep_can_uninit:
- * @ep: end point
- *
- * Returns 1 if an endpoint can be uninitialized and 0 otherwise.
- */
-int scif_rma_ep_can_uninit(struct scif_endpt *ep)
-{
-	int ret = 0;
-
-	mutex_lock(&ep->rma_info.rma_lock);
-	/* Destroy RMA Info only if both lists are empty */
-	if (list_empty(&ep->rma_info.reg_list) &&
-	    list_empty(&ep->rma_info.remote_reg_list) &&
-	    list_empty(&ep->rma_info.mmn_list) &&
-	    !atomic_read(&ep->rma_info.tw_refcount) &&
-	    !atomic_read(&ep->rma_info.tcw_refcount) &&
-	    !atomic_read(&ep->rma_info.fence_refcount))
-		ret = 1;
-	mutex_unlock(&ep->rma_info.rma_lock);
-	return ret;
-}
-
-/**
- * scif_create_pinned_pages:
- * @nr_pages: number of pages in window
- * @prot: read/write protection
- *
- * Allocate and prepare a set of pinned pages.
- */
-static struct scif_pinned_pages *
-scif_create_pinned_pages(int nr_pages, int prot)
-{
-	struct scif_pinned_pages *pin;
-
-	might_sleep();
-	pin = scif_zalloc(sizeof(*pin));
-	if (!pin)
-		goto error;
-
-	pin->pages = scif_zalloc(nr_pages * sizeof(*pin->pages));
-	if (!pin->pages)
-		goto error_free_pinned_pages;
-
-	pin->prot = prot;
-	pin->magic = SCIFEP_MAGIC;
-	return pin;
-
-error_free_pinned_pages:
-	scif_free(pin, sizeof(*pin));
-error:
-	return NULL;
-}
-
-/**
- * scif_destroy_pinned_pages:
- * @pin: A set of pinned pages.
- *
- * Deallocate resources for pinned pages.
- */
-static int scif_destroy_pinned_pages(struct scif_pinned_pages *pin)
-{
-	int j;
-	int writeable = pin->prot & SCIF_PROT_WRITE;
-	int kernel = SCIF_MAP_KERNEL & pin->map_flags;
-
-	if (kernel) {
-		for (j = 0; j < pin->nr_pages; j++) {
-			if (pin->pages[j] && !kernel) {
-				if (writeable)
-					set_page_dirty_lock(pin->pages[j]);
-				put_page(pin->pages[j]);
-			}
-		}
-	} else
-		unpin_user_pages_dirty_lock(pin->pages, pin->nr_pages,
-					    writeable);
-	scif_free(pin->pages,
-		  pin->nr_pages * sizeof(*pin->pages));
-	scif_free(pin, sizeof(*pin));
-	return 0;
-}
-
-/*
- * scif_create_window:
- * @ep: end point
- * @nr_pages: number of pages
- * @offset: registration offset
- * @temp: true if a temporary window is being created
- *
- * Allocate and prepare a self registration window.
- */
-struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages,
-				       s64 offset, bool temp)
-{
-	struct scif_window *window;
-
-	might_sleep();
-	window = scif_zalloc(sizeof(*window));
-	if (!window)
-		goto error;
-
-	window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
-	if (!window->dma_addr)
-		goto error_free_window;
-
-	window->num_pages = scif_zalloc(nr_pages * sizeof(*window->num_pages));
-	if (!window->num_pages)
-		goto error_free_window;
-
-	window->offset = offset;
-	window->ep = (u64)ep;
-	window->magic = SCIFEP_MAGIC;
-	window->reg_state = OP_IDLE;
-	init_waitqueue_head(&window->regwq);
-	window->unreg_state = OP_IDLE;
-	init_waitqueue_head(&window->unregwq);
-	INIT_LIST_HEAD(&window->list);
-	window->type = SCIF_WINDOW_SELF;
-	window->temp = temp;
-	return window;
-
-error_free_window:
-	scif_free(window->dma_addr,
-		  nr_pages * sizeof(*window->dma_addr));
-	scif_free(window, sizeof(*window));
-error:
-	return NULL;
-}
-
-/**
- * scif_destroy_incomplete_window:
- * @ep: end point
- * @window: registration window
- *
- * Deallocate resources for self window.
- */
-static void scif_destroy_incomplete_window(struct scif_endpt *ep,
-					   struct scif_window *window)
-{
-	int err;
-	int nr_pages = window->nr_pages;
-	struct scif_allocmsg *alloc = &window->alloc_handle;
-	struct scifmsg msg;
-
-retry:
-	/* Wait for a SCIF_ALLOC_GNT/REJ message */
-	err = wait_event_timeout(alloc->allocwq,
-				 alloc->state != OP_IN_PROGRESS,
-				 SCIF_NODE_ALIVE_TIMEOUT);
-	if (!err && scifdev_alive(ep))
-		goto retry;
-
-	mutex_lock(&ep->rma_info.rma_lock);
-	if (alloc->state == OP_COMPLETED) {
-		msg.uop = SCIF_FREE_VIRT;
-		msg.src = ep->port;
-		msg.payload[0] = ep->remote_ep;
-		msg.payload[1] = window->alloc_handle.vaddr;
-		msg.payload[2] = (u64)window;
-		msg.payload[3] = SCIF_REGISTER;
-		_scif_nodeqp_send(ep->remote_dev, &msg);
-	}
-	mutex_unlock(&ep->rma_info.rma_lock);
-
-	scif_free_window_offset(ep, window, window->offset);
-	scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
-	scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
-	scif_free(window, sizeof(*window));
-}
-
-/**
- * scif_unmap_window:
- * @remote_dev: SCIF remote device
- * @window: registration window
- *
- * Delete any DMA mappings created for a registered self window
- */
-void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window)
-{
-	int j;
-
-	if (scif_is_iommu_enabled() && !scifdev_self(remote_dev)) {
-		if (window->st) {
-			dma_unmap_sg(&remote_dev->sdev->dev,
-				     window->st->sgl, window->st->nents,
-				     DMA_BIDIRECTIONAL);
-			sg_free_table(window->st);
-			kfree(window->st);
-			window->st = NULL;
-		}
-	} else {
-		for (j = 0; j < window->nr_contig_chunks; j++) {
-			if (window->dma_addr[j]) {
-				scif_unmap_single(window->dma_addr[j],
-						  remote_dev,
-						  window->num_pages[j] <<
-						  PAGE_SHIFT);
-				window->dma_addr[j] = 0x0;
-			}
-		}
-	}
-}
-
-static inline struct mm_struct *__scif_acquire_mm(void)
-{
-	if (scif_ulimit_check)
-		return get_task_mm(current);
-	return NULL;
-}
-
-static inline void __scif_release_mm(struct mm_struct *mm)
-{
-	if (mm)
-		mmput(mm);
-}
-
-static inline int
-__scif_dec_pinned_vm_lock(struct mm_struct *mm,
-			  int nr_pages)
-{
-	if (!mm || !nr_pages || !scif_ulimit_check)
-		return 0;
-
-	atomic64_sub(nr_pages, &mm->pinned_vm);
-	return 0;
-}
-
-static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm,
-					     int nr_pages)
-{
-	unsigned long locked, lock_limit;
-
-	if (!mm || !nr_pages || !scif_ulimit_check)
-		return 0;
-
-	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-	locked = atomic64_add_return(nr_pages, &mm->pinned_vm);
-
-	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
-		atomic64_sub(nr_pages, &mm->pinned_vm);
-		dev_err(scif_info.mdev.this_device,
-			"locked(%lu) > lock_limit(%lu)\n",
-			locked, lock_limit);
-		return -ENOMEM;
-	}
-	return 0;
-}
-
-/**
- * scif_destroy_window:
- * @ep: end point
- * @window: registration window
- *
- * Deallocate resources for self window.
- */
-int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window)
-{
-	int j;
-	struct scif_pinned_pages *pinned_pages = window->pinned_pages;
-	int nr_pages = window->nr_pages;
-
-	might_sleep();
-	if (!window->temp && window->mm) {
-		__scif_dec_pinned_vm_lock(window->mm, window->nr_pages);
-		__scif_release_mm(window->mm);
-		window->mm = NULL;
-	}
-
-	scif_free_window_offset(ep, window, window->offset);
-	scif_unmap_window(ep->remote_dev, window);
-	/*
-	 * Decrement references for this set of pinned pages from
-	 * this window.
-	 */
-	j = atomic_sub_return(1, &pinned_pages->ref_count);
-	if (j < 0)
-		dev_err(scif_info.mdev.this_device,
-			"%s %d incorrect ref count %d\n",
-			__func__, __LINE__, j);
-	/*
-	 * If the ref count for pinned_pages is zero then someone
-	 * has already called scif_unpin_pages() for it and we should
-	 * destroy the page cache.
-	 */
-	if (!j)
-		scif_destroy_pinned_pages(window->pinned_pages);
-	scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
-	scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
-	window->magic = 0;
-	scif_free(window, sizeof(*window));
-	return 0;
-}
-
-/**
- * scif_create_remote_lookup:
- * @remote_dev: SCIF remote device
- * @window: remote window
- *
- * Allocate and prepare lookup entries for the remote
- * end to copy over the physical addresses.
- * Returns 0 on success and appropriate errno on failure.
- */
-static int scif_create_remote_lookup(struct scif_dev *remote_dev,
-				     struct scif_window *window)
-{
-	int i, j, err = 0;
-	int nr_pages = window->nr_pages;
-	bool vmalloc_dma_phys, vmalloc_num_pages;
-
-	might_sleep();
-	/* Map window */
-	err = scif_map_single(&window->mapped_offset,
-			      window, remote_dev, sizeof(*window));
-	if (err)
-		goto error_window;
-
-	/* Compute the number of lookup entries. 21 == 2MB Shift */
-	window->nr_lookup = ALIGN(nr_pages * PAGE_SIZE,
-					((2) * 1024 * 1024)) >> 21;
-
-	window->dma_addr_lookup.lookup =
-		scif_alloc_coherent(&window->dma_addr_lookup.offset,
-				    remote_dev, window->nr_lookup *
-				    sizeof(*window->dma_addr_lookup.lookup),
-				    GFP_KERNEL | __GFP_ZERO);
-	if (!window->dma_addr_lookup.lookup) {
-		err = -ENOMEM;
-		goto error_window;
-	}
-
-	window->num_pages_lookup.lookup =
-		scif_alloc_coherent(&window->num_pages_lookup.offset,
-				    remote_dev, window->nr_lookup *
-				    sizeof(*window->num_pages_lookup.lookup),
-				    GFP_KERNEL | __GFP_ZERO);
-	if (!window->num_pages_lookup.lookup) {
-		err = -ENOMEM;
-		goto error_window;
-	}
-
-	vmalloc_dma_phys = is_vmalloc_addr(&window->dma_addr[0]);
-	vmalloc_num_pages = is_vmalloc_addr(&window->num_pages[0]);
-
-	/* Now map each of the pages containing physical addresses */
-	for (i = 0, j = 0; i < nr_pages; i += SCIF_NR_ADDR_IN_PAGE, j++) {
-		err = scif_map_page(&window->dma_addr_lookup.lookup[j],
-				    vmalloc_dma_phys ?
-				    vmalloc_to_page(&window->dma_addr[i]) :
-				    virt_to_page(&window->dma_addr[i]),
-				    remote_dev);
-		if (err)
-			goto error_window;
-		err = scif_map_page(&window->num_pages_lookup.lookup[j],
-				    vmalloc_num_pages ?
-				    vmalloc_to_page(&window->num_pages[i]) :
-				    virt_to_page(&window->num_pages[i]),
-				    remote_dev);
-		if (err)
-			goto error_window;
-	}
-	return 0;
-error_window:
-	return err;
-}
-
-/**
- * scif_destroy_remote_lookup:
- * @remote_dev: SCIF remote device
- * @window: remote window
- *
- * Destroy lookup entries used for the remote
- * end to copy over the physical addresses.
- */
-static void scif_destroy_remote_lookup(struct scif_dev *remote_dev,
-				       struct scif_window *window)
-{
-	int i, j;
-
-	if (window->nr_lookup) {
-		struct scif_rma_lookup *lup = &window->dma_addr_lookup;
-		struct scif_rma_lookup *npup = &window->num_pages_lookup;
-
-		for (i = 0, j = 0; i < window->nr_pages;
-			i += SCIF_NR_ADDR_IN_PAGE, j++) {
-			if (lup->lookup && lup->lookup[j])
-				scif_unmap_single(lup->lookup[j],
-						  remote_dev,
-						  PAGE_SIZE);
-			if (npup->lookup && npup->lookup[j])
-				scif_unmap_single(npup->lookup[j],
-						  remote_dev,
-						  PAGE_SIZE);
-		}
-		if (lup->lookup)
-			scif_free_coherent(lup->lookup, lup->offset,
-					   remote_dev, window->nr_lookup *
-					   sizeof(*lup->lookup));
-		if (npup->lookup)
-			scif_free_coherent(npup->lookup, npup->offset,
-					   remote_dev, window->nr_lookup *
-					   sizeof(*npup->lookup));
-		if (window->mapped_offset)
-			scif_unmap_single(window->mapped_offset,
-					  remote_dev, sizeof(*window));
-		window->nr_lookup = 0;
-	}
-}
-
-/**
- * scif_create_remote_window:
- * @scifdev:  SCIF device
- * @nr_pages: number of pages in window
- *
- * Allocate and prepare a remote registration window.
- */
-static struct scif_window *
-scif_create_remote_window(struct scif_dev *scifdev, int nr_pages)
-{
-	struct scif_window *window;
-
-	might_sleep();
-	window = scif_zalloc(sizeof(*window));
-	if (!window)
-		goto error_ret;
-
-	window->magic = SCIFEP_MAGIC;
-	window->nr_pages = nr_pages;
-
-	window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
-	if (!window->dma_addr)
-		goto error_window;
-
-	window->num_pages = scif_zalloc(nr_pages *
-					sizeof(*window->num_pages));
-	if (!window->num_pages)
-		goto error_window;
-
-	if (scif_create_remote_lookup(scifdev, window))
-		goto error_window;
-
-	window->type = SCIF_WINDOW_PEER;
-	window->unreg_state = OP_IDLE;
-	INIT_LIST_HEAD(&window->list);
-	return window;
-error_window:
-	scif_destroy_remote_window(window);
-error_ret:
-	return NULL;
-}
-
-/**
- * scif_destroy_remote_window:
- * @window: remote registration window
- *
- * Deallocate resources for remote window.
- */
-void
-scif_destroy_remote_window(struct scif_window *window)
-{
-	scif_free(window->dma_addr, window->nr_pages *
-		  sizeof(*window->dma_addr));
-	scif_free(window->num_pages, window->nr_pages *
-		  sizeof(*window->num_pages));
-	window->magic = 0;
-	scif_free(window, sizeof(*window));
-}
-
-/**
- * scif_iommu_map: create DMA mappings if the IOMMU is enabled
- * @remote_dev: SCIF remote device
- * @window: remote registration window
- *
- * Map the physical pages using dma_map_sg(..) and then detect the number
- * of contiguous DMA mappings allocated
- */
-static int scif_iommu_map(struct scif_dev *remote_dev,
-			  struct scif_window *window)
-{
-	struct scatterlist *sg;
-	int i, err;
-	scif_pinned_pages_t pin = window->pinned_pages;
-
-	window->st = kzalloc(sizeof(*window->st), GFP_KERNEL);
-	if (!window->st)
-		return -ENOMEM;
-
-	err = sg_alloc_table(window->st, window->nr_pages, GFP_KERNEL);
-	if (err)
-		return err;
-
-	for_each_sg(window->st->sgl, sg, window->st->nents, i)
-		sg_set_page(sg, pin->pages[i], PAGE_SIZE, 0x0);
-
-	err = dma_map_sg(&remote_dev->sdev->dev, window->st->sgl,
-			 window->st->nents, DMA_BIDIRECTIONAL);
-	if (!err)
-		return -ENOMEM;
-	/* Detect contiguous ranges of DMA mappings */
-	sg = window->st->sgl;
-	for (i = 0; sg; i++) {
-		dma_addr_t last_da;
-
-		window->dma_addr[i] = sg_dma_address(sg);
-		window->num_pages[i] = sg_dma_len(sg) >> PAGE_SHIFT;
-		last_da = sg_dma_address(sg) + sg_dma_len(sg);
-		while ((sg = sg_next(sg)) && sg_dma_address(sg) == last_da) {
-			window->num_pages[i] +=
-				(sg_dma_len(sg) >> PAGE_SHIFT);
-			last_da = window->dma_addr[i] +
-				sg_dma_len(sg);
-		}
-		window->nr_contig_chunks++;
-	}
-	return 0;
-}
-
-/**
- * scif_map_window:
- * @remote_dev: SCIF remote device
- * @window: self registration window
- *
- * Map pages of a window into the aperture/PCI.
- * Also determine addresses required for DMA.
- */
-int
-scif_map_window(struct scif_dev *remote_dev, struct scif_window *window)
-{
-	int i, j, k, err = 0, nr_contig_pages;
-	scif_pinned_pages_t pin;
-	phys_addr_t phys_prev, phys_curr;
-
-	might_sleep();
-
-	pin = window->pinned_pages;
-
-	if (intel_iommu_enabled && !scifdev_self(remote_dev))
-		return scif_iommu_map(remote_dev, window);
-
-	for (i = 0, j = 0; i < window->nr_pages; i += nr_contig_pages, j++) {
-		phys_prev = page_to_phys(pin->pages[i]);
-		nr_contig_pages = 1;
-
-		/* Detect physically contiguous chunks */
-		for (k = i + 1; k < window->nr_pages; k++) {
-			phys_curr = page_to_phys(pin->pages[k]);
-			if (phys_curr != (phys_prev + PAGE_SIZE))
-				break;
-			phys_prev = phys_curr;
-			nr_contig_pages++;
-		}
-		window->num_pages[j] = nr_contig_pages;
-		window->nr_contig_chunks++;
-		if (scif_is_mgmt_node()) {
-			/*
-			 * Management node has to deal with SMPT on X100 and
-			 * hence the DMA mapping is required
-			 */
-			err = scif_map_single(&window->dma_addr[j],
-					      phys_to_virt(page_to_phys(
-							   pin->pages[i])),
-					      remote_dev,
-					      nr_contig_pages << PAGE_SHIFT);
-			if (err)
-				return err;
-		} else {
-			window->dma_addr[j] = page_to_phys(pin->pages[i]);
-		}
-	}
-	return err;
-}
-
-/**
- * scif_send_scif_unregister:
- * @ep: end point
- * @window: self registration window
- *
- * Send a SCIF_UNREGISTER message.
- */
-static int scif_send_scif_unregister(struct scif_endpt *ep,
-				     struct scif_window *window)
-{
-	struct scifmsg msg;
-
-	msg.uop = SCIF_UNREGISTER;
-	msg.src = ep->port;
-	msg.payload[0] = window->alloc_handle.vaddr;
-	msg.payload[1] = (u64)window;
-	return scif_nodeqp_send(ep->remote_dev, &msg);
-}
-
-/**
- * scif_unregister_window:
- * @window: self registration window
- *
- * Send an unregistration request and wait for a response.
- */
-int scif_unregister_window(struct scif_window *window)
-{
-	int err = 0;
-	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
-	bool send_msg = false;
-
-	might_sleep();
-	switch (window->unreg_state) {
-	case OP_IDLE:
-	{
-		window->unreg_state = OP_IN_PROGRESS;
-		send_msg = true;
-	}
-		fallthrough;
-	case OP_IN_PROGRESS:
-	{
-		scif_get_window(window, 1);
-		mutex_unlock(&ep->rma_info.rma_lock);
-		if (send_msg) {
-			err = scif_send_scif_unregister(ep, window);
-			if (err) {
-				window->unreg_state = OP_COMPLETED;
-				goto done;
-			}
-		} else {
-			/* Return ENXIO since unregistration is in progress */
-			mutex_lock(&ep->rma_info.rma_lock);
-			return -ENXIO;
-		}
-retry:
-		/* Wait for a SCIF_UNREGISTER_(N)ACK message */
-		err = wait_event_timeout(window->unregwq,
-					 window->unreg_state != OP_IN_PROGRESS,
-					 SCIF_NODE_ALIVE_TIMEOUT);
-		if (!err && scifdev_alive(ep))
-			goto retry;
-		if (!err) {
-			err = -ENODEV;
-			window->unreg_state = OP_COMPLETED;
-			dev_err(scif_info.mdev.this_device,
-				"%s %d err %d\n", __func__, __LINE__, err);
-		}
-		if (err > 0)
-			err = 0;
-done:
-		mutex_lock(&ep->rma_info.rma_lock);
-		scif_put_window(window, 1);
-		break;
-	}
-	case OP_FAILED:
-	{
-		if (!scifdev_alive(ep)) {
-			err = -ENODEV;
-			window->unreg_state = OP_COMPLETED;
-		}
-		break;
-	}
-	case OP_COMPLETED:
-		break;
-	default:
-		err = -ENODEV;
-	}
-
-	if (window->unreg_state == OP_COMPLETED && window->ref_count)
-		scif_put_window(window, window->nr_pages);
-
-	if (!window->ref_count) {
-		atomic_inc(&ep->rma_info.tw_refcount);
-		list_del_init(&window->list);
-		scif_free_window_offset(ep, window, window->offset);
-		mutex_unlock(&ep->rma_info.rma_lock);
-		if ((!!(window->pinned_pages->map_flags & SCIF_MAP_KERNEL)) &&
-		    scifdev_alive(ep)) {
-			scif_drain_dma_intr(ep->remote_dev->sdev,
-					    ep->rma_info.dma_chan);
-		} else {
-			if (!__scif_dec_pinned_vm_lock(window->mm,
-						       window->nr_pages)) {
-				__scif_release_mm(window->mm);
-				window->mm = NULL;
-			}
-		}
-		scif_queue_for_cleanup(window, &scif_info.rma);
-		mutex_lock(&ep->rma_info.rma_lock);
-	}
-	return err;
-}
-
-/**
- * scif_send_alloc_request:
- * @ep: end point
- * @window: self registration window
- *
- * Send a remote window allocation request
- */
-static int scif_send_alloc_request(struct scif_endpt *ep,
-				   struct scif_window *window)
-{
-	struct scifmsg msg;
-	struct scif_allocmsg *alloc = &window->alloc_handle;
-
-	/* Set up the Alloc Handle */
-	alloc->state = OP_IN_PROGRESS;
-	init_waitqueue_head(&alloc->allocwq);
-
-	/* Send out an allocation request */
-	msg.uop = SCIF_ALLOC_REQ;
-	msg.payload[1] = window->nr_pages;
-	msg.payload[2] = (u64)&window->alloc_handle;
-	return _scif_nodeqp_send(ep->remote_dev, &msg);
-}
-
-/**
- * scif_prep_remote_window:
- * @ep: end point
- * @window: self registration window
- *
- * Send a remote window allocation request, wait for an allocation response,
- * and prepares the remote window by copying over the page lists
- */
-static int scif_prep_remote_window(struct scif_endpt *ep,
-				   struct scif_window *window)
-{
-	struct scifmsg msg;
-	struct scif_window *remote_window;
-	struct scif_allocmsg *alloc = &window->alloc_handle;
-	dma_addr_t *dma_phys_lookup, *tmp, *num_pages_lookup, *tmp1;
-	int i = 0, j = 0;
-	int nr_contig_chunks, loop_nr_contig_chunks;
-	int remaining_nr_contig_chunks, nr_lookup;
-	int err, map_err;
-
-	map_err = scif_map_window(ep->remote_dev, window);
-	if (map_err)
-		dev_err(&ep->remote_dev->sdev->dev,
-			"%s %d map_err %d\n", __func__, __LINE__, map_err);
-	remaining_nr_contig_chunks = window->nr_contig_chunks;
-	nr_contig_chunks = window->nr_contig_chunks;
-retry:
-	/* Wait for a SCIF_ALLOC_GNT/REJ message */
-	err = wait_event_timeout(alloc->allocwq,
-				 alloc->state != OP_IN_PROGRESS,
-				 SCIF_NODE_ALIVE_TIMEOUT);
-	mutex_lock(&ep->rma_info.rma_lock);
-	/* Synchronize with the thread waking up allocwq */
-	mutex_unlock(&ep->rma_info.rma_lock);
-	if (!err && scifdev_alive(ep))
-		goto retry;
-
-	if (!err)
-		err = -ENODEV;
-
-	if (err > 0)
-		err = 0;
-	else
-		return err;
-
-	/* Bail out. The remote end rejected this request */
-	if (alloc->state == OP_FAILED)
-		return -ENOMEM;
-
-	if (map_err) {
-		dev_err(&ep->remote_dev->sdev->dev,
-			"%s %d err %d\n", __func__, __LINE__, map_err);
-		msg.uop = SCIF_FREE_VIRT;
-		msg.src = ep->port;
-		msg.payload[0] = ep->remote_ep;
-		msg.payload[1] = window->alloc_handle.vaddr;
-		msg.payload[2] = (u64)window;
-		msg.payload[3] = SCIF_REGISTER;
-		spin_lock(&ep->lock);
-		if (ep->state == SCIFEP_CONNECTED)
-			err = _scif_nodeqp_send(ep->remote_dev, &msg);
-		else
-			err = -ENOTCONN;
-		spin_unlock(&ep->lock);
-		return err;
-	}
-
-	remote_window = scif_ioremap(alloc->phys_addr, sizeof(*window),
-				     ep->remote_dev);
-
-	/* Compute the number of lookup entries. 21 == 2MB Shift */
-	nr_lookup = ALIGN(nr_contig_chunks, SCIF_NR_ADDR_IN_PAGE)
-			  >> ilog2(SCIF_NR_ADDR_IN_PAGE);
-
-	dma_phys_lookup =
-		scif_ioremap(remote_window->dma_addr_lookup.offset,
-			     nr_lookup *
-			     sizeof(*remote_window->dma_addr_lookup.lookup),
-			     ep->remote_dev);
-	num_pages_lookup =
-		scif_ioremap(remote_window->num_pages_lookup.offset,
-			     nr_lookup *
-			     sizeof(*remote_window->num_pages_lookup.lookup),
-			     ep->remote_dev);
-
-	while (remaining_nr_contig_chunks) {
-		loop_nr_contig_chunks = min_t(int, remaining_nr_contig_chunks,
-					      (int)SCIF_NR_ADDR_IN_PAGE);
-		/* #1/2 - Copy  physical addresses over to the remote side */
-
-		/* #2/2 - Copy DMA addresses (addresses that are fed into the
-		 * DMA engine) We transfer bus addresses which are then
-		 * converted into a MIC physical address on the remote
-		 * side if it is a MIC, if the remote node is a mgmt node we
-		 * transfer the MIC physical address
-		 */
-		tmp = scif_ioremap(dma_phys_lookup[j],
-				   loop_nr_contig_chunks *
-				   sizeof(*window->dma_addr),
-				   ep->remote_dev);
-		tmp1 = scif_ioremap(num_pages_lookup[j],
-				    loop_nr_contig_chunks *
-				    sizeof(*window->num_pages),
-				    ep->remote_dev);
-		if (scif_is_mgmt_node()) {
-			memcpy_toio((void __force __iomem *)tmp,
-				    &window->dma_addr[i], loop_nr_contig_chunks
-				    * sizeof(*window->dma_addr));
-			memcpy_toio((void __force __iomem *)tmp1,
-				    &window->num_pages[i], loop_nr_contig_chunks
-				    * sizeof(*window->num_pages));
-		} else {
-			if (scifdev_is_p2p(ep->remote_dev)) {
-				/*
-				 * add remote node's base address for this node
-				 * to convert it into a MIC address
-				 */
-				int m;
-				dma_addr_t dma_addr;
-
-				for (m = 0; m < loop_nr_contig_chunks; m++) {
-					dma_addr = window->dma_addr[i + m] +
-						ep->remote_dev->base_addr;
-					writeq(dma_addr,
-					       (void __force __iomem *)&tmp[m]);
-				}
-				memcpy_toio((void __force __iomem *)tmp1,
-					    &window->num_pages[i],
-					    loop_nr_contig_chunks
-					    * sizeof(*window->num_pages));
-			} else {
-				/* Mgmt node or loopback - transfer DMA
-				 * addresses as is, this is the same as a
-				 * MIC physical address (we use the dma_addr
-				 * and not the phys_addr array since the
-				 * phys_addr is only setup if there is a mmap()
-				 * request from the mgmt node)
-				 */
-				memcpy_toio((void __force __iomem *)tmp,
-					    &window->dma_addr[i],
-					    loop_nr_contig_chunks *
-					    sizeof(*window->dma_addr));
-				memcpy_toio((void __force __iomem *)tmp1,
-					    &window->num_pages[i],
-					    loop_nr_contig_chunks *
-					    sizeof(*window->num_pages));
-			}
-		}
-		remaining_nr_contig_chunks -= loop_nr_contig_chunks;
-		i += loop_nr_contig_chunks;
-		j++;
-		scif_iounmap(tmp, loop_nr_contig_chunks *
-			     sizeof(*window->dma_addr), ep->remote_dev);
-		scif_iounmap(tmp1, loop_nr_contig_chunks *
-			     sizeof(*window->num_pages), ep->remote_dev);
-	}
-
-	/* Prepare the remote window for the peer */
-	remote_window->peer_window = (u64)window;
-	remote_window->offset = window->offset;
-	remote_window->prot = window->prot;
-	remote_window->nr_contig_chunks = nr_contig_chunks;
-	remote_window->ep = ep->remote_ep;
-	scif_iounmap(num_pages_lookup,
-		     nr_lookup *
-		     sizeof(*remote_window->num_pages_lookup.lookup),
-		     ep->remote_dev);
-	scif_iounmap(dma_phys_lookup,
-		     nr_lookup *
-		     sizeof(*remote_window->dma_addr_lookup.lookup),
-		     ep->remote_dev);
-	scif_iounmap(remote_window, sizeof(*remote_window), ep->remote_dev);
-	window->peer_window = alloc->vaddr;
-	return err;
-}
-
-/**
- * scif_send_scif_register:
- * @ep: end point
- * @window: self registration window
- *
- * Send a SCIF_REGISTER message if EP is connected and wait for a
- * SCIF_REGISTER_(N)ACK message else send a SCIF_FREE_VIRT
- * message so that the peer can free its remote window allocated earlier.
- */
-static int scif_send_scif_register(struct scif_endpt *ep,
-				   struct scif_window *window)
-{
-	int err = 0;
-	struct scifmsg msg;
-
-	msg.src = ep->port;
-	msg.payload[0] = ep->remote_ep;
-	msg.payload[1] = window->alloc_handle.vaddr;
-	msg.payload[2] = (u64)window;
-	spin_lock(&ep->lock);
-	if (ep->state == SCIFEP_CONNECTED) {
-		msg.uop = SCIF_REGISTER;
-		window->reg_state = OP_IN_PROGRESS;
-		err = _scif_nodeqp_send(ep->remote_dev, &msg);
-		spin_unlock(&ep->lock);
-		if (!err) {
-retry:
-			/* Wait for a SCIF_REGISTER_(N)ACK message */
-			err = wait_event_timeout(window->regwq,
-						 window->reg_state !=
-						 OP_IN_PROGRESS,
-						 SCIF_NODE_ALIVE_TIMEOUT);
-			if (!err && scifdev_alive(ep))
-				goto retry;
-			err = !err ? -ENODEV : 0;
-			if (window->reg_state == OP_FAILED)
-				err = -ENOTCONN;
-		}
-	} else {
-		msg.uop = SCIF_FREE_VIRT;
-		msg.payload[3] = SCIF_REGISTER;
-		err = _scif_nodeqp_send(ep->remote_dev, &msg);
-		spin_unlock(&ep->lock);
-		if (!err)
-			err = -ENOTCONN;
-	}
-	return err;
-}
-
-/**
- * scif_get_window_offset:
- * @ep: end point descriptor
- * @flags: flags
- * @offset: offset hint
- * @num_pages: number of pages
- * @out_offset: computed offset returned by reference.
- *
- * Compute/Claim a new offset for this EP.
- */
-int scif_get_window_offset(struct scif_endpt *ep, int flags, s64 offset,
-			   int num_pages, s64 *out_offset)
-{
-	s64 page_index;
-	struct iova *iova_ptr;
-	int err = 0;
-
-	if (flags & SCIF_MAP_FIXED) {
-		page_index = SCIF_IOVA_PFN(offset);
-		iova_ptr = reserve_iova(&ep->rma_info.iovad, page_index,
-					page_index + num_pages - 1);
-		if (!iova_ptr)
-			err = -EADDRINUSE;
-	} else {
-		iova_ptr = alloc_iova(&ep->rma_info.iovad, num_pages,
-				      SCIF_DMA_63BIT_PFN - 1, 0);
-		if (!iova_ptr)
-			err = -ENOMEM;
-	}
-	if (!err)
-		*out_offset = (iova_ptr->pfn_lo) << PAGE_SHIFT;
-	return err;
-}
-
-/**
- * scif_free_window_offset:
- * @ep: end point descriptor
- * @window: registration window
- * @offset: Offset to be freed
- *
- * Free offset for this EP. The callee is supposed to grab
- * the RMA mutex before calling this API.
- */
-void scif_free_window_offset(struct scif_endpt *ep,
-			     struct scif_window *window, s64 offset)
-{
-	if ((window && !window->offset_freed) || !window) {
-		free_iova(&ep->rma_info.iovad, offset >> PAGE_SHIFT);
-		if (window)
-			window->offset_freed = true;
-	}
-}
-
-/**
- * scif_alloc_req: Respond to SCIF_ALLOC_REQ interrupt message
- * @scifdev:    SCIF device
- * @msg:        Interrupt message
- *
- * Remote side is requesting a memory allocation.
- */
-void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	int err;
-	struct scif_window *window = NULL;
-	int nr_pages = msg->payload[1];
-
-	window = scif_create_remote_window(scifdev, nr_pages);
-	if (!window) {
-		err = -ENOMEM;
-		goto error;
-	}
-
-	/* The peer's allocation request is granted */
-	msg->uop = SCIF_ALLOC_GNT;
-	msg->payload[0] = (u64)window;
-	msg->payload[1] = window->mapped_offset;
-	err = scif_nodeqp_send(scifdev, msg);
-	if (err)
-		scif_destroy_remote_window(window);
-	return;
-error:
-	/* The peer's allocation request is rejected */
-	dev_err(&scifdev->sdev->dev,
-		"%s %d error %d alloc_ptr %p nr_pages 0x%x\n",
-		__func__, __LINE__, err, window, nr_pages);
-	msg->uop = SCIF_ALLOC_REJ;
-	scif_nodeqp_send(scifdev, msg);
-}
-
-/**
- * scif_alloc_gnt_rej: Respond to SCIF_ALLOC_GNT/REJ interrupt message
- * @scifdev:    SCIF device
- * @msg:        Interrupt message
- *
- * Remote side responded to a memory allocation.
- */
-void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	struct scif_allocmsg *handle = (struct scif_allocmsg *)msg->payload[2];
-	struct scif_window *window = container_of(handle, struct scif_window,
-						  alloc_handle);
-	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
-
-	mutex_lock(&ep->rma_info.rma_lock);
-	handle->vaddr = msg->payload[0];
-	handle->phys_addr = msg->payload[1];
-	if (msg->uop == SCIF_ALLOC_GNT)
-		handle->state = OP_COMPLETED;
-	else
-		handle->state = OP_FAILED;
-	wake_up(&handle->allocwq);
-	mutex_unlock(&ep->rma_info.rma_lock);
-}
-
-/**
- * scif_free_virt: Respond to SCIF_FREE_VIRT interrupt message
- * @scifdev:    SCIF device
- * @msg:        Interrupt message
- *
- * Free up memory kmalloc'd earlier.
- */
-void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	struct scif_window *window = (struct scif_window *)msg->payload[1];
-
-	scif_destroy_remote_window(window);
-}
-
-static void
-scif_fixup_aper_base(struct scif_dev *dev, struct scif_window *window)
-{
-	int j;
-	struct scif_hw_dev *sdev = dev->sdev;
-	phys_addr_t apt_base = 0;
-
-	/*
-	 * Add the aperture base if the DMA address is not card relative
-	 * since the DMA addresses need to be an offset into the bar
-	 */
-	if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER &&
-	    sdev->aper && !sdev->card_rel_da)
-		apt_base = sdev->aper->pa;
-	else
-		return;
-
-	for (j = 0; j < window->nr_contig_chunks; j++) {
-		if (window->num_pages[j])
-			window->dma_addr[j] += apt_base;
-		else
-			break;
-	}
-}
-
-/**
- * scif_recv_reg: Respond to SCIF_REGISTER interrupt message
- * @scifdev:    SCIF device
- * @msg:        Interrupt message
- *
- * Update remote window list with a new registered window.
- */
-void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
-	struct scif_window *window =
-		(struct scif_window *)msg->payload[1];
-
-	mutex_lock(&ep->rma_info.rma_lock);
-	spin_lock(&ep->lock);
-	if (ep->state == SCIFEP_CONNECTED) {
-		msg->uop = SCIF_REGISTER_ACK;
-		scif_nodeqp_send(ep->remote_dev, msg);
-		scif_fixup_aper_base(ep->remote_dev, window);
-		/* No further failures expected. Insert new window */
-		scif_insert_window(window, &ep->rma_info.remote_reg_list);
-	} else {
-		msg->uop = SCIF_REGISTER_NACK;
-		scif_nodeqp_send(ep->remote_dev, msg);
-	}
-	spin_unlock(&ep->lock);
-	mutex_unlock(&ep->rma_info.rma_lock);
-	/* free up any lookup resources now that page lists are transferred */
-	scif_destroy_remote_lookup(ep->remote_dev, window);
-	/*
-	 * We could not insert the window but we need to
-	 * destroy the window.
-	 */
-	if (msg->uop == SCIF_REGISTER_NACK)
-		scif_destroy_remote_window(window);
-}
-
-/**
- * scif_recv_unreg: Respond to SCIF_UNREGISTER interrupt message
- * @scifdev:    SCIF device
- * @msg:        Interrupt message
- *
- * Remove window from remote registration list;
- */
-void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	struct scif_rma_req req;
-	struct scif_window *window = NULL;
-	struct scif_window *recv_window =
-		(struct scif_window *)msg->payload[0];
-	struct scif_endpt *ep;
-	int del_window = 0;
-
-	ep = (struct scif_endpt *)recv_window->ep;
-	req.out_window = &window;
-	req.offset = recv_window->offset;
-	req.prot = 0;
-	req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
-	req.type = SCIF_WINDOW_FULL;
-	req.head = &ep->rma_info.remote_reg_list;
-	msg->payload[0] = ep->remote_ep;
-
-	mutex_lock(&ep->rma_info.rma_lock);
-	/* Does a valid window exist? */
-	if (scif_query_window(&req)) {
-		dev_err(&scifdev->sdev->dev,
-			"%s %d -ENXIO\n", __func__, __LINE__);
-		msg->uop = SCIF_UNREGISTER_ACK;
-		goto error;
-	}
-	if (window) {
-		if (window->ref_count)
-			scif_put_window(window, window->nr_pages);
-		else
-			dev_err(&scifdev->sdev->dev,
-				"%s %d ref count should be +ve\n",
-				__func__, __LINE__);
-		window->unreg_state = OP_COMPLETED;
-		if (!window->ref_count) {
-			msg->uop = SCIF_UNREGISTER_ACK;
-			atomic_inc(&ep->rma_info.tw_refcount);
-			ep->rma_info.async_list_del = 1;
-			list_del_init(&window->list);
-			del_window = 1;
-		} else {
-			/* NACK! There are valid references to this window */
-			msg->uop = SCIF_UNREGISTER_NACK;
-		}
-	} else {
-		/* The window did not make its way to the list at all. ACK */
-		msg->uop = SCIF_UNREGISTER_ACK;
-		scif_destroy_remote_window(recv_window);
-	}
-error:
-	mutex_unlock(&ep->rma_info.rma_lock);
-	if (del_window)
-		scif_drain_dma_intr(ep->remote_dev->sdev,
-				    ep->rma_info.dma_chan);
-	scif_nodeqp_send(ep->remote_dev, msg);
-	if (del_window)
-		scif_queue_for_cleanup(window, &scif_info.rma);
-}
-
-/**
- * scif_recv_reg_ack: Respond to SCIF_REGISTER_ACK interrupt message
- * @scifdev:    SCIF device
- * @msg:        Interrupt message
- *
- * Wake up the window waiting to complete registration.
- */
-void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	struct scif_window *window =
-		(struct scif_window *)msg->payload[2];
-	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
-
-	mutex_lock(&ep->rma_info.rma_lock);
-	window->reg_state = OP_COMPLETED;
-	wake_up(&window->regwq);
-	mutex_unlock(&ep->rma_info.rma_lock);
-}
-
-/**
- * scif_recv_reg_nack: Respond to SCIF_REGISTER_NACK interrupt message
- * @scifdev:    SCIF device
- * @msg:        Interrupt message
- *
- * Wake up the window waiting to inform it that registration
- * cannot be completed.
- */
-void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	struct scif_window *window =
-		(struct scif_window *)msg->payload[2];
-	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
-
-	mutex_lock(&ep->rma_info.rma_lock);
-	window->reg_state = OP_FAILED;
-	wake_up(&window->regwq);
-	mutex_unlock(&ep->rma_info.rma_lock);
-}
-
-/**
- * scif_recv_unreg_ack: Respond to SCIF_UNREGISTER_ACK interrupt message
- * @scifdev:    SCIF device
- * @msg:        Interrupt message
- *
- * Wake up the window waiting to complete unregistration.
- */
-void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	struct scif_window *window =
-		(struct scif_window *)msg->payload[1];
-	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
-
-	mutex_lock(&ep->rma_info.rma_lock);
-	window->unreg_state = OP_COMPLETED;
-	wake_up(&window->unregwq);
-	mutex_unlock(&ep->rma_info.rma_lock);
-}
-
-/**
- * scif_recv_unreg_nack: Respond to SCIF_UNREGISTER_NACK interrupt message
- * @scifdev:    SCIF device
- * @msg:        Interrupt message
- *
- * Wake up the window waiting to inform it that unregistration
- * cannot be completed immediately.
- */
-void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
-{
-	struct scif_window *window =
-		(struct scif_window *)msg->payload[1];
-	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
-
-	mutex_lock(&ep->rma_info.rma_lock);
-	window->unreg_state = OP_FAILED;
-	wake_up(&window->unregwq);
-	mutex_unlock(&ep->rma_info.rma_lock);
-}
-
-int __scif_pin_pages(void *addr, size_t len, int *out_prot,
-		     int map_flags, scif_pinned_pages_t *pages)
-{
-	struct scif_pinned_pages *pinned_pages;
-	int nr_pages, err = 0, i;
-	bool vmalloc_addr = false;
-	bool try_upgrade = false;
-	int prot = *out_prot;
-	int ulimit = 0;
-	struct mm_struct *mm = NULL;
-
-	/* Unsupported flags */
-	if (map_flags & ~(SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT))
-		return -EINVAL;
-	ulimit = !!(map_flags & SCIF_MAP_ULIMIT);
-
-	/* Unsupported protection requested */
-	if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
-		return -EINVAL;
-
-	/* addr/len must be page aligned. len should be non zero */
-	if (!len ||
-	    (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
-	    (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
-		return -EINVAL;
-
-	might_sleep();
-
-	nr_pages = len >> PAGE_SHIFT;
-
-	/* Allocate a set of pinned pages */
-	pinned_pages = scif_create_pinned_pages(nr_pages, prot);
-	if (!pinned_pages)
-		return -ENOMEM;
-
-	if (map_flags & SCIF_MAP_KERNEL) {
-		if (is_vmalloc_addr(addr))
-			vmalloc_addr = true;
-
-		for (i = 0; i < nr_pages; i++) {
-			if (vmalloc_addr)
-				pinned_pages->pages[i] =
-					vmalloc_to_page(addr + (i * PAGE_SIZE));
-			else
-				pinned_pages->pages[i] =
-					virt_to_page(addr + (i * PAGE_SIZE));
-		}
-		pinned_pages->nr_pages = nr_pages;
-		pinned_pages->map_flags = SCIF_MAP_KERNEL;
-	} else {
-		/*
-		 * SCIF supports registration caching. If a registration has
-		 * been requested with read only permissions, then we try
-		 * to pin the pages with RW permissions so that a subsequent
-		 * transfer with RW permission can hit the cache instead of
-		 * invalidating it. If the upgrade fails with RW then we
-		 * revert back to R permission and retry
-		 */
-		if (prot == SCIF_PROT_READ)
-			try_upgrade = true;
-		prot |= SCIF_PROT_WRITE;
-retry:
-		mm = current->mm;
-		if (ulimit) {
-			err = __scif_check_inc_pinned_vm(mm, nr_pages);
-			if (err) {
-				pinned_pages->nr_pages = 0;
-				goto error_unmap;
-			}
-		}
-
-		pinned_pages->nr_pages = pin_user_pages_fast(
-				(u64)addr,
-				nr_pages,
-				(prot & SCIF_PROT_WRITE) ? FOLL_WRITE : 0,
-				pinned_pages->pages);
-		if (nr_pages != pinned_pages->nr_pages) {
-			if (pinned_pages->nr_pages < 0)
-				pinned_pages->nr_pages = 0;
-			if (try_upgrade) {
-				if (ulimit)
-					__scif_dec_pinned_vm_lock(mm, nr_pages);
-				/* Roll back any pinned pages */
-				unpin_user_pages(pinned_pages->pages,
-						 pinned_pages->nr_pages);
-				prot &= ~SCIF_PROT_WRITE;
-				try_upgrade = false;
-				goto retry;
-			}
-		}
-		pinned_pages->map_flags = 0;
-	}
-
-	if (pinned_pages->nr_pages < nr_pages) {
-		err = -EFAULT;
-		goto dec_pinned;
-	}
-
-	*out_prot = prot;
-	atomic_set(&pinned_pages->ref_count, 1);
-	*pages = pinned_pages;
-	return err;
-dec_pinned:
-	if (ulimit)
-		__scif_dec_pinned_vm_lock(mm, nr_pages);
-	/* Something went wrong! Rollback */
-error_unmap:
-	scif_destroy_pinned_pages(pinned_pages);
-	*pages = NULL;
-	dev_dbg(scif_info.mdev.this_device,
-		"%s %d err %d len 0x%lx\n", __func__, __LINE__, err, len);
-	return err;
-}
-
-int scif_pin_pages(void *addr, size_t len, int prot,
-		   int map_flags, scif_pinned_pages_t *pages)
-{
-	return __scif_pin_pages(addr, len, &prot, map_flags, pages);
-}
-EXPORT_SYMBOL_GPL(scif_pin_pages);
-
-int scif_unpin_pages(scif_pinned_pages_t pinned_pages)
-{
-	int err = 0, ret;
-
-	if (!pinned_pages || SCIFEP_MAGIC != pinned_pages->magic)
-		return -EINVAL;
-
-	ret = atomic_sub_return(1, &pinned_pages->ref_count);
-	if (ret < 0) {
-		dev_err(scif_info.mdev.this_device,
-			"%s %d scif_unpin_pages called without pinning? rc %d\n",
-			__func__, __LINE__, ret);
-		return -EINVAL;
-	}
-	/*
-	 * Destroy the window if the ref count for this set of pinned
-	 * pages has dropped to zero. If it is positive then there is
-	 * a valid registered window which is backed by these pages and
-	 * it will be destroyed once all such windows are unregistered.
-	 */
-	if (!ret)
-		err = scif_destroy_pinned_pages(pinned_pages);
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(scif_unpin_pages);
-
-static inline void
-scif_insert_local_window(struct scif_window *window, struct scif_endpt *ep)
-{
-	mutex_lock(&ep->rma_info.rma_lock);
-	scif_insert_window(window, &ep->rma_info.reg_list);
-	mutex_unlock(&ep->rma_info.rma_lock);
-}
-
-off_t scif_register_pinned_pages(scif_epd_t epd,
-				 scif_pinned_pages_t pinned_pages,
-				 off_t offset, int map_flags)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)epd;
-	s64 computed_offset;
-	struct scif_window *window;
-	int err;
-	size_t len;
-	struct device *spdev;
-
-	/* Unsupported flags */
-	if (map_flags & ~SCIF_MAP_FIXED)
-		return -EINVAL;
-
-	len = pinned_pages->nr_pages << PAGE_SHIFT;
-
-	/*
-	 * Offset is not page aligned/negative or offset+len
-	 * wraps around with SCIF_MAP_FIXED.
-	 */
-	if ((map_flags & SCIF_MAP_FIXED) &&
-	    ((ALIGN(offset, PAGE_SIZE) != offset) ||
-	    (offset < 0) ||
-	    (len > LONG_MAX - offset)))
-		return -EINVAL;
-
-	might_sleep();
-
-	err = scif_verify_epd(ep);
-	if (err)
-		return err;
-	/*
-	 * It is an error to pass pinned_pages to scif_register_pinned_pages()
-	 * after calling scif_unpin_pages().
-	 */
-	if (!atomic_add_unless(&pinned_pages->ref_count, 1, 0))
-		return -EINVAL;
-
-	/* Compute the offset for this registration */
-	err = scif_get_window_offset(ep, map_flags, offset,
-				     len, &computed_offset);
-	if (err) {
-		atomic_sub(1, &pinned_pages->ref_count);
-		return err;
-	}
-
-	/* Allocate and prepare self registration window */
-	window = scif_create_window(ep, pinned_pages->nr_pages,
-				    computed_offset, false);
-	if (!window) {
-		atomic_sub(1, &pinned_pages->ref_count);
-		scif_free_window_offset(ep, NULL, computed_offset);
-		return -ENOMEM;
-	}
-
-	window->pinned_pages = pinned_pages;
-	window->nr_pages = pinned_pages->nr_pages;
-	window->prot = pinned_pages->prot;
-
-	spdev = scif_get_peer_dev(ep->remote_dev);
-	if (IS_ERR(spdev)) {
-		err = PTR_ERR(spdev);
-		scif_destroy_window(ep, window);
-		return err;
-	}
-	err = scif_send_alloc_request(ep, window);
-	if (err) {
-		dev_err(&ep->remote_dev->sdev->dev,
-			"%s %d err %d\n", __func__, __LINE__, err);
-		goto error_unmap;
-	}
-
-	/* Prepare the remote registration window */
-	err = scif_prep_remote_window(ep, window);
-	if (err) {
-		dev_err(&ep->remote_dev->sdev->dev,
-			"%s %d err %d\n", __func__, __LINE__, err);
-		goto error_unmap;
-	}
-
-	/* Tell the peer about the new window */
-	err = scif_send_scif_register(ep, window);
-	if (err) {
-		dev_err(&ep->remote_dev->sdev->dev,
-			"%s %d err %d\n", __func__, __LINE__, err);
-		goto error_unmap;
-	}
-
-	scif_put_peer_dev(spdev);
-	/* No further failures expected. Insert new window */
-	scif_insert_local_window(window, ep);
-	return computed_offset;
-error_unmap:
-	scif_destroy_window(ep, window);
-	scif_put_peer_dev(spdev);
-	dev_err(&ep->remote_dev->sdev->dev,
-		"%s %d err %d\n", __func__, __LINE__, err);
-	return err;
-}
-EXPORT_SYMBOL_GPL(scif_register_pinned_pages);
-
-off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset,
-		    int prot, int map_flags)
-{
-	scif_pinned_pages_t pinned_pages;
-	off_t err;
-	struct scif_endpt *ep = (struct scif_endpt *)epd;
-	s64 computed_offset;
-	struct scif_window *window;
-	struct mm_struct *mm = NULL;
-	struct device *spdev;
-
-	dev_dbg(scif_info.mdev.this_device,
-		"SCIFAPI register: ep %p addr %p len 0x%lx offset 0x%lx prot 0x%x map_flags 0x%x\n",
-		epd, addr, len, offset, prot, map_flags);
-	/* Unsupported flags */
-	if (map_flags & ~(SCIF_MAP_FIXED | SCIF_MAP_KERNEL))
-		return -EINVAL;
-
-	/*
-	 * Offset is not page aligned/negative or offset+len
-	 * wraps around with SCIF_MAP_FIXED.
-	 */
-	if ((map_flags & SCIF_MAP_FIXED) &&
-	    ((ALIGN(offset, PAGE_SIZE) != offset) ||
-	    (offset < 0) ||
-	    (len > LONG_MAX - offset)))
-		return -EINVAL;
-
-	/* Unsupported protection requested */
-	if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
-		return -EINVAL;
-
-	/* addr/len must be page aligned. len should be non zero */
-	if (!len || (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
-	    (ALIGN(len, PAGE_SIZE) != len))
-		return -EINVAL;
-
-	might_sleep();
-
-	err = scif_verify_epd(ep);
-	if (err)
-		return err;
-
-	/* Compute the offset for this registration */
-	err = scif_get_window_offset(ep, map_flags, offset,
-				     len >> PAGE_SHIFT, &computed_offset);
-	if (err)
-		return err;
-
-	spdev = scif_get_peer_dev(ep->remote_dev);
-	if (IS_ERR(spdev)) {
-		err = PTR_ERR(spdev);
-		scif_free_window_offset(ep, NULL, computed_offset);
-		return err;
-	}
-	/* Allocate and prepare self registration window */
-	window = scif_create_window(ep, len >> PAGE_SHIFT,
-				    computed_offset, false);
-	if (!window) {
-		scif_free_window_offset(ep, NULL, computed_offset);
-		scif_put_peer_dev(spdev);
-		return -ENOMEM;
-	}
-
-	window->nr_pages = len >> PAGE_SHIFT;
-
-	err = scif_send_alloc_request(ep, window);
-	if (err) {
-		scif_destroy_incomplete_window(ep, window);
-		scif_put_peer_dev(spdev);
-		return err;
-	}
-
-	if (!(map_flags & SCIF_MAP_KERNEL)) {
-		mm = __scif_acquire_mm();
-		map_flags |= SCIF_MAP_ULIMIT;
-	}
-	/* Pin down the pages */
-	err = __scif_pin_pages(addr, len, &prot,
-			       map_flags & (SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT),
-			       &pinned_pages);
-	if (err) {
-		scif_destroy_incomplete_window(ep, window);
-		__scif_release_mm(mm);
-		goto error;
-	}
-
-	window->pinned_pages = pinned_pages;
-	window->prot = pinned_pages->prot;
-	window->mm = mm;
-
-	/* Prepare the remote registration window */
-	err = scif_prep_remote_window(ep, window);
-	if (err) {
-		dev_err(&ep->remote_dev->sdev->dev,
-			"%s %d err %ld\n", __func__, __LINE__, err);
-		goto error_unmap;
-	}
-
-	/* Tell the peer about the new window */
-	err = scif_send_scif_register(ep, window);
-	if (err) {
-		dev_err(&ep->remote_dev->sdev->dev,
-			"%s %d err %ld\n", __func__, __LINE__, err);
-		goto error_unmap;
-	}
-
-	scif_put_peer_dev(spdev);
-	/* No further failures expected. Insert new window */
-	scif_insert_local_window(window, ep);
-	dev_dbg(&ep->remote_dev->sdev->dev,
-		"SCIFAPI register: ep %p addr %p len 0x%lx computed_offset 0x%llx\n",
-		epd, addr, len, computed_offset);
-	return computed_offset;
-error_unmap:
-	scif_destroy_window(ep, window);
-error:
-	scif_put_peer_dev(spdev);
-	dev_err(&ep->remote_dev->sdev->dev,
-		"%s %d err %ld\n", __func__, __LINE__, err);
-	return err;
-}
-EXPORT_SYMBOL_GPL(scif_register);
-
-int
-scif_unregister(scif_epd_t epd, off_t offset, size_t len)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)epd;
-	struct scif_window *window = NULL;
-	struct scif_rma_req req;
-	int nr_pages, err;
-	struct device *spdev;
-
-	dev_dbg(scif_info.mdev.this_device,
-		"SCIFAPI unregister: ep %p offset 0x%lx len 0x%lx\n",
-		ep, offset, len);
-	/* len must be page aligned. len should be non zero */
-	if (!len ||
-	    (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
-		return -EINVAL;
-
-	/* Offset is not page aligned or offset+len wraps around */
-	if ((ALIGN(offset, PAGE_SIZE) != offset) ||
-	    (offset < 0) ||
-	    (len > LONG_MAX - offset))
-		return -EINVAL;
-
-	err = scif_verify_epd(ep);
-	if (err)
-		return err;
-
-	might_sleep();
-	nr_pages = len >> PAGE_SHIFT;
-
-	req.out_window = &window;
-	req.offset = offset;
-	req.prot = 0;
-	req.nr_bytes = len;
-	req.type = SCIF_WINDOW_FULL;
-	req.head = &ep->rma_info.reg_list;
-
-	spdev = scif_get_peer_dev(ep->remote_dev);
-	if (IS_ERR(spdev)) {
-		err = PTR_ERR(spdev);
-		return err;
-	}
-	mutex_lock(&ep->rma_info.rma_lock);
-	/* Does a valid window exist? */
-	err = scif_query_window(&req);
-	if (err) {
-		dev_err(&ep->remote_dev->sdev->dev,
-			"%s %d err %d\n", __func__, __LINE__, err);
-		goto error;
-	}
-	/* Unregister all the windows in this range */
-	err = scif_rma_list_unregister(window, offset, nr_pages);
-	if (err)
-		dev_err(&ep->remote_dev->sdev->dev,
-			"%s %d err %d\n", __func__, __LINE__, err);
-error:
-	mutex_unlock(&ep->rma_info.rma_lock);
-	scif_put_peer_dev(spdev);
-	return err;
-}
-EXPORT_SYMBOL_GPL(scif_unregister);
diff --git a/drivers/misc/mic/scif/scif_rma.h b/drivers/misc/mic/scif/scif_rma.h
deleted file mode 100644
index 964dd0fc3657..000000000000
--- a/drivers/misc/mic/scif/scif_rma.h
+++ /dev/null
@@ -1,477 +0,0 @@
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- *   notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- *   notice, this list of conditions and the following disclaimer in
- *   the documentation and/or other materials provided with the
- *   distribution.
- * * Neither the name of Intel Corporation nor the names of its
- *   contributors may be used to endorse or promote products derived
- *   from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Intel SCIF driver.
- *
- */
-#ifndef SCIF_RMA_H
-#define SCIF_RMA_H
-
-#include <linux/intel-iommu.h>
-#include <linux/mmu_notifier.h>
-
-#include "../bus/scif_bus.h"
-
-/* If this bit is set then the mark is a remote fence mark */
-#define SCIF_REMOTE_FENCE_BIT          31
-/* Magic value used to indicate a remote fence request */
-#define SCIF_REMOTE_FENCE BIT_ULL(SCIF_REMOTE_FENCE_BIT)
-
-#define SCIF_MAX_UNALIGNED_BUF_SIZE (1024 * 1024ULL)
-#define SCIF_KMEM_UNALIGNED_BUF_SIZE (SCIF_MAX_UNALIGNED_BUF_SIZE + \
-				      (L1_CACHE_BYTES << 1))
-
-#define SCIF_IOVA_START_PFN		(1)
-#define SCIF_IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
-#define SCIF_DMA_64BIT_PFN SCIF_IOVA_PFN(DMA_BIT_MASK(64))
-#define SCIF_DMA_63BIT_PFN SCIF_IOVA_PFN(DMA_BIT_MASK(63))
-
-/*
- * struct scif_endpt_rma_info - Per Endpoint Remote Memory Access Information
- *
- * @reg_list: List of registration windows for self
- * @remote_reg_list: List of registration windows for peer
- * @iovad: Offset generator
- * @rma_lock: Synchronizes access to self/remote list and also protects the
- *	      window from being destroyed while RMAs are in progress.
- * @tc_lock: Synchronizes access to temporary cached windows list
- *	     for SCIF Registration Caching.
- * @mmn_lock: Synchronizes access to the list of MMU notifiers registered
- * @tw_refcount: Keeps track of number of outstanding temporary registered
- *		 windows created by scif_vreadfrom/scif_vwriteto which have
- *		 not been destroyed.
- * @tcw_refcount: Same as tw_refcount but for temporary cached windows
- * @tcw_total_pages: Same as tcw_refcount but in terms of pages pinned
- * @mmn_list: MMU notifier so that we can destroy the windows when required
- * @fence_refcount: Keeps track of number of outstanding remote fence
- *		    requests which have been received by the peer.
- * @dma_chan: DMA channel used for all DMA transfers for this endpoint.
- * @async_list_del: Detect asynchronous list entry deletion
- * @vma_list: List of vmas with remote memory mappings
- * @markwq: Wait queue used for scif_fence_mark/scif_fence_wait
-*/
-struct scif_endpt_rma_info {
-	struct list_head reg_list;
-	struct list_head remote_reg_list;
-	struct iova_domain iovad;
-	struct mutex rma_lock;
-	spinlock_t tc_lock;
-	struct mutex mmn_lock;
-	atomic_t tw_refcount;
-	atomic_t tcw_refcount;
-	atomic_t tcw_total_pages;
-	struct list_head mmn_list;
-	atomic_t fence_refcount;
-	struct dma_chan	*dma_chan;
-	int async_list_del;
-	struct list_head vma_list;
-	wait_queue_head_t markwq;
-};
-
-/*
- * struct scif_fence_info - used for tracking fence requests
- *
- * @state: State of this transfer
- * @wq: Fences wait on this queue
- * @dma_mark: Used for storing the DMA mark
- */
-struct scif_fence_info {
-	enum scif_msg_state state;
-	struct completion comp;
-	int dma_mark;
-};
-
-/*
- * struct scif_remote_fence_info - used for tracking remote fence requests
- *
- * @msg: List of SCIF node QP fence messages
- * @list: Link to list of remote fence requests
- */
-struct scif_remote_fence_info {
-	struct scifmsg msg;
-	struct list_head list;
-};
-
-/*
- * Specifies whether an RMA operation can span across partial windows, a single
- * window or multiple contiguous windows. Mmaps can span across partial windows.
- * Unregistration can span across complete windows. scif_get_pages() can span a
- * single window. A window can also be of type self or peer.
- */
-enum scif_window_type {
-	SCIF_WINDOW_PARTIAL,
-	SCIF_WINDOW_SINGLE,
-	SCIF_WINDOW_FULL,
-	SCIF_WINDOW_SELF,
-	SCIF_WINDOW_PEER
-};
-
-/* The number of physical addresses that can be stored in a PAGE. */
-#define SCIF_NR_ADDR_IN_PAGE   (0x1000 >> 3)
-
-/*
- * struct scif_rma_lookup - RMA lookup data structure for page list transfers
- *
- * Store an array of lookup offsets. Each offset in this array maps
- * one 4K page containing 512 physical addresses i.e. 2MB. 512 such
- * offsets in a 4K page will correspond to 1GB of registered address space.
-
- * @lookup: Array of offsets
- * @offset: DMA offset of lookup array
- */
-struct scif_rma_lookup {
-	dma_addr_t *lookup;
-	dma_addr_t offset;
-};
-
-/*
- * struct scif_pinned_pages - A set of pinned pages obtained with
- * scif_pin_pages() which could be part of multiple registered
- * windows across different end points.
- *
- * @nr_pages: Number of pages which is defined as a s64 instead of an int
- * to avoid sign extension with buffers >= 2GB
- * @prot: read/write protections
- * @map_flags: Flags specified during the pin operation
- * @ref_count: Reference count bumped in terms of number of pages
- * @magic: A magic value
- * @pages: Array of pointers to struct pages populated with get_user_pages(..)
- */
-struct scif_pinned_pages {
-	s64 nr_pages;
-	int prot;
-	int map_flags;
-	atomic_t ref_count;
-	u64 magic;
-	struct page **pages;
-};
-
-/*
- * struct scif_status - Stores DMA status update information
- *
- * @src_dma_addr: Source buffer DMA address
- * @val: src location for value to be written to the destination
- * @ep: SCIF endpoint
- */
-struct scif_status {
-	dma_addr_t src_dma_addr;
-	u64 val;
-	struct scif_endpt *ep;
-};
-
-/*
- * struct scif_cb_arg - Stores the argument of the callback func
- *
- * @src_dma_addr: Source buffer DMA address
- * @status: DMA status
- * @ep: SCIF endpoint
- */
-struct scif_cb_arg {
-	dma_addr_t src_dma_addr;
-	struct scif_status *status;
-	struct scif_endpt *ep;
-};
-
-/*
- * struct scif_window - Registration Window for Self and Remote
- *
- * @nr_pages: Number of pages which is defined as a s64 instead of an int
- * to avoid sign extension with buffers >= 2GB
- * @nr_contig_chunks: Number of contiguous physical chunks
- * @prot: read/write protections
- * @ref_count: reference count in terms of number of pages
- * @magic: Cookie to detect corruption
- * @offset: registered offset
- * @va_for_temp: va address that this window represents
- * @dma_mark: Used to determine if all DMAs against the window are done
- * @ep: Pointer to EP. Useful for passing EP around with messages to
-	avoid expensive list traversals.
- * @list: link to list of windows for the endpoint
- * @type: self or peer window
- * @peer_window: Pointer to peer window. Useful for sending messages to peer
- *		 without requiring an extra list traversal
- * @unreg_state: unregistration state
- * @offset_freed: True if the offset has been freed
- * @temp: True for temporary windows created via scif_vreadfrom/scif_vwriteto
- * @mm: memory descriptor for the task_struct which initiated the RMA
- * @st: scatter gather table for DMA mappings with IOMMU enabled
- * @pinned_pages: The set of pinned_pages backing this window
- * @alloc_handle: Handle for sending ALLOC_REQ
- * @regwq: Wait Queue for an registration (N)ACK
- * @reg_state: Registration state
- * @unregwq: Wait Queue for an unregistration (N)ACK
- * @dma_addr_lookup: Lookup for physical addresses used for DMA
- * @nr_lookup: Number of entries in lookup
- * @mapped_offset: Offset used to map the window by the peer
- * @dma_addr: Array of physical addresses used for Mgmt node & MIC initiated DMA
- * @num_pages: Array specifying number of pages for each physical address
- */
-struct scif_window {
-	s64 nr_pages;
-	int nr_contig_chunks;
-	int prot;
-	int ref_count;
-	u64 magic;
-	s64 offset;
-	unsigned long va_for_temp;
-	int dma_mark;
-	u64 ep;
-	struct list_head list;
-	enum scif_window_type type;
-	u64 peer_window;
-	enum scif_msg_state unreg_state;
-	bool offset_freed;
-	bool temp;
-	struct mm_struct *mm;
-	struct sg_table *st;
-	union {
-		struct {
-			struct scif_pinned_pages *pinned_pages;
-			struct scif_allocmsg alloc_handle;
-			wait_queue_head_t regwq;
-			enum scif_msg_state reg_state;
-			wait_queue_head_t unregwq;
-		};
-		struct {
-			struct scif_rma_lookup dma_addr_lookup;
-			struct scif_rma_lookup num_pages_lookup;
-			int nr_lookup;
-			dma_addr_t mapped_offset;
-		};
-	};
-	dma_addr_t *dma_addr;
-	u64 *num_pages;
-} __packed;
-
-/*
- * scif_mmu_notif - SCIF mmu notifier information
- *
- * @mmu_notifier ep_mmu_notifier: MMU notifier operations
- * @tc_reg_list: List of temp registration windows for self
- * @mm: memory descriptor for the task_struct which initiated the RMA
- * @ep: SCIF endpoint
- * @list: link to list of MMU notifier information
- */
-struct scif_mmu_notif {
-#ifdef CONFIG_MMU_NOTIFIER
-	struct mmu_notifier ep_mmu_notifier;
-#endif
-	struct list_head tc_reg_list;
-	struct mm_struct *mm;
-	struct scif_endpt *ep;
-	struct list_head list;
-};
-
-enum scif_rma_dir {
-	SCIF_LOCAL_TO_REMOTE,
-	SCIF_REMOTE_TO_LOCAL
-};
-
-extern struct kmem_cache *unaligned_cache;
-/* Initialize RMA for this EP */
-void scif_rma_ep_init(struct scif_endpt *ep);
-/* Check if epd can be uninitialized */
-int scif_rma_ep_can_uninit(struct scif_endpt *ep);
-/* Obtain a new offset. Callee must grab RMA lock */
-int scif_get_window_offset(struct scif_endpt *ep, int flags,
-			   s64 offset, int nr_pages, s64 *out_offset);
-/* Free offset. Callee must grab RMA lock */
-void scif_free_window_offset(struct scif_endpt *ep,
-			     struct scif_window *window, s64 offset);
-/* Create self registration window */
-struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages,
-				       s64 offset, bool temp);
-/* Destroy self registration window.*/
-int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window);
-void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window);
-/* Map pages of self window to Aperture/PCI */
-int scif_map_window(struct scif_dev *remote_dev,
-		    struct scif_window *window);
-/* Unregister a self window */
-int scif_unregister_window(struct scif_window *window);
-/* Destroy remote registration window */
-void
-scif_destroy_remote_window(struct scif_window *window);
-/* remove valid remote memory mappings from process address space */
-void scif_zap_mmaps(int node);
-/* Query if any applications have remote memory mappings */
-bool scif_rma_do_apps_have_mmaps(int node);
-/* Cleanup remote registration lists for zombie endpoints */
-void scif_cleanup_rma_for_zombies(int node);
-/* Reserve a DMA channel for a particular endpoint */
-int scif_reserve_dma_chan(struct scif_endpt *ep);
-/* Setup a DMA mark for an endpoint */
-int _scif_fence_mark(scif_epd_t epd, int *mark);
-int scif_prog_signal(scif_epd_t epd, off_t offset, u64 val,
-		     enum scif_window_type type);
-void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg);
-void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg);
-void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg);
-void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg);
-void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg);
-void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg);
-void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg);
-void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg);
-void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg);
-void scif_recv_munmap(struct scif_dev *scifdev, struct scifmsg *msg);
-void scif_recv_mark(struct scif_dev *scifdev, struct scifmsg *msg);
-void scif_recv_mark_resp(struct scif_dev *scifdev, struct scifmsg *msg);
-void scif_recv_wait(struct scif_dev *scifdev, struct scifmsg *msg);
-void scif_recv_wait_resp(struct scif_dev *scifdev, struct scifmsg *msg);
-void scif_recv_sig_local(struct scif_dev *scifdev, struct scifmsg *msg);
-void scif_recv_sig_remote(struct scif_dev *scifdev, struct scifmsg *msg);
-void scif_recv_sig_resp(struct scif_dev *scifdev, struct scifmsg *msg);
-void scif_mmu_notif_handler(struct work_struct *work);
-void scif_rma_handle_remote_fences(void);
-void scif_rma_destroy_windows(void);
-void scif_rma_destroy_tcw_invalid(void);
-int scif_drain_dma_intr(struct scif_hw_dev *sdev, struct dma_chan *chan);
-
-struct scif_window_iter {
-	s64 offset;
-	int index;
-};
-
-static inline void
-scif_init_window_iter(struct scif_window *window, struct scif_window_iter *iter)
-{
-	iter->offset = window->offset;
-	iter->index = 0;
-}
-
-dma_addr_t scif_off_to_dma_addr(struct scif_window *window, s64 off,
-				size_t *nr_bytes,
-				struct scif_window_iter *iter);
-static inline
-dma_addr_t __scif_off_to_dma_addr(struct scif_window *window, s64 off)
-{
-	return scif_off_to_dma_addr(window, off, NULL, NULL);
-}
-
-static inline bool scif_unaligned(off_t src_offset, off_t dst_offset)
-{
-	src_offset = src_offset & (L1_CACHE_BYTES - 1);
-	dst_offset = dst_offset & (L1_CACHE_BYTES - 1);
-	return !(src_offset == dst_offset);
-}
-
-/*
- * scif_zalloc:
- * @size: Size of the allocation request.
- *
- * Helper API which attempts to allocate zeroed pages via
- * __get_free_pages(..) first and then falls back on
- * vzalloc(..) if that fails.
- */
-static inline void *scif_zalloc(size_t size)
-{
-	void *ret = NULL;
-	size_t align = ALIGN(size, PAGE_SIZE);
-
-	if (align && get_order(align) < MAX_ORDER)
-		ret = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
-					       get_order(align));
-	return ret ? ret : vzalloc(align);
-}
-
-/*
- * scif_free:
- * @addr: Address to be freed.
- * @size: Size of the allocation.
- * Helper API which frees memory allocated via scif_zalloc().
- */
-static inline void scif_free(void *addr, size_t size)
-{
-	size_t align = ALIGN(size, PAGE_SIZE);
-
-	if (is_vmalloc_addr(addr))
-		vfree(addr);
-	else
-		free_pages((unsigned long)addr, get_order(align));
-}
-
-static inline void scif_get_window(struct scif_window *window, int nr_pages)
-{
-	window->ref_count += nr_pages;
-}
-
-static inline void scif_put_window(struct scif_window *window, int nr_pages)
-{
-	window->ref_count -= nr_pages;
-}
-
-static inline void scif_set_window_ref(struct scif_window *window, int nr_pages)
-{
-	window->ref_count = nr_pages;
-}
-
-static inline void
-scif_queue_for_cleanup(struct scif_window *window, struct list_head *list)
-{
-	spin_lock(&scif_info.rmalock);
-	list_add_tail(&window->list, list);
-	spin_unlock(&scif_info.rmalock);
-	schedule_work(&scif_info.misc_work);
-}
-
-static inline void __scif_rma_destroy_tcw_helper(struct scif_window *window)
-{
-	list_del_init(&window->list);
-	scif_queue_for_cleanup(window, &scif_info.rma_tc);
-}
-
-static inline bool scif_is_iommu_enabled(void)
-{
-#ifdef CONFIG_INTEL_IOMMU
-	return intel_iommu_enabled;
-#else
-	return false;
-#endif
-}
-#endif /* SCIF_RMA_H */
diff --git a/drivers/misc/mic/scif/scif_rma_list.c b/drivers/misc/mic/scif/scif_rma_list.c
deleted file mode 100644
index ef923ba134c8..000000000000
--- a/drivers/misc/mic/scif/scif_rma_list.c
+++ /dev/null
@@ -1,282 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * Intel SCIF driver.
- */
-#include "scif_main.h"
-#include <linux/mmu_notifier.h>
-#include <linux/highmem.h>
-
-/*
- * scif_insert_tcw:
- *
- * Insert a temp window to the temp registration list sorted by va_for_temp.
- * RMA lock must be held.
- */
-void scif_insert_tcw(struct scif_window *window, struct list_head *head)
-{
-	struct scif_window *curr = NULL;
-	struct scif_window *prev = list_entry(head, struct scif_window, list);
-	struct list_head *item;
-
-	INIT_LIST_HEAD(&window->list);
-	/* Compare with tail and if the entry is new tail add it to the end */
-	if (!list_empty(head)) {
-		curr = list_entry(head->prev, struct scif_window, list);
-		if (curr->va_for_temp < window->va_for_temp) {
-			list_add_tail(&window->list, head);
-			return;
-		}
-	}
-	list_for_each(item, head) {
-		curr = list_entry(item, struct scif_window, list);
-		if (curr->va_for_temp > window->va_for_temp)
-			break;
-		prev = curr;
-	}
-	list_add(&window->list, &prev->list);
-}
-
-/*
- * scif_insert_window:
- *
- * Insert a window to the self registration list sorted by offset.
- * RMA lock must be held.
- */
-void scif_insert_window(struct scif_window *window, struct list_head *head)
-{
-	struct scif_window *curr = NULL, *prev = NULL;
-	struct list_head *item;
-
-	INIT_LIST_HEAD(&window->list);
-	list_for_each(item, head) {
-		curr = list_entry(item, struct scif_window, list);
-		if (curr->offset > window->offset)
-			break;
-		prev = curr;
-	}
-	if (!prev)
-		list_add(&window->list, head);
-	else
-		list_add(&window->list, &prev->list);
-	scif_set_window_ref(window, window->nr_pages);
-}
-
-/*
- * scif_query_tcw:
- *
- * Query the temp cached registration list of ep for an overlapping window
- * in case of permission mismatch, destroy the previous window. if permissions
- * match and overlap is partial, destroy the window but return the new range
- * RMA lock must be held.
- */
-int scif_query_tcw(struct scif_endpt *ep, struct scif_rma_req *req)
-{
-	struct list_head *item, *temp, *head = req->head;
-	struct scif_window *window;
-	u64 start_va_window, start_va_req = req->va_for_temp;
-	u64 end_va_window, end_va_req = start_va_req + req->nr_bytes;
-
-	if (!req->nr_bytes)
-		return -EINVAL;
-	/*
-	 * Avoid traversing the entire list to find out that there
-	 * is no entry that matches
-	 */
-	if (!list_empty(head)) {
-		window = list_last_entry(head, struct scif_window, list);
-		end_va_window = window->va_for_temp +
-			(window->nr_pages << PAGE_SHIFT);
-		if (start_va_req > end_va_window)
-			return -ENXIO;
-	}
-	list_for_each_safe(item, temp, head) {
-		window = list_entry(item, struct scif_window, list);
-		start_va_window = window->va_for_temp;
-		end_va_window = window->va_for_temp +
-			(window->nr_pages << PAGE_SHIFT);
-		if (start_va_req < start_va_window &&
-		    end_va_req < start_va_window)
-			break;
-		if (start_va_req >= end_va_window)
-			continue;
-		if ((window->prot & req->prot) == req->prot) {
-			if (start_va_req >= start_va_window &&
-			    end_va_req <= end_va_window) {
-				*req->out_window = window;
-				return 0;
-			}
-			/* expand window */
-			if (start_va_req < start_va_window) {
-				req->nr_bytes +=
-					start_va_window - start_va_req;
-				req->va_for_temp = start_va_window;
-			}
-			if (end_va_req >= end_va_window)
-				req->nr_bytes += end_va_window - end_va_req;
-		}
-		/* Destroy the old window to create a new one */
-		__scif_rma_destroy_tcw_helper(window);
-		break;
-	}
-	return -ENXIO;
-}
-
-/*
- * scif_query_window:
- *
- * Query the registration list and check if a valid contiguous
- * range of windows exist.
- * RMA lock must be held.
- */
-int scif_query_window(struct scif_rma_req *req)
-{
-	struct list_head *item;
-	struct scif_window *window;
-	s64 end_offset, offset = req->offset;
-	u64 tmp_min, nr_bytes_left = req->nr_bytes;
-
-	if (!req->nr_bytes)
-		return -EINVAL;
-
-	list_for_each(item, req->head) {
-		window = list_entry(item, struct scif_window, list);
-		end_offset = window->offset +
-			(window->nr_pages << PAGE_SHIFT);
-		if (offset < window->offset)
-			/* Offset not found! */
-			return -ENXIO;
-		if (offset >= end_offset)
-			continue;
-		/* Check read/write protections. */
-		if ((window->prot & req->prot) != req->prot)
-			return -EPERM;
-		if (nr_bytes_left == req->nr_bytes)
-			/* Store the first window */
-			*req->out_window = window;
-		tmp_min = min((u64)end_offset - offset, nr_bytes_left);
-		nr_bytes_left -= tmp_min;
-		offset += tmp_min;
-		/*
-		 * Range requested encompasses
-		 * multiple windows contiguously.
-		 */
-		if (!nr_bytes_left) {
-			/* Done for partial window */
-			if (req->type == SCIF_WINDOW_PARTIAL ||
-			    req->type == SCIF_WINDOW_SINGLE)
-				return 0;
-			/* Extra logic for full windows */
-			if (offset == end_offset)
-				/* Spanning multiple whole windows */
-				return 0;
-				/* Not spanning multiple whole windows */
-			return -ENXIO;
-		}
-		if (req->type == SCIF_WINDOW_SINGLE)
-			break;
-	}
-	dev_err(scif_info.mdev.this_device,
-		"%s %d ENXIO\n", __func__, __LINE__);
-	return -ENXIO;
-}
-
-/*
- * scif_rma_list_unregister:
- *
- * Traverse the self registration list starting from window:
- * 1) Call scif_unregister_window(..)
- * RMA lock must be held.
- */
-int scif_rma_list_unregister(struct scif_window *window,
-			     s64 offset, int nr_pages)
-{
-	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
-	struct list_head *head = &ep->rma_info.reg_list;
-	s64 end_offset;
-	int err = 0;
-	int loop_nr_pages;
-	struct scif_window *_window;
-
-	list_for_each_entry_safe_from(window, _window, head, list) {
-		end_offset = window->offset + (window->nr_pages << PAGE_SHIFT);
-		loop_nr_pages = min((int)((end_offset - offset) >> PAGE_SHIFT),
-				    nr_pages);
-		err = scif_unregister_window(window);
-		if (err)
-			return err;
-		nr_pages -= loop_nr_pages;
-		offset += (loop_nr_pages << PAGE_SHIFT);
-		if (!nr_pages)
-			break;
-	}
-	return 0;
-}
-
-/*
- * scif_unmap_all_window:
- *
- * Traverse all the windows in the self registration list and:
- * 1) Delete any DMA mappings created
- */
-void scif_unmap_all_windows(scif_epd_t epd)
-{
-	struct list_head *item, *tmp;
-	struct scif_window *window;
-	struct scif_endpt *ep = (struct scif_endpt *)epd;
-	struct list_head *head = &ep->rma_info.reg_list;
-
-	mutex_lock(&ep->rma_info.rma_lock);
-	list_for_each_safe(item, tmp, head) {
-		window = list_entry(item, struct scif_window, list);
-		scif_unmap_window(ep->remote_dev, window);
-	}
-	mutex_unlock(&ep->rma_info.rma_lock);
-}
-
-/*
- * scif_unregister_all_window:
- *
- * Traverse all the windows in the self registration list and:
- * 1) Call scif_unregister_window(..)
- * RMA lock must be held.
- */
-int scif_unregister_all_windows(scif_epd_t epd)
-{
-	struct list_head *item, *tmp;
-	struct scif_window *window;
-	struct scif_endpt *ep = (struct scif_endpt *)epd;
-	struct list_head *head = &ep->rma_info.reg_list;
-	int err = 0;
-
-	mutex_lock(&ep->rma_info.rma_lock);
-retry:
-	item = NULL;
-	tmp = NULL;
-	list_for_each_safe(item, tmp, head) {
-		window = list_entry(item, struct scif_window, list);
-		ep->rma_info.async_list_del = 0;
-		err = scif_unregister_window(window);
-		if (err)
-			dev_err(scif_info.mdev.this_device,
-				"%s %d err %d\n",
-				__func__, __LINE__, err);
-		/*
-		 * Need to restart list traversal if there has been
-		 * an asynchronous list entry deletion.
-		 */
-		if (READ_ONCE(ep->rma_info.async_list_del))
-			goto retry;
-	}
-	mutex_unlock(&ep->rma_info.rma_lock);
-	if (!list_empty(&ep->rma_info.mmn_list)) {
-		spin_lock(&scif_info.rmalock);
-		list_add_tail(&ep->mmu_list, &scif_info.mmu_notif_cleanup);
-		spin_unlock(&scif_info.rmalock);
-		schedule_work(&scif_info.mmu_notif_work);
-	}
-	return err;
-}
diff --git a/drivers/misc/mic/scif/scif_rma_list.h b/drivers/misc/mic/scif/scif_rma_list.h
deleted file mode 100644
index 0f8e0ed65614..000000000000
--- a/drivers/misc/mic/scif/scif_rma_list.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * Intel SCIF driver.
- */
-#ifndef SCIF_RMA_LIST_H
-#define SCIF_RMA_LIST_H
-
-/*
- * struct scif_rma_req - Self Registration list RMA Request query
- *
- * @out_window - Returns the window if found
- * @offset: Starting offset
- * @nr_bytes: number of bytes
- * @prot: protection requested i.e. read or write or both
- * @type: Specify single, partial or multiple windows
- * @head: Head of list on which to search
- * @va_for_temp: VA for searching temporary cached windows
- */
-struct scif_rma_req {
-	struct scif_window **out_window;
-	union {
-		s64 offset;
-		unsigned long va_for_temp;
-	};
-	size_t nr_bytes;
-	int prot;
-	enum scif_window_type type;
-	struct list_head *head;
-};
-
-/* Insert */
-void scif_insert_window(struct scif_window *window, struct list_head *head);
-void scif_insert_tcw(struct scif_window *window,
-		     struct list_head *head);
-/* Query */
-int scif_query_window(struct scif_rma_req *request);
-int scif_query_tcw(struct scif_endpt *ep, struct scif_rma_req *request);
-/* Called from close to unregister all self windows */
-int scif_unregister_all_windows(scif_epd_t epd);
-void scif_unmap_all_windows(scif_epd_t epd);
-/* Traverse list and unregister */
-int scif_rma_list_unregister(struct scif_window *window, s64 offset,
-			     int nr_pages);
-#endif /* SCIF_RMA_LIST_H */
diff --git a/drivers/misc/mic/vop/Makefile b/drivers/misc/mic/vop/Makefile
deleted file mode 100644
index 51b9b0022786..000000000000
--- a/drivers/misc/mic/vop/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Makefile - Intel MIC Linux driver.
-# Copyright(c) 2016, Intel Corporation.
-#
-obj-$(CONFIG_VOP) := vop.o
-
-vop-objs += vop_main.o
-vop-objs += vop_debugfs.o
-vop-objs += vop_vringh.o
diff --git a/drivers/misc/mic/vop/vop_debugfs.c b/drivers/misc/mic/vop/vop_debugfs.c
deleted file mode 100644
index 9d4f175f4dd1..000000000000
--- a/drivers/misc/mic/vop/vop_debugfs.c
+++ /dev/null
@@ -1,184 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Intel Virtio Over PCIe (VOP) driver.
- */
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-
-#include "vop_main.h"
-
-static int vop_dp_show(struct seq_file *s, void *pos)
-{
-	struct mic_device_desc *d;
-	struct mic_device_ctrl *dc;
-	struct mic_vqconfig *vqconfig;
-	__u32 *features;
-	__u8 *config;
-	struct vop_info *vi = s->private;
-	struct vop_device *vpdev = vi->vpdev;
-	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
-	int j, k;
-
-	seq_printf(s, "Bootparam: magic 0x%x\n",
-		   bootparam->magic);
-	seq_printf(s, "Bootparam: h2c_config_db %d\n",
-		   bootparam->h2c_config_db);
-	seq_printf(s, "Bootparam: node_id %d\n",
-		   bootparam->node_id);
-	seq_printf(s, "Bootparam: c2h_scif_db %d\n",
-		   bootparam->c2h_scif_db);
-	seq_printf(s, "Bootparam: h2c_scif_db %d\n",
-		   bootparam->h2c_scif_db);
-	seq_printf(s, "Bootparam: scif_host_dma_addr 0x%llx\n",
-		   bootparam->scif_host_dma_addr);
-	seq_printf(s, "Bootparam: scif_card_dma_addr 0x%llx\n",
-		   bootparam->scif_card_dma_addr);
-
-	for (j = sizeof(*bootparam);
-		j < MIC_DP_SIZE; j += mic_total_desc_size(d)) {
-		d = (void *)bootparam + j;
-		dc = (void *)d + mic_aligned_desc_size(d);
-
-		/* end of list */
-		if (d->type == 0)
-			break;
-
-		if (d->type == -1)
-			continue;
-
-		seq_printf(s, "Type %d ", d->type);
-		seq_printf(s, "Num VQ %d ", d->num_vq);
-		seq_printf(s, "Feature Len %d\n", d->feature_len);
-		seq_printf(s, "Config Len %d ", d->config_len);
-		seq_printf(s, "Shutdown Status %d\n", d->status);
-
-		for (k = 0; k < d->num_vq; k++) {
-			vqconfig = mic_vq_config(d) + k;
-			seq_printf(s, "vqconfig[%d]: ", k);
-			seq_printf(s, "address 0x%llx ",
-				   vqconfig->address);
-			seq_printf(s, "num %d ", vqconfig->num);
-			seq_printf(s, "used address 0x%llx\n",
-				   vqconfig->used_address);
-		}
-
-		features = (__u32 *)mic_vq_features(d);
-		seq_printf(s, "Features: Host 0x%x ", features[0]);
-		seq_printf(s, "Guest 0x%x\n", features[1]);
-
-		config = mic_vq_configspace(d);
-		for (k = 0; k < d->config_len; k++)
-			seq_printf(s, "config[%d]=%d\n", k, config[k]);
-
-		seq_puts(s, "Device control:\n");
-		seq_printf(s, "Config Change %d ", dc->config_change);
-		seq_printf(s, "Vdev reset %d\n", dc->vdev_reset);
-		seq_printf(s, "Guest Ack %d ", dc->guest_ack);
-		seq_printf(s, "Host ack %d\n", dc->host_ack);
-		seq_printf(s, "Used address updated %d ",
-			   dc->used_address_updated);
-		seq_printf(s, "Vdev 0x%llx\n", dc->vdev);
-		seq_printf(s, "c2h doorbell %d ", dc->c2h_vdev_db);
-		seq_printf(s, "h2c doorbell %d\n", dc->h2c_vdev_db);
-	}
-	schedule_work(&vi->hotplug_work);
-	return 0;
-}
-
-DEFINE_SHOW_ATTRIBUTE(vop_dp);
-
-static int vop_vdev_info_show(struct seq_file *s, void *unused)
-{
-	struct vop_info *vi = s->private;
-	struct list_head *pos, *tmp;
-	struct vop_vdev *vdev;
-	int i, j;
-
-	mutex_lock(&vi->vop_mutex);
-	list_for_each_safe(pos, tmp, &vi->vdev_list) {
-		vdev = list_entry(pos, struct vop_vdev, list);
-		seq_printf(s, "VDEV type %d state %s in %ld out %ld in_dma %ld out_dma %ld\n",
-			   vdev->virtio_id,
-			   vop_vdevup(vdev) ? "UP" : "DOWN",
-			   vdev->in_bytes,
-			   vdev->out_bytes,
-			   vdev->in_bytes_dma,
-			   vdev->out_bytes_dma);
-		for (i = 0; i < MIC_MAX_VRINGS; i++) {
-			struct vring_desc *desc;
-			struct vring_avail *avail;
-			struct vring_used *used;
-			struct vop_vringh *vvr = &vdev->vvr[i];
-			struct vringh *vrh = &vvr->vrh;
-			int num = vrh->vring.num;
-
-			if (!num)
-				continue;
-			desc = vrh->vring.desc;
-			seq_printf(s, "vring i %d avail_idx %d",
-				   i, vvr->vring.info->avail_idx & (num - 1));
-			seq_printf(s, " vring i %d avail_idx %d\n",
-				   i, vvr->vring.info->avail_idx);
-			seq_printf(s, "vrh i %d weak_barriers %d",
-				   i, vrh->weak_barriers);
-			seq_printf(s, " last_avail_idx %d last_used_idx %d",
-				   vrh->last_avail_idx, vrh->last_used_idx);
-			seq_printf(s, " completed %d\n", vrh->completed);
-			for (j = 0; j < num; j++) {
-				seq_printf(s, "desc[%d] addr 0x%llx len %d",
-					   j, desc->addr, desc->len);
-				seq_printf(s, " flags 0x%x next %d\n",
-					   desc->flags, desc->next);
-				desc++;
-			}
-			avail = vrh->vring.avail;
-			seq_printf(s, "avail flags 0x%x idx %d\n",
-				   vringh16_to_cpu(vrh, avail->flags),
-				   vringh16_to_cpu(vrh,
-						   avail->idx) & (num - 1));
-			seq_printf(s, "avail flags 0x%x idx %d\n",
-				   vringh16_to_cpu(vrh, avail->flags),
-				   vringh16_to_cpu(vrh, avail->idx));
-			for (j = 0; j < num; j++)
-				seq_printf(s, "avail ring[%d] %d\n",
-					   j, avail->ring[j]);
-			used = vrh->vring.used;
-			seq_printf(s, "used flags 0x%x idx %d\n",
-				   vringh16_to_cpu(vrh, used->flags),
-				   vringh16_to_cpu(vrh, used->idx) & (num - 1));
-			seq_printf(s, "used flags 0x%x idx %d\n",
-				   vringh16_to_cpu(vrh, used->flags),
-				   vringh16_to_cpu(vrh, used->idx));
-			for (j = 0; j < num; j++)
-				seq_printf(s, "used ring[%d] id %d len %d\n",
-					   j, vringh32_to_cpu(vrh,
-							      used->ring[j].id),
-					   vringh32_to_cpu(vrh,
-							   used->ring[j].len));
-		}
-	}
-	mutex_unlock(&vi->vop_mutex);
-
-	return 0;
-}
-
-DEFINE_SHOW_ATTRIBUTE(vop_vdev_info);
-
-void vop_init_debugfs(struct vop_info *vi)
-{
-	char name[16];
-
-	snprintf(name, sizeof(name), "%s%d", KBUILD_MODNAME, vi->vpdev->dnode);
-	vi->dbg = debugfs_create_dir(name, NULL);
-	debugfs_create_file("dp", 0444, vi->dbg, vi, &vop_dp_fops);
-	debugfs_create_file("vdev_info", 0444, vi->dbg, vi, &vop_vdev_info_fops);
-}
-
-void vop_exit_debugfs(struct vop_info *vi)
-{
-	debugfs_remove_recursive(vi->dbg);
-}
diff --git a/drivers/misc/mic/vop/vop_main.c b/drivers/misc/mic/vop/vop_main.c
deleted file mode 100644
index 714b94f42d38..000000000000
--- a/drivers/misc/mic/vop/vop_main.c
+++ /dev/null
@@ -1,784 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Adapted from:
- *
- * virtio for kvm on s390
- *
- * Copyright IBM Corp. 2008
- *
- *    Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
- *
- * Intel Virtio Over PCIe (VOP) driver.
- */
-#include <linux/delay.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/dma-mapping.h>
-#include <linux/io-64-nonatomic-lo-hi.h>
-
-#include "vop_main.h"
-
-#define VOP_MAX_VRINGS 4
-
-/*
- * _vop_vdev - Allocated per virtio device instance injected by the peer.
- *
- * @vdev: Virtio device
- * @desc: Virtio device page descriptor
- * @dc: Virtio device control
- * @vpdev: VOP device which is the parent for this virtio device
- * @vr: Buffer for accessing the VRING
- * @used_virt: Virtual address of used ring
- * @used: DMA address of used ring
- * @used_size: Size of the used buffer
- * @reset_done: Track whether VOP reset is complete
- * @virtio_cookie: Cookie returned upon requesting a interrupt
- * @c2h_vdev_db: The doorbell used by the guest to interrupt the host
- * @h2c_vdev_db: The doorbell used by the host to interrupt the guest
- * @dnode: The destination node
- */
-struct _vop_vdev {
-	struct virtio_device vdev;
-	struct mic_device_desc __iomem *desc;
-	struct mic_device_ctrl __iomem *dc;
-	struct vop_device *vpdev;
-	void __iomem *vr[VOP_MAX_VRINGS];
-	void *used_virt[VOP_MAX_VRINGS];
-	dma_addr_t used[VOP_MAX_VRINGS];
-	int used_size[VOP_MAX_VRINGS];
-	struct completion reset_done;
-	struct mic_irq *virtio_cookie;
-	int c2h_vdev_db;
-	int h2c_vdev_db;
-	int dnode;
-};
-
-#define to_vopvdev(vd) container_of(vd, struct _vop_vdev, vdev)
-
-#define _vop_aligned_desc_size(d) __mic_align(_vop_desc_size(d), 8)
-
-/* Helper API to obtain the parent of the virtio device */
-static inline struct device *_vop_dev(struct _vop_vdev *vdev)
-{
-	return vdev->vdev.dev.parent;
-}
-
-static inline unsigned _vop_desc_size(struct mic_device_desc __iomem *desc)
-{
-	return sizeof(*desc)
-		+ ioread8(&desc->num_vq) * sizeof(struct mic_vqconfig)
-		+ ioread8(&desc->feature_len) * 2
-		+ ioread8(&desc->config_len);
-}
-
-static inline struct mic_vqconfig __iomem *
-_vop_vq_config(struct mic_device_desc __iomem *desc)
-{
-	return (struct mic_vqconfig __iomem *)(desc + 1);
-}
-
-static inline u8 __iomem *
-_vop_vq_features(struct mic_device_desc __iomem *desc)
-{
-	return (u8 __iomem *)(_vop_vq_config(desc) + ioread8(&desc->num_vq));
-}
-
-static inline u8 __iomem *
-_vop_vq_configspace(struct mic_device_desc __iomem *desc)
-{
-	return _vop_vq_features(desc) + ioread8(&desc->feature_len) * 2;
-}
-
-static inline unsigned
-_vop_total_desc_size(struct mic_device_desc __iomem *desc)
-{
-	return _vop_aligned_desc_size(desc) + sizeof(struct mic_device_ctrl);
-}
-
-/* This gets the device's feature bits. */
-static u64 vop_get_features(struct virtio_device *vdev)
-{
-	unsigned int i, bits;
-	u64 features = 0;
-	struct mic_device_desc __iomem *desc = to_vopvdev(vdev)->desc;
-	u8 __iomem *in_features = _vop_vq_features(desc);
-	int feature_len = ioread8(&desc->feature_len);
-
-	bits = min_t(unsigned, feature_len, sizeof(vdev->features)) * 8;
-	for (i = 0; i < bits; i++)
-		if (ioread8(&in_features[i / 8]) & (BIT(i % 8)))
-			features |= BIT_ULL(i);
-
-	return features;
-}
-
-static void vop_transport_features(struct virtio_device *vdev)
-{
-	/*
-	 * Packed ring isn't enabled on virtio_vop for now,
-	 * because virtio_vop uses vring_new_virtqueue() which
-	 * creates virtio rings on preallocated memory.
-	 */
-	__virtio_clear_bit(vdev, VIRTIO_F_RING_PACKED);
-	__virtio_set_bit(vdev, VIRTIO_F_ACCESS_PLATFORM);
-}
-
-static int vop_finalize_features(struct virtio_device *vdev)
-{
-	unsigned int i, bits;
-	struct mic_device_desc __iomem *desc = to_vopvdev(vdev)->desc;
-	u8 feature_len = ioread8(&desc->feature_len);
-	/* Second half of bitmap is features we accept. */
-	u8 __iomem *out_features =
-		_vop_vq_features(desc) + feature_len;
-
-	/* Give virtio_ring a chance to accept features. */
-	vring_transport_features(vdev);
-
-	/* Give virtio_vop a chance to accept features. */
-	vop_transport_features(vdev);
-
-	memset_io(out_features, 0, feature_len);
-	bits = min_t(unsigned, feature_len,
-		     sizeof(vdev->features)) * 8;
-	for (i = 0; i < bits; i++) {
-		if (__virtio_test_bit(vdev, i))
-			iowrite8(ioread8(&out_features[i / 8]) | (1 << (i % 8)),
-				 &out_features[i / 8]);
-	}
-	return 0;
-}
-
-/*
- * Reading and writing elements in config space
- */
-static void vop_get(struct virtio_device *vdev, unsigned int offset,
-		    void *buf, unsigned len)
-{
-	struct mic_device_desc __iomem *desc = to_vopvdev(vdev)->desc;
-
-	if (offset + len > ioread8(&desc->config_len))
-		return;
-	memcpy_fromio(buf, _vop_vq_configspace(desc) + offset, len);
-}
-
-static void vop_set(struct virtio_device *vdev, unsigned int offset,
-		    const void *buf, unsigned len)
-{
-	struct mic_device_desc __iomem *desc = to_vopvdev(vdev)->desc;
-
-	if (offset + len > ioread8(&desc->config_len))
-		return;
-	memcpy_toio(_vop_vq_configspace(desc) + offset, buf, len);
-}
-
-/*
- * The operations to get and set the status word just access the status
- * field of the device descriptor. set_status also interrupts the host
- * to tell about status changes.
- */
-static u8 vop_get_status(struct virtio_device *vdev)
-{
-	return ioread8(&to_vopvdev(vdev)->desc->status);
-}
-
-static void vop_set_status(struct virtio_device *dev, u8 status)
-{
-	struct _vop_vdev *vdev = to_vopvdev(dev);
-	struct vop_device *vpdev = vdev->vpdev;
-
-	if (!status)
-		return;
-	iowrite8(status, &vdev->desc->status);
-	vpdev->hw_ops->send_intr(vpdev, vdev->c2h_vdev_db);
-}
-
-/* Inform host on a virtio device reset and wait for ack from host */
-static void vop_reset_inform_host(struct virtio_device *dev)
-{
-	struct _vop_vdev *vdev = to_vopvdev(dev);
-	struct mic_device_ctrl __iomem *dc = vdev->dc;
-	struct vop_device *vpdev = vdev->vpdev;
-	int retry;
-
-	iowrite8(0, &dc->host_ack);
-	iowrite8(1, &dc->vdev_reset);
-	vpdev->hw_ops->send_intr(vpdev, vdev->c2h_vdev_db);
-
-	/* Wait till host completes all card accesses and acks the reset */
-	for (retry = 100; retry--;) {
-		if (ioread8(&dc->host_ack))
-			break;
-		msleep(100);
-	}
-
-	dev_dbg(_vop_dev(vdev), "%s: retry: %d\n", __func__, retry);
-
-	/* Reset status to 0 in case we timed out */
-	iowrite8(0, &vdev->desc->status);
-}
-
-static void vop_reset(struct virtio_device *dev)
-{
-	struct _vop_vdev *vdev = to_vopvdev(dev);
-
-	dev_dbg(_vop_dev(vdev), "%s: virtio id %d\n",
-		__func__, dev->id.device);
-
-	vop_reset_inform_host(dev);
-	complete_all(&vdev->reset_done);
-}
-
-/*
- * The virtio_ring code calls this API when it wants to notify the Host.
- */
-static bool vop_notify(struct virtqueue *vq)
-{
-	struct _vop_vdev *vdev = vq->priv;
-	struct vop_device *vpdev = vdev->vpdev;
-
-	vpdev->hw_ops->send_intr(vpdev, vdev->c2h_vdev_db);
-	return true;
-}
-
-static void vop_del_vq(struct virtqueue *vq, int n)
-{
-	struct _vop_vdev *vdev = to_vopvdev(vq->vdev);
-	struct vop_device *vpdev = vdev->vpdev;
-
-	dma_unmap_single(&vpdev->dev, vdev->used[n],
-			 vdev->used_size[n], DMA_BIDIRECTIONAL);
-	free_pages((unsigned long)vdev->used_virt[n],
-		   get_order(vdev->used_size[n]));
-	vring_del_virtqueue(vq);
-	vpdev->hw_ops->unmap(vpdev, vdev->vr[n]);
-	vdev->vr[n] = NULL;
-}
-
-static void vop_del_vqs(struct virtio_device *dev)
-{
-	struct _vop_vdev *vdev = to_vopvdev(dev);
-	struct virtqueue *vq, *n;
-	int idx = 0;
-
-	dev_dbg(_vop_dev(vdev), "%s\n", __func__);
-
-	list_for_each_entry_safe(vq, n, &dev->vqs, list)
-		vop_del_vq(vq, idx++);
-}
-
-static struct virtqueue *vop_new_virtqueue(unsigned int index,
-				      unsigned int num,
-				      struct virtio_device *vdev,
-				      bool context,
-				      void *pages,
-				      bool (*notify)(struct virtqueue *vq),
-				      void (*callback)(struct virtqueue *vq),
-				      const char *name,
-				      void *used)
-{
-	bool weak_barriers = false;
-	struct vring vring;
-
-	vring_init(&vring, num, pages, MIC_VIRTIO_RING_ALIGN);
-	vring.used = used;
-
-	return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
-				     notify, callback, name);
-}
-
-/*
- * This routine will assign vring's allocated in host/io memory. Code in
- * virtio_ring.c however continues to access this io memory as if it were local
- * memory without io accessors.
- */
-static struct virtqueue *vop_find_vq(struct virtio_device *dev,
-				     unsigned index,
-				     void (*callback)(struct virtqueue *vq),
-				     const char *name, bool ctx)
-{
-	struct _vop_vdev *vdev = to_vopvdev(dev);
-	struct vop_device *vpdev = vdev->vpdev;
-	struct mic_vqconfig __iomem *vqconfig;
-	struct mic_vqconfig config;
-	struct virtqueue *vq;
-	void __iomem *va;
-	struct _mic_vring_info __iomem *info;
-	void *used;
-	int vr_size, _vr_size, err, magic;
-	u8 type = ioread8(&vdev->desc->type);
-
-	if (index >= ioread8(&vdev->desc->num_vq))
-		return ERR_PTR(-ENOENT);
-
-	if (!name)
-		return ERR_PTR(-ENOENT);
-
-	/* First assign the vring's allocated in host memory */
-	vqconfig = _vop_vq_config(vdev->desc) + index;
-	memcpy_fromio(&config, vqconfig, sizeof(config));
-	_vr_size = round_up(vring_size(le16_to_cpu(config.num), MIC_VIRTIO_RING_ALIGN), 4);
-	vr_size = PAGE_ALIGN(_vr_size + sizeof(struct _mic_vring_info));
-	va = vpdev->hw_ops->remap(vpdev, le64_to_cpu(config.address), vr_size);
-	if (!va)
-		return ERR_PTR(-ENOMEM);
-	vdev->vr[index] = va;
-	memset_io(va, 0x0, _vr_size);
-
-	info = va + _vr_size;
-	magic = ioread32(&info->magic);
-
-	if (WARN(magic != MIC_MAGIC + type + index, "magic mismatch")) {
-		err = -EIO;
-		goto unmap;
-	}
-
-	vdev->used_size[index] = PAGE_ALIGN(sizeof(__u16) * 3 +
-					     sizeof(struct vring_used_elem) *
-					     le16_to_cpu(config.num));
-	used = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
-					get_order(vdev->used_size[index]));
-	vdev->used_virt[index] = used;
-	if (!used) {
-		err = -ENOMEM;
-		dev_err(_vop_dev(vdev), "%s %d err %d\n",
-			__func__, __LINE__, err);
-		goto unmap;
-	}
-
-	vq = vop_new_virtqueue(index, le16_to_cpu(config.num), dev, ctx,
-			       (void __force *)va, vop_notify, callback,
-			       name, used);
-	if (!vq) {
-		err = -ENOMEM;
-		goto free_used;
-	}
-
-	vdev->used[index] = dma_map_single(&vpdev->dev, used,
-					    vdev->used_size[index],
-					    DMA_BIDIRECTIONAL);
-	if (dma_mapping_error(&vpdev->dev, vdev->used[index])) {
-		err = -ENOMEM;
-		dev_err(_vop_dev(vdev), "%s %d err %d\n",
-			__func__, __LINE__, err);
-		goto del_vq;
-	}
-	writeq(vdev->used[index], &vqconfig->used_address);
-
-	vq->priv = vdev;
-	return vq;
-del_vq:
-	vring_del_virtqueue(vq);
-free_used:
-	free_pages((unsigned long)used,
-		   get_order(vdev->used_size[index]));
-unmap:
-	vpdev->hw_ops->unmap(vpdev, vdev->vr[index]);
-	return ERR_PTR(err);
-}
-
-static int vop_find_vqs(struct virtio_device *dev, unsigned nvqs,
-			struct virtqueue *vqs[],
-			vq_callback_t *callbacks[],
-			const char * const names[], const bool *ctx,
-			struct irq_affinity *desc)
-{
-	struct _vop_vdev *vdev = to_vopvdev(dev);
-	struct vop_device *vpdev = vdev->vpdev;
-	struct mic_device_ctrl __iomem *dc = vdev->dc;
-	int i, err, retry, queue_idx = 0;
-
-	/* We must have this many virtqueues. */
-	if (nvqs > ioread8(&vdev->desc->num_vq))
-		return -ENOENT;
-
-	for (i = 0; i < nvqs; ++i) {
-		if (!names[i]) {
-			vqs[i] = NULL;
-			continue;
-		}
-
-		dev_dbg(_vop_dev(vdev), "%s: %d: %s\n",
-			__func__, i, names[i]);
-		vqs[i] = vop_find_vq(dev, queue_idx++, callbacks[i], names[i],
-				     ctx ? ctx[i] : false);
-		if (IS_ERR(vqs[i])) {
-			err = PTR_ERR(vqs[i]);
-			goto error;
-		}
-	}
-
-	iowrite8(1, &dc->used_address_updated);
-	/*
-	 * Send an interrupt to the host to inform it that used
-	 * rings have been re-assigned.
-	 */
-	vpdev->hw_ops->send_intr(vpdev, vdev->c2h_vdev_db);
-	for (retry = 100; --retry;) {
-		if (!ioread8(&dc->used_address_updated))
-			break;
-		msleep(100);
-	}
-
-	dev_dbg(_vop_dev(vdev), "%s: retry: %d\n", __func__, retry);
-	if (!retry) {
-		err = -ENODEV;
-		goto error;
-	}
-
-	return 0;
-error:
-	vop_del_vqs(dev);
-	return err;
-}
-
-/*
- * The config ops structure as defined by virtio config
- */
-static const struct virtio_config_ops vop_vq_config_ops = {
-	.get_features = vop_get_features,
-	.finalize_features = vop_finalize_features,
-	.get = vop_get,
-	.set = vop_set,
-	.get_status = vop_get_status,
-	.set_status = vop_set_status,
-	.reset = vop_reset,
-	.find_vqs = vop_find_vqs,
-	.del_vqs = vop_del_vqs,
-};
-
-static irqreturn_t vop_virtio_intr_handler(int irq, void *data)
-{
-	struct _vop_vdev *vdev = data;
-	struct vop_device *vpdev = vdev->vpdev;
-	struct virtqueue *vq;
-
-	vpdev->hw_ops->ack_interrupt(vpdev, vdev->h2c_vdev_db);
-	list_for_each_entry(vq, &vdev->vdev.vqs, list)
-		vring_interrupt(0, vq);
-
-	return IRQ_HANDLED;
-}
-
-static void vop_virtio_release_dev(struct device *_d)
-{
-	struct virtio_device *vdev =
-			container_of(_d, struct virtio_device, dev);
-	struct _vop_vdev *vop_vdev =
-			container_of(vdev, struct _vop_vdev, vdev);
-
-	kfree(vop_vdev);
-}
-
-/*
- * adds a new device and register it with virtio
- * appropriate drivers are loaded by the device model
- */
-static int _vop_add_device(struct mic_device_desc __iomem *d,
-			   unsigned int offset, struct vop_device *vpdev,
-			   int dnode)
-{
-	struct _vop_vdev *vdev, *reg_dev = NULL;
-	int ret;
-	u8 type = ioread8(&d->type);
-
-	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
-	if (!vdev)
-		return -ENOMEM;
-
-	vdev->vpdev = vpdev;
-	vdev->vdev.dev.parent = &vpdev->dev;
-	vdev->vdev.dev.release = vop_virtio_release_dev;
-	vdev->vdev.id.device = type;
-	vdev->vdev.config = &vop_vq_config_ops;
-	vdev->desc = d;
-	vdev->dc = (void __iomem *)d + _vop_aligned_desc_size(d);
-	vdev->dnode = dnode;
-	vdev->vdev.priv = (void *)(unsigned long)dnode;
-	init_completion(&vdev->reset_done);
-
-	vdev->h2c_vdev_db = vpdev->hw_ops->next_db(vpdev);
-	vdev->virtio_cookie = vpdev->hw_ops->request_irq(vpdev,
-			vop_virtio_intr_handler, "virtio intr",
-			vdev, vdev->h2c_vdev_db);
-	if (IS_ERR(vdev->virtio_cookie)) {
-		ret = PTR_ERR(vdev->virtio_cookie);
-		goto kfree;
-	}
-	iowrite8((u8)vdev->h2c_vdev_db, &vdev->dc->h2c_vdev_db);
-	vdev->c2h_vdev_db = ioread8(&vdev->dc->c2h_vdev_db);
-
-	ret = register_virtio_device(&vdev->vdev);
-	reg_dev = vdev;
-	if (ret) {
-		dev_err(_vop_dev(vdev),
-			"Failed to register vop device %u type %u\n",
-			offset, type);
-		goto free_irq;
-	}
-	writeq((unsigned long)vdev, &vdev->dc->vdev);
-	dev_dbg(_vop_dev(vdev), "%s: registered vop device %u type %u vdev %p\n",
-		__func__, offset, type, vdev);
-
-	return 0;
-
-free_irq:
-	vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev);
-kfree:
-	if (reg_dev)
-		put_device(&vdev->vdev.dev);
-	else
-		kfree(vdev);
-	return ret;
-}
-
-/*
- * match for a vop device with a specific desc pointer
- */
-static int vop_match_desc(struct device *dev, void *data)
-{
-	struct virtio_device *_dev = dev_to_virtio(dev);
-	struct _vop_vdev *vdev = to_vopvdev(_dev);
-
-	return vdev->desc == (void __iomem *)data;
-}
-
-static struct _vop_vdev *vop_dc_to_vdev(struct mic_device_ctrl __iomem *dc)
-{
-	return (struct _vop_vdev *)(unsigned long)readq(&dc->vdev);
-}
-
-static void _vop_handle_config_change(struct mic_device_desc __iomem *d,
-				      unsigned int offset,
-				      struct vop_device *vpdev)
-{
-	struct mic_device_ctrl __iomem *dc
-		= (void __iomem *)d + _vop_aligned_desc_size(d);
-	struct _vop_vdev *vdev = vop_dc_to_vdev(dc);
-
-	if (ioread8(&dc->config_change) != MIC_VIRTIO_PARAM_CONFIG_CHANGED)
-		return;
-
-	dev_dbg(&vpdev->dev, "%s %d\n", __func__, __LINE__);
-	virtio_config_changed(&vdev->vdev);
-	iowrite8(1, &dc->guest_ack);
-}
-
-/*
- * removes a virtio device if a hot remove event has been
- * requested by the host.
- */
-static int _vop_remove_device(struct mic_device_desc __iomem *d,
-			      unsigned int offset, struct vop_device *vpdev)
-{
-	struct mic_device_ctrl __iomem *dc
-		= (void __iomem *)d + _vop_aligned_desc_size(d);
-	struct _vop_vdev *vdev = vop_dc_to_vdev(dc);
-	u8 status;
-	int ret = -1;
-
-	if (ioread8(&dc->config_change) == MIC_VIRTIO_PARAM_DEV_REMOVE) {
-		struct device *dev = get_device(&vdev->vdev.dev);
-
-		dev_dbg(&vpdev->dev,
-			"%s %d config_change %d type %d vdev %p\n",
-			__func__, __LINE__,
-			ioread8(&dc->config_change), ioread8(&d->type), vdev);
-		status = ioread8(&d->status);
-		reinit_completion(&vdev->reset_done);
-		unregister_virtio_device(&vdev->vdev);
-		vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev);
-		iowrite8(-1, &dc->h2c_vdev_db);
-		if (status & VIRTIO_CONFIG_S_DRIVER_OK)
-			wait_for_completion(&vdev->reset_done);
-		put_device(dev);
-		iowrite8(1, &dc->guest_ack);
-		dev_dbg(&vpdev->dev, "%s %d guest_ack %d\n",
-			__func__, __LINE__, ioread8(&dc->guest_ack));
-		iowrite8(-1, &d->type);
-		ret = 0;
-	}
-	return ret;
-}
-
-#define REMOVE_DEVICES true
-
-static void _vop_scan_devices(void __iomem *dp, struct vop_device *vpdev,
-			      bool remove, int dnode)
-{
-	s8 type;
-	unsigned int i;
-	struct mic_device_desc __iomem *d;
-	struct mic_device_ctrl __iomem *dc;
-	struct device *dev;
-
-	for (i = sizeof(struct mic_bootparam);
-			i < MIC_DP_SIZE; i += _vop_total_desc_size(d)) {
-		d = dp + i;
-		dc = (void __iomem *)d + _vop_aligned_desc_size(d);
-		/*
-		 * This read barrier is paired with the corresponding write
-		 * barrier on the host which is inserted before adding or
-		 * removing a virtio device descriptor, by updating the type.
-		 */
-		rmb();
-		type = ioread8(&d->type);
-
-		/* end of list */
-		if (type == 0)
-			break;
-
-		if (type == -1)
-			continue;
-
-		/* device already exists */
-		dev = device_find_child(&vpdev->dev, (void __force *)d,
-					vop_match_desc);
-		if (dev) {
-			if (remove)
-				iowrite8(MIC_VIRTIO_PARAM_DEV_REMOVE,
-					 &dc->config_change);
-			put_device(dev);
-			_vop_handle_config_change(d, i, vpdev);
-			_vop_remove_device(d, i, vpdev);
-			if (remove) {
-				iowrite8(0, &dc->config_change);
-				iowrite8(0, &dc->guest_ack);
-			}
-			continue;
-		}
-
-		/* new device */
-		dev_dbg(&vpdev->dev, "%s %d Adding new virtio device %p\n",
-			__func__, __LINE__, d);
-		if (!remove)
-			_vop_add_device(d, i, vpdev, dnode);
-	}
-}
-
-static void vop_scan_devices(struct vop_info *vi,
-			     struct vop_device *vpdev, bool remove)
-{
-	void __iomem *dp = vpdev->hw_ops->get_remote_dp(vpdev);
-
-	if (!dp)
-		return;
-	mutex_lock(&vi->vop_mutex);
-	_vop_scan_devices(dp, vpdev, remove, vpdev->dnode);
-	mutex_unlock(&vi->vop_mutex);
-}
-
-/*
- * vop_hotplug_device tries to find changes in the device page.
- */
-static void vop_hotplug_devices(struct work_struct *work)
-{
-	struct vop_info *vi = container_of(work, struct vop_info,
-					     hotplug_work);
-
-	vop_scan_devices(vi, vi->vpdev, !REMOVE_DEVICES);
-}
-
-/*
- * Interrupt handler for hot plug/config changes etc.
- */
-static irqreturn_t vop_extint_handler(int irq, void *data)
-{
-	struct vop_info *vi = data;
-	struct mic_bootparam __iomem *bp;
-	struct vop_device *vpdev = vi->vpdev;
-
-	bp = vpdev->hw_ops->get_remote_dp(vpdev);
-	dev_dbg(&vpdev->dev, "%s %d hotplug work\n",
-		__func__, __LINE__);
-	vpdev->hw_ops->ack_interrupt(vpdev, ioread8(&bp->h2c_config_db));
-	schedule_work(&vi->hotplug_work);
-	return IRQ_HANDLED;
-}
-
-static int vop_driver_probe(struct vop_device *vpdev)
-{
-	struct vop_info *vi;
-	int rc;
-
-	vi = kzalloc(sizeof(*vi), GFP_KERNEL);
-	if (!vi) {
-		rc = -ENOMEM;
-		goto exit;
-	}
-	dev_set_drvdata(&vpdev->dev, vi);
-	vi->vpdev = vpdev;
-
-	mutex_init(&vi->vop_mutex);
-	INIT_WORK(&vi->hotplug_work, vop_hotplug_devices);
-	if (vpdev->dnode) {
-		rc = vop_host_init(vi);
-		if (rc < 0)
-			goto free;
-	} else {
-		struct mic_bootparam __iomem *bootparam;
-
-		vop_scan_devices(vi, vpdev, !REMOVE_DEVICES);
-
-		vi->h2c_config_db = vpdev->hw_ops->next_db(vpdev);
-		vi->cookie = vpdev->hw_ops->request_irq(vpdev,
-							vop_extint_handler,
-							"virtio_config_intr",
-							vi, vi->h2c_config_db);
-		if (IS_ERR(vi->cookie)) {
-			rc = PTR_ERR(vi->cookie);
-			goto free;
-		}
-		bootparam = vpdev->hw_ops->get_remote_dp(vpdev);
-		iowrite8(vi->h2c_config_db, &bootparam->h2c_config_db);
-	}
-	vop_init_debugfs(vi);
-	return 0;
-free:
-	kfree(vi);
-exit:
-	return rc;
-}
-
-static void vop_driver_remove(struct vop_device *vpdev)
-{
-	struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
-
-	if (vpdev->dnode) {
-		vop_host_uninit(vi);
-	} else {
-		struct mic_bootparam __iomem *bootparam =
-			vpdev->hw_ops->get_remote_dp(vpdev);
-		if (bootparam)
-			iowrite8(-1, &bootparam->h2c_config_db);
-		vpdev->hw_ops->free_irq(vpdev, vi->cookie, vi);
-		flush_work(&vi->hotplug_work);
-		vop_scan_devices(vi, vpdev, REMOVE_DEVICES);
-	}
-	vop_exit_debugfs(vi);
-	kfree(vi);
-}
-
-static const struct vop_device_id id_table[] = {
-	{ VOP_DEV_TRNSP, VOP_DEV_ANY_ID },
-	{ 0 },
-};
-
-static struct vop_driver vop_driver = {
-	.driver.name =	KBUILD_MODNAME,
-	.driver.owner =	THIS_MODULE,
-	.id_table = id_table,
-	.probe = vop_driver_probe,
-	.remove = vop_driver_remove,
-};
-
-module_vop_driver(vop_driver);
-
-MODULE_DEVICE_TABLE(mbus, id_table);
-MODULE_AUTHOR("Intel Corporation");
-MODULE_DESCRIPTION("Intel(R) Virtio Over PCIe (VOP) driver");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/vop/vop_main.h b/drivers/misc/mic/vop/vop_main.h
deleted file mode 100644
index 2451d9218137..000000000000
--- a/drivers/misc/mic/vop/vop_main.h
+++ /dev/null
@@ -1,158 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Intel Virtio Over PCIe (VOP) driver.
- */
-#ifndef _VOP_MAIN_H_
-#define _VOP_MAIN_H_
-
-#include <linux/vringh.h>
-#include <linux/virtio_config.h>
-#include <linux/virtio.h>
-#include <linux/miscdevice.h>
-
-#include <linux/mic_common.h>
-#include "../common/mic_dev.h"
-
-#include "../bus/vop_bus.h"
-
-/*
- * Note on endianness.
- * 1. Host can be both BE or LE
- * 2. Guest/card is LE. Host uses le_to_cpu to access desc/avail
- *    rings and ioreadXX/iowriteXX to access used ring.
- * 3. Device page exposed by host to guest contains LE values. Guest
- *    accesses these using ioreadXX/iowriteXX etc. This way in general we
- *    obey the virtio spec according to which guest works with native
- *    endianness and host is aware of guest endianness and does all
- *    required endianness conversion.
- * 4. Data provided from user space to guest (in ADD_DEVICE and
- *    CONFIG_CHANGE ioctl's) is not interpreted by the driver and should be
- *    in guest endianness.
- */
-
-/*
- * vop_info - Allocated per invocation of VOP probe
- *
- * @vpdev: VOP device
- * @hotplug_work: Handle virtio device creation, deletion and configuration
- * @cookie: Cookie received upon requesting a virtio configuration interrupt
- * @h2c_config_db: The doorbell used by the peer to indicate a config change
- * @vdev_list: List of "active" virtio devices injected in the peer node
- * @vop_mutex: Synchronize access to the device page as well as serialize
- *             creation/deletion of virtio devices on the peer node
- * @dp: Peer device page information
- * @dbg: Debugfs entry
- * @dma_ch: The DMA channel used by this transport for data transfers.
- * @name: Name for this transport used in misc device creation.
- * @miscdev: The misc device registered.
- */
-struct vop_info {
-	struct vop_device *vpdev;
-	struct work_struct hotplug_work;
-	struct mic_irq *cookie;
-	int h2c_config_db;
-	struct list_head vdev_list;
-	struct mutex vop_mutex;
-	void __iomem *dp;
-	struct dentry *dbg;
-	struct dma_chan *dma_ch;
-	char name[16];
-	struct miscdevice miscdev;
-};
-
-/**
- * struct vop_vringh - Virtio ring host information.
- *
- * @vring: The VOP vring used for setting up user space mappings.
- * @vrh: The host VRINGH used for accessing the card vrings.
- * @riov: The VRINGH read kernel IOV.
- * @wiov: The VRINGH write kernel IOV.
- * @head: The VRINGH head index address passed to vringh_getdesc_kern(..).
- * @vr_mutex: Mutex for synchronizing access to the VRING.
- * @buf: Temporary kernel buffer used to copy in/out data
- * from/to the card via DMA.
- * @buf_da: dma address of buf.
- * @vdev: Back pointer to VOP virtio device for vringh_notify(..).
- */
-struct vop_vringh {
-	struct mic_vring vring;
-	struct vringh vrh;
-	struct vringh_kiov riov;
-	struct vringh_kiov wiov;
-	u16 head;
-	struct mutex vr_mutex;
-	void *buf;
-	dma_addr_t buf_da;
-	struct vop_vdev *vdev;
-};
-
-/**
- * struct vop_vdev - Host information for a card Virtio device.
- *
- * @virtio_id - Virtio device id.
- * @waitq - Waitqueue to allow ring3 apps to poll.
- * @vpdev - pointer to VOP bus device.
- * @poll_wake - Used for waking up threads blocked in poll.
- * @out_bytes - Debug stats for number of bytes copied from host to card.
- * @in_bytes - Debug stats for number of bytes copied from card to host.
- * @out_bytes_dma - Debug stats for number of bytes copied from host to card
- * using DMA.
- * @in_bytes_dma - Debug stats for number of bytes copied from card to host
- * using DMA.
- * @tx_len_unaligned - Debug stats for number of bytes copied to the card where
- * the transfer length did not have the required DMA alignment.
- * @tx_dst_unaligned - Debug stats for number of bytes copied where the
- * destination address on the card did not have the required DMA alignment.
- * @vvr - Store per VRING data structures.
- * @virtio_bh_work - Work struct used to schedule virtio bottom half handling.
- * @dd - Virtio device descriptor.
- * @dc - Virtio device control fields.
- * @list - List of Virtio devices.
- * @virtio_db - The doorbell used by the card to interrupt the host.
- * @virtio_cookie - The cookie returned while requesting interrupts.
- * @vi: Transport information.
- * @vdev_mutex: Mutex synchronizing virtio device injection,
- *              removal and data transfers.
- * @destroy: Track if a virtio device is being destroyed.
- * @deleted: The virtio device has been deleted.
- */
-struct vop_vdev {
-	int virtio_id;
-	wait_queue_head_t waitq;
-	struct vop_device *vpdev;
-	int poll_wake;
-	unsigned long out_bytes;
-	unsigned long in_bytes;
-	unsigned long out_bytes_dma;
-	unsigned long in_bytes_dma;
-	unsigned long tx_len_unaligned;
-	unsigned long tx_dst_unaligned;
-	unsigned long rx_dst_unaligned;
-	struct vop_vringh vvr[MIC_MAX_VRINGS];
-	struct work_struct virtio_bh_work;
-	struct mic_device_desc *dd;
-	struct mic_device_ctrl *dc;
-	struct list_head list;
-	int virtio_db;
-	struct mic_irq *virtio_cookie;
-	struct vop_info *vi;
-	struct mutex vdev_mutex;
-	struct completion destroy;
-	bool deleted;
-};
-
-/* Helper API to check if a virtio device is running */
-static inline bool vop_vdevup(struct vop_vdev *vdev)
-{
-	return !!vdev->dd->status;
-}
-
-void vop_init_debugfs(struct vop_info *vi);
-void vop_exit_debugfs(struct vop_info *vi);
-int vop_host_init(struct vop_info *vi);
-void vop_host_uninit(struct vop_info *vi);
-#endif
diff --git a/drivers/misc/mic/vop/vop_vringh.c b/drivers/misc/mic/vop/vop_vringh.c
deleted file mode 100644
index 7014ffe88632..000000000000
--- a/drivers/misc/mic/vop/vop_vringh.c
+++ /dev/null
@@ -1,1166 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Intel Virtio Over PCIe (VOP) driver.
- */
-#include <linux/sched.h>
-#include <linux/poll.h>
-#include <linux/dma-mapping.h>
-
-#include <linux/mic_common.h>
-#include "../common/mic_dev.h"
-
-#include <linux/mic_ioctl.h>
-#include "vop_main.h"
-
-/* Helper API to obtain the VOP PCIe device */
-static inline struct device *vop_dev(struct vop_vdev *vdev)
-{
-	return vdev->vpdev->dev.parent;
-}
-
-/* Helper API to check if a virtio device is initialized */
-static inline int vop_vdev_inited(struct vop_vdev *vdev)
-{
-	if (!vdev)
-		return -EINVAL;
-	/* Device has not been created yet */
-	if (!vdev->dd || !vdev->dd->type) {
-		dev_err(vop_dev(vdev), "%s %d err %d\n",
-			__func__, __LINE__, -EINVAL);
-		return -EINVAL;
-	}
-	/* Device has been removed/deleted */
-	if (vdev->dd->type == -1) {
-		dev_dbg(vop_dev(vdev), "%s %d err %d\n",
-			__func__, __LINE__, -ENODEV);
-		return -ENODEV;
-	}
-	return 0;
-}
-
-static void _vop_notify(struct vringh *vrh)
-{
-	struct vop_vringh *vvrh = container_of(vrh, struct vop_vringh, vrh);
-	struct vop_vdev *vdev = vvrh->vdev;
-	struct vop_device *vpdev = vdev->vpdev;
-	s8 db = vdev->dc->h2c_vdev_db;
-
-	if (db != -1)
-		vpdev->hw_ops->send_intr(vpdev, db);
-}
-
-static void vop_virtio_init_post(struct vop_vdev *vdev)
-{
-	struct mic_vqconfig *vqconfig = mic_vq_config(vdev->dd);
-	struct vop_device *vpdev = vdev->vpdev;
-	int i, used_size;
-
-	for (i = 0; i < vdev->dd->num_vq; i++) {
-		used_size = PAGE_ALIGN(sizeof(u16) * 3 +
-				sizeof(struct vring_used_elem) *
-				le16_to_cpu(vqconfig->num));
-		if (!le64_to_cpu(vqconfig[i].used_address)) {
-			dev_warn(vop_dev(vdev), "used_address zero??\n");
-			continue;
-		}
-		vdev->vvr[i].vrh.vring.used =
-			(void __force *)vpdev->hw_ops->remap(
-			vpdev,
-			le64_to_cpu(vqconfig[i].used_address),
-			used_size);
-	}
-
-	vdev->dc->used_address_updated = 0;
-
-	dev_info(vop_dev(vdev), "%s: device type %d LINKUP\n",
-		 __func__, vdev->virtio_id);
-}
-
-static inline void vop_virtio_device_reset(struct vop_vdev *vdev)
-{
-	int i;
-
-	dev_dbg(vop_dev(vdev), "%s: status %d device type %d RESET\n",
-		__func__, vdev->dd->status, vdev->virtio_id);
-
-	for (i = 0; i < vdev->dd->num_vq; i++)
-		/*
-		 * Avoid lockdep false positive. The + 1 is for the vop
-		 * mutex which is held in the reset devices code path.
-		 */
-		mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);
-
-	/* 0 status means "reset" */
-	vdev->dd->status = 0;
-	vdev->dc->vdev_reset = 0;
-	vdev->dc->host_ack = 1;
-
-	for (i = 0; i < vdev->dd->num_vq; i++) {
-		struct vringh *vrh = &vdev->vvr[i].vrh;
-
-		vdev->vvr[i].vring.info->avail_idx = 0;
-		vrh->completed = 0;
-		vrh->last_avail_idx = 0;
-		vrh->last_used_idx = 0;
-	}
-
-	for (i = 0; i < vdev->dd->num_vq; i++)
-		mutex_unlock(&vdev->vvr[i].vr_mutex);
-}
-
-static void vop_virtio_reset_devices(struct vop_info *vi)
-{
-	struct list_head *pos, *tmp;
-	struct vop_vdev *vdev;
-
-	list_for_each_safe(pos, tmp, &vi->vdev_list) {
-		vdev = list_entry(pos, struct vop_vdev, list);
-		vop_virtio_device_reset(vdev);
-		vdev->poll_wake = 1;
-		wake_up(&vdev->waitq);
-	}
-}
-
-static void vop_bh_handler(struct work_struct *work)
-{
-	struct vop_vdev *vdev = container_of(work, struct vop_vdev,
-			virtio_bh_work);
-
-	if (vdev->dc->used_address_updated)
-		vop_virtio_init_post(vdev);
-
-	if (vdev->dc->vdev_reset)
-		vop_virtio_device_reset(vdev);
-
-	vdev->poll_wake = 1;
-	wake_up(&vdev->waitq);
-}
-
-static irqreturn_t _vop_virtio_intr_handler(int irq, void *data)
-{
-	struct vop_vdev *vdev = data;
-	struct vop_device *vpdev = vdev->vpdev;
-
-	vpdev->hw_ops->ack_interrupt(vpdev, vdev->virtio_db);
-	schedule_work(&vdev->virtio_bh_work);
-	return IRQ_HANDLED;
-}
-
-static int vop_virtio_config_change(struct vop_vdev *vdev, void *argp)
-{
-	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
-	int ret = 0, retry, i;
-	struct vop_device *vpdev = vdev->vpdev;
-	struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
-	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
-	s8 db = bootparam->h2c_config_db;
-
-	mutex_lock(&vi->vop_mutex);
-	for (i = 0; i < vdev->dd->num_vq; i++)
-		mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);
-
-	if (db == -1 || vdev->dd->type == -1) {
-		ret = -EIO;
-		goto exit;
-	}
-
-	memcpy(mic_vq_configspace(vdev->dd), argp, vdev->dd->config_len);
-	vdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED;
-	vpdev->hw_ops->send_intr(vpdev, db);
-
-	for (retry = 100; retry--;) {
-		ret = wait_event_timeout(wake, vdev->dc->guest_ack,
-					 msecs_to_jiffies(100));
-		if (ret)
-			break;
-	}
-
-	dev_dbg(vop_dev(vdev),
-		"%s %d retry: %d\n", __func__, __LINE__, retry);
-	vdev->dc->config_change = 0;
-	vdev->dc->guest_ack = 0;
-exit:
-	for (i = 0; i < vdev->dd->num_vq; i++)
-		mutex_unlock(&vdev->vvr[i].vr_mutex);
-	mutex_unlock(&vi->vop_mutex);
-	return ret;
-}
-
-static int vop_copy_dp_entry(struct vop_vdev *vdev,
-			     struct mic_device_desc *argp, __u8 *type,
-			     struct mic_device_desc **devpage)
-{
-	struct vop_device *vpdev = vdev->vpdev;
-	struct mic_device_desc *devp;
-	struct mic_vqconfig *vqconfig;
-	int ret = 0, i;
-	bool slot_found = false;
-
-	vqconfig = mic_vq_config(argp);
-	for (i = 0; i < argp->num_vq; i++) {
-		if (le16_to_cpu(vqconfig[i].num) > MIC_MAX_VRING_ENTRIES) {
-			ret =  -EINVAL;
-			dev_err(vop_dev(vdev), "%s %d err %d\n",
-				__func__, __LINE__, ret);
-			goto exit;
-		}
-	}
-
-	/* Find the first free device page entry */
-	for (i = sizeof(struct mic_bootparam);
-		i < MIC_DP_SIZE - mic_total_desc_size(argp);
-		i += mic_total_desc_size(devp)) {
-		devp = vpdev->hw_ops->get_dp(vpdev) + i;
-		if (devp->type == 0 || devp->type == -1) {
-			slot_found = true;
-			break;
-		}
-	}
-	if (!slot_found) {
-		ret =  -EINVAL;
-		dev_err(vop_dev(vdev), "%s %d err %d\n",
-			__func__, __LINE__, ret);
-		goto exit;
-	}
-	/*
-	 * Save off the type before doing the memcpy. Type will be set in the
-	 * end after completing all initialization for the new device.
-	 */
-	*type = argp->type;
-	argp->type = 0;
-	memcpy(devp, argp, mic_desc_size(argp));
-
-	*devpage = devp;
-exit:
-	return ret;
-}
-
-static void vop_init_device_ctrl(struct vop_vdev *vdev,
-				 struct mic_device_desc *devpage)
-{
-	struct mic_device_ctrl *dc;
-
-	dc = (void *)devpage + mic_aligned_desc_size(devpage);
-
-	dc->config_change = 0;
-	dc->guest_ack = 0;
-	dc->vdev_reset = 0;
-	dc->host_ack = 0;
-	dc->used_address_updated = 0;
-	dc->c2h_vdev_db = -1;
-	dc->h2c_vdev_db = -1;
-	vdev->dc = dc;
-}
-
-static int vop_virtio_add_device(struct vop_vdev *vdev,
-				 struct mic_device_desc *argp)
-{
-	struct vop_info *vi = vdev->vi;
-	struct vop_device *vpdev = vi->vpdev;
-	struct mic_device_desc *dd = NULL;
-	struct mic_vqconfig *vqconfig;
-	int vr_size, i, j, ret;
-	u8 type = 0;
-	s8 db = -1;
-	char irqname[16];
-	struct mic_bootparam *bootparam;
-	u16 num;
-	dma_addr_t vr_addr;
-
-	bootparam = vpdev->hw_ops->get_dp(vpdev);
-	init_waitqueue_head(&vdev->waitq);
-	INIT_LIST_HEAD(&vdev->list);
-	vdev->vpdev = vpdev;
-
-	ret = vop_copy_dp_entry(vdev, argp, &type, &dd);
-	if (ret) {
-		dev_err(vop_dev(vdev), "%s %d err %d\n",
-			__func__, __LINE__, ret);
-		return ret;
-	}
-
-	vop_init_device_ctrl(vdev, dd);
-
-	vdev->dd = dd;
-	vdev->virtio_id = type;
-	vqconfig = mic_vq_config(dd);
-	INIT_WORK(&vdev->virtio_bh_work, vop_bh_handler);
-
-	for (i = 0; i < dd->num_vq; i++) {
-		struct vop_vringh *vvr = &vdev->vvr[i];
-		struct mic_vring *vr = &vdev->vvr[i].vring;
-
-		num = le16_to_cpu(vqconfig[i].num);
-		mutex_init(&vvr->vr_mutex);
-		vr_size = PAGE_ALIGN(round_up(vring_size(num, MIC_VIRTIO_RING_ALIGN), 4) +
-			sizeof(struct _mic_vring_info));
-		vr->va = (void *)
-			__get_free_pages(GFP_KERNEL | __GFP_ZERO,
-					 get_order(vr_size));
-		if (!vr->va) {
-			ret = -ENOMEM;
-			dev_err(vop_dev(vdev), "%s %d err %d\n",
-				__func__, __LINE__, ret);
-			goto err;
-		}
-		vr->len = vr_size;
-		vr->info = vr->va + round_up(vring_size(num, MIC_VIRTIO_RING_ALIGN), 4);
-		vr->info->magic = cpu_to_le32(MIC_MAGIC + vdev->virtio_id + i);
-		vr_addr = dma_map_single(&vpdev->dev, vr->va, vr_size,
-					 DMA_BIDIRECTIONAL);
-		if (dma_mapping_error(&vpdev->dev, vr_addr)) {
-			free_pages((unsigned long)vr->va, get_order(vr_size));
-			ret = -ENOMEM;
-			dev_err(vop_dev(vdev), "%s %d err %d\n",
-				__func__, __LINE__, ret);
-			goto err;
-		}
-		vqconfig[i].address = cpu_to_le64(vr_addr);
-
-		vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN);
-		ret = vringh_init_kern(&vvr->vrh,
-				       *(u32 *)mic_vq_features(vdev->dd),
-				       num, false, vr->vr.desc, vr->vr.avail,
-				       vr->vr.used);
-		if (ret) {
-			dev_err(vop_dev(vdev), "%s %d err %d\n",
-				__func__, __LINE__, ret);
-			goto err;
-		}
-		vringh_kiov_init(&vvr->riov, NULL, 0);
-		vringh_kiov_init(&vvr->wiov, NULL, 0);
-		vvr->head = USHRT_MAX;
-		vvr->vdev = vdev;
-		vvr->vrh.notify = _vop_notify;
-		dev_dbg(&vpdev->dev,
-			"%s %d index %d va %p info %p vr_size 0x%x\n",
-			__func__, __LINE__, i, vr->va, vr->info, vr_size);
-		vvr->buf = (void *)__get_free_pages(GFP_KERNEL,
-					get_order(VOP_INT_DMA_BUF_SIZE));
-		vvr->buf_da = dma_map_single(&vpdev->dev,
-					  vvr->buf, VOP_INT_DMA_BUF_SIZE,
-					  DMA_BIDIRECTIONAL);
-	}
-
-	snprintf(irqname, sizeof(irqname), "vop%dvirtio%d", vpdev->index,
-		 vdev->virtio_id);
-	vdev->virtio_db = vpdev->hw_ops->next_db(vpdev);
-	vdev->virtio_cookie = vpdev->hw_ops->request_irq(vpdev,
-			_vop_virtio_intr_handler, irqname, vdev,
-			vdev->virtio_db);
-	if (IS_ERR(vdev->virtio_cookie)) {
-		ret = PTR_ERR(vdev->virtio_cookie);
-		dev_dbg(&vpdev->dev, "request irq failed\n");
-		goto err;
-	}
-
-	vdev->dc->c2h_vdev_db = vdev->virtio_db;
-
-	/*
-	 * Order the type update with previous stores. This write barrier
-	 * is paired with the corresponding read barrier before the uncached
-	 * system memory read of the type, on the card while scanning the
-	 * device page.
-	 */
-	smp_wmb();
-	dd->type = type;
-	argp->type = type;
-
-	if (bootparam) {
-		db = bootparam->h2c_config_db;
-		if (db != -1)
-			vpdev->hw_ops->send_intr(vpdev, db);
-	}
-	dev_dbg(&vpdev->dev, "Added virtio id %d db %d\n", dd->type, db);
-	return 0;
-err:
-	vqconfig = mic_vq_config(dd);
-	for (j = 0; j < i; j++) {
-		struct vop_vringh *vvr = &vdev->vvr[j];
-
-		dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[j].address),
-				 vvr->vring.len, DMA_BIDIRECTIONAL);
-		free_pages((unsigned long)vvr->vring.va,
-			   get_order(vvr->vring.len));
-	}
-	return ret;
-}
-
-static void vop_dev_remove(struct vop_info *pvi, struct mic_device_ctrl *devp,
-			   struct vop_device *vpdev)
-{
-	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
-	s8 db;
-	int ret, retry;
-	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
-
-	devp->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE;
-	db = bootparam->h2c_config_db;
-	if (db != -1)
-		vpdev->hw_ops->send_intr(vpdev, db);
-	else
-		goto done;
-	for (retry = 15; retry--;) {
-		ret = wait_event_timeout(wake, devp->guest_ack,
-					 msecs_to_jiffies(1000));
-		if (ret)
-			break;
-	}
-done:
-	devp->config_change = 0;
-	devp->guest_ack = 0;
-}
-
-static void vop_virtio_del_device(struct vop_vdev *vdev)
-{
-	struct vop_info *vi = vdev->vi;
-	struct vop_device *vpdev = vdev->vpdev;
-	int i;
-	struct mic_vqconfig *vqconfig;
-	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
-
-	if (!bootparam)
-		goto skip_hot_remove;
-	vop_dev_remove(vi, vdev->dc, vpdev);
-skip_hot_remove:
-	vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev);
-	flush_work(&vdev->virtio_bh_work);
-	vqconfig = mic_vq_config(vdev->dd);
-	for (i = 0; i < vdev->dd->num_vq; i++) {
-		struct vop_vringh *vvr = &vdev->vvr[i];
-
-		dma_unmap_single(&vpdev->dev,
-				 vvr->buf_da, VOP_INT_DMA_BUF_SIZE,
-				 DMA_BIDIRECTIONAL);
-		free_pages((unsigned long)vvr->buf,
-			   get_order(VOP_INT_DMA_BUF_SIZE));
-		vringh_kiov_cleanup(&vvr->riov);
-		vringh_kiov_cleanup(&vvr->wiov);
-		dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[i].address),
-				 vvr->vring.len, DMA_BIDIRECTIONAL);
-		free_pages((unsigned long)vvr->vring.va,
-			   get_order(vvr->vring.len));
-	}
-	/*
-	 * Order the type update with previous stores. This write barrier
-	 * is paired with the corresponding read barrier before the uncached
-	 * system memory read of the type, on the card while scanning the
-	 * device page.
-	 */
-	smp_wmb();
-	vdev->dd->type = -1;
-}
-
-/*
- * vop_sync_dma - Wrapper for synchronous DMAs.
- *
- * @dev - The address of the pointer to the device instance used
- * for DMA registration.
- * @dst - destination DMA address.
- * @src - source DMA address.
- * @len - size of the transfer.
- *
- * Return DMA_SUCCESS on success
- */
-static int vop_sync_dma(struct vop_vdev *vdev, dma_addr_t dst, dma_addr_t src,
-			size_t len)
-{
-	int err = 0;
-	struct dma_device *ddev;
-	struct dma_async_tx_descriptor *tx;
-	struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
-	struct dma_chan *vop_ch = vi->dma_ch;
-
-	if (!vop_ch) {
-		err = -EBUSY;
-		goto error;
-	}
-	ddev = vop_ch->device;
-	tx = ddev->device_prep_dma_memcpy(vop_ch, dst, src, len,
-		DMA_PREP_FENCE);
-	if (!tx) {
-		err = -ENOMEM;
-		goto error;
-	} else {
-		dma_cookie_t cookie;
-
-		cookie = tx->tx_submit(tx);
-		if (dma_submit_error(cookie)) {
-			err = -ENOMEM;
-			goto error;
-		}
-		dma_async_issue_pending(vop_ch);
-		err = dma_sync_wait(vop_ch, cookie);
-	}
-error:
-	if (err)
-		dev_err(&vi->vpdev->dev, "%s %d err %d\n",
-			__func__, __LINE__, err);
-	return err;
-}
-
-#define VOP_USE_DMA true
-
-/*
- * Initiates the copies across the PCIe bus from card memory to a user
- * space buffer. When transfers are done using DMA, source/destination
- * addresses and transfer length must follow the alignment requirements of
- * the MIC DMA engine.
- */
-static int vop_virtio_copy_to_user(struct vop_vdev *vdev, void __user *ubuf,
-				   size_t len, u64 daddr, size_t dlen,
-				   int vr_idx)
-{
-	struct vop_device *vpdev = vdev->vpdev;
-	void __iomem *dbuf = vpdev->hw_ops->remap(vpdev, daddr, len);
-	struct vop_vringh *vvr = &vdev->vvr[vr_idx];
-	struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
-	size_t dma_alignment;
-	bool x200;
-	size_t dma_offset, partlen;
-	int err;
-
-	if (!VOP_USE_DMA || !vi->dma_ch) {
-		if (copy_to_user(ubuf, (void __force *)dbuf, len)) {
-			err = -EFAULT;
-			dev_err(vop_dev(vdev), "%s %d err %d\n",
-				__func__, __LINE__, err);
-			goto err;
-		}
-		vdev->in_bytes += len;
-		err = 0;
-		goto err;
-	}
-
-	dma_alignment = 1 << vi->dma_ch->device->copy_align;
-	x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
-
-	dma_offset = daddr - round_down(daddr, dma_alignment);
-	daddr -= dma_offset;
-	len += dma_offset;
-	/*
-	 * X100 uses DMA addresses as seen by the card so adding
-	 * the aperture base is not required for DMA. However x200
-	 * requires DMA addresses to be an offset into the bar so
-	 * add the aperture base for x200.
-	 */
-	if (x200)
-		daddr += vpdev->aper->pa;
-	while (len) {
-		partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
-		err = vop_sync_dma(vdev, vvr->buf_da, daddr,
-				   ALIGN(partlen, dma_alignment));
-		if (err) {
-			dev_err(vop_dev(vdev), "%s %d err %d\n",
-				__func__, __LINE__, err);
-			goto err;
-		}
-		if (copy_to_user(ubuf, vvr->buf + dma_offset,
-				 partlen - dma_offset)) {
-			err = -EFAULT;
-			dev_err(vop_dev(vdev), "%s %d err %d\n",
-				__func__, __LINE__, err);
-			goto err;
-		}
-		daddr += partlen;
-		ubuf += partlen;
-		dbuf += partlen;
-		vdev->in_bytes_dma += partlen;
-		vdev->in_bytes += partlen;
-		len -= partlen;
-		dma_offset = 0;
-	}
-	err = 0;
-err:
-	vpdev->hw_ops->unmap(vpdev, dbuf);
-	dev_dbg(vop_dev(vdev),
-		"%s: ubuf %p dbuf %p len 0x%zx vr_idx 0x%x\n",
-		__func__, ubuf, dbuf, len, vr_idx);
-	return err;
-}
-
-/*
- * Initiates copies across the PCIe bus from a user space buffer to card
- * memory. When transfers are done using DMA, source/destination addresses
- * and transfer length must follow the alignment requirements of the MIC
- * DMA engine.
- */
-static int vop_virtio_copy_from_user(struct vop_vdev *vdev, void __user *ubuf,
-				     size_t len, u64 daddr, size_t dlen,
-				     int vr_idx)
-{
-	struct vop_device *vpdev = vdev->vpdev;
-	void __iomem *dbuf = vpdev->hw_ops->remap(vpdev, daddr, len);
-	struct vop_vringh *vvr = &vdev->vvr[vr_idx];
-	struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
-	size_t dma_alignment;
-	bool x200;
-	size_t partlen;
-	bool dma = VOP_USE_DMA && vi->dma_ch;
-	int err = 0;
-	size_t offset = 0;
-
-	if (dma) {
-		dma_alignment = 1 << vi->dma_ch->device->copy_align;
-		x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
-
-		if (daddr & (dma_alignment - 1)) {
-			vdev->tx_dst_unaligned += len;
-			dma = false;
-		} else if (ALIGN(len, dma_alignment) > dlen) {
-			vdev->tx_len_unaligned += len;
-			dma = false;
-		}
-	}
-
-	if (!dma)
-		goto memcpy;
-
-	/*
-	 * X100 uses DMA addresses as seen by the card so adding
-	 * the aperture base is not required for DMA. However x200
-	 * requires DMA addresses to be an offset into the bar so
-	 * add the aperture base for x200.
-	 */
-	if (x200)
-		daddr += vpdev->aper->pa;
-	while (len) {
-		partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
-
-		if (copy_from_user(vvr->buf, ubuf, partlen)) {
-			err = -EFAULT;
-			dev_err(vop_dev(vdev), "%s %d err %d\n",
-				__func__, __LINE__, err);
-			goto err;
-		}
-		err = vop_sync_dma(vdev, daddr, vvr->buf_da,
-				   ALIGN(partlen, dma_alignment));
-		if (err) {
-			dev_err(vop_dev(vdev), "%s %d err %d\n",
-				__func__, __LINE__, err);
-			goto err;
-		}
-		daddr += partlen;
-		ubuf += partlen;
-		dbuf += partlen;
-		vdev->out_bytes_dma += partlen;
-		vdev->out_bytes += partlen;
-		len -= partlen;
-	}
-memcpy:
-	/*
-	 * We are copying to IO below and should ideally use something
-	 * like copy_from_user_toio(..) if it existed.
-	 */
-	while (len) {
-		partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
-
-		if (copy_from_user(vvr->buf, ubuf + offset, partlen)) {
-			err = -EFAULT;
-			dev_err(vop_dev(vdev), "%s %d err %d\n",
-				__func__, __LINE__, err);
-			goto err;
-		}
-		memcpy_toio(dbuf + offset, vvr->buf, partlen);
-		offset += partlen;
-		vdev->out_bytes += partlen;
-		len -= partlen;
-	}
-	err = 0;
-err:
-	vpdev->hw_ops->unmap(vpdev, dbuf);
-	dev_dbg(vop_dev(vdev),
-		"%s: ubuf %p dbuf %p len 0x%zx vr_idx 0x%x\n",
-		__func__, ubuf, dbuf, len, vr_idx);
-	return err;
-}
-
-#define MIC_VRINGH_READ true
-
-/* Determine the total number of bytes consumed in a VRINGH KIOV */
-static inline u32 vop_vringh_iov_consumed(struct vringh_kiov *iov)
-{
-	int i;
-	u32 total = iov->consumed;
-
-	for (i = 0; i < iov->i; i++)
-		total += iov->iov[i].iov_len;
-	return total;
-}
-
-/*
- * Traverse the VRINGH KIOV and issue the APIs to trigger the copies.
- * This API is heavily based on the vringh_iov_xfer(..) implementation
- * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..)
- * and vringh_iov_push_kern(..) directly is because there is no
- * way to override the VRINGH xfer(..) routines as of v3.10.
- */
-static int vop_vringh_copy(struct vop_vdev *vdev, struct vringh_kiov *iov,
-			   void __user *ubuf, size_t len, bool read, int vr_idx,
-			   size_t *out_len)
-{
-	int ret = 0;
-	size_t partlen, tot_len = 0;
-
-	while (len && iov->i < iov->used) {
-		struct kvec *kiov = &iov->iov[iov->i];
-		unsigned long daddr = (unsigned long)kiov->iov_base;
-
-		partlen = min(kiov->iov_len, len);
-		if (read)
-			ret = vop_virtio_copy_to_user(vdev, ubuf, partlen,
-						      daddr,
-						      kiov->iov_len,
-						      vr_idx);
-		else
-			ret = vop_virtio_copy_from_user(vdev, ubuf, partlen,
-							daddr,
-							kiov->iov_len,
-							vr_idx);
-		if (ret) {
-			dev_err(vop_dev(vdev), "%s %d err %d\n",
-				__func__, __LINE__, ret);
-			break;
-		}
-		len -= partlen;
-		ubuf += partlen;
-		tot_len += partlen;
-		iov->consumed += partlen;
-		kiov->iov_len -= partlen;
-		kiov->iov_base += partlen;
-		if (!kiov->iov_len) {
-			/* Fix up old iov element then increment. */
-			kiov->iov_len = iov->consumed;
-			kiov->iov_base -= iov->consumed;
-
-			iov->consumed = 0;
-			iov->i++;
-		}
-	}
-	*out_len = tot_len;
-	return ret;
-}
-
-/*
- * Use the standard VRINGH infrastructure in the kernel to fetch new
- * descriptors, initiate the copies and update the used ring.
- */
-static int _vop_virtio_copy(struct vop_vdev *vdev, struct mic_copy_desc *copy)
-{
-	int ret = 0;
-	u32 iovcnt = copy->iovcnt;
-	struct iovec iov;
-	struct iovec __user *u_iov = copy->iov;
-	void __user *ubuf = NULL;
-	struct vop_vringh *vvr = &vdev->vvr[copy->vr_idx];
-	struct vringh_kiov *riov = &vvr->riov;
-	struct vringh_kiov *wiov = &vvr->wiov;
-	struct vringh *vrh = &vvr->vrh;
-	u16 *head = &vvr->head;
-	struct mic_vring *vr = &vvr->vring;
-	size_t len = 0, out_len;
-
-	copy->out_len = 0;
-	/* Fetch a new IOVEC if all previous elements have been processed */
-	if (riov->i == riov->used && wiov->i == wiov->used) {
-		ret = vringh_getdesc_kern(vrh, riov, wiov,
-					  head, GFP_KERNEL);
-		/* Check if there are available descriptors */
-		if (ret <= 0)
-			return ret;
-	}
-	while (iovcnt) {
-		if (!len) {
-			/* Copy over a new iovec from user space. */
-			ret = copy_from_user(&iov, u_iov, sizeof(*u_iov));
-			if (ret) {
-				ret = -EINVAL;
-				dev_err(vop_dev(vdev), "%s %d err %d\n",
-					__func__, __LINE__, ret);
-				break;
-			}
-			len = iov.iov_len;
-			ubuf = iov.iov_base;
-		}
-		/* Issue all the read descriptors first */
-		ret = vop_vringh_copy(vdev, riov, ubuf, len,
-				      MIC_VRINGH_READ, copy->vr_idx, &out_len);
-		if (ret) {
-			dev_err(vop_dev(vdev), "%s %d err %d\n",
-				__func__, __LINE__, ret);
-			break;
-		}
-		len -= out_len;
-		ubuf += out_len;
-		copy->out_len += out_len;
-		/* Issue the write descriptors next */
-		ret = vop_vringh_copy(vdev, wiov, ubuf, len,
-				      !MIC_VRINGH_READ, copy->vr_idx, &out_len);
-		if (ret) {
-			dev_err(vop_dev(vdev), "%s %d err %d\n",
-				__func__, __LINE__, ret);
-			break;
-		}
-		len -= out_len;
-		ubuf += out_len;
-		copy->out_len += out_len;
-		if (!len) {
-			/* One user space iovec is now completed */
-			iovcnt--;
-			u_iov++;
-		}
-		/* Exit loop if all elements in KIOVs have been processed. */
-		if (riov->i == riov->used && wiov->i == wiov->used)
-			break;
-	}
-	/*
-	 * Update the used ring if a descriptor was available and some data was
-	 * copied in/out and the user asked for a used ring update.
-	 */
-	if (*head != USHRT_MAX && copy->out_len && copy->update_used) {
-		u32 total = 0;
-
-		/* Determine the total data consumed */
-		total += vop_vringh_iov_consumed(riov);
-		total += vop_vringh_iov_consumed(wiov);
-		vringh_complete_kern(vrh, *head, total);
-		*head = USHRT_MAX;
-		if (vringh_need_notify_kern(vrh) > 0)
-			vringh_notify(vrh);
-		vringh_kiov_cleanup(riov);
-		vringh_kiov_cleanup(wiov);
-		/* Update avail idx for user space */
-		vr->info->avail_idx = vrh->last_avail_idx;
-	}
-	return ret;
-}
-
-static inline int vop_verify_copy_args(struct vop_vdev *vdev,
-				       struct mic_copy_desc *copy)
-{
-	if (!vdev || copy->vr_idx >= vdev->dd->num_vq)
-		return -EINVAL;
-	return 0;
-}
-
-/* Copy a specified number of virtio descriptors in a chain */
-static int vop_virtio_copy_desc(struct vop_vdev *vdev,
-				struct mic_copy_desc *copy)
-{
-	int err;
-	struct vop_vringh *vvr;
-
-	err = vop_verify_copy_args(vdev, copy);
-	if (err)
-		return err;
-
-	vvr = &vdev->vvr[copy->vr_idx];
-	mutex_lock(&vvr->vr_mutex);
-	if (!vop_vdevup(vdev)) {
-		err = -ENODEV;
-		dev_err(vop_dev(vdev), "%s %d err %d\n",
-			__func__, __LINE__, err);
-		goto err;
-	}
-	err = _vop_virtio_copy(vdev, copy);
-	if (err) {
-		dev_err(vop_dev(vdev), "%s %d err %d\n",
-			__func__, __LINE__, err);
-	}
-err:
-	mutex_unlock(&vvr->vr_mutex);
-	return err;
-}
-
-static int vop_open(struct inode *inode, struct file *f)
-{
-	struct vop_vdev *vdev;
-	struct vop_info *vi = container_of(f->private_data,
-		struct vop_info, miscdev);
-
-	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
-	if (!vdev)
-		return -ENOMEM;
-	vdev->vi = vi;
-	mutex_init(&vdev->vdev_mutex);
-	f->private_data = vdev;
-	init_completion(&vdev->destroy);
-	complete(&vdev->destroy);
-	return 0;
-}
-
-static int vop_release(struct inode *inode, struct file *f)
-{
-	struct vop_vdev *vdev = f->private_data, *vdev_tmp;
-	struct vop_info *vi = vdev->vi;
-	struct list_head *pos, *tmp;
-	bool found = false;
-
-	mutex_lock(&vdev->vdev_mutex);
-	if (vdev->deleted)
-		goto unlock;
-	mutex_lock(&vi->vop_mutex);
-	list_for_each_safe(pos, tmp, &vi->vdev_list) {
-		vdev_tmp = list_entry(pos, struct vop_vdev, list);
-		if (vdev == vdev_tmp) {
-			vop_virtio_del_device(vdev);
-			list_del(pos);
-			found = true;
-			break;
-		}
-	}
-	mutex_unlock(&vi->vop_mutex);
-unlock:
-	mutex_unlock(&vdev->vdev_mutex);
-	if (!found)
-		wait_for_completion(&vdev->destroy);
-	f->private_data = NULL;
-	kfree(vdev);
-	return 0;
-}
-
-static long vop_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
-{
-	struct vop_vdev *vdev = f->private_data;
-	struct vop_info *vi = vdev->vi;
-	void __user *argp = (void __user *)arg;
-	int ret;
-
-	switch (cmd) {
-	case MIC_VIRTIO_ADD_DEVICE:
-	{
-		struct mic_device_desc dd, *dd_config;
-
-		if (copy_from_user(&dd, argp, sizeof(dd)))
-			return -EFAULT;
-
-		if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE ||
-		    dd.num_vq > MIC_MAX_VRINGS)
-			return -EINVAL;
-
-		dd_config = memdup_user(argp, mic_desc_size(&dd));
-		if (IS_ERR(dd_config))
-			return PTR_ERR(dd_config);
-
-		/* Ensure desc has not changed between the two reads */
-		if (memcmp(&dd, dd_config, sizeof(dd))) {
-			ret = -EINVAL;
-			goto free_ret;
-		}
-		mutex_lock(&vdev->vdev_mutex);
-		mutex_lock(&vi->vop_mutex);
-		ret = vop_virtio_add_device(vdev, dd_config);
-		if (ret)
-			goto unlock_ret;
-		list_add_tail(&vdev->list, &vi->vdev_list);
-unlock_ret:
-		mutex_unlock(&vi->vop_mutex);
-		mutex_unlock(&vdev->vdev_mutex);
-free_ret:
-		kfree(dd_config);
-		return ret;
-	}
-	case MIC_VIRTIO_COPY_DESC:
-	{
-		struct mic_copy_desc copy;
-
-		mutex_lock(&vdev->vdev_mutex);
-		ret = vop_vdev_inited(vdev);
-		if (ret)
-			goto _unlock_ret;
-
-		if (copy_from_user(&copy, argp, sizeof(copy))) {
-			ret = -EFAULT;
-			goto _unlock_ret;
-		}
-
-		ret = vop_virtio_copy_desc(vdev, &copy);
-		if (ret < 0)
-			goto _unlock_ret;
-		if (copy_to_user(
-			&((struct mic_copy_desc __user *)argp)->out_len,
-			&copy.out_len, sizeof(copy.out_len)))
-			ret = -EFAULT;
-_unlock_ret:
-		mutex_unlock(&vdev->vdev_mutex);
-		return ret;
-	}
-	case MIC_VIRTIO_CONFIG_CHANGE:
-	{
-		void *buf;
-
-		mutex_lock(&vdev->vdev_mutex);
-		ret = vop_vdev_inited(vdev);
-		if (ret)
-			goto __unlock_ret;
-		buf = memdup_user(argp, vdev->dd->config_len);
-		if (IS_ERR(buf)) {
-			ret = PTR_ERR(buf);
-			goto __unlock_ret;
-		}
-		ret = vop_virtio_config_change(vdev, buf);
-		kfree(buf);
-__unlock_ret:
-		mutex_unlock(&vdev->vdev_mutex);
-		return ret;
-	}
-	default:
-		return -ENOIOCTLCMD;
-	};
-	return 0;
-}
-
-/*
- * We return EPOLLIN | EPOLLOUT from poll when new buffers are enqueued, and
- * not when previously enqueued buffers may be available. This means that
- * in the card->host (TX) path, when userspace is unblocked by poll it
- * must drain all available descriptors or it can stall.
- */
-static __poll_t vop_poll(struct file *f, poll_table *wait)
-{
-	struct vop_vdev *vdev = f->private_data;
-	__poll_t mask = 0;
-
-	mutex_lock(&vdev->vdev_mutex);
-	if (vop_vdev_inited(vdev)) {
-		mask = EPOLLERR;
-		goto done;
-	}
-	poll_wait(f, &vdev->waitq, wait);
-	if (vop_vdev_inited(vdev)) {
-		mask = EPOLLERR;
-	} else if (vdev->poll_wake) {
-		vdev->poll_wake = 0;
-		mask = EPOLLIN | EPOLLOUT;
-	}
-done:
-	mutex_unlock(&vdev->vdev_mutex);
-	return mask;
-}
-
-static inline int
-vop_query_offset(struct vop_vdev *vdev, unsigned long offset,
-		 unsigned long *size, unsigned long *pa)
-{
-	struct vop_device *vpdev = vdev->vpdev;
-	unsigned long start = MIC_DP_SIZE;
-	int i;
-
-	/*
-	 * MMAP interface is as follows:
-	 * offset				region
-	 * 0x0					virtio device_page
-	 * 0x1000				first vring
-	 * 0x1000 + size of 1st vring		second vring
-	 * ....
-	 */
-	if (!offset) {
-		*pa = virt_to_phys(vpdev->hw_ops->get_dp(vpdev));
-		*size = MIC_DP_SIZE;
-		return 0;
-	}
-
-	for (i = 0; i < vdev->dd->num_vq; i++) {
-		struct vop_vringh *vvr = &vdev->vvr[i];
-
-		if (offset == start) {
-			*pa = virt_to_phys(vvr->vring.va);
-			*size = vvr->vring.len;
-			return 0;
-		}
-		start += vvr->vring.len;
-	}
-	return -1;
-}
-
-/*
- * Maps the device page and virtio rings to user space for readonly access.
- */
-static int vop_mmap(struct file *f, struct vm_area_struct *vma)
-{
-	struct vop_vdev *vdev = f->private_data;
-	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
-	unsigned long pa, size = vma->vm_end - vma->vm_start, size_rem = size;
-	int i, err;
-
-	err = vop_vdev_inited(vdev);
-	if (err)
-		goto ret;
-	if (vma->vm_flags & VM_WRITE) {
-		err = -EACCES;
-		goto ret;
-	}
-	while (size_rem) {
-		i = vop_query_offset(vdev, offset, &size, &pa);
-		if (i < 0) {
-			err = -EINVAL;
-			goto ret;
-		}
-		err = remap_pfn_range(vma, vma->vm_start + offset,
-				      pa >> PAGE_SHIFT, size,
-				      vma->vm_page_prot);
-		if (err)
-			goto ret;
-		size_rem -= size;
-		offset += size;
-	}
-ret:
-	return err;
-}
-
-static const struct file_operations vop_fops = {
-	.open = vop_open,
-	.release = vop_release,
-	.unlocked_ioctl = vop_ioctl,
-	.poll = vop_poll,
-	.mmap = vop_mmap,
-	.owner = THIS_MODULE,
-};
-
-int vop_host_init(struct vop_info *vi)
-{
-	int rc;
-	struct miscdevice *mdev;
-	struct vop_device *vpdev = vi->vpdev;
-
-	INIT_LIST_HEAD(&vi->vdev_list);
-	vi->dma_ch = vpdev->dma_ch;
-	mdev = &vi->miscdev;
-	mdev->minor = MISC_DYNAMIC_MINOR;
-	snprintf(vi->name, sizeof(vi->name), "vop_virtio%d", vpdev->index);
-	mdev->name = vi->name;
-	mdev->fops = &vop_fops;
-	mdev->parent = &vpdev->dev;
-
-	rc = misc_register(mdev);
-	if (rc)
-		dev_err(&vpdev->dev, "%s failed rc %d\n", __func__, rc);
-	return rc;
-}
-
-void vop_host_uninit(struct vop_info *vi)
-{
-	struct list_head *pos, *tmp;
-	struct vop_vdev *vdev;
-
-	mutex_lock(&vi->vop_mutex);
-	vop_virtio_reset_devices(vi);
-	list_for_each_safe(pos, tmp, &vi->vdev_list) {
-		vdev = list_entry(pos, struct vop_vdev, list);
-		list_del(pos);
-		reinit_completion(&vdev->destroy);
-		mutex_unlock(&vi->vop_mutex);
-		mutex_lock(&vdev->vdev_mutex);
-		vop_virtio_del_device(vdev);
-		vdev->deleted = true;
-		mutex_unlock(&vdev->vdev_mutex);
-		complete(&vdev->destroy);
-		mutex_lock(&vi->vop_mutex);
-	}
-	mutex_unlock(&vi->vop_mutex);
-	misc_deregister(&vi->miscdev);
-}
diff --git a/include/linux/mic_bus.h b/include/linux/mic_bus.h
deleted file mode 100644
index e99c789424e0..000000000000
--- a/include/linux/mic_bus.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Intel MIC Bus driver.
- *
- * This implementation is very similar to the virtio bus driver
- * implementation @ include/linux/virtio.h.
- */
-#ifndef _MIC_BUS_H_
-#define _MIC_BUS_H_
-/*
- * Everything a mbus driver needs to work with any particular mbus
- * implementation.
- */
-#include <linux/interrupt.h>
-#include <linux/dma-mapping.h>
-
-struct mbus_device_id {
-	__u32 device;
-	__u32 vendor;
-};
-
-#define MBUS_DEV_DMA_HOST 2
-#define MBUS_DEV_DMA_MIC 3
-#define MBUS_DEV_ANY_ID 0xffffffff
-
-/**
- * mbus_device - representation of a device using mbus
- * @mmio_va: virtual address of mmio space
- * @hw_ops: the hardware ops supported by this device.
- * @id: the device type identification (used to match it with a driver).
- * @dev: underlying device.
- * be used to communicate with.
- * @index: unique position on the mbus bus
- */
-struct mbus_device {
-	void __iomem *mmio_va;
-	struct mbus_hw_ops *hw_ops;
-	struct mbus_device_id id;
-	struct device dev;
-	int index;
-};
-
-/**
- * mbus_driver - operations for a mbus I/O driver
- * @driver: underlying device driver (populate name and owner).
- * @id_table: the ids serviced by this driver.
- * @probe: the function to call when a device is found.  Returns 0 or -errno.
- * @remove: the function to call when a device is removed.
- */
-struct mbus_driver {
-	struct device_driver driver;
-	const struct mbus_device_id *id_table;
-	int (*probe)(struct mbus_device *dev);
-	void (*scan)(struct mbus_device *dev);
-	void (*remove)(struct mbus_device *dev);
-};
-
-/**
- * struct mic_irq - opaque pointer used as cookie
- */
-struct mic_irq;
-
-/**
- * mbus_hw_ops - Hardware operations for accessing a MIC device on the MIC bus.
- */
-struct mbus_hw_ops {
-	struct mic_irq* (*request_threaded_irq)(struct mbus_device *mbdev,
-						irq_handler_t handler,
-						irq_handler_t thread_fn,
-						const char *name, void *data,
-						int intr_src);
-	void (*free_irq)(struct mbus_device *mbdev,
-			 struct mic_irq *cookie, void *data);
-	void (*ack_interrupt)(struct mbus_device *mbdev, int num);
-};
-
-struct mbus_device *
-mbus_register_device(struct device *pdev, int id, const struct dma_map_ops *dma_ops,
-		     struct mbus_hw_ops *hw_ops, int index,
-		     void __iomem *mmio_va);
-void mbus_unregister_device(struct mbus_device *mbdev);
-
-int mbus_register_driver(struct mbus_driver *drv);
-void mbus_unregister_driver(struct mbus_driver *drv);
-
-static inline struct mbus_device *dev_to_mbus(struct device *_dev)
-{
-	return container_of(_dev, struct mbus_device, dev);
-}
-
-static inline struct mbus_driver *drv_to_mbus(struct device_driver *drv)
-{
-	return container_of(drv, struct mbus_driver, driver);
-}
-
-#endif /* _MIC_BUS_H */
diff --git a/include/linux/scif.h b/include/linux/scif.h
deleted file mode 100644
index 329e695b8fe5..000000000000
--- a/include/linux/scif.h
+++ /dev/null
@@ -1,1339 +0,0 @@
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- *   notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- *   notice, this list of conditions and the following disclaimer in
- *   the documentation and/or other materials provided with the
- *   distribution.
- * * Neither the name of Intel Corporation nor the names of its
- *   contributors may be used to endorse or promote products derived
- *   from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Intel SCIF driver.
- *
- */
-#ifndef __SCIF_H__
-#define __SCIF_H__
-
-#include <linux/types.h>
-#include <linux/poll.h>
-#include <linux/device.h>
-#include <linux/scif_ioctl.h>
-
-#define SCIF_ACCEPT_SYNC	1
-#define SCIF_SEND_BLOCK		1
-#define SCIF_RECV_BLOCK		1
-
-enum {
-	SCIF_PROT_READ = (1 << 0),
-	SCIF_PROT_WRITE = (1 << 1)
-};
-
-enum {
-	SCIF_MAP_FIXED = 0x10,
-	SCIF_MAP_KERNEL	= 0x20,
-};
-
-enum {
-	SCIF_FENCE_INIT_SELF = (1 << 0),
-	SCIF_FENCE_INIT_PEER = (1 << 1),
-	SCIF_SIGNAL_LOCAL = (1 << 4),
-	SCIF_SIGNAL_REMOTE = (1 << 5)
-};
-
-enum {
-	SCIF_RMA_USECPU = (1 << 0),
-	SCIF_RMA_USECACHE = (1 << 1),
-	SCIF_RMA_SYNC = (1 << 2),
-	SCIF_RMA_ORDERED = (1 << 3)
-};
-
-/* End of SCIF Admin Reserved Ports */
-#define SCIF_ADMIN_PORT_END	1024
-
-/* End of SCIF Reserved Ports */
-#define SCIF_PORT_RSVD		1088
-
-typedef struct scif_endpt *scif_epd_t;
-typedef struct scif_pinned_pages *scif_pinned_pages_t;
-
-/**
- * struct scif_range - SCIF registered range used in kernel mode
- * @cookie: cookie used internally by SCIF
- * @nr_pages: number of pages of PAGE_SIZE
- * @prot_flags: R/W protection
- * @phys_addr: Array of bus addresses
- * @va: Array of kernel virtual addresses backed by the pages in the phys_addr
- *	array. The va is populated only when called on the host for a remote
- *	SCIF connection on MIC. This is required to support the use case of DMA
- *	between MIC and another device which is not a SCIF node e.g., an IB or
- *	ethernet NIC.
- */
-struct scif_range {
-	void *cookie;
-	int nr_pages;
-	int prot_flags;
-	dma_addr_t *phys_addr;
-	void __iomem **va;
-};
-
-/**
- * struct scif_pollepd - SCIF endpoint to be monitored via scif_poll
- * @epd: SCIF endpoint
- * @events: requested events
- * @revents: returned events
- */
-struct scif_pollepd {
-	scif_epd_t epd;
-	__poll_t events;
-	__poll_t revents;
-};
-
-/**
- * scif_peer_dev - representation of a peer SCIF device
- *
- * Peer devices show up as PCIe devices for the mgmt node but not the cards.
- * The mgmt node discovers all the cards on the PCIe bus and informs the other
- * cards about their peers. Upon notification of a peer a node adds a peer
- * device to the peer bus to maintain symmetry in the way devices are
- * discovered across all nodes in the SCIF network.
- *
- * @dev: underlying device
- * @dnode - The destination node which this device will communicate with.
- */
-struct scif_peer_dev {
-	struct device dev;
-	u8 dnode;
-};
-
-/**
- * scif_client - representation of a SCIF client
- * @name: client name
- * @probe - client method called when a peer device is registered
- * @remove - client method called when a peer device is unregistered
- * @si - subsys_interface used internally for implementing SCIF clients
- */
-struct scif_client {
-	const char *name;
-	void (*probe)(struct scif_peer_dev *spdev);
-	void (*remove)(struct scif_peer_dev *spdev);
-	struct subsys_interface si;
-};
-
-#define SCIF_OPEN_FAILED ((scif_epd_t)-1)
-#define SCIF_REGISTER_FAILED ((off_t)-1)
-#define SCIF_MMAP_FAILED ((void *)-1)
-
-/**
- * scif_open() - Create an endpoint
- *
- * Return:
- * Upon successful completion, scif_open() returns an endpoint descriptor to
- * be used in subsequent SCIF functions calls to refer to that endpoint;
- * otherwise in user mode SCIF_OPEN_FAILED (that is ((scif_epd_t)-1)) is
- * returned and errno is set to indicate the error; in kernel mode a NULL
- * scif_epd_t is returned.
- *
- * Errors:
- * ENOMEM - Insufficient kernel memory was available
- */
-scif_epd_t scif_open(void);
-
-/**
- * scif_bind() - Bind an endpoint to a port
- * @epd:	endpoint descriptor
- * @pn:		port number
- *
- * scif_bind() binds endpoint epd to port pn, where pn is a port number on the
- * local node. If pn is zero, a port number greater than or equal to
- * SCIF_PORT_RSVD is assigned and returned. Each endpoint may be bound to
- * exactly one local port. Ports less than 1024 when requested can only be bound
- * by system (or root) processes or by processes executed by privileged users.
- *
- * Return:
- * Upon successful completion, scif_bind() returns the port number to which epd
- * is bound; otherwise in user mode -1 is returned and errno is set to
- * indicate the error; in kernel mode the negative of one of the following
- * errors is returned.
- *
- * Errors:
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- * EINVAL - the endpoint or the port is already bound
- * EISCONN - The endpoint is already connected
- * ENOSPC - No port number available for assignment
- * EACCES - The port requested is protected and the user is not the superuser
- */
-int scif_bind(scif_epd_t epd, u16 pn);
-
-/**
- * scif_listen() - Listen for connections on an endpoint
- * @epd:	endpoint descriptor
- * @backlog:	maximum pending connection requests
- *
- * scif_listen() marks the endpoint epd as a listening endpoint - that is, as
- * an endpoint that will be used to accept incoming connection requests. Once
- * so marked, the endpoint is said to be in the listening state and may not be
- * used as the endpoint of a connection.
- *
- * The endpoint, epd, must have been bound to a port.
- *
- * The backlog argument defines the maximum length to which the queue of
- * pending connections for epd may grow. If a connection request arrives when
- * the queue is full, the client may receive an error with an indication that
- * the connection was refused.
- *
- * Return:
- * Upon successful completion, scif_listen() returns 0; otherwise in user mode
- * -1 is returned and errno is set to indicate the error; in kernel mode the
- * negative of one of the following errors is returned.
- *
- * Errors:
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- * EINVAL - the endpoint is not bound to a port
- * EISCONN - The endpoint is already connected or listening
- */
-int scif_listen(scif_epd_t epd, int backlog);
-
-/**
- * scif_connect() - Initiate a connection on a port
- * @epd:	endpoint descriptor
- * @dst:	global id of port to which to connect
- *
- * The scif_connect() function requests the connection of endpoint epd to remote
- * port dst. If the connection is successful, a peer endpoint, bound to dst, is
- * created on node dst.node. On successful return, the connection is complete.
- *
- * If the endpoint epd has not already been bound to a port, scif_connect()
- * will bind it to an unused local port.
- *
- * A connection is terminated when an endpoint of the connection is closed,
- * either explicitly by scif_close(), or when a process that owns one of the
- * endpoints of the connection is terminated.
- *
- * In user space, scif_connect() supports an asynchronous connection mode
- * if the application has set the O_NONBLOCK flag on the endpoint via the
- * fcntl() system call. Setting this flag will result in the calling process
- * not to wait during scif_connect().
- *
- * Return:
- * Upon successful completion, scif_connect() returns the port ID to which the
- * endpoint, epd, is bound; otherwise in user mode -1 is returned and errno is
- * set to indicate the error; in kernel mode the negative of one of the
- * following errors is returned.
- *
- * Errors:
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- * ECONNREFUSED - The destination was not listening for connections or refused
- * the connection request
- * EINVAL - dst.port is not a valid port ID
- * EISCONN - The endpoint is already connected
- * ENOMEM - No buffer space is available
- * ENODEV - The destination node does not exist, or the node is lost or existed,
- * but is not currently in the network since it may have crashed
- * ENOSPC - No port number available for assignment
- * EOPNOTSUPP - The endpoint is listening and cannot be connected
- */
-int scif_connect(scif_epd_t epd, struct scif_port_id *dst);
-
-/**
- * scif_accept() - Accept a connection on an endpoint
- * @epd:	endpoint descriptor
- * @peer:	global id of port to which connected
- * @newepd:	new connected endpoint descriptor
- * @flags:	flags
- *
- * The scif_accept() call extracts the first connection request from the queue
- * of pending connections for the port on which epd is listening. scif_accept()
- * creates a new endpoint, bound to the same port as epd, and allocates a new
- * SCIF endpoint descriptor, returned in newepd, for the endpoint. The new
- * endpoint is connected to the endpoint through which the connection was
- * requested. epd is unaffected by this call, and remains in the listening
- * state.
- *
- * On successful return, peer holds the global port identifier (node id and
- * local port number) of the port which requested the connection.
- *
- * A connection is terminated when an endpoint of the connection is closed,
- * either explicitly by scif_close(), or when a process that owns one of the
- * endpoints of the connection is terminated.
- *
- * The number of connections that can (subsequently) be accepted on epd is only
- * limited by system resources (memory).
- *
- * The flags argument is formed by OR'ing together zero or more of the
- * following values.
- * SCIF_ACCEPT_SYNC - block until a connection request is presented. If
- *			SCIF_ACCEPT_SYNC is not in flags, and no pending
- *			connections are present on the queue, scif_accept()
- *			fails with an EAGAIN error
- *
- * In user mode, the select() and poll() functions can be used to determine
- * when there is a connection request. In kernel mode, the scif_poll()
- * function may be used for this purpose. A readable event will be delivered
- * when a connection is requested.
- *
- * Return:
- * Upon successful completion, scif_accept() returns 0; otherwise in user mode
- * -1 is returned and errno is set to indicate the error; in kernel mode the
- *	negative of one of the following errors is returned.
- *
- * Errors:
- * EAGAIN - SCIF_ACCEPT_SYNC is not set and no connections are present to be
- * accepted or SCIF_ACCEPT_SYNC is not set and remote node failed to complete
- * its connection request
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- * EINTR - Interrupted function
- * EINVAL - epd is not a listening endpoint, or flags is invalid, or peer is
- * NULL, or newepd is NULL
- * ENODEV - The requesting node is lost or existed, but is not currently in the
- * network since it may have crashed
- * ENOMEM - Not enough space
- * ENOENT - Secondary part of epd registration failed
- */
-int scif_accept(scif_epd_t epd, struct scif_port_id *peer, scif_epd_t
-		*newepd, int flags);
-
-/**
- * scif_close() - Close an endpoint
- * @epd:	endpoint descriptor
- *
- * scif_close() closes an endpoint and performs necessary teardown of
- * facilities associated with that endpoint.
- *
- * If epd is a listening endpoint then it will no longer accept connection
- * requests on the port to which it is bound. Any pending connection requests
- * are rejected.
- *
- * If epd is a connected endpoint, then its peer endpoint is also closed. RMAs
- * which are in-process through epd or its peer endpoint will complete before
- * scif_close() returns. Registered windows of the local and peer endpoints are
- * released as if scif_unregister() was called against each window.
- *
- * Closing a SCIF endpoint does not affect local registered memory mapped by
- * a SCIF endpoint on a remote node. The local memory remains mapped by the peer
- * SCIF endpoint explicitly removed by calling munmap(..) by the peer.
- *
- * If the peer endpoint's receive queue is not empty at the time that epd is
- * closed, then the peer endpoint can be passed as the endpoint parameter to
- * scif_recv() until the receive queue is empty.
- *
- * epd is freed and may no longer be accessed.
- *
- * Return:
- * Upon successful completion, scif_close() returns 0; otherwise in user mode
- * -1 is returned and errno is set to indicate the error; in kernel mode the
- * negative of one of the following errors is returned.
- *
- * Errors:
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- */
-int scif_close(scif_epd_t epd);
-
-/**
- * scif_send() - Send a message
- * @epd:	endpoint descriptor
- * @msg:	message buffer address
- * @len:	message length
- * @flags:	blocking mode flags
- *
- * scif_send() sends data to the peer of endpoint epd. Up to len bytes of data
- * are copied from memory starting at address msg. On successful execution the
- * return value of scif_send() is the number of bytes that were sent, and is
- * zero if no bytes were sent because len was zero. scif_send() may be called
- * only when the endpoint is in a connected state.
- *
- * If a scif_send() call is non-blocking, then it sends only those bytes which
- * can be sent without waiting, up to a maximum of len bytes.
- *
- * If a scif_send() call is blocking, then it normally returns after sending
- * all len bytes. If a blocking call is interrupted or the connection is
- * reset, the call is considered successful if some bytes were sent or len is
- * zero, otherwise the call is considered unsuccessful.
- *
- * In user mode, the select() and poll() functions can be used to determine
- * when the send queue is not full. In kernel mode, the scif_poll() function
- * may be used for this purpose.
- *
- * It is recommended that scif_send()/scif_recv() only be used for short
- * control-type message communication between SCIF endpoints. The SCIF RMA
- * APIs are expected to provide better performance for transfer sizes of
- * 1024 bytes or longer for the current MIC hardware and software
- * implementation.
- *
- * scif_send() will block until the entire message is sent if SCIF_SEND_BLOCK
- * is passed as the flags argument.
- *
- * Return:
- * Upon successful completion, scif_send() returns the number of bytes sent;
- * otherwise in user mode -1 is returned and errno is set to indicate the
- * error; in kernel mode the negative of one of the following errors is
- * returned.
- *
- * Errors:
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- * ECONNRESET - Connection reset by peer
- * EINVAL - flags is invalid, or len is negative
- * ENODEV - The remote node is lost or existed, but is not currently in the
- * network since it may have crashed
- * ENOMEM - Not enough space
- * ENOTCONN - The endpoint is not connected
- */
-int scif_send(scif_epd_t epd, void *msg, int len, int flags);
-
-/**
- * scif_recv() - Receive a message
- * @epd:	endpoint descriptor
- * @msg:	message buffer address
- * @len:	message buffer length
- * @flags:	blocking mode flags
- *
- * scif_recv() receives data from the peer of endpoint epd. Up to len bytes of
- * data are copied to memory starting at address msg. On successful execution
- * the return value of scif_recv() is the number of bytes that were received,
- * and is zero if no bytes were received because len was zero. scif_recv() may
- * be called only when the endpoint is in a connected state.
- *
- * If a scif_recv() call is non-blocking, then it receives only those bytes
- * which can be received without waiting, up to a maximum of len bytes.
- *
- * If a scif_recv() call is blocking, then it normally returns after receiving
- * all len bytes. If the blocking call was interrupted due to a disconnection,
- * subsequent calls to scif_recv() will copy all bytes received upto the point
- * of disconnection.
- *
- * In user mode, the select() and poll() functions can be used to determine
- * when data is available to be received. In kernel mode, the scif_poll()
- * function may be used for this purpose.
- *
- * It is recommended that scif_send()/scif_recv() only be used for short
- * control-type message communication between SCIF endpoints. The SCIF RMA
- * APIs are expected to provide better performance for transfer sizes of
- * 1024 bytes or longer for the current MIC hardware and software
- * implementation.
- *
- * scif_recv() will block until the entire message is received if
- * SCIF_RECV_BLOCK is passed as the flags argument.
- *
- * Return:
- * Upon successful completion, scif_recv() returns the number of bytes
- * received; otherwise in user mode -1 is returned and errno is set to
- * indicate the error; in kernel mode the negative of one of the following
- * errors is returned.
- *
- * Errors:
- * EAGAIN - The destination node is returning from a low power state
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- * ECONNRESET - Connection reset by peer
- * EINVAL - flags is invalid, or len is negative
- * ENODEV - The remote node is lost or existed, but is not currently in the
- * network since it may have crashed
- * ENOMEM - Not enough space
- * ENOTCONN - The endpoint is not connected
- */
-int scif_recv(scif_epd_t epd, void *msg, int len, int flags);
-
-/**
- * scif_register() - Mark a memory region for remote access.
- * @epd:		endpoint descriptor
- * @addr:		starting virtual address
- * @len:		length of range
- * @offset:		offset of window
- * @prot_flags:		read/write protection flags
- * @map_flags:		mapping flags
- *
- * The scif_register() function opens a window, a range of whole pages of the
- * registered address space of the endpoint epd, starting at offset po and
- * continuing for len bytes. The value of po, further described below, is a
- * function of the parameters offset and len, and the value of map_flags. Each
- * page of the window represents the physical memory page which backs the
- * corresponding page of the range of virtual address pages starting at addr
- * and continuing for len bytes. addr and len are constrained to be multiples
- * of the page size. A successful scif_register() call returns po.
- *
- * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset
- * exactly, and offset is constrained to be a multiple of the page size. The
- * mapping established by scif_register() will not replace any existing
- * registration; an error is returned if any page within the range [offset,
- * offset + len - 1] intersects an existing window.
- *
- * When SCIF_MAP_FIXED is not set, the implementation uses offset in an
- * implementation-defined manner to arrive at po. The po value so chosen will
- * be an area of the registered address space that the implementation deems
- * suitable for a mapping of len bytes. An offset value of 0 is interpreted as
- * granting the implementation complete freedom in selecting po, subject to
- * constraints described below. A non-zero value of offset is taken to be a
- * suggestion of an offset near which the mapping should be placed. When the
- * implementation selects a value for po, it does not replace any extant
- * window. In all cases, po will be a multiple of the page size.
- *
- * The physical pages which are so represented by a window are available for
- * access in calls to mmap(), scif_readfrom(), scif_writeto(),
- * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the
- * physical pages represented by the window will not be reused by the memory
- * subsystem for any other purpose. Note that the same physical page may be
- * represented by multiple windows.
- *
- * Subsequent operations which change the memory pages to which virtual
- * addresses are mapped (such as mmap(), munmap()) have no effect on
- * existing window.
- *
- * If the process will fork(), it is recommended that the registered
- * virtual address range be marked with MADV_DONTFORK. Doing so will prevent
- * problems due to copy-on-write semantics.
- *
- * The prot_flags argument is formed by OR'ing together one or more of the
- * following values.
- * SCIF_PROT_READ - allow read operations from the window
- * SCIF_PROT_WRITE - allow write operations to the window
- *
- * Return:
- * Upon successful completion, scif_register() returns the offset at which the
- * mapping was placed (po); otherwise in user mode SCIF_REGISTER_FAILED (that
- * is (off_t *)-1) is returned and errno is set to indicate the error; in
- * kernel mode the negative of one of the following errors is returned.
- *
- * Errors:
- * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags, and pages in the range
- * [offset, offset + len -1] are already registered
- * EAGAIN - The mapping could not be performed due to lack of resources
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- * ECONNRESET - Connection reset by peer
- * EINVAL - map_flags is invalid, or prot_flags is invalid, or SCIF_MAP_FIXED is
- * set in flags, and offset is not a multiple of the page size, or addr is not a
- * multiple of the page size, or len is not a multiple of the page size, or is
- * 0, or offset is negative
- * ENODEV - The remote node is lost or existed, but is not currently in the
- * network since it may have crashed
- * ENOMEM - Not enough space
- * ENOTCONN -The endpoint is not connected
- */
-off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset,
-		    int prot_flags, int map_flags);
-
-/**
- * scif_unregister() - Mark a memory region for remote access.
- * @epd:	endpoint descriptor
- * @offset:	start of range to unregister
- * @len:	length of range to unregister
- *
- * The scif_unregister() function closes those previously registered windows
- * which are entirely within the range [offset, offset + len - 1]. It is an
- * error to specify a range which intersects only a subrange of a window.
- *
- * On a successful return, pages within the window may no longer be specified
- * in calls to mmap(), scif_readfrom(), scif_writeto(), scif_vreadfrom(),
- * scif_vwriteto(), scif_get_pages, and scif_fence_signal(). The window,
- * however, continues to exist until all previous references against it are
- * removed. A window is referenced if there is a mapping to it created by
- * mmap(), or if scif_get_pages() was called against the window
- * (and the pages have not been returned via scif_put_pages()). A window is
- * also referenced while an RMA, in which some range of the window is a source
- * or destination, is in progress. Finally a window is referenced while some
- * offset in that window was specified to scif_fence_signal(), and the RMAs
- * marked by that call to scif_fence_signal() have not completed. While a
- * window is in this state, its registered address space pages are not
- * available for use in a new registered window.
- *
- * When all such references to the window have been removed, its references to
- * all the physical pages which it represents are removed. Similarly, the
- * registered address space pages of the window become available for
- * registration in a new window.
- *
- * Return:
- * Upon successful completion, scif_unregister() returns 0; otherwise in user
- * mode -1 is returned and errno is set to indicate the error; in kernel mode
- * the negative of one of the following errors is returned. In the event of an
- * error, no windows are unregistered.
- *
- * Errors:
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- * ECONNRESET - Connection reset by peer
- * EINVAL - the range [offset, offset + len - 1] intersects a subrange of a
- * window, or offset is negative
- * ENODEV - The remote node is lost or existed, but is not currently in the
- * network since it may have crashed
- * ENOTCONN - The endpoint is not connected
- * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid for the
- * registered address space of epd
- */
-int scif_unregister(scif_epd_t epd, off_t offset, size_t len);
-
-/**
- * scif_readfrom() - Copy from a remote address space
- * @epd:	endpoint descriptor
- * @loffset:	offset in local registered address space to
- *		which to copy
- * @len:	length of range to copy
- * @roffset:	offset in remote registered address space
- *		from which to copy
- * @rma_flags:	transfer mode flags
- *
- * scif_readfrom() copies len bytes from the remote registered address space of
- * the peer of endpoint epd, starting at the offset roffset to the local
- * registered address space of epd, starting at the offset loffset.
- *
- * Each of the specified ranges [loffset, loffset + len - 1] and [roffset,
- * roffset + len - 1] must be within some registered window or windows of the
- * local and remote nodes. A range may intersect multiple registered windows,
- * but only if those windows are contiguous in the registered address space.
- *
- * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
- * programmed read/writes. Otherwise the data is copied using DMA. If rma_-
- * flags includes SCIF_RMA_SYNC, then scif_readfrom() will return after the
- * transfer is complete. Otherwise, the transfer may be performed asynchron-
- * ously. The order in which any two asynchronous RMA operations complete
- * is non-deterministic. The synchronization functions, scif_fence_mark()/
- * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
- * the completion of asynchronous RMA operations on the same endpoint.
- *
- * The DMA transfer of individual bytes is not guaranteed to complete in
- * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
- * cacheline or partial cacheline of the source range will become visible on
- * the destination node after all other transferred data in the source
- * range has become visible on the destination node.
- *
- * The optimal DMA performance will likely be realized if both
- * loffset and roffset are cacheline aligned (are a multiple of 64). Lower
- * performance will likely be realized if loffset and roffset are not
- * cacheline aligned but are separated by some multiple of 64. The lowest level
- * of performance is likely if loffset and roffset are not separated by a
- * multiple of 64.
- *
- * The rma_flags argument is formed by ORing together zero or more of the
- * following values.
- * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA
- *	engine.
- * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the
- *		transfer has completed. Passing this flag results in the
- *		current implementation busy waiting and consuming CPU cycles
- *		while the DMA transfer is in progress for best performance by
- *		avoiding the interrupt latency.
- * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of
- *		the source range becomes visible on the destination node
- *		after all other transferred data in the source range has
- *		become visible on the destination
- *
- * Return:
- * Upon successful completion, scif_readfrom() returns 0; otherwise in user
- * mode -1 is returned and errno is set to indicate the error; in kernel mode
- * the negative of one of the following errors is returned.
- *
- * Errors:
- * EACCES - Attempt to write to a read-only range
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- * ECONNRESET - Connection reset by peer
- * EINVAL - rma_flags is invalid
- * ENODEV - The remote node is lost or existed, but is not currently in the
- * network since it may have crashed
- * ENOTCONN - The endpoint is not connected
- * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered
- * address space of epd, or, The range [roffset, roffset + len - 1] is invalid
- * for the registered address space of the peer of epd, or loffset or roffset
- * is negative
- */
-int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, off_t
-		  roffset, int rma_flags);
-
-/**
- * scif_writeto() - Copy to a remote address space
- * @epd:	endpoint descriptor
- * @loffset:	offset in local registered address space
- *		from which to copy
- * @len:	length of range to copy
- * @roffset:	offset in remote registered address space to
- *		which to copy
- * @rma_flags:	transfer mode flags
- *
- * scif_writeto() copies len bytes from the local registered address space of
- * epd, starting at the offset loffset to the remote registered address space
- * of the peer of endpoint epd, starting at the offset roffset.
- *
- * Each of the specified ranges [loffset, loffset + len - 1] and [roffset,
- * roffset + len - 1] must be within some registered window or windows of the
- * local and remote nodes. A range may intersect multiple registered windows,
- * but only if those windows are contiguous in the registered address space.
- *
- * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
- * programmed read/writes. Otherwise the data is copied using DMA. If rma_-
- * flags includes SCIF_RMA_SYNC, then scif_writeto() will return after the
- * transfer is complete. Otherwise, the transfer may be performed asynchron-
- * ously. The order in which any two asynchronous RMA operations complete
- * is non-deterministic. The synchronization functions, scif_fence_mark()/
- * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
- * the completion of asynchronous RMA operations on the same endpoint.
- *
- * The DMA transfer of individual bytes is not guaranteed to complete in
- * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
- * cacheline or partial cacheline of the source range will become visible on
- * the destination node after all other transferred data in the source
- * range has become visible on the destination node.
- *
- * The optimal DMA performance will likely be realized if both
- * loffset and roffset are cacheline aligned (are a multiple of 64). Lower
- * performance will likely be realized if loffset and roffset are not cacheline
- * aligned but are separated by some multiple of 64. The lowest level of
- * performance is likely if loffset and roffset are not separated by a multiple
- * of 64.
- *
- * The rma_flags argument is formed by ORing together zero or more of the
- * following values.
- * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA
- *			engine.
- * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the
- *		transfer has completed. Passing this flag results in the
- *		current implementation busy waiting and consuming CPU cycles
- *		while the DMA transfer is in progress for best performance by
- *		avoiding the interrupt latency.
- * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of
- *		the source range becomes visible on the destination node
- *		after all other transferred data in the source range has
- *		become visible on the destination
- *
- * Return:
- * Upon successful completion, scif_readfrom() returns 0; otherwise in user
- * mode -1 is returned and errno is set to indicate the error; in kernel mode
- * the negative of one of the following errors is returned.
- *
- * Errors:
- * EACCES - Attempt to write to a read-only range
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- * ECONNRESET - Connection reset by peer
- * EINVAL - rma_flags is invalid
- * ENODEV - The remote node is lost or existed, but is not currently in the
- * network since it may have crashed
- * ENOTCONN - The endpoint is not connected
- * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered
- * address space of epd, or, The range [roffset , roffset + len -1] is invalid
- * for the registered address space of the peer of epd, or loffset or roffset
- * is negative
- */
-int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t
-		 roffset, int rma_flags);
-
-/**
- * scif_vreadfrom() - Copy from a remote address space
- * @epd:	endpoint descriptor
- * @addr:	address to which to copy
- * @len:	length of range to copy
- * @roffset:	offset in remote registered address space
- *		from which to copy
- * @rma_flags:	transfer mode flags
- *
- * scif_vreadfrom() copies len bytes from the remote registered address
- * space of the peer of endpoint epd, starting at the offset roffset, to local
- * memory, starting at addr.
- *
- * The specified range [roffset, roffset + len - 1] must be within some
- * registered window or windows of the remote nodes. The range may
- * intersect multiple registered windows, but only if those windows are
- * contiguous in the registered address space.
- *
- * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
- * programmed read/writes. Otherwise the data is copied using DMA. If rma_-
- * flags includes SCIF_RMA_SYNC, then scif_vreadfrom() will return after the
- * transfer is complete. Otherwise, the transfer may be performed asynchron-
- * ously. The order in which any two asynchronous RMA operations complete
- * is non-deterministic. The synchronization functions, scif_fence_mark()/
- * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
- * the completion of asynchronous RMA operations on the same endpoint.
- *
- * The DMA transfer of individual bytes is not guaranteed to complete in
- * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
- * cacheline or partial cacheline of the source range will become visible on
- * the destination node after all other transferred data in the source
- * range has become visible on the destination node.
- *
- * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back
- * the specified local memory range may be remain in a pinned state even after
- * the specified transfer completes. This may reduce overhead if some or all of
- * the same virtual address range is referenced in a subsequent call of
- * scif_vreadfrom() or scif_vwriteto().
- *
- * The optimal DMA performance will likely be realized if both
- * addr and roffset are cacheline aligned (are a multiple of 64). Lower
- * performance will likely be realized if addr and roffset are not
- * cacheline aligned but are separated by some multiple of 64. The lowest level
- * of performance is likely if addr and roffset are not separated by a
- * multiple of 64.
- *
- * The rma_flags argument is formed by ORing together zero or more of the
- * following values.
- * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA
- *	engine.
- * SCIF_RMA_USECACHE - enable registration caching
- * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the
- *		transfer has completed. Passing this flag results in the
- *		current implementation busy waiting and consuming CPU cycles
- *		while the DMA transfer is in progress for best performance by
- *		avoiding the interrupt latency.
- * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of
- *	the source range becomes visible on the destination node
- *	after all other transferred data in the source range has
- *	become visible on the destination
- *
- * Return:
- * Upon successful completion, scif_vreadfrom() returns 0; otherwise in user
- * mode -1 is returned and errno is set to indicate the error; in kernel mode
- * the negative of one of the following errors is returned.
- *
- * Errors:
- * EACCES - Attempt to write to a read-only range
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- * ECONNRESET - Connection reset by peer
- * EINVAL - rma_flags is invalid
- * ENODEV - The remote node is lost or existed, but is not currently in the
- * network since it may have crashed
- * ENOTCONN - The endpoint is not connected
- * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the
- * registered address space of epd
- */
-int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, off_t roffset,
-		   int rma_flags);
-
-/**
- * scif_vwriteto() - Copy to a remote address space
- * @epd:	endpoint descriptor
- * @addr:	address from which to copy
- * @len:	length of range to copy
- * @roffset:	offset in remote registered address space to
- *		which to copy
- * @rma_flags:	transfer mode flags
- *
- * scif_vwriteto() copies len bytes from the local memory, starting at addr, to
- * the remote registered address space of the peer of endpoint epd, starting at
- * the offset roffset.
- *
- * The specified range [roffset, roffset + len - 1] must be within some
- * registered window or windows of the remote nodes. The range may intersect
- * multiple registered windows, but only if those windows are contiguous in the
- * registered address space.
- *
- * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
- * programmed read/writes. Otherwise the data is copied using DMA. If rma_-
- * flags includes SCIF_RMA_SYNC, then scif_vwriteto() will return after the
- * transfer is complete. Otherwise, the transfer may be performed asynchron-
- * ously. The order in which any two asynchronous RMA operations complete
- * is non-deterministic. The synchronization functions, scif_fence_mark()/
- * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
- * the completion of asynchronous RMA operations on the same endpoint.
- *
- * The DMA transfer of individual bytes is not guaranteed to complete in
- * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
- * cacheline or partial cacheline of the source range will become visible on
- * the destination node after all other transferred data in the source
- * range has become visible on the destination node.
- *
- * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back
- * the specified local memory range may be remain in a pinned state even after
- * the specified transfer completes. This may reduce overhead if some or all of
- * the same virtual address range is referenced in a subsequent call of
- * scif_vreadfrom() or scif_vwriteto().
- *
- * The optimal DMA performance will likely be realized if both
- * addr and offset are cacheline aligned (are a multiple of 64). Lower
- * performance will likely be realized if addr and offset are not cacheline
- * aligned but are separated by some multiple of 64. The lowest level of
- * performance is likely if addr and offset are not separated by a multiple of
- * 64.
- *
- * The rma_flags argument is formed by ORing together zero or more of the
- * following values.
- * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA
- *	engine.
- * SCIF_RMA_USECACHE - allow registration caching
- * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the
- *		transfer has completed. Passing this flag results in the
- *		current implementation busy waiting and consuming CPU cycles
- *		while the DMA transfer is in progress for best performance by
- *		avoiding the interrupt latency.
- * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of
- *		the source range becomes visible on the destination node
- *		after all other transferred data in the source range has
- *		become visible on the destination
- *
- * Return:
- * Upon successful completion, scif_vwriteto() returns 0; otherwise in user
- * mode -1 is returned and errno is set to indicate the error; in kernel mode
- * the negative of one of the following errors is returned.
- *
- * Errors:
- * EACCES - Attempt to write to a read-only range
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- * ECONNRESET - Connection reset by peer
- * EINVAL - rma_flags is invalid
- * ENODEV - The remote node is lost or existed, but is not currently in the
- * network since it may have crashed
- * ENOTCONN - The endpoint is not connected
- * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the
- * registered address space of epd
- */
-int scif_vwriteto(scif_epd_t epd, void *addr, size_t len, off_t roffset,
-		  int rma_flags);
-
-/**
- * scif_fence_mark() - Mark previously issued RMAs
- * @epd:	endpoint descriptor
- * @flags:	control flags
- * @mark:	marked value returned as output.
- *
- * scif_fence_mark() returns after marking the current set of all uncompleted
- * RMAs initiated through the endpoint epd or the current set of all
- * uncompleted RMAs initiated through the peer of endpoint epd. The RMAs are
- * marked with a value returned at mark. The application may subsequently call
- * scif_fence_wait(), passing the value returned at mark, to await completion
- * of all RMAs so marked.
- *
- * The flags argument has exactly one of the following values.
- * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint
- *	epd are marked
- * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer
- *	of endpoint epd are marked
- *
- * Return:
- * Upon successful completion, scif_fence_mark() returns 0; otherwise in user
- * mode -1 is returned and errno is set to indicate the error; in kernel mode
- * the negative of one of the following errors is returned.
- *
- * Errors:
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- * ECONNRESET - Connection reset by peer
- * EINVAL - flags is invalid
- * ENODEV - The remote node is lost or existed, but is not currently in the
- * network since it may have crashed
- * ENOTCONN - The endpoint is not connected
- * ENOMEM - Insufficient kernel memory was available
- */
-int scif_fence_mark(scif_epd_t epd, int flags, int *mark);
-
-/**
- * scif_fence_wait() - Wait for completion of marked RMAs
- * @epd:	endpoint descriptor
- * @mark:	mark request
- *
- * scif_fence_wait() returns after all RMAs marked with mark have completed.
- * The value passed in mark must have been obtained in a previous call to
- * scif_fence_mark().
- *
- * Return:
- * Upon successful completion, scif_fence_wait() returns 0; otherwise in user
- * mode -1 is returned and errno is set to indicate the error; in kernel mode
- * the negative of one of the following errors is returned.
- *
- * Errors:
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- * ECONNRESET - Connection reset by peer
- * ENODEV - The remote node is lost or existed, but is not currently in the
- * network since it may have crashed
- * ENOTCONN - The endpoint is not connected
- * ENOMEM - Insufficient kernel memory was available
- */
-int scif_fence_wait(scif_epd_t epd, int mark);
-
-/**
- * scif_fence_signal() - Request a memory update on completion of RMAs
- * @epd:	endpoint descriptor
- * @loff:	local offset
- * @lval:	local value to write to loffset
- * @roff:	remote offset
- * @rval:	remote value to write to roffset
- * @flags:	flags
- *
- * scif_fence_signal() returns after marking the current set of all uncompleted
- * RMAs initiated through the endpoint epd or marking the current set of all
- * uncompleted RMAs initiated through the peer of endpoint epd.
- *
- * If flags includes SCIF_SIGNAL_LOCAL, then on completion of the RMAs in the
- * marked set, lval is written to memory at the address corresponding to offset
- * loff in the local registered address space of epd. loff must be within a
- * registered window. If flags includes SCIF_SIGNAL_REMOTE, then on completion
- * of the RMAs in the marked set, rval is written to memory at the address
- * corresponding to offset roff in the remote registered address space of epd.
- * roff must be within a remote registered window of the peer of epd. Note
- * that any specified offset must be DWORD (4 byte / 32 bit) aligned.
- *
- * The flags argument is formed by OR'ing together the following.
- * Exactly one of the following values.
- * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint
- *	epd are marked
- * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer
- *	of endpoint epd are marked
- * One or more of the following values.
- * SCIF_SIGNAL_LOCAL - On completion of the marked set of RMAs, write lval to
- *	memory at the address corresponding to offset loff in the local
- *	registered address space of epd.
- * SCIF_SIGNAL_REMOTE - On completion of the marked set of RMAs, write rval to
- *	memory at the address corresponding to offset roff in the remote
- *	registered address space of epd.
- *
- * Return:
- * Upon successful completion, scif_fence_signal() returns 0; otherwise in
- * user mode -1 is returned and errno is set to indicate the error; in kernel
- * mode the negative of one of the following errors is returned.
- *
- * Errors:
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- * ECONNRESET - Connection reset by peer
- * EINVAL - flags is invalid, or loff or roff are not DWORD aligned
- * ENODEV - The remote node is lost or existed, but is not currently in the
- * network since it may have crashed
- * ENOTCONN - The endpoint is not connected
- * ENXIO - loff is invalid for the registered address of epd, or roff is invalid
- * for the registered address space, of the peer of epd
- */
-int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval, off_t roff,
-		      u64 rval, int flags);
-
-/**
- * scif_get_node_ids() - Return information about online nodes
- * @nodes:	array in which to return online node IDs
- * @len:	number of entries in the nodes array
- * @self:	address to place the node ID of the local node
- *
- * scif_get_node_ids() fills in the nodes array with up to len node IDs of the
- * nodes in the SCIF network. If there is not enough space in nodes, as
- * indicated by the len parameter, only len node IDs are returned in nodes. The
- * return value of scif_get_node_ids() is the total number of nodes currently in
- * the SCIF network. By checking the return value against the len parameter,
- * the user may determine if enough space for nodes was allocated.
- *
- * The node ID of the local node is returned at self.
- *
- * Return:
- * Upon successful completion, scif_get_node_ids() returns the actual number of
- * online nodes in the SCIF network including 'self'; otherwise in user mode
- * -1 is returned and errno is set to indicate the error; in kernel mode no
- * errors are returned.
- */
-int scif_get_node_ids(u16 *nodes, int len, u16 *self);
-
-/**
- * scif_pin_pages() - Pin a set of pages
- * @addr:		Virtual address of range to pin
- * @len:		Length of range to pin
- * @prot_flags:		Page protection flags
- * @map_flags:		Page classification flags
- * @pinned_pages:	Handle to pinned pages
- *
- * scif_pin_pages() pins (locks in physical memory) the physical pages which
- * back the range of virtual address pages starting at addr and continuing for
- * len bytes. addr and len are constrained to be multiples of the page size. A
- * successful scif_pin_pages() call returns a handle to pinned_pages which may
- * be used in subsequent calls to scif_register_pinned_pages().
- *
- * The pages will remain pinned as long as there is a reference against the
- * scif_pinned_pages_t value returned by scif_pin_pages() and until
- * scif_unpin_pages() is called, passing the scif_pinned_pages_t value. A
- * reference is added to a scif_pinned_pages_t value each time a window is
- * created by calling scif_register_pinned_pages() and passing the
- * scif_pinned_pages_t value. A reference is removed from a
- * scif_pinned_pages_t value each time such a window is deleted.
- *
- * Subsequent operations which change the memory pages to which virtual
- * addresses are mapped (such as mmap(), munmap()) have no effect on the
- * scif_pinned_pages_t value or windows created against it.
- *
- * If the process will fork(), it is recommended that the registered
- * virtual address range be marked with MADV_DONTFORK. Doing so will prevent
- * problems due to copy-on-write semantics.
- *
- * The prot_flags argument is formed by OR'ing together one or more of the
- * following values.
- * SCIF_PROT_READ - allow read operations against the pages
- * SCIF_PROT_WRITE - allow write operations against the pages
- * The map_flags argument can be set as SCIF_MAP_KERNEL to interpret addr as a
- * kernel space address. By default, addr is interpreted as a user space
- * address.
- *
- * Return:
- * Upon successful completion, scif_pin_pages() returns 0; otherwise the
- * negative of one of the following errors is returned.
- *
- * Errors:
- * EINVAL - prot_flags is invalid, map_flags is invalid, or offset is negative
- * ENOMEM - Not enough space
- */
-int scif_pin_pages(void *addr, size_t len, int prot_flags, int map_flags,
-		   scif_pinned_pages_t *pinned_pages);
-
-/**
- * scif_unpin_pages() - Unpin a set of pages
- * @pinned_pages:	Handle to pinned pages to be unpinned
- *
- * scif_unpin_pages() prevents scif_register_pinned_pages() from registering new
- * windows against pinned_pages. The physical pages represented by pinned_pages
- * will remain pinned until all windows previously registered against
- * pinned_pages are deleted (the window is scif_unregister()'d and all
- * references to the window are removed (see scif_unregister()).
- *
- * pinned_pages must have been obtain from a previous call to scif_pin_pages().
- * After calling scif_unpin_pages(), it is an error to pass pinned_pages to
- * scif_register_pinned_pages().
- *
- * Return:
- * Upon successful completion, scif_unpin_pages() returns 0; otherwise the
- * negative of one of the following errors is returned.
- *
- * Errors:
- * EINVAL - pinned_pages is not valid
- */
-int scif_unpin_pages(scif_pinned_pages_t pinned_pages);
-
-/**
- * scif_register_pinned_pages() - Mark a memory region for remote access.
- * @epd:		endpoint descriptor
- * @pinned_pages:	Handle to pinned pages
- * @offset:		Registered address space offset
- * @map_flags:		Flags which control where pages are mapped
- *
- * The scif_register_pinned_pages() function opens a window, a range of whole
- * pages of the registered address space of the endpoint epd, starting at
- * offset po. The value of po, further described below, is a function of the
- * parameters offset and pinned_pages, and the value of map_flags. Each page of
- * the window represents a corresponding physical memory page of the range
- * represented by pinned_pages; the length of the window is the same as the
- * length of range represented by pinned_pages. A successful
- * scif_register_pinned_pages() call returns po as the return value.
- *
- * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset
- * exactly, and offset is constrained to be a multiple of the page size. The
- * mapping established by scif_register_pinned_pages() will not replace any
- * existing registration; an error is returned if any page of the new window
- * would intersect an existing window.
- *
- * When SCIF_MAP_FIXED is not set, the implementation uses offset in an
- * implementation-defined manner to arrive at po. The po so chosen will be an
- * area of the registered address space that the implementation deems suitable
- * for a mapping of the required size. An offset value of 0 is interpreted as
- * granting the implementation complete freedom in selecting po, subject to
- * constraints described below. A non-zero value of offset is taken to be a
- * suggestion of an offset near which the mapping should be placed. When the
- * implementation selects a value for po, it does not replace any extant
- * window. In all cases, po will be a multiple of the page size.
- *
- * The physical pages which are so represented by a window are available for
- * access in calls to scif_get_pages(), scif_readfrom(), scif_writeto(),
- * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the
- * physical pages represented by the window will not be reused by the memory
- * subsystem for any other purpose. Note that the same physical page may be
- * represented by multiple windows.
- *
- * Windows created by scif_register_pinned_pages() are unregistered by
- * scif_unregister().
- *
- * The map_flags argument can be set to SCIF_MAP_FIXED which interprets a
- * fixed offset.
- *
- * Return:
- * Upon successful completion, scif_register_pinned_pages() returns the offset
- * at which the mapping was placed (po); otherwise the negative of one of the
- * following errors is returned.
- *
- * Errors:
- * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags and pages in the new window
- * would intersect an existing window
- * EAGAIN - The mapping could not be performed due to lack of resources
- * ECONNRESET - Connection reset by peer
- * EINVAL - map_flags is invalid, or SCIF_MAP_FIXED is set in map_flags, and
- * offset is not a multiple of the page size, or offset is negative
- * ENODEV - The remote node is lost or existed, but is not currently in the
- * network since it may have crashed
- * ENOMEM - Not enough space
- * ENOTCONN - The endpoint is not connected
- */
-off_t scif_register_pinned_pages(scif_epd_t epd,
-				 scif_pinned_pages_t pinned_pages,
-				 off_t offset, int map_flags);
-
-/**
- * scif_get_pages() - Add references to remote registered pages
- * @epd:	endpoint descriptor
- * @offset:	remote registered offset
- * @len:	length of range of pages
- * @pages:	returned scif_range structure
- *
- * scif_get_pages() returns the addresses of the physical pages represented by
- * those pages of the registered address space of the peer of epd, starting at
- * offset and continuing for len bytes. offset and len are constrained to be
- * multiples of the page size.
- *
- * All of the pages in the specified range [offset, offset + len - 1] must be
- * within a single window of the registered address space of the peer of epd.
- *
- * The addresses are returned as a virtually contiguous array pointed to by the
- * phys_addr component of the scif_range structure whose address is returned in
- * pages. The nr_pages component of scif_range is the length of the array. The
- * prot_flags component of scif_range holds the protection flag value passed
- * when the pages were registered.
- *
- * Each physical page whose address is returned by scif_get_pages() remains
- * available and will not be released for reuse until the scif_range structure
- * is returned in a call to scif_put_pages(). The scif_range structure returned
- * by scif_get_pages() must be unmodified.
- *
- * It is an error to call scif_close() on an endpoint on which a scif_range
- * structure of that endpoint has not been returned to scif_put_pages().
- *
- * Return:
- * Upon successful completion, scif_get_pages() returns 0; otherwise the
- * negative of one of the following errors is returned.
- * Errors:
- * ECONNRESET - Connection reset by peer.
- * EINVAL - offset is not a multiple of the page size, or offset is negative, or
- * len is not a multiple of the page size
- * ENODEV - The remote node is lost or existed, but is not currently in the
- * network since it may have crashed
- * ENOTCONN - The endpoint is not connected
- * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid
- * for the registered address space of the peer epd
- */
-int scif_get_pages(scif_epd_t epd, off_t offset, size_t len,
-		   struct scif_range **pages);
-
-/**
- * scif_put_pages() - Remove references from remote registered pages
- * @pages:	pages to be returned
- *
- * scif_put_pages() releases a scif_range structure previously obtained by
- * calling scif_get_pages(). The physical pages represented by pages may
- * be reused when the window which represented those pages is unregistered.
- * Therefore, those pages must not be accessed after calling scif_put_pages().
- *
- * Return:
- * Upon successful completion, scif_put_pages() returns 0; otherwise the
- * negative of one of the following errors is returned.
- * Errors:
- * EINVAL - pages does not point to a valid scif_range structure, or
- * the scif_range structure pointed to by pages was already returned
- * ENODEV - The remote node is lost or existed, but is not currently in the
- * network since it may have crashed
- * ENOTCONN - The endpoint is not connected
- */
-int scif_put_pages(struct scif_range *pages);
-
-/**
- * scif_poll() - Wait for some event on an endpoint
- * @epds:	Array of endpoint descriptors
- * @nepds:	Length of epds
- * @timeout:	Upper limit on time for which scif_poll() will block
- *
- * scif_poll() waits for one of a set of endpoints to become ready to perform
- * an I/O operation.
- *
- * The epds argument specifies the endpoint descriptors to be examined and the
- * events of interest for each endpoint descriptor. epds is a pointer to an
- * array with one member for each open endpoint descriptor of interest.
- *
- * The number of items in the epds array is specified in nepds. The epd field
- * of scif_pollepd is an endpoint descriptor of an open endpoint. The field
- * events is a bitmask specifying the events which the application is
- * interested in. The field revents is an output parameter, filled by the
- * kernel with the events that actually occurred. The bits returned in revents
- * can include any of those specified in events, or one of the values EPOLLERR,
- * EPOLLHUP, or EPOLLNVAL. (These three bits are meaningless in the events
- * field, and will be set in the revents field whenever the corresponding
- * condition is true.)
- *
- * If none of the events requested (and no error) has occurred for any of the
- * endpoint descriptors, then scif_poll() blocks until one of the events occurs.
- *
- * The timeout argument specifies an upper limit on the time for which
- * scif_poll() will block, in milliseconds. Specifying a negative value in
- * timeout means an infinite timeout.
- *
- * The following bits may be set in events and returned in revents.
- * EPOLLIN - Data may be received without blocking. For a connected
- * endpoint, this means that scif_recv() may be called without blocking. For a
- * listening endpoint, this means that scif_accept() may be called without
- * blocking.
- * EPOLLOUT - Data may be sent without blocking. For a connected endpoint, this
- * means that scif_send() may be called without blocking. EPOLLOUT may also be
- * used to block waiting for a non-blocking connect to complete. This bit value
- * has no meaning for a listening endpoint and is ignored if specified.
- *
- * The following bits are only returned in revents, and are ignored if set in
- * events.
- * EPOLLERR - An error occurred on the endpoint
- * EPOLLHUP - The connection to the peer endpoint was disconnected
- * EPOLLNVAL - The specified endpoint descriptor is invalid.
- *
- * Return:
- * Upon successful completion, scif_poll() returns a non-negative value. A
- * positive value indicates the total number of endpoint descriptors that have
- * been selected (that is, endpoint descriptors for which the revents member is
- * non-zero). A value of 0 indicates that the call timed out and no endpoint
- * descriptors have been selected. Otherwise in user mode -1 is returned and
- * errno is set to indicate the error; in kernel mode the negative of one of
- * the following errors is returned.
- *
- * Errors:
- * EINTR - A signal occurred before any requested event
- * EINVAL - The nepds argument is greater than {OPEN_MAX}
- * ENOMEM - There was no space to allocate file descriptor tables
- */
-int scif_poll(struct scif_pollepd *epds, unsigned int nepds, long timeout);
-
-/**
- * scif_client_register() - Register a SCIF client
- * @client:	client to be registered
- *
- * scif_client_register() registers a SCIF client. The probe() method
- * of the client is called when SCIF peer devices come online and the
- * remove() method is called when the peer devices disappear.
- *
- * Return:
- * Upon successful completion, scif_client_register() returns a non-negative
- * value. Otherwise the return value is the same as subsys_interface_register()
- * in the kernel.
- */
-int scif_client_register(struct scif_client *client);
-
-/**
- * scif_client_unregister() - Unregister a SCIF client
- * @client:	client to be unregistered
- *
- * scif_client_unregister() unregisters a SCIF client.
- *
- * Return:
- * None
- */
-void scif_client_unregister(struct scif_client *client);
-
-#endif /* __SCIF_H__ */
diff --git a/include/uapi/linux/mic_common.h b/include/uapi/linux/mic_common.h
deleted file mode 100644
index 504e523f702c..000000000000
--- a/include/uapi/linux/mic_common.h
+++ /dev/null
@@ -1,235 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Intel MIC driver.
- *
- */
-#ifndef __MIC_COMMON_H_
-#define __MIC_COMMON_H_
-
-#include <linux/virtio_ring.h>
-
-#define __mic_align(a, x) (((a) + (x) - 1) & ~((x) - 1))
-
-/**
- * struct mic_device_desc: Virtio device information shared between the
- * virtio driver and userspace backend
- *
- * @type: Device type: console/network/disk etc.  Type 0/-1 terminates.
- * @num_vq: Number of virtqueues.
- * @feature_len: Number of bytes of feature bits.  Multiply by 2: one for
-   host features and one for guest acknowledgements.
- * @config_len: Number of bytes of the config array after virtqueues.
- * @status: A status byte, written by the Guest.
- * @config: Start of the following variable length config.
- */
-struct mic_device_desc {
-	__s8 type;
-	__u8 num_vq;
-	__u8 feature_len;
-	__u8 config_len;
-	__u8 status;
-	__le64 config[0];
-} __attribute__ ((aligned(8)));
-
-/**
- * struct mic_device_ctrl: Per virtio device information in the device page
- * used internally by the host and card side drivers.
- *
- * @vdev: Used for storing MIC vdev information by the guest.
- * @config_change: Set to 1 by host when a config change is requested.
- * @vdev_reset: Set to 1 by guest to indicate virtio device has been reset.
- * @guest_ack: Set to 1 by guest to ack a command.
- * @host_ack: Set to 1 by host to ack a command.
- * @used_address_updated: Set to 1 by guest when the used address should be
- * updated.
- * @c2h_vdev_db: The doorbell number to be used by guest. Set by host.
- * @h2c_vdev_db: The doorbell number to be used by host. Set by guest.
- */
-struct mic_device_ctrl {
-	__le64 vdev;
-	__u8 config_change;
-	__u8 vdev_reset;
-	__u8 guest_ack;
-	__u8 host_ack;
-	__u8 used_address_updated;
-	__s8 c2h_vdev_db;
-	__s8 h2c_vdev_db;
-} __attribute__ ((aligned(8)));
-
-/**
- * struct mic_bootparam: Virtio device independent information in device page
- *
- * @magic: A magic value used by the card to ensure it can see the host
- * @h2c_config_db: Host to Card Virtio config doorbell set by card
- * @node_id: Unique id of the node
- * @h2c_scif_db - Host to card SCIF doorbell set by card
- * @c2h_scif_db - Card to host SCIF doorbell set by host
- * @scif_host_dma_addr - SCIF host queue pair DMA address
- * @scif_card_dma_addr - SCIF card queue pair DMA address
- */
-struct mic_bootparam {
-	__le32 magic;
-	__s8 h2c_config_db;
-	__u8 node_id;
-	__u8 h2c_scif_db;
-	__u8 c2h_scif_db;
-	__u64 scif_host_dma_addr;
-	__u64 scif_card_dma_addr;
-} __attribute__ ((aligned(8)));
-
-/**
- * struct mic_device_page: High level representation of the device page
- *
- * @bootparam: The bootparam structure is used for sharing information and
- * status updates between MIC host and card drivers.
- * @desc: Array of MIC virtio device descriptors.
- */
-struct mic_device_page {
-	struct mic_bootparam bootparam;
-	struct mic_device_desc desc[0];
-};
-/**
- * struct mic_vqconfig: This is how we expect the device configuration field
- * for a virtqueue to be laid out in config space.
- *
- * @address: Guest/MIC physical address of the virtio ring
- * (avail and desc rings)
- * @used_address: Guest/MIC physical address of the used ring
- * @num: The number of entries in the virtio_ring
- */
-struct mic_vqconfig {
-	__le64 address;
-	__le64 used_address;
-	__le16 num;
-} __attribute__ ((aligned(8)));
-
-/*
- * The alignment to use between consumer and producer parts of vring.
- * This is pagesize for historical reasons.
- */
-#define MIC_VIRTIO_RING_ALIGN		4096
-
-#define MIC_MAX_VRINGS			4
-#define MIC_VRING_ENTRIES		128
-
-/*
- * Max vring entries (power of 2) to ensure desc and avail rings
- * fit in a single page
- */
-#define MIC_MAX_VRING_ENTRIES		128
-
-/**
- * Max size of the desc block in bytes: includes:
- *	- struct mic_device_desc
- *	- struct mic_vqconfig (num_vq of these)
- *	- host and guest features
- *	- virtio device config space
- */
-#define MIC_MAX_DESC_BLK_SIZE		256
-
-/**
- * struct _mic_vring_info - Host vring info exposed to userspace backend
- * for the avail index and magic for the card.
- *
- * @avail_idx: host avail idx
- * @magic: A magic debug cookie.
- */
-struct _mic_vring_info {
-	__u16 avail_idx;
-	__le32 magic;
-};
-
-/**
- * struct mic_vring - Vring information.
- *
- * @vr: The virtio ring.
- * @info: Host vring information exposed to the userspace backend for the
- * avail index and magic for the card.
- * @va: The va for the buffer allocated for vr and info.
- * @len: The length of the buffer required for allocating vr and info.
- */
-struct mic_vring {
-	struct vring vr;
-	struct _mic_vring_info *info;
-	void *va;
-	int len;
-};
-
-#define mic_aligned_desc_size(d) __mic_align(mic_desc_size(d), 8)
-
-#ifndef INTEL_MIC_CARD
-static inline unsigned mic_desc_size(const struct mic_device_desc *desc)
-{
-	return sizeof(*desc) + desc->num_vq * sizeof(struct mic_vqconfig)
-		+ desc->feature_len * 2 + desc->config_len;
-}
-
-static inline struct mic_vqconfig *
-mic_vq_config(const struct mic_device_desc *desc)
-{
-	return (struct mic_vqconfig *)(desc + 1);
-}
-
-static inline __u8 *mic_vq_features(const struct mic_device_desc *desc)
-{
-	return (__u8 *)(mic_vq_config(desc) + desc->num_vq);
-}
-
-static inline __u8 *mic_vq_configspace(const struct mic_device_desc *desc)
-{
-	return mic_vq_features(desc) + desc->feature_len * 2;
-}
-static inline unsigned mic_total_desc_size(struct mic_device_desc *desc)
-{
-	return mic_aligned_desc_size(desc) + sizeof(struct mic_device_ctrl);
-}
-#endif
-
-/* Device page size */
-#define MIC_DP_SIZE 4096
-
-#define MIC_MAGIC 0xc0ffee00
-
-/**
- * enum mic_states - MIC states.
- */
-enum mic_states {
-	MIC_READY = 0,
-	MIC_BOOTING,
-	MIC_ONLINE,
-	MIC_SHUTTING_DOWN,
-	MIC_RESETTING,
-	MIC_RESET_FAILED,
-	MIC_LAST
-};
-
-/**
- * enum mic_status - MIC status reported by card after
- * a host or card initiated shutdown or a card crash.
- */
-enum mic_status {
-	MIC_NOP = 0,
-	MIC_CRASHED,
-	MIC_HALTED,
-	MIC_POWER_OFF,
-	MIC_RESTART,
-	MIC_STATUS_LAST
-};
-
-#endif
diff --git a/include/uapi/linux/mic_ioctl.h b/include/uapi/linux/mic_ioctl.h
deleted file mode 100644
index 687b9cd9d3e2..000000000000
--- a/include/uapi/linux/mic_ioctl.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Intel MIC Host driver.
- *
- */
-#ifndef _MIC_IOCTL_H_
-#define _MIC_IOCTL_H_
-
-#include <linux/types.h>
-
-/*
- * mic_copy - MIC virtio descriptor copy.
- *
- * @iov: An array of IOVEC structures containing user space buffers.
- * @iovcnt: Number of IOVEC structures in iov.
- * @vr_idx: The vring index.
- * @update_used: A non zero value results in used index being updated.
- * @out_len: The aggregate of the total length written to or read from
- *	the virtio device.
- */
-struct mic_copy_desc {
-#ifdef __KERNEL__
-	struct iovec __user *iov;
-#else
-	struct iovec *iov;
-#endif
-	__u32 iovcnt;
-	__u8 vr_idx;
-	__u8 update_used;
-	__u32 out_len;
-};
-
-/*
- * Add a new virtio device
- * The (struct mic_device_desc *) pointer points to a device page entry
- *	for the virtio device consisting of:
- *	- struct mic_device_desc
- *	- struct mic_vqconfig (num_vq of these)
- *	- host and guest features
- *	- virtio device config space
- * The total size referenced by the pointer should equal the size returned
- * by desc_size() in mic_common.h
- */
-#define MIC_VIRTIO_ADD_DEVICE _IOWR('s', 1, struct mic_device_desc *)
-
-/*
- * Copy the number of entries in the iovec and update the used index
- * if requested by the user.
- */
-#define MIC_VIRTIO_COPY_DESC	_IOWR('s', 2, struct mic_copy_desc *)
-
-/*
- * Notify virtio device of a config change
- * The (__u8 *) pointer points to config space values for the device
- * as they should be written into the device page. The total size
- * referenced by the pointer should equal the config_len field of struct
- * mic_device_desc.
- */
-#define MIC_VIRTIO_CONFIG_CHANGE _IOWR('s', 5, __u8 *)
-
-#endif
diff --git a/samples/mic/mpssd/.gitignore b/samples/mic/mpssd/.gitignore
deleted file mode 100644
index aa03f1eb37a0..000000000000
--- a/samples/mic/mpssd/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-mpssd
diff --git a/samples/mic/mpssd/Makefile b/samples/mic/mpssd/Makefile
deleted file mode 100644
index a7a6e0c70424..000000000000
--- a/samples/mic/mpssd/Makefile
+++ /dev/null
@@ -1,28 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-ifndef CROSS_COMPILE
-uname_M := $(shell uname -m 2>/dev/null || echo not)
-ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
-
-ifeq ($(ARCH),x86)
-
-PROGS := mpssd
-CC = $(CROSS_COMPILE)gcc
-CFLAGS := -I../../../usr/include -I../../../tools/include
-
-ifdef DEBUG
-CFLAGS += -DDEBUG=$(DEBUG)
-endif
-
-all: $(PROGS)
-mpssd: mpssd.c sysfs.c
-	$(CC) $(CFLAGS) mpssd.c sysfs.c -o mpssd -lpthread
-
-install:
-	install mpssd /usr/sbin/mpssd
-	install micctrl /usr/sbin/micctrl
-
-clean:
-	rm -fr $(PROGS)
-
-endif
-endif
diff --git a/samples/mic/mpssd/micctrl b/samples/mic/mpssd/micctrl
deleted file mode 100755
index 030a60b04046..000000000000
--- a/samples/mic/mpssd/micctrl
+++ /dev/null
@@ -1,162 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0-only
-# Intel MIC Platform Software Stack (MPSS)
-#
-# Copyright(c) 2013 Intel Corporation.
-#
-# Intel MIC User Space Tools.
-#
-# micctrl - Controls MIC boot/start/stop.
-#
-# chkconfig: 2345 95 05
-# description: start MPSS stack processing.
-#
-### BEGIN INIT INFO
-# Provides: micctrl
-### END INIT INFO
-
-# Source function library.
-. /etc/init.d/functions
-
-sysfs="/sys/class/mic"
-
-_status()
-{
-	f=$sysfs/$1
-	echo -e $1 state: "`cat $f/state`" shutdown_status: "`cat $f/shutdown_status`"
-}
-
-status()
-{
-	if [ "`echo $1 | head -c3`" == "mic" ]; then
-		_status $1
-		return $?
-	fi
-	for f in $sysfs/*
-	do
-		_status `basename $f`
-		RETVAL=$?
-		[ $RETVAL -ne 0 ] && return $RETVAL
-	done
-	return 0
-}
-
-_reset()
-{
-	f=$sysfs/$1
-	echo reset > $f/state
-}
-
-reset()
-{
-	if [ "`echo $1 | head -c3`" == "mic" ]; then
-		_reset $1
-		return $?
-	fi
-	for f in $sysfs/*
-	do
-		_reset `basename $f`
-		RETVAL=$?
-		[ $RETVAL -ne 0 ] && return $RETVAL
-	done
-	return 0
-}
-
-_boot()
-{
-	f=$sysfs/$1
-	echo "linux" > $f/bootmode
-	echo "mic/uos.img" > $f/firmware
-	echo "mic/$1.image" > $f/ramdisk
-	echo "boot" > $f/state
-}
-
-boot()
-{
-	if [ "`echo $1 | head -c3`" == "mic" ]; then
-		_boot $1
-		return $?
-	fi
-	for f in $sysfs/*
-	do
-		_boot `basename $f`
-		RETVAL=$?
-		[ $RETVAL -ne 0 ] && return $RETVAL
-	done
-	return 0
-}
-
-_shutdown()
-{
-	f=$sysfs/$1
-	echo shutdown > $f/state
-}
-
-shutdown()
-{
-	if [ "`echo $1 | head -c3`" == "mic" ]; then
-		_shutdown $1
-		return $?
-	fi
-	for f in $sysfs/*
-	do
-		_shutdown `basename $f`
-		RETVAL=$?
-		[ $RETVAL -ne 0 ] && return $RETVAL
-	done
-	return 0
-}
-
-_wait()
-{
-	f=$sysfs/$1
-	while [ "`cat $f/state`" != "offline" -a "`cat $f/state`" != "online" ]
-	do
-		sleep 1
-		echo -e "Waiting for $1 to go offline"
-	done
-}
-
-wait()
-{
-	if [ "`echo $1 | head -c3`" == "mic" ]; then
-		_wait $1
-		return $?
-	fi
-	# Wait for the cards to go offline
-	for f in $sysfs/*
-	do
-		_wait `basename $f`
-		RETVAL=$?
-		[ $RETVAL -ne 0 ] && return $RETVAL
-	done
-	return 0
-}
-
-if [ ! -d "$sysfs" ]; then
-	echo -e $"Module unloaded "
-	exit 3
-fi
-
-case $1 in
-	-s)
-		status $2
-		;;
-	-r)
-		reset $2
-		;;
-	-b)
-		boot $2
-		;;
-	-S)
-		shutdown $2
-		;;
-	-w)
-		wait $2
-		;;
-	*)
-		echo $"Usage: $0 {-s (status) |-r (reset) |-b (boot) |-S (shutdown) |-w (wait)}"
-		exit 2
-esac
-
-exit $?
diff --git a/samples/mic/mpssd/mpss b/samples/mic/mpssd/mpss
deleted file mode 100755
index 248ac7313c71..000000000000
--- a/samples/mic/mpssd/mpss
+++ /dev/null
@@ -1,189 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0-only
-# Intel MIC Platform Software Stack (MPSS)
-#
-# Copyright(c) 2013 Intel Corporation.
-#
-# Intel MIC User Space Tools.
-#
-# mpss	Start mpssd.
-#
-# chkconfig: 2345 95 05
-# description: start MPSS stack processing.
-#
-### BEGIN INIT INFO
-# Provides: mpss
-# Required-Start:
-# Required-Stop:
-# Short-Description: MPSS stack control
-# Description: MPSS stack control
-### END INIT INFO
-
-# Source function library.
-. /etc/init.d/functions
-
-exec=/usr/sbin/mpssd
-sysfs="/sys/class/mic"
-mic_modules="mic_host mic_x100_dma scif vop"
-
-start()
-{
-	[ -x $exec ] || exit 5
-
-	if [ "`ps -e | awk '{print $4}' | grep mpssd | head -1`" = "mpssd" ]; then
-		echo -e $"MPSSD already running! "
-		success
-		echo
-		return 0
-	fi
-
-	echo -e $"Starting MPSS Stack"
-	echo -e $"Loading MIC drivers:" $mic_modules
-
-	modprobe -a $mic_modules
-	RETVAL=$?
-	if [ $RETVAL -ne 0 ]; then
-		failure
-		echo
-		return $RETVAL
-	fi
-
-	# Start the daemon
-	echo -n $"Starting MPSSD "
-	$exec
-	RETVAL=$?
-	if [ $RETVAL -ne 0 ]; then
-		failure
-		echo
-		return $RETVAL
-	fi
-	success
-	echo
-
-	sleep 5
-
-	# Boot the cards
-	micctrl -b
-
-	# Wait till ping works
-	for f in $sysfs/*
-	do
-		count=100
-		ipaddr=`cat $f/cmdline`
-		ipaddr=${ipaddr#*address,}
-		ipaddr=`echo $ipaddr | cut -d, -f1 | cut -d\; -f1`
-		while [ $count -ge 0 ]
-		do
-			echo -e "Pinging "`basename $f`" "
-			ping -c 1 $ipaddr &> /dev/null
-			RETVAL=$?
-			if [ $RETVAL -eq 0 ]; then
-				success
-				break
-			fi
-			sleep 1
-			count=`expr $count - 1`
-		done
-		[ $RETVAL -ne 0 ] && failure || success
-		echo
-	done
-	return $RETVAL
-}
-
-stop()
-{
-	echo -e $"Shutting down MPSS Stack: "
-
-	# Bail out if module is unloaded
-	if [ ! -d "$sysfs" ]; then
-		echo -n $"Module unloaded "
-		success
-		echo
-		return 0
-	fi
-
-	# Shut down the cards.
-	micctrl -S
-
-	# Wait for the cards to go offline
-	for f in $sysfs/*
-	do
-		while [ "`cat $f/state`" != "ready" ]
-		do
-			sleep 1
-			echo -e "Waiting for "`basename $f`" to become ready"
-		done
-	done
-
-	# Display the status of the cards
-	micctrl -s
-
-	# Kill MPSSD now
-	echo -n $"Killing MPSSD"
-	killall -9 mpssd 2>/dev/null
-	RETVAL=$?
-	[ $RETVAL -ne 0 ] && failure || success
-	echo
-	return $RETVAL
-}
-
-restart()
-{
-	stop
-	sleep 5
-	start
-}
-
-status()
-{
-	micctrl -s
-	if [ "`ps -e | awk '{print $4}' | grep mpssd | head -n 1`" = "mpssd" ]; then
-		echo "mpssd is running"
-	else
-		echo "mpssd is stopped"
-	fi
-	return 0
-}
-
-unload()
-{
-	if [ ! -d "$sysfs" ]; then
-		echo -n $"No MIC_HOST Module: "
-		success
-		echo
-		return
-	fi
-
-	stop
-
-	sleep 5
-	echo -n $"Removing MIC drivers:" $mic_modules
-	modprobe -r $mic_modules
-	RETVAL=$?
-	[ $RETVAL -ne 0 ] && failure || success
-	echo
-	return $RETVAL
-}
-
-case $1 in
-	start)
-		start
-		;;
-	stop)
-		stop
-		;;
-	restart)
-		restart
-		;;
-	status)
-		status
-		;;
-	unload)
-		unload
-		;;
-	*)
-		echo $"Usage: $0 {start|stop|restart|status|unload}"
-		exit 2
-esac
-
-exit $?
diff --git a/samples/mic/mpssd/mpssd.c b/samples/mic/mpssd/mpssd.c
deleted file mode 100644
index c03a05d498f0..000000000000
--- a/samples/mic/mpssd/mpssd.c
+++ /dev/null
@@ -1,1815 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * Intel MIC User Space Tools.
- */
-
-#define _GNU_SOURCE
-
-#include <stdlib.h>
-#include <fcntl.h>
-#include <getopt.h>
-#include <assert.h>
-#include <unistd.h>
-#include <stdbool.h>
-#include <signal.h>
-#include <poll.h>
-#include <features.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-#include <sys/socket.h>
-#include <linux/virtio_ring.h>
-#include <linux/virtio_net.h>
-#include <linux/virtio_console.h>
-#include <linux/virtio_blk.h>
-#include <linux/version.h>
-#include "mpssd.h"
-#include <linux/mic_ioctl.h>
-#include <linux/mic_common.h>
-#include <tools/endian.h>
-
-static void *init_mic(void *arg);
-
-static FILE *logfp;
-static struct mic_info mic_list;
-
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-
-#define min_t(type, x, y) ({				\
-		type __min1 = (x);                      \
-		type __min2 = (y);                      \
-		__min1 < __min2 ? __min1 : __min2; })
-
-/* align addr on a size boundary - adjust address up/down if needed */
-#define _ALIGN_DOWN(addr, size)  ((addr)&(~((size)-1)))
-#define _ALIGN_UP(addr, size)    _ALIGN_DOWN(addr + size - 1, size)
-
-/* align addr on a size boundary - adjust address up if needed */
-#define _ALIGN(addr, size)     _ALIGN_UP(addr, size)
-
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)        _ALIGN(addr, PAGE_SIZE)
-
-#define READ_ONCE(x) (*(volatile typeof(x) *)&(x))
-
-#define GSO_ENABLED		1
-#define MAX_GSO_SIZE		(64 * 1024)
-#define ETH_H_LEN		14
-#define MAX_NET_PKT_SIZE	(_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
-#define MIC_DEVICE_PAGE_END	0x1000
-
-#ifndef VIRTIO_NET_HDR_F_DATA_VALID
-#define VIRTIO_NET_HDR_F_DATA_VALID	2	/* Csum is valid */
-#endif
-
-static struct {
-	struct mic_device_desc dd;
-	struct mic_vqconfig vqconfig[2];
-	__u32 host_features, guest_acknowledgements;
-	struct virtio_console_config cons_config;
-} virtcons_dev_page = {
-	.dd = {
-		.type = VIRTIO_ID_CONSOLE,
-		.num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
-		.feature_len = sizeof(virtcons_dev_page.host_features),
-		.config_len = sizeof(virtcons_dev_page.cons_config),
-	},
-	.vqconfig[0] = {
-		.num = htole16(MIC_VRING_ENTRIES),
-	},
-	.vqconfig[1] = {
-		.num = htole16(MIC_VRING_ENTRIES),
-	},
-};
-
-static struct {
-	struct mic_device_desc dd;
-	struct mic_vqconfig vqconfig[2];
-	__u32 host_features, guest_acknowledgements;
-	struct virtio_net_config net_config;
-} virtnet_dev_page = {
-	.dd = {
-		.type = VIRTIO_ID_NET,
-		.num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
-		.feature_len = sizeof(virtnet_dev_page.host_features),
-		.config_len = sizeof(virtnet_dev_page.net_config),
-	},
-	.vqconfig[0] = {
-		.num = htole16(MIC_VRING_ENTRIES),
-	},
-	.vqconfig[1] = {
-		.num = htole16(MIC_VRING_ENTRIES),
-	},
-#if GSO_ENABLED
-	.host_features = htole32(
-		1 << VIRTIO_NET_F_CSUM |
-		1 << VIRTIO_NET_F_GSO |
-		1 << VIRTIO_NET_F_GUEST_TSO4 |
-		1 << VIRTIO_NET_F_GUEST_TSO6 |
-		1 << VIRTIO_NET_F_GUEST_ECN),
-#else
-		.host_features = 0,
-#endif
-};
-
-static const char *mic_config_dir = "/etc/mpss";
-static const char *virtblk_backend = "VIRTBLK_BACKEND";
-static struct {
-	struct mic_device_desc dd;
-	struct mic_vqconfig vqconfig[1];
-	__u32 host_features, guest_acknowledgements;
-	struct virtio_blk_config blk_config;
-} virtblk_dev_page = {
-	.dd = {
-		.type = VIRTIO_ID_BLOCK,
-		.num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
-		.feature_len = sizeof(virtblk_dev_page.host_features),
-		.config_len = sizeof(virtblk_dev_page.blk_config),
-	},
-	.vqconfig[0] = {
-		.num = htole16(MIC_VRING_ENTRIES),
-	},
-	.host_features =
-		htole32(1<<VIRTIO_BLK_F_SEG_MAX),
-	.blk_config = {
-		.seg_max = htole32(MIC_VRING_ENTRIES - 2),
-		.capacity = htole64(0),
-	 }
-};
-
-static char *myname;
-
-static int
-tap_configure(struct mic_info *mic, char *dev)
-{
-	pid_t pid;
-	char *ifargv[7];
-	char ipaddr[IFNAMSIZ];
-	int ret = 0;
-
-	pid = fork();
-	if (pid == 0) {
-		ifargv[0] = "ip";
-		ifargv[1] = "link";
-		ifargv[2] = "set";
-		ifargv[3] = dev;
-		ifargv[4] = "up";
-		ifargv[5] = NULL;
-		mpsslog("Configuring %s\n", dev);
-		ret = execvp("ip", ifargv);
-		if (ret < 0) {
-			mpsslog("%s execvp failed errno %s\n",
-				mic->name, strerror(errno));
-			return ret;
-		}
-	}
-	if (pid < 0) {
-		mpsslog("%s fork failed errno %s\n",
-			mic->name, strerror(errno));
-		return ret;
-	}
-
-	ret = waitpid(pid, NULL, 0);
-	if (ret < 0) {
-		mpsslog("%s waitpid failed errno %s\n",
-			mic->name, strerror(errno));
-		return ret;
-	}
-
-	snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id + 1);
-
-	pid = fork();
-	if (pid == 0) {
-		ifargv[0] = "ip";
-		ifargv[1] = "addr";
-		ifargv[2] = "add";
-		ifargv[3] = ipaddr;
-		ifargv[4] = "dev";
-		ifargv[5] = dev;
-		ifargv[6] = NULL;
-		mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
-		ret = execvp("ip", ifargv);
-		if (ret < 0) {
-			mpsslog("%s execvp failed errno %s\n",
-				mic->name, strerror(errno));
-			return ret;
-		}
-	}
-	if (pid < 0) {
-		mpsslog("%s fork failed errno %s\n",
-			mic->name, strerror(errno));
-		return ret;
-	}
-
-	ret = waitpid(pid, NULL, 0);
-	if (ret < 0) {
-		mpsslog("%s waitpid failed errno %s\n",
-			mic->name, strerror(errno));
-		return ret;
-	}
-	mpsslog("MIC name %s %s %d DONE!\n",
-		mic->name, __func__, __LINE__);
-	return 0;
-}
-
-static int tun_alloc(struct mic_info *mic, char *dev)
-{
-	struct ifreq ifr;
-	int fd, err;
-#if GSO_ENABLED
-	unsigned offload;
-#endif
-	fd = open("/dev/net/tun", O_RDWR);
-	if (fd < 0) {
-		mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
-		goto done;
-	}
-
-	memset(&ifr, 0, sizeof(ifr));
-
-	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
-	if (*dev)
-		strncpy(ifr.ifr_name, dev, IFNAMSIZ);
-
-	err = ioctl(fd, TUNSETIFF, (void *)&ifr);
-	if (err < 0) {
-		mpsslog("%s %s %d TUNSETIFF failed %s\n",
-			mic->name, __func__, __LINE__, strerror(errno));
-		close(fd);
-		return err;
-	}
-#if GSO_ENABLED
-	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_TSO_ECN;
-
-	err = ioctl(fd, TUNSETOFFLOAD, offload);
-	if (err < 0) {
-		mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
-			mic->name, __func__, __LINE__, strerror(errno));
-		close(fd);
-		return err;
-	}
-#endif
-	strcpy(dev, ifr.ifr_name);
-	mpsslog("Created TAP %s\n", dev);
-done:
-	return fd;
-}
-
-#define NET_FD_VIRTIO_NET 0
-#define NET_FD_TUN 1
-#define MAX_NET_FD 2
-
-static void set_dp(struct mic_info *mic, int type, void *dp)
-{
-	switch (type) {
-	case VIRTIO_ID_CONSOLE:
-		mic->mic_console.console_dp = dp;
-		return;
-	case VIRTIO_ID_NET:
-		mic->mic_net.net_dp = dp;
-		return;
-	case VIRTIO_ID_BLOCK:
-		mic->mic_virtblk.block_dp = dp;
-		return;
-	}
-	mpsslog("%s %s %d not found\n", mic->name, __func__, type);
-	assert(0);
-}
-
-static void *get_dp(struct mic_info *mic, int type)
-{
-	switch (type) {
-	case VIRTIO_ID_CONSOLE:
-		return mic->mic_console.console_dp;
-	case VIRTIO_ID_NET:
-		return mic->mic_net.net_dp;
-	case VIRTIO_ID_BLOCK:
-		return mic->mic_virtblk.block_dp;
-	}
-	mpsslog("%s %s %d not found\n", mic->name, __func__, type);
-	assert(0);
-	return NULL;
-}
-
-static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
-{
-	struct mic_device_desc *d;
-	int i;
-	void *dp = get_dp(mic, type);
-
-	for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE;
-		i += mic_total_desc_size(d)) {
-		d = dp + i;
-
-		/* End of list */
-		if (d->type == 0)
-			break;
-
-		if (d->type == -1)
-			continue;
-
-		mpsslog("%s %s d-> type %d d %p\n",
-			mic->name, __func__, d->type, d);
-
-		if (d->type == (__u8)type)
-			return d;
-	}
-	mpsslog("%s %s %d not found\n", mic->name, __func__, type);
-	return NULL;
-}
-
-/* See comments in vhost.c for explanation of next_desc() */
-static unsigned next_desc(struct vring_desc *desc)
-{
-	unsigned int next;
-
-	if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT))
-		return -1U;
-	next = le16toh(desc->next);
-	return next;
-}
-
-/* Sum up all the IOVEC length */
-static ssize_t
-sum_iovec_len(struct mic_copy_desc *copy)
-{
-	ssize_t sum = 0;
-	unsigned int i;
-
-	for (i = 0; i < copy->iovcnt; i++)
-		sum += copy->iov[i].iov_len;
-	return sum;
-}
-
-static inline void verify_out_len(struct mic_info *mic,
-	struct mic_copy_desc *copy)
-{
-	if (copy->out_len != sum_iovec_len(copy)) {
-		mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n",
-			mic->name, __func__, __LINE__,
-			copy->out_len, sum_iovec_len(copy));
-		assert(copy->out_len == sum_iovec_len(copy));
-	}
-}
-
-/* Display an iovec */
-static void
-disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
-	   const char *s, int line)
-{
-	unsigned int i;
-
-	for (i = 0; i < copy->iovcnt; i++)
-		mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n",
-			mic->name, s, line, i,
-			copy->iov[i].iov_base, copy->iov[i].iov_len);
-}
-
-static inline __u16 read_avail_idx(struct mic_vring *vr)
-{
-	return READ_ONCE(vr->info->avail_idx);
-}
-
-static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
-				struct mic_copy_desc *copy, ssize_t len)
-{
-	copy->vr_idx = tx ? 0 : 1;
-	copy->update_used = true;
-	if (type == VIRTIO_ID_NET)
-		copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
-	else
-		copy->iov[0].iov_len = len;
-}
-
-/* Central API which triggers the copies */
-static int
-mic_virtio_copy(struct mic_info *mic, int fd,
-		struct mic_vring *vr, struct mic_copy_desc *copy)
-{
-	int ret;
-
-	ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
-	if (ret) {
-		mpsslog("%s %s %d errno %s ret %d\n",
-			mic->name, __func__, __LINE__,
-			strerror(errno), ret);
-	}
-	return ret;
-}
-
-static inline unsigned _vring_size(unsigned int num, unsigned long align)
-{
-	return _ALIGN_UP(((sizeof(struct vring_desc) * num + sizeof(__u16) * (3 + num)
-				+ align - 1) & ~(align - 1))
-		+ sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num, 4);
-}
-
-/*
- * This initialization routine requires at least one
- * vring i.e. vr0. vr1 is optional.
- */
-static void *
-init_vr(struct mic_info *mic, int fd, int type,
-	struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
-{
-	int vr_size;
-	char *va;
-
-	vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
-					 MIC_VIRTIO_RING_ALIGN) +
-			     sizeof(struct _mic_vring_info));
-	va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
-		PROT_READ, MAP_SHARED, fd, 0);
-	if (MAP_FAILED == va) {
-		mpsslog("%s %s %d mmap failed errno %s\n",
-			mic->name, __func__, __LINE__,
-			strerror(errno));
-		goto done;
-	}
-	set_dp(mic, type, va);
-	vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
-	vr0->info = vr0->va +
-		_vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
-	vring_init(&vr0->vr,
-		   MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
-	mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
-		__func__, mic->name, vr0->va, vr0->info, vr_size,
-		_vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
-	mpsslog("magic 0x%x expected 0x%x\n",
-		le32toh(vr0->info->magic), MIC_MAGIC + type);
-	assert(le32toh(vr0->info->magic) == MIC_MAGIC + type);
-	if (vr1) {
-		vr1->va = (struct mic_vring *)
-			&va[MIC_DEVICE_PAGE_END + vr_size];
-		vr1->info = vr1->va + _vring_size(MIC_VRING_ENTRIES,
-			MIC_VIRTIO_RING_ALIGN);
-		vring_init(&vr1->vr,
-			   MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
-		mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
-			__func__, mic->name, vr1->va, vr1->info, vr_size,
-			_vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
-		mpsslog("magic 0x%x expected 0x%x\n",
-			le32toh(vr1->info->magic), MIC_MAGIC + type + 1);
-		assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1);
-	}
-done:
-	return va;
-}
-
-static int
-wait_for_card_driver(struct mic_info *mic, int fd, int type)
-{
-	struct pollfd pollfd;
-	int err;
-	struct mic_device_desc *desc = get_device_desc(mic, type);
-	__u8 prev_status;
-
-	if (!desc)
-		return -ENODEV;
-	prev_status = desc->status;
-	pollfd.fd = fd;
-	mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
-		mic->name, __func__, type, desc->status);
-
-	while (1) {
-		pollfd.events = POLLIN;
-		pollfd.revents = 0;
-		err = poll(&pollfd, 1, -1);
-		if (err < 0) {
-			mpsslog("%s %s poll failed %s\n",
-				mic->name, __func__, strerror(errno));
-			continue;
-		}
-
-		if (pollfd.revents) {
-			if (desc->status != prev_status) {
-				mpsslog("%s %s Waiting... desc-> type %d "
-					"status 0x%x\n",
-					mic->name, __func__, type,
-					desc->status);
-				prev_status = desc->status;
-			}
-			if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
-				mpsslog("%s %s poll.revents %d\n",
-					mic->name, __func__, pollfd.revents);
-				mpsslog("%s %s desc-> type %d status 0x%x\n",
-					mic->name, __func__, type,
-					desc->status);
-				break;
-			}
-		}
-	}
-	return 0;
-}
-
-/* Spin till we have some descriptors */
-static void
-spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
-{
-	__u16 avail_idx = read_avail_idx(vr);
-
-	while (avail_idx == le16toh(READ_ONCE(vr->vr.avail->idx))) {
-#ifdef DEBUG
-		mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
-			mic->name, __func__,
-			le16toh(vr->vr.avail->idx), vr->info->avail_idx);
-#endif
-		sched_yield();
-	}
-}
-
-static void *
-virtio_net(void *arg)
-{
-	static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
-	static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64)));
-	struct iovec vnet_iov[2][2] = {
-		{ { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
-		  { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
-		{ { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
-		  { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
-	};
-	struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
-	struct mic_info *mic = (struct mic_info *)arg;
-	char if_name[IFNAMSIZ];
-	struct pollfd net_poll[MAX_NET_FD];
-	struct mic_vring tx_vr, rx_vr;
-	struct mic_copy_desc copy;
-	struct mic_device_desc *desc;
-	int err;
-
-	snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
-	mic->mic_net.tap_fd = tun_alloc(mic, if_name);
-	if (mic->mic_net.tap_fd < 0)
-		goto done;
-
-	if (tap_configure(mic, if_name))
-		goto done;
-	mpsslog("MIC name %s id %d\n", mic->name, mic->id);
-
-	net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
-	net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
-	net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
-	net_poll[NET_FD_TUN].events = POLLIN;
-
-	if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
-				  VIRTIO_ID_NET, &tx_vr, &rx_vr,
-		virtnet_dev_page.dd.num_vq)) {
-		mpsslog("%s init_vr failed %s\n",
-			mic->name, strerror(errno));
-		goto done;
-	}
-
-	copy.iovcnt = 2;
-	desc = get_device_desc(mic, VIRTIO_ID_NET);
-
-	while (1) {
-		ssize_t len;
-
-		net_poll[NET_FD_VIRTIO_NET].revents = 0;
-		net_poll[NET_FD_TUN].revents = 0;
-
-		/* Start polling for data from tap and virtio net */
-		err = poll(net_poll, 2, -1);
-		if (err < 0) {
-			mpsslog("%s poll failed %s\n",
-				__func__, strerror(errno));
-			continue;
-		}
-		if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
-			err = wait_for_card_driver(mic,
-						   mic->mic_net.virtio_net_fd,
-						   VIRTIO_ID_NET);
-			if (err) {
-				mpsslog("%s %s %d Exiting...\n",
-					mic->name, __func__, __LINE__);
-				break;
-			}
-		}
-		/*
-		 * Check if there is data to be read from TUN and write to
-		 * virtio net fd if there is.
-		 */
-		if (net_poll[NET_FD_TUN].revents & POLLIN) {
-			copy.iov = iov0;
-			len = readv(net_poll[NET_FD_TUN].fd,
-				copy.iov, copy.iovcnt);
-			if (len > 0) {
-				struct virtio_net_hdr *hdr
-					= (struct virtio_net_hdr *)vnet_hdr[0];
-
-				/* Disable checksums on the card since we are on
-				   a reliable PCIe link */
-				hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
-#ifdef DEBUG
-				mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
-					__func__, __LINE__, hdr->flags);
-				mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
-					copy.out_len, hdr->gso_type);
-#endif
-#ifdef DEBUG
-				disp_iovec(mic, &copy, __func__, __LINE__);
-				mpsslog("%s %s %d read from tap 0x%lx\n",
-					mic->name, __func__, __LINE__,
-					len);
-#endif
-				spin_for_descriptors(mic, &tx_vr);
-				txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy,
-					     len);
-
-				err = mic_virtio_copy(mic,
-					mic->mic_net.virtio_net_fd, &tx_vr,
-					&copy);
-				if (err < 0) {
-					mpsslog("%s %s %d mic_virtio_copy %s\n",
-						mic->name, __func__, __LINE__,
-						strerror(errno));
-				}
-				if (!err)
-					verify_out_len(mic, &copy);
-#ifdef DEBUG
-				disp_iovec(mic, &copy, __func__, __LINE__);
-				mpsslog("%s %s %d wrote to net 0x%lx\n",
-					mic->name, __func__, __LINE__,
-					sum_iovec_len(&copy));
-#endif
-				/* Reinitialize IOV for next run */
-				iov0[1].iov_len = MAX_NET_PKT_SIZE;
-			} else if (len < 0) {
-				disp_iovec(mic, &copy, __func__, __LINE__);
-				mpsslog("%s %s %d read failed %s ", mic->name,
-					__func__, __LINE__, strerror(errno));
-				mpsslog("cnt %d sum %zd\n",
-					copy.iovcnt, sum_iovec_len(&copy));
-			}
-		}
-
-		/*
-		 * Check if there is data to be read from virtio net and
-		 * write to TUN if there is.
-		 */
-		if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
-			while (rx_vr.info->avail_idx !=
-				le16toh(rx_vr.vr.avail->idx)) {
-				copy.iov = iov1;
-				txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy,
-					     MAX_NET_PKT_SIZE
-					+ sizeof(struct virtio_net_hdr));
-
-				err = mic_virtio_copy(mic,
-					mic->mic_net.virtio_net_fd, &rx_vr,
-					&copy);
-				if (!err) {
-#ifdef DEBUG
-					struct virtio_net_hdr *hdr
-						= (struct virtio_net_hdr *)
-							vnet_hdr[1];
-
-					mpsslog("%s %s %d hdr->flags 0x%x, ",
-						mic->name, __func__, __LINE__,
-						hdr->flags);
-					mpsslog("out_len %d gso_type 0x%x\n",
-						copy.out_len,
-						hdr->gso_type);
-#endif
-					/* Set the correct output iov_len */
-					iov1[1].iov_len = copy.out_len -
-						sizeof(struct virtio_net_hdr);
-					verify_out_len(mic, &copy);
-#ifdef DEBUG
-					disp_iovec(mic, &copy, __func__,
-						   __LINE__);
-					mpsslog("%s %s %d ",
-						mic->name, __func__, __LINE__);
-					mpsslog("read from net 0x%lx\n",
-						sum_iovec_len(&copy));
-#endif
-					len = writev(net_poll[NET_FD_TUN].fd,
-						copy.iov, copy.iovcnt);
-					if (len != sum_iovec_len(&copy)) {
-						mpsslog("Tun write failed %s ",
-							strerror(errno));
-						mpsslog("len 0x%zx ", len);
-						mpsslog("read_len 0x%zx\n",
-							sum_iovec_len(&copy));
-					} else {
-#ifdef DEBUG
-						disp_iovec(mic, &copy, __func__,
-							   __LINE__);
-						mpsslog("%s %s %d ",
-							mic->name, __func__,
-							__LINE__);
-						mpsslog("wrote to tap 0x%lx\n",
-							len);
-#endif
-					}
-				} else {
-					mpsslog("%s %s %d mic_virtio_copy %s\n",
-						mic->name, __func__, __LINE__,
-						strerror(errno));
-					break;
-				}
-			}
-		}
-		if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
-			mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
-	}
-done:
-	pthread_exit(NULL);
-}
-
-/* virtio_console */
-#define VIRTIO_CONSOLE_FD 0
-#define MONITOR_FD (VIRTIO_CONSOLE_FD + 1)
-#define MAX_CONSOLE_FD (MONITOR_FD + 1)  /* must be the last one + 1 */
-#define MAX_BUFFER_SIZE PAGE_SIZE
-
-static void *
-virtio_console(void *arg)
-{
-	static __u8 vcons_buf[2][PAGE_SIZE];
-	struct iovec vcons_iov[2] = {
-		{ .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
-		{ .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
-	};
-	struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
-	struct mic_info *mic = (struct mic_info *)arg;
-	int err;
-	struct pollfd console_poll[MAX_CONSOLE_FD];
-	int pty_fd;
-	char *pts_name;
-	ssize_t len;
-	struct mic_vring tx_vr, rx_vr;
-	struct mic_copy_desc copy;
-	struct mic_device_desc *desc;
-
-	pty_fd = posix_openpt(O_RDWR);
-	if (pty_fd < 0) {
-		mpsslog("can't open a pseudoterminal master device: %s\n",
-			strerror(errno));
-		goto _return;
-	}
-	pts_name = ptsname(pty_fd);
-	if (pts_name == NULL) {
-		mpsslog("can't get pts name\n");
-		goto _close_pty;
-	}
-	printf("%s console message goes to %s\n", mic->name, pts_name);
-	mpsslog("%s console message goes to %s\n", mic->name, pts_name);
-	err = grantpt(pty_fd);
-	if (err < 0) {
-		mpsslog("can't grant access: %s %s\n",
-			pts_name, strerror(errno));
-		goto _close_pty;
-	}
-	err = unlockpt(pty_fd);
-	if (err < 0) {
-		mpsslog("can't unlock a pseudoterminal: %s %s\n",
-			pts_name, strerror(errno));
-		goto _close_pty;
-	}
-	console_poll[MONITOR_FD].fd = pty_fd;
-	console_poll[MONITOR_FD].events = POLLIN;
-
-	console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
-	console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
-
-	if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
-				  VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
-		virtcons_dev_page.dd.num_vq)) {
-		mpsslog("%s init_vr failed %s\n",
-			mic->name, strerror(errno));
-		goto _close_pty;
-	}
-
-	copy.iovcnt = 1;
-	desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
-
-	for (;;) {
-		console_poll[MONITOR_FD].revents = 0;
-		console_poll[VIRTIO_CONSOLE_FD].revents = 0;
-		err = poll(console_poll, MAX_CONSOLE_FD, -1);
-		if (err < 0) {
-			mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
-				strerror(errno));
-			continue;
-		}
-		if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
-			err = wait_for_card_driver(mic,
-					mic->mic_console.virtio_console_fd,
-					VIRTIO_ID_CONSOLE);
-			if (err) {
-				mpsslog("%s %s %d Exiting...\n",
-					mic->name, __func__, __LINE__);
-				break;
-			}
-		}
-
-		if (console_poll[MONITOR_FD].revents & POLLIN) {
-			copy.iov = iov0;
-			len = readv(pty_fd, copy.iov, copy.iovcnt);
-			if (len > 0) {
-#ifdef DEBUG
-				disp_iovec(mic, &copy, __func__, __LINE__);
-				mpsslog("%s %s %d read from tap 0x%lx\n",
-					mic->name, __func__, __LINE__,
-					len);
-#endif
-				spin_for_descriptors(mic, &tx_vr);
-				txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
-					     &copy, len);
-
-				err = mic_virtio_copy(mic,
-					mic->mic_console.virtio_console_fd,
-					&tx_vr, &copy);
-				if (err < 0) {
-					mpsslog("%s %s %d mic_virtio_copy %s\n",
-						mic->name, __func__, __LINE__,
-						strerror(errno));
-				}
-				if (!err)
-					verify_out_len(mic, &copy);
-#ifdef DEBUG
-				disp_iovec(mic, &copy, __func__, __LINE__);
-				mpsslog("%s %s %d wrote to net 0x%lx\n",
-					mic->name, __func__, __LINE__,
-					sum_iovec_len(&copy));
-#endif
-				/* Reinitialize IOV for next run */
-				iov0->iov_len = PAGE_SIZE;
-			} else if (len < 0) {
-				disp_iovec(mic, &copy, __func__, __LINE__);
-				mpsslog("%s %s %d read failed %s ",
-					mic->name, __func__, __LINE__,
-					strerror(errno));
-				mpsslog("cnt %d sum %zd\n",
-					copy.iovcnt, sum_iovec_len(&copy));
-			}
-		}
-
-		if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
-			while (rx_vr.info->avail_idx !=
-				le16toh(rx_vr.vr.avail->idx)) {
-				copy.iov = iov1;
-				txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
-					     &copy, PAGE_SIZE);
-
-				err = mic_virtio_copy(mic,
-					mic->mic_console.virtio_console_fd,
-					&rx_vr, &copy);
-				if (!err) {
-					/* Set the correct output iov_len */
-					iov1->iov_len = copy.out_len;
-					verify_out_len(mic, &copy);
-#ifdef DEBUG
-					disp_iovec(mic, &copy, __func__,
-						   __LINE__);
-					mpsslog("%s %s %d ",
-						mic->name, __func__, __LINE__);
-					mpsslog("read from net 0x%lx\n",
-						sum_iovec_len(&copy));
-#endif
-					len = writev(pty_fd,
-						copy.iov, copy.iovcnt);
-					if (len != sum_iovec_len(&copy)) {
-						mpsslog("Tun write failed %s ",
-							strerror(errno));
-						mpsslog("len 0x%zx ", len);
-						mpsslog("read_len 0x%zx\n",
-							sum_iovec_len(&copy));
-					} else {
-#ifdef DEBUG
-						disp_iovec(mic, &copy, __func__,
-							   __LINE__);
-						mpsslog("%s %s %d ",
-							mic->name, __func__,
-							__LINE__);
-						mpsslog("wrote to tap 0x%lx\n",
-							len);
-#endif
-					}
-				} else {
-					mpsslog("%s %s %d mic_virtio_copy %s\n",
-						mic->name, __func__, __LINE__,
-						strerror(errno));
-					break;
-				}
-			}
-		}
-		if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
-			mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
-	}
-_close_pty:
-	close(pty_fd);
-_return:
-	pthread_exit(NULL);
-}
-
-static void
-add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
-{
-	char path[PATH_MAX];
-	int fd, err;
-
-	snprintf(path, PATH_MAX, "/dev/vop_virtio%d", mic->id);
-	fd = open(path, O_RDWR);
-	if (fd < 0) {
-		mpsslog("Could not open %s %s\n", path, strerror(errno));
-		return;
-	}
-
-	err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
-	if (err < 0) {
-		mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
-		close(fd);
-		return;
-	}
-	switch (dd->type) {
-	case VIRTIO_ID_NET:
-		mic->mic_net.virtio_net_fd = fd;
-		mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
-		break;
-	case VIRTIO_ID_CONSOLE:
-		mic->mic_console.virtio_console_fd = fd;
-		mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
-		break;
-	case VIRTIO_ID_BLOCK:
-		mic->mic_virtblk.virtio_block_fd = fd;
-		mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
-		break;
-	}
-}
-
-static bool
-set_backend_file(struct mic_info *mic)
-{
-	FILE *config;
-	char buff[PATH_MAX], *line, *evv, *p;
-
-	snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
-	config = fopen(buff, "r");
-	if (config == NULL)
-		return false;
-	do {  /* look for "virtblk_backend=XXXX" */
-		line = fgets(buff, PATH_MAX, config);
-		if (line == NULL)
-			break;
-		if (*line == '#')
-			continue;
-		p = strchr(line, '\n');
-		if (p)
-			*p = '\0';
-	} while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
-	fclose(config);
-	if (line == NULL)
-		return false;
-	evv = strchr(line, '=');
-	if (evv == NULL)
-		return false;
-	mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
-	if (mic->mic_virtblk.backend_file == NULL) {
-		mpsslog("%s %d can't allocate memory\n", mic->name, mic->id);
-		return false;
-	}
-	strcpy(mic->mic_virtblk.backend_file, evv + 1);
-	return true;
-}
-
-#define SECTOR_SIZE 512
-static bool
-set_backend_size(struct mic_info *mic)
-{
-	mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
-		SEEK_END);
-	if (mic->mic_virtblk.backend_size < 0) {
-		mpsslog("%s: can't seek: %s\n",
-			mic->name, mic->mic_virtblk.backend_file);
-		return false;
-	}
-	virtblk_dev_page.blk_config.capacity =
-		mic->mic_virtblk.backend_size / SECTOR_SIZE;
-	if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
-		virtblk_dev_page.blk_config.capacity++;
-
-	virtblk_dev_page.blk_config.capacity =
-		htole64(virtblk_dev_page.blk_config.capacity);
-
-	return true;
-}
-
-static bool
-open_backend(struct mic_info *mic)
-{
-	if (!set_backend_file(mic))
-		goto _error_exit;
-	mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
-	if (mic->mic_virtblk.backend < 0) {
-		mpsslog("%s: can't open: %s\n", mic->name,
-			mic->mic_virtblk.backend_file);
-		goto _error_free;
-	}
-	if (!set_backend_size(mic))
-		goto _error_close;
-	mic->mic_virtblk.backend_addr = mmap(NULL,
-		mic->mic_virtblk.backend_size,
-		PROT_READ|PROT_WRITE, MAP_SHARED,
-		mic->mic_virtblk.backend, 0L);
-	if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
-		mpsslog("%s: can't map: %s %s\n",
-			mic->name, mic->mic_virtblk.backend_file,
-			strerror(errno));
-		goto _error_close;
-	}
-	return true;
-
- _error_close:
-	close(mic->mic_virtblk.backend);
- _error_free:
-	free(mic->mic_virtblk.backend_file);
- _error_exit:
-	return false;
-}
-
-static void
-close_backend(struct mic_info *mic)
-{
-	munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
-	close(mic->mic_virtblk.backend);
-	free(mic->mic_virtblk.backend_file);
-}
-
-static bool
-start_virtblk(struct mic_info *mic, struct mic_vring *vring)
-{
-	if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) {
-		mpsslog("%s: blk_config is not 8 byte aligned.\n",
-			mic->name);
-		return false;
-	}
-	add_virtio_device(mic, &virtblk_dev_page.dd);
-	if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
-				  VIRTIO_ID_BLOCK, vring, NULL,
-				  virtblk_dev_page.dd.num_vq)) {
-		mpsslog("%s init_vr failed %s\n",
-			mic->name, strerror(errno));
-		return false;
-	}
-	return true;
-}
-
-static void
-stop_virtblk(struct mic_info *mic)
-{
-	int vr_size, ret;
-
-	vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
-					 MIC_VIRTIO_RING_ALIGN) +
-			     sizeof(struct _mic_vring_info));
-	ret = munmap(mic->mic_virtblk.block_dp,
-		MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
-	if (ret < 0)
-		mpsslog("%s munmap errno %d\n", mic->name, errno);
-	close(mic->mic_virtblk.virtio_block_fd);
-}
-
-static __u8
-header_error_check(struct vring_desc *desc)
-{
-	if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
-		mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
-			__func__, __LINE__);
-		return -EIO;
-	}
-	if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
-		mpsslog("%s() %d: alone\n",
-			__func__, __LINE__);
-		return -EIO;
-	}
-	if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
-		mpsslog("%s() %d: not read\n",
-			__func__, __LINE__);
-		return -EIO;
-	}
-	return 0;
-}
-
-static int
-read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
-{
-	struct iovec iovec;
-	struct mic_copy_desc copy;
-
-	iovec.iov_len = sizeof(*hdr);
-	iovec.iov_base = hdr;
-	copy.iov = &iovec;
-	copy.iovcnt = 1;
-	copy.vr_idx = 0;  /* only one vring on virtio_block */
-	copy.update_used = false;  /* do not update used index */
-	return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
-}
-
-static int
-transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
-{
-	struct mic_copy_desc copy;
-
-	copy.iov = iovec;
-	copy.iovcnt = iovcnt;
-	copy.vr_idx = 0;  /* only one vring on virtio_block */
-	copy.update_used = false;  /* do not update used index */
-	return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
-}
-
-static __u8
-status_error_check(struct vring_desc *desc)
-{
-	if (le32toh(desc->len) != sizeof(__u8)) {
-		mpsslog("%s() %d: length is not sizeof(status)\n",
-			__func__, __LINE__);
-		return -EIO;
-	}
-	return 0;
-}
-
-static int
-write_status(int fd, __u8 *status)
-{
-	struct iovec iovec;
-	struct mic_copy_desc copy;
-
-	iovec.iov_base = status;
-	iovec.iov_len = sizeof(*status);
-	copy.iov = &iovec;
-	copy.iovcnt = 1;
-	copy.vr_idx = 0;  /* only one vring on virtio_block */
-	copy.update_used = true; /* Update used index */
-	return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
-}
-
-#ifndef VIRTIO_BLK_T_GET_ID
-#define VIRTIO_BLK_T_GET_ID    8
-#endif
-
-static void *
-virtio_block(void *arg)
-{
-	struct mic_info *mic = (struct mic_info *)arg;
-	int ret;
-	struct pollfd block_poll;
-	struct mic_vring vring;
-	__u16 avail_idx;
-	__u32 desc_idx;
-	struct vring_desc *desc;
-	struct iovec *iovec, *piov;
-	__u8 status;
-	__u32 buffer_desc_idx;
-	struct virtio_blk_outhdr hdr;
-	void *fos;
-
-	for (;;) {  /* forever */
-		if (!open_backend(mic)) { /* No virtblk */
-			for (mic->mic_virtblk.signaled = 0;
-				!mic->mic_virtblk.signaled;)
-				sleep(1);
-			continue;
-		}
-
-		/* backend file is specified. */
-		if (!start_virtblk(mic, &vring))
-			goto _close_backend;
-		iovec = malloc(sizeof(*iovec) *
-			le32toh(virtblk_dev_page.blk_config.seg_max));
-		if (!iovec) {
-			mpsslog("%s: can't alloc iovec: %s\n",
-				mic->name, strerror(ENOMEM));
-			goto _stop_virtblk;
-		}
-
-		block_poll.fd = mic->mic_virtblk.virtio_block_fd;
-		block_poll.events = POLLIN;
-		for (mic->mic_virtblk.signaled = 0;
-		     !mic->mic_virtblk.signaled;) {
-			block_poll.revents = 0;
-					/* timeout in 1 sec to see signaled */
-			ret = poll(&block_poll, 1, 1000);
-			if (ret < 0) {
-				mpsslog("%s %d: poll failed: %s\n",
-					__func__, __LINE__,
-					strerror(errno));
-				continue;
-			}
-
-			if (!(block_poll.revents & POLLIN)) {
-#ifdef DEBUG
-				mpsslog("%s %d: block_poll.revents=0x%x\n",
-					__func__, __LINE__, block_poll.revents);
-#endif
-				continue;
-			}
-
-			/* POLLIN */
-			while (vring.info->avail_idx !=
-				le16toh(vring.vr.avail->idx)) {
-				/* read header element */
-				avail_idx =
-					vring.info->avail_idx &
-					(vring.vr.num - 1);
-				desc_idx = le16toh(
-					vring.vr.avail->ring[avail_idx]);
-				desc = &vring.vr.desc[desc_idx];
-#ifdef DEBUG
-				mpsslog("%s() %d: avail_idx=%d ",
-					__func__, __LINE__,
-					vring.info->avail_idx);
-				mpsslog("vring.vr.num=%d desc=%p\n",
-					vring.vr.num, desc);
-#endif
-				status = header_error_check(desc);
-				ret = read_header(
-					mic->mic_virtblk.virtio_block_fd,
-					&hdr, desc_idx);
-				if (ret < 0) {
-					mpsslog("%s() %d %s: ret=%d %s\n",
-						__func__, __LINE__,
-						mic->name, ret,
-						strerror(errno));
-					break;
-				}
-				/* buffer element */
-				piov = iovec;
-				status = 0;
-				fos = mic->mic_virtblk.backend_addr +
-					(hdr.sector * SECTOR_SIZE);
-				buffer_desc_idx = next_desc(desc);
-				desc_idx = buffer_desc_idx;
-				for (desc = &vring.vr.desc[buffer_desc_idx];
-				     desc->flags & VRING_DESC_F_NEXT;
-				     desc_idx = next_desc(desc),
-					     desc = &vring.vr.desc[desc_idx]) {
-					piov->iov_len = desc->len;
-					piov->iov_base = fos;
-					piov++;
-					fos += desc->len;
-				}
-				/* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
-				if (hdr.type & ~(VIRTIO_BLK_T_OUT |
-					VIRTIO_BLK_T_GET_ID)) {
-					/*
-					  VIRTIO_BLK_T_IN - does not do
-					  anything. Probably for documenting.
-					  VIRTIO_BLK_T_SCSI_CMD - for
-					  virtio_scsi.
-					  VIRTIO_BLK_T_FLUSH - turned off in
-					  config space.
-					  VIRTIO_BLK_T_BARRIER - defined but not
-					  used in anywhere.
-					*/
-					mpsslog("%s() %d: type %x ",
-						__func__, __LINE__,
-						hdr.type);
-					mpsslog("is not supported\n");
-					status = -ENOTSUP;
-
-				} else {
-					ret = transfer_blocks(
-					mic->mic_virtblk.virtio_block_fd,
-						iovec,
-						piov - iovec);
-					if (ret < 0 &&
-					    status != 0)
-						status = ret;
-				}
-				/* write status and update used pointer */
-				if (status != 0)
-					status = status_error_check(desc);
-				ret = write_status(
-					mic->mic_virtblk.virtio_block_fd,
-					&status);
-#ifdef DEBUG
-				mpsslog("%s() %d: write status=%d on desc=%p\n",
-					__func__, __LINE__,
-					status, desc);
-#endif
-			}
-		}
-		free(iovec);
-_stop_virtblk:
-		stop_virtblk(mic);
-_close_backend:
-		close_backend(mic);
-	}  /* forever */
-
-	pthread_exit(NULL);
-}
-
-static void
-reset(struct mic_info *mic)
-{
-#define RESET_TIMEOUT 120
-	int i = RESET_TIMEOUT;
-	setsysfs(mic->name, "state", "reset");
-	while (i) {
-		char *state;
-		state = readsysfs(mic->name, "state");
-		if (!state)
-			goto retry;
-		mpsslog("%s: %s %d state %s\n",
-			mic->name, __func__, __LINE__, state);
-
-		if (!strcmp(state, "ready")) {
-			free(state);
-			break;
-		}
-		free(state);
-retry:
-		sleep(1);
-		i--;
-	}
-}
-
-static int
-get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
-{
-	if (!strcmp(shutdown_status, "nop"))
-		return MIC_NOP;
-	if (!strcmp(shutdown_status, "crashed"))
-		return MIC_CRASHED;
-	if (!strcmp(shutdown_status, "halted"))
-		return MIC_HALTED;
-	if (!strcmp(shutdown_status, "poweroff"))
-		return MIC_POWER_OFF;
-	if (!strcmp(shutdown_status, "restart"))
-		return MIC_RESTART;
-	mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
-	/* Invalid state */
-	assert(0);
-};
-
-static int get_mic_state(struct mic_info *mic)
-{
-	char *state = NULL;
-	enum mic_states mic_state;
-
-	while (!state) {
-		state = readsysfs(mic->name, "state");
-		sleep(1);
-	}
-	mpsslog("%s: %s %d state %s\n",
-		mic->name, __func__, __LINE__, state);
-
-	if (!strcmp(state, "ready")) {
-		mic_state = MIC_READY;
-	} else if (!strcmp(state, "booting")) {
-		mic_state = MIC_BOOTING;
-	} else if (!strcmp(state, "online")) {
-		mic_state = MIC_ONLINE;
-	} else if (!strcmp(state, "shutting_down")) {
-		mic_state = MIC_SHUTTING_DOWN;
-	} else if (!strcmp(state, "reset_failed")) {
-		mic_state = MIC_RESET_FAILED;
-	} else if (!strcmp(state, "resetting")) {
-		mic_state = MIC_RESETTING;
-	} else {
-		mpsslog("%s: BUG invalid state %s\n", mic->name, state);
-		assert(0);
-	}
-
-	free(state);
-	return mic_state;
-};
-
-static void mic_handle_shutdown(struct mic_info *mic)
-{
-#define SHUTDOWN_TIMEOUT 60
-	int i = SHUTDOWN_TIMEOUT;
-	char *shutdown_status;
-	while (i) {
-		shutdown_status = readsysfs(mic->name, "shutdown_status");
-		if (!shutdown_status) {
-			sleep(1);
-			continue;
-		}
-		mpsslog("%s: %s %d shutdown_status %s\n",
-			mic->name, __func__, __LINE__, shutdown_status);
-		switch (get_mic_shutdown_status(mic, shutdown_status)) {
-		case MIC_RESTART:
-			mic->restart = 1;
-		case MIC_HALTED:
-		case MIC_POWER_OFF:
-		case MIC_CRASHED:
-			free(shutdown_status);
-			goto reset;
-		default:
-			break;
-		}
-		free(shutdown_status);
-		sleep(1);
-		i--;
-	}
-reset:
-	if (!i)
-		mpsslog("%s: %s %d timing out waiting for shutdown_status %s\n",
-			mic->name, __func__, __LINE__, shutdown_status);
-	reset(mic);
-}
-
-static int open_state_fd(struct mic_info *mic)
-{
-	char pathname[PATH_MAX];
-	int fd;
-
-	snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
-		 MICSYSFSDIR, mic->name, "state");
-
-	fd = open(pathname, O_RDONLY);
-	if (fd < 0)
-		mpsslog("%s: opening file %s failed %s\n",
-			mic->name, pathname, strerror(errno));
-	return fd;
-}
-
-static int block_till_state_change(int fd, struct mic_info *mic)
-{
-	struct pollfd ufds[1];
-	char value[PAGE_SIZE];
-	int ret;
-
-	ufds[0].fd = fd;
-	ufds[0].events = POLLERR | POLLPRI;
-	ret = poll(ufds, 1, -1);
-	if (ret < 0) {
-		mpsslog("%s: %s %d poll failed %s\n",
-			mic->name, __func__, __LINE__, strerror(errno));
-		return ret;
-	}
-
-	ret = lseek(fd, 0, SEEK_SET);
-	if (ret < 0) {
-		mpsslog("%s: %s %d Failed to seek to 0: %s\n",
-			mic->name, __func__, __LINE__, strerror(errno));
-		return ret;
-	}
-
-	ret = read(fd, value, sizeof(value));
-	if (ret < 0) {
-		mpsslog("%s: %s %d Failed to read sysfs entry: %s\n",
-			mic->name, __func__, __LINE__, strerror(errno));
-		return ret;
-	}
-
-	return 0;
-}
-
-static void *
-mic_config(void *arg)
-{
-	struct mic_info *mic = (struct mic_info *)arg;
-	int fd, ret, stat = 0;
-
-	fd = open_state_fd(mic);
-	if (fd < 0) {
-		mpsslog("%s: %s %d open state fd failed %s\n",
-			mic->name, __func__, __LINE__, strerror(errno));
-		goto exit;
-	}
-
-	do {
-		ret = block_till_state_change(fd, mic);
-		if (ret < 0) {
-			mpsslog("%s: %s %d block_till_state_change error %s\n",
-				mic->name, __func__, __LINE__, strerror(errno));
-			goto close_exit;
-		}
-
-		switch (get_mic_state(mic)) {
-		case MIC_SHUTTING_DOWN:
-			mic_handle_shutdown(mic);
-			break;
-		case MIC_READY:
-		case MIC_RESET_FAILED:
-			ret = kill(mic->pid, SIGTERM);
-			mpsslog("%s: %s %d kill pid %d ret %d\n",
-				mic->name, __func__, __LINE__,
-				mic->pid, ret);
-			if (!ret) {
-				ret = waitpid(mic->pid, &stat,
-					      WIFSIGNALED(stat));
-				mpsslog("%s: %s %d waitpid ret %d pid %d\n",
-					mic->name, __func__, __LINE__,
-					ret, mic->pid);
-			}
-			if (mic->boot_on_resume) {
-				setsysfs(mic->name, "state", "boot");
-				mic->boot_on_resume = 0;
-			}
-			goto close_exit;
-		default:
-			break;
-		}
-	} while (1);
-
-close_exit:
-	close(fd);
-exit:
-	init_mic(mic);
-	pthread_exit(NULL);
-}
-
-static void
-set_cmdline(struct mic_info *mic)
-{
-	char buffer[PATH_MAX];
-	int len;
-
-	len = snprintf(buffer, PATH_MAX,
-		"clocksource=tsc highres=off nohz=off ");
-	len += snprintf(buffer + len, PATH_MAX - len,
-		"cpufreq_on;corec6_off;pc3_off;pc6_off ");
-	len += snprintf(buffer + len, PATH_MAX - len,
-		"ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
-		mic->id + 1);
-
-	setsysfs(mic->name, "cmdline", buffer);
-	mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
-	snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id + 1);
-	mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
-}
-
-static void
-set_log_buf_info(struct mic_info *mic)
-{
-	int fd;
-	off_t len;
-	char system_map[] = "/lib/firmware/mic/System.map";
-	char *map, *temp, log_buf[17] = {'\0'};
-
-	fd = open(system_map, O_RDONLY);
-	if (fd < 0) {
-		mpsslog("%s: Opening System.map failed: %d\n",
-			mic->name, errno);
-		return;
-	}
-	len = lseek(fd, 0, SEEK_END);
-	if (len < 0) {
-		mpsslog("%s: Reading System.map size failed: %d\n",
-			mic->name, errno);
-		close(fd);
-		return;
-	}
-	map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
-	if (map == MAP_FAILED) {
-		mpsslog("%s: mmap of System.map failed: %d\n",
-			mic->name, errno);
-		close(fd);
-		return;
-	}
-	temp = strstr(map, "__log_buf");
-	if (!temp) {
-		mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
-		munmap(map, len);
-		close(fd);
-		return;
-	}
-	strncpy(log_buf, temp - 19, 16);
-	setsysfs(mic->name, "log_buf_addr", log_buf);
-	mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
-	temp = strstr(map, "log_buf_len");
-	if (!temp) {
-		mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
-		munmap(map, len);
-		close(fd);
-		return;
-	}
-	strncpy(log_buf, temp - 19, 16);
-	setsysfs(mic->name, "log_buf_len", log_buf);
-	mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
-	munmap(map, len);
-	close(fd);
-}
-
-static void
-change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
-{
-	struct mic_info *mic;
-
-	for (mic = mic_list.next; mic != NULL; mic = mic->next)
-		mic->mic_virtblk.signaled = 1/* true */;
-}
-
-static void
-set_mic_boot_params(struct mic_info *mic)
-{
-	set_log_buf_info(mic);
-	set_cmdline(mic);
-}
-
-static void *
-init_mic(void *arg)
-{
-	struct mic_info *mic = (struct mic_info *)arg;
-	struct sigaction ignore = {
-		.sa_flags = 0,
-		.sa_handler = SIG_IGN
-	};
-	struct sigaction act = {
-		.sa_flags = SA_SIGINFO,
-		.sa_sigaction = change_virtblk_backend,
-	};
-	char buffer[PATH_MAX];
-	int err, fd;
-
-	/*
-	 * Currently, one virtio block device is supported for each MIC card
-	 * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
-	 * The signal informs the virtio block backend about a change in the
-	 * configuration file which specifies the virtio backend file name on
-	 * the host. Virtio block backend then re-reads the configuration file
-	 * and switches to the new block device. This signalling mechanism may
-	 * not be required once multiple virtio block devices are supported by
-	 * the MIC daemon.
-	 */
-	sigaction(SIGUSR1, &ignore, NULL);
-retry:
-	fd = open_state_fd(mic);
-	if (fd < 0) {
-		mpsslog("%s: %s %d open state fd failed %s\n",
-			mic->name, __func__, __LINE__, strerror(errno));
-		sleep(2);
-		goto retry;
-	}
-
-	if (mic->restart) {
-		snprintf(buffer, PATH_MAX, "boot");
-		setsysfs(mic->name, "state", buffer);
-		mpsslog("%s restarting mic %d\n",
-			mic->name, mic->restart);
-		mic->restart = 0;
-	}
-
-	while (1) {
-		while (block_till_state_change(fd, mic)) {
-			mpsslog("%s: %s %d block_till_state_change error %s\n",
-				mic->name, __func__, __LINE__, strerror(errno));
-			sleep(2);
-			continue;
-		}
-
-		if (get_mic_state(mic) == MIC_BOOTING)
-			break;
-	}
-
-	mic->pid = fork();
-	switch (mic->pid) {
-	case 0:
-		add_virtio_device(mic, &virtcons_dev_page.dd);
-		add_virtio_device(mic, &virtnet_dev_page.dd);
-		err = pthread_create(&mic->mic_console.console_thread, NULL,
-			virtio_console, mic);
-		if (err)
-			mpsslog("%s virtcons pthread_create failed %s\n",
-				mic->name, strerror(err));
-		err = pthread_create(&mic->mic_net.net_thread, NULL,
-			virtio_net, mic);
-		if (err)
-			mpsslog("%s virtnet pthread_create failed %s\n",
-				mic->name, strerror(err));
-		err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
-			virtio_block, mic);
-		if (err)
-			mpsslog("%s virtblk pthread_create failed %s\n",
-				mic->name, strerror(err));
-		sigemptyset(&act.sa_mask);
-		err = sigaction(SIGUSR1, &act, NULL);
-		if (err)
-			mpsslog("%s sigaction SIGUSR1 failed %s\n",
-				mic->name, strerror(errno));
-		while (1)
-			sleep(60);
-	case -1:
-		mpsslog("fork failed MIC name %s id %d errno %d\n",
-			mic->name, mic->id, errno);
-		break;
-	default:
-		err = pthread_create(&mic->config_thread, NULL,
-				     mic_config, mic);
-		if (err)
-			mpsslog("%s mic_config pthread_create failed %s\n",
-				mic->name, strerror(err));
-	}
-
-	return NULL;
-}
-
-static void
-start_daemon(void)
-{
-	struct mic_info *mic;
-	int err;
-
-	for (mic = mic_list.next; mic; mic = mic->next) {
-		set_mic_boot_params(mic);
-		err = pthread_create(&mic->init_thread, NULL, init_mic, mic);
-		if (err)
-			mpsslog("%s init_mic pthread_create failed %s\n",
-				mic->name, strerror(err));
-	}
-
-	while (1)
-		sleep(60);
-}
-
-static int
-init_mic_list(void)
-{
-	struct mic_info *mic = &mic_list;
-	struct dirent *file;
-	DIR *dp;
-	int cnt = 0;
-
-	dp = opendir(MICSYSFSDIR);
-	if (!dp)
-		return 0;
-
-	while ((file = readdir(dp)) != NULL) {
-		if (!strncmp(file->d_name, "mic", 3)) {
-			mic->next = calloc(1, sizeof(struct mic_info));
-			if (mic->next) {
-				mic = mic->next;
-				mic->id = atoi(&file->d_name[3]);
-				mic->name = malloc(strlen(file->d_name) + 16);
-				if (mic->name)
-					strcpy(mic->name, file->d_name);
-				mpsslog("MIC name %s id %d\n", mic->name,
-					mic->id);
-				cnt++;
-			}
-		}
-	}
-
-	closedir(dp);
-	return cnt;
-}
-
-void
-mpsslog(char *format, ...)
-{
-	va_list args;
-	char buffer[4096];
-	char ts[52], *ts1;
-	time_t t;
-
-	if (logfp == NULL)
-		return;
-
-	va_start(args, format);
-	vsprintf(buffer, format, args);
-	va_end(args);
-
-	time(&t);
-	ts1 = ctime_r(&t, ts);
-	ts1[strlen(ts1) - 1] = '\0';
-	fprintf(logfp, "%s: %s", ts1, buffer);
-
-	fflush(logfp);
-}
-
-int
-main(int argc, char *argv[])
-{
-	int cnt;
-	pid_t pid;
-
-	myname = argv[0];
-
-	logfp = fopen(LOGFILE_NAME, "a+");
-	if (!logfp) {
-		fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
-		exit(1);
-	}
-	pid = fork();
-	switch (pid) {
-	case 0:
-		break;
-	case -1:
-		exit(2);
-	default:
-		exit(0);
-	}
-
-	mpsslog("MIC Daemon start\n");
-
-	cnt = init_mic_list();
-	if (cnt == 0) {
-		mpsslog("MIC module not loaded\n");
-		exit(3);
-	}
-	mpsslog("MIC found %d devices\n", cnt);
-
-	start_daemon();
-
-	exit(0);
-}
diff --git a/samples/mic/mpssd/mpssd.h b/samples/mic/mpssd/mpssd.h
deleted file mode 100644
index 5f98bdafe653..000000000000
--- a/samples/mic/mpssd/mpssd.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * Intel MIC User Space Tools.
- */
-#ifndef _MPSSD_H_
-#define _MPSSD_H_
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <dirent.h>
-#include <libgen.h>
-#include <pthread.h>
-#include <stdarg.h>
-#include <time.h>
-#include <errno.h>
-#include <sys/dir.h>
-#include <sys/ioctl.h>
-#include <sys/poll.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-#include <sys/utsname.h>
-#include <sys/wait.h>
-#include <netinet/in.h>
-#include <arpa/inet.h>
-#include <netdb.h>
-#include <signal.h>
-#include <limits.h>
-#include <syslog.h>
-#include <getopt.h>
-#include <net/if.h>
-#include <linux/if_tun.h>
-#include <linux/virtio_ids.h>
-
-#define MICSYSFSDIR "/sys/class/mic"
-#define LOGFILE_NAME "/var/log/mpssd"
-#define PAGE_SIZE 4096
-
-struct mic_console_info {
-	pthread_t       console_thread;
-	int		virtio_console_fd;
-	void		*console_dp;
-};
-
-struct mic_net_info {
-	pthread_t       net_thread;
-	int		virtio_net_fd;
-	int		tap_fd;
-	void		*net_dp;
-};
-
-struct mic_virtblk_info {
-	pthread_t       block_thread;
-	int		virtio_block_fd;
-	void		*block_dp;
-	volatile sig_atomic_t	signaled;
-	char		*backend_file;
-	int		backend;
-	void		*backend_addr;
-	long		backend_size;
-};
-
-struct mic_info {
-	int		id;
-	char		*name;
-	pthread_t       config_thread;
-	pthread_t       init_thread;
-	pid_t		pid;
-	struct mic_console_info	mic_console;
-	struct mic_net_info	mic_net;
-	struct mic_virtblk_info	mic_virtblk;
-	int		restart;
-	int		boot_on_resume;
-	struct mic_info *next;
-};
-
-__attribute__((format(printf, 1, 2)))
-void mpsslog(char *format, ...);
-char *readsysfs(char *dir, char *entry);
-int setsysfs(char *dir, char *entry, char *value);
-#endif
diff --git a/samples/mic/mpssd/sysfs.c b/samples/mic/mpssd/sysfs.c
deleted file mode 100644
index 3fb08eb7ed9d..000000000000
--- a/samples/mic/mpssd/sysfs.c
+++ /dev/null
@@ -1,91 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * Intel MIC User Space Tools.
- */
-
-#include "mpssd.h"
-
-#define PAGE_SIZE 4096
-
-char *
-readsysfs(char *dir, char *entry)
-{
-	char filename[PATH_MAX];
-	char value[PAGE_SIZE];
-	char *string = NULL;
-	int fd;
-	int len;
-
-	if (dir == NULL)
-		snprintf(filename, PATH_MAX, "%s/%s", MICSYSFSDIR, entry);
-	else
-		snprintf(filename, PATH_MAX,
-			 "%s/%s/%s", MICSYSFSDIR, dir, entry);
-
-	fd = open(filename, O_RDONLY);
-	if (fd < 0) {
-		mpsslog("Failed to open sysfs entry '%s': %s\n",
-			filename, strerror(errno));
-		return NULL;
-	}
-
-	len = read(fd, value, sizeof(value));
-	if (len < 0) {
-		mpsslog("Failed to read sysfs entry '%s': %s\n",
-			filename, strerror(errno));
-		goto readsys_ret;
-	}
-	if (len == 0)
-		goto readsys_ret;
-
-	value[len - 1] = '\0';
-
-	string = malloc(strlen(value) + 1);
-	if (string)
-		strcpy(string, value);
-
-readsys_ret:
-	close(fd);
-	return string;
-}
-
-int
-setsysfs(char *dir, char *entry, char *value)
-{
-	char filename[PATH_MAX];
-	char *oldvalue;
-	int fd, ret = 0;
-
-	if (dir == NULL)
-		snprintf(filename, PATH_MAX, "%s/%s", MICSYSFSDIR, entry);
-	else
-		snprintf(filename, PATH_MAX, "%s/%s/%s",
-			 MICSYSFSDIR, dir, entry);
-
-	oldvalue = readsysfs(dir, entry);
-
-	fd = open(filename, O_RDWR);
-	if (fd < 0) {
-		ret = errno;
-		mpsslog("Failed to open sysfs entry '%s': %s\n",
-			filename, strerror(errno));
-		goto done;
-	}
-
-	if (!oldvalue || strcmp(value, oldvalue)) {
-		if (write(fd, value, strlen(value)) < 0) {
-			ret = errno;
-			mpsslog("Failed to write new sysfs entry '%s': %s\n",
-				filename, strerror(errno));
-		}
-	}
-	close(fd);
-done:
-	if (oldvalue)
-		free(oldvalue);
-	return ret;
-}
-- 
cgit v1.2.3


From 8d97e71811aaafe4abf611dc24822fd6e73df1a1 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Thu, 1 Oct 2020 06:57:46 -0700
Subject: perf/core: Add PERF_SAMPLE_DATA_PAGE_SIZE

Current perf can report both virtual addresses and physical addresses,
but not the MMU page size. Without the MMU page size information of the
utilized page, users cannot decide whether to promote/demote large pages
to optimize memory usage.

Add a new sample type for the data MMU page size.

Current perf already has a facility to collect data virtual addresses.
A page walker is required to walk the page tables and calculate the
MMU page size from a given virtual address.

On some platforms, e.g., X86, the page walker is invoked in an NMI
handler, so the page walker must be NMI-safe and low-overhead. Besides,
the page walker should work for both user and kernel virtual addresses.
The existing generic page walker, e.g., walk_page_range_novma(), is a
little bit complex and doesn't guarantee NMI safety. follow_page()
only handles user virtual addresses.

Add a new function perf_get_page_size() to walk the page tables and
calculate the MMU page size. In the function:
- Interrupts have to be disabled to prevent any teardown of the page
  tables.
- For user space threads, the current->mm is used for the page walker.
  For kernel threads and the like, the current->mm is NULL. The init_mm
  is used for the page walker. The active_mm is not used here, because
  it can be NULL.
  Quote from Peter Zijlstra,
  "context_switch() can set prev->active_mm to NULL when it transfers it
   to @next. It does this before @current is updated. So an NMI that
   comes in between this active_mm swizzling and updating @current will
   see !active_mm."
- The MMU page size is calculated from the page table level.

The method should work for all architectures, but it has only been
verified on X86. Should there be some architectures, which support perf,
where the method doesn't work, it can be fixed later separately.
Reporting the wrong page size would not be fatal for the architecture.

Some features under discussion may impact the method in the future.
Quote from Dave Hansen,
  "There are lots of weird things folks are trying to do with the page
   tables, like Address Space Isolation.  For instance, if you get a
   perf NMI when running userspace, current->mm->pgd is *different* than
   the PGD that was in use when userspace was running. It's close enough
   today, but it might not stay that way."
If that happens later, many consecutive page walk errors will occur.
The worst case is that a page size of '0' is returned for many samples,
which would not be fatal.
In the perf tool, a check is implemented to detect this case. Once it
happens, a kernel patch can be implemented accordingly.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20201001135749.2804-2-kan.liang@linux.intel.com
---
 include/linux/perf_event.h      |   1 +
 include/uapi/linux/perf_event.h |   4 +-
 kernel/events/core.c            | 103 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 107 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 0c19d279b97f..7e3785dd27d9 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1034,6 +1034,7 @@ struct perf_sample_data {
 
 	u64				phys_addr;
 	u64				cgroup;
+	u64				data_page_size;
 } ____cacheline_aligned;
 
 /* default value for data source */
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 077e7ee69e3d..cc6ea346e9f9 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -143,8 +143,9 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_PHYS_ADDR			= 1U << 19,
 	PERF_SAMPLE_AUX				= 1U << 20,
 	PERF_SAMPLE_CGROUP			= 1U << 21,
+	PERF_SAMPLE_DATA_PAGE_SIZE		= 1U << 22,
 
-	PERF_SAMPLE_MAX = 1U << 22,		/* non-ABI */
+	PERF_SAMPLE_MAX = 1U << 23,		/* non-ABI */
 
 	__PERF_SAMPLE_CALLCHAIN_EARLY		= 1ULL << 63, /* non-ABI; internal use */
 };
@@ -896,6 +897,7 @@ enum perf_event_type {
 	 *	{ u64			phys_addr;} && PERF_SAMPLE_PHYS_ADDR
 	 *	{ u64			size;
 	 *	  char			data[size]; } && PERF_SAMPLE_AUX
+	 *	{ u64			data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE
 	 * };
 	 */
 	PERF_RECORD_SAMPLE			= 9,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index fb662eb4fb69..a796db2f3b57 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -51,6 +51,7 @@
 #include <linux/proc_ns.h>
 #include <linux/mount.h>
 #include <linux/min_heap.h>
+#include <linux/highmem.h>
 
 #include "internal.h"
 
@@ -1894,6 +1895,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
 	if (sample_type & PERF_SAMPLE_CGROUP)
 		size += sizeof(data->cgroup);
 
+	if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
+		size += sizeof(data->data_page_size);
+
 	event->header_size = size;
 }
 
@@ -6938,6 +6942,9 @@ void perf_output_sample(struct perf_output_handle *handle,
 	if (sample_type & PERF_SAMPLE_CGROUP)
 		perf_output_put(handle, data->cgroup);
 
+	if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
+		perf_output_put(handle, data->data_page_size);
+
 	if (sample_type & PERF_SAMPLE_AUX) {
 		perf_output_put(handle, data->aux_size);
 
@@ -6995,6 +7002,94 @@ static u64 perf_virt_to_phys(u64 virt)
 	return phys_addr;
 }
 
+#ifdef CONFIG_MMU
+
+/*
+ * Return the MMU page size of a given virtual address
+ */
+static u64 __perf_get_page_size(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	pgd = pgd_offset(mm, addr);
+	if (pgd_none(*pgd))
+		return 0;
+
+	p4d = p4d_offset(pgd, addr);
+	if (!p4d_present(*p4d))
+		return 0;
+
+	if (p4d_leaf(*p4d))
+		return 1ULL << P4D_SHIFT;
+
+	pud = pud_offset(p4d, addr);
+	if (!pud_present(*pud))
+		return 0;
+
+	if (pud_leaf(*pud))
+		return 1ULL << PUD_SHIFT;
+
+	pmd = pmd_offset(pud, addr);
+	if (!pmd_present(*pmd))
+		return 0;
+
+	if (pmd_leaf(*pmd))
+		return 1ULL << PMD_SHIFT;
+
+	pte = pte_offset_map(pmd, addr);
+	if (!pte_present(*pte)) {
+		pte_unmap(pte);
+		return 0;
+	}
+
+	pte_unmap(pte);
+	return PAGE_SIZE;
+}
+
+#else
+
+static u64 __perf_get_page_size(struct mm_struct *mm, unsigned long addr)
+{
+	return 0;
+}
+
+#endif
+
+static u64 perf_get_page_size(unsigned long addr)
+{
+	struct mm_struct *mm;
+	unsigned long flags;
+	u64 size;
+
+	if (!addr)
+		return 0;
+
+	/*
+	 * Software page-table walkers must disable IRQs,
+	 * which prevents any tear down of the page tables.
+	 */
+	local_irq_save(flags);
+
+	mm = current->mm;
+	if (!mm) {
+		/*
+		 * For kernel threads and the like, use init_mm so that
+		 * we can find kernel memory.
+		 */
+		mm = &init_mm;
+	}
+
+	size = __perf_get_page_size(mm, addr);
+
+	local_irq_restore(flags);
+
+	return size;
+}
+
 static struct perf_callchain_entry __empty_callchain = { .nr = 0, };
 
 struct perf_callchain_entry *
@@ -7150,6 +7245,14 @@ void perf_prepare_sample(struct perf_event_header *header,
 	}
 #endif
 
+	/*
+	 * PERF_DATA_PAGE_SIZE requires PERF_SAMPLE_ADDR. If the user doesn't
+	 * require PERF_SAMPLE_ADDR, kernel implicitly retrieve the data->addr,
+	 * but the value will not dump to the userspace.
+	 */
+	if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
+		data->data_page_size = perf_get_page_size(data->addr);
+
 	if (sample_type & PERF_SAMPLE_AUX) {
 		u64 size;
 
-- 
cgit v1.2.3


From 995f088efebe1eba0282a6ffa12411b37f8990c2 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Thu, 1 Oct 2020 06:57:49 -0700
Subject: perf/core: Add support for PERF_SAMPLE_CODE_PAGE_SIZE

When studying code layout, it is useful to capture the page size of the
sampled code address.

Add a new sample type for code page size.
The new sample type requires collecting the ip. The code page size can
be calculated from the NMI-safe perf_get_page_size().

For large PEBS, it's very unlikely that the mapping is gone for the
earlier PEBS records. Enable the feature for the large PEBS. The worst
case is that page-size '0' is returned.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20201001135749.2804-5-kan.liang@linux.intel.com
---
 arch/x86/events/perf_event.h    |  2 +-
 include/linux/perf_event.h      |  1 +
 include/uapi/linux/perf_event.h |  4 +++-
 kernel/events/core.c            | 11 ++++++++++-
 4 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index ee2b9b9fc2a5..10032f023fcc 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -132,7 +132,7 @@ struct amd_nb {
 	PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
 	PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \
 	PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER | \
-	PERF_SAMPLE_PERIOD)
+	PERF_SAMPLE_PERIOD | PERF_SAMPLE_CODE_PAGE_SIZE)
 
 #define PEBS_GP_REGS			\
 	((1ULL << PERF_REG_X86_AX)    | \
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 7e3785dd27d9..e533b03af053 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1035,6 +1035,7 @@ struct perf_sample_data {
 	u64				phys_addr;
 	u64				cgroup;
 	u64				data_page_size;
+	u64				code_page_size;
 } ____cacheline_aligned;
 
 /* default value for data source */
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index cc6ea346e9f9..c2f20ee3124d 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -144,8 +144,9 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_AUX				= 1U << 20,
 	PERF_SAMPLE_CGROUP			= 1U << 21,
 	PERF_SAMPLE_DATA_PAGE_SIZE		= 1U << 22,
+	PERF_SAMPLE_CODE_PAGE_SIZE		= 1U << 23,
 
-	PERF_SAMPLE_MAX = 1U << 23,		/* non-ABI */
+	PERF_SAMPLE_MAX = 1U << 24,		/* non-ABI */
 
 	__PERF_SAMPLE_CALLCHAIN_EARLY		= 1ULL << 63, /* non-ABI; internal use */
 };
@@ -898,6 +899,7 @@ enum perf_event_type {
 	 *	{ u64			size;
 	 *	  char			data[size]; } && PERF_SAMPLE_AUX
 	 *	{ u64			data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE
+	 *	{ u64			code_page_size;} && PERF_SAMPLE_CODE_PAGE_SIZE
 	 * };
 	 */
 	PERF_RECORD_SAMPLE			= 9,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index a796db2f3b57..7f655d19b8c4 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1898,6 +1898,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
 	if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
 		size += sizeof(data->data_page_size);
 
+	if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
+		size += sizeof(data->code_page_size);
+
 	event->header_size = size;
 }
 
@@ -6945,6 +6948,9 @@ void perf_output_sample(struct perf_output_handle *handle,
 	if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
 		perf_output_put(handle, data->data_page_size);
 
+	if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
+		perf_output_put(handle, data->code_page_size);
+
 	if (sample_type & PERF_SAMPLE_AUX) {
 		perf_output_put(handle, data->aux_size);
 
@@ -7125,7 +7131,7 @@ void perf_prepare_sample(struct perf_event_header *header,
 
 	__perf_event_header__init_id(header, data, event);
 
-	if (sample_type & PERF_SAMPLE_IP)
+	if (sample_type & (PERF_SAMPLE_IP | PERF_SAMPLE_CODE_PAGE_SIZE))
 		data->ip = perf_instruction_pointer(regs);
 
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
@@ -7253,6 +7259,9 @@ void perf_prepare_sample(struct perf_event_header *header,
 	if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
 		data->data_page_size = perf_get_page_size(data->addr);
 
+	if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
+		data->code_page_size = perf_get_page_size(data->ip);
+
 	if (sample_type & PERF_SAMPLE_AUX) {
 		u64 size;
 
-- 
cgit v1.2.3


From f54ec58fee837ec847cb8b50593e81bfaa46107f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 27 Oct 2020 21:12:12 +0100
Subject: wimax: move out to staging

There are no known users of this driver as of October 2020, and it will
be removed unless someone turns out to still need it in future releases.

According to https://en.wikipedia.org/wiki/List_of_WiMAX_networks, there
have been many public wimax networks, but it appears that many of these
have migrated to LTE or discontinued their service altogether.
As most PCs and phones lack WiMAX hardware support, the remaining
networks tend to use standalone routers. These almost certainly
run Linux, but not a modern kernel or the mainline wimax driver stack.

NetworkManager appears to have dropped userspace support in 2015
(https://bugzilla.gnome.org/show_bug.cgi?id=747846), and the
www.linuxwimax.org site had already shut down earlier.

WiMax is apparently still being deployed on airport campus networks
("AeroMACS"), but in a frequency band that was not supported by the old
Intel 2400m (used in Sandy Bridge laptops and earlier), which is the
only driver using the kernel's wimax stack.

Move all files into drivers/staging/wimax, including the uapi header
files and documentation, to make it easier to remove it when it gets
to that. Only minimal changes are made to the source files, in order
to make it possible to port patches across the move.

Also remove the MAINTAINERS entry that refers to a broken mailing
list and website.

Acked-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-By: Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Suggested-by: Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 Documentation/admin-guide/index.rst                |    1 -
 Documentation/admin-guide/wimax/i2400m.rst         |  283 ----
 Documentation/admin-guide/wimax/index.rst          |   19 -
 Documentation/admin-guide/wimax/wimax.rst          |   89 --
 Documentation/networking/kapi.rst                  |   21 -
 .../translations/zh_CN/admin-guide/index.rst       |    1 -
 MAINTAINERS                                        |   22 -
 drivers/net/Kconfig                                |    2 -
 drivers/net/Makefile                               |    1 -
 drivers/net/wimax/Kconfig                          |   18 -
 drivers/net/wimax/Makefile                         |    2 -
 drivers/net/wimax/i2400m/Kconfig                   |   37 -
 drivers/net/wimax/i2400m/Makefile                  |   23 -
 drivers/net/wimax/i2400m/control.c                 | 1434 -----------------
 drivers/net/wimax/i2400m/debug-levels.h            |   32 -
 drivers/net/wimax/i2400m/debugfs.c                 |  253 ---
 drivers/net/wimax/i2400m/driver.c                  | 1002 ------------
 drivers/net/wimax/i2400m/fw.c                      | 1653 --------------------
 drivers/net/wimax/i2400m/i2400m-usb.h              |  275 ----
 drivers/net/wimax/i2400m/i2400m.h                  |  970 ------------
 drivers/net/wimax/i2400m/netdev.c                  |  603 -------
 drivers/net/wimax/i2400m/op-rfkill.c               |  196 ---
 drivers/net/wimax/i2400m/rx.c                      | 1395 -----------------
 drivers/net/wimax/i2400m/sysfs.c                   |   65 -
 drivers/net/wimax/i2400m/tx.c                      | 1011 ------------
 drivers/net/wimax/i2400m/usb-debug-levels.h        |   28 -
 drivers/net/wimax/i2400m/usb-fw.c                  |  365 -----
 drivers/net/wimax/i2400m/usb-notif.c               |  258 ---
 drivers/net/wimax/i2400m/usb-rx.c                  |  462 ------
 drivers/net/wimax/i2400m/usb-tx.c                  |  273 ----
 drivers/net/wimax/i2400m/usb.c                     |  764 ---------
 drivers/staging/Kconfig                            |    2 +
 drivers/staging/Makefile                           |    1 +
 drivers/staging/wimax/Documentation/i2400m.rst     |  283 ++++
 drivers/staging/wimax/Documentation/index.rst      |   19 +
 drivers/staging/wimax/Documentation/wimax.rst      |   89 ++
 drivers/staging/wimax/Kconfig                      |   46 +
 drivers/staging/wimax/Makefile                     |   15 +
 drivers/staging/wimax/TODO                         |   18 +
 drivers/staging/wimax/debug-levels.h               |   29 +
 drivers/staging/wimax/debugfs.c                    |   38 +
 drivers/staging/wimax/i2400m/Kconfig               |   37 +
 drivers/staging/wimax/i2400m/Makefile              |   23 +
 drivers/staging/wimax/i2400m/control.c             | 1434 +++++++++++++++++
 drivers/staging/wimax/i2400m/debug-levels.h        |   32 +
 drivers/staging/wimax/i2400m/debugfs.c             |  253 +++
 drivers/staging/wimax/i2400m/driver.c              | 1002 ++++++++++++
 drivers/staging/wimax/i2400m/fw.c                  | 1653 ++++++++++++++++++++
 drivers/staging/wimax/i2400m/i2400m-usb.h          |  275 ++++
 drivers/staging/wimax/i2400m/i2400m.h              |  970 ++++++++++++
 drivers/staging/wimax/i2400m/linux-wimax-i2400m.h  |  572 +++++++
 drivers/staging/wimax/i2400m/netdev.c              |  603 +++++++
 drivers/staging/wimax/i2400m/op-rfkill.c           |  196 +++
 drivers/staging/wimax/i2400m/rx.c                  | 1395 +++++++++++++++++
 drivers/staging/wimax/i2400m/sysfs.c               |   65 +
 drivers/staging/wimax/i2400m/tx.c                  | 1011 ++++++++++++
 drivers/staging/wimax/i2400m/usb-debug-levels.h    |   28 +
 drivers/staging/wimax/i2400m/usb-fw.c              |  365 +++++
 drivers/staging/wimax/i2400m/usb-notif.c           |  258 +++
 drivers/staging/wimax/i2400m/usb-rx.c              |  462 ++++++
 drivers/staging/wimax/i2400m/usb-tx.c              |  273 ++++
 drivers/staging/wimax/i2400m/usb.c                 |  764 +++++++++
 drivers/staging/wimax/id-table.c                   |  130 ++
 drivers/staging/wimax/linux-wimax-debug.h          |  491 ++++++
 drivers/staging/wimax/linux-wimax.h                |  239 +++
 drivers/staging/wimax/net-wimax.h                  |  503 ++++++
 drivers/staging/wimax/op-msg.c                     |  391 +++++
 drivers/staging/wimax/op-reset.c                   |  108 ++
 drivers/staging/wimax/op-rfkill.c                  |  431 +++++
 drivers/staging/wimax/op-state-get.c               |   52 +
 drivers/staging/wimax/stack.c                      |  616 ++++++++
 drivers/staging/wimax/wimax-internal.h             |   85 +
 include/linux/wimax/debug.h                        |  491 ------
 include/net/wimax.h                                |  503 ------
 include/uapi/linux/wimax.h                         |  239 ---
 include/uapi/linux/wimax/i2400m.h                  |  572 -------
 net/Kconfig                                        |    2 -
 net/Makefile                                       |    1 -
 net/wimax/Kconfig                                  |   40 -
 net/wimax/Makefile                                 |   13 -
 net/wimax/debug-levels.h                           |   29 -
 net/wimax/debugfs.c                                |   38 -
 net/wimax/id-table.c                               |  130 --
 net/wimax/op-msg.c                                 |  391 -----
 net/wimax/op-reset.c                               |  108 --
 net/wimax/op-rfkill.c                              |  431 -----
 net/wimax/op-state-get.c                           |   52 -
 net/wimax/stack.c                                  |  616 --------
 net/wimax/wimax-internal.h                         |   85 -
 89 files changed, 15257 insertions(+), 15299 deletions(-)
 delete mode 100644 Documentation/admin-guide/wimax/i2400m.rst
 delete mode 100644 Documentation/admin-guide/wimax/index.rst
 delete mode 100644 Documentation/admin-guide/wimax/wimax.rst
 delete mode 100644 drivers/net/wimax/Kconfig
 delete mode 100644 drivers/net/wimax/Makefile
 delete mode 100644 drivers/net/wimax/i2400m/Kconfig
 delete mode 100644 drivers/net/wimax/i2400m/Makefile
 delete mode 100644 drivers/net/wimax/i2400m/control.c
 delete mode 100644 drivers/net/wimax/i2400m/debug-levels.h
 delete mode 100644 drivers/net/wimax/i2400m/debugfs.c
 delete mode 100644 drivers/net/wimax/i2400m/driver.c
 delete mode 100644 drivers/net/wimax/i2400m/fw.c
 delete mode 100644 drivers/net/wimax/i2400m/i2400m-usb.h
 delete mode 100644 drivers/net/wimax/i2400m/i2400m.h
 delete mode 100644 drivers/net/wimax/i2400m/netdev.c
 delete mode 100644 drivers/net/wimax/i2400m/op-rfkill.c
 delete mode 100644 drivers/net/wimax/i2400m/rx.c
 delete mode 100644 drivers/net/wimax/i2400m/sysfs.c
 delete mode 100644 drivers/net/wimax/i2400m/tx.c
 delete mode 100644 drivers/net/wimax/i2400m/usb-debug-levels.h
 delete mode 100644 drivers/net/wimax/i2400m/usb-fw.c
 delete mode 100644 drivers/net/wimax/i2400m/usb-notif.c
 delete mode 100644 drivers/net/wimax/i2400m/usb-rx.c
 delete mode 100644 drivers/net/wimax/i2400m/usb-tx.c
 delete mode 100644 drivers/net/wimax/i2400m/usb.c
 create mode 100644 drivers/staging/wimax/Documentation/i2400m.rst
 create mode 100644 drivers/staging/wimax/Documentation/index.rst
 create mode 100644 drivers/staging/wimax/Documentation/wimax.rst
 create mode 100644 drivers/staging/wimax/Kconfig
 create mode 100644 drivers/staging/wimax/Makefile
 create mode 100644 drivers/staging/wimax/TODO
 create mode 100644 drivers/staging/wimax/debug-levels.h
 create mode 100644 drivers/staging/wimax/debugfs.c
 create mode 100644 drivers/staging/wimax/i2400m/Kconfig
 create mode 100644 drivers/staging/wimax/i2400m/Makefile
 create mode 100644 drivers/staging/wimax/i2400m/control.c
 create mode 100644 drivers/staging/wimax/i2400m/debug-levels.h
 create mode 100644 drivers/staging/wimax/i2400m/debugfs.c
 create mode 100644 drivers/staging/wimax/i2400m/driver.c
 create mode 100644 drivers/staging/wimax/i2400m/fw.c
 create mode 100644 drivers/staging/wimax/i2400m/i2400m-usb.h
 create mode 100644 drivers/staging/wimax/i2400m/i2400m.h
 create mode 100644 drivers/staging/wimax/i2400m/linux-wimax-i2400m.h
 create mode 100644 drivers/staging/wimax/i2400m/netdev.c
 create mode 100644 drivers/staging/wimax/i2400m/op-rfkill.c
 create mode 100644 drivers/staging/wimax/i2400m/rx.c
 create mode 100644 drivers/staging/wimax/i2400m/sysfs.c
 create mode 100644 drivers/staging/wimax/i2400m/tx.c
 create mode 100644 drivers/staging/wimax/i2400m/usb-debug-levels.h
 create mode 100644 drivers/staging/wimax/i2400m/usb-fw.c
 create mode 100644 drivers/staging/wimax/i2400m/usb-notif.c
 create mode 100644 drivers/staging/wimax/i2400m/usb-rx.c
 create mode 100644 drivers/staging/wimax/i2400m/usb-tx.c
 create mode 100644 drivers/staging/wimax/i2400m/usb.c
 create mode 100644 drivers/staging/wimax/id-table.c
 create mode 100644 drivers/staging/wimax/linux-wimax-debug.h
 create mode 100644 drivers/staging/wimax/linux-wimax.h
 create mode 100644 drivers/staging/wimax/net-wimax.h
 create mode 100644 drivers/staging/wimax/op-msg.c
 create mode 100644 drivers/staging/wimax/op-reset.c
 create mode 100644 drivers/staging/wimax/op-rfkill.c
 create mode 100644 drivers/staging/wimax/op-state-get.c
 create mode 100644 drivers/staging/wimax/stack.c
 create mode 100644 drivers/staging/wimax/wimax-internal.h
 delete mode 100644 include/linux/wimax/debug.h
 delete mode 100644 include/net/wimax.h
 delete mode 100644 include/uapi/linux/wimax.h
 delete mode 100644 include/uapi/linux/wimax/i2400m.h
 delete mode 100644 net/wimax/Kconfig
 delete mode 100644 net/wimax/Makefile
 delete mode 100644 net/wimax/debug-levels.h
 delete mode 100644 net/wimax/debugfs.c
 delete mode 100644 net/wimax/id-table.c
 delete mode 100644 net/wimax/op-msg.c
 delete mode 100644 net/wimax/op-reset.c
 delete mode 100644 net/wimax/op-rfkill.c
 delete mode 100644 net/wimax/op-state-get.c
 delete mode 100644 net/wimax/stack.c
 delete mode 100644 net/wimax/wimax-internal.h

(limited to 'include/uapi')

diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index ed1cf94ea50c..d53986a424c4 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -115,7 +115,6 @@ configure specific aspects of kernel behavior to your liking.
    unicode
    vga-softcursor
    video-output
-   wimax/index
    xfs
 
 .. only::  subproject and html
diff --git a/Documentation/admin-guide/wimax/i2400m.rst b/Documentation/admin-guide/wimax/i2400m.rst
deleted file mode 100644
index 194388c0c351..000000000000
--- a/Documentation/admin-guide/wimax/i2400m.rst
+++ /dev/null
@@ -1,283 +0,0 @@
-.. include:: <isonum.txt>
-
-====================================================
-Driver for the Intel Wireless Wimax Connection 2400m
-====================================================
-
-:Copyright: |copy| 2008 Intel Corporation < linux-wimax@intel.com >
-
-   This provides a driver for the Intel Wireless WiMAX Connection 2400m
-   and a basic Linux kernel WiMAX stack.
-
-1. Requirements
-===============
-
-     * Linux installation with Linux kernel 2.6.22 or newer (if building
-       from a separate tree)
-     * Intel i2400m Echo Peak or Baxter Peak; this includes the Intel
-       Wireless WiMAX/WiFi Link 5x50 series.
-     * build tools:
-
-          + Linux kernel development package for the target kernel; to
-            build against your currently running kernel, you need to have
-            the kernel development package corresponding to the running
-            image installed (usually if your kernel is named
-            linux-VERSION, the development package is called
-            linux-dev-VERSION or linux-headers-VERSION).
-          + GNU C Compiler, make
-
-2. Compilation and installation
-===============================
-
-2.1. Compilation of the drivers included in the kernel
-------------------------------------------------------
-
-   Configure the kernel; to enable the WiMAX drivers select Drivers >
-   Networking Drivers > WiMAX device support. Enable all of them as
-   modules (easier).
-
-   If USB or SDIO are not enabled in the kernel configuration, the options
-   to build the i2400m USB or SDIO drivers will not show. Enable said
-   subsystems and go back to the WiMAX menu to enable the drivers.
-
-   Compile and install your kernel as usual.
-
-2.2. Compilation of the drivers distributed as an standalone module
--------------------------------------------------------------------
-
-   To compile::
-
-	$ cd source/directory
-	$ make
-
-   Once built you can load and unload using the provided load.sh script;
-   load.sh will load the modules, load.sh u will unload them.
-
-   To install in the default kernel directories (and enable auto loading
-   when the device is plugged)::
-
-	$ make install
-	$ depmod -a
-
-   If your kernel development files are located in a non standard
-   directory or if you want to build for a kernel that is not the
-   currently running one, set KDIR to the right location::
-
-	$ make KDIR=/path/to/kernel/dev/tree
-
-   For more information, please contact linux-wimax@intel.com.
-
-3. Installing the firmware
---------------------------
-
-   The firmware can be obtained from http://linuxwimax.org or might have
-   been supplied with your hardware.
-
-   It has to be installed in the target system::
-
-	$ cp FIRMWAREFILE.sbcf /lib/firmware/i2400m-fw-BUSTYPE-1.3.sbcf
-
-     * NOTE: if your firmware came in an .rpm or .deb file, just install
-       it as normal, with the rpm (rpm -i FIRMWARE.rpm) or dpkg
-       (dpkg -i FIRMWARE.deb) commands. No further action is needed.
-     * BUSTYPE will be usb or sdio, depending on the hardware you have.
-       Each hardware type comes with its own firmware and will not work
-       with other types.
-
-4. Design
-=========
-
-   This package contains two major parts: a WiMAX kernel stack and a
-   driver for the Intel i2400m.
-
-   The WiMAX stack is designed to provide for common WiMAX control
-   services to current and future WiMAX devices from any vendor; please
-   see README.wimax for details.
-
-   The i2400m kernel driver is broken up in two main parts: the bus
-   generic driver and the bus-specific drivers. The bus generic driver
-   forms the drivercore and contain no knowledge of the actual method we
-   use to connect to the device. The bus specific drivers are just the
-   glue to connect the bus-generic driver and the device. Currently only
-   USB and SDIO are supported. See drivers/net/wimax/i2400m/i2400m.h for
-   more information.
-
-   The bus generic driver is logically broken up in two parts: OS-glue and
-   hardware-glue. The OS-glue interfaces with Linux. The hardware-glue
-   interfaces with the device on using an interface provided by the
-   bus-specific driver. The reason for this breakup is to be able to
-   easily reuse the hardware-glue to write drivers for other OSes; note
-   the hardware glue part is written as a native Linux driver; no
-   abstraction layers are used, so to port to another OS, the Linux kernel
-   API calls should be replaced with the target OS's.
-
-5. Usage
-========
-
-   To load the driver, follow the instructions in the install section;
-   once the driver is loaded, plug in the device (unless it is permanently
-   plugged in). The driver will enumerate the device, upload the firmware
-   and output messages in the kernel log (dmesg, /var/log/messages or
-   /var/log/kern.log) such as::
-
-	...
-	i2400m_usb 5-4:1.0: firmware interface version 8.0.0
-	i2400m_usb 5-4:1.0: WiMAX interface wmx0 (00:1d:e1:01:94:2c) ready
-
-   At this point the device is ready to work.
-
-   Current versions require the Intel WiMAX Network Service in userspace
-   to make things work. See the network service's README for instructions
-   on how to scan, connect and disconnect.
-
-5.1. Module parameters
-----------------------
-
-   Module parameters can be set at kernel or module load time or by
-   echoing values::
-
-	$ echo VALUE > /sys/module/MODULENAME/parameters/PARAMETERNAME
-
-   To make changes permanent, for example, for the i2400m module, you can
-   also create a file named /etc/modprobe.d/i2400m containing::
-
-	options i2400m idle_mode_disabled=1
-
-   To find which parameters are supported by a module, run::
-
-	$ modinfo path/to/module.ko
-
-   During kernel bootup (if the driver is linked in the kernel), specify
-   the following to the kernel command line::
-
-	i2400m.PARAMETER=VALUE
-
-5.1.1. i2400m: idle_mode_disabled
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-   The i2400m module supports a parameter to disable idle mode. This
-   parameter, once set, will take effect only when the device is
-   reinitialized by the driver (eg: following a reset or a reconnect).
-
-5.2. Debug operations: debugfs entries
---------------------------------------
-
-   The driver will register debugfs entries that allow the user to tweak
-   debug settings. There are three main container directories where
-   entries are placed, which correspond to the three blocks a i2400m WiMAX
-   driver has:
-
-     * /sys/kernel/debug/wimax:DEVNAME/ for the generic WiMAX stack
-       controls
-     * /sys/kernel/debug/wimax:DEVNAME/i2400m for the i2400m generic
-       driver controls
-     * /sys/kernel/debug/wimax:DEVNAME/i2400m-usb (or -sdio) for the
-       bus-specific i2400m-usb or i2400m-sdio controls).
-
-   Of course, if debugfs is mounted in a directory other than
-   /sys/kernel/debug, those paths will change.
-
-5.2.1. Increasing debug output
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-   The files named *dl_* indicate knobs for controlling the debug output
-   of different submodules::
-
-	# find /sys/kernel/debug/wimax\:wmx0 -name \*dl_\*
-	/sys/kernel/debug/wimax:wmx0/i2400m-usb/dl_tx
-	/sys/kernel/debug/wimax:wmx0/i2400m-usb/dl_rx
-	/sys/kernel/debug/wimax:wmx0/i2400m-usb/dl_notif
-	/sys/kernel/debug/wimax:wmx0/i2400m-usb/dl_fw
-	/sys/kernel/debug/wimax:wmx0/i2400m-usb/dl_usb
-	/sys/kernel/debug/wimax:wmx0/i2400m/dl_tx
-	/sys/kernel/debug/wimax:wmx0/i2400m/dl_rx
-	/sys/kernel/debug/wimax:wmx0/i2400m/dl_rfkill
-	/sys/kernel/debug/wimax:wmx0/i2400m/dl_netdev
-	/sys/kernel/debug/wimax:wmx0/i2400m/dl_fw
-	/sys/kernel/debug/wimax:wmx0/i2400m/dl_debugfs
-	/sys/kernel/debug/wimax:wmx0/i2400m/dl_driver
-	/sys/kernel/debug/wimax:wmx0/i2400m/dl_control
-	/sys/kernel/debug/wimax:wmx0/wimax_dl_stack
-	/sys/kernel/debug/wimax:wmx0/wimax_dl_op_rfkill
-	/sys/kernel/debug/wimax:wmx0/wimax_dl_op_reset
-	/sys/kernel/debug/wimax:wmx0/wimax_dl_op_msg
-	/sys/kernel/debug/wimax:wmx0/wimax_dl_id_table
-	/sys/kernel/debug/wimax:wmx0/wimax_dl_debugfs
-
-   By reading the file you can obtain the current value of said debug
-   level; by writing to it, you can set it.
-
-   To increase the debug level of, for example, the i2400m's generic TX
-   engine, just write::
-
-	$ echo 3 > /sys/kernel/debug/wimax:wmx0/i2400m/dl_tx
-
-   Increasing numbers yield increasing debug information; for details of
-   what is printed and the available levels, check the source. The code
-   uses 0 for disabled and increasing values until 8.
-
-5.2.2. RX and TX statistics
-^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-   The i2400m/rx_stats and i2400m/tx_stats provide statistics about the
-   data reception/delivery from the device::
-
-	$ cat /sys/kernel/debug/wimax:wmx0/i2400m/rx_stats
-	45 1 3 34 3104 48 480
-
-   The numbers reported are:
-
-     * packets/RX-buffer: total, min, max
-     * RX-buffers: total RX buffers received, accumulated RX buffer size
-       in bytes, min size received, max size received
-
-   Thus, to find the average buffer size received, divide accumulated
-   RX-buffer / total RX-buffers.
-
-   To clear the statistics back to 0, write anything to the rx_stats file::
-
-	$ echo 1 > /sys/kernel/debug/wimax:wmx0/i2400m_rx_stats
-
-   Likewise for TX.
-
-   Note the packets this debug file refers to are not network packet, but
-   packets in the sense of the device-specific protocol for communication
-   to the host. See drivers/net/wimax/i2400m/tx.c.
-
-5.2.3. Tracing messages received from user space
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-   To echo messages received from user space into the trace pipe that the
-   i2400m driver creates, set the debug file i2400m/trace_msg_from_user to
-   1::
-
-	$ echo 1 > /sys/kernel/debug/wimax:wmx0/i2400m/trace_msg_from_user
-
-5.2.4. Performing a device reset
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-   By writing a 0, a 1 or a 2 to the file
-   /sys/kernel/debug/wimax:wmx0/reset, the driver performs a warm (without
-   disconnecting from the bus), cold (disconnecting from the bus) or bus
-   (bus specific) reset on the device.
-
-5.2.5. Asking the device to enter power saving mode
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-   By writing any value to the /sys/kernel/debug/wimax:wmx0 file, the
-   device will attempt to enter power saving mode.
-
-6. Troubleshooting
-==================
-
-6.1. Driver complains about ``i2400m-fw-usb-1.2.sbcf: request failed``
-----------------------------------------------------------------------
-
-   If upon connecting the device, the following is output in the kernel
-   log::
-
-	i2400m_usb 5-4:1.0: fw i2400m-fw-usb-1.3.sbcf: request failed: -2
-
-   This means that the driver cannot locate the firmware file named
-   /lib/firmware/i2400m-fw-usb-1.2.sbcf. Check that the file is present in
-   the right location.
diff --git a/Documentation/admin-guide/wimax/index.rst b/Documentation/admin-guide/wimax/index.rst
deleted file mode 100644
index fdf7c1f99ff5..000000000000
--- a/Documentation/admin-guide/wimax/index.rst
+++ /dev/null
@@ -1,19 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-===============
-WiMAX subsystem
-===============
-
-.. toctree::
-   :maxdepth: 2
-
-   wimax
-
-   i2400m
-
-.. only::  subproject and html
-
-   Indices
-   =======
-
-   * :ref:`genindex`
diff --git a/Documentation/admin-guide/wimax/wimax.rst b/Documentation/admin-guide/wimax/wimax.rst
deleted file mode 100644
index 817ee8ba2732..000000000000
--- a/Documentation/admin-guide/wimax/wimax.rst
+++ /dev/null
@@ -1,89 +0,0 @@
-.. include:: <isonum.txt>
-
-========================
-Linux kernel WiMAX stack
-========================
-
-:Copyright: |copy| 2008 Intel Corporation < linux-wimax@intel.com >
-
-   This provides a basic Linux kernel WiMAX stack to provide a common
-   control API for WiMAX devices, usable from kernel and user space.
-
-1. Design
-=========
-
-   The WiMAX stack is designed to provide for common WiMAX control
-   services to current and future WiMAX devices from any vendor.
-
-   Because currently there is only one and we don't know what would be the
-   common services, the APIs it currently provides are very minimal.
-   However, it is done in such a way that it is easily extensible to
-   accommodate future requirements.
-
-   The stack works by embedding a struct wimax_dev in your device's
-   control structures. This provides a set of callbacks that the WiMAX
-   stack will call in order to implement control operations requested by
-   the user. As well, the stack provides API functions that the driver
-   calls to notify about changes of state in the device.
-
-   The stack exports the API calls needed to control the device to user
-   space using generic netlink as a marshalling mechanism. You can access
-   them using your own code or use the wrappers provided for your
-   convenience in libwimax (in the wimax-tools package).
-
-   For detailed information on the stack, please see
-   include/linux/wimax.h.
-
-2. Usage
-========
-
-   For usage in a driver (registration, API, etc) please refer to the
-   instructions in the header file include/linux/wimax.h.
-
-   When a device is registered with the WiMAX stack, a set of debugfs
-   files will appear in /sys/kernel/debug/wimax:wmxX can tweak for
-   control.
-
-2.1. Obtaining debug information: debugfs entries
--------------------------------------------------
-
-   The WiMAX stack is compiled, by default, with debug messages that can
-   be used to diagnose issues. By default, said messages are disabled.
-
-   The drivers will register debugfs entries that allow the user to tweak
-   debug settings.
-
-   Each driver, when registering with the stack, will cause a debugfs
-   directory named wimax:DEVICENAME to be created; optionally, it might
-   create more subentries below it.
-
-2.1.1. Increasing debug output
-------------------------------
-
-   The files named *dl_* indicate knobs for controlling the debug output
-   of different submodules of the WiMAX stack::
-
-	# find /sys/kernel/debug/wimax\:wmx0 -name \*dl_\*
-	/sys/kernel/debug/wimax:wmx0/wimax_dl_stack
-	/sys/kernel/debug/wimax:wmx0/wimax_dl_op_rfkill
-	/sys/kernel/debug/wimax:wmx0/wimax_dl_op_reset
-	/sys/kernel/debug/wimax:wmx0/wimax_dl_op_msg
-	/sys/kernel/debug/wimax:wmx0/wimax_dl_id_table
-	/sys/kernel/debug/wimax:wmx0/wimax_dl_debugfs
-	/sys/kernel/debug/wimax:wmx0/.... # other driver specific files
-
-   NOTE:
-       Of course, if debugfs is mounted in a directory other than
-       /sys/kernel/debug, those paths will change.
-
-   By reading the file you can obtain the current value of said debug
-   level; by writing to it, you can set it.
-
-   To increase the debug level of, for example, the id-table submodule,
-   just write:
-
-	$ echo 3 > /sys/kernel/debug/wimax:wmx0/wimax_dl_id_table
-
-   Increasing numbers yield increasing debug information; for details of
-   what is printed and the available levels, check the source. The code
-   uses 0 for disabled and increasing values until 8.
diff --git a/Documentation/networking/kapi.rst b/Documentation/networking/kapi.rst
index d198fa5eaacd..ea55f462cefa 100644
--- a/Documentation/networking/kapi.rst
+++ b/Documentation/networking/kapi.rst
@@ -83,27 +83,6 @@ SUN RPC subsystem
 .. kernel-doc:: net/sunrpc/clnt.c
    :export:
 
-WiMAX
------
-
-.. kernel-doc:: net/wimax/op-msg.c
-   :export:
-
-.. kernel-doc:: net/wimax/op-reset.c
-   :export:
-
-.. kernel-doc:: net/wimax/op-rfkill.c
-   :export:
-
-.. kernel-doc:: net/wimax/stack.c
-   :export:
-
-.. kernel-doc:: include/net/wimax.h
-   :internal:
-
-.. kernel-doc:: include/uapi/linux/wimax.h
-   :internal:
-
 Network device support
 ======================
 
diff --git a/Documentation/translations/zh_CN/admin-guide/index.rst b/Documentation/translations/zh_CN/admin-guide/index.rst
index ed5ab7e37f38..48bbd3ebad48 100644
--- a/Documentation/translations/zh_CN/admin-guide/index.rst
+++ b/Documentation/translations/zh_CN/admin-guide/index.rst
@@ -114,7 +114,6 @@ Todolist:
    unicode
    vga-softcursor
    video-output
-   wimax/index
    xfs
 
 .. only::  subproject and html
diff --git a/MAINTAINERS b/MAINTAINERS
index e73636b75f29..17f5571788c9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9106,16 +9106,6 @@ W:	https://wireless.wiki.kernel.org/en/users/drivers/iwlwifi
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/iwlwifi/iwlwifi.git
 F:	drivers/net/wireless/intel/iwlwifi/
 
-INTEL WIRELESS WIMAX CONNECTION 2400
-M:	Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
-M:	linux-wimax@intel.com
-L:	wimax@linuxwimax.org (subscribers-only)
-S:	Supported
-W:	http://linuxwimax.org
-F:	Documentation/admin-guide/wimax/i2400m.rst
-F:	drivers/net/wimax/i2400m/
-F:	include/uapi/linux/wimax/i2400m.h
-
 INTEL WMI SLIM BOOTLOADER (SBL) FIRMWARE UPDATE DRIVER
 M:	Jithu Joseph <jithu.joseph@intel.com>
 R:	Maurice Ma <maurice.ma@intel.com>
@@ -18907,18 +18897,6 @@ S:	Supported
 W:	https://wireless.wiki.kernel.org/en/users/Drivers/wil6210
 F:	drivers/net/wireless/ath/wil6210/
 
-WIMAX STACK
-M:	Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
-M:	linux-wimax@intel.com
-L:	wimax@linuxwimax.org (subscribers-only)
-S:	Supported
-W:	http://linuxwimax.org
-F:	Documentation/admin-guide/wimax/wimax.rst
-F:	include/linux/wimax/debug.h
-F:	include/net/wimax.h
-F:	include/uapi/linux/wimax.h
-F:	net/wimax/
-
 WINBOND CIR DRIVER
 M:	David Härdeman <david@hardeman.nu>
 S:	Maintained
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index c3dbe64e628e..c0af2dc8b938 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -489,8 +489,6 @@ source "drivers/net/usb/Kconfig"
 
 source "drivers/net/wireless/Kconfig"
 
-source "drivers/net/wimax/Kconfig"
-
 source "drivers/net/wan/Kconfig"
 
 source "drivers/net/ieee802154/Kconfig"
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 72e18d505d1a..b27e8633c305 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -66,7 +66,6 @@ obj-$(CONFIG_NET_SB1000) += sb1000.o
 obj-$(CONFIG_SUNGEM_PHY) += sungem_phy.o
 obj-$(CONFIG_WAN) += wan/
 obj-$(CONFIG_WLAN) += wireless/
-obj-$(CONFIG_WIMAX) += wimax/
 obj-$(CONFIG_IEEE802154) += ieee802154/
 
 obj-$(CONFIG_VMXNET3) += vmxnet3/
diff --git a/drivers/net/wimax/Kconfig b/drivers/net/wimax/Kconfig
deleted file mode 100644
index 2249e3d77a76..000000000000
--- a/drivers/net/wimax/Kconfig
+++ /dev/null
@@ -1,18 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# WiMAX LAN device drivers configuration
-#
-
-
-comment "Enable WiMAX (Networking options) to see the WiMAX drivers"
-	depends on WIMAX = n
-
-if WIMAX
-
-menu "WiMAX Wireless Broadband devices"
-
-source "drivers/net/wimax/i2400m/Kconfig"
-
-endmenu
-
-endif
diff --git a/drivers/net/wimax/Makefile b/drivers/net/wimax/Makefile
deleted file mode 100644
index b4575bacf994..000000000000
--- a/drivers/net/wimax/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_WIMAX_I2400M)	+= i2400m/
diff --git a/drivers/net/wimax/i2400m/Kconfig b/drivers/net/wimax/i2400m/Kconfig
deleted file mode 100644
index 843b905a26a3..000000000000
--- a/drivers/net/wimax/i2400m/Kconfig
+++ /dev/null
@@ -1,37 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-
-config WIMAX_I2400M
-	tristate
-	depends on WIMAX
-	select FW_LOADER
-
-comment "Enable USB support to see WiMAX USB drivers"
-	depends on USB = n
-
-config WIMAX_I2400M_USB
-	tristate "Intel Wireless WiMAX Connection 2400 over USB (including 5x50)"
-	depends on WIMAX && USB
-	select WIMAX_I2400M
-	help
-	  Select if you have a device based on the Intel WiMAX
-	  Connection 2400 over USB (like any of the Intel Wireless
-	  WiMAX/WiFi Link 5x50 series).
-
-	  If unsure, it is safe to select M (module).
-
-config WIMAX_I2400M_DEBUG_LEVEL
-	int "WiMAX i2400m debug level"
-	depends on WIMAX_I2400M
-	default 8
-	help
-
-	  Select the maximum debug verbosity level to be compiled into
-	  the WiMAX i2400m driver code.
-
-	  By default, this is disabled at runtime and can be
-	  selectively enabled at runtime for different parts of the
-	  code using the sysfs debug-levels file.
-
-	  If set at zero, this will compile out all the debug code.
-
-	  It is recommended that it is left at 8.
diff --git a/drivers/net/wimax/i2400m/Makefile b/drivers/net/wimax/i2400m/Makefile
deleted file mode 100644
index b1db1eff0648..000000000000
--- a/drivers/net/wimax/i2400m/Makefile
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-obj-$(CONFIG_WIMAX_I2400M) += i2400m.o
-obj-$(CONFIG_WIMAX_I2400M_USB) += i2400m-usb.o
-
-i2400m-y :=		\
-	control.o	\
-	driver.o	\
-	fw.o		\
-	op-rfkill.o	\
-	sysfs.o		\
-	netdev.o	\
-	tx.o		\
-	rx.o
-
-i2400m-$(CONFIG_DEBUG_FS) += debugfs.o
-
-i2400m-usb-y :=			\
-	usb-fw.o		\
-	usb-notif.o		\
-	usb-tx.o		\
-	usb-rx.o		\
-	usb.o
diff --git a/drivers/net/wimax/i2400m/control.c b/drivers/net/wimax/i2400m/control.c
deleted file mode 100644
index 8df98757d901..000000000000
--- a/drivers/net/wimax/i2400m/control.c
+++ /dev/null
@@ -1,1434 +0,0 @@
-/*
- * Intel Wireless WiMAX Connection 2400m
- * Miscellaneous control functions for managing the device
- *
- *
- * Copyright (C) 2007-2008 Intel Corporation. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * Intel Corporation <linux-wimax@intel.com>
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *  - Initial implementation
- *
- * This is a collection of functions used to control the device (plus
- * a few helpers).
- *
- * There are utilities for handling TLV buffers, hooks on the device's
- * reports to act on device changes of state [i2400m_report_hook()],
- * on acks to commands [i2400m_msg_ack_hook()], a helper for sending
- * commands to the device and blocking until a reply arrives
- * [i2400m_msg_to_dev()], a few high level commands for manipulating
- * the device state, powersving mode and configuration plus the
- * routines to setup the device once communication is stablished with
- * it [i2400m_dev_initialize()].
- *
- * ROADMAP
- *
- * i2400m_dev_initialize()       Called by i2400m_dev_start()
- *   i2400m_set_init_config()
- *   i2400m_cmd_get_state()
- * i2400m_dev_shutdown()        Called by i2400m_dev_stop()
- *   i2400m_reset()
- *
- * i2400m_{cmd,get,set}_*()
- *   i2400m_msg_to_dev()
- *   i2400m_msg_check_status()
- *
- * i2400m_report_hook()         Called on reception of an event
- *   i2400m_report_state_hook()
- *     i2400m_tlv_buffer_walk()
- *     i2400m_tlv_match()
- *     i2400m_report_tlv_system_state()
- *     i2400m_report_tlv_rf_switches_status()
- *     i2400m_report_tlv_media_status()
- *   i2400m_cmd_enter_powersave()
- *
- * i2400m_msg_ack_hook()        Called on reception of a reply to a
- *                              command, get or set
- */
-
-#include <stdarg.h>
-#include "i2400m.h"
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/wimax/i2400m.h>
-#include <linux/export.h>
-#include <linux/moduleparam.h>
-
-
-#define D_SUBMODULE control
-#include "debug-levels.h"
-
-static int i2400m_idle_mode_disabled;/* 0 (idle mode enabled) by default */
-module_param_named(idle_mode_disabled, i2400m_idle_mode_disabled, int, 0644);
-MODULE_PARM_DESC(idle_mode_disabled,
-		 "If true, the device will not enable idle mode negotiation "
-		 "with the base station (when connected) to save power.");
-
-/* 0 (power saving enabled) by default */
-static int i2400m_power_save_disabled;
-module_param_named(power_save_disabled, i2400m_power_save_disabled, int, 0644);
-MODULE_PARM_DESC(power_save_disabled,
-		 "If true, the driver will not tell the device to enter "
-		 "power saving mode when it reports it is ready for it. "
-		 "False by default (so the device is told to do power "
-		 "saving).");
-
-static int i2400m_passive_mode;	/* 0 (passive mode disabled) by default */
-module_param_named(passive_mode, i2400m_passive_mode, int, 0644);
-MODULE_PARM_DESC(passive_mode,
-		 "If true, the driver will not do any device setup "
-		 "and leave it up to user space, who must be properly "
-		 "setup.");
-
-
-/*
- * Return if a TLV is of a give type and size
- *
- * @tlv_hdr: pointer to the TLV
- * @tlv_type: type of the TLV we are looking for
- * @tlv_size: expected size of the TLV we are looking for (if -1,
- *            don't check the size). This includes the header
- * Returns: 0 if the TLV matches
- *          < 0 if it doesn't match at all
- *          > 0 total TLV + payload size, if the type matches, but not
- *              the size
- */
-static
-ssize_t i2400m_tlv_match(const struct i2400m_tlv_hdr *tlv,
-		     enum i2400m_tlv tlv_type, ssize_t tlv_size)
-{
-	if (le16_to_cpu(tlv->type) != tlv_type)	/* Not our type? skip */
-		return -1;
-	if (tlv_size != -1
-	    && le16_to_cpu(tlv->length) + sizeof(*tlv) != tlv_size) {
-		size_t size = le16_to_cpu(tlv->length) + sizeof(*tlv);
-		printk(KERN_WARNING "W: tlv type 0x%x mismatched because of "
-		       "size (got %zu vs %zd expected)\n",
-		       tlv_type, size, tlv_size);
-		return size;
-	}
-	return 0;
-}
-
-
-/*
- * Given a buffer of TLVs, iterate over them
- *
- * @i2400m: device instance
- * @tlv_buf: pointer to the beginning of the TLV buffer
- * @buf_size: buffer size in bytes
- * @tlv_pos: seek position; this is assumed to be a pointer returned
- *           by i2400m_tlv_buffer_walk() [and thus, validated]. The
- *           TLV returned will be the one following this one.
- *
- * Usage:
- *
- * tlv_itr = NULL;
- * while (tlv_itr = i2400m_tlv_buffer_walk(i2400m, buf, size, tlv_itr))  {
- *         ...
- *         // Do stuff with tlv_itr, DON'T MODIFY IT
- *         ...
- * }
- */
-static
-const struct i2400m_tlv_hdr *i2400m_tlv_buffer_walk(
-	struct i2400m *i2400m,
-	const void *tlv_buf, size_t buf_size,
-	const struct i2400m_tlv_hdr *tlv_pos)
-{
-	struct device *dev = i2400m_dev(i2400m);
-	const struct i2400m_tlv_hdr *tlv_top = tlv_buf + buf_size;
-	size_t offset, length, avail_size;
-	unsigned type;
-
-	if (tlv_pos == NULL)	/* Take the first one? */
-		tlv_pos = tlv_buf;
-	else			/* Nope, the next one */
-		tlv_pos = (void *) tlv_pos
-			+ le16_to_cpu(tlv_pos->length) + sizeof(*tlv_pos);
-	if (tlv_pos == tlv_top) {	/* buffer done */
-		tlv_pos = NULL;
-		goto error_beyond_end;
-	}
-	if (tlv_pos > tlv_top) {
-		tlv_pos = NULL;
-		WARN_ON(1);
-		goto error_beyond_end;
-	}
-	offset = (void *) tlv_pos - (void *) tlv_buf;
-	avail_size = buf_size - offset;
-	if (avail_size < sizeof(*tlv_pos)) {
-		dev_err(dev, "HW BUG? tlv_buf %p [%zu bytes], tlv @%zu: "
-			"short header\n", tlv_buf, buf_size, offset);
-		goto error_short_header;
-	}
-	type = le16_to_cpu(tlv_pos->type);
-	length = le16_to_cpu(tlv_pos->length);
-	if (avail_size < sizeof(*tlv_pos) + length) {
-		dev_err(dev, "HW BUG? tlv_buf %p [%zu bytes], "
-			"tlv type 0x%04x @%zu: "
-			"short data (%zu bytes vs %zu needed)\n",
-			tlv_buf, buf_size, type, offset, avail_size,
-			sizeof(*tlv_pos) + length);
-		goto error_short_header;
-	}
-error_short_header:
-error_beyond_end:
-	return tlv_pos;
-}
-
-
-/*
- * Find a TLV in a buffer of sequential TLVs
- *
- * @i2400m: device descriptor
- * @tlv_hdr: pointer to the first TLV in the sequence
- * @size: size of the buffer in bytes; all TLVs are assumed to fit
- *        fully in the buffer (otherwise we'll complain).
- * @tlv_type: type of the TLV we are looking for
- * @tlv_size: expected size of the TLV we are looking for (if -1,
- *            don't check the size). This includes the header
- *
- * Returns: NULL if the TLV is not found, otherwise a pointer to
- *          it. If the sizes don't match, an error is printed and NULL
- *          returned.
- */
-static
-const struct i2400m_tlv_hdr *i2400m_tlv_find(
-	struct i2400m *i2400m,
-	const struct i2400m_tlv_hdr *tlv_hdr, size_t size,
-	enum i2400m_tlv tlv_type, ssize_t tlv_size)
-{
-	ssize_t match;
-	struct device *dev = i2400m_dev(i2400m);
-	const struct i2400m_tlv_hdr *tlv = NULL;
-	while ((tlv = i2400m_tlv_buffer_walk(i2400m, tlv_hdr, size, tlv))) {
-		match = i2400m_tlv_match(tlv, tlv_type, tlv_size);
-		if (match == 0)		/* found it :) */
-			break;
-		if (match > 0)
-			dev_warn(dev, "TLV type 0x%04x found with size "
-				 "mismatch (%zu vs %zd needed)\n",
-				 tlv_type, match, tlv_size);
-	}
-	return tlv;
-}
-
-
-static const struct
-{
-	char *msg;
-	int errno;
-} ms_to_errno[I2400M_MS_MAX] = {
-	[I2400M_MS_DONE_OK] = { "", 0 },
-	[I2400M_MS_DONE_IN_PROGRESS] = { "", 0 },
-	[I2400M_MS_INVALID_OP] = { "invalid opcode", -ENOSYS },
-	[I2400M_MS_BAD_STATE] = { "invalid state", -EILSEQ },
-	[I2400M_MS_ILLEGAL_VALUE] = { "illegal value", -EINVAL },
-	[I2400M_MS_MISSING_PARAMS] = { "missing parameters", -ENOMSG },
-	[I2400M_MS_VERSION_ERROR] = { "bad version", -EIO },
-	[I2400M_MS_ACCESSIBILITY_ERROR] = { "accesibility error", -EIO },
-	[I2400M_MS_BUSY] = { "busy", -EBUSY },
-	[I2400M_MS_CORRUPTED_TLV] = { "corrupted TLV", -EILSEQ },
-	[I2400M_MS_UNINITIALIZED] = { "uninitialized", -EILSEQ },
-	[I2400M_MS_UNKNOWN_ERROR] = { "unknown error", -EIO },
-	[I2400M_MS_PRODUCTION_ERROR] = { "production error", -EIO },
-	[I2400M_MS_NO_RF] = { "no RF", -EIO },
-	[I2400M_MS_NOT_READY_FOR_POWERSAVE] =
-		{ "not ready for powersave", -EACCES },
-	[I2400M_MS_THERMAL_CRITICAL] = { "thermal critical", -EL3HLT },
-};
-
-
-/*
- * i2400m_msg_check_status - translate a message's status code
- *
- * @i2400m: device descriptor
- * @l3l4_hdr: message header
- * @strbuf: buffer to place a formatted error message (unless NULL).
- * @strbuf_size: max amount of available space; larger messages will
- * be truncated.
- *
- * Returns: errno code corresponding to the status code in @l3l4_hdr
- *          and a message in @strbuf describing the error.
- */
-int i2400m_msg_check_status(const struct i2400m_l3l4_hdr *l3l4_hdr,
-			    char *strbuf, size_t strbuf_size)
-{
-	int result;
-	enum i2400m_ms status = le16_to_cpu(l3l4_hdr->status);
-	const char *str;
-
-	if (status == 0)
-		return 0;
-	if (status >= ARRAY_SIZE(ms_to_errno)) {
-		str = "unknown status code";
-		result = -EBADR;
-	} else {
-		str = ms_to_errno[status].msg;
-		result = ms_to_errno[status].errno;
-	}
-	if (strbuf)
-		snprintf(strbuf, strbuf_size, "%s (%d)", str, status);
-	return result;
-}
-
-
-/*
- * Act on a TLV System State reported by the device
- *
- * @i2400m: device descriptor
- * @ss: validated System State TLV
- */
-static
-void i2400m_report_tlv_system_state(struct i2400m *i2400m,
-				    const struct i2400m_tlv_system_state *ss)
-{
-	struct device *dev = i2400m_dev(i2400m);
-	struct wimax_dev *wimax_dev = &i2400m->wimax_dev;
-	enum i2400m_system_state i2400m_state = le32_to_cpu(ss->state);
-
-	d_fnstart(3, dev, "(i2400m %p ss %p [%u])\n", i2400m, ss, i2400m_state);
-
-	if (i2400m->state != i2400m_state) {
-		i2400m->state = i2400m_state;
-		wake_up_all(&i2400m->state_wq);
-	}
-	switch (i2400m_state) {
-	case I2400M_SS_UNINITIALIZED:
-	case I2400M_SS_INIT:
-	case I2400M_SS_CONFIG:
-	case I2400M_SS_PRODUCTION:
-		wimax_state_change(wimax_dev, WIMAX_ST_UNINITIALIZED);
-		break;
-
-	case I2400M_SS_RF_OFF:
-	case I2400M_SS_RF_SHUTDOWN:
-		wimax_state_change(wimax_dev, WIMAX_ST_RADIO_OFF);
-		break;
-
-	case I2400M_SS_READY:
-	case I2400M_SS_STANDBY:
-	case I2400M_SS_SLEEPACTIVE:
-		wimax_state_change(wimax_dev, WIMAX_ST_READY);
-		break;
-
-	case I2400M_SS_CONNECTING:
-	case I2400M_SS_WIMAX_CONNECTED:
-		wimax_state_change(wimax_dev, WIMAX_ST_READY);
-		break;
-
-	case I2400M_SS_SCAN:
-	case I2400M_SS_OUT_OF_ZONE:
-		wimax_state_change(wimax_dev, WIMAX_ST_SCANNING);
-		break;
-
-	case I2400M_SS_IDLE:
-		d_printf(1, dev, "entering BS-negotiated idle mode\n");
-		fallthrough;
-	case I2400M_SS_DISCONNECTING:
-	case I2400M_SS_DATA_PATH_CONNECTED:
-		wimax_state_change(wimax_dev, WIMAX_ST_CONNECTED);
-		break;
-
-	default:
-		/* Huh? just in case, shut it down */
-		dev_err(dev, "HW BUG? unknown state %u: shutting down\n",
-			i2400m_state);
-		i2400m_reset(i2400m, I2400M_RT_WARM);
-		break;
-	}
-	d_fnend(3, dev, "(i2400m %p ss %p [%u]) = void\n",
-		i2400m, ss, i2400m_state);
-}
-
-
-/*
- * Parse and act on a TLV Media Status sent by the device
- *
- * @i2400m: device descriptor
- * @ms: validated Media Status TLV
- *
- * This will set the carrier up on down based on the device's link
- * report. This is done asides of what the WiMAX stack does based on
- * the device's state as sometimes we need to do a link-renew (the BS
- * wants us to renew a DHCP lease, for example).
- *
- * In fact, doc says that every time we get a link-up, we should do a
- * DHCP negotiation...
- */
-static
-void i2400m_report_tlv_media_status(struct i2400m *i2400m,
-				    const struct i2400m_tlv_media_status *ms)
-{
-	struct device *dev = i2400m_dev(i2400m);
-	struct wimax_dev *wimax_dev = &i2400m->wimax_dev;
-	struct net_device *net_dev = wimax_dev->net_dev;
-	enum i2400m_media_status status = le32_to_cpu(ms->media_status);
-
-	d_fnstart(3, dev, "(i2400m %p ms %p [%u])\n", i2400m, ms, status);
-
-	switch (status) {
-	case I2400M_MEDIA_STATUS_LINK_UP:
-		netif_carrier_on(net_dev);
-		break;
-	case I2400M_MEDIA_STATUS_LINK_DOWN:
-		netif_carrier_off(net_dev);
-		break;
-	/*
-	 * This is the network telling us we need to retrain the DHCP
-	 * lease -- so far, we are trusting the WiMAX Network Service
-	 * in user space to pick this up and poke the DHCP client.
-	 */
-	case I2400M_MEDIA_STATUS_LINK_RENEW:
-		netif_carrier_on(net_dev);
-		break;
-	default:
-		dev_err(dev, "HW BUG? unknown media status %u\n",
-			status);
-	}
-	d_fnend(3, dev, "(i2400m %p ms %p [%u]) = void\n",
-		i2400m, ms, status);
-}
-
-
-/*
- * Process a TLV from a 'state report'
- *
- * @i2400m: device descriptor
- * @tlv: pointer to the TLV header; it has been already validated for
- *     consistent size.
- * @tag: for error messages
- *
- * Act on the TLVs from a 'state report'.
- */
-static
-void i2400m_report_state_parse_tlv(struct i2400m *i2400m,
-				   const struct i2400m_tlv_hdr *tlv,
-				   const char *tag)
-{
-	struct device *dev = i2400m_dev(i2400m);
-	const struct i2400m_tlv_media_status *ms;
-	const struct i2400m_tlv_system_state *ss;
-	const struct i2400m_tlv_rf_switches_status *rfss;
-
-	if (0 == i2400m_tlv_match(tlv, I2400M_TLV_SYSTEM_STATE, sizeof(*ss))) {
-		ss = container_of(tlv, typeof(*ss), hdr);
-		d_printf(2, dev, "%s: system state TLV "
-			 "found (0x%04x), state 0x%08x\n",
-			 tag, I2400M_TLV_SYSTEM_STATE,
-			 le32_to_cpu(ss->state));
-		i2400m_report_tlv_system_state(i2400m, ss);
-	}
-	if (0 == i2400m_tlv_match(tlv, I2400M_TLV_RF_STATUS, sizeof(*rfss))) {
-		rfss = container_of(tlv, typeof(*rfss), hdr);
-		d_printf(2, dev, "%s: RF status TLV "
-			 "found (0x%04x), sw 0x%02x hw 0x%02x\n",
-			 tag, I2400M_TLV_RF_STATUS,
-			 le32_to_cpu(rfss->sw_rf_switch),
-			 le32_to_cpu(rfss->hw_rf_switch));
-		i2400m_report_tlv_rf_switches_status(i2400m, rfss);
-	}
-	if (0 == i2400m_tlv_match(tlv, I2400M_TLV_MEDIA_STATUS, sizeof(*ms))) {
-		ms = container_of(tlv, typeof(*ms), hdr);
-		d_printf(2, dev, "%s: Media Status TLV: %u\n",
-			 tag, le32_to_cpu(ms->media_status));
-		i2400m_report_tlv_media_status(i2400m, ms);
-	}
-}
-
-
-/*
- * Parse a 'state report' and extract information
- *
- * @i2400m: device descriptor
- * @l3l4_hdr: pointer to message; it has been already validated for
- *            consistent size.
- * @size: size of the message (header + payload). The header length
- *        declaration is assumed to be congruent with @size (as in
- *        sizeof(*l3l4_hdr) + l3l4_hdr->length == size)
- *
- * Walk over the TLVs in a report state and act on them.
- */
-static
-void i2400m_report_state_hook(struct i2400m *i2400m,
-			      const struct i2400m_l3l4_hdr *l3l4_hdr,
-			      size_t size, const char *tag)
-{
-	struct device *dev = i2400m_dev(i2400m);
-	const struct i2400m_tlv_hdr *tlv;
-	size_t tlv_size = le16_to_cpu(l3l4_hdr->length);
-
-	d_fnstart(4, dev, "(i2400m %p, l3l4_hdr %p, size %zu, %s)\n",
-		  i2400m, l3l4_hdr, size, tag);
-	tlv = NULL;
-
-	while ((tlv = i2400m_tlv_buffer_walk(i2400m, &l3l4_hdr->pl,
-					     tlv_size, tlv)))
-		i2400m_report_state_parse_tlv(i2400m, tlv, tag);
-	d_fnend(4, dev, "(i2400m %p, l3l4_hdr %p, size %zu, %s) = void\n",
-		i2400m, l3l4_hdr, size, tag);
-}
-
-
-/*
- * i2400m_report_hook - (maybe) act on a report
- *
- * @i2400m: device descriptor
- * @l3l4_hdr: pointer to message; it has been already validated for
- *            consistent size.
- * @size: size of the message (header + payload). The header length
- *        declaration is assumed to be congruent with @size (as in
- *        sizeof(*l3l4_hdr) + l3l4_hdr->length == size)
- *
- * Extract information we might need (like carrien on/off) from a
- * device report.
- */
-void i2400m_report_hook(struct i2400m *i2400m,
-			const struct i2400m_l3l4_hdr *l3l4_hdr, size_t size)
-{
-	struct device *dev = i2400m_dev(i2400m);
-	unsigned msg_type;
-
-	d_fnstart(3, dev, "(i2400m %p l3l4_hdr %p size %zu)\n",
-		  i2400m, l3l4_hdr, size);
-	/* Chew on the message, we might need some information from
-	 * here */
-	msg_type = le16_to_cpu(l3l4_hdr->type);
-	switch (msg_type) {
-	case I2400M_MT_REPORT_STATE:	/* carrier detection... */
-		i2400m_report_state_hook(i2400m,
-					 l3l4_hdr, size, "REPORT STATE");
-		break;
-	/* If the device is ready for power save, then ask it to do
-	 * it. */
-	case I2400M_MT_REPORT_POWERSAVE_READY:	/* zzzzz */
-		if (l3l4_hdr->status == cpu_to_le16(I2400M_MS_DONE_OK)) {
-			if (i2400m_power_save_disabled)
-				d_printf(1, dev, "ready for powersave, "
-					 "not requesting (disabled by module "
-					 "parameter)\n");
-			else {
-				d_printf(1, dev, "ready for powersave, "
-					 "requesting\n");
-				i2400m_cmd_enter_powersave(i2400m);
-			}
-		}
-		break;
-	}
-	d_fnend(3, dev, "(i2400m %p l3l4_hdr %p size %zu) = void\n",
-		i2400m, l3l4_hdr, size);
-}
-
-
-/*
- * i2400m_msg_ack_hook - process cmd/set/get ack for internal status
- *
- * @i2400m: device descriptor
- * @l3l4_hdr: pointer to message; it has been already validated for
- *            consistent size.
- * @size: size of the message
- *
- * Extract information we might need from acks to commands and act on
- * it. This is akin to i2400m_report_hook(). Note most of this
- * processing should be done in the function that calls the
- * command. This is here for some cases where it can't happen...
- */
-static void i2400m_msg_ack_hook(struct i2400m *i2400m,
-				 const struct i2400m_l3l4_hdr *l3l4_hdr,
-				 size_t size)
-{
-	int result;
-	struct device *dev = i2400m_dev(i2400m);
-	unsigned int ack_type;
-	char strerr[32];
-
-	/* Chew on the message, we might need some information from
-	 * here */
-	ack_type = le16_to_cpu(l3l4_hdr->type);
-	switch (ack_type) {
-	case I2400M_MT_CMD_ENTER_POWERSAVE:
-		/* This is just left here for the sake of example, as
-		 * the processing is done somewhere else. */
-		if (0) {
-			result = i2400m_msg_check_status(
-				l3l4_hdr, strerr, sizeof(strerr));
-			if (result >= 0)
-				d_printf(1, dev, "ready for power save: %zd\n",
-					 size);
-		}
-		break;
-	}
-}
-
-
-/*
- * i2400m_msg_size_check() - verify message size and header are congruent
- *
- * It is ok if the total message size is larger than the expected
- * size, as there can be padding.
- */
-int i2400m_msg_size_check(struct i2400m *i2400m,
-			  const struct i2400m_l3l4_hdr *l3l4_hdr,
-			  size_t msg_size)
-{
-	int result;
-	struct device *dev = i2400m_dev(i2400m);
-	size_t expected_size;
-	d_fnstart(4, dev, "(i2400m %p l3l4_hdr %p msg_size %zu)\n",
-		  i2400m, l3l4_hdr, msg_size);
-	if (msg_size < sizeof(*l3l4_hdr)) {
-		dev_err(dev, "bad size for message header "
-			"(expected at least %zu, got %zu)\n",
-			(size_t) sizeof(*l3l4_hdr), msg_size);
-		result = -EIO;
-		goto error_hdr_size;
-	}
-	expected_size = le16_to_cpu(l3l4_hdr->length) + sizeof(*l3l4_hdr);
-	if (msg_size < expected_size) {
-		dev_err(dev, "bad size for message code 0x%04x (expected %zu, "
-			"got %zu)\n", le16_to_cpu(l3l4_hdr->type),
-			expected_size, msg_size);
-		result = -EIO;
-	} else
-		result = 0;
-error_hdr_size:
-	d_fnend(4, dev,
-		"(i2400m %p l3l4_hdr %p msg_size %zu) = %d\n",
-		i2400m, l3l4_hdr, msg_size, result);
-	return result;
-}
-
-
-
-/*
- * Cancel a wait for a command ACK
- *
- * @i2400m: device descriptor
- * @code: [negative] errno code to cancel with (don't use
- *     -EINPROGRESS)
- *
- * If there is an ack already filled out, free it.
- */
-void i2400m_msg_to_dev_cancel_wait(struct i2400m *i2400m, int code)
-{
-	struct sk_buff *ack_skb;
-	unsigned long flags;
-
-	spin_lock_irqsave(&i2400m->rx_lock, flags);
-	ack_skb = i2400m->ack_skb;
-	if (ack_skb && !IS_ERR(ack_skb))
-		kfree_skb(ack_skb);
-	i2400m->ack_skb = ERR_PTR(code);
-	spin_unlock_irqrestore(&i2400m->rx_lock, flags);
-}
-
-
-/**
- * i2400m_msg_to_dev - Send a control message to the device and get a response
- *
- * @i2400m: device descriptor
- *
- * @buf: pointer to the buffer containing the message to be sent; it
- *           has to start with a &struct i2400M_l3l4_hdr and then
- *           followed by the payload. Once this function returns, the
- *           buffer can be reused.
- *
- * @buf_len: buffer size
- *
- * Returns:
- *
- * Pointer to skb containing the ack message. You need to check the
- * pointer with IS_ERR(), as it might be an error code. Error codes
- * could happen because:
- *
- *  - the message wasn't formatted correctly
- *  - couldn't send the message
- *  - failed waiting for a response
- *  - the ack message wasn't formatted correctly
- *
- * The returned skb has been allocated with wimax_msg_to_user_alloc(),
- * it contains the response in a netlink attribute and is ready to be
- * passed up to user space with wimax_msg_to_user_send(). To access
- * the payload and its length, use wimax_msg_{data,len}() on the skb.
- *
- * The skb has to be freed with kfree_skb() once done.
- *
- * Description:
- *
- * This function delivers a message/command to the device and waits
- * for an ack to be received. The format is described in
- * linux/wimax/i2400m.h. In summary, a command/get/set is followed by an
- * ack.
- *
- * This function will not check the ack status, that's left up to the
- * caller.  Once done with the ack skb, it has to be kfree_skb()ed.
- *
- * The i2400m handles only one message at the same time, thus we need
- * the mutex to exclude other players.
- *
- * We write the message and then wait for an answer to come back. The
- * RX path intercepts control messages and handles them in
- * i2400m_rx_ctl(). Reports (notifications) are (maybe) processed
- * locally and then forwarded (as needed) to user space on the WiMAX
- * stack message pipe. Acks are saved and passed back to us through an
- * skb in i2400m->ack_skb which is ready to be given to generic
- * netlink if need be.
- */
-struct sk_buff *i2400m_msg_to_dev(struct i2400m *i2400m,
-				  const void *buf, size_t buf_len)
-{
-	int result;
-	struct device *dev = i2400m_dev(i2400m);
-	const struct i2400m_l3l4_hdr *msg_l3l4_hdr;
-	struct sk_buff *ack_skb;
-	const struct i2400m_l3l4_hdr *ack_l3l4_hdr;
-	size_t ack_len;
-	int ack_timeout;
-	unsigned msg_type;
-	unsigned long flags;
-
-	d_fnstart(3, dev, "(i2400m %p buf %p len %zu)\n",
-		  i2400m, buf, buf_len);
-
-	rmb();		/* Make sure we see what i2400m_dev_reset_handle() */
-	if (i2400m->boot_mode)
-		return ERR_PTR(-EL3RST);
-
-	msg_l3l4_hdr = buf;
-	/* Check msg & payload consistency */
-	result = i2400m_msg_size_check(i2400m, msg_l3l4_hdr, buf_len);
-	if (result < 0)
-		goto error_bad_msg;
-	msg_type = le16_to_cpu(msg_l3l4_hdr->type);
-	d_printf(1, dev, "CMD/GET/SET 0x%04x %zu bytes\n",
-		 msg_type, buf_len);
-	d_dump(2, dev, buf, buf_len);
-
-	/* Setup the completion, ack_skb ("we are waiting") and send
-	 * the message to the device */
-	mutex_lock(&i2400m->msg_mutex);
-	spin_lock_irqsave(&i2400m->rx_lock, flags);
-	i2400m->ack_skb = ERR_PTR(-EINPROGRESS);
-	spin_unlock_irqrestore(&i2400m->rx_lock, flags);
-	init_completion(&i2400m->msg_completion);
-	result = i2400m_tx(i2400m, buf, buf_len, I2400M_PT_CTRL);
-	if (result < 0) {
-		dev_err(dev, "can't send message 0x%04x: %d\n",
-			le16_to_cpu(msg_l3l4_hdr->type), result);
-		goto error_tx;
-	}
-
-	/* Some commands take longer to execute because of crypto ops,
-	 * so we give them some more leeway on timeout */
-	switch (msg_type) {
-	case I2400M_MT_GET_TLS_OPERATION_RESULT:
-	case I2400M_MT_CMD_SEND_EAP_RESPONSE:
-		ack_timeout = 5 * HZ;
-		break;
-	default:
-		ack_timeout = HZ;
-	}
-
-	if (unlikely(i2400m->trace_msg_from_user))
-		wimax_msg(&i2400m->wimax_dev, "echo", buf, buf_len, GFP_KERNEL);
-	/* The RX path in rx.c will put any response for this message
-	 * in i2400m->ack_skb and wake us up. If we cancel the wait,
-	 * we need to change the value of i2400m->ack_skb to something
-	 * not -EINPROGRESS so RX knows there is no one waiting. */
-	result = wait_for_completion_interruptible_timeout(
-		&i2400m->msg_completion, ack_timeout);
-	if (result == 0) {
-		dev_err(dev, "timeout waiting for reply to message 0x%04x\n",
-			msg_type);
-		result = -ETIMEDOUT;
-		i2400m_msg_to_dev_cancel_wait(i2400m, result);
-		goto error_wait_for_completion;
-	} else if (result < 0) {
-		dev_err(dev, "error waiting for reply to message 0x%04x: %d\n",
-			msg_type, result);
-		i2400m_msg_to_dev_cancel_wait(i2400m, result);
-		goto error_wait_for_completion;
-	}
-
-	/* Pull out the ack data from i2400m->ack_skb -- see if it is
-	 * an error and act accordingly */
-	spin_lock_irqsave(&i2400m->rx_lock, flags);
-	ack_skb = i2400m->ack_skb;
-	if (IS_ERR(ack_skb))
-		result = PTR_ERR(ack_skb);
-	else
-		result = 0;
-	i2400m->ack_skb = NULL;
-	spin_unlock_irqrestore(&i2400m->rx_lock, flags);
-	if (result < 0)
-		goto error_ack_status;
-	ack_l3l4_hdr = wimax_msg_data_len(ack_skb, &ack_len);
-
-	/* Check the ack and deliver it if it is ok */
-	if (unlikely(i2400m->trace_msg_from_user))
-		wimax_msg(&i2400m->wimax_dev, "echo",
-			  ack_l3l4_hdr, ack_len, GFP_KERNEL);
-	result = i2400m_msg_size_check(i2400m, ack_l3l4_hdr, ack_len);
-	if (result < 0) {
-		dev_err(dev, "HW BUG? reply to message 0x%04x: %d\n",
-			msg_type, result);
-		goto error_bad_ack_len;
-	}
-	if (msg_type != le16_to_cpu(ack_l3l4_hdr->type)) {
-		dev_err(dev, "HW BUG? bad reply 0x%04x to message 0x%04x\n",
-			le16_to_cpu(ack_l3l4_hdr->type), msg_type);
-		result = -EIO;
-		goto error_bad_ack_type;
-	}
-	i2400m_msg_ack_hook(i2400m, ack_l3l4_hdr, ack_len);
-	mutex_unlock(&i2400m->msg_mutex);
-	d_fnend(3, dev, "(i2400m %p buf %p len %zu) = %p\n",
-		i2400m, buf, buf_len, ack_skb);
-	return ack_skb;
-
-error_bad_ack_type:
-error_bad_ack_len:
-	kfree_skb(ack_skb);
-error_ack_status:
-error_wait_for_completion:
-error_tx:
-	mutex_unlock(&i2400m->msg_mutex);
-error_bad_msg:
-	d_fnend(3, dev, "(i2400m %p buf %p len %zu) = %d\n",
-		i2400m, buf, buf_len, result);
-	return ERR_PTR(result);
-}
-
-
-/*
- * Definitions for the Enter Power Save command
- *
- * The Enter Power Save command requests the device to go into power
- * saving mode. The device will ack or nak the command depending on it
- * being ready for it. If it acks, we tell the USB subsystem to
- *
- * As well, the device might request to go into power saving mode by
- * sending a report (REPORT_POWERSAVE_READY), in which case, we issue
- * this command. The hookups in the RX coder allow
- */
-enum {
-	I2400M_WAKEUP_ENABLED  = 0x01,
-	I2400M_WAKEUP_DISABLED = 0x02,
-	I2400M_TLV_TYPE_WAKEUP_MODE = 144,
-};
-
-struct i2400m_cmd_enter_power_save {
-	struct i2400m_l3l4_hdr hdr;
-	struct i2400m_tlv_hdr tlv;
-	__le32 val;
-} __packed;
-
-
-/*
- * Request entering power save
- *
- * This command is (mainly) executed when the device indicates that it
- * is ready to go into powersave mode via a REPORT_POWERSAVE_READY.
- */
-int i2400m_cmd_enter_powersave(struct i2400m *i2400m)
-{
-	int result;
-	struct device *dev = i2400m_dev(i2400m);
-	struct sk_buff *ack_skb;
-	struct i2400m_cmd_enter_power_save *cmd;
-	char strerr[32];
-
-	result = -ENOMEM;
-	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
-	if (cmd == NULL)
-		goto error_alloc;
-	cmd->hdr.type = cpu_to_le16(I2400M_MT_CMD_ENTER_POWERSAVE);
-	cmd->hdr.length = cpu_to_le16(sizeof(*cmd) - sizeof(cmd->hdr));
-	cmd->hdr.version = cpu_to_le16(I2400M_L3L4_VERSION);
-	cmd->tlv.type = cpu_to_le16(I2400M_TLV_TYPE_WAKEUP_MODE);
-	cmd->tlv.length = cpu_to_le16(sizeof(cmd->val));
-	cmd->val = cpu_to_le32(I2400M_WAKEUP_ENABLED);
-
-	ack_skb = i2400m_msg_to_dev(i2400m, cmd, sizeof(*cmd));
-	result = PTR_ERR(ack_skb);
-	if (IS_ERR(ack_skb)) {
-		dev_err(dev, "Failed to issue 'Enter power save' command: %d\n",
-			result);
-		goto error_msg_to_dev;
-	}
-	result = i2400m_msg_check_status(wimax_msg_data(ack_skb),
-					 strerr, sizeof(strerr));
-	if (result == -EACCES)
-		d_printf(1, dev, "Cannot enter power save mode\n");
-	else if (result < 0)
-		dev_err(dev, "'Enter power save' (0x%04x) command failed: "
-			"%d - %s\n", I2400M_MT_CMD_ENTER_POWERSAVE,
-			result, strerr);
-	else
-		d_printf(1, dev, "device ready to power save\n");
-	kfree_skb(ack_skb);
-error_msg_to_dev:
-	kfree(cmd);
-error_alloc:
-	return result;
-}
-EXPORT_SYMBOL_GPL(i2400m_cmd_enter_powersave);
-
-
-/*
- * Definitions for getting device information
- */
-enum {
-	I2400M_TLV_DETAILED_DEVICE_INFO = 140
-};
-
-/**
- * i2400m_get_device_info - Query the device for detailed device information
- *
- * @i2400m: device descriptor
- *
- * Returns: an skb whose skb->data points to a 'struct
- *    i2400m_tlv_detailed_device_info'. When done, kfree_skb() it. The
- *    skb is *guaranteed* to contain the whole TLV data structure.
- *
- *    On error, IS_ERR(skb) is true and ERR_PTR(skb) is the error
- *    code.
- */
-struct sk_buff *i2400m_get_device_info(struct i2400m *i2400m)
-{
-	int result;
-	struct device *dev = i2400m_dev(i2400m);
-	struct sk_buff *ack_skb;
-	struct i2400m_l3l4_hdr *cmd;
-	const struct i2400m_l3l4_hdr *ack;
-	size_t ack_len;
-	const struct i2400m_tlv_hdr *tlv;
-	const struct i2400m_tlv_detailed_device_info *ddi;
-	char strerr[32];
-
-	ack_skb = ERR_PTR(-ENOMEM);
-	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
-	if (cmd == NULL)
-		goto error_alloc;
-	cmd->type = cpu_to_le16(I2400M_MT_GET_DEVICE_INFO);
-	cmd->length = 0;
-	cmd->version = cpu_to_le16(I2400M_L3L4_VERSION);
-
-	ack_skb = i2400m_msg_to_dev(i2400m, cmd, sizeof(*cmd));
-	if (IS_ERR(ack_skb)) {
-		dev_err(dev, "Failed to issue 'get device info' command: %ld\n",
-			PTR_ERR(ack_skb));
-		goto error_msg_to_dev;
-	}
-	ack = wimax_msg_data_len(ack_skb, &ack_len);
-	result = i2400m_msg_check_status(ack, strerr, sizeof(strerr));
-	if (result < 0) {
-		dev_err(dev, "'get device info' (0x%04x) command failed: "
-			"%d - %s\n", I2400M_MT_GET_DEVICE_INFO, result,
-			strerr);
-		goto error_cmd_failed;
-	}
-	tlv = i2400m_tlv_find(i2400m, ack->pl, ack_len - sizeof(*ack),
-			      I2400M_TLV_DETAILED_DEVICE_INFO, sizeof(*ddi));
-	if (tlv == NULL) {
-		dev_err(dev, "GET DEVICE INFO: "
-			"detailed device info TLV not found (0x%04x)\n",
-			I2400M_TLV_DETAILED_DEVICE_INFO);
-		result = -EIO;
-		goto error_no_tlv;
-	}
-	skb_pull(ack_skb, (void *) tlv - (void *) ack_skb->data);
-error_msg_to_dev:
-	kfree(cmd);
-error_alloc:
-	return ack_skb;
-
-error_no_tlv:
-error_cmd_failed:
-	kfree_skb(ack_skb);
-	kfree(cmd);
-	return ERR_PTR(result);
-}
-
-
-/* Firmware interface versions we support */
-enum {
-	I2400M_HDIv_MAJOR = 9,
-	I2400M_HDIv_MINOR = 1,
-	I2400M_HDIv_MINOR_2 = 2,
-};
-
-
-/**
- * i2400m_firmware_check - check firmware versions are compatible with
- * the driver
- *
- * @i2400m: device descriptor
- *
- * Returns: 0 if ok, < 0 errno code an error and a message in the
- *    kernel log.
- *
- * Long function, but quite simple; first chunk launches the command
- * and double checks the reply for the right TLV. Then we process the
- * TLV (where the meat is).
- *
- * Once we process the TLV that gives us the firmware's interface
- * version, we encode it and save it in i2400m->fw_version for future
- * reference.
- */
-int i2400m_firmware_check(struct i2400m *i2400m)
-{
-	int result;
-	struct device *dev = i2400m_dev(i2400m);
-	struct sk_buff *ack_skb;
-	struct i2400m_l3l4_hdr *cmd;
-	const struct i2400m_l3l4_hdr *ack;
-	size_t ack_len;
-	const struct i2400m_tlv_hdr *tlv;
-	const struct i2400m_tlv_l4_message_versions *l4mv;
-	char strerr[32];
-	unsigned major, minor, branch;
-
-	result = -ENOMEM;
-	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
-	if (cmd == NULL)
-		goto error_alloc;
-	cmd->type = cpu_to_le16(I2400M_MT_GET_LM_VERSION);
-	cmd->length = 0;
-	cmd->version = cpu_to_le16(I2400M_L3L4_VERSION);
-
-	ack_skb = i2400m_msg_to_dev(i2400m, cmd, sizeof(*cmd));
-	if (IS_ERR(ack_skb)) {
-		result = PTR_ERR(ack_skb);
-		dev_err(dev, "Failed to issue 'get lm version' command: %-d\n",
-			result);
-		goto error_msg_to_dev;
-	}
-	ack = wimax_msg_data_len(ack_skb, &ack_len);
-	result = i2400m_msg_check_status(ack, strerr, sizeof(strerr));
-	if (result < 0) {
-		dev_err(dev, "'get lm version' (0x%04x) command failed: "
-			"%d - %s\n", I2400M_MT_GET_LM_VERSION, result,
-			strerr);
-		goto error_cmd_failed;
-	}
-	tlv = i2400m_tlv_find(i2400m, ack->pl, ack_len - sizeof(*ack),
-			      I2400M_TLV_L4_MESSAGE_VERSIONS, sizeof(*l4mv));
-	if (tlv == NULL) {
-		dev_err(dev, "get lm version: TLV not found (0x%04x)\n",
-			I2400M_TLV_L4_MESSAGE_VERSIONS);
-		result = -EIO;
-		goto error_no_tlv;
-	}
-	l4mv = container_of(tlv, typeof(*l4mv), hdr);
-	major = le16_to_cpu(l4mv->major);
-	minor = le16_to_cpu(l4mv->minor);
-	branch = le16_to_cpu(l4mv->branch);
-	result = -EINVAL;
-	if (major != I2400M_HDIv_MAJOR) {
-		dev_err(dev, "unsupported major fw version "
-			"%u.%u.%u\n", major, minor, branch);
-		goto error_bad_major;
-	}
-	result = 0;
-	if (minor > I2400M_HDIv_MINOR_2 || minor < I2400M_HDIv_MINOR)
-		dev_warn(dev, "untested minor fw version %u.%u.%u\n",
-			 major, minor, branch);
-	/* Yes, we ignore the branch -- we don't have to track it */
-	i2400m->fw_version = major << 16 | minor;
-	dev_info(dev, "firmware interface version %u.%u.%u\n",
-		 major, minor, branch);
-error_bad_major:
-error_no_tlv:
-error_cmd_failed:
-	kfree_skb(ack_skb);
-error_msg_to_dev:
-	kfree(cmd);
-error_alloc:
-	return result;
-}
-
-
-/*
- * Send an DoExitIdle command to the device to ask it to go out of
- * basestation-idle mode.
- *
- * @i2400m: device descriptor
- *
- * This starts a renegotiation with the basestation that might involve
- * another crypto handshake with user space.
- *
- * Returns: 0 if ok, < 0 errno code on error.
- */
-int i2400m_cmd_exit_idle(struct i2400m *i2400m)
-{
-	int result;
-	struct device *dev = i2400m_dev(i2400m);
-	struct sk_buff *ack_skb;
-	struct i2400m_l3l4_hdr *cmd;
-	char strerr[32];
-
-	result = -ENOMEM;
-	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
-	if (cmd == NULL)
-		goto error_alloc;
-	cmd->type = cpu_to_le16(I2400M_MT_CMD_EXIT_IDLE);
-	cmd->length = 0;
-	cmd->version = cpu_to_le16(I2400M_L3L4_VERSION);
-
-	ack_skb = i2400m_msg_to_dev(i2400m, cmd, sizeof(*cmd));
-	result = PTR_ERR(ack_skb);
-	if (IS_ERR(ack_skb)) {
-		dev_err(dev, "Failed to issue 'exit idle' command: %d\n",
-			result);
-		goto error_msg_to_dev;
-	}
-	result = i2400m_msg_check_status(wimax_msg_data(ack_skb),
-					 strerr, sizeof(strerr));
-	kfree_skb(ack_skb);
-error_msg_to_dev:
-	kfree(cmd);
-error_alloc:
-	return result;
-
-}
-
-
-/*
- * Query the device for its state, update the WiMAX stack's idea of it
- *
- * @i2400m: device descriptor
- *
- * Returns: 0 if ok, < 0 errno code on error.
- *
- * Executes a 'Get State' command and parses the returned
- * TLVs.
- *
- * Because this is almost identical to a 'Report State', we use
- * i2400m_report_state_hook() to parse the answer. This will set the
- * carrier state, as well as the RF Kill switches state.
- */
-static int i2400m_cmd_get_state(struct i2400m *i2400m)
-{
-	int result;
-	struct device *dev = i2400m_dev(i2400m);
-	struct sk_buff *ack_skb;
-	struct i2400m_l3l4_hdr *cmd;
-	const struct i2400m_l3l4_hdr *ack;
-	size_t ack_len;
-	char strerr[32];
-
-	result = -ENOMEM;
-	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
-	if (cmd == NULL)
-		goto error_alloc;
-	cmd->type = cpu_to_le16(I2400M_MT_GET_STATE);
-	cmd->length = 0;
-	cmd->version = cpu_to_le16(I2400M_L3L4_VERSION);
-
-	ack_skb = i2400m_msg_to_dev(i2400m, cmd, sizeof(*cmd));
-	if (IS_ERR(ack_skb)) {
-		dev_err(dev, "Failed to issue 'get state' command: %ld\n",
-			PTR_ERR(ack_skb));
-		result = PTR_ERR(ack_skb);
-		goto error_msg_to_dev;
-	}
-	ack = wimax_msg_data_len(ack_skb, &ack_len);
-	result = i2400m_msg_check_status(ack, strerr, sizeof(strerr));
-	if (result < 0) {
-		dev_err(dev, "'get state' (0x%04x) command failed: "
-			"%d - %s\n", I2400M_MT_GET_STATE, result, strerr);
-		goto error_cmd_failed;
-	}
-	i2400m_report_state_hook(i2400m, ack, ack_len - sizeof(*ack),
-				 "GET STATE");
-	result = 0;
-	kfree_skb(ack_skb);
-error_cmd_failed:
-error_msg_to_dev:
-	kfree(cmd);
-error_alloc:
-	return result;
-}
-
-/**
- * Set basic configuration settings
- *
- * @i2400m: device descriptor
- * @args: array of pointers to the TLV headers to send for
- *     configuration (each followed by its payload).
- *     TLV headers and payloads must be properly initialized, with the
- *     right endianess (LE).
- * @arg_size: number of pointers in the @args array
- */
-static int i2400m_set_init_config(struct i2400m *i2400m,
-				  const struct i2400m_tlv_hdr **arg,
-				  size_t args)
-{
-	int result;
-	struct device *dev = i2400m_dev(i2400m);
-	struct sk_buff *ack_skb;
-	struct i2400m_l3l4_hdr *cmd;
-	char strerr[32];
-	unsigned argc, argsize, tlv_size;
-	const struct i2400m_tlv_hdr *tlv_hdr;
-	void *buf, *itr;
-
-	d_fnstart(3, dev, "(i2400m %p arg %p args %zu)\n", i2400m, arg, args);
-	result = 0;
-	if (args == 0)
-		goto none;
-	/* Compute the size of all the TLVs, so we can alloc a
-	 * contiguous command block to copy them. */
-	argsize = 0;
-	for (argc = 0; argc < args; argc++) {
-		tlv_hdr = arg[argc];
-		argsize += sizeof(*tlv_hdr) + le16_to_cpu(tlv_hdr->length);
-	}
-	WARN_ON(argc >= 9);	/* As per hw spec */
-
-	/* Alloc the space for the command and TLVs*/
-	result = -ENOMEM;
-	buf = kzalloc(sizeof(*cmd) + argsize, GFP_KERNEL);
-	if (buf == NULL)
-		goto error_alloc;
-	cmd = buf;
-	cmd->type = cpu_to_le16(I2400M_MT_SET_INIT_CONFIG);
-	cmd->length = cpu_to_le16(argsize);
-	cmd->version = cpu_to_le16(I2400M_L3L4_VERSION);
-
-	/* Copy the TLVs */
-	itr = buf + sizeof(*cmd);
-	for (argc = 0; argc < args; argc++) {
-		tlv_hdr = arg[argc];
-		tlv_size = sizeof(*tlv_hdr) + le16_to_cpu(tlv_hdr->length);
-		memcpy(itr, tlv_hdr, tlv_size);
-		itr += tlv_size;
-	}
-
-	/* Send the message! */
-	ack_skb = i2400m_msg_to_dev(i2400m, buf, sizeof(*cmd) + argsize);
-	result = PTR_ERR(ack_skb);
-	if (IS_ERR(ack_skb)) {
-		dev_err(dev, "Failed to issue 'init config' command: %d\n",
-			result);
-
-		goto error_msg_to_dev;
-	}
-	result = i2400m_msg_check_status(wimax_msg_data(ack_skb),
-					 strerr, sizeof(strerr));
-	if (result < 0)
-		dev_err(dev, "'init config' (0x%04x) command failed: %d - %s\n",
-			I2400M_MT_SET_INIT_CONFIG, result, strerr);
-	kfree_skb(ack_skb);
-error_msg_to_dev:
-	kfree(buf);
-error_alloc:
-none:
-	d_fnend(3, dev, "(i2400m %p arg %p args %zu) = %d\n",
-		i2400m, arg, args, result);
-	return result;
-
-}
-
-/**
- * i2400m_set_idle_timeout - Set the device's idle mode timeout
- *
- * @i2400m: i2400m device descriptor
- *
- * @msecs: milliseconds for the timeout to enter idle mode. Between
- *     100 to 300000 (5m); 0 to disable. In increments of 100.
- *
- * After this @msecs of the link being idle (no data being sent or
- * received), the device will negotiate with the basestation entering
- * idle mode for saving power. The connection is maintained, but
- * getting out of it (done in tx.c) will require some negotiation,
- * possible crypto re-handshake and a possible DHCP re-lease.
- *
- * Only available if fw_version >= 0x00090002.
- *
- * Returns: 0 if ok, < 0 errno code on error.
- */
-int i2400m_set_idle_timeout(struct i2400m *i2400m, unsigned msecs)
-{
-	int result;
-	struct device *dev = i2400m_dev(i2400m);
-	struct sk_buff *ack_skb;
-	struct {
-		struct i2400m_l3l4_hdr hdr;
-		struct i2400m_tlv_config_idle_timeout cit;
-	} *cmd;
-	const struct i2400m_l3l4_hdr *ack;
-	size_t ack_len;
-	char strerr[32];
-
-	result = -ENOSYS;
-	if (i2400m_le_v1_3(i2400m))
-		goto error_alloc;
-	result = -ENOMEM;
-	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
-	if (cmd == NULL)
-		goto error_alloc;
-	cmd->hdr.type = cpu_to_le16(I2400M_MT_GET_STATE);
-	cmd->hdr.length = cpu_to_le16(sizeof(*cmd) - sizeof(cmd->hdr));
-	cmd->hdr.version = cpu_to_le16(I2400M_L3L4_VERSION);
-
-	cmd->cit.hdr.type =
-		cpu_to_le16(I2400M_TLV_CONFIG_IDLE_TIMEOUT);
-	cmd->cit.hdr.length = cpu_to_le16(sizeof(cmd->cit.timeout));
-	cmd->cit.timeout = cpu_to_le32(msecs);
-
-	ack_skb = i2400m_msg_to_dev(i2400m, cmd, sizeof(*cmd));
-	if (IS_ERR(ack_skb)) {
-		dev_err(dev, "Failed to issue 'set idle timeout' command: "
-			"%ld\n", PTR_ERR(ack_skb));
-		result = PTR_ERR(ack_skb);
-		goto error_msg_to_dev;
-	}
-	ack = wimax_msg_data_len(ack_skb, &ack_len);
-	result = i2400m_msg_check_status(ack, strerr, sizeof(strerr));
-	if (result < 0) {
-		dev_err(dev, "'set idle timeout' (0x%04x) command failed: "
-			"%d - %s\n", I2400M_MT_GET_STATE, result, strerr);
-		goto error_cmd_failed;
-	}
-	result = 0;
-	kfree_skb(ack_skb);
-error_cmd_failed:
-error_msg_to_dev:
-	kfree(cmd);
-error_alloc:
-	return result;
-}
-
-
-/**
- * i2400m_dev_initialize - Initialize the device once communications are ready
- *
- * @i2400m: device descriptor
- *
- * Returns: 0 if ok, < 0 errno code on error.
- *
- * Configures the device to work the way we like it.
- *
- * At the point of this call, the device is registered with the WiMAX
- * and netdev stacks, firmware is uploaded and we can talk to the
- * device normally.
- */
-int i2400m_dev_initialize(struct i2400m *i2400m)
-{
-	int result;
-	struct device *dev = i2400m_dev(i2400m);
-	struct i2400m_tlv_config_idle_parameters idle_params;
-	struct i2400m_tlv_config_idle_timeout idle_timeout;
-	struct i2400m_tlv_config_d2h_data_format df;
-	struct i2400m_tlv_config_dl_host_reorder dlhr;
-	const struct i2400m_tlv_hdr *args[9];
-	unsigned argc = 0;
-
-	d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
-	if (i2400m_passive_mode)
-		goto out_passive;
-	/* Disable idle mode? (enabled by default) */
-	if (i2400m_idle_mode_disabled) {
-		if (i2400m_le_v1_3(i2400m)) {
-			idle_params.hdr.type =
-				cpu_to_le16(I2400M_TLV_CONFIG_IDLE_PARAMETERS);
-			idle_params.hdr.length = cpu_to_le16(
-				sizeof(idle_params) - sizeof(idle_params.hdr));
-			idle_params.idle_timeout = 0;
-			idle_params.idle_paging_interval = 0;
-			args[argc++] = &idle_params.hdr;
-		} else {
-			idle_timeout.hdr.type =
-				cpu_to_le16(I2400M_TLV_CONFIG_IDLE_TIMEOUT);
-			idle_timeout.hdr.length = cpu_to_le16(
-				sizeof(idle_timeout) - sizeof(idle_timeout.hdr));
-			idle_timeout.timeout = 0;
-			args[argc++] = &idle_timeout.hdr;
-		}
-	}
-	if (i2400m_ge_v1_4(i2400m)) {
-		/* Enable extended RX data format? */
-		df.hdr.type =
-			cpu_to_le16(I2400M_TLV_CONFIG_D2H_DATA_FORMAT);
-		df.hdr.length = cpu_to_le16(
-			sizeof(df) - sizeof(df.hdr));
-		df.format = 1;
-		args[argc++] = &df.hdr;
-
-		/* Enable RX data reordering?
-		 * (switch flipped in rx.c:i2400m_rx_setup() after fw upload) */
-		if (i2400m->rx_reorder) {
-			dlhr.hdr.type =
-				cpu_to_le16(I2400M_TLV_CONFIG_DL_HOST_REORDER);
-			dlhr.hdr.length = cpu_to_le16(
-				sizeof(dlhr) - sizeof(dlhr.hdr));
-			dlhr.reorder = 1;
-			args[argc++] = &dlhr.hdr;
-		}
-	}
-	result = i2400m_set_init_config(i2400m, args, argc);
-	if (result < 0)
-		goto error;
-out_passive:
-	/*
-	 * Update state: Here it just calls a get state; parsing the
-	 * result (System State TLV and RF Status TLV [done in the rx
-	 * path hooks]) will set the hardware and software RF-Kill
-	 * status.
-	 */
-	result = i2400m_cmd_get_state(i2400m);
-error:
-	if (result < 0)
-		dev_err(dev, "failed to initialize the device: %d\n", result);
-	d_fnend(3, dev, "(i2400m %p) = %d\n", i2400m, result);
-	return result;
-}
-
-
-/**
- * i2400m_dev_shutdown - Shutdown a running device
- *
- * @i2400m: device descriptor
- *
- * Release resources acquired during the running of the device; in
- * theory, should also tell the device to go to sleep, switch off the
- * radio, all that, but at this point, in most cases (driver
- * disconnection, reset handling) we can't even talk to the device.
- */
-void i2400m_dev_shutdown(struct i2400m *i2400m)
-{
-	struct device *dev = i2400m_dev(i2400m);
-
-	d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
-	d_fnend(3, dev, "(i2400m %p) = void\n", i2400m);
-}
diff --git a/drivers/net/wimax/i2400m/debug-levels.h b/drivers/net/wimax/i2400m/debug-levels.h
deleted file mode 100644
index 00942bb1489b..000000000000
--- a/drivers/net/wimax/i2400m/debug-levels.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Intel Wireless WiMAX Connection 2400m
- * Debug levels control file for the i2400m module
- *
- * Copyright (C) 2007-2008 Intel Corporation <linux-wimax@intel.com>
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- */
-#ifndef __debug_levels__h__
-#define __debug_levels__h__
-
-/* Maximum compile and run time debug level for all submodules */
-#define D_MODULENAME i2400m
-#define D_MASTER CONFIG_WIMAX_I2400M_DEBUG_LEVEL
-
-#include <linux/wimax/debug.h>
-
-/* List of all the enabled modules */
-enum d_module {
-	D_SUBMODULE_DECLARE(control),
-	D_SUBMODULE_DECLARE(driver),
-	D_SUBMODULE_DECLARE(debugfs),
-	D_SUBMODULE_DECLARE(fw),
-	D_SUBMODULE_DECLARE(netdev),
-	D_SUBMODULE_DECLARE(rfkill),
-	D_SUBMODULE_DECLARE(rx),
-	D_SUBMODULE_DECLARE(sysfs),
-	D_SUBMODULE_DECLARE(tx),
-};
-
-
-#endif /* #ifndef __debug_levels__h__ */
diff --git a/drivers/net/wimax/i2400m/debugfs.c b/drivers/net/wimax/i2400m/debugfs.c
deleted file mode 100644
index 1c640b41ea4c..000000000000
--- a/drivers/net/wimax/i2400m/debugfs.c
+++ /dev/null
@@ -1,253 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel Wireless WiMAX Connection 2400m
- * Debugfs interfaces to manipulate driver and device information
- *
- * Copyright (C) 2007 Intel Corporation <linux-wimax@intel.com>
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- */
-
-#include <linux/debugfs.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/spinlock.h>
-#include <linux/device.h>
-#include <linux/export.h>
-#include "i2400m.h"
-
-
-#define D_SUBMODULE debugfs
-#include "debug-levels.h"
-
-static
-int debugfs_netdev_queue_stopped_get(void *data, u64 *val)
-{
-	struct i2400m *i2400m = data;
-	*val = netif_queue_stopped(i2400m->wimax_dev.net_dev);
-	return 0;
-}
-DEFINE_DEBUGFS_ATTRIBUTE(fops_netdev_queue_stopped,
-			debugfs_netdev_queue_stopped_get,
-			NULL, "%llu\n");
-
-/*
- * We don't allow partial reads of this file, as then the reader would
- * get weirdly confused data as it is updated.
- *
- * So or you read it all or nothing; if you try to read with an offset
- * != 0, we consider you are done reading.
- */
-static
-ssize_t i2400m_rx_stats_read(struct file *filp, char __user *buffer,
-			     size_t count, loff_t *ppos)
-{
-	struct i2400m *i2400m = filp->private_data;
-	char buf[128];
-	unsigned long flags;
-
-	if (*ppos != 0)
-		return 0;
-	if (count < sizeof(buf))
-		return -ENOSPC;
-	spin_lock_irqsave(&i2400m->rx_lock, flags);
-	snprintf(buf, sizeof(buf), "%u %u %u %u %u %u %u\n",
-		 i2400m->rx_pl_num, i2400m->rx_pl_min,
-		 i2400m->rx_pl_max, i2400m->rx_num,
-		 i2400m->rx_size_acc,
-		 i2400m->rx_size_min, i2400m->rx_size_max);
-	spin_unlock_irqrestore(&i2400m->rx_lock, flags);
-	return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
-}
-
-
-/* Any write clears the stats */
-static
-ssize_t i2400m_rx_stats_write(struct file *filp, const char __user *buffer,
-			      size_t count, loff_t *ppos)
-{
-	struct i2400m *i2400m = filp->private_data;
-	unsigned long flags;
-
-	spin_lock_irqsave(&i2400m->rx_lock, flags);
-	i2400m->rx_pl_num = 0;
-	i2400m->rx_pl_max = 0;
-	i2400m->rx_pl_min = UINT_MAX;
-	i2400m->rx_num = 0;
-	i2400m->rx_size_acc = 0;
-	i2400m->rx_size_min = UINT_MAX;
-	i2400m->rx_size_max = 0;
-	spin_unlock_irqrestore(&i2400m->rx_lock, flags);
-	return count;
-}
-
-static
-const struct file_operations i2400m_rx_stats_fops = {
-	.owner =	THIS_MODULE,
-	.open =		simple_open,
-	.read =		i2400m_rx_stats_read,
-	.write =	i2400m_rx_stats_write,
-	.llseek =	default_llseek,
-};
-
-
-/* See i2400m_rx_stats_read() */
-static
-ssize_t i2400m_tx_stats_read(struct file *filp, char __user *buffer,
-			     size_t count, loff_t *ppos)
-{
-	struct i2400m *i2400m = filp->private_data;
-	char buf[128];
-	unsigned long flags;
-
-	if (*ppos != 0)
-		return 0;
-	if (count < sizeof(buf))
-		return -ENOSPC;
-	spin_lock_irqsave(&i2400m->tx_lock, flags);
-	snprintf(buf, sizeof(buf), "%u %u %u %u %u %u %u\n",
-		 i2400m->tx_pl_num, i2400m->tx_pl_min,
-		 i2400m->tx_pl_max, i2400m->tx_num,
-		 i2400m->tx_size_acc,
-		 i2400m->tx_size_min, i2400m->tx_size_max);
-	spin_unlock_irqrestore(&i2400m->tx_lock, flags);
-	return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
-}
-
-/* Any write clears the stats */
-static
-ssize_t i2400m_tx_stats_write(struct file *filp, const char __user *buffer,
-			      size_t count, loff_t *ppos)
-{
-	struct i2400m *i2400m = filp->private_data;
-	unsigned long flags;
-
-	spin_lock_irqsave(&i2400m->tx_lock, flags);
-	i2400m->tx_pl_num = 0;
-	i2400m->tx_pl_max = 0;
-	i2400m->tx_pl_min = UINT_MAX;
-	i2400m->tx_num = 0;
-	i2400m->tx_size_acc = 0;
-	i2400m->tx_size_min = UINT_MAX;
-	i2400m->tx_size_max = 0;
-	spin_unlock_irqrestore(&i2400m->tx_lock, flags);
-	return count;
-}
-
-static
-const struct file_operations i2400m_tx_stats_fops = {
-	.owner =	THIS_MODULE,
-	.open =		simple_open,
-	.read =		i2400m_tx_stats_read,
-	.write =	i2400m_tx_stats_write,
-	.llseek =	default_llseek,
-};
-
-
-/* Write 1 to ask the device to go into suspend */
-static
-int debugfs_i2400m_suspend_set(void *data, u64 val)
-{
-	int result;
-	struct i2400m *i2400m = data;
-	result = i2400m_cmd_enter_powersave(i2400m);
-	if (result >= 0)
-		result = 0;
-	return result;
-}
-DEFINE_DEBUGFS_ATTRIBUTE(fops_i2400m_suspend,
-			NULL, debugfs_i2400m_suspend_set,
-			"%llu\n");
-
-/*
- * Reset the device
- *
- * Write 0 to ask the device to soft reset, 1 to cold reset, 2 to bus
- * reset (as defined by enum i2400m_reset_type).
- */
-static
-int debugfs_i2400m_reset_set(void *data, u64 val)
-{
-	int result;
-	struct i2400m *i2400m = data;
-	enum i2400m_reset_type rt = val;
-	switch(rt) {
-	case I2400M_RT_WARM:
-	case I2400M_RT_COLD:
-	case I2400M_RT_BUS:
-		result = i2400m_reset(i2400m, rt);
-		if (result >= 0)
-			result = 0;
-		break;
-	default:
-		result = -EINVAL;
-	}
-	return result;
-}
-DEFINE_DEBUGFS_ATTRIBUTE(fops_i2400m_reset,
-			NULL, debugfs_i2400m_reset_set,
-			"%llu\n");
-
-void i2400m_debugfs_add(struct i2400m *i2400m)
-{
-	struct dentry *dentry = i2400m->wimax_dev.debugfs_dentry;
-
-	dentry = debugfs_create_dir("i2400m", dentry);
-	i2400m->debugfs_dentry = dentry;
-
-	d_level_register_debugfs("dl_", control, dentry);
-	d_level_register_debugfs("dl_", driver, dentry);
-	d_level_register_debugfs("dl_", debugfs, dentry);
-	d_level_register_debugfs("dl_", fw, dentry);
-	d_level_register_debugfs("dl_", netdev, dentry);
-	d_level_register_debugfs("dl_", rfkill, dentry);
-	d_level_register_debugfs("dl_", rx, dentry);
-	d_level_register_debugfs("dl_", tx, dentry);
-
-	debugfs_create_size_t("tx_in", 0400, dentry, &i2400m->tx_in);
-	debugfs_create_size_t("tx_out", 0400, dentry, &i2400m->tx_out);
-	debugfs_create_u32("state", 0600, dentry, &i2400m->state);
-
-	/*
-	 * Trace received messages from user space
-	 *
-	 * In order to tap the bidirectional message stream in the
-	 * 'msg' pipe, user space can read from the 'msg' pipe;
-	 * however, due to limitations in libnl, we can't know what
-	 * the different applications are sending down to the kernel.
-	 *
-	 * So we have this hack where the driver will echo any message
-	 * received on the msg pipe from user space [through a call to
-	 * wimax_dev->op_msg_from_user() into
-	 * i2400m_op_msg_from_user()] into the 'trace' pipe that this
-	 * driver creates.
-	 *
-	 * So then, reading from both the 'trace' and 'msg' pipes in
-	 * user space will provide a full dump of the traffic.
-	 *
-	 * Write 1 to activate, 0 to clear.
-	 *
-	 * It is not really very atomic, but it is also not too
-	 * critical.
-	 */
-	debugfs_create_u8("trace_msg_from_user", 0600, dentry,
-			  &i2400m->trace_msg_from_user);
-
-	debugfs_create_file("netdev_queue_stopped", 0400, dentry, i2400m,
-			    &fops_netdev_queue_stopped);
-
-	debugfs_create_file("rx_stats", 0600, dentry, i2400m,
-			    &i2400m_rx_stats_fops);
-
-	debugfs_create_file("tx_stats", 0600, dentry, i2400m,
-			    &i2400m_tx_stats_fops);
-
-	debugfs_create_file("suspend", 0200, dentry, i2400m,
-			    &fops_i2400m_suspend);
-
-	debugfs_create_file("reset", 0200, dentry, i2400m, &fops_i2400m_reset);
-}
-
-void i2400m_debugfs_rm(struct i2400m *i2400m)
-{
-	debugfs_remove_recursive(i2400m->debugfs_dentry);
-}
diff --git a/drivers/net/wimax/i2400m/driver.c b/drivers/net/wimax/i2400m/driver.c
deleted file mode 100644
index ecb3fccca603..000000000000
--- a/drivers/net/wimax/i2400m/driver.c
+++ /dev/null
@@ -1,1002 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel Wireless WiMAX Connection 2400m
- * Generic probe/disconnect, reset and message passing
- *
- * Copyright (C) 2007-2008 Intel Corporation <linux-wimax@intel.com>
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * See i2400m.h for driver documentation. This contains helpers for
- * the driver model glue [_setup()/_release()], handling device resets
- * [_dev_reset_handle()], and the backends for the WiMAX stack ops
- * reset [_op_reset()] and message from user [_op_msg_from_user()].
- *
- * ROADMAP:
- *
- * i2400m_op_msg_from_user()
- *   i2400m_msg_to_dev()
- *   wimax_msg_to_user_send()
- *
- * i2400m_op_reset()
- *   i240m->bus_reset()
- *
- * i2400m_dev_reset_handle()
- *   __i2400m_dev_reset_handle()
- *     __i2400m_dev_stop()
- *     __i2400m_dev_start()
- *
- * i2400m_setup()
- *   i2400m->bus_setup()
- *   i2400m_bootrom_init()
- *   register_netdev()
- *   wimax_dev_add()
- *   i2400m_dev_start()
- *     __i2400m_dev_start()
- *       i2400m_dev_bootstrap()
- *       i2400m_tx_setup()
- *       i2400m->bus_dev_start()
- *       i2400m_firmware_check()
- *       i2400m_check_mac_addr()
- *
- * i2400m_release()
- *   i2400m_dev_stop()
- *     __i2400m_dev_stop()
- *       i2400m_dev_shutdown()
- *       i2400m->bus_dev_stop()
- *       i2400m_tx_release()
- *   i2400m->bus_release()
- *   wimax_dev_rm()
- *   unregister_netdev()
- */
-#include "i2400m.h"
-#include <linux/etherdevice.h>
-#include <linux/wimax/i2400m.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/suspend.h>
-#include <linux/slab.h>
-
-#define D_SUBMODULE driver
-#include "debug-levels.h"
-
-
-static char i2400m_debug_params[128];
-module_param_string(debug, i2400m_debug_params, sizeof(i2400m_debug_params),
-		    0644);
-MODULE_PARM_DESC(debug,
-		 "String of space-separated NAME:VALUE pairs, where NAMEs "
-		 "are the different debug submodules and VALUE are the "
-		 "initial debug value to set.");
-
-static char i2400m_barkers_params[128];
-module_param_string(barkers, i2400m_barkers_params,
-		    sizeof(i2400m_barkers_params), 0644);
-MODULE_PARM_DESC(barkers,
-		 "String of comma-separated 32-bit values; each is "
-		 "recognized as the value the device sends as a reboot "
-		 "signal; values are appended to a list--setting one value "
-		 "as zero cleans the existing list and starts a new one.");
-
-/*
- * WiMAX stack operation: relay a message from user space
- *
- * @wimax_dev: device descriptor
- * @pipe_name: named pipe the message is for
- * @msg_buf: pointer to the message bytes
- * @msg_len: length of the buffer
- * @genl_info: passed by the generic netlink layer
- *
- * The WiMAX stack will call this function when a message was received
- * from user space.
- *
- * For the i2400m, this is an L3L4 message, as specified in
- * include/linux/wimax/i2400m.h, and thus prefixed with a 'struct
- * i2400m_l3l4_hdr'. Driver (and device) expect the messages to be
- * coded in Little Endian.
- *
- * This function just verifies that the header declaration and the
- * payload are consistent and then deals with it, either forwarding it
- * to the device or procesing it locally.
- *
- * In the i2400m, messages are basically commands that will carry an
- * ack, so we use i2400m_msg_to_dev() and then deliver the ack back to
- * user space. The rx.c code might intercept the response and use it
- * to update the driver's state, but then it will pass it on so it can
- * be relayed back to user space.
- *
- * Note that asynchronous events from the device are processed and
- * sent to user space in rx.c.
- */
-static
-int i2400m_op_msg_from_user(struct wimax_dev *wimax_dev,
-			    const char *pipe_name,
-			    const void *msg_buf, size_t msg_len,
-			    const struct genl_info *genl_info)
-{
-	int result;
-	struct i2400m *i2400m = wimax_dev_to_i2400m(wimax_dev);
-	struct device *dev = i2400m_dev(i2400m);
-	struct sk_buff *ack_skb;
-
-	d_fnstart(4, dev, "(wimax_dev %p [i2400m %p] msg_buf %p "
-		  "msg_len %zu genl_info %p)\n", wimax_dev, i2400m,
-		  msg_buf, msg_len, genl_info);
-	ack_skb = i2400m_msg_to_dev(i2400m, msg_buf, msg_len);
-	result = PTR_ERR(ack_skb);
-	if (IS_ERR(ack_skb))
-		goto error_msg_to_dev;
-	result = wimax_msg_send(&i2400m->wimax_dev, ack_skb);
-error_msg_to_dev:
-	d_fnend(4, dev, "(wimax_dev %p [i2400m %p] msg_buf %p msg_len %zu "
-		"genl_info %p) = %d\n", wimax_dev, i2400m, msg_buf, msg_len,
-		genl_info, result);
-	return result;
-}
-
-
-/*
- * Context to wait for a reset to finalize
- */
-struct i2400m_reset_ctx {
-	struct completion completion;
-	int result;
-};
-
-
-/*
- * WiMAX stack operation: reset a device
- *
- * @wimax_dev: device descriptor
- *
- * See the documentation for wimax_reset() and wimax_dev->op_reset for
- * the requirements of this function. The WiMAX stack guarantees
- * serialization on calls to this function.
- *
- * Do a warm reset on the device; if it fails, resort to a cold reset
- * and return -ENODEV. On successful warm reset, we need to block
- * until it is complete.
- *
- * The bus-driver implementation of reset takes care of falling back
- * to cold reset if warm fails.
- */
-static
-int i2400m_op_reset(struct wimax_dev *wimax_dev)
-{
-	int result;
-	struct i2400m *i2400m = wimax_dev_to_i2400m(wimax_dev);
-	struct device *dev = i2400m_dev(i2400m);
-	struct i2400m_reset_ctx ctx = {
-		.completion = COMPLETION_INITIALIZER_ONSTACK(ctx.completion),
-		.result = 0,
-	};
-
-	d_fnstart(4, dev, "(wimax_dev %p)\n", wimax_dev);
-	mutex_lock(&i2400m->init_mutex);
-	i2400m->reset_ctx = &ctx;
-	mutex_unlock(&i2400m->init_mutex);
-	result = i2400m_reset(i2400m, I2400M_RT_WARM);
-	if (result < 0)
-		goto out;
-	result = wait_for_completion_timeout(&ctx.completion, 4*HZ);
-	if (result == 0)
-		result = -ETIMEDOUT;
-	else if (result > 0)
-		result = ctx.result;
-	/* if result < 0, pass it on */
-	mutex_lock(&i2400m->init_mutex);
-	i2400m->reset_ctx = NULL;
-	mutex_unlock(&i2400m->init_mutex);
-out:
-	d_fnend(4, dev, "(wimax_dev %p) = %d\n", wimax_dev, result);
-	return result;
-}
-
-
-/*
- * Check the MAC address we got from boot mode is ok
- *
- * @i2400m: device descriptor
- *
- * Returns: 0 if ok, < 0 errno code on error.
- */
-static
-int i2400m_check_mac_addr(struct i2400m *i2400m)
-{
-	int result;
-	struct device *dev = i2400m_dev(i2400m);
-	struct sk_buff *skb;
-	const struct i2400m_tlv_detailed_device_info *ddi;
-	struct net_device *net_dev = i2400m->wimax_dev.net_dev;
-
-	d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
-	skb = i2400m_get_device_info(i2400m);
-	if (IS_ERR(skb)) {
-		result = PTR_ERR(skb);
-		dev_err(dev, "Cannot verify MAC address, error reading: %d\n",
-			result);
-		goto error;
-	}
-	/* Extract MAC address */
-	ddi = (void *) skb->data;
-	BUILD_BUG_ON(ETH_ALEN != sizeof(ddi->mac_address));
-	d_printf(2, dev, "GET DEVICE INFO: mac addr %pM\n",
-		 ddi->mac_address);
-	if (!memcmp(net_dev->perm_addr, ddi->mac_address,
-		   sizeof(ddi->mac_address)))
-		goto ok;
-	dev_warn(dev, "warning: device reports a different MAC address "
-		 "to that of boot mode's\n");
-	dev_warn(dev, "device reports     %pM\n", ddi->mac_address);
-	dev_warn(dev, "boot mode reported %pM\n", net_dev->perm_addr);
-	if (is_zero_ether_addr(ddi->mac_address))
-		dev_err(dev, "device reports an invalid MAC address, "
-			"not updating\n");
-	else {
-		dev_warn(dev, "updating MAC address\n");
-		net_dev->addr_len = ETH_ALEN;
-		memcpy(net_dev->perm_addr, ddi->mac_address, ETH_ALEN);
-		memcpy(net_dev->dev_addr, ddi->mac_address, ETH_ALEN);
-	}
-ok:
-	result = 0;
-	kfree_skb(skb);
-error:
-	d_fnend(3, dev, "(i2400m %p) = %d\n", i2400m, result);
-	return result;
-}
-
-
-/**
- * __i2400m_dev_start - Bring up driver communication with the device
- *
- * @i2400m: device descriptor
- * @flags: boot mode flags
- *
- * Returns: 0 if ok, < 0 errno code on error.
- *
- * Uploads firmware and brings up all the resources needed to be able
- * to communicate with the device.
- *
- * The workqueue has to be setup early, at least before RX handling
- * (it's only real user for now) so it can process reports as they
- * arrive. We also want to destroy it if we retry, to make sure it is
- * flushed...easier like this.
- *
- * TX needs to be setup before the bus-specific code (otherwise on
- * shutdown, the bus-tx code could try to access it).
- */
-static
-int __i2400m_dev_start(struct i2400m *i2400m, enum i2400m_bri flags)
-{
-	int result;
-	struct wimax_dev *wimax_dev = &i2400m->wimax_dev;
-	struct net_device *net_dev = wimax_dev->net_dev;
-	struct device *dev = i2400m_dev(i2400m);
-	int times = i2400m->bus_bm_retries;
-
-	d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
-retry:
-	result = i2400m_dev_bootstrap(i2400m, flags);
-	if (result < 0) {
-		dev_err(dev, "cannot bootstrap device: %d\n", result);
-		goto error_bootstrap;
-	}
-	result = i2400m_tx_setup(i2400m);
-	if (result < 0)
-		goto error_tx_setup;
-	result = i2400m_rx_setup(i2400m);
-	if (result < 0)
-		goto error_rx_setup;
-	i2400m->work_queue = create_singlethread_workqueue(wimax_dev->name);
-	if (i2400m->work_queue == NULL) {
-		result = -ENOMEM;
-		dev_err(dev, "cannot create workqueue\n");
-		goto error_create_workqueue;
-	}
-	if (i2400m->bus_dev_start) {
-		result = i2400m->bus_dev_start(i2400m);
-		if (result < 0)
-			goto error_bus_dev_start;
-	}
-	i2400m->ready = 1;
-	wmb();		/* see i2400m->ready's documentation  */
-	/* process pending reports from the device */
-	queue_work(i2400m->work_queue, &i2400m->rx_report_ws);
-	result = i2400m_firmware_check(i2400m);	/* fw versions ok? */
-	if (result < 0)
-		goto error_fw_check;
-	/* At this point is ok to send commands to the device */
-	result = i2400m_check_mac_addr(i2400m);
-	if (result < 0)
-		goto error_check_mac_addr;
-	result = i2400m_dev_initialize(i2400m);
-	if (result < 0)
-		goto error_dev_initialize;
-
-	/* We don't want any additional unwanted error recovery triggered
-	 * from any other context so if anything went wrong before we come
-	 * here, let's keep i2400m->error_recovery untouched and leave it to
-	 * dev_reset_handle(). See dev_reset_handle(). */
-
-	atomic_dec(&i2400m->error_recovery);
-	/* Every thing works so far, ok, now we are ready to
-	 * take error recovery if it's required. */
-
-	/* At this point, reports will come for the device and set it
-	 * to the right state if it is different than UNINITIALIZED */
-	d_fnend(3, dev, "(net_dev %p [i2400m %p]) = %d\n",
-		net_dev, i2400m, result);
-	return result;
-
-error_dev_initialize:
-error_check_mac_addr:
-error_fw_check:
-	i2400m->ready = 0;
-	wmb();		/* see i2400m->ready's documentation  */
-	flush_workqueue(i2400m->work_queue);
-	if (i2400m->bus_dev_stop)
-		i2400m->bus_dev_stop(i2400m);
-error_bus_dev_start:
-	destroy_workqueue(i2400m->work_queue);
-error_create_workqueue:
-	i2400m_rx_release(i2400m);
-error_rx_setup:
-	i2400m_tx_release(i2400m);
-error_tx_setup:
-error_bootstrap:
-	if (result == -EL3RST && times-- > 0) {
-		flags = I2400M_BRI_SOFT|I2400M_BRI_MAC_REINIT;
-		goto retry;
-	}
-	d_fnend(3, dev, "(net_dev %p [i2400m %p]) = %d\n",
-		net_dev, i2400m, result);
-	return result;
-}
-
-
-static
-int i2400m_dev_start(struct i2400m *i2400m, enum i2400m_bri bm_flags)
-{
-	int result = 0;
-	mutex_lock(&i2400m->init_mutex);	/* Well, start the device */
-	if (i2400m->updown == 0) {
-		result = __i2400m_dev_start(i2400m, bm_flags);
-		if (result >= 0) {
-			i2400m->updown = 1;
-			i2400m->alive = 1;
-			wmb();/* see i2400m->updown and i2400m->alive's doc */
-		}
-	}
-	mutex_unlock(&i2400m->init_mutex);
-	return result;
-}
-
-
-/**
- * i2400m_dev_stop - Tear down driver communication with the device
- *
- * @i2400m: device descriptor
- *
- * Returns: 0 if ok, < 0 errno code on error.
- *
- * Releases all the resources allocated to communicate with the
- * device. Note we cannot destroy the workqueue earlier as until RX is
- * fully destroyed, it could still try to schedule jobs.
- */
-static
-void __i2400m_dev_stop(struct i2400m *i2400m)
-{
-	struct wimax_dev *wimax_dev = &i2400m->wimax_dev;
-	struct device *dev = i2400m_dev(i2400m);
-
-	d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
-	wimax_state_change(wimax_dev, __WIMAX_ST_QUIESCING);
-	i2400m_msg_to_dev_cancel_wait(i2400m, -EL3RST);
-	complete(&i2400m->msg_completion);
-	i2400m_net_wake_stop(i2400m);
-	i2400m_dev_shutdown(i2400m);
-	/*
-	 * Make sure no report hooks are running *before* we stop the
-	 * communication infrastructure with the device.
-	 */
-	i2400m->ready = 0;	/* nobody can queue work anymore */
-	wmb();		/* see i2400m->ready's documentation  */
-	flush_workqueue(i2400m->work_queue);
-
-	if (i2400m->bus_dev_stop)
-		i2400m->bus_dev_stop(i2400m);
-	destroy_workqueue(i2400m->work_queue);
-	i2400m_rx_release(i2400m);
-	i2400m_tx_release(i2400m);
-	wimax_state_change(wimax_dev, WIMAX_ST_DOWN);
-	d_fnend(3, dev, "(i2400m %p) = 0\n", i2400m);
-}
-
-
-/*
- * Watch out -- we only need to stop if there is a need for it. The
- * device could have reset itself and failed to come up again (see
- * _i2400m_dev_reset_handle()).
- */
-static
-void i2400m_dev_stop(struct i2400m *i2400m)
-{
-	mutex_lock(&i2400m->init_mutex);
-	if (i2400m->updown) {
-		__i2400m_dev_stop(i2400m);
-		i2400m->updown = 0;
-		i2400m->alive = 0;
-		wmb();	/* see i2400m->updown and i2400m->alive's doc */
-	}
-	mutex_unlock(&i2400m->init_mutex);
-}
-
-
-/*
- * Listen to PM events to cache the firmware before suspend/hibernation
- *
- * When the device comes out of suspend, it might go into reset and
- * firmware has to be uploaded again. At resume, most of the times, we
- * can't load firmware images from disk, so we need to cache it.
- *
- * i2400m_fw_cache() will allocate a kobject and attach the firmware
- * to it; that way we don't have to worry too much about the fw loader
- * hitting a race condition.
- *
- * Note: modus operandi stolen from the Orinoco driver; thx.
- */
-static
-int i2400m_pm_notifier(struct notifier_block *notifier,
-		       unsigned long pm_event,
-		       void *unused)
-{
-	struct i2400m *i2400m =
-		container_of(notifier, struct i2400m, pm_notifier);
-	struct device *dev = i2400m_dev(i2400m);
-
-	d_fnstart(3, dev, "(i2400m %p pm_event %lx)\n", i2400m, pm_event);
-	switch (pm_event) {
-	case PM_HIBERNATION_PREPARE:
-	case PM_SUSPEND_PREPARE:
-		i2400m_fw_cache(i2400m);
-		break;
-	case PM_POST_RESTORE:
-		/* Restore from hibernation failed. We need to clean
-		 * up in exactly the same way, so fall through. */
-	case PM_POST_HIBERNATION:
-	case PM_POST_SUSPEND:
-		i2400m_fw_uncache(i2400m);
-		break;
-
-	case PM_RESTORE_PREPARE:
-	default:
-		break;
-	}
-	d_fnend(3, dev, "(i2400m %p pm_event %lx) = void\n", i2400m, pm_event);
-	return NOTIFY_DONE;
-}
-
-
-/*
- * pre-reset is called before a device is going on reset
- *
- * This has to be followed by a call to i2400m_post_reset(), otherwise
- * bad things might happen.
- */
-int i2400m_pre_reset(struct i2400m *i2400m)
-{
-	struct device *dev = i2400m_dev(i2400m);
-
-	d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
-	d_printf(1, dev, "pre-reset shut down\n");
-
-	mutex_lock(&i2400m->init_mutex);
-	if (i2400m->updown) {
-		netif_tx_disable(i2400m->wimax_dev.net_dev);
-		__i2400m_dev_stop(i2400m);
-		/* down't set updown to zero -- this way
-		 * post_reset can restore properly */
-	}
-	mutex_unlock(&i2400m->init_mutex);
-	if (i2400m->bus_release)
-		i2400m->bus_release(i2400m);
-	d_fnend(3, dev, "(i2400m %p) = 0\n", i2400m);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(i2400m_pre_reset);
-
-
-/*
- * Restore device state after a reset
- *
- * Do the work needed after a device reset to bring it up to the same
- * state as it was before the reset.
- *
- * NOTE: this requires i2400m->init_mutex taken
- */
-int i2400m_post_reset(struct i2400m *i2400m)
-{
-	int result = 0;
-	struct device *dev = i2400m_dev(i2400m);
-
-	d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
-	d_printf(1, dev, "post-reset start\n");
-	if (i2400m->bus_setup) {
-		result = i2400m->bus_setup(i2400m);
-		if (result < 0) {
-			dev_err(dev, "bus-specific setup failed: %d\n",
-				result);
-			goto error_bus_setup;
-		}
-	}
-	mutex_lock(&i2400m->init_mutex);
-	if (i2400m->updown) {
-		result = __i2400m_dev_start(
-			i2400m, I2400M_BRI_SOFT | I2400M_BRI_MAC_REINIT);
-		if (result < 0)
-			goto error_dev_start;
-	}
-	mutex_unlock(&i2400m->init_mutex);
-	d_fnend(3, dev, "(i2400m %p) = %d\n", i2400m, result);
-	return result;
-
-error_dev_start:
-	if (i2400m->bus_release)
-		i2400m->bus_release(i2400m);
-	/* even if the device was up, it could not be recovered, so we
-	 * mark it as down. */
-	i2400m->updown = 0;
-	wmb();		/* see i2400m->updown's documentation  */
-	mutex_unlock(&i2400m->init_mutex);
-error_bus_setup:
-	d_fnend(3, dev, "(i2400m %p) = %d\n", i2400m, result);
-	return result;
-}
-EXPORT_SYMBOL_GPL(i2400m_post_reset);
-
-
-/*
- * The device has rebooted; fix up the device and the driver
- *
- * Tear down the driver communication with the device, reload the
- * firmware and reinitialize the communication with the device.
- *
- * If someone calls a reset when the device's firmware is down, in
- * theory we won't see it because we are not listening. However, just
- * in case, leave the code to handle it.
- *
- * If there is a reset context, use it; this means someone is waiting
- * for us to tell him when the reset operation is complete and the
- * device is ready to rock again.
- *
- * NOTE: if we are in the process of bringing up or down the
- *       communication with the device [running i2400m_dev_start() or
- *       _stop()], don't do anything, let it fail and handle it.
- *
- * This function is ran always in a thread context
- *
- * This function gets passed, as payload to i2400m_work() a 'const
- * char *' ptr with a "reason" why the reset happened (for messages).
- */
-static
-void __i2400m_dev_reset_handle(struct work_struct *ws)
-{
-	struct i2400m *i2400m = container_of(ws, struct i2400m, reset_ws);
-	const char *reason = i2400m->reset_reason;
-	struct device *dev = i2400m_dev(i2400m);
-	struct i2400m_reset_ctx *ctx = i2400m->reset_ctx;
-	int result;
-
-	d_fnstart(3, dev, "(ws %p i2400m %p reason %s)\n", ws, i2400m, reason);
-
-	i2400m->boot_mode = 1;
-	wmb();		/* Make sure i2400m_msg_to_dev() sees boot_mode */
-
-	result = 0;
-	if (mutex_trylock(&i2400m->init_mutex) == 0) {
-		/* We are still in i2400m_dev_start() [let it fail] or
-		 * i2400m_dev_stop() [we are shutting down anyway, so
-		 * ignore it] or we are resetting somewhere else. */
-		dev_err(dev, "device rebooted somewhere else?\n");
-		i2400m_msg_to_dev_cancel_wait(i2400m, -EL3RST);
-		complete(&i2400m->msg_completion);
-		goto out;
-	}
-
-	dev_err(dev, "%s: reinitializing driver\n", reason);
-	rmb();
-	if (i2400m->updown) {
-		__i2400m_dev_stop(i2400m);
-		i2400m->updown = 0;
-		wmb();		/* see i2400m->updown's documentation  */
-	}
-
-	if (i2400m->alive) {
-		result = __i2400m_dev_start(i2400m,
-				    I2400M_BRI_SOFT | I2400M_BRI_MAC_REINIT);
-		if (result < 0) {
-			dev_err(dev, "%s: cannot start the device: %d\n",
-				reason, result);
-			result = -EUCLEAN;
-			if (atomic_read(&i2400m->bus_reset_retries)
-					>= I2400M_BUS_RESET_RETRIES) {
-				result = -ENODEV;
-				dev_err(dev, "tried too many times to "
-					"reset the device, giving up\n");
-			}
-		}
-	}
-
-	if (i2400m->reset_ctx) {
-		ctx->result = result;
-		complete(&ctx->completion);
-	}
-	mutex_unlock(&i2400m->init_mutex);
-	if (result == -EUCLEAN) {
-		/*
-		 * We come here because the reset during operational mode
-		 * wasn't successfully done and need to proceed to a bus
-		 * reset. For the dev_reset_handle() to be able to handle
-		 * the reset event later properly, we restore boot_mode back
-		 * to the state before previous reset. ie: just like we are
-		 * issuing the bus reset for the first time
-		 */
-		i2400m->boot_mode = 0;
-		wmb();
-
-		atomic_inc(&i2400m->bus_reset_retries);
-		/* ops, need to clean up [w/ init_mutex not held] */
-		result = i2400m_reset(i2400m, I2400M_RT_BUS);
-		if (result >= 0)
-			result = -ENODEV;
-	} else {
-		rmb();
-		if (i2400m->alive) {
-			/* great, we expect the device state up and
-			 * dev_start() actually brings the device state up */
-			i2400m->updown = 1;
-			wmb();
-			atomic_set(&i2400m->bus_reset_retries, 0);
-		}
-	}
-out:
-	d_fnend(3, dev, "(ws %p i2400m %p reason %s) = void\n",
-		ws, i2400m, reason);
-}
-
-
-/**
- * i2400m_dev_reset_handle - Handle a device's reset in a thread context
- *
- * Schedule a device reset handling out on a thread context, so it
- * is safe to call from atomic context. We can't use the i2400m's
- * queue as we are going to destroy it and reinitialize it as part of
- * the driver bringup/bringup process.
- *
- * See __i2400m_dev_reset_handle() for details; that takes care of
- * reinitializing the driver to handle the reset, calling into the
- * bus-specific functions ops as needed.
- */
-int i2400m_dev_reset_handle(struct i2400m *i2400m, const char *reason)
-{
-	i2400m->reset_reason = reason;
-	return schedule_work(&i2400m->reset_ws);
-}
-EXPORT_SYMBOL_GPL(i2400m_dev_reset_handle);
-
-
- /*
- * The actual work of error recovery.
- *
- * The current implementation of error recovery is to trigger a bus reset.
- */
-static
-void __i2400m_error_recovery(struct work_struct *ws)
-{
-	struct i2400m *i2400m = container_of(ws, struct i2400m, recovery_ws);
-
-	i2400m_reset(i2400m, I2400M_RT_BUS);
-}
-
-/*
- * Schedule a work struct for error recovery.
- *
- * The intention of error recovery is to bring back the device to some
- * known state whenever TX sees -110 (-ETIMEOUT) on copying the data to
- * the device. The TX failure could mean a device bus stuck, so the current
- * error recovery implementation is to trigger a bus reset to the device
- * and hopefully it can bring back the device.
- *
- * The actual work of error recovery has to be in a thread context because
- * it is kicked off in the TX thread (i2400ms->tx_workqueue) which is to be
- * destroyed by the error recovery mechanism (currently a bus reset).
- *
- * Also, there may be already a queue of TX works that all hit
- * the -ETIMEOUT error condition because the device is stuck already.
- * Since bus reset is used as the error recovery mechanism and we don't
- * want consecutive bus resets simply because the multiple TX works
- * in the queue all hit the same device erratum, the flag "error_recovery"
- * is introduced for preventing unwanted consecutive bus resets.
- *
- * Error recovery shall only be invoked again if previous one was completed.
- * The flag error_recovery is set when error recovery mechanism is scheduled,
- * and is checked when we need to schedule another error recovery. If it is
- * in place already, then we shouldn't schedule another one.
- */
-void i2400m_error_recovery(struct i2400m *i2400m)
-{
-	if (atomic_add_return(1, &i2400m->error_recovery) == 1)
-		schedule_work(&i2400m->recovery_ws);
-	else
-		atomic_dec(&i2400m->error_recovery);
-}
-EXPORT_SYMBOL_GPL(i2400m_error_recovery);
-
-/*
- * Alloc the command and ack buffers for boot mode
- *
- * Get the buffers needed to deal with boot mode messages.
- */
-static
-int i2400m_bm_buf_alloc(struct i2400m *i2400m)
-{
-	i2400m->bm_cmd_buf = kzalloc(I2400M_BM_CMD_BUF_SIZE, GFP_KERNEL);
-	if (i2400m->bm_cmd_buf == NULL)
-		goto error_bm_cmd_kzalloc;
-	i2400m->bm_ack_buf = kzalloc(I2400M_BM_ACK_BUF_SIZE, GFP_KERNEL);
-	if (i2400m->bm_ack_buf == NULL)
-		goto error_bm_ack_buf_kzalloc;
-	return 0;
-
-error_bm_ack_buf_kzalloc:
-	kfree(i2400m->bm_cmd_buf);
-error_bm_cmd_kzalloc:
-	return -ENOMEM;
-}
-
-
-/*
- * Free boot mode command and ack buffers.
- */
-static
-void i2400m_bm_buf_free(struct i2400m *i2400m)
-{
-	kfree(i2400m->bm_ack_buf);
-	kfree(i2400m->bm_cmd_buf);
-}
-
-
-/**
- * i2400m_init - Initialize a 'struct i2400m' from all zeroes
- *
- * This is a bus-generic API call.
- */
-void i2400m_init(struct i2400m *i2400m)
-{
-	wimax_dev_init(&i2400m->wimax_dev);
-
-	i2400m->boot_mode = 1;
-	i2400m->rx_reorder = 1;
-	init_waitqueue_head(&i2400m->state_wq);
-
-	spin_lock_init(&i2400m->tx_lock);
-	i2400m->tx_pl_min = UINT_MAX;
-	i2400m->tx_size_min = UINT_MAX;
-
-	spin_lock_init(&i2400m->rx_lock);
-	i2400m->rx_pl_min = UINT_MAX;
-	i2400m->rx_size_min = UINT_MAX;
-	INIT_LIST_HEAD(&i2400m->rx_reports);
-	INIT_WORK(&i2400m->rx_report_ws, i2400m_report_hook_work);
-
-	mutex_init(&i2400m->msg_mutex);
-	init_completion(&i2400m->msg_completion);
-
-	mutex_init(&i2400m->init_mutex);
-	/* wake_tx_ws is initialized in i2400m_tx_setup() */
-
-	INIT_WORK(&i2400m->reset_ws, __i2400m_dev_reset_handle);
-	INIT_WORK(&i2400m->recovery_ws, __i2400m_error_recovery);
-
-	atomic_set(&i2400m->bus_reset_retries, 0);
-
-	i2400m->alive = 0;
-
-	/* initialize error_recovery to 1 for denoting we
-	 * are not yet ready to take any error recovery */
-	atomic_set(&i2400m->error_recovery, 1);
-}
-EXPORT_SYMBOL_GPL(i2400m_init);
-
-
-int i2400m_reset(struct i2400m *i2400m, enum i2400m_reset_type rt)
-{
-	struct net_device *net_dev = i2400m->wimax_dev.net_dev;
-
-	/*
-	 * Make sure we stop TXs and down the carrier before
-	 * resetting; this is needed to avoid things like
-	 * i2400m_wake_tx() scheduling stuff in parallel.
-	 */
-	if (net_dev->reg_state == NETREG_REGISTERED) {
-		netif_tx_disable(net_dev);
-		netif_carrier_off(net_dev);
-	}
-	return i2400m->bus_reset(i2400m, rt);
-}
-EXPORT_SYMBOL_GPL(i2400m_reset);
-
-
-/**
- * i2400m_setup - bus-generic setup function for the i2400m device
- *
- * @i2400m: device descriptor (bus-specific parts have been initialized)
- *
- * Returns: 0 if ok, < 0 errno code on error.
- *
- * Sets up basic device comunication infrastructure, boots the ROM to
- * read the MAC address, registers with the WiMAX and network stacks
- * and then brings up the device.
- */
-int i2400m_setup(struct i2400m *i2400m, enum i2400m_bri bm_flags)
-{
-	int result;
-	struct device *dev = i2400m_dev(i2400m);
-	struct wimax_dev *wimax_dev = &i2400m->wimax_dev;
-	struct net_device *net_dev = i2400m->wimax_dev.net_dev;
-
-	d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
-
-	snprintf(wimax_dev->name, sizeof(wimax_dev->name),
-		 "i2400m-%s:%s", dev->bus->name, dev_name(dev));
-
-	result = i2400m_bm_buf_alloc(i2400m);
-	if (result < 0) {
-		dev_err(dev, "cannot allocate bootmode scratch buffers\n");
-		goto error_bm_buf_alloc;
-	}
-
-	if (i2400m->bus_setup) {
-		result = i2400m->bus_setup(i2400m);
-		if (result < 0) {
-			dev_err(dev, "bus-specific setup failed: %d\n",
-				result);
-			goto error_bus_setup;
-		}
-	}
-
-	result = i2400m_bootrom_init(i2400m, bm_flags);
-	if (result < 0) {
-		dev_err(dev, "read mac addr: bootrom init "
-			"failed: %d\n", result);
-		goto error_bootrom_init;
-	}
-	result = i2400m_read_mac_addr(i2400m);
-	if (result < 0)
-		goto error_read_mac_addr;
-	eth_random_addr(i2400m->src_mac_addr);
-
-	i2400m->pm_notifier.notifier_call = i2400m_pm_notifier;
-	register_pm_notifier(&i2400m->pm_notifier);
-
-	result = register_netdev(net_dev);	/* Okey dokey, bring it up */
-	if (result < 0) {
-		dev_err(dev, "cannot register i2400m network device: %d\n",
-			result);
-		goto error_register_netdev;
-	}
-	netif_carrier_off(net_dev);
-
-	i2400m->wimax_dev.op_msg_from_user = i2400m_op_msg_from_user;
-	i2400m->wimax_dev.op_rfkill_sw_toggle = i2400m_op_rfkill_sw_toggle;
-	i2400m->wimax_dev.op_reset = i2400m_op_reset;
-
-	result = wimax_dev_add(&i2400m->wimax_dev, net_dev);
-	if (result < 0)
-		goto error_wimax_dev_add;
-
-	/* Now setup all that requires a registered net and wimax device. */
-	result = sysfs_create_group(&net_dev->dev.kobj, &i2400m_dev_attr_group);
-	if (result < 0) {
-		dev_err(dev, "cannot setup i2400m's sysfs: %d\n", result);
-		goto error_sysfs_setup;
-	}
-
-	i2400m_debugfs_add(i2400m);
-
-	result = i2400m_dev_start(i2400m, bm_flags);
-	if (result < 0)
-		goto error_dev_start;
-	d_fnend(3, dev, "(i2400m %p) = %d\n", i2400m, result);
-	return result;
-
-error_dev_start:
-	i2400m_debugfs_rm(i2400m);
-	sysfs_remove_group(&i2400m->wimax_dev.net_dev->dev.kobj,
-			   &i2400m_dev_attr_group);
-error_sysfs_setup:
-	wimax_dev_rm(&i2400m->wimax_dev);
-error_wimax_dev_add:
-	unregister_netdev(net_dev);
-error_register_netdev:
-	unregister_pm_notifier(&i2400m->pm_notifier);
-error_read_mac_addr:
-error_bootrom_init:
-	if (i2400m->bus_release)
-		i2400m->bus_release(i2400m);
-error_bus_setup:
-	i2400m_bm_buf_free(i2400m);
-error_bm_buf_alloc:
-	d_fnend(3, dev, "(i2400m %p) = %d\n", i2400m, result);
-	return result;
-}
-EXPORT_SYMBOL_GPL(i2400m_setup);
-
-
-/**
- * i2400m_release - release the bus-generic driver resources
- *
- * Sends a disconnect message and undoes any setup done by i2400m_setup()
- */
-void i2400m_release(struct i2400m *i2400m)
-{
-	struct device *dev = i2400m_dev(i2400m);
-
-	d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
-	netif_stop_queue(i2400m->wimax_dev.net_dev);
-
-	i2400m_dev_stop(i2400m);
-
-	cancel_work_sync(&i2400m->reset_ws);
-	cancel_work_sync(&i2400m->recovery_ws);
-
-	i2400m_debugfs_rm(i2400m);
-	sysfs_remove_group(&i2400m->wimax_dev.net_dev->dev.kobj,
-			   &i2400m_dev_attr_group);
-	wimax_dev_rm(&i2400m->wimax_dev);
-	unregister_netdev(i2400m->wimax_dev.net_dev);
-	unregister_pm_notifier(&i2400m->pm_notifier);
-	if (i2400m->bus_release)
-		i2400m->bus_release(i2400m);
-	i2400m_bm_buf_free(i2400m);
-	d_fnend(3, dev, "(i2400m %p) = void\n", i2400m);
-}
-EXPORT_SYMBOL_GPL(i2400m_release);
-
-
-/*
- * Debug levels control; see debug.h
- */
-struct d_level D_LEVEL[] = {
-	D_SUBMODULE_DEFINE(control),
-	D_SUBMODULE_DEFINE(driver),
-	D_SUBMODULE_DEFINE(debugfs),
-	D_SUBMODULE_DEFINE(fw),
-	D_SUBMODULE_DEFINE(netdev),
-	D_SUBMODULE_DEFINE(rfkill),
-	D_SUBMODULE_DEFINE(rx),
-	D_SUBMODULE_DEFINE(sysfs),
-	D_SUBMODULE_DEFINE(tx),
-};
-size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL);
-
-
-static
-int __init i2400m_driver_init(void)
-{
-	d_parse_params(D_LEVEL, D_LEVEL_SIZE, i2400m_debug_params,
-		       "i2400m.debug");
-	return i2400m_barker_db_init(i2400m_barkers_params);
-}
-module_init(i2400m_driver_init);
-
-static
-void __exit i2400m_driver_exit(void)
-{
-	i2400m_barker_db_exit();
-}
-module_exit(i2400m_driver_exit);
-
-MODULE_AUTHOR("Intel Corporation <linux-wimax@intel.com>");
-MODULE_DESCRIPTION("Intel 2400M WiMAX networking bus-generic driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/net/wimax/i2400m/fw.c b/drivers/net/wimax/i2400m/fw.c
deleted file mode 100644
index 6c9a41bff2e0..000000000000
--- a/drivers/net/wimax/i2400m/fw.c
+++ /dev/null
@@ -1,1653 +0,0 @@
-/*
- * Intel Wireless WiMAX Connection 2400m
- * Firmware uploader
- *
- *
- * Copyright (C) 2007-2008 Intel Corporation. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * Intel Corporation <linux-wimax@intel.com>
- * Yanir Lubetkin <yanirx.lubetkin@intel.com>
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *  - Initial implementation
- *
- *
- * THE PROCEDURE
- *
- * The 2400m and derived devices work in two modes: boot-mode or
- * normal mode. In boot mode we can execute only a handful of commands
- * targeted at uploading the firmware and launching it.
- *
- * The 2400m enters boot mode when it is first connected to the
- * system, when it crashes and when you ask it to reboot. There are
- * two submodes of the boot mode: signed and non-signed. Signed takes
- * firmwares signed with a certain private key, non-signed takes any
- * firmware. Normal hardware takes only signed firmware.
- *
- * On boot mode, in USB, we write to the device using the bulk out
- * endpoint and read from it in the notification endpoint.
- *
- * Upon entrance to boot mode, the device sends (preceded with a few
- * zero length packets (ZLPs) on the notification endpoint in USB) a
- * reboot barker (4 le32 words with the same value). We ack it by
- * sending the same barker to the device. The device acks with a
- * reboot ack barker (4 le32 words with value I2400M_ACK_BARKER) and
- * then is fully booted. At this point we can upload the firmware.
- *
- * Note that different iterations of the device and EEPROM
- * configurations will send different [re]boot barkers; these are
- * collected in i2400m_barker_db along with the firmware
- * characteristics they require.
- *
- * This process is accomplished by the i2400m_bootrom_init()
- * function. All the device interaction happens through the
- * i2400m_bm_cmd() [boot mode command]. Special return values will
- * indicate if the device did reset during the process.
- *
- * After this, we read the MAC address and then (if needed)
- * reinitialize the device. We need to read it ahead of time because
- * in the future, we might not upload the firmware until userspace
- * 'ifconfig up's the device.
- *
- * We can then upload the firmware file. The file is composed of a BCF
- * header (basic data, keys and signatures) and a list of write
- * commands and payloads. Optionally more BCF headers might follow the
- * main payload. We first upload the header [i2400m_dnload_init()] and
- * then pass the commands and payloads verbatim to the i2400m_bm_cmd()
- * function [i2400m_dnload_bcf()]. Then we tell the device to jump to
- * the new firmware [i2400m_dnload_finalize()].
- *
- * Once firmware is uploaded, we are good to go :)
- *
- * When we don't know in which mode we are, we first try by sending a
- * warm reset request that will take us to boot-mode. If we time out
- * waiting for a reboot barker, that means maybe we are already in
- * boot mode, so we send a reboot barker.
- *
- * COMMAND EXECUTION
- *
- * This code (and process) is single threaded; for executing commands,
- * we post a URB to the notification endpoint, post the command, wait
- * for data on the notification buffer. We don't need to worry about
- * others as we know we are the only ones in there.
- *
- * BACKEND IMPLEMENTATION
- *
- * This code is bus-generic; the bus-specific driver provides back end
- * implementations to send a boot mode command to the device and to
- * read an acknolwedgement from it (or an asynchronous notification)
- * from it.
- *
- * FIRMWARE LOADING
- *
- * Note that in some cases, we can't just load a firmware file (for
- * example, when resuming). For that, we might cache the firmware
- * file. Thus, when doing the bootstrap, if there is a cache firmware
- * file, it is used; if not, loading from disk is attempted.
- *
- * ROADMAP
- *
- * i2400m_barker_db_init              Called by i2400m_driver_init()
- *   i2400m_barker_db_add
- *
- * i2400m_barker_db_exit              Called by i2400m_driver_exit()
- *
- * i2400m_dev_bootstrap               Called by __i2400m_dev_start()
- *   request_firmware
- *   i2400m_fw_bootstrap
- *     i2400m_fw_check
- *       i2400m_fw_hdr_check
- *     i2400m_fw_dnload
- *   release_firmware
- *
- * i2400m_fw_dnload
- *   i2400m_bootrom_init
- *     i2400m_bm_cmd
- *     i2400m_reset
- *   i2400m_dnload_init
- *     i2400m_dnload_init_signed
- *     i2400m_dnload_init_nonsigned
- *       i2400m_download_chunk
- *         i2400m_bm_cmd
- *   i2400m_dnload_bcf
- *     i2400m_bm_cmd
- *   i2400m_dnload_finalize
- *     i2400m_bm_cmd
- *
- * i2400m_bm_cmd
- *   i2400m->bus_bm_cmd_send()
- *   i2400m->bus_bm_wait_for_ack
- *   __i2400m_bm_ack_verify
- *     i2400m_is_boot_barker
- *
- * i2400m_bm_cmd_prepare              Used by bus-drivers to prep
- *                                    commands before sending
- *
- * i2400m_pm_notifier                 Called on Power Management events
- *   i2400m_fw_cache
- *   i2400m_fw_uncache
- */
-#include <linux/firmware.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/usb.h>
-#include <linux/export.h>
-#include "i2400m.h"
-
-
-#define D_SUBMODULE fw
-#include "debug-levels.h"
-
-
-static const __le32 i2400m_ACK_BARKER[4] = {
-	cpu_to_le32(I2400M_ACK_BARKER),
-	cpu_to_le32(I2400M_ACK_BARKER),
-	cpu_to_le32(I2400M_ACK_BARKER),
-	cpu_to_le32(I2400M_ACK_BARKER)
-};
-
-
-/**
- * Prepare a boot-mode command for delivery
- *
- * @cmd: pointer to bootrom header to prepare
- *
- * Computes checksum if so needed. After calling this function, DO NOT
- * modify the command or header as the checksum won't work anymore.
- *
- * We do it from here because some times we cannot do it in the
- * original context the command was sent (it is a const), so when we
- * copy it to our staging buffer, we add the checksum there.
- */
-void i2400m_bm_cmd_prepare(struct i2400m_bootrom_header *cmd)
-{
-	if (i2400m_brh_get_use_checksum(cmd)) {
-		int i;
-		u32 checksum = 0;
-		const u32 *checksum_ptr = (void *) cmd->payload;
-		for (i = 0; i < cmd->data_size / 4; i++)
-			checksum += cpu_to_le32(*checksum_ptr++);
-		checksum += cmd->command + cmd->target_addr + cmd->data_size;
-		cmd->block_checksum = cpu_to_le32(checksum);
-	}
-}
-EXPORT_SYMBOL_GPL(i2400m_bm_cmd_prepare);
-
-
-/*
- * Database of known barkers.
- *
- * A barker is what the device sends indicating he is ready to be
- * bootloaded. Different versions of the device will send different
- * barkers. Depending on the barker, it might mean the device wants
- * some kind of firmware or the other.
- */
-static struct i2400m_barker_db {
-	__le32 data[4];
-} *i2400m_barker_db;
-static size_t i2400m_barker_db_used, i2400m_barker_db_size;
-
-
-static
-int i2400m_zrealloc_2x(void **ptr, size_t *_count, size_t el_size,
-		       gfp_t gfp_flags)
-{
-	size_t old_count = *_count,
-		new_count = old_count ? 2 * old_count : 2,
-		old_size = el_size * old_count,
-		new_size = el_size * new_count;
-	void *nptr = krealloc(*ptr, new_size, gfp_flags);
-	if (nptr) {
-		/* zero the other half or the whole thing if old_count
-		 * was zero */
-		if (old_size == 0)
-			memset(nptr, 0, new_size);
-		else
-			memset(nptr + old_size, 0, old_size);
-		*_count = new_count;
-		*ptr = nptr;
-		return 0;
-	} else
-		return -ENOMEM;
-}
-
-
-/*
- * Add a barker to the database
- *
- * This cannot used outside of this module and only at at module_init
- * time. This is to avoid the need to do locking.
- */
-static
-int i2400m_barker_db_add(u32 barker_id)
-{
-	int result;
-
-	struct i2400m_barker_db *barker;
-	if (i2400m_barker_db_used >= i2400m_barker_db_size) {
-		result = i2400m_zrealloc_2x(
-			(void **) &i2400m_barker_db, &i2400m_barker_db_size,
-			sizeof(i2400m_barker_db[0]), GFP_KERNEL);
-		if (result < 0)
-			return result;
-	}
-	barker = i2400m_barker_db + i2400m_barker_db_used++;
-	barker->data[0] = le32_to_cpu(barker_id);
-	barker->data[1] = le32_to_cpu(barker_id);
-	barker->data[2] = le32_to_cpu(barker_id);
-	barker->data[3] = le32_to_cpu(barker_id);
-	return 0;
-}
-
-
-void i2400m_barker_db_exit(void)
-{
-	kfree(i2400m_barker_db);
-	i2400m_barker_db = NULL;
-	i2400m_barker_db_size = 0;
-	i2400m_barker_db_used = 0;
-}
-
-
-/*
- * Helper function to add all the known stable barkers to the barker
- * database.
- */
-static
-int i2400m_barker_db_known_barkers(void)
-{
-	int result;
-
-	result = i2400m_barker_db_add(I2400M_NBOOT_BARKER);
-	if (result < 0)
-		goto error_add;
-	result = i2400m_barker_db_add(I2400M_SBOOT_BARKER);
-	if (result < 0)
-		goto error_add;
-	result = i2400m_barker_db_add(I2400M_SBOOT_BARKER_6050);
-	if (result < 0)
-		goto error_add;
-error_add:
-       return result;
-}
-
-
-/*
- * Initialize the barker database
- *
- * This can only be used from the module_init function for this
- * module; this is to avoid the need to do locking.
- *
- * @options: command line argument with extra barkers to
- *     recognize. This is a comma-separated list of 32-bit hex
- *     numbers. They are appended to the existing list. Setting 0
- *     cleans the existing list and starts a new one.
- */
-int i2400m_barker_db_init(const char *_options)
-{
-	int result;
-	char *options = NULL, *options_orig, *token;
-
-	i2400m_barker_db = NULL;
-	i2400m_barker_db_size = 0;
-	i2400m_barker_db_used = 0;
-
-	result = i2400m_barker_db_known_barkers();
-	if (result < 0)
-		goto error_add;
-	/* parse command line options from i2400m.barkers */
-	if (_options != NULL) {
-		unsigned barker;
-
-		options_orig = kstrdup(_options, GFP_KERNEL);
-		if (options_orig == NULL) {
-			result = -ENOMEM;
-			goto error_parse;
-		}
-		options = options_orig;
-
-		while ((token = strsep(&options, ",")) != NULL) {
-			if (*token == '\0')	/* eat joint commas */
-				continue;
-			if (sscanf(token, "%x", &barker) != 1
-			    || barker > 0xffffffff) {
-				printk(KERN_ERR "%s: can't recognize "
-				       "i2400m.barkers value '%s' as "
-				       "a 32-bit number\n",
-				       __func__, token);
-				result = -EINVAL;
-				goto error_parse;
-			}
-			if (barker == 0) {
-				/* clean list and start new */
-				i2400m_barker_db_exit();
-				continue;
-			}
-			result = i2400m_barker_db_add(barker);
-			if (result < 0)
-				goto error_parse_add;
-		}
-		kfree(options_orig);
-	}
-	return 0;
-
-error_parse_add:
-error_parse:
-	kfree(options_orig);
-error_add:
-	kfree(i2400m_barker_db);
-	return result;
-}
-
-
-/*
- * Recognize a boot barker
- *
- * @buf: buffer where the boot barker.
- * @buf_size: size of the buffer (has to be 16 bytes). It is passed
- *     here so the function can check it for the caller.
- *
- * Note that as a side effect, upon identifying the obtained boot
- * barker, this function will set i2400m->barker to point to the right
- * barker database entry. Subsequent calls to the function will result
- * in verifying that the same type of boot barker is returned when the
- * device [re]boots (as long as the same device instance is used).
- *
- * Return: 0 if @buf matches a known boot barker. -ENOENT if the
- *     buffer in @buf doesn't match any boot barker in the database or
- *     -EILSEQ if the buffer doesn't have the right size.
- */
-int i2400m_is_boot_barker(struct i2400m *i2400m,
-			  const void *buf, size_t buf_size)
-{
-	int result;
-	struct device *dev = i2400m_dev(i2400m);
-	struct i2400m_barker_db *barker;
-	int i;
-
-	result = -ENOENT;
-	if (buf_size != sizeof(i2400m_barker_db[i].data))
-		return result;
-
-	/* Short circuit if we have already discovered the barker
-	 * associated with the device. */
-	if (i2400m->barker &&
-	    !memcmp(buf, i2400m->barker, sizeof(i2400m->barker->data)))
-		return 0;
-
-	for (i = 0; i < i2400m_barker_db_used; i++) {
-		barker = &i2400m_barker_db[i];
-		BUILD_BUG_ON(sizeof(barker->data) != 16);
-		if (memcmp(buf, barker->data, sizeof(barker->data)))
-			continue;
-
-		if (i2400m->barker == NULL) {
-			i2400m->barker = barker;
-			d_printf(1, dev, "boot barker set to #%u/%08x\n",
-				 i, le32_to_cpu(barker->data[0]));
-			if (barker->data[0] == le32_to_cpu(I2400M_NBOOT_BARKER))
-				i2400m->sboot = 0;
-			else
-				i2400m->sboot = 1;
-		} else if (i2400m->barker != barker) {
-			dev_err(dev, "HW inconsistency: device "
-				"reports a different boot barker "
-				"than set (from %08x to %08x)\n",
-				le32_to_cpu(i2400m->barker->data[0]),
-				le32_to_cpu(barker->data[0]));
-			result = -EIO;
-		} else
-			d_printf(2, dev, "boot barker confirmed #%u/%08x\n",
-				 i, le32_to_cpu(barker->data[0]));
-		result = 0;
-		break;
-	}
-	return result;
-}
-EXPORT_SYMBOL_GPL(i2400m_is_boot_barker);
-
-
-/*
- * Verify the ack data received
- *
- * Given a reply to a boot mode command, chew it and verify everything
- * is ok.
- *
- * @opcode: opcode which generated this ack. For error messages.
- * @ack: pointer to ack data we received
- * @ack_size: size of that data buffer
- * @flags: I2400M_BM_CMD_* flags we called the command with.
- *
- * Way too long function -- maybe it should be further split
- */
-static
-ssize_t __i2400m_bm_ack_verify(struct i2400m *i2400m, int opcode,
-			       struct i2400m_bootrom_header *ack,
-			       size_t ack_size, int flags)
-{
-	ssize_t result = -ENOMEM;
-	struct device *dev = i2400m_dev(i2400m);
-
-	d_fnstart(8, dev, "(i2400m %p opcode %d ack %p size %zu)\n",
-		  i2400m, opcode, ack, ack_size);
-	if (ack_size < sizeof(*ack)) {
-		result = -EIO;
-		dev_err(dev, "boot-mode cmd %d: HW BUG? notification didn't "
-			"return enough data (%zu bytes vs %zu expected)\n",
-			opcode, ack_size, sizeof(*ack));
-		goto error_ack_short;
-	}
-	result = i2400m_is_boot_barker(i2400m, ack, ack_size);
-	if (result >= 0) {
-		result = -ERESTARTSYS;
-		d_printf(6, dev, "boot-mode cmd %d: HW boot barker\n", opcode);
-		goto error_reboot;
-	}
-	if (ack_size == sizeof(i2400m_ACK_BARKER)
-		 && memcmp(ack, i2400m_ACK_BARKER, sizeof(*ack)) == 0) {
-		result = -EISCONN;
-		d_printf(3, dev, "boot-mode cmd %d: HW reboot ack barker\n",
-			 opcode);
-		goto error_reboot_ack;
-	}
-	result = 0;
-	if (flags & I2400M_BM_CMD_RAW)
-		goto out_raw;
-	ack->data_size = le32_to_cpu(ack->data_size);
-	ack->target_addr = le32_to_cpu(ack->target_addr);
-	ack->block_checksum = le32_to_cpu(ack->block_checksum);
-	d_printf(5, dev, "boot-mode cmd %d: notification for opcode %u "
-		 "response %u csum %u rr %u da %u\n",
-		 opcode, i2400m_brh_get_opcode(ack),
-		 i2400m_brh_get_response(ack),
-		 i2400m_brh_get_use_checksum(ack),
-		 i2400m_brh_get_response_required(ack),
-		 i2400m_brh_get_direct_access(ack));
-	result = -EIO;
-	if (i2400m_brh_get_signature(ack) != 0xcbbc) {
-		dev_err(dev, "boot-mode cmd %d: HW BUG? wrong signature "
-			"0x%04x\n", opcode, i2400m_brh_get_signature(ack));
-		goto error_ack_signature;
-	}
-	if (opcode != -1 && opcode != i2400m_brh_get_opcode(ack)) {
-		dev_err(dev, "boot-mode cmd %d: HW BUG? "
-			"received response for opcode %u, expected %u\n",
-			opcode, i2400m_brh_get_opcode(ack), opcode);
-		goto error_ack_opcode;
-	}
-	if (i2400m_brh_get_response(ack) != 0) {	/* failed? */
-		dev_err(dev, "boot-mode cmd %d: error; hw response %u\n",
-			opcode, i2400m_brh_get_response(ack));
-		goto error_ack_failed;
-	}
-	if (ack_size < ack->data_size + sizeof(*ack)) {
-		dev_err(dev, "boot-mode cmd %d: SW BUG "
-			"driver provided only %zu bytes for %zu bytes "
-			"of data\n", opcode, ack_size,
-			(size_t) le32_to_cpu(ack->data_size) + sizeof(*ack));
-		goto error_ack_short_buffer;
-	}
-	result = ack_size;
-	/* Don't you love this stack of empty targets? Well, I don't
-	 * either, but it helps track exactly who comes in here and
-	 * why :) */
-error_ack_short_buffer:
-error_ack_failed:
-error_ack_opcode:
-error_ack_signature:
-out_raw:
-error_reboot_ack:
-error_reboot:
-error_ack_short:
-	d_fnend(8, dev, "(i2400m %p opcode %d ack %p size %zu) = %d\n",
-		i2400m, opcode, ack, ack_size, (int) result);
-	return result;
-}
-
-
-/**
- * i2400m_bm_cmd - Execute a boot mode command
- *
- * @cmd: buffer containing the command data (pointing at the header).
- *     This data can be ANYWHERE (for USB, we will copy it to an
- *     specific buffer). Make sure everything is in proper little
- *     endian.
- *
- *     A raw buffer can be also sent, just cast it and set flags to
- *     I2400M_BM_CMD_RAW.
- *
- *     This function will generate a checksum for you if the
- *     checksum bit in the command is set (unless I2400M_BM_CMD_RAW
- *     is set).
- *
- *     You can use the i2400m->bm_cmd_buf to stage your commands and
- *     send them.
- *
- *     If NULL, no command is sent (we just wait for an ack).
- *
- * @cmd_size: size of the command. Will be auto padded to the
- *     bus-specific drivers padding requirements.
- *
- * @ack: buffer where to place the acknowledgement. If it is a regular
- *     command response, all fields will be returned with the right,
- *     native endianess.
- *
- *     You *cannot* use i2400m->bm_ack_buf for this buffer.
- *
- * @ack_size: size of @ack, 16 aligned; you need to provide at least
- *     sizeof(*ack) bytes and then enough to contain the return data
- *     from the command
- *
- * @flags: see I2400M_BM_CMD_* above.
- *
- * @returns: bytes received by the notification; if < 0, an errno code
- *     denoting an error or:
- *
- *     -ERESTARTSYS  The device has rebooted
- *
- * Executes a boot-mode command and waits for a response, doing basic
- * validation on it; if a zero length response is received, it retries
- * waiting for a response until a non-zero one is received (timing out
- * after %I2400M_BOOT_RETRIES retries).
- */
-static
-ssize_t i2400m_bm_cmd(struct i2400m *i2400m,
-		      const struct i2400m_bootrom_header *cmd, size_t cmd_size,
-		      struct i2400m_bootrom_header *ack, size_t ack_size,
-		      int flags)
-{
-	ssize_t result = -ENOMEM, rx_bytes;
-	struct device *dev = i2400m_dev(i2400m);
-	int opcode = cmd == NULL ? -1 : i2400m_brh_get_opcode(cmd);
-
-	d_fnstart(6, dev, "(i2400m %p cmd %p size %zu ack %p size %zu)\n",
-		  i2400m, cmd, cmd_size, ack, ack_size);
-	BUG_ON(ack_size < sizeof(*ack));
-	BUG_ON(i2400m->boot_mode == 0);
-
-	if (cmd != NULL) {		/* send the command */
-		result = i2400m->bus_bm_cmd_send(i2400m, cmd, cmd_size, flags);
-		if (result < 0)
-			goto error_cmd_send;
-		if ((flags & I2400M_BM_CMD_RAW) == 0)
-			d_printf(5, dev,
-				 "boot-mode cmd %d csum %u rr %u da %u: "
-				 "addr 0x%04x size %u block csum 0x%04x\n",
-				 opcode, i2400m_brh_get_use_checksum(cmd),
-				 i2400m_brh_get_response_required(cmd),
-				 i2400m_brh_get_direct_access(cmd),
-				 cmd->target_addr, cmd->data_size,
-				 cmd->block_checksum);
-	}
-	result = i2400m->bus_bm_wait_for_ack(i2400m, ack, ack_size);
-	if (result < 0) {
-		dev_err(dev, "boot-mode cmd %d: error waiting for an ack: %d\n",
-			opcode, (int) result);	/* bah, %zd doesn't work */
-		goto error_wait_for_ack;
-	}
-	rx_bytes = result;
-	/* verify the ack and read more if necessary [result is the
-	 * final amount of bytes we get in the ack]  */
-	result = __i2400m_bm_ack_verify(i2400m, opcode, ack, ack_size, flags);
-	if (result < 0)
-		goto error_bad_ack;
-	/* Don't you love this stack of empty targets? Well, I don't
-	 * either, but it helps track exactly who comes in here and
-	 * why :) */
-	result = rx_bytes;
-error_bad_ack:
-error_wait_for_ack:
-error_cmd_send:
-	d_fnend(6, dev, "(i2400m %p cmd %p size %zu ack %p size %zu) = %d\n",
-		i2400m, cmd, cmd_size, ack, ack_size, (int) result);
-	return result;
-}
-
-
-/**
- * i2400m_download_chunk - write a single chunk of data to the device's memory
- *
- * @i2400m: device descriptor
- * @buf: the buffer to write
- * @buf_len: length of the buffer to write
- * @addr: address in the device memory space
- * @direct: bootrom write mode
- * @do_csum: should a checksum validation be performed
- */
-static int i2400m_download_chunk(struct i2400m *i2400m, const void *chunk,
-				 size_t __chunk_len, unsigned long addr,
-				 unsigned int direct, unsigned int do_csum)
-{
-	int ret;
-	size_t chunk_len = ALIGN(__chunk_len, I2400M_PL_ALIGN);
-	struct device *dev = i2400m_dev(i2400m);
-	struct {
-		struct i2400m_bootrom_header cmd;
-		u8 cmd_payload[];
-	} __packed *buf;
-	struct i2400m_bootrom_header ack;
-
-	d_fnstart(5, dev, "(i2400m %p chunk %p __chunk_len %zu addr 0x%08lx "
-		  "direct %u do_csum %u)\n", i2400m, chunk, __chunk_len,
-		  addr, direct, do_csum);
-	buf = i2400m->bm_cmd_buf;
-	memcpy(buf->cmd_payload, chunk, __chunk_len);
-	memset(buf->cmd_payload + __chunk_len, 0xad, chunk_len - __chunk_len);
-
-	buf->cmd.command = i2400m_brh_command(I2400M_BRH_WRITE,
-					      __chunk_len & 0x3 ? 0 : do_csum,
-					      __chunk_len & 0xf ? 0 : direct);
-	buf->cmd.target_addr = cpu_to_le32(addr);
-	buf->cmd.data_size = cpu_to_le32(__chunk_len);
-	ret = i2400m_bm_cmd(i2400m, &buf->cmd, sizeof(buf->cmd) + chunk_len,
-			    &ack, sizeof(ack), 0);
-	if (ret >= 0)
-		ret = 0;
-	d_fnend(5, dev, "(i2400m %p chunk %p __chunk_len %zu addr 0x%08lx "
-		"direct %u do_csum %u) = %d\n", i2400m, chunk, __chunk_len,
-		addr, direct, do_csum, ret);
-	return ret;
-}
-
-
-/*
- * Download a BCF file's sections to the device
- *
- * @i2400m: device descriptor
- * @bcf: pointer to firmware data (first header followed by the
- *     payloads). Assumed verified and consistent.
- * @bcf_len: length (in bytes) of the @bcf buffer.
- *
- * Returns: < 0 errno code on error or the offset to the jump instruction.
- *
- * Given a BCF file, downloads each section (a command and a payload)
- * to the device's address space. Actually, it just executes each
- * command i the BCF file.
- *
- * The section size has to be aligned to 4 bytes AND the padding has
- * to be taken from the firmware file, as the signature takes it into
- * account.
- */
-static
-ssize_t i2400m_dnload_bcf(struct i2400m *i2400m,
-			  const struct i2400m_bcf_hdr *bcf, size_t bcf_len)
-{
-	ssize_t ret;
-	struct device *dev = i2400m_dev(i2400m);
-	size_t offset,		/* iterator offset */
-		data_size,	/* Size of the data payload */
-		section_size,	/* Size of the whole section (cmd + payload) */
-		section = 1;
-	const struct i2400m_bootrom_header *bh;
-	struct i2400m_bootrom_header ack;
-
-	d_fnstart(3, dev, "(i2400m %p bcf %p bcf_len %zu)\n",
-		  i2400m, bcf, bcf_len);
-	/* Iterate over the command blocks in the BCF file that start
-	 * after the header */
-	offset = le32_to_cpu(bcf->header_len) * sizeof(u32);
-	while (1) {	/* start sending the file */
-		bh = (void *) bcf + offset;
-		data_size = le32_to_cpu(bh->data_size);
-		section_size = ALIGN(sizeof(*bh) + data_size, 4);
-		d_printf(7, dev,
-			 "downloading section #%zu (@%zu %zu B) to 0x%08x\n",
-			 section, offset, sizeof(*bh) + data_size,
-			 le32_to_cpu(bh->target_addr));
-		/*
-		 * We look for JUMP cmd from the bootmode header,
-		 * either I2400M_BRH_SIGNED_JUMP for secure boot
-		 * or I2400M_BRH_JUMP for unsecure boot, the last chunk
-		 * should be the bootmode header with JUMP cmd.
-		 */
-		if (i2400m_brh_get_opcode(bh) == I2400M_BRH_SIGNED_JUMP ||
-			i2400m_brh_get_opcode(bh) == I2400M_BRH_JUMP) {
-			d_printf(5, dev,  "jump found @%zu\n", offset);
-			break;
-		}
-		if (offset + section_size > bcf_len) {
-			dev_err(dev, "fw %s: bad section #%zu, "
-				"end (@%zu) beyond EOF (@%zu)\n",
-				i2400m->fw_name, section,
-				offset + section_size,  bcf_len);
-			ret = -EINVAL;
-			goto error_section_beyond_eof;
-		}
-		__i2400m_msleep(20);
-		ret = i2400m_bm_cmd(i2400m, bh, section_size,
-				    &ack, sizeof(ack), I2400M_BM_CMD_RAW);
-		if (ret < 0) {
-			dev_err(dev, "fw %s: section #%zu (@%zu %zu B) "
-				"failed %d\n", i2400m->fw_name, section,
-				offset, sizeof(*bh) + data_size, (int) ret);
-			goto error_send;
-		}
-		offset += section_size;
-		section++;
-	}
-	ret = offset;
-error_section_beyond_eof:
-error_send:
-	d_fnend(3, dev, "(i2400m %p bcf %p bcf_len %zu) = %d\n",
-		i2400m, bcf, bcf_len, (int) ret);
-	return ret;
-}
-
-
-/*
- * Indicate if the device emitted a reboot barker that indicates
- * "signed boot"
- */
-static
-unsigned i2400m_boot_is_signed(struct i2400m *i2400m)
-{
-	return likely(i2400m->sboot);
-}
-
-
-/*
- * Do the final steps of uploading firmware
- *
- * @bcf_hdr: BCF header we are actually using
- * @bcf: pointer to the firmware image (which matches the first header
- *     that is followed by the actual payloads).
- * @offset: [byte] offset into @bcf for the command we need to send.
- *
- * Depending on the boot mode (signed vs non-signed), different
- * actions need to be taken.
- */
-static
-int i2400m_dnload_finalize(struct i2400m *i2400m,
-			   const struct i2400m_bcf_hdr *bcf_hdr,
-			   const struct i2400m_bcf_hdr *bcf, size_t offset)
-{
-	int ret = 0;
-	struct device *dev = i2400m_dev(i2400m);
-	struct i2400m_bootrom_header *cmd, ack;
-	struct {
-		struct i2400m_bootrom_header cmd;
-		u8 cmd_pl[0];
-	} __packed *cmd_buf;
-	size_t signature_block_offset, signature_block_size;
-
-	d_fnstart(3, dev, "offset %zu\n", offset);
-	cmd = (void *) bcf + offset;
-	if (i2400m_boot_is_signed(i2400m) == 0) {
-		struct i2400m_bootrom_header jump_ack;
-		d_printf(1, dev, "unsecure boot, jumping to 0x%08x\n",
-			le32_to_cpu(cmd->target_addr));
-		cmd_buf = i2400m->bm_cmd_buf;
-		memcpy(&cmd_buf->cmd, cmd, sizeof(*cmd));
-		cmd = &cmd_buf->cmd;
-		/* now cmd points to the actual bootrom_header in cmd_buf */
-		i2400m_brh_set_opcode(cmd, I2400M_BRH_JUMP);
-		cmd->data_size = 0;
-		ret = i2400m_bm_cmd(i2400m, cmd, sizeof(*cmd),
-				    &jump_ack, sizeof(jump_ack), 0);
-	} else {
-		d_printf(1, dev, "secure boot, jumping to 0x%08x\n",
-			 le32_to_cpu(cmd->target_addr));
-		cmd_buf = i2400m->bm_cmd_buf;
-		memcpy(&cmd_buf->cmd, cmd, sizeof(*cmd));
-		signature_block_offset =
-			sizeof(*bcf_hdr)
-			+ le32_to_cpu(bcf_hdr->key_size) * sizeof(u32)
-			+ le32_to_cpu(bcf_hdr->exponent_size) * sizeof(u32);
-		signature_block_size =
-			le32_to_cpu(bcf_hdr->modulus_size) * sizeof(u32);
-		memcpy(cmd_buf->cmd_pl,
-		       (void *) bcf_hdr + signature_block_offset,
-		       signature_block_size);
-		ret = i2400m_bm_cmd(i2400m, &cmd_buf->cmd,
-				    sizeof(cmd_buf->cmd) + signature_block_size,
-				    &ack, sizeof(ack), I2400M_BM_CMD_RAW);
-	}
-	d_fnend(3, dev, "returning %d\n", ret);
-	return ret;
-}
-
-
-/**
- * i2400m_bootrom_init - Reboots a powered device into boot mode
- *
- * @i2400m: device descriptor
- * @flags:
- *      I2400M_BRI_SOFT: a reboot barker has been seen
- *          already, so don't wait for it.
- *
- *      I2400M_BRI_NO_REBOOT: Don't send a reboot command, but wait
- *          for a reboot barker notification. This is a one shot; if
- *          the state machine needs to send a reboot command it will.
- *
- * Returns:
- *
- *     < 0 errno code on error, 0 if ok.
- *
- * Description:
- *
- * Tries hard enough to put the device in boot-mode. There are two
- * main phases to this:
- *
- * a. (1) send a reboot command and (2) get a reboot barker
- *
- * b. (1) echo/ack the reboot sending the reboot barker back and (2)
- *        getting an ack barker in return
- *
- * We want to skip (a) in some cases [soft]. The state machine is
- * horrible, but it is basically: on each phase, send what has to be
- * sent (if any), wait for the answer and act on the answer. We might
- * have to backtrack and retry, so we keep a max tries counter for
- * that.
- *
- * It sucks because we don't know ahead of time which is going to be
- * the reboot barker (the device might send different ones depending
- * on its EEPROM config) and once the device reboots and waits for the
- * echo/ack reboot barker being sent back, it doesn't understand
- * anything else. So we can be left at the point where we don't know
- * what to send to it -- cold reset and bus reset seem to have little
- * effect. So the function iterates (in this case) through all the
- * known barkers and tries them all until an ACK is
- * received. Otherwise, it gives up.
- *
- * If we get a timeout after sending a warm reset, we do it again.
- */
-int i2400m_bootrom_init(struct i2400m *i2400m, enum i2400m_bri flags)
-{
-	int result;
-	struct device *dev = i2400m_dev(i2400m);
-	struct i2400m_bootrom_header *cmd;
-	struct i2400m_bootrom_header ack;
-	int count = i2400m->bus_bm_retries;
-	int ack_timeout_cnt = 1;
-	unsigned i;
-
-	BUILD_BUG_ON(sizeof(*cmd) != sizeof(i2400m_barker_db[0].data));
-	BUILD_BUG_ON(sizeof(ack) != sizeof(i2400m_ACK_BARKER));
-
-	d_fnstart(4, dev, "(i2400m %p flags 0x%08x)\n", i2400m, flags);
-	result = -ENOMEM;
-	cmd = i2400m->bm_cmd_buf;
-	if (flags & I2400M_BRI_SOFT)
-		goto do_reboot_ack;
-do_reboot:
-	ack_timeout_cnt = 1;
-	if (--count < 0)
-		goto error_timeout;
-	d_printf(4, dev, "device reboot: reboot command [%d # left]\n",
-		 count);
-	if ((flags & I2400M_BRI_NO_REBOOT) == 0)
-		i2400m_reset(i2400m, I2400M_RT_WARM);
-	result = i2400m_bm_cmd(i2400m, NULL, 0, &ack, sizeof(ack),
-			       I2400M_BM_CMD_RAW);
-	flags &= ~I2400M_BRI_NO_REBOOT;
-	switch (result) {
-	case -ERESTARTSYS:
-		/*
-		 * at this point, i2400m_bm_cmd(), through
-		 * __i2400m_bm_ack_process(), has updated
-		 * i2400m->barker and we are good to go.
-		 */
-		d_printf(4, dev, "device reboot: got reboot barker\n");
-		break;
-	case -EISCONN:	/* we don't know how it got here...but we follow it */
-		d_printf(4, dev, "device reboot: got ack barker - whatever\n");
-		goto do_reboot;
-	case -ETIMEDOUT:
-		/*
-		 * Device has timed out, we might be in boot mode
-		 * already and expecting an ack; if we don't know what
-		 * the barker is, we just send them all. Cold reset
-		 * and bus reset don't work. Beats me.
-		 */
-		if (i2400m->barker != NULL) {
-			dev_err(dev, "device boot: reboot barker timed out, "
-				"trying (set) %08x echo/ack\n",
-				le32_to_cpu(i2400m->barker->data[0]));
-			goto do_reboot_ack;
-		}
-		for (i = 0; i < i2400m_barker_db_used; i++) {
-			struct i2400m_barker_db *barker = &i2400m_barker_db[i];
-			memcpy(cmd, barker->data, sizeof(barker->data));
-			result = i2400m_bm_cmd(i2400m, cmd, sizeof(*cmd),
-					       &ack, sizeof(ack),
-					       I2400M_BM_CMD_RAW);
-			if (result == -EISCONN) {
-				dev_warn(dev, "device boot: got ack barker "
-					 "after sending echo/ack barker "
-					 "#%d/%08x; rebooting j.i.c.\n",
-					 i, le32_to_cpu(barker->data[0]));
-				flags &= ~I2400M_BRI_NO_REBOOT;
-				goto do_reboot;
-			}
-		}
-		dev_err(dev, "device boot: tried all the echo/acks, could "
-			"not get device to respond; giving up");
-		result = -ESHUTDOWN;
-	case -EPROTO:
-	case -ESHUTDOWN:	/* dev is gone */
-	case -EINTR:		/* user cancelled */
-		goto error_dev_gone;
-	default:
-		dev_err(dev, "device reboot: error %d while waiting "
-			"for reboot barker - rebooting\n", result);
-		d_dump(1, dev, &ack, result);
-		goto do_reboot;
-	}
-	/* At this point we ack back with 4 REBOOT barkers and expect
-	 * 4 ACK barkers. This is ugly, as we send a raw command --
-	 * hence the cast. _bm_cmd() will catch the reboot ack
-	 * notification and report it as -EISCONN. */
-do_reboot_ack:
-	d_printf(4, dev, "device reboot ack: sending ack [%d # left]\n", count);
-	memcpy(cmd, i2400m->barker->data, sizeof(i2400m->barker->data));
-	result = i2400m_bm_cmd(i2400m, cmd, sizeof(*cmd),
-			       &ack, sizeof(ack), I2400M_BM_CMD_RAW);
-	switch (result) {
-	case -ERESTARTSYS:
-		d_printf(4, dev, "reboot ack: got reboot barker - retrying\n");
-		if (--count < 0)
-			goto error_timeout;
-		goto do_reboot_ack;
-	case -EISCONN:
-		d_printf(4, dev, "reboot ack: got ack barker - good\n");
-		break;
-	case -ETIMEDOUT:	/* no response, maybe it is the other type? */
-		if (ack_timeout_cnt-- < 0) {
-			d_printf(4, dev, "reboot ack timedout: retrying\n");
-			goto do_reboot_ack;
-		} else {
-			dev_err(dev, "reboot ack timedout too long: "
-				"trying reboot\n");
-			goto do_reboot;
-		}
-		break;
-	case -EPROTO:
-	case -ESHUTDOWN:	/* dev is gone */
-		goto error_dev_gone;
-	default:
-		dev_err(dev, "device reboot ack: error %d while waiting for "
-			"reboot ack barker - rebooting\n", result);
-		goto do_reboot;
-	}
-	d_printf(2, dev, "device reboot ack: got ack barker - boot done\n");
-	result = 0;
-exit_timeout:
-error_dev_gone:
-	d_fnend(4, dev, "(i2400m %p flags 0x%08x) = %d\n",
-		i2400m, flags, result);
-	return result;
-
-error_timeout:
-	dev_err(dev, "Timed out waiting for reboot ack\n");
-	result = -ETIMEDOUT;
-	goto exit_timeout;
-}
-
-
-/*
- * Read the MAC addr
- *
- * The position this function reads is fixed in device memory and
- * always available, even without firmware.
- *
- * Note we specify we want to read only six bytes, but provide space
- * for 16, as we always get it rounded up.
- */
-int i2400m_read_mac_addr(struct i2400m *i2400m)
-{
-	int result;
-	struct device *dev = i2400m_dev(i2400m);
-	struct net_device *net_dev = i2400m->wimax_dev.net_dev;
-	struct i2400m_bootrom_header *cmd;
-	struct {
-		struct i2400m_bootrom_header ack;
-		u8 ack_pl[16];
-	} __packed ack_buf;
-
-	d_fnstart(5, dev, "(i2400m %p)\n", i2400m);
-	cmd = i2400m->bm_cmd_buf;
-	cmd->command = i2400m_brh_command(I2400M_BRH_READ, 0, 1);
-	cmd->target_addr = cpu_to_le32(0x00203fe8);
-	cmd->data_size = cpu_to_le32(6);
-	result = i2400m_bm_cmd(i2400m, cmd, sizeof(*cmd),
-			       &ack_buf.ack, sizeof(ack_buf), 0);
-	if (result < 0) {
-		dev_err(dev, "BM: read mac addr failed: %d\n", result);
-		goto error_read_mac;
-	}
-	d_printf(2, dev, "mac addr is %pM\n", ack_buf.ack_pl);
-	if (i2400m->bus_bm_mac_addr_impaired == 1) {
-		ack_buf.ack_pl[0] = 0x00;
-		ack_buf.ack_pl[1] = 0x16;
-		ack_buf.ack_pl[2] = 0xd3;
-		get_random_bytes(&ack_buf.ack_pl[3], 3);
-		dev_err(dev, "BM is MAC addr impaired, faking MAC addr to "
-			"mac addr is %pM\n", ack_buf.ack_pl);
-		result = 0;
-	}
-	net_dev->addr_len = ETH_ALEN;
-	memcpy(net_dev->dev_addr, ack_buf.ack_pl, ETH_ALEN);
-error_read_mac:
-	d_fnend(5, dev, "(i2400m %p) = %d\n", i2400m, result);
-	return result;
-}
-
-
-/*
- * Initialize a non signed boot
- *
- * This implies sending some magic values to the device's memory. Note
- * we convert the values to little endian in the same array
- * declaration.
- */
-static
-int i2400m_dnload_init_nonsigned(struct i2400m *i2400m)
-{
-	unsigned i = 0;
-	int ret = 0;
-	struct device *dev = i2400m_dev(i2400m);
-	d_fnstart(5, dev, "(i2400m %p)\n", i2400m);
-	if (i2400m->bus_bm_pokes_table) {
-		while (i2400m->bus_bm_pokes_table[i].address) {
-			ret = i2400m_download_chunk(
-				i2400m,
-				&i2400m->bus_bm_pokes_table[i].data,
-				sizeof(i2400m->bus_bm_pokes_table[i].data),
-				i2400m->bus_bm_pokes_table[i].address, 1, 1);
-			if (ret < 0)
-				break;
-			i++;
-		}
-	}
-	d_fnend(5, dev, "(i2400m %p) = %d\n", i2400m, ret);
-	return ret;
-}
-
-
-/*
- * Initialize the signed boot process
- *
- * @i2400m: device descriptor
- *
- * @bcf_hdr: pointer to the firmware header; assumes it is fully in
- *     memory (it has gone through basic validation).
- *
- * Returns: 0 if ok, < 0 errno code on error, -ERESTARTSYS if the hw
- *     rebooted.
- *
- * This writes the firmware BCF header to the device using the
- * HASH_PAYLOAD_ONLY command.
- */
-static
-int i2400m_dnload_init_signed(struct i2400m *i2400m,
-			      const struct i2400m_bcf_hdr *bcf_hdr)
-{
-	int ret;
-	struct device *dev = i2400m_dev(i2400m);
-	struct {
-		struct i2400m_bootrom_header cmd;
-		struct i2400m_bcf_hdr cmd_pl;
-	} __packed *cmd_buf;
-	struct i2400m_bootrom_header ack;
-
-	d_fnstart(5, dev, "(i2400m %p bcf_hdr %p)\n", i2400m, bcf_hdr);
-	cmd_buf = i2400m->bm_cmd_buf;
-	cmd_buf->cmd.command =
-		i2400m_brh_command(I2400M_BRH_HASH_PAYLOAD_ONLY, 0, 0);
-	cmd_buf->cmd.target_addr = 0;
-	cmd_buf->cmd.data_size = cpu_to_le32(sizeof(cmd_buf->cmd_pl));
-	memcpy(&cmd_buf->cmd_pl, bcf_hdr, sizeof(*bcf_hdr));
-	ret = i2400m_bm_cmd(i2400m, &cmd_buf->cmd, sizeof(*cmd_buf),
-			    &ack, sizeof(ack), 0);
-	if (ret >= 0)
-		ret = 0;
-	d_fnend(5, dev, "(i2400m %p bcf_hdr %p) = %d\n", i2400m, bcf_hdr, ret);
-	return ret;
-}
-
-
-/*
- * Initialize the firmware download at the device size
- *
- * Multiplex to the one that matters based on the device's mode
- * (signed or non-signed).
- */
-static
-int i2400m_dnload_init(struct i2400m *i2400m,
-		       const struct i2400m_bcf_hdr *bcf_hdr)
-{
-	int result;
-	struct device *dev = i2400m_dev(i2400m);
-
-	if (i2400m_boot_is_signed(i2400m)) {
-		d_printf(1, dev, "signed boot\n");
-		result = i2400m_dnload_init_signed(i2400m, bcf_hdr);
-		if (result == -ERESTARTSYS)
-			return result;
-		if (result < 0)
-			dev_err(dev, "firmware %s: signed boot download "
-				"initialization failed: %d\n",
-				i2400m->fw_name, result);
-	} else {
-		/* non-signed boot process without pokes */
-		d_printf(1, dev, "non-signed boot\n");
-		result = i2400m_dnload_init_nonsigned(i2400m);
-		if (result == -ERESTARTSYS)
-			return result;
-		if (result < 0)
-			dev_err(dev, "firmware %s: non-signed download "
-				"initialization failed: %d\n",
-				i2400m->fw_name, result);
-	}
-	return result;
-}
-
-
-/*
- * Run consistency tests on the firmware file and load up headers
- *
- * Check for the firmware being made for the i2400m device,
- * etc...These checks are mostly informative, as the device will make
- * them too; but the driver's response is more informative on what
- * went wrong.
- *
- * This will also look at all the headers present on the firmware
- * file, and update i2400m->fw_bcf_hdr to point to them.
- */
-static
-int i2400m_fw_hdr_check(struct i2400m *i2400m,
-			const struct i2400m_bcf_hdr *bcf_hdr,
-			size_t index, size_t offset)
-{
-	struct device *dev = i2400m_dev(i2400m);
-
-	unsigned module_type, header_len, major_version, minor_version,
-		module_id, module_vendor, date, size;
-
-	module_type = le32_to_cpu(bcf_hdr->module_type);
-	header_len = sizeof(u32) * le32_to_cpu(bcf_hdr->header_len);
-	major_version = (le32_to_cpu(bcf_hdr->header_version) & 0xffff0000)
-		>> 16;
-	minor_version = le32_to_cpu(bcf_hdr->header_version) & 0x0000ffff;
-	module_id = le32_to_cpu(bcf_hdr->module_id);
-	module_vendor = le32_to_cpu(bcf_hdr->module_vendor);
-	date = le32_to_cpu(bcf_hdr->date);
-	size = sizeof(u32) * le32_to_cpu(bcf_hdr->size);
-
-	d_printf(1, dev, "firmware %s #%zd@%08zx: BCF header "
-		 "type:vendor:id 0x%x:%x:%x v%u.%u (%u/%u B) built %08x\n",
-		 i2400m->fw_name, index, offset,
-		 module_type, module_vendor, module_id,
-		 major_version, minor_version, header_len, size, date);
-
-	/* Hard errors */
-	if (major_version != 1) {
-		dev_err(dev, "firmware %s #%zd@%08zx: major header version "
-			"v%u.%u not supported\n",
-			i2400m->fw_name, index, offset,
-			major_version, minor_version);
-		return -EBADF;
-	}
-
-	if (module_type != 6) {		/* built for the right hardware? */
-		dev_err(dev, "firmware %s #%zd@%08zx: unexpected module "
-			"type 0x%x; aborting\n",
-			i2400m->fw_name, index, offset,
-			module_type);
-		return -EBADF;
-	}
-
-	if (module_vendor != 0x8086) {
-		dev_err(dev, "firmware %s #%zd@%08zx: unexpected module "
-			"vendor 0x%x; aborting\n",
-			i2400m->fw_name, index, offset, module_vendor);
-		return -EBADF;
-	}
-
-	if (date < 0x20080300)
-		dev_warn(dev, "firmware %s #%zd@%08zx: build date %08x "
-			 "too old; unsupported\n",
-			 i2400m->fw_name, index, offset, date);
-	return 0;
-}
-
-
-/*
- * Run consistency tests on the firmware file and load up headers
- *
- * Check for the firmware being made for the i2400m device,
- * etc...These checks are mostly informative, as the device will make
- * them too; but the driver's response is more informative on what
- * went wrong.
- *
- * This will also look at all the headers present on the firmware
- * file, and update i2400m->fw_hdrs to point to them.
- */
-static
-int i2400m_fw_check(struct i2400m *i2400m, const void *bcf, size_t bcf_size)
-{
-	int result;
-	struct device *dev = i2400m_dev(i2400m);
-	size_t headers = 0;
-	const struct i2400m_bcf_hdr *bcf_hdr;
-	const void *itr, *next, *top;
-	size_t slots = 0, used_slots = 0;
-
-	for (itr = bcf, top = itr + bcf_size;
-	     itr < top;
-	     headers++, itr = next) {
-		size_t leftover, offset, header_len, size;
-
-		leftover = top - itr;
-		offset = itr - bcf;
-		if (leftover <= sizeof(*bcf_hdr)) {
-			dev_err(dev, "firmware %s: %zu B left at @%zx, "
-				"not enough for BCF header\n",
-				i2400m->fw_name, leftover, offset);
-			break;
-		}
-		bcf_hdr = itr;
-		/* Only the first header is supposed to be followed by
-		 * payload */
-		header_len = sizeof(u32) * le32_to_cpu(bcf_hdr->header_len);
-		size = sizeof(u32) * le32_to_cpu(bcf_hdr->size);
-		if (headers == 0)
-			next = itr + size;
-		else
-			next = itr + header_len;
-
-		result = i2400m_fw_hdr_check(i2400m, bcf_hdr, headers, offset);
-		if (result < 0)
-			continue;
-		if (used_slots + 1 >= slots) {
-			/* +1 -> we need to account for the one we'll
-			 * occupy and at least an extra one for
-			 * always being NULL */
-			result = i2400m_zrealloc_2x(
-				(void **) &i2400m->fw_hdrs, &slots,
-				sizeof(i2400m->fw_hdrs[0]),
-				GFP_KERNEL);
-			if (result < 0)
-				goto error_zrealloc;
-		}
-		i2400m->fw_hdrs[used_slots] = bcf_hdr;
-		used_slots++;
-	}
-	if (headers == 0) {
-		dev_err(dev, "firmware %s: no usable headers found\n",
-			i2400m->fw_name);
-		result = -EBADF;
-	} else
-		result = 0;
-error_zrealloc:
-	return result;
-}
-
-
-/*
- * Match a barker to a BCF header module ID
- *
- * The device sends a barker which tells the firmware loader which
- * header in the BCF file has to be used. This does the matching.
- */
-static
-unsigned i2400m_bcf_hdr_match(struct i2400m *i2400m,
-			      const struct i2400m_bcf_hdr *bcf_hdr)
-{
-	u32 barker = le32_to_cpu(i2400m->barker->data[0])
-		& 0x7fffffff;
-	u32 module_id = le32_to_cpu(bcf_hdr->module_id)
-		& 0x7fffffff;	/* high bit used for something else */
-
-	/* special case for 5x50 */
-	if (barker == I2400M_SBOOT_BARKER && module_id == 0)
-		return 1;
-	if (module_id == barker)
-		return 1;
-	return 0;
-}
-
-static
-const struct i2400m_bcf_hdr *i2400m_bcf_hdr_find(struct i2400m *i2400m)
-{
-	struct device *dev = i2400m_dev(i2400m);
-	const struct i2400m_bcf_hdr **bcf_itr, *bcf_hdr;
-	unsigned i = 0;
-	u32 barker = le32_to_cpu(i2400m->barker->data[0]);
-
-	d_printf(2, dev, "finding BCF header for barker %08x\n", barker);
-	if (barker == I2400M_NBOOT_BARKER) {
-		bcf_hdr = i2400m->fw_hdrs[0];
-		d_printf(1, dev, "using BCF header #%u/%08x for non-signed "
-			 "barker\n", 0, le32_to_cpu(bcf_hdr->module_id));
-		return bcf_hdr;
-	}
-	for (bcf_itr = i2400m->fw_hdrs; *bcf_itr != NULL; bcf_itr++, i++) {
-		bcf_hdr = *bcf_itr;
-		if (i2400m_bcf_hdr_match(i2400m, bcf_hdr)) {
-			d_printf(1, dev, "hit on BCF hdr #%u/%08x\n",
-				 i, le32_to_cpu(bcf_hdr->module_id));
-			return bcf_hdr;
-		} else
-			d_printf(1, dev, "miss on BCF hdr #%u/%08x\n",
-				 i, le32_to_cpu(bcf_hdr->module_id));
-	}
-	dev_err(dev, "cannot find a matching BCF header for barker %08x\n",
-		barker);
-	return NULL;
-}
-
-
-/*
- * Download the firmware to the device
- *
- * @i2400m: device descriptor
- * @bcf: pointer to loaded (and minimally verified for consistency)
- *    firmware
- * @bcf_size: size of the @bcf buffer (header plus payloads)
- *
- * The process for doing this is described in this file's header.
- *
- * Note we only reinitialize boot-mode if the flags say so. Some hw
- * iterations need it, some don't. In any case, if we loop, we always
- * need to reinitialize the boot room, hence the flags modification.
- */
-static
-int i2400m_fw_dnload(struct i2400m *i2400m, const struct i2400m_bcf_hdr *bcf,
-		     size_t fw_size, enum i2400m_bri flags)
-{
-	int ret = 0;
-	struct device *dev = i2400m_dev(i2400m);
-	int count = i2400m->bus_bm_retries;
-	const struct i2400m_bcf_hdr *bcf_hdr;
-	size_t bcf_size;
-
-	d_fnstart(5, dev, "(i2400m %p bcf %p fw size %zu)\n",
-		  i2400m, bcf, fw_size);
-	i2400m->boot_mode = 1;
-	wmb();		/* Make sure other readers see it */
-hw_reboot:
-	if (count-- == 0) {
-		ret = -ERESTARTSYS;
-		dev_err(dev, "device rebooted too many times, aborting\n");
-		goto error_too_many_reboots;
-	}
-	if (flags & I2400M_BRI_MAC_REINIT) {
-		ret = i2400m_bootrom_init(i2400m, flags);
-		if (ret < 0) {
-			dev_err(dev, "bootrom init failed: %d\n", ret);
-			goto error_bootrom_init;
-		}
-	}
-	flags |= I2400M_BRI_MAC_REINIT;
-
-	/*
-	 * Initialize the download, push the bytes to the device and
-	 * then jump to the new firmware. Note @ret is passed with the
-	 * offset of the jump instruction to _dnload_finalize()
-	 *
-	 * Note we need to use the BCF header in the firmware image
-	 * that matches the barker that the device sent when it
-	 * rebooted, so it has to be passed along.
-	 */
-	ret = -EBADF;
-	bcf_hdr = i2400m_bcf_hdr_find(i2400m);
-	if (bcf_hdr == NULL)
-		goto error_bcf_hdr_find;
-
-	ret = i2400m_dnload_init(i2400m, bcf_hdr);
-	if (ret == -ERESTARTSYS)
-		goto error_dev_rebooted;
-	if (ret < 0)
-		goto error_dnload_init;
-
-	/*
-	 * bcf_size refers to one header size plus the fw sections size
-	 * indicated by the header,ie. if there are other extended headers
-	 * at the tail, they are not counted
-	 */
-	bcf_size = sizeof(u32) * le32_to_cpu(bcf_hdr->size);
-	ret = i2400m_dnload_bcf(i2400m, bcf, bcf_size);
-	if (ret == -ERESTARTSYS)
-		goto error_dev_rebooted;
-	if (ret < 0) {
-		dev_err(dev, "fw %s: download failed: %d\n",
-			i2400m->fw_name, ret);
-		goto error_dnload_bcf;
-	}
-
-	ret = i2400m_dnload_finalize(i2400m, bcf_hdr, bcf, ret);
-	if (ret == -ERESTARTSYS)
-		goto error_dev_rebooted;
-	if (ret < 0) {
-		dev_err(dev, "fw %s: "
-			"download finalization failed: %d\n",
-			i2400m->fw_name, ret);
-		goto error_dnload_finalize;
-	}
-
-	d_printf(2, dev, "fw %s successfully uploaded\n",
-		 i2400m->fw_name);
-	i2400m->boot_mode = 0;
-	wmb();		/* Make sure i2400m_msg_to_dev() sees boot_mode */
-error_dnload_finalize:
-error_dnload_bcf:
-error_dnload_init:
-error_bcf_hdr_find:
-error_bootrom_init:
-error_too_many_reboots:
-	d_fnend(5, dev, "(i2400m %p bcf %p size %zu) = %d\n",
-		i2400m, bcf, fw_size, ret);
-	return ret;
-
-error_dev_rebooted:
-	dev_err(dev, "device rebooted, %d tries left\n", count);
-	/* we got the notification already, no need to wait for it again */
-	flags |= I2400M_BRI_SOFT;
-	goto hw_reboot;
-}
-
-static
-int i2400m_fw_bootstrap(struct i2400m *i2400m, const struct firmware *fw,
-			enum i2400m_bri flags)
-{
-	int ret;
-	struct device *dev = i2400m_dev(i2400m);
-	const struct i2400m_bcf_hdr *bcf;	/* Firmware data */
-
-	d_fnstart(5, dev, "(i2400m %p)\n", i2400m);
-	bcf = (void *) fw->data;
-	ret = i2400m_fw_check(i2400m, bcf, fw->size);
-	if (ret >= 0)
-		ret = i2400m_fw_dnload(i2400m, bcf, fw->size, flags);
-	if (ret < 0)
-		dev_err(dev, "%s: cannot use: %d, skipping\n",
-			i2400m->fw_name, ret);
-	kfree(i2400m->fw_hdrs);
-	i2400m->fw_hdrs = NULL;
-	d_fnend(5, dev, "(i2400m %p) = %d\n", i2400m, ret);
-	return ret;
-}
-
-
-/* Refcounted container for firmware data */
-struct i2400m_fw {
-	struct kref kref;
-	const struct firmware *fw;
-};
-
-
-static
-void i2400m_fw_destroy(struct kref *kref)
-{
-	struct i2400m_fw *i2400m_fw =
-		container_of(kref, struct i2400m_fw, kref);
-	release_firmware(i2400m_fw->fw);
-	kfree(i2400m_fw);
-}
-
-
-static
-struct i2400m_fw *i2400m_fw_get(struct i2400m_fw *i2400m_fw)
-{
-	if (i2400m_fw != NULL && i2400m_fw != (void *) ~0)
-		kref_get(&i2400m_fw->kref);
-	return i2400m_fw;
-}
-
-
-static
-void i2400m_fw_put(struct i2400m_fw *i2400m_fw)
-{
-	kref_put(&i2400m_fw->kref, i2400m_fw_destroy);
-}
-
-
-/**
- * i2400m_dev_bootstrap - Bring the device to a known state and upload firmware
- *
- * @i2400m: device descriptor
- *
- * Returns: >= 0 if ok, < 0 errno code on error.
- *
- * This sets up the firmware upload environment, loads the firmware
- * file from disk, verifies and then calls the firmware upload process
- * per se.
- *
- * Can be called either from probe, or after a warm reset.  Can not be
- * called from within an interrupt.  All the flow in this code is
- * single-threade; all I/Os are synchronous.
- */
-int i2400m_dev_bootstrap(struct i2400m *i2400m, enum i2400m_bri flags)
-{
-	int ret, itr;
-	struct device *dev = i2400m_dev(i2400m);
-	struct i2400m_fw *i2400m_fw;
-	const struct firmware *fw;
-	const char *fw_name;
-
-	d_fnstart(5, dev, "(i2400m %p)\n", i2400m);
-
-	ret = -ENODEV;
-	spin_lock(&i2400m->rx_lock);
-	i2400m_fw = i2400m_fw_get(i2400m->fw_cached);
-	spin_unlock(&i2400m->rx_lock);
-	if (i2400m_fw == (void *) ~0) {
-		dev_err(dev, "can't load firmware now!");
-		goto out;
-	} else if (i2400m_fw != NULL) {
-		dev_info(dev, "firmware %s: loading from cache\n",
-			 i2400m->fw_name);
-		ret = i2400m_fw_bootstrap(i2400m, i2400m_fw->fw, flags);
-		i2400m_fw_put(i2400m_fw);
-		goto out;
-	}
-
-	/* Load firmware files to memory. */
-	for (itr = 0, ret = -ENOENT; ; itr++) {
-		fw_name = i2400m->bus_fw_names[itr];
-		if (fw_name == NULL) {
-			dev_err(dev, "Could not find a usable firmware image\n");
-			break;
-		}
-		d_printf(1, dev, "trying firmware %s (%d)\n", fw_name, itr);
-		ret = request_firmware(&fw, fw_name, dev);
-		if (ret < 0) {
-			dev_err(dev, "fw %s: cannot load file: %d\n",
-				fw_name, ret);
-			continue;
-		}
-		i2400m->fw_name = fw_name;
-		ret = i2400m_fw_bootstrap(i2400m, fw, flags);
-		release_firmware(fw);
-		if (ret >= 0)	/* firmware loaded successfully */
-			break;
-		i2400m->fw_name = NULL;
-	}
-out:
-	d_fnend(5, dev, "(i2400m %p) = %d\n", i2400m, ret);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(i2400m_dev_bootstrap);
-
-
-void i2400m_fw_cache(struct i2400m *i2400m)
-{
-	int result;
-	struct i2400m_fw *i2400m_fw;
-	struct device *dev = i2400m_dev(i2400m);
-
-	/* if there is anything there, free it -- now, this'd be weird */
-	spin_lock(&i2400m->rx_lock);
-	i2400m_fw = i2400m->fw_cached;
-	spin_unlock(&i2400m->rx_lock);
-	if (i2400m_fw != NULL && i2400m_fw != (void *) ~0) {
-		i2400m_fw_put(i2400m_fw);
-		WARN(1, "%s:%u: still cached fw still present?\n",
-		     __func__, __LINE__);
-	}
-
-	if (i2400m->fw_name == NULL) {
-		dev_err(dev, "firmware n/a: can't cache\n");
-		i2400m_fw = (void *) ~0;
-		goto out;
-	}
-
-	i2400m_fw = kzalloc(sizeof(*i2400m_fw), GFP_ATOMIC);
-	if (i2400m_fw == NULL)
-		goto out;
-	kref_init(&i2400m_fw->kref);
-	result = request_firmware(&i2400m_fw->fw, i2400m->fw_name, dev);
-	if (result < 0) {
-		dev_err(dev, "firmware %s: failed to cache: %d\n",
-			i2400m->fw_name, result);
-		kfree(i2400m_fw);
-		i2400m_fw = (void *) ~0;
-	} else
-		dev_info(dev, "firmware %s: cached\n", i2400m->fw_name);
-out:
-	spin_lock(&i2400m->rx_lock);
-	i2400m->fw_cached = i2400m_fw;
-	spin_unlock(&i2400m->rx_lock);
-}
-
-
-void i2400m_fw_uncache(struct i2400m *i2400m)
-{
-	struct i2400m_fw *i2400m_fw;
-
-	spin_lock(&i2400m->rx_lock);
-	i2400m_fw = i2400m->fw_cached;
-	i2400m->fw_cached = NULL;
-	spin_unlock(&i2400m->rx_lock);
-
-	if (i2400m_fw != NULL && i2400m_fw != (void *) ~0)
-		i2400m_fw_put(i2400m_fw);
-}
-
diff --git a/drivers/net/wimax/i2400m/i2400m-usb.h b/drivers/net/wimax/i2400m/i2400m-usb.h
deleted file mode 100644
index eff4f464a23e..000000000000
--- a/drivers/net/wimax/i2400m/i2400m-usb.h
+++ /dev/null
@@ -1,275 +0,0 @@
-/*
- * Intel Wireless WiMAX Connection 2400m
- * USB-specific i2400m driver definitions
- *
- *
- * Copyright (C) 2007-2008 Intel Corporation. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * Intel Corporation <linux-wimax@intel.com>
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- * Yanir Lubetkin <yanirx.lubetkin@intel.com>
- *  - Initial implementation
- *
- *
- * This driver implements the bus-specific part of the i2400m for
- * USB. Check i2400m.h for a generic driver description.
- *
- * ARCHITECTURE
- *
- * This driver listens to notifications sent from the notification
- * endpoint (in usb-notif.c); when data is ready to read, the code in
- * there schedules a read from the device (usb-rx.c) and then passes
- * the data to the generic RX code (rx.c).
- *
- * When the generic driver needs to send data (network or control), it
- * queues up in the TX FIFO (tx.c) and that will notify the driver
- * through the i2400m->bus_tx_kick() callback
- * (usb-tx.c:i2400mu_bus_tx_kick) which will send the items in the
- * FIFO queue.
- *
- * This driver, as well, implements the USB-specific ops for the generic
- * driver to be able to setup/teardown communication with the device
- * [i2400m_bus_dev_start() and i2400m_bus_dev_stop()], reseting the
- * device [i2400m_bus_reset()] and performing firmware upload
- * [i2400m_bus_bm_cmd() and i2400_bus_bm_wait_for_ack()].
- */
-
-#ifndef __I2400M_USB_H__
-#define __I2400M_USB_H__
-
-#include "i2400m.h"
-#include <linux/kthread.h>
-
-
-/*
- * Error Density Count: cheapo error density (over time) counter
- *
- * Originally by Reinette Chatre <reinette.chatre@intel.com>
- *
- * Embed an 'struct edc' somewhere. Each time there is a soft or
- * retryable error, call edc_inc() and check if the error top
- * watermark has been reached.
- */
-enum {
-	EDC_MAX_ERRORS = 10,
-	EDC_ERROR_TIMEFRAME = HZ,
-};
-
-/* error density counter */
-struct edc {
-	unsigned long timestart;
-	u16 errorcount;
-};
-
-struct i2400m_endpoint_cfg {
-	unsigned char bulk_out;
-	unsigned char notification;
-	unsigned char reset_cold;
-	unsigned char bulk_in;
-};
-
-static inline void edc_init(struct edc *edc)
-{
-	edc->timestart = jiffies;
-}
-
-/**
- * edc_inc - report a soft error and check if we are over the watermark
- *
- * @edc: pointer to error density counter.
- * @max_err: maximum number of errors we can accept over the timeframe
- * @timeframe: length of the timeframe (in jiffies).
- *
- * Returns: !0 1 if maximum acceptable errors per timeframe has been
- *     exceeded. 0 otherwise.
- *
- * This is way to determine if the number of acceptable errors per time
- * period has been exceeded. It is not accurate as there are cases in which
- * this scheme will not work, for example if there are periodic occurrences
- * of errors that straddle updates to the start time. This scheme is
- * sufficient for our usage.
- *
- * To use, embed a 'struct edc' somewhere, initialize it with
- * edc_init() and when an error hits:
- *
- * if (do_something_fails_with_a_soft_error) {
- *        if (edc_inc(&my->edc, MAX_ERRORS, MAX_TIMEFRAME))
- * 	           Ops, hard error, do something about it
- *        else
- *                 Retry or ignore, depending on whatever
- * }
- */
-static inline int edc_inc(struct edc *edc, u16 max_err, u16 timeframe)
-{
-	unsigned long now;
-
-	now = jiffies;
-	if (time_after(now, edc->timestart + timeframe)) {
-		edc->errorcount = 1;
-		edc->timestart = now;
-	} else if (++edc->errorcount > max_err) {
-		edc->errorcount = 0;
-		edc->timestart = now;
-		return 1;
-	}
-	return 0;
-}
-
-/* Host-Device interface for USB */
-enum {
-	I2400M_USB_BOOT_RETRIES = 3,
-	I2400MU_MAX_NOTIFICATION_LEN = 256,
-	I2400MU_BLK_SIZE = 16,
-	I2400MU_PL_SIZE_MAX = 0x3EFF,
-
-	/* Device IDs */
-	USB_DEVICE_ID_I6050 = 0x0186,
-	USB_DEVICE_ID_I6050_2 = 0x0188,
-	USB_DEVICE_ID_I6150 = 0x07d6,
-	USB_DEVICE_ID_I6150_2 = 0x07d7,
-	USB_DEVICE_ID_I6150_3 = 0x07d9,
-	USB_DEVICE_ID_I6250 = 0x0187,
-};
-
-
-/**
- * struct i2400mu - descriptor for a USB connected i2400m
- *
- * @i2400m: bus-generic i2400m implementation; has to be first (see
- *     it's documentation in i2400m.h).
- *
- * @usb_dev: pointer to our USB device
- *
- * @usb_iface: pointer to our USB interface
- *
- * @urb_edc: error density counter; used to keep a density-on-time tab
- *     on how many soft (retryable or ignorable) errors we get. If we
- *     go over the threshold, we consider the bus transport is failing
- *     too much and reset.
- *
- * @notif_urb: URB for receiving notifications from the device.
- *
- * @tx_kthread: thread we use for data TX. We use a thread because in
- *     order to do deep power saving and put the device to sleep, we
- *     need to call usb_autopm_*() [blocking functions].
- *
- * @tx_wq: waitqueue for the TX kthread to sleep when there is no data
- *     to be sent; when more data is available, it is woken up by
- *     i2400mu_bus_tx_kick().
- *
- * @rx_kthread: thread we use for data RX. We use a thread because in
- *     order to do deep power saving and put the device to sleep, we
- *     need to call usb_autopm_*() [blocking functions].
- *
- * @rx_wq: waitqueue for the RX kthread to sleep when there is no data
- *     to receive. When data is available, it is woken up by
- *     usb-notif.c:i2400mu_notification_grok().
- *
- * @rx_pending_count: number of rx-data-ready notifications that were
- *     still not handled by the RX kthread.
- *
- * @rx_size: current RX buffer size that is being used.
- *
- * @rx_size_acc: accumulator of the sizes of the previous read
- *     transactions.
- *
- * @rx_size_cnt: number of read transactions accumulated in
- *     @rx_size_acc.
- *
- * @do_autopm: disable(0)/enable(>0) calling the
- *     usb_autopm_get/put_interface() barriers when executing
- *     commands. See doc in i2400mu_suspend() for more information.
- *
- * @rx_size_auto_shrink: if true, the rx_size is shrunk
- *     automatically based on the average size of the received
- *     transactions. This allows the receive code to allocate smaller
- *     chunks of memory and thus reduce pressure on the memory
- *     allocator by not wasting so much space. By default it is
- *     enabled.
- *
- * @debugfs_dentry: hookup for debugfs files.
- *     These have to be in a separate directory, a child of
- *     (wimax_dev->debugfs_dentry) so they can be removed when the
- *     module unloads, as we don't keep each dentry.
- */
-struct i2400mu {
-	struct i2400m i2400m;		/* FIRST! See doc */
-
-	struct usb_device *usb_dev;
-	struct usb_interface *usb_iface;
-	struct edc urb_edc;		/* Error density counter */
-	struct i2400m_endpoint_cfg endpoint_cfg;
-
-	struct urb *notif_urb;
-	struct task_struct *tx_kthread;
-	wait_queue_head_t tx_wq;
-
-	struct task_struct *rx_kthread;
-	wait_queue_head_t rx_wq;
-	atomic_t rx_pending_count;
-	size_t rx_size, rx_size_acc, rx_size_cnt;
-	atomic_t do_autopm;
-	u8 rx_size_auto_shrink;
-
-	struct dentry *debugfs_dentry;
-	unsigned i6050:1;	/* 1 if this is a 6050 based SKU */
-};
-
-
-static inline
-void i2400mu_init(struct i2400mu *i2400mu)
-{
-	i2400m_init(&i2400mu->i2400m);
-	edc_init(&i2400mu->urb_edc);
-	init_waitqueue_head(&i2400mu->tx_wq);
-	atomic_set(&i2400mu->rx_pending_count, 0);
-	init_waitqueue_head(&i2400mu->rx_wq);
-	i2400mu->rx_size = PAGE_SIZE - sizeof(struct skb_shared_info);
-	atomic_set(&i2400mu->do_autopm, 1);
-	i2400mu->rx_size_auto_shrink = 1;
-}
-
-int i2400mu_notification_setup(struct i2400mu *);
-void i2400mu_notification_release(struct i2400mu *);
-
-int i2400mu_rx_setup(struct i2400mu *);
-void i2400mu_rx_release(struct i2400mu *);
-void i2400mu_rx_kick(struct i2400mu *);
-
-int i2400mu_tx_setup(struct i2400mu *);
-void i2400mu_tx_release(struct i2400mu *);
-void i2400mu_bus_tx_kick(struct i2400m *);
-
-ssize_t i2400mu_bus_bm_cmd_send(struct i2400m *,
-				const struct i2400m_bootrom_header *, size_t,
-				int);
-ssize_t i2400mu_bus_bm_wait_for_ack(struct i2400m *,
-				    struct i2400m_bootrom_header *, size_t);
-#endif /* #ifndef __I2400M_USB_H__ */
diff --git a/drivers/net/wimax/i2400m/i2400m.h b/drivers/net/wimax/i2400m/i2400m.h
deleted file mode 100644
index a3733a6d14f5..000000000000
--- a/drivers/net/wimax/i2400m/i2400m.h
+++ /dev/null
@@ -1,970 +0,0 @@
-/*
- * Intel Wireless WiMAX Connection 2400m
- * Declarations for bus-generic internal APIs
- *
- *
- * Copyright (C) 2007-2008 Intel Corporation. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * Intel Corporation <linux-wimax@intel.com>
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- * Yanir Lubetkin <yanirx.lubetkin@intel.com>
- *  - Initial implementation
- *
- *
- * GENERAL DRIVER ARCHITECTURE
- *
- * The i2400m driver is split in the following two major parts:
- *
- *  - bus specific driver
- *  - bus generic driver (this part)
- *
- * The bus specific driver sets up stuff specific to the bus the
- * device is connected to (USB, PCI, tam-tam...non-authoritative
- * nor binding list) which is basically the device-model management
- * (probe/disconnect, etc), moving data from device to kernel and
- * back, doing the power saving details and reseting the device.
- *
- * For details on each bus-specific driver, see it's include file,
- * i2400m-BUSNAME.h
- *
- * The bus-generic functionality break up is:
- *
- *  - Firmware upload: fw.c - takes care of uploading firmware to the
- *        device. bus-specific driver just needs to provides a way to
- *        execute boot-mode commands and to reset the device.
- *
- *  - RX handling: rx.c - receives data from the bus-specific code and
- *        feeds it to the network or WiMAX stack or uses it to modify
- *        the driver state. bus-specific driver only has to receive
- *        frames and pass them to this module.
- *
- *  - TX handling: tx.c - manages the TX FIFO queue and provides means
- *        for the bus-specific TX code to pull data from the FIFO
- *        queue. bus-specific code just pulls frames from this module
- *        to sends them to the device.
- *
- *  - netdev glue: netdev.c - interface with Linux networking
- *        stack. Pass around data frames, and configure when the
- *        device is up and running or shutdown (through ifconfig up /
- *        down). Bus-generic only.
- *
- *  - control ops: control.c - implements various commands for
- *        controlling the device. bus-generic only.
- *
- *  - device model glue: driver.c - implements helpers for the
- *        device-model glue done by the bus-specific layer
- *        (setup/release the driver resources), turning the device on
- *        and off, handling the device reboots/resets and a few simple
- *        WiMAX stack ops.
- *
- * Code is also broken up in linux-glue / device-glue.
- *
- * Linux glue contains functions that deal mostly with gluing with the
- * rest of the Linux kernel.
- *
- * Device-glue are functions that deal mostly with the way the device
- * does things and talk the device's language.
- *
- * device-glue code is licensed BSD so other open source OSes can take
- * it to implement their drivers.
- *
- *
- * APIs AND HEADER FILES
- *
- * This bus generic code exports three APIs:
- *
- *  - HDI (host-device interface) definitions common to all busses
- *    (include/linux/wimax/i2400m.h); these can be also used by user
- *    space code.
- *  - internal API for the bus-generic code
- *  - external API for the bus-specific drivers
- *
- *
- * LIFE CYCLE:
- *
- * When the bus-specific driver probes, it allocates a network device
- * with enough space for it's data structue, that must contain a
- * &struct i2400m at the top.
- *
- * On probe, it needs to fill the i2400m members marked as [fill], as
- * well as i2400m->wimax_dev.net_dev and call i2400m_setup(). The
- * i2400m driver will only register with the WiMAX and network stacks;
- * the only access done to the device is to read the MAC address so we
- * can register a network device.
- *
- * The high-level call flow is:
- *
- * bus_probe()
- *   i2400m_setup()
- *     i2400m->bus_setup()
- *     boot rom initialization / read mac addr
- *     network / WiMAX stacks registration
- *     i2400m_dev_start()
- *       i2400m->bus_dev_start()
- *       i2400m_dev_initialize()
- *
- * The reverse applies for a disconnect() call:
- *
- * bus_disconnect()
- *   i2400m_release()
- *     i2400m_dev_stop()
- *       i2400m_dev_shutdown()
- *       i2400m->bus_dev_stop()
- *     network / WiMAX stack unregistration
- *     i2400m->bus_release()
- *
- * At this point, control and data communications are possible.
- *
- * While the device is up, it might reset. The bus-specific driver has
- * to catch that situation and call i2400m_dev_reset_handle() to deal
- * with it (reset the internal driver structures and go back to square
- * one).
- */
-
-#ifndef __I2400M_H__
-#define __I2400M_H__
-
-#include <linux/usb.h>
-#include <linux/netdevice.h>
-#include <linux/completion.h>
-#include <linux/rwsem.h>
-#include <linux/atomic.h>
-#include <net/wimax.h>
-#include <linux/wimax/i2400m.h>
-#include <asm/byteorder.h>
-
-enum {
-/* netdev interface */
-	/*
-	 * Out of NWG spec (R1_v1.2.2), 3.3.3 ASN Bearer Plane MTU Size
-	 *
-	 * The MTU is 1400 or less
-	 */
-	I2400M_MAX_MTU = 1400,
-};
-
-/* Misc constants */
-enum {
-	/* Size of the Boot Mode Command buffer */
-	I2400M_BM_CMD_BUF_SIZE = 16 * 1024,
-	I2400M_BM_ACK_BUF_SIZE = 256,
-};
-
-enum {
-	/* Maximum number of bus reset can be retried */
-	I2400M_BUS_RESET_RETRIES = 3,
-};
-
-/**
- * struct i2400m_poke_table - Hardware poke table for the Intel 2400m
- *
- * This structure will be used to create a device specific poke table
- * to put the device in a consistent state at boot time.
- *
- * @address: The device address to poke
- *
- * @data: The data value to poke to the device address
- *
- */
-struct i2400m_poke_table{
-	__le32 address;
-	__le32 data;
-};
-
-#define I2400M_FW_POKE(a, d) {		\
-	.address = cpu_to_le32(a),	\
-	.data = cpu_to_le32(d)		\
-}
-
-
-/**
- * i2400m_reset_type - methods to reset a device
- *
- * @I2400M_RT_WARM: Reset without device disconnection, device handles
- *     are kept valid but state is back to power on, with firmware
- *     re-uploaded.
- * @I2400M_RT_COLD: Tell the device to disconnect itself from the bus
- *     and reconnect. Renders all device handles invalid.
- * @I2400M_RT_BUS: Tells the bus to reset the device; last measure
- *     used when both types above don't work.
- */
-enum i2400m_reset_type {
-	I2400M_RT_WARM,	/* first measure */
-	I2400M_RT_COLD,	/* second measure */
-	I2400M_RT_BUS,	/* call in artillery */
-};
-
-struct i2400m_reset_ctx;
-struct i2400m_roq;
-struct i2400m_barker_db;
-
-/**
- * struct i2400m - descriptor for an Intel 2400m
- *
- * Members marked with [fill] must be filled out/initialized before
- * calling i2400m_setup().
- *
- * Note the @bus_setup/@bus_release, @bus_dev_start/@bus_dev_release
- * call pairs are very much doing almost the same, and depending on
- * the underlying bus, some stuff has to be put in one or the
- * other. The idea of setup/release is that they setup the minimal
- * amount needed for loading firmware, where us dev_start/stop setup
- * the rest needed to do full data/control traffic.
- *
- * @bus_tx_block_size: [fill] USB imposes a 16 block size, but other
- *     busses will differ.  So we have a tx_blk_size variable that the
- *     bus layer sets to tell the engine how much of that we need.
- *
- * @bus_tx_room_min: [fill] Minimum room required while allocating
- *     TX queue's buffer space for message header. USB requires
- *     16 bytes. Refer to bus specific driver code for details.
- *
- * @bus_pl_size_max: [fill] Maximum payload size.
- *
- * @bus_setup: [optional fill] Function called by the bus-generic code
- *     [i2400m_setup()] to setup the basic bus-specific communications
- *     to the the device needed to load firmware. See LIFE CYCLE above.
- *
- *     NOTE: Doesn't need to upload the firmware, as that is taken
- *     care of by the bus-generic code.
- *
- * @bus_release: [optional fill] Function called by the bus-generic
- *     code [i2400m_release()] to shutdown the basic bus-specific
- *     communications to the the device needed to load firmware. See
- *     LIFE CYCLE above.
- *
- *     This function does not need to reset the device, just tear down
- *     all the host resources created to  handle communication with
- *     the device.
- *
- * @bus_dev_start: [optional fill] Function called by the bus-generic
- *     code [i2400m_dev_start()] to do things needed to start the
- *     device. See LIFE CYCLE above.
- *
- *     NOTE: Doesn't need to upload the firmware, as that is taken
- *     care of by the bus-generic code.
- *
- * @bus_dev_stop: [optional fill] Function called by the bus-generic
- *     code [i2400m_dev_stop()] to do things needed for stopping the
- *     device. See LIFE CYCLE above.
- *
- *     This function does not need to reset the device, just tear down
- *     all the host resources created to handle communication with
- *     the device.
- *
- * @bus_tx_kick: [fill] Function called by the bus-generic code to let
- *     the bus-specific code know that there is data available in the
- *     TX FIFO for transmission to the device.
- *
- *     This function cannot sleep.
- *
- * @bus_reset: [fill] Function called by the bus-generic code to reset
- *     the device in in various ways. Doesn't need to wait for the
- *     reset to finish.
- *
- *     If warm or cold reset fail, this function is expected to do a
- *     bus-specific reset (eg: USB reset) to get the device to a
- *     working state (even if it implies device disconecction).
- *
- *     Note the warm reset is used by the firmware uploader to
- *     reinitialize the device.
- *
- *     IMPORTANT: this is called very early in the device setup
- *     process, so it cannot rely on common infrastructure being laid
- *     out.
- *
- *     IMPORTANT: don't call reset on RT_BUS with i2400m->init_mutex
- *     held, as the .pre/.post reset handlers will deadlock.
- *
- * @bus_bm_retries: [fill] How many times shall a firmware upload /
- *     device initialization be retried? Different models of the same
- *     device might need different values, hence it is set by the
- *     bus-specific driver. Note this value is used in two places,
- *     i2400m_fw_dnload() and __i2400m_dev_start(); they won't become
- *     multiplicative (__i2400m_dev_start() calling N times
- *     i2400m_fw_dnload() and this trying N times to download the
- *     firmware), as if __i2400m_dev_start() only retries if the
- *     firmware crashed while initializing the device (not in a
- *     general case).
- *
- * @bus_bm_cmd_send: [fill] Function called to send a boot-mode
- *     command. Flags are defined in 'enum i2400m_bm_cmd_flags'. This
- *     is synchronous and has to return 0 if ok or < 0 errno code in
- *     any error condition.
- *
- * @bus_bm_wait_for_ack: [fill] Function called to wait for a
- *     boot-mode notification (that can be a response to a previously
- *     issued command or an asynchronous one). Will read until all the
- *     indicated size is read or timeout. Reading more or less data
- *     than asked for is an error condition. Return 0 if ok, < 0 errno
- *     code on error.
- *
- *     The caller to this function will check if the response is a
- *     barker that indicates the device going into reset mode.
- *
- * @bus_fw_names: [fill] a NULL-terminated array with the names of the
- *     firmware images to try loading. This is made a list so we can
- *     support backward compatibility of firmware releases (eg: if we
- *     can't find the default v1.4, we try v1.3). In general, the name
- *     should be i2400m-fw-X-VERSION.sbcf, where X is the bus name.
- *     The list is tried in order and the first one that loads is
- *     used. The fw loader will set i2400m->fw_name to point to the
- *     active firmware image.
- *
- * @bus_bm_mac_addr_impaired: [fill] Set to true if the device's MAC
- *     address provided in boot mode is kind of broken and needs to
- *     be re-read later on.
- *
- * @bus_bm_pokes_table: [fill/optional] A table of device addresses
- *     and values that will be poked at device init time to move the
- *     device to the correct state for the type of boot/firmware being
- *     used.  This table MUST be terminated with (0x000000,
- *     0x00000000) or bad things will happen.
- *
- *
- * @wimax_dev: WiMAX generic device for linkage into the kernel WiMAX
- *     stack. Due to the way a net_device is allocated, we need to
- *     force this to be the first field so that we can get from
- *     netdev_priv() the right pointer.
- *
- * @updown: the device is up and ready for transmitting control and
- *     data packets. This implies @ready (communication infrastructure
- *     with the device is ready) and the device's firmware has been
- *     loaded and the device initialized.
- *
- *     Write to it only inside a i2400m->init_mutex protected area
- *     followed with a wmb(); rmb() before accesing (unless locked
- *     inside i2400m->init_mutex). Read access can be loose like that
- *     [just using rmb()] because the paths that use this also do
- *     other error checks later on.
- *
- * @ready: Communication infrastructure with the device is ready, data
- *     frames can start to be passed around (this is lighter than
- *     using the WiMAX state for certain hot paths).
- *
- *     Write to it only inside a i2400m->init_mutex protected area
- *     followed with a wmb(); rmb() before accesing (unless locked
- *     inside i2400m->init_mutex). Read access can be loose like that
- *     [just using rmb()] because the paths that use this also do
- *     other error checks later on.
- *
- * @rx_reorder: 1 if RX reordering is enabled; this can only be
- *     set at probe time.
- *
- * @state: device's state (as reported by it)
- *
- * @state_wq: waitqueue that is woken up whenever the state changes
- *
- * @tx_lock: spinlock to protect TX members
- *
- * @tx_buf: FIFO buffer for TX; we queue data here
- *
- * @tx_in: FIFO index for incoming data. Note this doesn't wrap around
- *     and it is always greater than @tx_out.
- *
- * @tx_out: FIFO index for outgoing data
- *
- * @tx_msg: current TX message that is active in the FIFO for
- *     appending payloads.
- *
- * @tx_sequence: current sequence number for TX messages from the
- *     device to the host.
- *
- * @tx_msg_size: size of the current message being transmitted by the
- *     bus-specific code.
- *
- * @tx_pl_num: total number of payloads sent
- *
- * @tx_pl_max: maximum number of payloads sent in a TX message
- *
- * @tx_pl_min: minimum number of payloads sent in a TX message
- *
- * @tx_num: number of TX messages sent
- *
- * @tx_size_acc: number of bytes in all TX messages sent
- *     (this is different to net_dev's statistics as it also counts
- *     control messages).
- *
- * @tx_size_min: smallest TX message sent.
- *
- * @tx_size_max: biggest TX message sent.
- *
- * @rx_lock: spinlock to protect RX members and rx_roq_refcount.
- *
- * @rx_pl_num: total number of payloads received
- *
- * @rx_pl_max: maximum number of payloads received in a RX message
- *
- * @rx_pl_min: minimum number of payloads received in a RX message
- *
- * @rx_num: number of RX messages received
- *
- * @rx_size_acc: number of bytes in all RX messages received
- *     (this is different to net_dev's statistics as it also counts
- *     control messages).
- *
- * @rx_size_min: smallest RX message received.
- *
- * @rx_size_max: buggest RX message received.
- *
- * @rx_roq: RX ReOrder queues. (fw >= v1.4) When packets are received
- *     out of order, the device will ask the driver to hold certain
- *     packets until the ones that are received out of order can be
- *     delivered. Then the driver can release them to the host. See
- *     drivers/net/i2400m/rx.c for details.
- *
- * @rx_roq_refcount: refcount rx_roq. This refcounts any access to
- *     rx_roq thus preventing rx_roq being destroyed when rx_roq
- *     is being accessed. rx_roq_refcount is protected by rx_lock.
- *
- * @rx_reports: reports received from the device that couldn't be
- *     processed because the driver wasn't still ready; when ready,
- *     they are pulled from here and chewed.
- *
- * @rx_reports_ws: Work struct used to kick a scan of the RX reports
- *     list and to process each.
- *
- * @src_mac_addr: MAC address used to make ethernet packets be coming
- *     from. This is generated at i2400m_setup() time and used during
- *     the life cycle of the instance. See i2400m_fake_eth_header().
- *
- * @init_mutex: Mutex used for serializing the device bringup
- *     sequence; this way if the device reboots in the middle, we
- *     don't try to do a bringup again while we are tearing down the
- *     one that failed.
- *
- *     Can't reuse @msg_mutex because from within the bringup sequence
- *     we need to send messages to the device and thus use @msg_mutex.
- *
- * @msg_mutex: mutex used to send control commands to the device (we
- *     only allow one at a time, per host-device interface design).
- *
- * @msg_completion: used to wait for an ack to a control command sent
- *     to the device.
- *
- * @ack_skb: used to store the actual ack to a control command if the
- *     reception of the command was successful. Otherwise, a ERR_PTR()
- *     errno code that indicates what failed with the ack reception.
- *
- *     Only valid after @msg_completion is woken up. Only updateable
- *     if @msg_completion is armed. Only touched by
- *     i2400m_msg_to_dev().
- *
- *     Protected by @rx_lock. In theory the command execution flow is
- *     sequential, but in case the device sends an out-of-phase or
- *     very delayed response, we need to avoid it trampling current
- *     execution.
- *
- * @bm_cmd_buf: boot mode command buffer for composing firmware upload
- *     commands.
- *
- *     USB can't r/w to stack, vmalloc, etc...as well, we end up
- *     having to alloc/free a lot to compose commands, so we use these
- *     for stagging and not having to realloc all the time.
- *
- *     This assumes the code always runs serialized. Only one thread
- *     can call i2400m_bm_cmd() at the same time.
- *
- * @bm_ack_buf: boot mode acknoledge buffer for staging reception of
- *     responses to commands.
- *
- *     See @bm_cmd_buf.
- *
- * @work_queue: work queue for processing device reports. This
- *     workqueue cannot be used for processing TX or RX to the device,
- *     as from it we'll process device reports, which might require
- *     further communication with the device.
- *
- * @debugfs_dentry: hookup for debugfs files.
- *     These have to be in a separate directory, a child of
- *     (wimax_dev->debugfs_dentry) so they can be removed when the
- *     module unloads, as we don't keep each dentry.
- *
- * @fw_name: name of the firmware image that is currently being used.
- *
- * @fw_version: version of the firmware interface, Major.minor,
- *     encoded in the high word and low word (major << 16 | minor).
- *
- * @fw_hdrs: NULL terminated array of pointers to the firmware
- *     headers. This is only available during firmware load time.
- *
- * @fw_cached: Used to cache firmware when the system goes to
- *     suspend/standby/hibernation (as on resume we can't read it). If
- *     NULL, no firmware was cached, read it. If ~0, you can't read
- *     any firmware files (the system still didn't come out of suspend
- *     and failed to cache one), so abort; otherwise, a valid cached
- *     firmware to be used. Access to this variable is protected by
- *     the spinlock i2400m->rx_lock.
- *
- * @barker: barker type that the device uses; this is initialized by
- *     i2400m_is_boot_barker() the first time it is called. Then it
- *     won't change during the life cycle of the device and every time
- *     a boot barker is received, it is just verified for it being the
- *     same.
- *
- * @pm_notifier: used to register for PM events
- *
- * @bus_reset_retries: counter for the number of bus resets attempted for
- *	this boot. It's not for tracking the number of bus resets during
- *	the whole driver life cycle (from insmod to rmmod) but for the
- *	number of dev_start() executed until dev_start() returns a success
- *	(ie: a good boot means a dev_stop() followed by a successful
- *	dev_start()). dev_reset_handler() increments this counter whenever
- *	it is triggering a bus reset. It checks this counter to decide if a
- *	subsequent bus reset should be retried. dev_reset_handler() retries
- *	the bus reset until dev_start() succeeds or the counter reaches
- *	I2400M_BUS_RESET_RETRIES. The counter is cleared to 0 in
- *	dev_reset_handle() when dev_start() returns a success,
- *	ie: a successul boot is completed.
- *
- * @alive: flag to denote if the device *should* be alive. This flag is
- *	everything like @updown (see doc for @updown) except reflecting
- *	the device state *we expect* rather than the actual state as denoted
- *	by @updown. It is set 1 whenever @updown is set 1 in dev_start().
- *	Then the device is expected to be alive all the time
- *	(i2400m->alive remains 1) until the driver is removed. Therefore
- *	all the device reboot events detected can be still handled properly
- *	by either dev_reset_handle() or .pre_reset/.post_reset as long as
- *	the driver presents. It is set 0 along with @updown in dev_stop().
- *
- * @error_recovery: flag to denote if we are ready to take an error recovery.
- *	0 for ready to take an error recovery; 1 for not ready. It is
- *	initialized to 1 while probe() since we don't tend to take any error
- *	recovery during probe(). It is decremented by 1 whenever dev_start()
- *	succeeds to indicate we are ready to take error recovery from now on.
- *	It is checked every time we wanna schedule an error recovery. If an
- *	error recovery is already in place (error_recovery was set 1), we
- *	should not schedule another one until the last one is done.
- */
-struct i2400m {
-	struct wimax_dev wimax_dev;	/* FIRST! See doc */
-
-	unsigned updown:1;		/* Network device is up or down */
-	unsigned boot_mode:1;		/* is the device in boot mode? */
-	unsigned sboot:1;		/* signed or unsigned fw boot */
-	unsigned ready:1;		/* Device comm infrastructure ready */
-	unsigned rx_reorder:1;		/* RX reorder is enabled */
-	u8 trace_msg_from_user;		/* echo rx msgs to 'trace' pipe */
-					/* typed u8 so /sys/kernel/debug/u8 can tweak */
-	enum i2400m_system_state state;
-	wait_queue_head_t state_wq;	/* Woken up when on state updates */
-
-	size_t bus_tx_block_size;
-	size_t bus_tx_room_min;
-	size_t bus_pl_size_max;
-	unsigned bus_bm_retries;
-
-	int (*bus_setup)(struct i2400m *);
-	int (*bus_dev_start)(struct i2400m *);
-	void (*bus_dev_stop)(struct i2400m *);
-	void (*bus_release)(struct i2400m *);
-	void (*bus_tx_kick)(struct i2400m *);
-	int (*bus_reset)(struct i2400m *, enum i2400m_reset_type);
-	ssize_t (*bus_bm_cmd_send)(struct i2400m *,
-				   const struct i2400m_bootrom_header *,
-				   size_t, int flags);
-	ssize_t (*bus_bm_wait_for_ack)(struct i2400m *,
-				       struct i2400m_bootrom_header *, size_t);
-	const char **bus_fw_names;
-	unsigned bus_bm_mac_addr_impaired:1;
-	const struct i2400m_poke_table *bus_bm_pokes_table;
-
-	spinlock_t tx_lock;		/* protect TX state */
-	void *tx_buf;
-	size_t tx_in, tx_out;
-	struct i2400m_msg_hdr *tx_msg;
-	size_t tx_sequence, tx_msg_size;
-	/* TX stats */
-	unsigned tx_pl_num, tx_pl_max, tx_pl_min,
-		tx_num, tx_size_acc, tx_size_min, tx_size_max;
-
-	/* RX stuff */
-	/* protect RX state and rx_roq_refcount */
-	spinlock_t rx_lock;
-	unsigned rx_pl_num, rx_pl_max, rx_pl_min,
-		rx_num, rx_size_acc, rx_size_min, rx_size_max;
-	struct i2400m_roq *rx_roq;	/* access is refcounted */
-	struct kref rx_roq_refcount;	/* refcount access to rx_roq */
-	u8 src_mac_addr[ETH_HLEN];
-	struct list_head rx_reports;	/* under rx_lock! */
-	struct work_struct rx_report_ws;
-
-	struct mutex msg_mutex;		/* serialize command execution */
-	struct completion msg_completion;
-	struct sk_buff *ack_skb;	/* protected by rx_lock */
-
-	void *bm_ack_buf;		/* for receiving acks over USB */
-	void *bm_cmd_buf;		/* for issuing commands over USB */
-
-	struct workqueue_struct *work_queue;
-
-	struct mutex init_mutex;	/* protect bringup seq */
-	struct i2400m_reset_ctx *reset_ctx;	/* protected by init_mutex */
-
-	struct work_struct wake_tx_ws;
-	struct sk_buff *wake_tx_skb;
-
-	struct work_struct reset_ws;
-	const char *reset_reason;
-
-	struct work_struct recovery_ws;
-
-	struct dentry *debugfs_dentry;
-	const char *fw_name;		/* name of the current firmware image */
-	unsigned long fw_version;	/* version of the firmware interface */
-	const struct i2400m_bcf_hdr **fw_hdrs;
-	struct i2400m_fw *fw_cached;	/* protected by rx_lock */
-	struct i2400m_barker_db *barker;
-
-	struct notifier_block pm_notifier;
-
-	/* counting bus reset retries in this boot */
-	atomic_t bus_reset_retries;
-
-	/* if the device is expected to be alive */
-	unsigned alive;
-
-	/* 0 if we are ready for error recovery; 1 if not ready  */
-	atomic_t error_recovery;
-
-};
-
-
-/*
- * Bus-generic internal APIs
- * -------------------------
- */
-
-static inline
-struct i2400m *wimax_dev_to_i2400m(struct wimax_dev *wimax_dev)
-{
-	return container_of(wimax_dev, struct i2400m, wimax_dev);
-}
-
-static inline
-struct i2400m *net_dev_to_i2400m(struct net_device *net_dev)
-{
-	return wimax_dev_to_i2400m(netdev_priv(net_dev));
-}
-
-/*
- * Boot mode support
- */
-
-/**
- * i2400m_bm_cmd_flags - flags to i2400m_bm_cmd()
- *
- * @I2400M_BM_CMD_RAW: send the command block as-is, without doing any
- *     extra processing for adding CRC.
- */
-enum i2400m_bm_cmd_flags {
-	I2400M_BM_CMD_RAW	= 1 << 2,
-};
-
-/**
- * i2400m_bri - Boot-ROM indicators
- *
- * Flags for i2400m_bootrom_init() and i2400m_dev_bootstrap() [which
- * are passed from things like i2400m_setup()]. Can be combined with
- * |.
- *
- * @I2400M_BRI_SOFT: The device rebooted already and a reboot
- *     barker received, proceed directly to ack the boot sequence.
- * @I2400M_BRI_NO_REBOOT: Do not reboot the device and proceed
- *     directly to wait for a reboot barker from the device.
- * @I2400M_BRI_MAC_REINIT: We need to reinitialize the boot
- *     rom after reading the MAC address. This is quite a dirty hack,
- *     if you ask me -- the device requires the bootrom to be
- *     initialized after reading the MAC address.
- */
-enum i2400m_bri {
-	I2400M_BRI_SOFT       = 1 << 1,
-	I2400M_BRI_NO_REBOOT  = 1 << 2,
-	I2400M_BRI_MAC_REINIT = 1 << 3,
-};
-
-void i2400m_bm_cmd_prepare(struct i2400m_bootrom_header *);
-int i2400m_dev_bootstrap(struct i2400m *, enum i2400m_bri);
-int i2400m_read_mac_addr(struct i2400m *);
-int i2400m_bootrom_init(struct i2400m *, enum i2400m_bri);
-int i2400m_is_boot_barker(struct i2400m *, const void *, size_t);
-static inline
-int i2400m_is_d2h_barker(const void *buf)
-{
-	const __le32 *barker = buf;
-	return le32_to_cpu(*barker) == I2400M_D2H_MSG_BARKER;
-}
-void i2400m_unknown_barker(struct i2400m *, const void *, size_t);
-
-/* Make/grok boot-rom header commands */
-
-static inline
-__le32 i2400m_brh_command(enum i2400m_brh_opcode opcode, unsigned use_checksum,
-			  unsigned direct_access)
-{
-	return cpu_to_le32(
-		I2400M_BRH_SIGNATURE
-		| (direct_access ? I2400M_BRH_DIRECT_ACCESS : 0)
-		| I2400M_BRH_RESPONSE_REQUIRED /* response always required */
-		| (use_checksum ? I2400M_BRH_USE_CHECKSUM : 0)
-		| (opcode & I2400M_BRH_OPCODE_MASK));
-}
-
-static inline
-void i2400m_brh_set_opcode(struct i2400m_bootrom_header *hdr,
-			   enum i2400m_brh_opcode opcode)
-{
-	hdr->command = cpu_to_le32(
-		(le32_to_cpu(hdr->command) & ~I2400M_BRH_OPCODE_MASK)
-		| (opcode & I2400M_BRH_OPCODE_MASK));
-}
-
-static inline
-unsigned i2400m_brh_get_opcode(const struct i2400m_bootrom_header *hdr)
-{
-	return le32_to_cpu(hdr->command) & I2400M_BRH_OPCODE_MASK;
-}
-
-static inline
-unsigned i2400m_brh_get_response(const struct i2400m_bootrom_header *hdr)
-{
-	return (le32_to_cpu(hdr->command) & I2400M_BRH_RESPONSE_MASK)
-		>> I2400M_BRH_RESPONSE_SHIFT;
-}
-
-static inline
-unsigned i2400m_brh_get_use_checksum(const struct i2400m_bootrom_header *hdr)
-{
-	return le32_to_cpu(hdr->command) & I2400M_BRH_USE_CHECKSUM;
-}
-
-static inline
-unsigned i2400m_brh_get_response_required(
-	const struct i2400m_bootrom_header *hdr)
-{
-	return le32_to_cpu(hdr->command) & I2400M_BRH_RESPONSE_REQUIRED;
-}
-
-static inline
-unsigned i2400m_brh_get_direct_access(const struct i2400m_bootrom_header *hdr)
-{
-	return le32_to_cpu(hdr->command) & I2400M_BRH_DIRECT_ACCESS;
-}
-
-static inline
-unsigned i2400m_brh_get_signature(const struct i2400m_bootrom_header *hdr)
-{
-	return (le32_to_cpu(hdr->command) & I2400M_BRH_SIGNATURE_MASK)
-		>> I2400M_BRH_SIGNATURE_SHIFT;
-}
-
-
-/*
- * Driver / device setup and internal functions
- */
-void i2400m_init(struct i2400m *);
-int i2400m_reset(struct i2400m *, enum i2400m_reset_type);
-void i2400m_netdev_setup(struct net_device *net_dev);
-int i2400m_sysfs_setup(struct device_driver *);
-void i2400m_sysfs_release(struct device_driver *);
-int i2400m_tx_setup(struct i2400m *);
-void i2400m_wake_tx_work(struct work_struct *);
-void i2400m_tx_release(struct i2400m *);
-
-int i2400m_rx_setup(struct i2400m *);
-void i2400m_rx_release(struct i2400m *);
-
-void i2400m_fw_cache(struct i2400m *);
-void i2400m_fw_uncache(struct i2400m *);
-
-void i2400m_net_rx(struct i2400m *, struct sk_buff *, unsigned, const void *,
-		   int);
-void i2400m_net_erx(struct i2400m *, struct sk_buff *, enum i2400m_cs);
-void i2400m_net_wake_stop(struct i2400m *);
-enum i2400m_pt;
-int i2400m_tx(struct i2400m *, const void *, size_t, enum i2400m_pt);
-
-#ifdef CONFIG_DEBUG_FS
-void i2400m_debugfs_add(struct i2400m *);
-void i2400m_debugfs_rm(struct i2400m *);
-#else
-static inline void i2400m_debugfs_add(struct i2400m *i2400m) {}
-static inline void i2400m_debugfs_rm(struct i2400m *i2400m) {}
-#endif
-
-/* Initialize/shutdown the device */
-int i2400m_dev_initialize(struct i2400m *);
-void i2400m_dev_shutdown(struct i2400m *);
-
-extern struct attribute_group i2400m_dev_attr_group;
-
-
-/* HDI message's payload description handling */
-
-static inline
-size_t i2400m_pld_size(const struct i2400m_pld *pld)
-{
-	return I2400M_PLD_SIZE_MASK & le32_to_cpu(pld->val);
-}
-
-static inline
-enum i2400m_pt i2400m_pld_type(const struct i2400m_pld *pld)
-{
-	return (I2400M_PLD_TYPE_MASK & le32_to_cpu(pld->val))
-		>> I2400M_PLD_TYPE_SHIFT;
-}
-
-static inline
-void i2400m_pld_set(struct i2400m_pld *pld, size_t size,
-		    enum i2400m_pt type)
-{
-	pld->val = cpu_to_le32(
-		((type << I2400M_PLD_TYPE_SHIFT) & I2400M_PLD_TYPE_MASK)
-		|  (size & I2400M_PLD_SIZE_MASK));
-}
-
-
-/*
- * API for the bus-specific drivers
- * --------------------------------
- */
-
-static inline
-struct i2400m *i2400m_get(struct i2400m *i2400m)
-{
-	dev_hold(i2400m->wimax_dev.net_dev);
-	return i2400m;
-}
-
-static inline
-void i2400m_put(struct i2400m *i2400m)
-{
-	dev_put(i2400m->wimax_dev.net_dev);
-}
-
-int i2400m_dev_reset_handle(struct i2400m *, const char *);
-int i2400m_pre_reset(struct i2400m *);
-int i2400m_post_reset(struct i2400m *);
-void i2400m_error_recovery(struct i2400m *);
-
-/*
- * _setup()/_release() are called by the probe/disconnect functions of
- * the bus-specific drivers.
- */
-int i2400m_setup(struct i2400m *, enum i2400m_bri bm_flags);
-void i2400m_release(struct i2400m *);
-
-int i2400m_rx(struct i2400m *, struct sk_buff *);
-struct i2400m_msg_hdr *i2400m_tx_msg_get(struct i2400m *, size_t *);
-void i2400m_tx_msg_sent(struct i2400m *);
-
-
-/*
- * Utility functions
- */
-
-static inline
-struct device *i2400m_dev(struct i2400m *i2400m)
-{
-	return i2400m->wimax_dev.net_dev->dev.parent;
-}
-
-int i2400m_msg_check_status(const struct i2400m_l3l4_hdr *, char *, size_t);
-int i2400m_msg_size_check(struct i2400m *, const struct i2400m_l3l4_hdr *,
-			  size_t);
-struct sk_buff *i2400m_msg_to_dev(struct i2400m *, const void *, size_t);
-void i2400m_msg_to_dev_cancel_wait(struct i2400m *, int);
-void i2400m_report_hook(struct i2400m *, const struct i2400m_l3l4_hdr *,
-			size_t);
-void i2400m_report_hook_work(struct work_struct *);
-int i2400m_cmd_enter_powersave(struct i2400m *);
-int i2400m_cmd_exit_idle(struct i2400m *);
-struct sk_buff *i2400m_get_device_info(struct i2400m *);
-int i2400m_firmware_check(struct i2400m *);
-int i2400m_set_idle_timeout(struct i2400m *, unsigned);
-
-static inline
-struct usb_endpoint_descriptor *usb_get_epd(struct usb_interface *iface, int ep)
-{
-	return &iface->cur_altsetting->endpoint[ep].desc;
-}
-
-int i2400m_op_rfkill_sw_toggle(struct wimax_dev *, enum wimax_rf_state);
-void i2400m_report_tlv_rf_switches_status(struct i2400m *,
-					  const struct i2400m_tlv_rf_switches_status *);
-
-/*
- * Helpers for firmware backwards compatibility
- *
- * As we aim to support at least the firmware version that was
- * released with the previous kernel/driver release, some code will be
- * conditionally executed depending on the firmware version. On each
- * release, the code to support fw releases past the last two ones
- * will be purged.
- *
- * By making it depend on this macros, it is easier to keep it a tab
- * on what has to go and what not.
- */
-static inline
-unsigned i2400m_le_v1_3(struct i2400m *i2400m)
-{
-	/* running fw is lower or v1.3 */
-	return i2400m->fw_version <= 0x00090001;
-}
-
-static inline
-unsigned i2400m_ge_v1_4(struct i2400m *i2400m)
-{
-	/* running fw is higher or v1.4 */
-	return i2400m->fw_version >= 0x00090002;
-}
-
-
-/*
- * Do a millisecond-sleep for allowing wireshark to dump all the data
- * packets. Used only for debugging.
- */
-static inline
-void __i2400m_msleep(unsigned ms)
-{
-#if 1
-#else
-	msleep(ms);
-#endif
-}
-
-
-/* module initialization helpers */
-int i2400m_barker_db_init(const char *);
-void i2400m_barker_db_exit(void);
-
-
-
-#endif /* #ifndef __I2400M_H__ */
diff --git a/drivers/net/wimax/i2400m/netdev.c b/drivers/net/wimax/i2400m/netdev.c
deleted file mode 100644
index a7fcbceb6e6b..000000000000
--- a/drivers/net/wimax/i2400m/netdev.c
+++ /dev/null
@@ -1,603 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel Wireless WiMAX Connection 2400m
- * Glue with the networking stack
- *
- * Copyright (C) 2007 Intel Corporation <linux-wimax@intel.com>
- * Yanir Lubetkin <yanirx.lubetkin@intel.com>
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * This implements an ethernet device for the i2400m.
- *
- * We fake being an ethernet device to simplify the support from user
- * space and from the other side. The world is (sadly) configured to
- * take in only Ethernet devices...
- *
- * Because of this, when using firmwares <= v1.3, there is an
- * copy-each-rxed-packet overhead on the RX path. Each IP packet has
- * to be reallocated to add an ethernet header (as there is no space
- * in what we get from the device). This is a known drawback and
- * firmwares >= 1.4 add header space that can be used to insert the
- * ethernet header without having to reallocate and copy.
- *
- * TX error handling is tricky; because we have to FIFO/queue the
- * buffers for transmission (as the hardware likes it aggregated), we
- * just give the skb to the TX subsystem and by the time it is
- * transmitted, we have long forgotten about it. So we just don't care
- * too much about it.
- *
- * Note that when the device is in idle mode with the basestation, we
- * need to negotiate coming back up online. That involves negotiation
- * and possible user space interaction. Thus, we defer to a workqueue
- * to do all that. By default, we only queue a single packet and drop
- * the rest, as potentially the time to go back from idle to normal is
- * long.
- *
- * ROADMAP
- *
- * i2400m_open         Called on ifconfig up
- * i2400m_stop         Called on ifconfig down
- *
- * i2400m_hard_start_xmit Called by the network stack to send a packet
- *   i2400m_net_wake_tx	  Wake up device from basestation-IDLE & TX
- *     i2400m_wake_tx_work
- *       i2400m_cmd_exit_idle
- *       i2400m_tx
- *   i2400m_net_tx        TX a data frame
- *     i2400m_tx
- *
- * i2400m_change_mtu      Called on ifconfig mtu XXX
- *
- * i2400m_tx_timeout      Called when the device times out
- *
- * i2400m_net_rx          Called by the RX code when a data frame is
- *                        available (firmware <= 1.3)
- * i2400m_net_erx         Called by the RX code when a data frame is
- *                        available (firmware >= 1.4).
- * i2400m_netdev_setup    Called to setup all the netdev stuff from
- *                        alloc_netdev.
- */
-#include <linux/if_arp.h>
-#include <linux/slab.h>
-#include <linux/netdevice.h>
-#include <linux/ethtool.h>
-#include <linux/export.h>
-#include "i2400m.h"
-
-
-#define D_SUBMODULE netdev
-#include "debug-levels.h"
-
-enum {
-/* netdev interface */
-	/* 20 secs? yep, this is the maximum timeout that the device
-	 * might take to get out of IDLE / negotiate it with the base
-	 * station. We add 1sec for good measure. */
-	I2400M_TX_TIMEOUT = 21 * HZ,
-	/*
-	 * Experimentation has determined that, 20 to be a good value
-	 * for minimizing the jitter in the throughput.
-	 */
-	I2400M_TX_QLEN = 20,
-};
-
-
-static
-int i2400m_open(struct net_device *net_dev)
-{
-	int result;
-	struct i2400m *i2400m = net_dev_to_i2400m(net_dev);
-	struct device *dev = i2400m_dev(i2400m);
-
-	d_fnstart(3, dev, "(net_dev %p [i2400m %p])\n", net_dev, i2400m);
-	/* Make sure we wait until init is complete... */
-	mutex_lock(&i2400m->init_mutex);
-	if (i2400m->updown)
-		result = 0;
-	else
-		result = -EBUSY;
-	mutex_unlock(&i2400m->init_mutex);
-	d_fnend(3, dev, "(net_dev %p [i2400m %p]) = %d\n",
-		net_dev, i2400m, result);
-	return result;
-}
-
-
-static
-int i2400m_stop(struct net_device *net_dev)
-{
-	struct i2400m *i2400m = net_dev_to_i2400m(net_dev);
-	struct device *dev = i2400m_dev(i2400m);
-
-	d_fnstart(3, dev, "(net_dev %p [i2400m %p])\n", net_dev, i2400m);
-	i2400m_net_wake_stop(i2400m);
-	d_fnend(3, dev, "(net_dev %p [i2400m %p]) = 0\n", net_dev, i2400m);
-	return 0;
-}
-
-
-/*
- * Wake up the device and transmit a held SKB, then restart the net queue
- *
- * When the device goes into basestation-idle mode, we need to tell it
- * to exit that mode; it will negotiate with the base station, user
- * space may have to intervene to rehandshake crypto and then tell us
- * when it is ready to transmit the packet we have "queued". Still we
- * need to give it sometime after it reports being ok.
- *
- * On error, there is not much we can do. If the error was on TX, we
- * still wake the queue up to see if the next packet will be luckier.
- *
- * If _cmd_exit_idle() fails...well, it could be many things; most
- * commonly it is that something else took the device out of IDLE mode
- * (for example, the base station). In that case we get an -EILSEQ and
- * we are just going to ignore that one. If the device is back to
- * connected, then fine -- if it is someother state, the packet will
- * be dropped anyway.
- */
-void i2400m_wake_tx_work(struct work_struct *ws)
-{
-	int result;
-	struct i2400m *i2400m = container_of(ws, struct i2400m, wake_tx_ws);
-	struct net_device *net_dev = i2400m->wimax_dev.net_dev;
-	struct device *dev = i2400m_dev(i2400m);
-	struct sk_buff *skb;
-	unsigned long flags;
-
-	spin_lock_irqsave(&i2400m->tx_lock, flags);
-	skb = i2400m->wake_tx_skb;
-	i2400m->wake_tx_skb = NULL;
-	spin_unlock_irqrestore(&i2400m->tx_lock, flags);
-
-	d_fnstart(3, dev, "(ws %p i2400m %p skb %p)\n", ws, i2400m, skb);
-	result = -EINVAL;
-	if (skb == NULL) {
-		dev_err(dev, "WAKE&TX: skb disappeared!\n");
-		goto out_put;
-	}
-	/* If we have, somehow, lost the connection after this was
-	 * queued, don't do anything; this might be the device got
-	 * reset or just disconnected. */
-	if (unlikely(!netif_carrier_ok(net_dev)))
-		goto out_kfree;
-	result = i2400m_cmd_exit_idle(i2400m);
-	if (result == -EILSEQ)
-		result = 0;
-	if (result < 0) {
-		dev_err(dev, "WAKE&TX: device didn't get out of idle: "
-			"%d - resetting\n", result);
-		i2400m_reset(i2400m, I2400M_RT_BUS);
-		goto error;
-	}
-	result = wait_event_timeout(i2400m->state_wq,
-				    i2400m->state != I2400M_SS_IDLE,
-				    net_dev->watchdog_timeo - HZ/2);
-	if (result == 0)
-		result = -ETIMEDOUT;
-	if (result < 0) {
-		dev_err(dev, "WAKE&TX: error waiting for device to exit IDLE: "
-			"%d - resetting\n", result);
-		i2400m_reset(i2400m, I2400M_RT_BUS);
-		goto error;
-	}
-	msleep(20);	/* device still needs some time or it drops it */
-	result = i2400m_tx(i2400m, skb->data, skb->len, I2400M_PT_DATA);
-error:
-	netif_wake_queue(net_dev);
-out_kfree:
-	kfree_skb(skb);	/* refcount transferred by _hard_start_xmit() */
-out_put:
-	i2400m_put(i2400m);
-	d_fnend(3, dev, "(ws %p i2400m %p skb %p) = void [%d]\n",
-		ws, i2400m, skb, result);
-}
-
-
-/*
- * Prepare the data payload TX header
- *
- * The i2400m expects a 4 byte header in front of a data packet.
- *
- * Because we pretend to be an ethernet device, this packet comes with
- * an ethernet header. Pull it and push our header.
- */
-static
-void i2400m_tx_prep_header(struct sk_buff *skb)
-{
-	struct i2400m_pl_data_hdr *pl_hdr;
-	skb_pull(skb, ETH_HLEN);
-	pl_hdr = skb_push(skb, sizeof(*pl_hdr));
-	pl_hdr->reserved = 0;
-}
-
-
-
-/*
- * Cleanup resources acquired during i2400m_net_wake_tx()
- *
- * This is called by __i2400m_dev_stop and means we have to make sure
- * the workqueue is flushed from any pending work.
- */
-void i2400m_net_wake_stop(struct i2400m *i2400m)
-{
-	struct device *dev = i2400m_dev(i2400m);
-	struct sk_buff *wake_tx_skb;
-	unsigned long flags;
-
-	d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
-	/*
-	 * See i2400m_hard_start_xmit(), references are taken there and
-	 * here we release them if the packet was still pending.
-	 */
-	cancel_work_sync(&i2400m->wake_tx_ws);
-
-	spin_lock_irqsave(&i2400m->tx_lock, flags);
-	wake_tx_skb = i2400m->wake_tx_skb;
-	i2400m->wake_tx_skb = NULL;
-	spin_unlock_irqrestore(&i2400m->tx_lock, flags);
-
-	if (wake_tx_skb) {
-		i2400m_put(i2400m);
-		kfree_skb(wake_tx_skb);
-	}
-
-	d_fnend(3, dev, "(i2400m %p) = void\n", i2400m);
-}
-
-
-/*
- * TX an skb to an idle device
- *
- * When the device is in basestation-idle mode, we need to wake it up
- * and then TX. So we queue a work_struct for doing so.
- *
- * We need to get an extra ref for the skb (so it is not dropped), as
- * well as be careful not to queue more than one request (won't help
- * at all). If more than one request comes or there are errors, we
- * just drop the packets (see i2400m_hard_start_xmit()).
- */
-static
-int i2400m_net_wake_tx(struct i2400m *i2400m, struct net_device *net_dev,
-		       struct sk_buff *skb)
-{
-	int result;
-	struct device *dev = i2400m_dev(i2400m);
-	unsigned long flags;
-
-	d_fnstart(3, dev, "(skb %p net_dev %p)\n", skb, net_dev);
-	if (net_ratelimit()) {
-		d_printf(3, dev, "WAKE&NETTX: "
-			 "skb %p sending %d bytes to radio\n",
-			 skb, skb->len);
-		d_dump(4, dev, skb->data, skb->len);
-	}
-	/* We hold a ref count for i2400m and skb, so when
-	 * stopping() the device, we need to cancel that work
-	 * and if pending, release those resources. */
-	result = 0;
-	spin_lock_irqsave(&i2400m->tx_lock, flags);
-	if (!i2400m->wake_tx_skb) {
-		netif_stop_queue(net_dev);
-		i2400m_get(i2400m);
-		i2400m->wake_tx_skb = skb_get(skb);	/* transfer ref count */
-		i2400m_tx_prep_header(skb);
-		result = schedule_work(&i2400m->wake_tx_ws);
-		WARN_ON(result == 0);
-	}
-	spin_unlock_irqrestore(&i2400m->tx_lock, flags);
-	if (result == 0) {
-		/* Yes, this happens even if we stopped the
-		 * queue -- blame the queue disciplines that
-		 * queue without looking -- I guess there is a reason
-		 * for that. */
-		if (net_ratelimit())
-			d_printf(1, dev, "NETTX: device exiting idle, "
-				 "dropping skb %p, queue running %d\n",
-				 skb, netif_queue_stopped(net_dev));
-		result = -EBUSY;
-	}
-	d_fnend(3, dev, "(skb %p net_dev %p) = %d\n", skb, net_dev, result);
-	return result;
-}
-
-
-/*
- * Transmit a packet to the base station on behalf of the network stack.
- *
- * Returns: 0 if ok, < 0 errno code on error.
- *
- * We need to pull the ethernet header and add the hardware header,
- * which is currently set to all zeroes and reserved.
- */
-static
-int i2400m_net_tx(struct i2400m *i2400m, struct net_device *net_dev,
-		  struct sk_buff *skb)
-{
-	int result;
-	struct device *dev = i2400m_dev(i2400m);
-
-	d_fnstart(3, dev, "(i2400m %p net_dev %p skb %p)\n",
-		  i2400m, net_dev, skb);
-	/* FIXME: check eth hdr, only IPv4 is routed by the device as of now */
-	netif_trans_update(net_dev);
-	i2400m_tx_prep_header(skb);
-	d_printf(3, dev, "NETTX: skb %p sending %d bytes to radio\n",
-		 skb, skb->len);
-	d_dump(4, dev, skb->data, skb->len);
-	result = i2400m_tx(i2400m, skb->data, skb->len, I2400M_PT_DATA);
-	d_fnend(3, dev, "(i2400m %p net_dev %p skb %p) = %d\n",
-		i2400m, net_dev, skb, result);
-	return result;
-}
-
-
-/*
- * Transmit a packet to the base station on behalf of the network stack
- *
- *
- * Returns: NETDEV_TX_OK (always, even in case of error)
- *
- * In case of error, we just drop it. Reasons:
- *
- *  - we add a hw header to each skb, and if the network stack
- *    retries, we have no way to know if that skb has it or not.
- *
- *  - network protocols have their own drop-recovery mechanisms
- *
- *  - there is not much else we can do
- *
- * If the device is idle, we need to wake it up; that is an operation
- * that will sleep. See i2400m_net_wake_tx() for details.
- */
-static
-netdev_tx_t i2400m_hard_start_xmit(struct sk_buff *skb,
-					 struct net_device *net_dev)
-{
-	struct i2400m *i2400m = net_dev_to_i2400m(net_dev);
-	struct device *dev = i2400m_dev(i2400m);
-	int result = -1;
-
-	d_fnstart(3, dev, "(skb %p net_dev %p)\n", skb, net_dev);
-
-	if (skb_cow_head(skb, 0))
-		goto drop;
-
-	if (i2400m->state == I2400M_SS_IDLE)
-		result = i2400m_net_wake_tx(i2400m, net_dev, skb);
-	else
-		result = i2400m_net_tx(i2400m, net_dev, skb);
-	if (result <  0) {
-drop:
-		net_dev->stats.tx_dropped++;
-	} else {
-		net_dev->stats.tx_packets++;
-		net_dev->stats.tx_bytes += skb->len;
-	}
-	dev_kfree_skb(skb);
-	d_fnend(3, dev, "(skb %p net_dev %p) = %d\n", skb, net_dev, result);
-	return NETDEV_TX_OK;
-}
-
-
-static
-void i2400m_tx_timeout(struct net_device *net_dev, unsigned int txqueue)
-{
-	/*
-	 * We might want to kick the device
-	 *
-	 * There is not much we can do though, as the device requires
-	 * that we send the data aggregated. By the time we receive
-	 * this, there might be data pending to be sent or not...
-	 */
-	net_dev->stats.tx_errors++;
-}
-
-
-/*
- * Create a fake ethernet header
- *
- * For emulating an ethernet device, every received IP header has to
- * be prefixed with an ethernet header. Fake it with the given
- * protocol.
- */
-static
-void i2400m_rx_fake_eth_header(struct net_device *net_dev,
-			       void *_eth_hdr, __be16 protocol)
-{
-	struct i2400m *i2400m = net_dev_to_i2400m(net_dev);
-	struct ethhdr *eth_hdr = _eth_hdr;
-
-	memcpy(eth_hdr->h_dest, net_dev->dev_addr, sizeof(eth_hdr->h_dest));
-	memcpy(eth_hdr->h_source, i2400m->src_mac_addr,
-	       sizeof(eth_hdr->h_source));
-	eth_hdr->h_proto = protocol;
-}
-
-
-/*
- * i2400m_net_rx - pass a network packet to the stack
- *
- * @i2400m: device instance
- * @skb_rx: the skb where the buffer pointed to by @buf is
- * @i: 1 if payload is the only one
- * @buf: pointer to the buffer containing the data
- * @len: buffer's length
- *
- * This is only used now for the v1.3 firmware. It will be deprecated
- * in >= 2.6.31.
- *
- * Note that due to firmware limitations, we don't have space to add
- * an ethernet header, so we need to copy each packet. Firmware
- * versions >= v1.4 fix this [see i2400m_net_erx()].
- *
- * We just clone the skb and set it up so that it's skb->data pointer
- * points to "buf" and it's length.
- *
- * Note that if the payload is the last (or the only one) in a
- * multi-payload message, we don't clone the SKB but just reuse it.
- *
- * This function is normally run from a thread context. However, we
- * still use netif_rx() instead of netif_receive_skb() as was
- * recommended in the mailing list. Reason is in some stress tests
- * when sending/receiving a lot of data we seem to hit a softlock in
- * the kernel's TCP implementation [aroudn tcp_delay_timer()]. Using
- * netif_rx() took care of the issue.
- *
- * This is, of course, still open to do more research on why running
- * with netif_receive_skb() hits this softlock. FIXME.
- *
- * FIXME: currently we don't do any efforts at distinguishing if what
- * we got was an IPv4 or IPv6 header, to setup the protocol field
- * correctly.
- */
-void i2400m_net_rx(struct i2400m *i2400m, struct sk_buff *skb_rx,
-		   unsigned i, const void *buf, int buf_len)
-{
-	struct net_device *net_dev = i2400m->wimax_dev.net_dev;
-	struct device *dev = i2400m_dev(i2400m);
-	struct sk_buff *skb;
-
-	d_fnstart(2, dev, "(i2400m %p buf %p buf_len %d)\n",
-		  i2400m, buf, buf_len);
-	if (i) {
-		skb = skb_get(skb_rx);
-		d_printf(2, dev, "RX: reusing first payload skb %p\n", skb);
-		skb_pull(skb, buf - (void *) skb->data);
-		skb_trim(skb, (void *) skb_end_pointer(skb) - buf);
-	} else {
-		/* Yes, this is bad -- a lot of overhead -- see
-		 * comments at the top of the file */
-		skb = __netdev_alloc_skb(net_dev, buf_len, GFP_KERNEL);
-		if (skb == NULL) {
-			dev_err(dev, "NETRX: no memory to realloc skb\n");
-			net_dev->stats.rx_dropped++;
-			goto error_skb_realloc;
-		}
-		skb_put_data(skb, buf, buf_len);
-	}
-	i2400m_rx_fake_eth_header(i2400m->wimax_dev.net_dev,
-				  skb->data - ETH_HLEN,
-				  cpu_to_be16(ETH_P_IP));
-	skb_set_mac_header(skb, -ETH_HLEN);
-	skb->dev = i2400m->wimax_dev.net_dev;
-	skb->protocol = htons(ETH_P_IP);
-	net_dev->stats.rx_packets++;
-	net_dev->stats.rx_bytes += buf_len;
-	d_printf(3, dev, "NETRX: receiving %d bytes to network stack\n",
-		buf_len);
-	d_dump(4, dev, buf, buf_len);
-	netif_rx_ni(skb);	/* see notes in function header */
-error_skb_realloc:
-	d_fnend(2, dev, "(i2400m %p buf %p buf_len %d) = void\n",
-		i2400m, buf, buf_len);
-}
-
-
-/*
- * i2400m_net_erx - pass a network packet to the stack (extended version)
- *
- * @i2400m: device descriptor
- * @skb: the skb where the packet is - the skb should be set to point
- *     at the IP packet; this function will add ethernet headers if
- *     needed.
- * @cs: packet type
- *
- * This is only used now for firmware >= v1.4. Note it is quite
- * similar to i2400m_net_rx() (used only for v1.3 firmware).
- *
- * This function is normally run from a thread context. However, we
- * still use netif_rx() instead of netif_receive_skb() as was
- * recommended in the mailing list. Reason is in some stress tests
- * when sending/receiving a lot of data we seem to hit a softlock in
- * the kernel's TCP implementation [aroudn tcp_delay_timer()]. Using
- * netif_rx() took care of the issue.
- *
- * This is, of course, still open to do more research on why running
- * with netif_receive_skb() hits this softlock. FIXME.
- */
-void i2400m_net_erx(struct i2400m *i2400m, struct sk_buff *skb,
-		    enum i2400m_cs cs)
-{
-	struct net_device *net_dev = i2400m->wimax_dev.net_dev;
-	struct device *dev = i2400m_dev(i2400m);
-
-	d_fnstart(2, dev, "(i2400m %p skb %p [%u] cs %d)\n",
-		  i2400m, skb, skb->len, cs);
-	switch(cs) {
-	case I2400M_CS_IPV4_0:
-	case I2400M_CS_IPV4:
-		i2400m_rx_fake_eth_header(i2400m->wimax_dev.net_dev,
-					  skb->data - ETH_HLEN,
-					  cpu_to_be16(ETH_P_IP));
-		skb_set_mac_header(skb, -ETH_HLEN);
-		skb->dev = i2400m->wimax_dev.net_dev;
-		skb->protocol = htons(ETH_P_IP);
-		net_dev->stats.rx_packets++;
-		net_dev->stats.rx_bytes += skb->len;
-		break;
-	default:
-		dev_err(dev, "ERX: BUG? CS type %u unsupported\n", cs);
-		goto error;
-
-	}
-	d_printf(3, dev, "ERX: receiving %d bytes to the network stack\n",
-		 skb->len);
-	d_dump(4, dev, skb->data, skb->len);
-	netif_rx_ni(skb);	/* see notes in function header */
-error:
-	d_fnend(2, dev, "(i2400m %p skb %p [%u] cs %d) = void\n",
-		i2400m, skb, skb->len, cs);
-}
-
-static const struct net_device_ops i2400m_netdev_ops = {
-	.ndo_open = i2400m_open,
-	.ndo_stop = i2400m_stop,
-	.ndo_start_xmit = i2400m_hard_start_xmit,
-	.ndo_tx_timeout = i2400m_tx_timeout,
-};
-
-static void i2400m_get_drvinfo(struct net_device *net_dev,
-			       struct ethtool_drvinfo *info)
-{
-	struct i2400m *i2400m = net_dev_to_i2400m(net_dev);
-
-	strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
-	strlcpy(info->fw_version, i2400m->fw_name ? : "",
-		sizeof(info->fw_version));
-	if (net_dev->dev.parent)
-		strlcpy(info->bus_info, dev_name(net_dev->dev.parent),
-			sizeof(info->bus_info));
-}
-
-static const struct ethtool_ops i2400m_ethtool_ops = {
-	.get_drvinfo = i2400m_get_drvinfo,
-	.get_link = ethtool_op_get_link,
-};
-
-/**
- * i2400m_netdev_setup - Setup setup @net_dev's i2400m private data
- *
- * Called by alloc_netdev()
- */
-void i2400m_netdev_setup(struct net_device *net_dev)
-{
-	d_fnstart(3, NULL, "(net_dev %p)\n", net_dev);
-	ether_setup(net_dev);
-	net_dev->mtu = I2400M_MAX_MTU;
-	net_dev->min_mtu = 0;
-	net_dev->max_mtu = I2400M_MAX_MTU;
-	net_dev->tx_queue_len = I2400M_TX_QLEN;
-	net_dev->features =
-		  NETIF_F_VLAN_CHALLENGED
-		| NETIF_F_HIGHDMA;
-	net_dev->flags =
-		IFF_NOARP		/* i2400m is apure IP device */
-		& (~IFF_BROADCAST	/* i2400m is P2P */
-		   & ~IFF_MULTICAST);
-	net_dev->watchdog_timeo = I2400M_TX_TIMEOUT;
-	net_dev->netdev_ops = &i2400m_netdev_ops;
-	net_dev->ethtool_ops = &i2400m_ethtool_ops;
-	d_fnend(3, NULL, "(net_dev %p) = void\n", net_dev);
-}
-EXPORT_SYMBOL_GPL(i2400m_netdev_setup);
-
diff --git a/drivers/net/wimax/i2400m/op-rfkill.c b/drivers/net/wimax/i2400m/op-rfkill.c
deleted file mode 100644
index 5c79f052cad2..000000000000
--- a/drivers/net/wimax/i2400m/op-rfkill.c
+++ /dev/null
@@ -1,196 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel Wireless WiMAX Connection 2400m
- * Implement backend for the WiMAX stack rfkill support
- *
- * Copyright (C) 2007-2008 Intel Corporation <linux-wimax@intel.com>
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * The WiMAX kernel stack integrates into RF-Kill and keeps the
- * switches's status. We just need to:
- *
- * - report changes in the HW RF Kill switch [with
- *   wimax_rfkill_{sw,hw}_report(), which happens when we detect those
- *   indications coming through hardware reports]. We also do it on
- *   initialization to let the stack know the initial HW state.
- *
- * - implement indications from the stack to change the SW RF Kill
- *   switch (coming from sysfs, the wimax stack or user space).
- */
-#include "i2400m.h"
-#include <linux/wimax/i2400m.h>
-#include <linux/slab.h>
-
-
-
-#define D_SUBMODULE rfkill
-#include "debug-levels.h"
-
-/*
- * Return true if the i2400m radio is in the requested wimax_rf_state state
- *
- */
-static
-int i2400m_radio_is(struct i2400m *i2400m, enum wimax_rf_state state)
-{
-	if (state == WIMAX_RF_OFF)
-		return i2400m->state == I2400M_SS_RF_OFF
-			|| i2400m->state == I2400M_SS_RF_SHUTDOWN;
-	else if (state == WIMAX_RF_ON)
-		/* state == WIMAX_RF_ON */
-		return i2400m->state != I2400M_SS_RF_OFF
-			&& i2400m->state != I2400M_SS_RF_SHUTDOWN;
-	else {
-		BUG();
-		return -EINVAL;	/* shut gcc warnings on certain arches */
-	}
-}
-
-
-/*
- * WiMAX stack operation: implement SW RFKill toggling
- *
- * @wimax_dev: device descriptor
- * @skb: skb where the message has been received; skb->data is
- *       expected to point to the message payload.
- * @genl_info: passed by the generic netlink layer
- *
- * Generic Netlink will call this function when a message is sent from
- * userspace to change the software RF-Kill switch status.
- *
- * This function will set the device's software RF-Kill switch state to
- * match what is requested.
- *
- * NOTE: the i2400m has a strict state machine; we can only set the
- *       RF-Kill switch when it is on, the HW RF-Kill is on and the
- *       device is initialized. So we ignore errors steaming from not
- *       being in the right state (-EILSEQ).
- */
-int i2400m_op_rfkill_sw_toggle(struct wimax_dev *wimax_dev,
-			       enum wimax_rf_state state)
-{
-	int result;
-	struct i2400m *i2400m = wimax_dev_to_i2400m(wimax_dev);
-	struct device *dev = i2400m_dev(i2400m);
-	struct sk_buff *ack_skb;
-	struct {
-		struct i2400m_l3l4_hdr hdr;
-		struct i2400m_tlv_rf_operation sw_rf;
-	} __packed *cmd;
-	char strerr[32];
-
-	d_fnstart(4, dev, "(wimax_dev %p state %d)\n", wimax_dev, state);
-
-	result = -ENOMEM;
-	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
-	if (cmd == NULL)
-		goto error_alloc;
-	cmd->hdr.type = cpu_to_le16(I2400M_MT_CMD_RF_CONTROL);
-	cmd->hdr.length = sizeof(cmd->sw_rf);
-	cmd->hdr.version = cpu_to_le16(I2400M_L3L4_VERSION);
-	cmd->sw_rf.hdr.type = cpu_to_le16(I2400M_TLV_RF_OPERATION);
-	cmd->sw_rf.hdr.length = cpu_to_le16(sizeof(cmd->sw_rf.status));
-	switch (state) {
-	case WIMAX_RF_OFF:	/* RFKILL ON, radio OFF */
-		cmd->sw_rf.status = cpu_to_le32(2);
-		break;
-	case WIMAX_RF_ON:	/* RFKILL OFF, radio ON */
-		cmd->sw_rf.status = cpu_to_le32(1);
-		break;
-	default:
-		BUG();
-	}
-
-	ack_skb = i2400m_msg_to_dev(i2400m, cmd, sizeof(*cmd));
-	result = PTR_ERR(ack_skb);
-	if (IS_ERR(ack_skb)) {
-		dev_err(dev, "Failed to issue 'RF Control' command: %d\n",
-			result);
-		goto error_msg_to_dev;
-	}
-	result = i2400m_msg_check_status(wimax_msg_data(ack_skb),
-					 strerr, sizeof(strerr));
-	if (result < 0) {
-		dev_err(dev, "'RF Control' (0x%04x) command failed: %d - %s\n",
-			I2400M_MT_CMD_RF_CONTROL, result, strerr);
-		goto error_cmd;
-	}
-
-	/* Now we wait for the state to change to RADIO_OFF or RADIO_ON */
-	result = wait_event_timeout(
-		i2400m->state_wq, i2400m_radio_is(i2400m, state),
-		5 * HZ);
-	if (result == 0)
-		result = -ETIMEDOUT;
-	if (result < 0)
-		dev_err(dev, "Error waiting for device to toggle RF state: "
-			"%d\n", result);
-	result = 0;
-error_cmd:
-	kfree_skb(ack_skb);
-error_msg_to_dev:
-error_alloc:
-	d_fnend(4, dev, "(wimax_dev %p state %d) = %d\n",
-		wimax_dev, state, result);
-	kfree(cmd);
-	return result;
-}
-
-
-/*
- * Inform the WiMAX stack of changes in the RF Kill switches reported
- * by the device
- *
- * @i2400m: device descriptor
- * @rfss: TLV for RF Switches status; already validated
- *
- * NOTE: the reports on RF switch status cannot be trusted
- *       or used until the device is in a state of RADIO_OFF
- *       or greater.
- */
-void i2400m_report_tlv_rf_switches_status(
-	struct i2400m *i2400m,
-	const struct i2400m_tlv_rf_switches_status *rfss)
-{
-	struct device *dev = i2400m_dev(i2400m);
-	enum i2400m_rf_switch_status hw, sw;
-	enum wimax_st wimax_state;
-
-	sw = le32_to_cpu(rfss->sw_rf_switch);
-	hw = le32_to_cpu(rfss->hw_rf_switch);
-
-	d_fnstart(3, dev, "(i2400m %p rfss %p [hw %u sw %u])\n",
-		  i2400m, rfss, hw, sw);
-	/* We only process rw switch evens when the device has been
-	 * fully initialized */
-	wimax_state = wimax_state_get(&i2400m->wimax_dev);
-	if (wimax_state < WIMAX_ST_RADIO_OFF) {
-		d_printf(3, dev, "ignoring RF switches report, state %u\n",
-			 wimax_state);
-		goto out;
-	}
-	switch (sw) {
-	case I2400M_RF_SWITCH_ON:	/* RF Kill disabled (radio on) */
-		wimax_report_rfkill_sw(&i2400m->wimax_dev, WIMAX_RF_ON);
-		break;
-	case I2400M_RF_SWITCH_OFF:	/* RF Kill enabled (radio off) */
-		wimax_report_rfkill_sw(&i2400m->wimax_dev, WIMAX_RF_OFF);
-		break;
-	default:
-		dev_err(dev, "HW BUG? Unknown RF SW state 0x%x\n", sw);
-	}
-
-	switch (hw) {
-	case I2400M_RF_SWITCH_ON:	/* RF Kill disabled (radio on) */
-		wimax_report_rfkill_hw(&i2400m->wimax_dev, WIMAX_RF_ON);
-		break;
-	case I2400M_RF_SWITCH_OFF:	/* RF Kill enabled (radio off) */
-		wimax_report_rfkill_hw(&i2400m->wimax_dev, WIMAX_RF_OFF);
-		break;
-	default:
-		dev_err(dev, "HW BUG? Unknown RF HW state 0x%x\n", hw);
-	}
-out:
-	d_fnend(3, dev, "(i2400m %p rfss %p [hw %u sw %u]) = void\n",
-		i2400m, rfss, hw, sw);
-}
diff --git a/drivers/net/wimax/i2400m/rx.c b/drivers/net/wimax/i2400m/rx.c
deleted file mode 100644
index c9fb619a9e01..000000000000
--- a/drivers/net/wimax/i2400m/rx.c
+++ /dev/null
@@ -1,1395 +0,0 @@
-/*
- * Intel Wireless WiMAX Connection 2400m
- * Handle incoming traffic and deliver it to the control or data planes
- *
- *
- * Copyright (C) 2007-2008 Intel Corporation. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * Intel Corporation <linux-wimax@intel.com>
- * Yanir Lubetkin <yanirx.lubetkin@intel.com>
- *  - Initial implementation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *  - Use skb_clone(), break up processing in chunks
- *  - Split transport/device specific
- *  - Make buffer size dynamic to exert less memory pressure
- *  - RX reorder support
- *
- * This handles the RX path.
- *
- * We receive an RX message from the bus-specific driver, which
- * contains one or more payloads that have potentially different
- * destinataries (data or control paths).
- *
- * So we just take that payload from the transport specific code in
- * the form of an skb, break it up in chunks (a cloned skb each in the
- * case of network packets) and pass it to netdev or to the
- * command/ack handler (and from there to the WiMAX stack).
- *
- * PROTOCOL FORMAT
- *
- * The format of the buffer is:
- *
- * HEADER                      (struct i2400m_msg_hdr)
- * PAYLOAD DESCRIPTOR 0        (struct i2400m_pld)
- * PAYLOAD DESCRIPTOR 1
- * ...
- * PAYLOAD DESCRIPTOR N
- * PAYLOAD 0                   (raw bytes)
- * PAYLOAD 1
- * ...
- * PAYLOAD N
- *
- * See tx.c for a deeper description on alignment requirements and
- * other fun facts of it.
- *
- * DATA PACKETS
- *
- * In firmwares <= v1.3, data packets have no header for RX, but they
- * do for TX (currently unused).
- *
- * In firmware >= 1.4, RX packets have an extended header (16
- * bytes). This header conveys information for management of host
- * reordering of packets (the device offloads storage of the packets
- * for reordering to the host). Read below for more information.
- *
- * The header is used as dummy space to emulate an ethernet header and
- * thus be able to act as an ethernet device without having to reallocate.
- *
- * DATA RX REORDERING
- *
- * Starting in firmware v1.4, the device can deliver packets for
- * delivery with special reordering information; this allows it to
- * more effectively do packet management when some frames were lost in
- * the radio traffic.
- *
- * Thus, for RX packets that come out of order, the device gives the
- * driver enough information to queue them properly and then at some
- * point, the signal to deliver the whole (or part) of the queued
- * packets to the networking stack. There are 16 such queues.
- *
- * This only happens when a packet comes in with the "need reorder"
- * flag set in the RX header. When such bit is set, the following
- * operations might be indicated:
- *
- *  - reset queue: send all queued packets to the OS
- *
- *  - queue: queue a packet
- *
- *  - update ws: update the queue's window start and deliver queued
- *    packets that meet the criteria
- *
- *  - queue & update ws: queue a packet, update the window start and
- *    deliver queued packets that meet the criteria
- *
- * (delivery criteria: the packet's [normalized] sequence number is
- * lower than the new [normalized] window start).
- *
- * See the i2400m_roq_*() functions for details.
- *
- * ROADMAP
- *
- * i2400m_rx
- *   i2400m_rx_msg_hdr_check
- *   i2400m_rx_pl_descr_check
- *   i2400m_rx_payload
- *     i2400m_net_rx
- *     i2400m_rx_edata
- *       i2400m_net_erx
- *       i2400m_roq_reset
- *         i2400m_net_erx
- *       i2400m_roq_queue
- *         __i2400m_roq_queue
- *       i2400m_roq_update_ws
- *         __i2400m_roq_update_ws
- *           i2400m_net_erx
- *       i2400m_roq_queue_update_ws
- *         __i2400m_roq_queue
- *         __i2400m_roq_update_ws
- *           i2400m_net_erx
- *     i2400m_rx_ctl
- *       i2400m_msg_size_check
- *       i2400m_report_hook_work    [in a workqueue]
- *         i2400m_report_hook
- *       wimax_msg_to_user
- *       i2400m_rx_ctl_ack
- *         wimax_msg_to_user_alloc
- *     i2400m_rx_trace
- *       i2400m_msg_size_check
- *       wimax_msg
- */
-#include <linux/slab.h>
-#include <linux/kernel.h>
-#include <linux/if_arp.h>
-#include <linux/netdevice.h>
-#include <linux/workqueue.h>
-#include <linux/export.h>
-#include <linux/moduleparam.h>
-#include "i2400m.h"
-
-
-#define D_SUBMODULE rx
-#include "debug-levels.h"
-
-static int i2400m_rx_reorder_disabled;	/* 0 (rx reorder enabled) by default */
-module_param_named(rx_reorder_disabled, i2400m_rx_reorder_disabled, int, 0644);
-MODULE_PARM_DESC(rx_reorder_disabled,
-		 "If true, RX reordering will be disabled.");
-
-struct i2400m_report_hook_args {
-	struct sk_buff *skb_rx;
-	const struct i2400m_l3l4_hdr *l3l4_hdr;
-	size_t size;
-	struct list_head list_node;
-};
-
-
-/*
- * Execute i2400m_report_hook in a workqueue
- *
- * Goes over the list of queued reports in i2400m->rx_reports and
- * processes them.
- *
- * NOTE: refcounts on i2400m are not needed because we flush the
- *     workqueue this runs on (i2400m->work_queue) before destroying
- *     i2400m.
- */
-void i2400m_report_hook_work(struct work_struct *ws)
-{
-	struct i2400m *i2400m = container_of(ws, struct i2400m, rx_report_ws);
-	struct device *dev = i2400m_dev(i2400m);
-	struct i2400m_report_hook_args *args, *args_next;
-	LIST_HEAD(list);
-	unsigned long flags;
-
-	while (1) {
-		spin_lock_irqsave(&i2400m->rx_lock, flags);
-		list_splice_init(&i2400m->rx_reports, &list);
-		spin_unlock_irqrestore(&i2400m->rx_lock, flags);
-		if (list_empty(&list))
-			break;
-		else
-			d_printf(1, dev, "processing queued reports\n");
-		list_for_each_entry_safe(args, args_next, &list, list_node) {
-			d_printf(2, dev, "processing queued report %p\n", args);
-			i2400m_report_hook(i2400m, args->l3l4_hdr, args->size);
-			kfree_skb(args->skb_rx);
-			list_del(&args->list_node);
-			kfree(args);
-		}
-	}
-}
-
-
-/*
- * Flush the list of queued reports
- */
-static
-void i2400m_report_hook_flush(struct i2400m *i2400m)
-{
-	struct device *dev = i2400m_dev(i2400m);
-	struct i2400m_report_hook_args *args, *args_next;
-	LIST_HEAD(list);
-	unsigned long flags;
-
-	d_printf(1, dev, "flushing queued reports\n");
-	spin_lock_irqsave(&i2400m->rx_lock, flags);
-	list_splice_init(&i2400m->rx_reports, &list);
-	spin_unlock_irqrestore(&i2400m->rx_lock, flags);
-	list_for_each_entry_safe(args, args_next, &list, list_node) {
-		d_printf(2, dev, "flushing queued report %p\n", args);
-		kfree_skb(args->skb_rx);
-		list_del(&args->list_node);
-		kfree(args);
-	}
-}
-
-
-/*
- * Queue a report for later processing
- *
- * @i2400m: device descriptor
- * @skb_rx: skb that contains the payload (for reference counting)
- * @l3l4_hdr: pointer to the control
- * @size: size of the message
- */
-static
-void i2400m_report_hook_queue(struct i2400m *i2400m, struct sk_buff *skb_rx,
-			      const void *l3l4_hdr, size_t size)
-{
-	struct device *dev = i2400m_dev(i2400m);
-	unsigned long flags;
-	struct i2400m_report_hook_args *args;
-
-	args = kzalloc(sizeof(*args), GFP_NOIO);
-	if (args) {
-		args->skb_rx = skb_get(skb_rx);
-		args->l3l4_hdr = l3l4_hdr;
-		args->size = size;
-		spin_lock_irqsave(&i2400m->rx_lock, flags);
-		list_add_tail(&args->list_node, &i2400m->rx_reports);
-		spin_unlock_irqrestore(&i2400m->rx_lock, flags);
-		d_printf(2, dev, "queued report %p\n", args);
-		rmb();		/* see i2400m->ready's documentation  */
-		if (likely(i2400m->ready))	/* only send if up */
-			queue_work(i2400m->work_queue, &i2400m->rx_report_ws);
-	} else  {
-		if (printk_ratelimit())
-			dev_err(dev, "%s:%u: Can't allocate %zu B\n",
-				__func__, __LINE__, sizeof(*args));
-	}
-}
-
-
-/*
- * Process an ack to a command
- *
- * @i2400m: device descriptor
- * @payload: pointer to message
- * @size: size of the message
- *
- * Pass the acknodledgment (in an skb) to the thread that is waiting
- * for it in i2400m->msg_completion.
- *
- * We need to coordinate properly with the thread waiting for the
- * ack. Check if it is waiting or if it is gone. We loose the spinlock
- * to avoid allocating on atomic contexts (yeah, could use GFP_ATOMIC,
- * but this is not so speed critical).
- */
-static
-void i2400m_rx_ctl_ack(struct i2400m *i2400m,
-		       const void *payload, size_t size)
-{
-	struct device *dev = i2400m_dev(i2400m);
-	struct wimax_dev *wimax_dev = &i2400m->wimax_dev;
-	unsigned long flags;
-	struct sk_buff *ack_skb;
-
-	/* Anyone waiting for an answer? */
-	spin_lock_irqsave(&i2400m->rx_lock, flags);
-	if (i2400m->ack_skb != ERR_PTR(-EINPROGRESS)) {
-		dev_err(dev, "Huh? reply to command with no waiters\n");
-		goto error_no_waiter;
-	}
-	spin_unlock_irqrestore(&i2400m->rx_lock, flags);
-
-	ack_skb = wimax_msg_alloc(wimax_dev, NULL, payload, size, GFP_KERNEL);
-
-	/* Check waiter didn't time out waiting for the answer... */
-	spin_lock_irqsave(&i2400m->rx_lock, flags);
-	if (i2400m->ack_skb != ERR_PTR(-EINPROGRESS)) {
-		d_printf(1, dev, "Huh? waiter for command reply cancelled\n");
-		goto error_waiter_cancelled;
-	}
-	if (IS_ERR(ack_skb))
-		dev_err(dev, "CMD/GET/SET ack: cannot allocate SKB\n");
-	i2400m->ack_skb = ack_skb;
-	spin_unlock_irqrestore(&i2400m->rx_lock, flags);
-	complete(&i2400m->msg_completion);
-	return;
-
-error_waiter_cancelled:
-	if (!IS_ERR(ack_skb))
-		kfree_skb(ack_skb);
-error_no_waiter:
-	spin_unlock_irqrestore(&i2400m->rx_lock, flags);
-}
-
-
-/*
- * Receive and process a control payload
- *
- * @i2400m: device descriptor
- * @skb_rx: skb that contains the payload (for reference counting)
- * @payload: pointer to message
- * @size: size of the message
- *
- * There are two types of control RX messages: reports (asynchronous,
- * like your every day interrupts) and 'acks' (reponses to a command,
- * get or set request).
- *
- * If it is a report, we run hooks on it (to extract information for
- * things we need to do in the driver) and then pass it over to the
- * WiMAX stack to send it to user space.
- *
- * NOTE: report processing is done in a workqueue specific to the
- *     generic driver, to avoid deadlocks in the system.
- *
- * If it is not a report, it is an ack to a previously executed
- * command, set or get, so wake up whoever is waiting for it from
- * i2400m_msg_to_dev(). i2400m_rx_ctl_ack() takes care of that.
- *
- * Note that the sizes we pass to other functions from here are the
- * sizes of the _l3l4_hdr + payload, not full buffer sizes, as we have
- * verified in _msg_size_check() that they are congruent.
- *
- * For reports: We can't clone the original skb where the data is
- * because we need to send this up via netlink; netlink has to add
- * headers and we can't overwrite what's preceding the payload...as
- * it is another message. So we just dup them.
- */
-static
-void i2400m_rx_ctl(struct i2400m *i2400m, struct sk_buff *skb_rx,
-		   const void *payload, size_t size)
-{
-	int result;
-	struct device *dev = i2400m_dev(i2400m);
-	const struct i2400m_l3l4_hdr *l3l4_hdr = payload;
-	unsigned msg_type;
-
-	result = i2400m_msg_size_check(i2400m, l3l4_hdr, size);
-	if (result < 0) {
-		dev_err(dev, "HW BUG? device sent a bad message: %d\n",
-			result);
-		goto error_check;
-	}
-	msg_type = le16_to_cpu(l3l4_hdr->type);
-	d_printf(1, dev, "%s 0x%04x: %zu bytes\n",
-		 msg_type & I2400M_MT_REPORT_MASK ? "REPORT" : "CMD/SET/GET",
-		 msg_type, size);
-	d_dump(2, dev, l3l4_hdr, size);
-	if (msg_type & I2400M_MT_REPORT_MASK) {
-		/*
-		 * Process each report
-		 *
-		 * - has to be ran serialized as well
-		 *
-		 * - the handling might force the execution of
-		 *   commands. That might cause reentrancy issues with
-		 *   bus-specific subdrivers and workqueues, so the we
-		 *   run it in a separate workqueue.
-		 *
-		 * - when the driver is not yet ready to handle them,
-		 *   they are queued and at some point the queue is
-		 *   restarted [NOTE: we can't queue SKBs directly, as
-		 *   this might be a piece of a SKB, not the whole
-		 *   thing, and this is cheaper than cloning the
-		 *   SKB].
-		 *
-		 * Note we don't do refcounting for the device
-		 * structure; this is because before destroying
-		 * 'i2400m', we make sure to flush the
-		 * i2400m->work_queue, so there are no issues.
-		 */
-		i2400m_report_hook_queue(i2400m, skb_rx, l3l4_hdr, size);
-		if (unlikely(i2400m->trace_msg_from_user))
-			wimax_msg(&i2400m->wimax_dev, "echo",
-				  l3l4_hdr, size, GFP_KERNEL);
-		result = wimax_msg(&i2400m->wimax_dev, NULL, l3l4_hdr, size,
-				   GFP_KERNEL);
-		if (result < 0)
-			dev_err(dev, "error sending report to userspace: %d\n",
-				result);
-	} else		/* an ack to a CMD, GET or SET */
-		i2400m_rx_ctl_ack(i2400m, payload, size);
-error_check:
-	return;
-}
-
-
-/*
- * Receive and send up a trace
- *
- * @i2400m: device descriptor
- * @skb_rx: skb that contains the trace (for reference counting)
- * @payload: pointer to trace message inside the skb
- * @size: size of the message
- *
- * THe i2400m might produce trace information (diagnostics) and we
- * send them through a different kernel-to-user pipe (to avoid
- * clogging it).
- *
- * As in i2400m_rx_ctl(), we can't clone the original skb where the
- * data is because we need to send this up via netlink; netlink has to
- * add headers and we can't overwrite what's preceding the
- * payload...as it is another message. So we just dup them.
- */
-static
-void i2400m_rx_trace(struct i2400m *i2400m,
-		     const void *payload, size_t size)
-{
-	int result;
-	struct device *dev = i2400m_dev(i2400m);
-	struct wimax_dev *wimax_dev = &i2400m->wimax_dev;
-	const struct i2400m_l3l4_hdr *l3l4_hdr = payload;
-	unsigned msg_type;
-
-	result = i2400m_msg_size_check(i2400m, l3l4_hdr, size);
-	if (result < 0) {
-		dev_err(dev, "HW BUG? device sent a bad trace message: %d\n",
-			result);
-		goto error_check;
-	}
-	msg_type = le16_to_cpu(l3l4_hdr->type);
-	d_printf(1, dev, "Trace %s 0x%04x: %zu bytes\n",
-		 msg_type & I2400M_MT_REPORT_MASK ? "REPORT" : "CMD/SET/GET",
-		 msg_type, size);
-	d_dump(2, dev, l3l4_hdr, size);
-	result = wimax_msg(wimax_dev, "trace", l3l4_hdr, size, GFP_KERNEL);
-	if (result < 0)
-		dev_err(dev, "error sending trace to userspace: %d\n",
-			result);
-error_check:
-	return;
-}
-
-
-/*
- * Reorder queue data stored on skb->cb while the skb is queued in the
- * reorder queues.
- */
-struct i2400m_roq_data {
-	unsigned sn;		/* Serial number for the skb */
-	enum i2400m_cs cs;	/* packet type for the skb */
-};
-
-
-/*
- * ReOrder Queue
- *
- * @ws: Window Start; sequence number where the current window start
- *     is for this queue
- * @queue: the skb queue itself
- * @log: circular ring buffer used to log information about the
- *     reorder process in this queue that can be displayed in case of
- *     error to help diagnose it.
- *
- * This is the head for a list of skbs. In the skb->cb member of the
- * skb when queued here contains a 'struct i2400m_roq_data' were we
- * store the sequence number (sn) and the cs (packet type) coming from
- * the RX payload header from the device.
- */
-struct i2400m_roq
-{
-	unsigned ws;
-	struct sk_buff_head queue;
-	struct i2400m_roq_log *log;
-};
-
-
-static
-void __i2400m_roq_init(struct i2400m_roq *roq)
-{
-	roq->ws = 0;
-	skb_queue_head_init(&roq->queue);
-}
-
-
-static
-unsigned __i2400m_roq_index(struct i2400m *i2400m, struct i2400m_roq *roq)
-{
-	return ((unsigned long) roq - (unsigned long) i2400m->rx_roq)
-		/ sizeof(*roq);
-}
-
-
-/*
- * Normalize a sequence number based on the queue's window start
- *
- * nsn = (sn - ws) % 2048
- *
- * Note that if @sn < @roq->ws, we still need a positive number; %'s
- * sign is implementation specific, so we normalize it by adding 2048
- * to bring it to be positive.
- */
-static
-unsigned __i2400m_roq_nsn(struct i2400m_roq *roq, unsigned sn)
-{
-	int r;
-	r =  ((int) sn - (int) roq->ws) % 2048;
-	if (r < 0)
-		r += 2048;
-	return r;
-}
-
-
-/*
- * Circular buffer to keep the last N reorder operations
- *
- * In case something fails, dumb then to try to come up with what
- * happened.
- */
-enum {
-	I2400M_ROQ_LOG_LENGTH = 32,
-};
-
-struct i2400m_roq_log {
-	struct i2400m_roq_log_entry {
-		enum i2400m_ro_type type;
-		unsigned ws, count, sn, nsn, new_ws;
-	} entry[I2400M_ROQ_LOG_LENGTH];
-	unsigned in, out;
-};
-
-
-/* Print a log entry */
-static
-void i2400m_roq_log_entry_print(struct i2400m *i2400m, unsigned index,
-				unsigned e_index,
-				struct i2400m_roq_log_entry *e)
-{
-	struct device *dev = i2400m_dev(i2400m);
-
-	switch(e->type) {
-	case I2400M_RO_TYPE_RESET:
-		dev_err(dev, "q#%d reset           ws %u cnt %u sn %u/%u"
-			" - new nws %u\n",
-			index, e->ws, e->count, e->sn, e->nsn, e->new_ws);
-		break;
-	case I2400M_RO_TYPE_PACKET:
-		dev_err(dev, "q#%d queue           ws %u cnt %u sn %u/%u\n",
-			index, e->ws, e->count, e->sn, e->nsn);
-		break;
-	case I2400M_RO_TYPE_WS:
-		dev_err(dev, "q#%d update_ws       ws %u cnt %u sn %u/%u"
-			" - new nws %u\n",
-			index, e->ws, e->count, e->sn, e->nsn, e->new_ws);
-		break;
-	case I2400M_RO_TYPE_PACKET_WS:
-		dev_err(dev, "q#%d queue_update_ws ws %u cnt %u sn %u/%u"
-			" - new nws %u\n",
-			index, e->ws, e->count, e->sn, e->nsn, e->new_ws);
-		break;
-	default:
-		dev_err(dev, "q#%d BUG? entry %u - unknown type %u\n",
-			index, e_index, e->type);
-		break;
-	}
-}
-
-
-static
-void i2400m_roq_log_add(struct i2400m *i2400m,
-			struct i2400m_roq *roq, enum i2400m_ro_type type,
-			unsigned ws, unsigned count, unsigned sn,
-			unsigned nsn, unsigned new_ws)
-{
-	struct i2400m_roq_log_entry *e;
-	unsigned cnt_idx;
-	int index = __i2400m_roq_index(i2400m, roq);
-
-	/* if we run out of space, we eat from the end */
-	if (roq->log->in - roq->log->out == I2400M_ROQ_LOG_LENGTH)
-		roq->log->out++;
-	cnt_idx = roq->log->in++ % I2400M_ROQ_LOG_LENGTH;
-	e = &roq->log->entry[cnt_idx];
-
-	e->type = type;
-	e->ws = ws;
-	e->count = count;
-	e->sn = sn;
-	e->nsn = nsn;
-	e->new_ws = new_ws;
-
-	if (d_test(1))
-		i2400m_roq_log_entry_print(i2400m, index, cnt_idx, e);
-}
-
-
-/* Dump all the entries in the FIFO and reinitialize it */
-static
-void i2400m_roq_log_dump(struct i2400m *i2400m, struct i2400m_roq *roq)
-{
-	unsigned cnt, cnt_idx;
-	struct i2400m_roq_log_entry *e;
-	int index = __i2400m_roq_index(i2400m, roq);
-
-	BUG_ON(roq->log->out > roq->log->in);
-	for (cnt = roq->log->out; cnt < roq->log->in; cnt++) {
-		cnt_idx = cnt % I2400M_ROQ_LOG_LENGTH;
-		e = &roq->log->entry[cnt_idx];
-		i2400m_roq_log_entry_print(i2400m, index, cnt_idx, e);
-		memset(e, 0, sizeof(*e));
-	}
-	roq->log->in = roq->log->out = 0;
-}
-
-
-/*
- * Backbone for the queuing of an skb (by normalized sequence number)
- *
- * @i2400m: device descriptor
- * @roq: reorder queue where to add
- * @skb: the skb to add
- * @sn: the sequence number of the skb
- * @nsn: the normalized sequence number of the skb (pre-computed by the
- *     caller from the @sn and @roq->ws).
- *
- * We try first a couple of quick cases:
- *
- *   - the queue is empty
- *   - the skb would be appended to the queue
- *
- * These will be the most common operations.
- *
- * If these fail, then we have to do a sorted insertion in the queue,
- * which is the slowest path.
- *
- * We don't have to acquire a reference count as we are going to own it.
- */
-static
-void __i2400m_roq_queue(struct i2400m *i2400m, struct i2400m_roq *roq,
-			struct sk_buff *skb, unsigned sn, unsigned nsn)
-{
-	struct device *dev = i2400m_dev(i2400m);
-	struct sk_buff *skb_itr;
-	struct i2400m_roq_data *roq_data_itr, *roq_data;
-	unsigned nsn_itr;
-
-	d_fnstart(4, dev, "(i2400m %p roq %p skb %p sn %u nsn %u)\n",
-		  i2400m, roq, skb, sn, nsn);
-
-	roq_data = (struct i2400m_roq_data *) &skb->cb;
-	BUILD_BUG_ON(sizeof(*roq_data) > sizeof(skb->cb));
-	roq_data->sn = sn;
-	d_printf(3, dev, "ERX: roq %p [ws %u] nsn %d sn %u\n",
-		 roq, roq->ws, nsn, roq_data->sn);
-
-	/* Queues will be empty on not-so-bad environments, so try
-	 * that first */
-	if (skb_queue_empty(&roq->queue)) {
-		d_printf(2, dev, "ERX: roq %p - first one\n", roq);
-		__skb_queue_head(&roq->queue, skb);
-		goto out;
-	}
-	/* Now try append, as most of the operations will be that */
-	skb_itr = skb_peek_tail(&roq->queue);
-	roq_data_itr = (struct i2400m_roq_data *) &skb_itr->cb;
-	nsn_itr = __i2400m_roq_nsn(roq, roq_data_itr->sn);
-	/* NSN bounds assumed correct (checked when it was queued) */
-	if (nsn >= nsn_itr) {
-		d_printf(2, dev, "ERX: roq %p - appended after %p (nsn %d sn %u)\n",
-			 roq, skb_itr, nsn_itr, roq_data_itr->sn);
-		__skb_queue_tail(&roq->queue, skb);
-		goto out;
-	}
-	/* None of the fast paths option worked. Iterate to find the
-	 * right spot where to insert the packet; we know the queue is
-	 * not empty, so we are not the first ones; we also know we
-	 * are not going to be the last ones. The list is sorted, so
-	 * we have to insert before the the first guy with an nsn_itr
-	 * greater that our nsn. */
-	skb_queue_walk(&roq->queue, skb_itr) {
-		roq_data_itr = (struct i2400m_roq_data *) &skb_itr->cb;
-		nsn_itr = __i2400m_roq_nsn(roq, roq_data_itr->sn);
-		/* NSN bounds assumed correct (checked when it was queued) */
-		if (nsn_itr > nsn) {
-			d_printf(2, dev, "ERX: roq %p - queued before %p "
-				 "(nsn %d sn %u)\n", roq, skb_itr, nsn_itr,
-				 roq_data_itr->sn);
-			__skb_queue_before(&roq->queue, skb_itr, skb);
-			goto out;
-		}
-	}
-	/* If we get here, that is VERY bad -- print info to help
-	 * diagnose and crash it */
-	dev_err(dev, "SW BUG? failed to insert packet\n");
-	dev_err(dev, "ERX: roq %p [ws %u] skb %p nsn %d sn %u\n",
-		roq, roq->ws, skb, nsn, roq_data->sn);
-	skb_queue_walk(&roq->queue, skb_itr) {
-		roq_data_itr = (struct i2400m_roq_data *) &skb_itr->cb;
-		nsn_itr = __i2400m_roq_nsn(roq, roq_data_itr->sn);
-		/* NSN bounds assumed correct (checked when it was queued) */
-		dev_err(dev, "ERX: roq %p skb_itr %p nsn %d sn %u\n",
-			roq, skb_itr, nsn_itr, roq_data_itr->sn);
-	}
-	BUG();
-out:
-	d_fnend(4, dev, "(i2400m %p roq %p skb %p sn %u nsn %d) = void\n",
-		i2400m, roq, skb, sn, nsn);
-}
-
-
-/*
- * Backbone for the update window start operation
- *
- * @i2400m: device descriptor
- * @roq: Reorder queue
- * @sn: New sequence number
- *
- * Updates the window start of a queue; when doing so, it must deliver
- * to the networking stack all the queued skb's whose normalized
- * sequence number is lower than the new normalized window start.
- */
-static
-unsigned __i2400m_roq_update_ws(struct i2400m *i2400m, struct i2400m_roq *roq,
-				unsigned sn)
-{
-	struct device *dev = i2400m_dev(i2400m);
-	struct sk_buff *skb_itr, *tmp_itr;
-	struct i2400m_roq_data *roq_data_itr;
-	unsigned new_nws, nsn_itr;
-
-	new_nws = __i2400m_roq_nsn(roq, sn);
-	/*
-	 * For type 2(update_window_start) rx messages, there is no
-	 * need to check if the normalized sequence number is greater 1023.
-	 * Simply insert and deliver all packets to the host up to the
-	 * window start.
-	 */
-	skb_queue_walk_safe(&roq->queue, skb_itr, tmp_itr) {
-		roq_data_itr = (struct i2400m_roq_data *) &skb_itr->cb;
-		nsn_itr = __i2400m_roq_nsn(roq, roq_data_itr->sn);
-		/* NSN bounds assumed correct (checked when it was queued) */
-		if (nsn_itr < new_nws) {
-			d_printf(2, dev, "ERX: roq %p - release skb %p "
-				 "(nsn %u/%u new nws %u)\n",
-				 roq, skb_itr, nsn_itr, roq_data_itr->sn,
-				 new_nws);
-			__skb_unlink(skb_itr, &roq->queue);
-			i2400m_net_erx(i2400m, skb_itr, roq_data_itr->cs);
-		}
-		else
-			break;	/* rest of packets all nsn_itr > nws */
-	}
-	roq->ws = sn;
-	return new_nws;
-}
-
-
-/*
- * Reset a queue
- *
- * @i2400m: device descriptor
- * @cin: Queue Index
- *
- * Deliver all the packets and reset the window-start to zero. Name is
- * kind of misleading.
- */
-static
-void i2400m_roq_reset(struct i2400m *i2400m, struct i2400m_roq *roq)
-{
-	struct device *dev = i2400m_dev(i2400m);
-	struct sk_buff *skb_itr, *tmp_itr;
-	struct i2400m_roq_data *roq_data_itr;
-
-	d_fnstart(2, dev, "(i2400m %p roq %p)\n", i2400m, roq);
-	i2400m_roq_log_add(i2400m, roq, I2400M_RO_TYPE_RESET,
-			     roq->ws, skb_queue_len(&roq->queue),
-			     ~0, ~0, 0);
-	skb_queue_walk_safe(&roq->queue, skb_itr, tmp_itr) {
-		roq_data_itr = (struct i2400m_roq_data *) &skb_itr->cb;
-		d_printf(2, dev, "ERX: roq %p - release skb %p (sn %u)\n",
-			 roq, skb_itr, roq_data_itr->sn);
-		__skb_unlink(skb_itr, &roq->queue);
-		i2400m_net_erx(i2400m, skb_itr, roq_data_itr->cs);
-	}
-	roq->ws = 0;
-	d_fnend(2, dev, "(i2400m %p roq %p) = void\n", i2400m, roq);
-}
-
-
-/*
- * Queue a packet
- *
- * @i2400m: device descriptor
- * @cin: Queue Index
- * @skb: containing the packet data
- * @fbn: First block number of the packet in @skb
- * @lbn: Last block number of the packet in @skb
- *
- * The hardware is asking the driver to queue a packet for later
- * delivery to the networking stack.
- */
-static
-void i2400m_roq_queue(struct i2400m *i2400m, struct i2400m_roq *roq,
-		      struct sk_buff * skb, unsigned lbn)
-{
-	struct device *dev = i2400m_dev(i2400m);
-	unsigned nsn, len;
-
-	d_fnstart(2, dev, "(i2400m %p roq %p skb %p lbn %u) = void\n",
-		  i2400m, roq, skb, lbn);
-	len = skb_queue_len(&roq->queue);
-	nsn = __i2400m_roq_nsn(roq, lbn);
-	if (unlikely(nsn >= 1024)) {
-		dev_err(dev, "SW BUG? queue nsn %d (lbn %u ws %u)\n",
-			nsn, lbn, roq->ws);
-		i2400m_roq_log_dump(i2400m, roq);
-		i2400m_reset(i2400m, I2400M_RT_WARM);
-	} else {
-		__i2400m_roq_queue(i2400m, roq, skb, lbn, nsn);
-		i2400m_roq_log_add(i2400m, roq, I2400M_RO_TYPE_PACKET,
-				     roq->ws, len, lbn, nsn, ~0);
-	}
-	d_fnend(2, dev, "(i2400m %p roq %p skb %p lbn %u) = void\n",
-		i2400m, roq, skb, lbn);
-}
-
-
-/*
- * Update the window start in a reorder queue and deliver all skbs
- * with a lower window start
- *
- * @i2400m: device descriptor
- * @roq: Reorder queue
- * @sn: New sequence number
- */
-static
-void i2400m_roq_update_ws(struct i2400m *i2400m, struct i2400m_roq *roq,
-			  unsigned sn)
-{
-	struct device *dev = i2400m_dev(i2400m);
-	unsigned old_ws, nsn, len;
-
-	d_fnstart(2, dev, "(i2400m %p roq %p sn %u)\n", i2400m, roq, sn);
-	old_ws = roq->ws;
-	len = skb_queue_len(&roq->queue);
-	nsn = __i2400m_roq_update_ws(i2400m, roq, sn);
-	i2400m_roq_log_add(i2400m, roq, I2400M_RO_TYPE_WS,
-			     old_ws, len, sn, nsn, roq->ws);
-	d_fnstart(2, dev, "(i2400m %p roq %p sn %u) = void\n", i2400m, roq, sn);
-}
-
-
-/*
- * Queue a packet and update the window start
- *
- * @i2400m: device descriptor
- * @cin: Queue Index
- * @skb: containing the packet data
- * @fbn: First block number of the packet in @skb
- * @sn: Last block number of the packet in @skb
- *
- * Note that unlike i2400m_roq_update_ws(), which sets the new window
- * start to @sn, in here we'll set it to @sn + 1.
- */
-static
-void i2400m_roq_queue_update_ws(struct i2400m *i2400m, struct i2400m_roq *roq,
-				struct sk_buff * skb, unsigned sn)
-{
-	struct device *dev = i2400m_dev(i2400m);
-	unsigned nsn, old_ws, len;
-
-	d_fnstart(2, dev, "(i2400m %p roq %p skb %p sn %u)\n",
-		  i2400m, roq, skb, sn);
-	len = skb_queue_len(&roq->queue);
-	nsn = __i2400m_roq_nsn(roq, sn);
-	/*
-	 * For type 3(queue_update_window_start) rx messages, there is no
-	 * need to check if the normalized sequence number is greater 1023.
-	 * Simply insert and deliver all packets to the host up to the
-	 * window start.
-	 */
-	old_ws = roq->ws;
-	/* If the queue is empty, don't bother as we'd queue
-	 * it and immediately unqueue it -- just deliver it.
-	 */
-	if (len == 0) {
-		struct i2400m_roq_data *roq_data;
-		roq_data = (struct i2400m_roq_data *) &skb->cb;
-		i2400m_net_erx(i2400m, skb, roq_data->cs);
-	} else
-		__i2400m_roq_queue(i2400m, roq, skb, sn, nsn);
-
-	__i2400m_roq_update_ws(i2400m, roq, sn + 1);
-	i2400m_roq_log_add(i2400m, roq, I2400M_RO_TYPE_PACKET_WS,
-			   old_ws, len, sn, nsn, roq->ws);
-
-	d_fnend(2, dev, "(i2400m %p roq %p skb %p sn %u) = void\n",
-		i2400m, roq, skb, sn);
-}
-
-
-/*
- * This routine destroys the memory allocated for rx_roq, when no
- * other thread is accessing it. Access to rx_roq is refcounted by
- * rx_roq_refcount, hence memory allocated must be destroyed when
- * rx_roq_refcount becomes zero. This routine gets executed when
- * rx_roq_refcount becomes zero.
- */
-static void i2400m_rx_roq_destroy(struct kref *ref)
-{
-	unsigned itr;
-	struct i2400m *i2400m
-			= container_of(ref, struct i2400m, rx_roq_refcount);
-	for (itr = 0; itr < I2400M_RO_CIN + 1; itr++)
-		__skb_queue_purge(&i2400m->rx_roq[itr].queue);
-	kfree(i2400m->rx_roq[0].log);
-	kfree(i2400m->rx_roq);
-	i2400m->rx_roq = NULL;
-}
-
-/*
- * Receive and send up an extended data packet
- *
- * @i2400m: device descriptor
- * @skb_rx: skb that contains the extended data packet
- * @single_last: 1 if the payload is the only one or the last one of
- *     the skb.
- * @payload: pointer to the packet's data inside the skb
- * @size: size of the payload
- *
- * Starting in v1.4 of the i2400m's firmware, the device can send data
- * packets to the host in an extended format that; this incudes a 16
- * byte header (struct i2400m_pl_edata_hdr). Using this header's space
- * we can fake ethernet headers for ethernet device emulation without
- * having to copy packets around.
- *
- * This function handles said path.
- *
- *
- * Receive and send up an extended data packet that requires no reordering
- *
- * @i2400m: device descriptor
- * @skb_rx: skb that contains the extended data packet
- * @single_last: 1 if the payload is the only one or the last one of
- *     the skb.
- * @payload: pointer to the packet's data (past the actual extended
- *     data payload header).
- * @size: size of the payload
- *
- * Pass over to the networking stack a data packet that might have
- * reordering requirements.
- *
- * This needs to the decide if the skb in which the packet is
- * contained can be reused or if it needs to be cloned. Then it has to
- * be trimmed in the edges so that the beginning is the space for eth
- * header and then pass it to i2400m_net_erx() for the stack
- *
- * Assumes the caller has verified the sanity of the payload (size,
- * etc) already.
- */
-static
-void i2400m_rx_edata(struct i2400m *i2400m, struct sk_buff *skb_rx,
-		     unsigned single_last, const void *payload, size_t size)
-{
-	struct device *dev = i2400m_dev(i2400m);
-	const struct i2400m_pl_edata_hdr *hdr = payload;
-	struct net_device *net_dev = i2400m->wimax_dev.net_dev;
-	struct sk_buff *skb;
-	enum i2400m_cs cs;
-	u32 reorder;
-	unsigned ro_needed, ro_type, ro_cin, ro_sn;
-	struct i2400m_roq *roq;
-	struct i2400m_roq_data *roq_data;
-	unsigned long flags;
-
-	BUILD_BUG_ON(ETH_HLEN > sizeof(*hdr));
-
-	d_fnstart(2, dev, "(i2400m %p skb_rx %p single %u payload %p "
-		  "size %zu)\n", i2400m, skb_rx, single_last, payload, size);
-	if (size < sizeof(*hdr)) {
-		dev_err(dev, "ERX: HW BUG? message with short header (%zu "
-			"vs %zu bytes expected)\n", size, sizeof(*hdr));
-		goto error;
-	}
-
-	if (single_last) {
-		skb = skb_get(skb_rx);
-		d_printf(3, dev, "ERX: skb %p reusing\n", skb);
-	} else {
-		skb = skb_clone(skb_rx, GFP_KERNEL);
-		if (skb == NULL) {
-			dev_err(dev, "ERX: no memory to clone skb\n");
-			net_dev->stats.rx_dropped++;
-			goto error_skb_clone;
-		}
-		d_printf(3, dev, "ERX: skb %p cloned from %p\n", skb, skb_rx);
-	}
-	/* now we have to pull and trim so that the skb points to the
-	 * beginning of the IP packet; the netdev part will add the
-	 * ethernet header as needed - we know there is enough space
-	 * because we checked in i2400m_rx_edata(). */
-	skb_pull(skb, payload + sizeof(*hdr) - (void *) skb->data);
-	skb_trim(skb, (void *) skb_end_pointer(skb) - payload - sizeof(*hdr));
-
-	reorder = le32_to_cpu(hdr->reorder);
-	ro_needed = reorder & I2400M_RO_NEEDED;
-	cs = hdr->cs;
-	if (ro_needed) {
-		ro_type = (reorder >> I2400M_RO_TYPE_SHIFT) & I2400M_RO_TYPE;
-		ro_cin = (reorder >> I2400M_RO_CIN_SHIFT) & I2400M_RO_CIN;
-		ro_sn = (reorder >> I2400M_RO_SN_SHIFT) & I2400M_RO_SN;
-
-		spin_lock_irqsave(&i2400m->rx_lock, flags);
-		if (i2400m->rx_roq == NULL) {
-			kfree_skb(skb);	/* rx_roq is already destroyed */
-			spin_unlock_irqrestore(&i2400m->rx_lock, flags);
-			goto error;
-		}
-		roq = &i2400m->rx_roq[ro_cin];
-		kref_get(&i2400m->rx_roq_refcount);
-		spin_unlock_irqrestore(&i2400m->rx_lock, flags);
-
-		roq_data = (struct i2400m_roq_data *) &skb->cb;
-		roq_data->sn = ro_sn;
-		roq_data->cs = cs;
-		d_printf(2, dev, "ERX: reorder needed: "
-			 "type %u cin %u [ws %u] sn %u/%u len %zuB\n",
-			 ro_type, ro_cin, roq->ws, ro_sn,
-			 __i2400m_roq_nsn(roq, ro_sn), size);
-		d_dump(2, dev, payload, size);
-		switch(ro_type) {
-		case I2400M_RO_TYPE_RESET:
-			i2400m_roq_reset(i2400m, roq);
-			kfree_skb(skb);	/* no data here */
-			break;
-		case I2400M_RO_TYPE_PACKET:
-			i2400m_roq_queue(i2400m, roq, skb, ro_sn);
-			break;
-		case I2400M_RO_TYPE_WS:
-			i2400m_roq_update_ws(i2400m, roq, ro_sn);
-			kfree_skb(skb);	/* no data here */
-			break;
-		case I2400M_RO_TYPE_PACKET_WS:
-			i2400m_roq_queue_update_ws(i2400m, roq, skb, ro_sn);
-			break;
-		default:
-			dev_err(dev, "HW BUG? unknown reorder type %u\n", ro_type);
-		}
-
-		spin_lock_irqsave(&i2400m->rx_lock, flags);
-		kref_put(&i2400m->rx_roq_refcount, i2400m_rx_roq_destroy);
-		spin_unlock_irqrestore(&i2400m->rx_lock, flags);
-	}
-	else
-		i2400m_net_erx(i2400m, skb, cs);
-error_skb_clone:
-error:
-	d_fnend(2, dev, "(i2400m %p skb_rx %p single %u payload %p "
-		"size %zu) = void\n", i2400m, skb_rx, single_last, payload, size);
-}
-
-
-/*
- * Act on a received payload
- *
- * @i2400m: device instance
- * @skb_rx: skb where the transaction was received
- * @single_last: 1 this is the only payload or the last one (so the
- *     skb can be reused instead of cloned).
- * @pld: payload descriptor
- * @payload: payload data
- *
- * Upon reception of a payload, look at its guts in the payload
- * descriptor and decide what to do with it. If it is a single payload
- * skb or if the last skb is a data packet, the skb will be referenced
- * and modified (so it doesn't have to be cloned).
- */
-static
-void i2400m_rx_payload(struct i2400m *i2400m, struct sk_buff *skb_rx,
-		       unsigned single_last, const struct i2400m_pld *pld,
-		       const void *payload)
-{
-	struct device *dev = i2400m_dev(i2400m);
-	size_t pl_size = i2400m_pld_size(pld);
-	enum i2400m_pt pl_type = i2400m_pld_type(pld);
-
-	d_printf(7, dev, "RX: received payload type %u, %zu bytes\n",
-		 pl_type, pl_size);
-	d_dump(8, dev, payload, pl_size);
-
-	switch (pl_type) {
-	case I2400M_PT_DATA:
-		d_printf(3, dev, "RX: data payload %zu bytes\n", pl_size);
-		i2400m_net_rx(i2400m, skb_rx, single_last, payload, pl_size);
-		break;
-	case I2400M_PT_CTRL:
-		i2400m_rx_ctl(i2400m, skb_rx, payload, pl_size);
-		break;
-	case I2400M_PT_TRACE:
-		i2400m_rx_trace(i2400m, payload, pl_size);
-		break;
-	case I2400M_PT_EDATA:
-		d_printf(3, dev, "ERX: data payload %zu bytes\n", pl_size);
-		i2400m_rx_edata(i2400m, skb_rx, single_last, payload, pl_size);
-		break;
-	default:	/* Anything else shouldn't come to the host */
-		if (printk_ratelimit())
-			dev_err(dev, "RX: HW BUG? unexpected payload type %u\n",
-				pl_type);
-	}
-}
-
-
-/*
- * Check a received transaction's message header
- *
- * @i2400m: device descriptor
- * @msg_hdr: message header
- * @buf_size: size of the received buffer
- *
- * Check that the declarations done by a RX buffer message header are
- * sane and consistent with the amount of data that was received.
- */
-static
-int i2400m_rx_msg_hdr_check(struct i2400m *i2400m,
-			    const struct i2400m_msg_hdr *msg_hdr,
-			    size_t buf_size)
-{
-	int result = -EIO;
-	struct device *dev = i2400m_dev(i2400m);
-	if (buf_size < sizeof(*msg_hdr)) {
-		dev_err(dev, "RX: HW BUG? message with short header (%zu "
-			"vs %zu bytes expected)\n", buf_size, sizeof(*msg_hdr));
-		goto error;
-	}
-	if (msg_hdr->barker != cpu_to_le32(I2400M_D2H_MSG_BARKER)) {
-		dev_err(dev, "RX: HW BUG? message received with unknown "
-			"barker 0x%08x (buf_size %zu bytes)\n",
-			le32_to_cpu(msg_hdr->barker), buf_size);
-		goto error;
-	}
-	if (msg_hdr->num_pls == 0) {
-		dev_err(dev, "RX: HW BUG? zero payload packets in message\n");
-		goto error;
-	}
-	if (le16_to_cpu(msg_hdr->num_pls) > I2400M_MAX_PLS_IN_MSG) {
-		dev_err(dev, "RX: HW BUG? message contains more payload "
-			"than maximum; ignoring.\n");
-		goto error;
-	}
-	result = 0;
-error:
-	return result;
-}
-
-
-/*
- * Check a payload descriptor against the received data
- *
- * @i2400m: device descriptor
- * @pld: payload descriptor
- * @pl_itr: offset (in bytes) in the received buffer the payload is
- *          located
- * @buf_size: size of the received buffer
- *
- * Given a payload descriptor (part of a RX buffer), check it is sane
- * and that the data it declares fits in the buffer.
- */
-static
-int i2400m_rx_pl_descr_check(struct i2400m *i2400m,
-			      const struct i2400m_pld *pld,
-			      size_t pl_itr, size_t buf_size)
-{
-	int result = -EIO;
-	struct device *dev = i2400m_dev(i2400m);
-	size_t pl_size = i2400m_pld_size(pld);
-	enum i2400m_pt pl_type = i2400m_pld_type(pld);
-
-	if (pl_size > i2400m->bus_pl_size_max) {
-		dev_err(dev, "RX: HW BUG? payload @%zu: size %zu is "
-			"bigger than maximum %zu; ignoring message\n",
-			pl_itr, pl_size, i2400m->bus_pl_size_max);
-		goto error;
-	}
-	if (pl_itr + pl_size > buf_size) {	/* enough? */
-		dev_err(dev, "RX: HW BUG? payload @%zu: size %zu "
-			"goes beyond the received buffer "
-			"size (%zu bytes); ignoring message\n",
-			pl_itr, pl_size, buf_size);
-		goto error;
-	}
-	if (pl_type >= I2400M_PT_ILLEGAL) {
-		dev_err(dev, "RX: HW BUG? illegal payload type %u; "
-			"ignoring message\n", pl_type);
-		goto error;
-	}
-	result = 0;
-error:
-	return result;
-}
-
-
-/**
- * i2400m_rx - Receive a buffer of data from the device
- *
- * @i2400m: device descriptor
- * @skb: skbuff where the data has been received
- *
- * Parse in a buffer of data that contains an RX message sent from the
- * device. See the file header for the format. Run all checks on the
- * buffer header, then run over each payload's descriptors, verify
- * their consistency and act on each payload's contents.  If
- * everything is successful, update the device's statistics.
- *
- * Note: You need to set the skb to contain only the length of the
- * received buffer; for that, use skb_trim(skb, RECEIVED_SIZE).
- *
- * Returns:
- *
- * 0 if ok, < 0 errno on error
- *
- * If ok, this function owns now the skb and the caller DOESN'T have
- * to run kfree_skb() on it. However, on error, the caller still owns
- * the skb and it is responsible for releasing it.
- */
-int i2400m_rx(struct i2400m *i2400m, struct sk_buff *skb)
-{
-	int i, result;
-	struct device *dev = i2400m_dev(i2400m);
-	const struct i2400m_msg_hdr *msg_hdr;
-	size_t pl_itr, pl_size;
-	unsigned long flags;
-	unsigned num_pls, single_last, skb_len;
-
-	skb_len = skb->len;
-	d_fnstart(4, dev, "(i2400m %p skb %p [size %u])\n",
-		  i2400m, skb, skb_len);
-	msg_hdr = (void *) skb->data;
-	result = i2400m_rx_msg_hdr_check(i2400m, msg_hdr, skb_len);
-	if (result < 0)
-		goto error_msg_hdr_check;
-	result = -EIO;
-	num_pls = le16_to_cpu(msg_hdr->num_pls);
-	/* Check payload descriptor(s) */
-	pl_itr = struct_size(msg_hdr, pld, num_pls);
-	pl_itr = ALIGN(pl_itr, I2400M_PL_ALIGN);
-	if (pl_itr > skb_len) {	/* got all the payload descriptors? */
-		dev_err(dev, "RX: HW BUG? message too short (%u bytes) for "
-			"%u payload descriptors (%zu each, total %zu)\n",
-			skb_len, num_pls, sizeof(msg_hdr->pld[0]), pl_itr);
-		goto error_pl_descr_short;
-	}
-	/* Walk each payload payload--check we really got it */
-	for (i = 0; i < num_pls; i++) {
-		/* work around old gcc warnings */
-		pl_size = i2400m_pld_size(&msg_hdr->pld[i]);
-		result = i2400m_rx_pl_descr_check(i2400m, &msg_hdr->pld[i],
-						  pl_itr, skb_len);
-		if (result < 0)
-			goto error_pl_descr_check;
-		single_last = num_pls == 1 || i == num_pls - 1;
-		i2400m_rx_payload(i2400m, skb, single_last, &msg_hdr->pld[i],
-				  skb->data + pl_itr);
-		pl_itr += ALIGN(pl_size, I2400M_PL_ALIGN);
-		cond_resched();		/* Don't monopolize */
-	}
-	kfree_skb(skb);
-	/* Update device statistics */
-	spin_lock_irqsave(&i2400m->rx_lock, flags);
-	i2400m->rx_pl_num += i;
-	if (i > i2400m->rx_pl_max)
-		i2400m->rx_pl_max = i;
-	if (i < i2400m->rx_pl_min)
-		i2400m->rx_pl_min = i;
-	i2400m->rx_num++;
-	i2400m->rx_size_acc += skb_len;
-	if (skb_len < i2400m->rx_size_min)
-		i2400m->rx_size_min = skb_len;
-	if (skb_len > i2400m->rx_size_max)
-		i2400m->rx_size_max = skb_len;
-	spin_unlock_irqrestore(&i2400m->rx_lock, flags);
-error_pl_descr_check:
-error_pl_descr_short:
-error_msg_hdr_check:
-	d_fnend(4, dev, "(i2400m %p skb %p [size %u]) = %d\n",
-		i2400m, skb, skb_len, result);
-	return result;
-}
-EXPORT_SYMBOL_GPL(i2400m_rx);
-
-
-void i2400m_unknown_barker(struct i2400m *i2400m,
-			   const void *buf, size_t size)
-{
-	struct device *dev = i2400m_dev(i2400m);
-	char prefix[64];
-	const __le32 *barker = buf;
-	dev_err(dev, "RX: HW BUG? unknown barker %08x, "
-		"dropping %zu bytes\n", le32_to_cpu(*barker), size);
-	snprintf(prefix, sizeof(prefix), "%s %s: ",
-		 dev_driver_string(dev), dev_name(dev));
-	if (size > 64) {
-		print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET,
-			       8, 4, buf, 64, 0);
-		printk(KERN_ERR "%s... (only first 64 bytes "
-		       "dumped)\n", prefix);
-	} else
-		print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET,
-			       8, 4, buf, size, 0);
-}
-EXPORT_SYMBOL(i2400m_unknown_barker);
-
-
-/*
- * Initialize the RX queue and infrastructure
- *
- * This sets up all the RX reordering infrastructures, which will not
- * be used if reordering is not enabled or if the firmware does not
- * support it. The device is told to do reordering in
- * i2400m_dev_initialize(), where it also looks at the value of the
- * i2400m->rx_reorder switch before taking a decission.
- *
- * Note we allocate the roq queues in one chunk and the actual logging
- * support for it (logging) in another one and then we setup the
- * pointers from the first to the last.
- */
-int i2400m_rx_setup(struct i2400m *i2400m)
-{
-	int result = 0;
-
-	i2400m->rx_reorder = i2400m_rx_reorder_disabled? 0 : 1;
-	if (i2400m->rx_reorder) {
-		unsigned itr;
-		struct i2400m_roq_log *rd;
-
-		result = -ENOMEM;
-
-		i2400m->rx_roq = kcalloc(I2400M_RO_CIN + 1,
-					 sizeof(i2400m->rx_roq[0]), GFP_KERNEL);
-		if (i2400m->rx_roq == NULL)
-			goto error_roq_alloc;
-
-		rd = kcalloc(I2400M_RO_CIN + 1, sizeof(*i2400m->rx_roq[0].log),
-			     GFP_KERNEL);
-		if (rd == NULL) {
-			result = -ENOMEM;
-			goto error_roq_log_alloc;
-		}
-
-		for(itr = 0; itr < I2400M_RO_CIN + 1; itr++) {
-			__i2400m_roq_init(&i2400m->rx_roq[itr]);
-			i2400m->rx_roq[itr].log = &rd[itr];
-		}
-		kref_init(&i2400m->rx_roq_refcount);
-	}
-	return 0;
-
-error_roq_log_alloc:
-	kfree(i2400m->rx_roq);
-error_roq_alloc:
-	return result;
-}
-
-
-/* Tear down the RX queue and infrastructure */
-void i2400m_rx_release(struct i2400m *i2400m)
-{
-	unsigned long flags;
-
-	if (i2400m->rx_reorder) {
-		spin_lock_irqsave(&i2400m->rx_lock, flags);
-		kref_put(&i2400m->rx_roq_refcount, i2400m_rx_roq_destroy);
-		spin_unlock_irqrestore(&i2400m->rx_lock, flags);
-	}
-	/* at this point, nothing can be received... */
-	i2400m_report_hook_flush(i2400m);
-}
diff --git a/drivers/net/wimax/i2400m/sysfs.c b/drivers/net/wimax/i2400m/sysfs.c
deleted file mode 100644
index 895ee265909b..000000000000
--- a/drivers/net/wimax/i2400m/sysfs.c
+++ /dev/null
@@ -1,65 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel Wireless WiMAX Connection 2400m
- * Sysfs interfaces to show driver and device information
- *
- * Copyright (C) 2007 Intel Corporation <linux-wimax@intel.com>
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- */
-
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/spinlock.h>
-#include <linux/device.h>
-#include "i2400m.h"
-
-
-#define D_SUBMODULE sysfs
-#include "debug-levels.h"
-
-
-/*
- * Set the idle timeout (msecs)
- *
- * FIXME: eventually this should be a common WiMAX stack method, but
- * would like to wait to see how other devices manage it.
- */
-static
-ssize_t i2400m_idle_timeout_store(struct device *dev,
-				  struct device_attribute *attr,
-				  const char *buf, size_t size)
-{
-	ssize_t result;
-	struct i2400m *i2400m = net_dev_to_i2400m(to_net_dev(dev));
-	unsigned val;
-
-	result = -EINVAL;
-	if (sscanf(buf, "%u\n", &val) != 1)
-		goto error_no_unsigned;
-	if (val != 0 && (val < 100 || val > 300000 || val % 100 != 0)) {
-		dev_err(dev, "idle_timeout: %u: invalid msecs specification; "
-			"valid values are 0, 100-300000 in 100 increments\n",
-			val);
-		goto error_bad_value;
-	}
-	result = i2400m_set_idle_timeout(i2400m, val);
-	if (result >= 0)
-		result = size;
-error_no_unsigned:
-error_bad_value:
-	return result;
-}
-
-static
-DEVICE_ATTR_WO(i2400m_idle_timeout);
-
-static
-struct attribute *i2400m_dev_attrs[] = {
-	&dev_attr_i2400m_idle_timeout.attr,
-	NULL,
-};
-
-struct attribute_group i2400m_dev_attr_group = {
-	.name = NULL,		/* we want them in the same directory */
-	.attrs = i2400m_dev_attrs,
-};
diff --git a/drivers/net/wimax/i2400m/tx.c b/drivers/net/wimax/i2400m/tx.c
deleted file mode 100644
index 1255302e251e..000000000000
--- a/drivers/net/wimax/i2400m/tx.c
+++ /dev/null
@@ -1,1011 +0,0 @@
-/*
- * Intel Wireless WiMAX Connection 2400m
- * Generic (non-bus specific) TX handling
- *
- *
- * Copyright (C) 2007-2008 Intel Corporation. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * Intel Corporation <linux-wimax@intel.com>
- * Yanir Lubetkin <yanirx.lubetkin@intel.com>
- *  - Initial implementation
- *
- * Intel Corporation <linux-wimax@intel.com>
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *  - Rewritten to use a single FIFO to lower the memory allocation
- *    pressure and optimize cache hits when copying to the queue, as
- *    well as splitting out bus-specific code.
- *
- *
- * Implements data transmission to the device; this is done through a
- * software FIFO, as data/control frames can be coalesced (while the
- * device is reading the previous tx transaction, others accumulate).
- *
- * A FIFO is used because at the end it is resource-cheaper that trying
- * to implement scatter/gather over USB. As well, most traffic is going
- * to be download (vs upload).
- *
- * The format for sending/receiving data to/from the i2400m is
- * described in detail in rx.c:PROTOCOL FORMAT. In here we implement
- * the transmission of that. This is split between a bus-independent
- * part that just prepares everything and a bus-specific part that
- * does the actual transmission over the bus to the device (in the
- * bus-specific driver).
- *
- *
- * The general format of a device-host transaction is MSG-HDR, PLD1,
- * PLD2...PLDN, PL1, PL2,...PLN, PADDING.
- *
- * Because we need the send payload descriptors and then payloads and
- * because it is kind of expensive to do scatterlists in USB (one URB
- * per node), it becomes cheaper to append all the data to a FIFO
- * (copying to a FIFO potentially in cache is cheaper).
- *
- * Then the bus-specific code takes the parts of that FIFO that are
- * written and passes them to the device.
- *
- * So the concepts to keep in mind there are:
- *
- * We use a FIFO to queue the data in a linear buffer. We first append
- * a MSG-HDR, space for I2400M_TX_PLD_MAX payload descriptors and then
- * go appending payloads until we run out of space or of payload
- * descriptors. Then we append padding to make the whole transaction a
- * multiple of i2400m->bus_tx_block_size (as defined by the bus layer).
- *
- * - A TX message: a combination of a message header, payload
- *   descriptors and payloads.
- *
- *     Open: it is marked as active (i2400m->tx_msg is valid) and we
- *       can keep adding payloads to it.
- *
- *     Closed: we are not appending more payloads to this TX message
- *       (exahusted space in the queue, too many payloads or
- *       whichever).  We have appended padding so the whole message
- *       length is aligned to i2400m->bus_tx_block_size (as set by the
- *       bus/transport layer).
- *
- * - Most of the time we keep a TX message open to which we append
- *   payloads.
- *
- * - If we are going to append and there is no more space (we are at
- *   the end of the FIFO), we close the message, mark the rest of the
- *   FIFO space unusable (skip_tail), create a new message at the
- *   beginning of the FIFO (if there is space) and append the message
- *   there.
- *
- *   This is because we need to give linear TX messages to the bus
- *   engine. So we don't write a message to the remaining FIFO space
- *   until the tail and continue at the head of it.
- *
- * - We overload one of the fields in the message header to use it as
- *   'size' of the TX message, so we can iterate over them. It also
- *   contains a flag that indicates if we have to skip it or not.
- *   When we send the buffer, we update that to its real on-the-wire
- *   value.
- *
- * - The MSG-HDR PLD1...PLD2 stuff has to be a size multiple of 16.
- *
- *   It follows that if MSG-HDR says we have N messages, the whole
- *   header + descriptors is 16 + 4*N; for those to be a multiple of
- *   16, it follows that N can be 4, 8, 12, ... (32, 48, 64, 80...
- *   bytes).
- *
- *   So if we have only 1 payload, we have to submit a header that in
- *   all truth has space for 4.
- *
- *   The implication is that we reserve space for 12 (64 bytes); but
- *   if we fill up only (eg) 2, our header becomes 32 bytes only. So
- *   the TX engine has to shift those 32 bytes of msg header and 2
- *   payloads and padding so that right after it the payloads start
- *   and the TX engine has to know about that.
- *
- *   It is cheaper to move the header up than the whole payloads down.
- *
- *   We do this in i2400m_tx_close(). See 'i2400m_msg_hdr->offset'.
- *
- * - Each payload has to be size-padded to 16 bytes; before appending
- *   it, we just do it.
- *
- * - The whole message has to be padded to i2400m->bus_tx_block_size;
- *   we do this at close time. Thus, when reserving space for the
- *   payload, we always make sure there is also free space for this
- *   padding that sooner or later will happen.
- *
- * When we append a message, we tell the bus specific code to kick in
- * TXs. It will TX (in parallel) until the buffer is exhausted--hence
- * the lockin we do. The TX code will only send a TX message at the
- * time (which remember, might contain more than one payload). Of
- * course, when the bus-specific driver attempts to TX a message that
- * is still open, it gets closed first.
- *
- * Gee, this is messy; well a picture. In the example below we have a
- * partially full FIFO, with a closed message ready to be delivered
- * (with a moved message header to make sure it is size-aligned to
- * 16), TAIL room that was unusable (and thus is marked with a message
- * header that says 'skip this') and at the head of the buffer, an
- * incomplete message with a couple of payloads.
- *
- * N   ___________________________________________________
- *    |                                                   |
- *    |     TAIL room                                     |
- *    |                                                   |
- *    |  msg_hdr to skip (size |= 0x80000)                |
- *    |---------------------------------------------------|-------
- *    |                                                   |  /|\
- *    |                                                   |   |
- *    |  TX message padding                               |   |
- *    |                                                   |   |
- *    |                                                   |   |
- *    |- - - - - - - - - - - - - - - - - - - - - - - - - -|   |
- *    |                                                   |   |
- *    |  payload 1                                        |   |
- *    |                                                   | N * tx_block_size
- *    |                                                   |   |
- *    |- - - - - - - - - - - - - - - - - - - - - - - - - -|   |
- *    |                                                   |   |
- *    |  payload 1                                        |   |
- *    |                                                   |   |
- *    |                                                   |   |
- *    |- - - - - - - - - - - - - - - - - - - - - - - - - -|- -|- - - -
- *    |  padding 3                  /|\                   |   |   /|\
- *    |  padding 2                   |                    |   |    |
- *    |  pld 1                32 bytes (2 * 16)           |   |    |
- *    |  pld 0                       |                    |   |    |
- *    |  moved msg_hdr              \|/                   |  \|/   |
- *    |- - - - - - - - - - - - - - - - - - - - - - - - - -|- - -   |
- *    |                                                   |    _PLD_SIZE
- *    |  unused                                           |        |
- *    |                                                   |        |
- *    |- - - - - - - - - - - - - - - - - - - - - - - - - -|        |
- *    |  msg_hdr (size X)       [this message is closed]  |       \|/
- *    |===================================================|========== <=== OUT
- *    |                                                   |
- *    |                                                   |
- *    |                                                   |
- *    |          Free rooom                               |
- *    |                                                   |
- *    |                                                   |
- *    |                                                   |
- *    |                                                   |
- *    |                                                   |
- *    |                                                   |
- *    |                                                   |
- *    |                                                   |
- *    |                                                   |
- *    |===================================================|========== <=== IN
- *    |                                                   |
- *    |                                                   |
- *    |                                                   |
- *    |                                                   |
- *    |  payload 1                                        |
- *    |                                                   |
- *    |                                                   |
- *    |- - - - - - - - - - - - - - - - - - - - - - - - - -|
- *    |                                                   |
- *    |  payload 0                                        |
- *    |                                                   |
- *    |                                                   |
- *    |- - - - - - - - - - - - - - - - - - - - - - - - - -|
- *    |  pld 11                     /|\                   |
- *    |  ...                         |                    |
- *    |  pld 1                64 bytes (2 * 16)           |
- *    |  pld 0                       |                    |
- *    |  msg_hdr (size X)           \|/ [message is open] |
- * 0   ---------------------------------------------------
- *
- *
- * ROADMAP
- *
- * i2400m_tx_setup()           Called by i2400m_setup
- * i2400m_tx_release()         Called by i2400m_release()
- *
- *  i2400m_tx()                 Called to send data or control frames
- *    i2400m_tx_fifo_push()     Allocates append-space in the FIFO
- *    i2400m_tx_new()           Opens a new message in the FIFO
- *    i2400m_tx_fits()          Checks if a new payload fits in the message
- *    i2400m_tx_close()         Closes an open message in the FIFO
- *    i2400m_tx_skip_tail()     Marks unusable FIFO tail space
- *    i2400m->bus_tx_kick()
- *
- * Now i2400m->bus_tx_kick() is the the bus-specific driver backend
- * implementation; that would do:
- *
- * i2400m->bus_tx_kick()
- *   i2400m_tx_msg_get()	Gets first message ready to go
- *   ...sends it...
- *   i2400m_tx_msg_sent()       Ack the message is sent; repeat from
- *                              _tx_msg_get() until it returns NULL
- *                               (FIFO empty).
- */
-#include <linux/netdevice.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-#include "i2400m.h"
-
-
-#define D_SUBMODULE tx
-#include "debug-levels.h"
-
-enum {
-	/**
-	 * TX Buffer size
-	 *
-	 * Doc says maximum transaction is 16KiB. If we had 16KiB en
-	 * route and 16KiB being queued, it boils down to needing
-	 * 32KiB.
-	 * 32KiB is insufficient for 1400 MTU, hence increasing
-	 * tx buffer size to 64KiB.
-	 */
-	I2400M_TX_BUF_SIZE = 65536,
-	/**
-	 * Message header and payload descriptors have to be 16
-	 * aligned (16 + 4 * N = 16 * M). If we take that average sent
-	 * packets are MTU size (~1400-~1500) it follows that we could
-	 * fit at most 10-11 payloads in one transaction. To meet the
-	 * alignment requirement, that means we need to leave space
-	 * for 12 (64 bytes). To simplify, we leave space for that. If
-	 * at the end there are less, we pad up to the nearest
-	 * multiple of 16.
-	 */
-	/*
-	 * According to Intel Wimax i3200, i5x50 and i6x50 specification
-	 * documents, the maximum number of payloads per message can be
-	 * up to 60. Increasing the number of payloads to 60 per message
-	 * helps to accommodate smaller payloads in a single transaction.
-	 */
-	I2400M_TX_PLD_MAX = 60,
-	I2400M_TX_PLD_SIZE = sizeof(struct i2400m_msg_hdr)
-	+ I2400M_TX_PLD_MAX * sizeof(struct i2400m_pld),
-	I2400M_TX_SKIP = 0x80000000,
-	/*
-	 * According to Intel Wimax i3200, i5x50 and i6x50 specification
-	 * documents, the maximum size of each message can be up to 16KiB.
-	 */
-	I2400M_TX_MSG_SIZE = 16384,
-};
-
-#define TAIL_FULL ((void *)~(unsigned long)NULL)
-
-/*
- * Calculate how much tail room is available
- *
- * Note the trick here. This path is ONLY caleed for Case A (see
- * i2400m_tx_fifo_push() below), where we have:
- *
- *       Case A
- * N  ___________
- *   | tail room |
- *   |           |
- *   |<-  IN   ->|
- *   |           |
- *   |   data    |
- *   |           |
- *   |<-  OUT  ->|
- *   |           |
- *   | head room |
- * 0  -----------
- *
- * When calculating the tail_room, tx_in might get to be zero if
- * i2400m->tx_in is right at the end of the buffer (really full
- * buffer) if there is no head room. In this case, tail_room would be
- * I2400M_TX_BUF_SIZE, although it is actually zero. Hence the final
- * mod (%) operation. However, when doing this kind of optimization,
- * i2400m->tx_in being zero would fail, so we treat is an a special
- * case.
- */
-static inline
-size_t __i2400m_tx_tail_room(struct i2400m *i2400m)
-{
-	size_t tail_room;
-	size_t tx_in;
-
-	if (unlikely(i2400m->tx_in == 0))
-		return I2400M_TX_BUF_SIZE;
-	tx_in = i2400m->tx_in % I2400M_TX_BUF_SIZE;
-	tail_room = I2400M_TX_BUF_SIZE - tx_in;
-	tail_room %= I2400M_TX_BUF_SIZE;
-	return tail_room;
-}
-
-
-/*
- * Allocate @size bytes in the TX fifo, return a pointer to it
- *
- * @i2400m: device descriptor
- * @size: size of the buffer we need to allocate
- * @padding: ensure that there is at least this many bytes of free
- *     contiguous space in the fifo. This is needed because later on
- *     we might need to add padding.
- * @try_head: specify either to allocate head room or tail room space
- *     in the TX FIFO. This boolean is required to avoids a system hang
- *     due to an infinite loop caused by i2400m_tx_fifo_push().
- *     The caller must always try to allocate tail room space first by
- *     calling this routine with try_head = 0. In case if there
- *     is not enough tail room space but there is enough head room space,
- *     (i2400m_tx_fifo_push() returns TAIL_FULL) try to allocate head
- *     room space, by calling this routine again with try_head = 1.
- *
- * Returns:
- *
- *     Pointer to the allocated space. NULL if there is no
- *     space. TAIL_FULL if there is no space at the tail but there is at
- *     the head (Case B below).
- *
- * These are the two basic cases we need to keep an eye for -- it is
- * much better explained in linux/kernel/kfifo.c, but this code
- * basically does the same. No rocket science here.
- *
- *       Case A               Case B
- * N  ___________          ___________
- *   | tail room |        |   data    |
- *   |           |        |           |
- *   |<-  IN   ->|        |<-  OUT  ->|
- *   |           |        |           |
- *   |   data    |        |   room    |
- *   |           |        |           |
- *   |<-  OUT  ->|        |<-  IN   ->|
- *   |           |        |           |
- *   | head room |        |   data    |
- * 0  -----------          -----------
- *
- * We allocate only *contiguous* space.
- *
- * We can allocate only from 'room'. In Case B, it is simple; in case
- * A, we only try from the tail room; if it is not enough, we just
- * fail and return TAIL_FULL and let the caller figure out if we wants to
- * skip the tail room and try to allocate from the head.
- *
- * There is a corner case, wherein i2400m_tx_new() can get into
- * an infinite loop calling i2400m_tx_fifo_push().
- * In certain situations, tx_in would have reached on the top of TX FIFO
- * and i2400m_tx_tail_room() returns 0, as described below:
- *
- * N  ___________ tail room is zero
- *   |<-  IN   ->|
- *   |           |
- *   |           |
- *   |           |
- *   |   data    |
- *   |<-  OUT  ->|
- *   |           |
- *   |           |
- *   | head room |
- * 0  -----------
- * During such a time, where tail room is zero in the TX FIFO and if there
- * is a request to add a payload to TX FIFO, which calls:
- * i2400m_tx()
- *         ->calls i2400m_tx_close()
- *         ->calls i2400m_tx_skip_tail()
- *         goto try_new;
- *         ->calls i2400m_tx_new()
- *                    |----> [try_head:]
- *     infinite loop  |     ->calls i2400m_tx_fifo_push()
- *                    |                if (tail_room < needed)
- *                    |                   if (head_room => needed)
- *                    |                       return TAIL_FULL;
- *                    |<----  goto try_head;
- *
- * i2400m_tx() calls i2400m_tx_close() to close the message, since there
- * is no tail room to accommodate the payload and calls
- * i2400m_tx_skip_tail() to skip the tail space. Now i2400m_tx() calls
- * i2400m_tx_new() to allocate space for new message header calling
- * i2400m_tx_fifo_push() that returns TAIL_FULL, since there is no tail space
- * to accommodate the message header, but there is enough head space.
- * The i2400m_tx_new() keeps re-retrying by calling i2400m_tx_fifo_push()
- * ending up in a loop causing system freeze.
- *
- * This corner case is avoided by using a try_head boolean,
- * as an argument to i2400m_tx_fifo_push().
- *
- * Note:
- *
- *     Assumes i2400m->tx_lock is taken, and we use that as a barrier
- *
- *     The indexes keep increasing and we reset them to zero when we
- *     pop data off the queue
- */
-static
-void *i2400m_tx_fifo_push(struct i2400m *i2400m, size_t size,
-			  size_t padding, bool try_head)
-{
-	struct device *dev = i2400m_dev(i2400m);
-	size_t room, tail_room, needed_size;
-	void *ptr;
-
-	needed_size = size + padding;
-	room = I2400M_TX_BUF_SIZE - (i2400m->tx_in - i2400m->tx_out);
-	if (room < needed_size)	{ /* this takes care of Case B */
-		d_printf(2, dev, "fifo push %zu/%zu: no space\n",
-			 size, padding);
-		return NULL;
-	}
-	/* Is there space at the tail? */
-	tail_room = __i2400m_tx_tail_room(i2400m);
-	if (!try_head && tail_room < needed_size) {
-		/*
-		 * If the tail room space is not enough to push the message
-		 * in the TX FIFO, then there are two possibilities:
-		 * 1. There is enough head room space to accommodate
-		 * this message in the TX FIFO.
-		 * 2. There is not enough space in the head room and
-		 * in tail room of the TX FIFO to accommodate the message.
-		 * In the case (1), return TAIL_FULL so that the caller
-		 * can figure out, if the caller wants to push the message
-		 * into the head room space.
-		 * In the case (2), return NULL, indicating that the TX FIFO
-		 * cannot accommodate the message.
-		 */
-		if (room - tail_room >= needed_size) {
-			d_printf(2, dev, "fifo push %zu/%zu: tail full\n",
-				 size, padding);
-			return TAIL_FULL;	/* There might be head space */
-		} else {
-			d_printf(2, dev, "fifo push %zu/%zu: no head space\n",
-				 size, padding);
-			return NULL;	/* There is no space */
-		}
-	}
-	ptr = i2400m->tx_buf + i2400m->tx_in % I2400M_TX_BUF_SIZE;
-	d_printf(2, dev, "fifo push %zu/%zu: at @%zu\n", size, padding,
-		 i2400m->tx_in % I2400M_TX_BUF_SIZE);
-	i2400m->tx_in += size;
-	return ptr;
-}
-
-
-/*
- * Mark the tail of the FIFO buffer as 'to-skip'
- *
- * We should never hit the BUG_ON() because all the sizes we push to
- * the FIFO are padded to be a multiple of 16 -- the size of *msg
- * (I2400M_PL_PAD for the payloads, I2400M_TX_PLD_SIZE for the
- * header).
- *
- * Tail room can get to be zero if a message was opened when there was
- * space only for a header. _tx_close() will mark it as to-skip (as it
- * will have no payloads) and there will be no more space to flush, so
- * nothing has to be done here. This is probably cheaper than ensuring
- * in _tx_new() that there is some space for payloads...as we could
- * always possibly hit the same problem if the payload wouldn't fit.
- *
- * Note:
- *
- *     Assumes i2400m->tx_lock is taken, and we use that as a barrier
- *
- *     This path is only taken for Case A FIFO situations [see
- *     i2400m_tx_fifo_push()]
- */
-static
-void i2400m_tx_skip_tail(struct i2400m *i2400m)
-{
-	struct device *dev = i2400m_dev(i2400m);
-	size_t tx_in = i2400m->tx_in % I2400M_TX_BUF_SIZE;
-	size_t tail_room = __i2400m_tx_tail_room(i2400m);
-	struct i2400m_msg_hdr *msg = i2400m->tx_buf + tx_in;
-	if (unlikely(tail_room == 0))
-		return;
-	BUG_ON(tail_room < sizeof(*msg));
-	msg->size = tail_room | I2400M_TX_SKIP;
-	d_printf(2, dev, "skip tail: skipping %zu bytes @%zu\n",
-		 tail_room, tx_in);
-	i2400m->tx_in += tail_room;
-}
-
-
-/*
- * Check if a skb will fit in the TX queue's current active TX
- * message (if there are still descriptors left unused).
- *
- * Returns:
- *     0 if the message won't fit, 1 if it will.
- *
- * Note:
- *
- *     Assumes a TX message is active (i2400m->tx_msg).
- *
- *     Assumes i2400m->tx_lock is taken, and we use that as a barrier
- */
-static
-unsigned i2400m_tx_fits(struct i2400m *i2400m)
-{
-	struct i2400m_msg_hdr *msg_hdr = i2400m->tx_msg;
-	return le16_to_cpu(msg_hdr->num_pls) < I2400M_TX_PLD_MAX;
-
-}
-
-
-/*
- * Start a new TX message header in the queue.
- *
- * Reserve memory from the base FIFO engine and then just initialize
- * the message header.
- *
- * We allocate the biggest TX message header we might need (one that'd
- * fit I2400M_TX_PLD_MAX payloads) -- when it is closed it will be
- * 'ironed it out' and the unneeded parts removed.
- *
- * NOTE:
- *
- *     Assumes that the previous message is CLOSED (eg: either
- *     there was none or 'i2400m_tx_close()' was called on it).
- *
- *     Assumes i2400m->tx_lock is taken, and we use that as a barrier
- */
-static
-void i2400m_tx_new(struct i2400m *i2400m)
-{
-	struct device *dev = i2400m_dev(i2400m);
-	struct i2400m_msg_hdr *tx_msg;
-	bool try_head = false;
-	BUG_ON(i2400m->tx_msg != NULL);
-	/*
-	 * In certain situations, TX queue might have enough space to
-	 * accommodate the new message header I2400M_TX_PLD_SIZE, but
-	 * might not have enough space to accommodate the payloads.
-	 * Adding bus_tx_room_min padding while allocating a new TX message
-	 * increases the possibilities of including at least one payload of the
-	 * size <= bus_tx_room_min.
-	 */
-try_head:
-	tx_msg = i2400m_tx_fifo_push(i2400m, I2400M_TX_PLD_SIZE,
-				     i2400m->bus_tx_room_min, try_head);
-	if (tx_msg == NULL)
-		goto out;
-	else if (tx_msg == TAIL_FULL) {
-		i2400m_tx_skip_tail(i2400m);
-		d_printf(2, dev, "new TX message: tail full, trying head\n");
-		try_head = true;
-		goto try_head;
-	}
-	memset(tx_msg, 0, I2400M_TX_PLD_SIZE);
-	tx_msg->size = I2400M_TX_PLD_SIZE;
-out:
-	i2400m->tx_msg = tx_msg;
-	d_printf(2, dev, "new TX message: %p @%zu\n",
-		 tx_msg, (void *) tx_msg - i2400m->tx_buf);
-}
-
-
-/*
- * Finalize the current TX message header
- *
- * Sets the message header to be at the proper location depending on
- * how many descriptors we have (check documentation at the file's
- * header for more info on that).
- *
- * Appends padding bytes to make sure the whole TX message (counting
- * from the 'relocated' message header) is aligned to
- * tx_block_size. We assume the _append() code has left enough space
- * in the FIFO for that. If there are no payloads, just pass, as it
- * won't be transferred.
- *
- * The amount of padding bytes depends on how many payloads are in the
- * TX message, as the "msg header and payload descriptors" will be
- * shifted up in the buffer.
- */
-static
-void i2400m_tx_close(struct i2400m *i2400m)
-{
-	struct device *dev = i2400m_dev(i2400m);
-	struct i2400m_msg_hdr *tx_msg = i2400m->tx_msg;
-	struct i2400m_msg_hdr *tx_msg_moved;
-	size_t aligned_size, padding, hdr_size;
-	void *pad_buf;
-	unsigned num_pls;
-
-	if (tx_msg->size & I2400M_TX_SKIP)	/* a skipper? nothing to do */
-		goto out;
-	num_pls = le16_to_cpu(tx_msg->num_pls);
-	/* We can get this situation when a new message was started
-	 * and there was no space to add payloads before hitting the
-	 tail (and taking padding into consideration). */
-	if (num_pls == 0) {
-		tx_msg->size |= I2400M_TX_SKIP;
-		goto out;
-	}
-	/* Relocate the message header
-	 *
-	 * Find the current header size, align it to 16 and if we need
-	 * to move it so the tail is next to the payloads, move it and
-	 * set the offset.
-	 *
-	 * If it moved, this header is good only for transmission; the
-	 * original one (it is kept if we moved) is still used to
-	 * figure out where the next TX message starts (and where the
-	 * offset to the moved header is).
-	 */
-	hdr_size = struct_size(tx_msg, pld, le16_to_cpu(tx_msg->num_pls));
-	hdr_size = ALIGN(hdr_size, I2400M_PL_ALIGN);
-	tx_msg->offset = I2400M_TX_PLD_SIZE - hdr_size;
-	tx_msg_moved = (void *) tx_msg + tx_msg->offset;
-	memmove(tx_msg_moved, tx_msg, hdr_size);
-	tx_msg_moved->size -= tx_msg->offset;
-	/*
-	 * Now figure out how much we have to add to the (moved!)
-	 * message so the size is a multiple of i2400m->bus_tx_block_size.
-	 */
-	aligned_size = ALIGN(tx_msg_moved->size, i2400m->bus_tx_block_size);
-	padding = aligned_size - tx_msg_moved->size;
-	if (padding > 0) {
-		pad_buf = i2400m_tx_fifo_push(i2400m, padding, 0, 0);
-		if (WARN_ON(pad_buf == NULL || pad_buf == TAIL_FULL)) {
-			/* This should not happen -- append should verify
-			 * there is always space left at least to append
-			 * tx_block_size */
-			dev_err(dev,
-				"SW BUG! Possible data leakage from memory the "
-				"device should not read for padding - "
-				"size %lu aligned_size %zu tx_buf %p in "
-				"%zu out %zu\n",
-				(unsigned long) tx_msg_moved->size,
-				aligned_size, i2400m->tx_buf, i2400m->tx_in,
-				i2400m->tx_out);
-		} else
-			memset(pad_buf, 0xad, padding);
-	}
-	tx_msg_moved->padding = cpu_to_le16(padding);
-	tx_msg_moved->size += padding;
-	if (tx_msg != tx_msg_moved)
-		tx_msg->size += padding;
-out:
-	i2400m->tx_msg = NULL;
-}
-
-
-/**
- * i2400m_tx - send the data in a buffer to the device
- *
- * @buf: pointer to the buffer to transmit
- *
- * @buf_len: buffer size
- *
- * @pl_type: type of the payload we are sending.
- *
- * Returns:
- *     0 if ok, < 0 errno code on error (-ENOSPC, if there is no more
- *     room for the message in the queue).
- *
- * Appends the buffer to the TX FIFO and notifies the bus-specific
- * part of the driver that there is new data ready to transmit.
- * Once this function returns, the buffer has been copied, so it can
- * be reused.
- *
- * The steps followed to append are explained in detail in the file
- * header.
- *
- * Whenever we write to a message, we increase msg->size, so it
- * reflects exactly how big the message is. This is needed so that if
- * we concatenate two messages before they can be sent, the code that
- * sends the messages can find the boundaries (and it will replace the
- * size with the real barker before sending).
- *
- * Note:
- *
- *     Cold and warm reset payloads need to be sent as a single
- *     payload, so we handle that.
- */
-int i2400m_tx(struct i2400m *i2400m, const void *buf, size_t buf_len,
-	      enum i2400m_pt pl_type)
-{
-	int result = -ENOSPC;
-	struct device *dev = i2400m_dev(i2400m);
-	unsigned long flags;
-	size_t padded_len;
-	void *ptr;
-	bool try_head = false;
-	unsigned is_singleton = pl_type == I2400M_PT_RESET_WARM
-		|| pl_type == I2400M_PT_RESET_COLD;
-
-	d_fnstart(3, dev, "(i2400m %p skb %p [%zu bytes] pt %u)\n",
-		  i2400m, buf, buf_len, pl_type);
-	padded_len = ALIGN(buf_len, I2400M_PL_ALIGN);
-	d_printf(5, dev, "padded_len %zd buf_len %zd\n", padded_len, buf_len);
-	/* If there is no current TX message, create one; if the
-	 * current one is out of payload slots or we have a singleton,
-	 * close it and start a new one */
-	spin_lock_irqsave(&i2400m->tx_lock, flags);
-	/* If tx_buf is NULL, device is shutdown */
-	if (i2400m->tx_buf == NULL) {
-		result = -ESHUTDOWN;
-		goto error_tx_new;
-	}
-try_new:
-	if (unlikely(i2400m->tx_msg == NULL))
-		i2400m_tx_new(i2400m);
-	else if (unlikely(!i2400m_tx_fits(i2400m)
-			  || (is_singleton && i2400m->tx_msg->num_pls != 0))) {
-		d_printf(2, dev, "closing TX message (fits %u singleton "
-			 "%u num_pls %u)\n", i2400m_tx_fits(i2400m),
-			 is_singleton, i2400m->tx_msg->num_pls);
-		i2400m_tx_close(i2400m);
-		i2400m_tx_new(i2400m);
-	}
-	if (i2400m->tx_msg == NULL)
-		goto error_tx_new;
-	/*
-	 * Check if this skb will fit in the TX queue's current active
-	 * TX message. The total message size must not exceed the maximum
-	 * size of each message I2400M_TX_MSG_SIZE. If it exceeds,
-	 * close the current message and push this skb into the new message.
-	 */
-	if (i2400m->tx_msg->size + padded_len > I2400M_TX_MSG_SIZE) {
-		d_printf(2, dev, "TX: message too big, going new\n");
-		i2400m_tx_close(i2400m);
-		i2400m_tx_new(i2400m);
-	}
-	if (i2400m->tx_msg == NULL)
-		goto error_tx_new;
-	/* So we have a current message header; now append space for
-	 * the message -- if there is not enough, try the head */
-	ptr = i2400m_tx_fifo_push(i2400m, padded_len,
-				  i2400m->bus_tx_block_size, try_head);
-	if (ptr == TAIL_FULL) {	/* Tail is full, try head */
-		d_printf(2, dev, "pl append: tail full\n");
-		i2400m_tx_close(i2400m);
-		i2400m_tx_skip_tail(i2400m);
-		try_head = true;
-		goto try_new;
-	} else if (ptr == NULL) {	/* All full */
-		result = -ENOSPC;
-		d_printf(2, dev, "pl append: all full\n");
-	} else {			/* Got space, copy it, set padding */
-		struct i2400m_msg_hdr *tx_msg = i2400m->tx_msg;
-		unsigned num_pls = le16_to_cpu(tx_msg->num_pls);
-		memcpy(ptr, buf, buf_len);
-		memset(ptr + buf_len, 0xad, padded_len - buf_len);
-		i2400m_pld_set(&tx_msg->pld[num_pls], buf_len, pl_type);
-		d_printf(3, dev, "pld 0x%08x (type 0x%1x len 0x%04zx\n",
-			 le32_to_cpu(tx_msg->pld[num_pls].val),
-			 pl_type, buf_len);
-		tx_msg->num_pls = le16_to_cpu(num_pls+1);
-		tx_msg->size += padded_len;
-		d_printf(2, dev, "TX: appended %zu b (up to %u b) pl #%u\n",
-			padded_len, tx_msg->size, num_pls+1);
-		d_printf(2, dev,
-			 "TX: appended hdr @%zu %zu b pl #%u @%zu %zu/%zu b\n",
-			 (void *)tx_msg - i2400m->tx_buf, (size_t)tx_msg->size,
-			 num_pls+1, ptr - i2400m->tx_buf, buf_len, padded_len);
-		result = 0;
-		if (is_singleton)
-			i2400m_tx_close(i2400m);
-	}
-error_tx_new:
-	spin_unlock_irqrestore(&i2400m->tx_lock, flags);
-	/* kick in most cases, except when the TX subsys is down, as
-	 * it might free space */
-	if (likely(result != -ESHUTDOWN))
-		i2400m->bus_tx_kick(i2400m);
-	d_fnend(3, dev, "(i2400m %p skb %p [%zu bytes] pt %u) = %d\n",
-		i2400m, buf, buf_len, pl_type, result);
-	return result;
-}
-EXPORT_SYMBOL_GPL(i2400m_tx);
-
-
-/**
- * i2400m_tx_msg_get - Get the first TX message in the FIFO to start sending it
- *
- * @i2400m: device descriptors
- * @bus_size: where to place the size of the TX message
- *
- * Called by the bus-specific driver to get the first TX message at
- * the FIF that is ready for transmission.
- *
- * It sets the state in @i2400m to indicate the bus-specific driver is
- * transferring that message (i2400m->tx_msg_size).
- *
- * Once the transfer is completed, call i2400m_tx_msg_sent().
- *
- * Notes:
- *
- *     The size of the TX message to be transmitted might be smaller than
- *     that of the TX message in the FIFO (in case the header was
- *     shorter). Hence, we copy it in @bus_size, for the bus layer to
- *     use. We keep the message's size in i2400m->tx_msg_size so that
- *     when the bus later is done transferring we know how much to
- *     advance the fifo.
- *
- *     We collect statistics here as all the data is available and we
- *     assume it is going to work [see i2400m_tx_msg_sent()].
- */
-struct i2400m_msg_hdr *i2400m_tx_msg_get(struct i2400m *i2400m,
-					 size_t *bus_size)
-{
-	struct device *dev = i2400m_dev(i2400m);
-	struct i2400m_msg_hdr *tx_msg, *tx_msg_moved;
-	unsigned long flags, pls;
-
-	d_fnstart(3, dev, "(i2400m %p bus_size %p)\n", i2400m, bus_size);
-	spin_lock_irqsave(&i2400m->tx_lock, flags);
-	tx_msg_moved = NULL;
-	if (i2400m->tx_buf == NULL)
-		goto out_unlock;
-skip:
-	tx_msg_moved = NULL;
-	if (i2400m->tx_in == i2400m->tx_out) {	/* Empty FIFO? */
-		i2400m->tx_in = 0;
-		i2400m->tx_out = 0;
-		d_printf(2, dev, "TX: FIFO empty: resetting\n");
-		goto out_unlock;
-	}
-	tx_msg = i2400m->tx_buf + i2400m->tx_out % I2400M_TX_BUF_SIZE;
-	if (tx_msg->size & I2400M_TX_SKIP) {	/* skip? */
-		d_printf(2, dev, "TX: skip: msg @%zu (%zu b)\n",
-			 i2400m->tx_out % I2400M_TX_BUF_SIZE,
-			 (size_t) tx_msg->size & ~I2400M_TX_SKIP);
-		i2400m->tx_out += tx_msg->size & ~I2400M_TX_SKIP;
-		goto skip;
-	}
-
-	if (tx_msg->num_pls == 0) {		/* No payloads? */
-		if (tx_msg == i2400m->tx_msg) {	/* open, we are done */
-			d_printf(2, dev,
-				 "TX: FIFO empty: open msg w/o payloads @%zu\n",
-				 (void *) tx_msg - i2400m->tx_buf);
-			tx_msg = NULL;
-			goto out_unlock;
-		} else {			/* closed, skip it */
-			d_printf(2, dev,
-				 "TX: skip msg w/o payloads @%zu (%zu b)\n",
-				 (void *) tx_msg - i2400m->tx_buf,
-				 (size_t) tx_msg->size);
-			i2400m->tx_out += tx_msg->size & ~I2400M_TX_SKIP;
-			goto skip;
-		}
-	}
-	if (tx_msg == i2400m->tx_msg)		/* open msg? */
-		i2400m_tx_close(i2400m);
-
-	/* Now we have a valid TX message (with payloads) to TX */
-	tx_msg_moved = (void *) tx_msg + tx_msg->offset;
-	i2400m->tx_msg_size = tx_msg->size;
-	*bus_size = tx_msg_moved->size;
-	d_printf(2, dev, "TX: pid %d msg hdr at @%zu offset +@%zu "
-		 "size %zu bus_size %zu\n",
-		 current->pid, (void *) tx_msg - i2400m->tx_buf,
-		 (size_t) tx_msg->offset, (size_t) tx_msg->size,
-		 (size_t) tx_msg_moved->size);
-	tx_msg_moved->barker = le32_to_cpu(I2400M_H2D_PREVIEW_BARKER);
-	tx_msg_moved->sequence = le32_to_cpu(i2400m->tx_sequence++);
-
-	pls = le32_to_cpu(tx_msg_moved->num_pls);
-	i2400m->tx_pl_num += pls;		/* Update stats */
-	if (pls > i2400m->tx_pl_max)
-		i2400m->tx_pl_max = pls;
-	if (pls < i2400m->tx_pl_min)
-		i2400m->tx_pl_min = pls;
-	i2400m->tx_num++;
-	i2400m->tx_size_acc += *bus_size;
-	if (*bus_size < i2400m->tx_size_min)
-		i2400m->tx_size_min = *bus_size;
-	if (*bus_size > i2400m->tx_size_max)
-		i2400m->tx_size_max = *bus_size;
-out_unlock:
-	spin_unlock_irqrestore(&i2400m->tx_lock, flags);
-	d_fnstart(3, dev, "(i2400m %p bus_size %p [%zu]) = %p\n",
-		  i2400m, bus_size, *bus_size, tx_msg_moved);
-	return tx_msg_moved;
-}
-EXPORT_SYMBOL_GPL(i2400m_tx_msg_get);
-
-
-/**
- * i2400m_tx_msg_sent - indicate the transmission of a TX message
- *
- * @i2400m: device descriptor
- *
- * Called by the bus-specific driver when a message has been sent;
- * this pops it from the FIFO; and as there is space, start the queue
- * in case it was stopped.
- *
- * Should be called even if the message send failed and we are
- * dropping this TX message.
- */
-void i2400m_tx_msg_sent(struct i2400m *i2400m)
-{
-	unsigned n;
-	unsigned long flags;
-	struct device *dev = i2400m_dev(i2400m);
-
-	d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
-	spin_lock_irqsave(&i2400m->tx_lock, flags);
-	if (i2400m->tx_buf == NULL)
-		goto out_unlock;
-	i2400m->tx_out += i2400m->tx_msg_size;
-	d_printf(2, dev, "TX: sent %zu b\n", (size_t) i2400m->tx_msg_size);
-	i2400m->tx_msg_size = 0;
-	BUG_ON(i2400m->tx_out > i2400m->tx_in);
-	/* level them FIFO markers off */
-	n = i2400m->tx_out / I2400M_TX_BUF_SIZE;
-	i2400m->tx_out %= I2400M_TX_BUF_SIZE;
-	i2400m->tx_in -= n * I2400M_TX_BUF_SIZE;
-out_unlock:
-	spin_unlock_irqrestore(&i2400m->tx_lock, flags);
-	d_fnend(3, dev, "(i2400m %p) = void\n", i2400m);
-}
-EXPORT_SYMBOL_GPL(i2400m_tx_msg_sent);
-
-
-/**
- * i2400m_tx_setup - Initialize the TX queue and infrastructure
- *
- * Make sure we reset the TX sequence to zero, as when this function
- * is called, the firmware has been just restarted. Same rational
- * for tx_in, tx_out, tx_msg_size and tx_msg. We reset them since
- * the memory for TX queue is reallocated.
- */
-int i2400m_tx_setup(struct i2400m *i2400m)
-{
-	int result = 0;
-	void *tx_buf;
-	unsigned long flags;
-
-	/* Do this here only once -- can't do on
-	 * i2400m_hard_start_xmit() as we'll cause race conditions if
-	 * the WS was scheduled on another CPU */
-	INIT_WORK(&i2400m->wake_tx_ws, i2400m_wake_tx_work);
-
-	tx_buf = kmalloc(I2400M_TX_BUF_SIZE, GFP_ATOMIC);
-	if (tx_buf == NULL) {
-		result = -ENOMEM;
-		goto error_kmalloc;
-	}
-
-	/*
-	 * Fail the build if we can't fit at least two maximum size messages
-	 * on the TX FIFO [one being delivered while one is constructed].
-	 */
-	BUILD_BUG_ON(2 * I2400M_TX_MSG_SIZE > I2400M_TX_BUF_SIZE);
-	spin_lock_irqsave(&i2400m->tx_lock, flags);
-	i2400m->tx_sequence = 0;
-	i2400m->tx_in = 0;
-	i2400m->tx_out = 0;
-	i2400m->tx_msg_size = 0;
-	i2400m->tx_msg = NULL;
-	i2400m->tx_buf = tx_buf;
-	spin_unlock_irqrestore(&i2400m->tx_lock, flags);
-	/* Huh? the bus layer has to define this... */
-	BUG_ON(i2400m->bus_tx_block_size == 0);
-error_kmalloc:
-	return result;
-
-}
-
-
-/**
- * i2400m_tx_release - Tear down the TX queue and infrastructure
- */
-void i2400m_tx_release(struct i2400m *i2400m)
-{
-	unsigned long flags;
-	spin_lock_irqsave(&i2400m->tx_lock, flags);
-	kfree(i2400m->tx_buf);
-	i2400m->tx_buf = NULL;
-	spin_unlock_irqrestore(&i2400m->tx_lock, flags);
-}
diff --git a/drivers/net/wimax/i2400m/usb-debug-levels.h b/drivers/net/wimax/i2400m/usb-debug-levels.h
deleted file mode 100644
index b6f7335de765..000000000000
--- a/drivers/net/wimax/i2400m/usb-debug-levels.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Intel Wireless WiMAX Connection 2400m
- * Debug levels control file for the i2400m-usb module
- *
- * Copyright (C) 2007-2008 Intel Corporation <linux-wimax@intel.com>
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- */
-#ifndef __debug_levels__h__
-#define __debug_levels__h__
-
-/* Maximum compile and run time debug level for all submodules */
-#define D_MODULENAME i2400m_usb
-#define D_MASTER CONFIG_WIMAX_I2400M_DEBUG_LEVEL
-
-#include <linux/wimax/debug.h>
-
-/* List of all the enabled modules */
-enum d_module {
-	D_SUBMODULE_DECLARE(usb),
-	D_SUBMODULE_DECLARE(fw),
-	D_SUBMODULE_DECLARE(notif),
-	D_SUBMODULE_DECLARE(rx),
-	D_SUBMODULE_DECLARE(tx),
-};
-
-
-#endif /* #ifndef __debug_levels__h__ */
diff --git a/drivers/net/wimax/i2400m/usb-fw.c b/drivers/net/wimax/i2400m/usb-fw.c
deleted file mode 100644
index 27ab233650d5..000000000000
--- a/drivers/net/wimax/i2400m/usb-fw.c
+++ /dev/null
@@ -1,365 +0,0 @@
-/*
- * Intel Wireless WiMAX Connection 2400m
- * Firmware uploader's USB specifics
- *
- *
- * Copyright (C) 2007-2008 Intel Corporation. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * Intel Corporation <linux-wimax@intel.com>
- * Yanir Lubetkin <yanirx.lubetkin@intel.com>
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *  - Initial implementation
- *
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *  - bus generic/specific split
- *
- * THE PROCEDURE
- *
- * See fw.c for the generic description of this procedure.
- *
- * This file implements only the USB specifics. It boils down to how
- * to send a command and waiting for an acknowledgement from the
- * device.
- *
- * This code (and process) is single threaded. It assumes it is the
- * only thread poking around (guaranteed by fw.c).
- *
- * COMMAND EXECUTION
- *
- * A write URB is posted with the buffer to the bulk output endpoint.
- *
- * ACK RECEPTION
- *
- * We just post a URB to the notification endpoint and wait for
- * data. We repeat until we get all the data we expect (as indicated
- * by the call from the bus generic code).
- *
- * The data is not read from the bulk in endpoint for boot mode.
- *
- * ROADMAP
- *
- * i2400mu_bus_bm_cmd_send
- *   i2400m_bm_cmd_prepare...
- *   i2400mu_tx_bulk_out
- *
- * i2400mu_bus_bm_wait_for_ack
- *   i2400m_notif_submit
- */
-#include <linux/usb.h>
-#include <linux/gfp.h>
-#include "i2400m-usb.h"
-
-
-#define D_SUBMODULE fw
-#include "usb-debug-levels.h"
-
-
-/*
- * Synchronous write to the device
- *
- * Takes care of updating EDC counts and thus, handle device errors.
- */
-static
-ssize_t i2400mu_tx_bulk_out(struct i2400mu *i2400mu, void *buf, size_t buf_size)
-{
-	int result;
-	struct device *dev = &i2400mu->usb_iface->dev;
-	int len;
-	struct usb_endpoint_descriptor *epd;
-	int pipe, do_autopm = 1;
-
-	result = usb_autopm_get_interface(i2400mu->usb_iface);
-	if (result < 0) {
-		dev_err(dev, "BM-CMD: can't get autopm: %d\n", result);
-		do_autopm = 0;
-	}
-	epd = usb_get_epd(i2400mu->usb_iface, i2400mu->endpoint_cfg.bulk_out);
-	pipe = usb_sndbulkpipe(i2400mu->usb_dev, epd->bEndpointAddress);
-retry:
-	result = usb_bulk_msg(i2400mu->usb_dev, pipe, buf, buf_size, &len, 200);
-	switch (result) {
-	case 0:
-		if (len != buf_size) {
-			dev_err(dev, "BM-CMD: short write (%u B vs %zu "
-				"expected)\n", len, buf_size);
-			result = -EIO;
-			break;
-		}
-		result = len;
-		break;
-	case -EPIPE:
-		/*
-		 * Stall -- maybe the device is choking with our
-		 * requests. Clear it and give it some time. If they
-		 * happen to often, it might be another symptom, so we
-		 * reset.
-		 *
-		 * No error handling for usb_clear_halt(0; if it
-		 * works, the retry works; if it fails, this switch
-		 * does the error handling for us.
-		 */
-		if (edc_inc(&i2400mu->urb_edc,
-			    10 * EDC_MAX_ERRORS, EDC_ERROR_TIMEFRAME)) {
-			dev_err(dev, "BM-CMD: too many stalls in "
-				"URB; resetting device\n");
-			usb_queue_reset_device(i2400mu->usb_iface);
-		} else {
-			usb_clear_halt(i2400mu->usb_dev, pipe);
-			msleep(10);	/* give the device some time */
-			goto retry;
-		}
-		fallthrough;
-	case -EINVAL:			/* while removing driver */
-	case -ENODEV:			/* dev disconnect ... */
-	case -ENOENT:			/* just ignore it */
-	case -ESHUTDOWN:		/* and exit */
-	case -ECONNRESET:
-		result = -ESHUTDOWN;
-		break;
-	case -ETIMEDOUT:			/* bah... */
-		break;
-	default:				/* any other? */
-		if (edc_inc(&i2400mu->urb_edc,
-			    EDC_MAX_ERRORS, EDC_ERROR_TIMEFRAME)) {
-				dev_err(dev, "BM-CMD: maximum errors in "
-					"URB exceeded; resetting device\n");
-				usb_queue_reset_device(i2400mu->usb_iface);
-				result = -ENODEV;
-				break;
-		}
-		dev_err(dev, "BM-CMD: URB error %d, retrying\n",
-			result);
-		goto retry;
-	}
-	if (do_autopm)
-		usb_autopm_put_interface(i2400mu->usb_iface);
-	return result;
-}
-
-
-/*
- * Send a boot-mode command over the bulk-out pipe
- *
- * Command can be a raw command, which requires no preparation (and
- * which might not even be following the command format). Checks that
- * the right amount of data was transferred.
- *
- * To satisfy USB requirements (no onstack, vmalloc or in data segment
- * buffers), we copy the command to i2400m->bm_cmd_buf and send it from
- * there.
- *
- * @flags: pass thru from i2400m_bm_cmd()
- * @return: cmd_size if ok, < 0 errno code on error.
- */
-ssize_t i2400mu_bus_bm_cmd_send(struct i2400m *i2400m,
-				const struct i2400m_bootrom_header *_cmd,
-				size_t cmd_size, int flags)
-{
-	ssize_t result;
-	struct device *dev = i2400m_dev(i2400m);
-	struct i2400mu *i2400mu = container_of(i2400m, struct i2400mu, i2400m);
-	int opcode = _cmd == NULL ? -1 : i2400m_brh_get_opcode(_cmd);
-	struct i2400m_bootrom_header *cmd;
-	size_t cmd_size_a = ALIGN(cmd_size, 16);	/* USB restriction */
-
-	d_fnstart(8, dev, "(i2400m %p cmd %p size %zu)\n",
-		  i2400m, _cmd, cmd_size);
-	result = -E2BIG;
-	if (cmd_size > I2400M_BM_CMD_BUF_SIZE)
-		goto error_too_big;
-	if (_cmd != i2400m->bm_cmd_buf)
-		memmove(i2400m->bm_cmd_buf, _cmd, cmd_size);
-	cmd = i2400m->bm_cmd_buf;
-	if (cmd_size_a > cmd_size)			/* Zero pad space */
-		memset(i2400m->bm_cmd_buf + cmd_size, 0, cmd_size_a - cmd_size);
-	if ((flags & I2400M_BM_CMD_RAW) == 0) {
-		if (WARN_ON(i2400m_brh_get_response_required(cmd) == 0))
-			dev_warn(dev, "SW BUG: response_required == 0\n");
-		i2400m_bm_cmd_prepare(cmd);
-	}
-	result = i2400mu_tx_bulk_out(i2400mu, i2400m->bm_cmd_buf, cmd_size);
-	if (result < 0) {
-		dev_err(dev, "boot-mode cmd %d: cannot send: %zd\n",
-			opcode, result);
-		goto error_cmd_send;
-	}
-	if (result != cmd_size) {		/* all was transferred? */
-		dev_err(dev, "boot-mode cmd %d: incomplete transfer "
-			"(%zd vs %zu submitted)\n",  opcode, result, cmd_size);
-		result = -EIO;
-		goto error_cmd_size;
-	}
-error_cmd_size:
-error_cmd_send:
-error_too_big:
-	d_fnend(8, dev, "(i2400m %p cmd %p size %zu) = %zd\n",
-		i2400m, _cmd, cmd_size, result);
-	return result;
-}
-
-
-static
-void __i2400mu_bm_notif_cb(struct urb *urb)
-{
-	complete(urb->context);
-}
-
-
-/*
- * submit a read to the notification endpoint
- *
- * @i2400m: device descriptor
- * @urb: urb to use
- * @completion: completion variable to complete when done
- *
- * Data is always read to i2400m->bm_ack_buf
- */
-static
-int i2400mu_notif_submit(struct i2400mu *i2400mu, struct urb *urb,
-			 struct completion *completion)
-{
-	struct i2400m *i2400m = &i2400mu->i2400m;
-	struct usb_endpoint_descriptor *epd;
-	int pipe;
-
-	epd = usb_get_epd(i2400mu->usb_iface,
-			  i2400mu->endpoint_cfg.notification);
-	pipe = usb_rcvintpipe(i2400mu->usb_dev, epd->bEndpointAddress);
-	usb_fill_int_urb(urb, i2400mu->usb_dev, pipe,
-			 i2400m->bm_ack_buf, I2400M_BM_ACK_BUF_SIZE,
-			 __i2400mu_bm_notif_cb, completion,
-			 epd->bInterval);
-	return usb_submit_urb(urb, GFP_KERNEL);
-}
-
-
-/*
- * Read an ack from  the notification endpoint
- *
- * @i2400m:
- * @_ack: pointer to where to store the read data
- * @ack_size: how many bytes we should read
- *
- * Returns: < 0 errno code on error; otherwise, amount of received bytes.
- *
- * Submits a notification read, appends the read data to the given ack
- * buffer and then repeats (until @ack_size bytes have been
- * received).
- */
-ssize_t i2400mu_bus_bm_wait_for_ack(struct i2400m *i2400m,
-				    struct i2400m_bootrom_header *_ack,
-				    size_t ack_size)
-{
-	ssize_t result = -ENOMEM;
-	struct device *dev = i2400m_dev(i2400m);
-	struct i2400mu *i2400mu = container_of(i2400m, struct i2400mu, i2400m);
-	struct urb notif_urb;
-	void *ack = _ack;
-	size_t offset, len;
-	long val;
-	int do_autopm = 1;
-	DECLARE_COMPLETION_ONSTACK(notif_completion);
-
-	d_fnstart(8, dev, "(i2400m %p ack %p size %zu)\n",
-		  i2400m, ack, ack_size);
-	BUG_ON(_ack == i2400m->bm_ack_buf);
-	result = usb_autopm_get_interface(i2400mu->usb_iface);
-	if (result < 0) {
-		dev_err(dev, "BM-ACK: can't get autopm: %d\n", (int) result);
-		do_autopm = 0;
-	}
-	usb_init_urb(&notif_urb);	/* ready notifications */
-	usb_get_urb(&notif_urb);
-	offset = 0;
-	while (offset < ack_size) {
-		init_completion(&notif_completion);
-		result = i2400mu_notif_submit(i2400mu, &notif_urb,
-					      &notif_completion);
-		if (result < 0)
-			goto error_notif_urb_submit;
-		val = wait_for_completion_interruptible_timeout(
-			&notif_completion, HZ);
-		if (val == 0) {
-			result = -ETIMEDOUT;
-			usb_kill_urb(&notif_urb);	/* Timedout */
-			goto error_notif_wait;
-		}
-		if (val == -ERESTARTSYS) {
-			result = -EINTR;		/* Interrupted */
-			usb_kill_urb(&notif_urb);
-			goto error_notif_wait;
-		}
-		result = notif_urb.status;		/* How was the ack? */
-		switch (result) {
-		case 0:
-			break;
-		case -EINVAL:			/* while removing driver */
-		case -ENODEV:			/* dev disconnect ... */
-		case -ENOENT:			/* just ignore it */
-		case -ESHUTDOWN:		/* and exit */
-		case -ECONNRESET:
-			result = -ESHUTDOWN;
-			goto error_dev_gone;
-		default:				/* any other? */
-			usb_kill_urb(&notif_urb);	/* Timedout */
-			if (edc_inc(&i2400mu->urb_edc,
-				    EDC_MAX_ERRORS, EDC_ERROR_TIMEFRAME))
-				goto error_exceeded;
-			dev_err(dev, "BM-ACK: URB error %d, "
-				"retrying\n", notif_urb.status);
-			continue;	/* retry */
-		}
-		if (notif_urb.actual_length == 0) {
-			d_printf(6, dev, "ZLP received, retrying\n");
-			continue;
-		}
-		/* Got data, append it to the buffer */
-		len = min(ack_size - offset, (size_t) notif_urb.actual_length);
-		memcpy(ack + offset, i2400m->bm_ack_buf, len);
-		offset += len;
-	}
-	result = offset;
-error_notif_urb_submit:
-error_notif_wait:
-error_dev_gone:
-out:
-	if (do_autopm)
-		usb_autopm_put_interface(i2400mu->usb_iface);
-	d_fnend(8, dev, "(i2400m %p ack %p size %zu) = %ld\n",
-		i2400m, ack, ack_size, (long) result);
-	usb_put_urb(&notif_urb);
-	return result;
-
-error_exceeded:
-	dev_err(dev, "bm: maximum errors in notification URB exceeded; "
-		"resetting device\n");
-	usb_queue_reset_device(i2400mu->usb_iface);
-	goto out;
-}
diff --git a/drivers/net/wimax/i2400m/usb-notif.c b/drivers/net/wimax/i2400m/usb-notif.c
deleted file mode 100644
index 5d429f816125..000000000000
--- a/drivers/net/wimax/i2400m/usb-notif.c
+++ /dev/null
@@ -1,258 +0,0 @@
-/*
- * Intel Wireless WiMAX Connection 2400m over USB
- * Notification handling
- *
- *
- * Copyright (C) 2007-2008 Intel Corporation. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * Intel Corporation <linux-wimax@intel.com>
- * Yanir Lubetkin <yanirx.lubetkin@intel.com>
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *  - Initial implementation
- *
- *
- * The notification endpoint is active when the device is not in boot
- * mode; in here we just read and get notifications; based on those,
- * we act to either reinitialize the device after a reboot or to
- * submit a RX request.
- *
- * ROADMAP
- *
- * i2400mu_usb_notification_setup()
- *
- * i2400mu_usb_notification_release()
- *
- * i2400mu_usb_notification_cb()	Called when a URB is ready
- *   i2400mu_notif_grok()
- *     i2400m_is_boot_barker()
- *     i2400m_dev_reset_handle()
- *     i2400mu_rx_kick()
- */
-#include <linux/usb.h>
-#include <linux/slab.h>
-#include "i2400m-usb.h"
-
-
-#define D_SUBMODULE notif
-#include "usb-debug-levels.h"
-
-
-static const
-__le32 i2400m_ZERO_BARKER[4] = { 0, 0, 0, 0 };
-
-
-/*
- * Process a received notification
- *
- * In normal operation mode, we can only receive two types of payloads
- * on the notification endpoint:
- *
- *   - a reboot barker, we do a bootstrap (the device has reseted).
- *
- *   - a block of zeroes: there is pending data in the IN endpoint
- */
-static
-int i2400mu_notification_grok(struct i2400mu *i2400mu, const void *buf,
-				 size_t buf_len)
-{
-	int ret;
-	struct device *dev = &i2400mu->usb_iface->dev;
-	struct i2400m *i2400m = &i2400mu->i2400m;
-
-	d_fnstart(4, dev, "(i2400m %p buf %p buf_len %zu)\n",
-		  i2400mu, buf, buf_len);
-	ret = -EIO;
-	if (buf_len < sizeof(i2400m_ZERO_BARKER))
-		/* Not a bug, just ignore */
-		goto error_bad_size;
-	ret = 0;
-	if (!memcmp(i2400m_ZERO_BARKER, buf, sizeof(i2400m_ZERO_BARKER))) {
-		i2400mu_rx_kick(i2400mu);
-		goto out;
-	}
-	ret = i2400m_is_boot_barker(i2400m, buf, buf_len);
-	if (unlikely(ret >= 0))
-		ret = i2400m_dev_reset_handle(i2400m, "device rebooted");
-	else	/* Unknown or unexpected data in the notif message */
-		i2400m_unknown_barker(i2400m, buf, buf_len);
-error_bad_size:
-out:
-	d_fnend(4, dev, "(i2400m %p buf %p buf_len %zu) = %d\n",
-		i2400mu, buf, buf_len, ret);
-	return ret;
-}
-
-
-/*
- * URB callback for the notification endpoint
- *
- * @urb: the urb received from the notification endpoint
- *
- * This function will just process the USB side of the transaction,
- * checking everything is fine, pass the processing to
- * i2400m_notification_grok() and resubmit the URB.
- */
-static
-void i2400mu_notification_cb(struct urb *urb)
-{
-	int ret;
-	struct i2400mu *i2400mu = urb->context;
-	struct device *dev = &i2400mu->usb_iface->dev;
-
-	d_fnstart(4, dev, "(urb %p status %d actual_length %d)\n",
-		  urb, urb->status, urb->actual_length);
-	ret = urb->status;
-	switch (ret) {
-	case 0:
-		ret = i2400mu_notification_grok(i2400mu, urb->transfer_buffer,
-						urb->actual_length);
-		if (ret == -EIO && edc_inc(&i2400mu->urb_edc, EDC_MAX_ERRORS,
-					   EDC_ERROR_TIMEFRAME))
-			goto error_exceeded;
-		if (ret == -ENOMEM)	/* uff...power cycle? shutdown? */
-			goto error_exceeded;
-		break;
-	case -EINVAL:			/* while removing driver */
-	case -ENODEV:			/* dev disconnect ... */
-	case -ENOENT:			/* ditto */
-	case -ESHUTDOWN:		/* URB killed */
-	case -ECONNRESET:		/* disconnection */
-		goto out;		/* Notify around */
-	default:			/* Some error? */
-		if (edc_inc(&i2400mu->urb_edc,
-			    EDC_MAX_ERRORS, EDC_ERROR_TIMEFRAME))
-			goto error_exceeded;
-		dev_err(dev, "notification: URB error %d, retrying\n",
-			urb->status);
-	}
-	usb_mark_last_busy(i2400mu->usb_dev);
-	ret = usb_submit_urb(i2400mu->notif_urb, GFP_ATOMIC);
-	switch (ret) {
-	case 0:
-	case -EINVAL:			/* while removing driver */
-	case -ENODEV:			/* dev disconnect ... */
-	case -ENOENT:			/* ditto */
-	case -ESHUTDOWN:		/* URB killed */
-	case -ECONNRESET:		/* disconnection */
-		break;			/* just ignore */
-	default:			/* Some error? */
-		dev_err(dev, "notification: cannot submit URB: %d\n", ret);
-		goto error_submit;
-	}
-	d_fnend(4, dev, "(urb %p status %d actual_length %d) = void\n",
-		urb, urb->status, urb->actual_length);
-	return;
-
-error_exceeded:
-	dev_err(dev, "maximum errors in notification URB exceeded; "
-		"resetting device\n");
-error_submit:
-	usb_queue_reset_device(i2400mu->usb_iface);
-out:
-	d_fnend(4, dev, "(urb %p status %d actual_length %d) = void\n",
-		urb, urb->status, urb->actual_length);
-}
-
-
-/*
- * setup the notification endpoint
- *
- * @i2400m: device descriptor
- *
- * This procedure prepares the notification urb and handler for receiving
- * unsolicited barkers from the device.
- */
-int i2400mu_notification_setup(struct i2400mu *i2400mu)
-{
-	struct device *dev = &i2400mu->usb_iface->dev;
-	int usb_pipe, ret = 0;
-	struct usb_endpoint_descriptor *epd;
-	char *buf;
-
-	d_fnstart(4, dev, "(i2400m %p)\n", i2400mu);
-	buf = kmalloc(I2400MU_MAX_NOTIFICATION_LEN, GFP_KERNEL | GFP_DMA);
-	if (buf == NULL) {
-		ret = -ENOMEM;
-		goto error_buf_alloc;
-	}
-
-	i2400mu->notif_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!i2400mu->notif_urb) {
-		ret = -ENOMEM;
-		goto error_alloc_urb;
-	}
-	epd = usb_get_epd(i2400mu->usb_iface,
-			  i2400mu->endpoint_cfg.notification);
-	usb_pipe = usb_rcvintpipe(i2400mu->usb_dev, epd->bEndpointAddress);
-	usb_fill_int_urb(i2400mu->notif_urb, i2400mu->usb_dev, usb_pipe,
-			 buf, I2400MU_MAX_NOTIFICATION_LEN,
-			 i2400mu_notification_cb, i2400mu, epd->bInterval);
-	ret = usb_submit_urb(i2400mu->notif_urb, GFP_KERNEL);
-	if (ret != 0) {
-		dev_err(dev, "notification: cannot submit URB: %d\n", ret);
-		goto error_submit;
-	}
-	d_fnend(4, dev, "(i2400m %p) = %d\n", i2400mu, ret);
-	return ret;
-
-error_submit:
-	usb_free_urb(i2400mu->notif_urb);
-error_alloc_urb:
-	kfree(buf);
-error_buf_alloc:
-	d_fnend(4, dev, "(i2400m %p) = %d\n", i2400mu, ret);
-	return ret;
-}
-
-
-/*
- * Tear down of the notification mechanism
- *
- * @i2400m: device descriptor
- *
- * Kill the interrupt endpoint urb, free any allocated resources.
- *
- * We need to check if we have done it before as for example,
- * _suspend() call this; if after a suspend() we get a _disconnect()
- * (as the case is when hibernating), nothing bad happens.
- */
-void i2400mu_notification_release(struct i2400mu *i2400mu)
-{
-	struct device *dev = &i2400mu->usb_iface->dev;
-
-	d_fnstart(4, dev, "(i2400mu %p)\n", i2400mu);
-	if (i2400mu->notif_urb != NULL) {
-		usb_kill_urb(i2400mu->notif_urb);
-		kfree(i2400mu->notif_urb->transfer_buffer);
-		usb_free_urb(i2400mu->notif_urb);
-		i2400mu->notif_urb = NULL;
-	}
-	d_fnend(4, dev, "(i2400mu %p)\n", i2400mu);
-}
diff --git a/drivers/net/wimax/i2400m/usb-rx.c b/drivers/net/wimax/i2400m/usb-rx.c
deleted file mode 100644
index 5b64bda7d9e7..000000000000
--- a/drivers/net/wimax/i2400m/usb-rx.c
+++ /dev/null
@@ -1,462 +0,0 @@
-/*
- * Intel Wireless WiMAX Connection 2400m
- * USB RX handling
- *
- *
- * Copyright (C) 2007-2008 Intel Corporation. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * Intel Corporation <linux-wimax@intel.com>
- * Yanir Lubetkin <yanirx.lubetkin@intel.com>
- *  - Initial implementation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *  - Use skb_clone(), break up processing in chunks
- *  - Split transport/device specific
- *  - Make buffer size dynamic to exert less memory pressure
- *
- *
- * This handles the RX path on USB.
- *
- * When a notification is received that says 'there is RX data ready',
- * we call i2400mu_rx_kick(); that wakes up the RX kthread, which
- * reads a buffer from USB and passes it to i2400m_rx() in the generic
- * handling code. The RX buffer has an specific format that is
- * described in rx.c.
- *
- * We use a kernel thread in a loop because:
- *
- *  - we want to be able to call the USB power management get/put
- *    functions (blocking) before each transaction.
- *
- *  - We might get a lot of notifications and we don't want to submit
- *    a zillion reads; by serializing, we are throttling.
- *
- *  - RX data processing can get heavy enough so that it is not
- *    appropriate for doing it in the USB callback; thus we run it in a
- *    process context.
- *
- * We provide a read buffer of an arbitrary size (short of a page); if
- * the callback reports -EOVERFLOW, it means it was too small, so we
- * just double the size and retry (being careful to append, as
- * sometimes the device provided some data). Every now and then we
- * check if the average packet size is smaller than the current packet
- * size and if so, we halve it. At the end, the size of the
- * preallocated buffer should be following the average received
- * transaction size, adapting dynamically to it.
- *
- * ROADMAP
- *
- * i2400mu_rx_kick()		   Called from notif.c when we get a
- *   			           'data ready' notification
- * i2400mu_rxd()                   Kernel RX daemon
- *   i2400mu_rx()                  Receive USB data
- *   i2400m_rx()                   Send data to generic i2400m RX handling
- *
- * i2400mu_rx_setup()              called from i2400mu_bus_dev_start()
- *
- * i2400mu_rx_release()            called from i2400mu_bus_dev_stop()
- */
-#include <linux/workqueue.h>
-#include <linux/slab.h>
-#include <linux/usb.h>
-#include "i2400m-usb.h"
-
-
-#define D_SUBMODULE rx
-#include "usb-debug-levels.h"
-
-/*
- * Dynamic RX size
- *
- * We can't let the rx_size be a multiple of 512 bytes (the RX
- * endpoint's max packet size). On some USB host controllers (we
- * haven't been able to fully characterize which), if the device is
- * about to send (for example) X bytes and we only post a buffer to
- * receive n*512, it will fail to mark that as babble (so that
- * i2400mu_rx() [case -EOVERFLOW] can resize the buffer and get the
- * rest).
- *
- * So on growing or shrinking, if it is a multiple of the
- * maxpacketsize, we remove some (instead of incresing some, so in a
- * buddy allocator we try to waste less space).
- *
- * Note we also need a hook for this on i2400mu_rx() -- when we do the
- * first read, we are sure we won't hit this spot because
- * i240mm->rx_size has been set properly. However, if we have to
- * double because of -EOVERFLOW, when we launch the read to get the
- * rest of the data, we *have* to make sure that also is not a
- * multiple of the max_pkt_size.
- */
-
-static
-size_t i2400mu_rx_size_grow(struct i2400mu *i2400mu)
-{
-	struct device *dev = &i2400mu->usb_iface->dev;
-	size_t rx_size;
-	const size_t max_pkt_size = 512;
-
-	rx_size = 2 * i2400mu->rx_size;
-	if (rx_size % max_pkt_size == 0) {
-		rx_size -= 8;
-		d_printf(1, dev,
-			 "RX: expected size grew to %zu [adjusted -8] "
-			 "from %zu\n",
-			 rx_size, i2400mu->rx_size);
-	} else
-		d_printf(1, dev,
-			 "RX: expected size grew to %zu from %zu\n",
-			 rx_size, i2400mu->rx_size);
-	return rx_size;
-}
-
-
-static
-void i2400mu_rx_size_maybe_shrink(struct i2400mu *i2400mu)
-{
-	const size_t max_pkt_size = 512;
-	struct device *dev = &i2400mu->usb_iface->dev;
-
-	if (unlikely(i2400mu->rx_size_cnt >= 100
-		     && i2400mu->rx_size_auto_shrink)) {
-		size_t avg_rx_size =
-			i2400mu->rx_size_acc / i2400mu->rx_size_cnt;
-		size_t new_rx_size = i2400mu->rx_size / 2;
-		if (avg_rx_size < new_rx_size) {
-			if (new_rx_size % max_pkt_size == 0) {
-				new_rx_size -= 8;
-				d_printf(1, dev,
-					 "RX: expected size shrank to %zu "
-					 "[adjusted -8] from %zu\n",
-					 new_rx_size, i2400mu->rx_size);
-			} else
-				d_printf(1, dev,
-					 "RX: expected size shrank to %zu "
-					 "from %zu\n",
-					 new_rx_size, i2400mu->rx_size);
-			i2400mu->rx_size = new_rx_size;
-			i2400mu->rx_size_cnt = 0;
-			i2400mu->rx_size_acc = i2400mu->rx_size;
-		}
-	}
-}
-
-/*
- * Receive a message with payloads from the USB bus into an skb
- *
- * @i2400mu: USB device descriptor
- * @rx_skb: skb where to place the received message
- *
- * Deals with all the USB-specifics of receiving, dynamically
- * increasing the buffer size if so needed. Returns the payload in the
- * skb, ready to process. On a zero-length packet, we retry.
- *
- * On soft USB errors, we retry (until they become too frequent and
- * then are promoted to hard); on hard USB errors, we reset the
- * device. On other errors (skb realloacation, we just drop it and
- * hope for the next invocation to solve it).
- *
- * Returns: pointer to the skb if ok, ERR_PTR on error.
- *   NOTE: this function might realloc the skb (if it is too small),
- *   so always update with the one returned.
- *   ERR_PTR() is < 0 on error.
- *   Will return NULL if it cannot reallocate -- this can be
- *   considered a transient retryable error.
- */
-static
-struct sk_buff *i2400mu_rx(struct i2400mu *i2400mu, struct sk_buff *rx_skb)
-{
-	int result = 0;
-	struct device *dev = &i2400mu->usb_iface->dev;
-	int usb_pipe, read_size, rx_size, do_autopm;
-	struct usb_endpoint_descriptor *epd;
-	const size_t max_pkt_size = 512;
-
-	d_fnstart(4, dev, "(i2400mu %p)\n", i2400mu);
-	do_autopm = atomic_read(&i2400mu->do_autopm);
-	result = do_autopm ?
-		usb_autopm_get_interface(i2400mu->usb_iface) : 0;
-	if (result < 0) {
-		dev_err(dev, "RX: can't get autopm: %d\n", result);
-		do_autopm = 0;
-	}
-	epd = usb_get_epd(i2400mu->usb_iface, i2400mu->endpoint_cfg.bulk_in);
-	usb_pipe = usb_rcvbulkpipe(i2400mu->usb_dev, epd->bEndpointAddress);
-retry:
-	rx_size = skb_end_pointer(rx_skb) - rx_skb->data - rx_skb->len;
-	if (unlikely(rx_size % max_pkt_size == 0)) {
-		rx_size -= 8;
-		d_printf(1, dev, "RX: rx_size adapted to %d [-8]\n", rx_size);
-	}
-	result = usb_bulk_msg(
-		i2400mu->usb_dev, usb_pipe, rx_skb->data + rx_skb->len,
-		rx_size, &read_size, 200);
-	usb_mark_last_busy(i2400mu->usb_dev);
-	switch (result) {
-	case 0:
-		if (read_size == 0)
-			goto retry;	/* ZLP, just resubmit */
-		skb_put(rx_skb, read_size);
-		break;
-	case -EPIPE:
-		/*
-		 * Stall -- maybe the device is choking with our
-		 * requests. Clear it and give it some time. If they
-		 * happen to often, it might be another symptom, so we
-		 * reset.
-		 *
-		 * No error handling for usb_clear_halt(0; if it
-		 * works, the retry works; if it fails, this switch
-		 * does the error handling for us.
-		 */
-		if (edc_inc(&i2400mu->urb_edc,
-			    10 * EDC_MAX_ERRORS, EDC_ERROR_TIMEFRAME)) {
-			dev_err(dev, "BM-CMD: too many stalls in "
-				"URB; resetting device\n");
-			goto do_reset;
-		}
-		usb_clear_halt(i2400mu->usb_dev, usb_pipe);
-		msleep(10);	/* give the device some time */
-		goto retry;
-	case -EINVAL:			/* while removing driver */
-	case -ENODEV:			/* dev disconnect ... */
-	case -ENOENT:			/* just ignore it */
-	case -ESHUTDOWN:
-	case -ECONNRESET:
-		break;
-	case -EOVERFLOW: {		/* too small, reallocate */
-		struct sk_buff *new_skb;
-		rx_size = i2400mu_rx_size_grow(i2400mu);
-		if (rx_size <= (1 << 16))	/* cap it */
-			i2400mu->rx_size = rx_size;
-		else if (printk_ratelimit()) {
-			dev_err(dev, "BUG? rx_size up to %d\n", rx_size);
-			result = -EINVAL;
-			goto out;
-		}
-		skb_put(rx_skb, read_size);
-		new_skb = skb_copy_expand(rx_skb, 0, rx_size - rx_skb->len,
-					  GFP_KERNEL);
-		if (new_skb == NULL) {
-			kfree_skb(rx_skb);
-			rx_skb = NULL;
-			goto out;	/* drop it...*/
-		}
-		kfree_skb(rx_skb);
-		rx_skb = new_skb;
-		i2400mu->rx_size_cnt = 0;
-		i2400mu->rx_size_acc = i2400mu->rx_size;
-		d_printf(1, dev, "RX: size changed to %d, received %d, "
-			 "copied %d, capacity %ld\n",
-			 rx_size, read_size, rx_skb->len,
-			 (long) skb_end_offset(new_skb));
-		goto retry;
-	}
-		/* In most cases, it happens due to the hardware scheduling a
-		 * read when there was no data - unfortunately, we have no way
-		 * to tell this timeout from a USB timeout. So we just ignore
-		 * it. */
-	case -ETIMEDOUT:
-		dev_err(dev, "RX: timeout: %d\n", result);
-		result = 0;
-		break;
-	default:			/* Any error */
-		if (edc_inc(&i2400mu->urb_edc,
-			    EDC_MAX_ERRORS, EDC_ERROR_TIMEFRAME))
-			goto error_reset;
-		dev_err(dev, "RX: error receiving URB: %d, retrying\n", result);
-		goto retry;
-	}
-out:
-	if (do_autopm)
-		usb_autopm_put_interface(i2400mu->usb_iface);
-	d_fnend(4, dev, "(i2400mu %p) = %p\n", i2400mu, rx_skb);
-	return rx_skb;
-
-error_reset:
-	dev_err(dev, "RX: maximum errors in URB exceeded; "
-		"resetting device\n");
-do_reset:
-	usb_queue_reset_device(i2400mu->usb_iface);
-	rx_skb = ERR_PTR(result);
-	goto out;
-}
-
-
-/*
- * Kernel thread for USB reception of data
- *
- * This thread waits for a kick; once kicked, it will allocate an skb
- * and receive a single message to it from USB (using
- * i2400mu_rx()). Once received, it is passed to the generic i2400m RX
- * code for processing.
- *
- * When done processing, it runs some dirty statistics to verify if
- * the last 100 messages received were smaller than half of the
- * current RX buffer size. In that case, the RX buffer size is
- * halved. This will helps lowering the pressure on the memory
- * allocator.
- *
- * Hard errors force the thread to exit.
- */
-static
-int i2400mu_rxd(void *_i2400mu)
-{
-	int result = 0;
-	struct i2400mu *i2400mu = _i2400mu;
-	struct i2400m *i2400m = &i2400mu->i2400m;
-	struct device *dev = &i2400mu->usb_iface->dev;
-	struct net_device *net_dev = i2400m->wimax_dev.net_dev;
-	size_t pending;
-	int rx_size;
-	struct sk_buff *rx_skb;
-	unsigned long flags;
-
-	d_fnstart(4, dev, "(i2400mu %p)\n", i2400mu);
-	spin_lock_irqsave(&i2400m->rx_lock, flags);
-	BUG_ON(i2400mu->rx_kthread != NULL);
-	i2400mu->rx_kthread = current;
-	spin_unlock_irqrestore(&i2400m->rx_lock, flags);
-	while (1) {
-		d_printf(2, dev, "RX: waiting for messages\n");
-		pending = 0;
-		wait_event_interruptible(
-			i2400mu->rx_wq,
-			(kthread_should_stop()	/* check this first! */
-			 || (pending = atomic_read(&i2400mu->rx_pending_count)))
-			);
-		if (kthread_should_stop())
-			break;
-		if (pending == 0)
-			continue;
-		rx_size = i2400mu->rx_size;
-		d_printf(2, dev, "RX: reading up to %d bytes\n", rx_size);
-		rx_skb = __netdev_alloc_skb(net_dev, rx_size, GFP_KERNEL);
-		if (rx_skb == NULL) {
-			dev_err(dev, "RX: can't allocate skb [%d bytes]\n",
-				rx_size);
-			msleep(50);	/* give it some time? */
-			continue;
-		}
-
-		/* Receive the message with the payloads */
-		rx_skb = i2400mu_rx(i2400mu, rx_skb);
-		result = PTR_ERR(rx_skb);
-		if (IS_ERR(rx_skb))
-			goto out;
-		atomic_dec(&i2400mu->rx_pending_count);
-		if (rx_skb == NULL || rx_skb->len == 0) {
-			/* some "ignorable" condition */
-			kfree_skb(rx_skb);
-			continue;
-		}
-
-		/* Deliver the message to the generic i2400m code */
-		i2400mu->rx_size_cnt++;
-		i2400mu->rx_size_acc += rx_skb->len;
-		result = i2400m_rx(i2400m, rx_skb);
-		if (result == -EIO
-		    && edc_inc(&i2400mu->urb_edc,
-			       EDC_MAX_ERRORS, EDC_ERROR_TIMEFRAME)) {
-			goto error_reset;
-		}
-
-		/* Maybe adjust RX buffer size */
-		i2400mu_rx_size_maybe_shrink(i2400mu);
-	}
-	result = 0;
-out:
-	spin_lock_irqsave(&i2400m->rx_lock, flags);
-	i2400mu->rx_kthread = NULL;
-	spin_unlock_irqrestore(&i2400m->rx_lock, flags);
-	d_fnend(4, dev, "(i2400mu %p) = %d\n", i2400mu, result);
-	return result;
-
-error_reset:
-	dev_err(dev, "RX: maximum errors in received buffer exceeded; "
-		"resetting device\n");
-	usb_queue_reset_device(i2400mu->usb_iface);
-	goto out;
-}
-
-
-/*
- * Start reading from the device
- *
- * @i2400m: device instance
- *
- * Notify the RX thread that there is data pending.
- */
-void i2400mu_rx_kick(struct i2400mu *i2400mu)
-{
-	struct i2400m *i2400m = &i2400mu->i2400m;
-	struct device *dev = &i2400mu->usb_iface->dev;
-
-	d_fnstart(3, dev, "(i2400mu %p)\n", i2400m);
-	atomic_inc(&i2400mu->rx_pending_count);
-	wake_up_all(&i2400mu->rx_wq);
-	d_fnend(3, dev, "(i2400m %p) = void\n", i2400m);
-}
-
-
-int i2400mu_rx_setup(struct i2400mu *i2400mu)
-{
-	int result = 0;
-	struct i2400m *i2400m = &i2400mu->i2400m;
-	struct device *dev = &i2400mu->usb_iface->dev;
-	struct wimax_dev *wimax_dev = &i2400m->wimax_dev;
-	struct task_struct *kthread;
-
-	kthread = kthread_run(i2400mu_rxd, i2400mu, "%s-rx",
-			      wimax_dev->name);
-	/* the kthread function sets i2400mu->rx_thread */
-	if (IS_ERR(kthread)) {
-		result = PTR_ERR(kthread);
-		dev_err(dev, "RX: cannot start thread: %d\n", result);
-	}
-	return result;
-}
-
-
-void i2400mu_rx_release(struct i2400mu *i2400mu)
-{
-	unsigned long flags;
-	struct i2400m *i2400m = &i2400mu->i2400m;
-	struct device *dev = i2400m_dev(i2400m);
-	struct task_struct *kthread;
-
-	spin_lock_irqsave(&i2400m->rx_lock, flags);
-	kthread = i2400mu->rx_kthread;
-	i2400mu->rx_kthread = NULL;
-	spin_unlock_irqrestore(&i2400m->rx_lock, flags);
-	if (kthread)
-		kthread_stop(kthread);
-	else
-		d_printf(1, dev, "RX: kthread had already exited\n");
-}
-
diff --git a/drivers/net/wimax/i2400m/usb-tx.c b/drivers/net/wimax/i2400m/usb-tx.c
deleted file mode 100644
index 3ba9d70cca1b..000000000000
--- a/drivers/net/wimax/i2400m/usb-tx.c
+++ /dev/null
@@ -1,273 +0,0 @@
-/*
- * Intel Wireless WiMAX Connection 2400m
- * USB specific TX handling
- *
- *
- * Copyright (C) 2007-2008 Intel Corporation. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * Intel Corporation <linux-wimax@intel.com>
- * Yanir Lubetkin <yanirx.lubetkin@intel.com>
- *  - Initial implementation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *  - Split transport/device specific
- *
- *
- * Takes the TX messages in the i2400m's driver TX FIFO and sends them
- * to the device until there are no more.
- *
- * If we fail sending the message, we just drop it. There isn't much
- * we can do at this point. We could also retry, but the USB stack has
- * already retried and still failed, so there is not much of a
- * point. As well, most of the traffic is network, which has recovery
- * methods for dropped packets.
- *
- * For sending we just obtain a FIFO buffer to send, send it to the
- * USB bulk out, tell the TX FIFO code we have sent it; query for
- * another one, etc... until done.
- *
- * We use a thread so we can call usb_autopm_enable() and
- * usb_autopm_disable() for each transaction; this way when the device
- * goes idle, it will suspend. It also has less overhead than a
- * dedicated workqueue, as it is being used for a single task.
- *
- * ROADMAP
- *
- * i2400mu_tx_setup()
- * i2400mu_tx_release()
- *
- * i2400mu_bus_tx_kick()	- Called by the tx.c code when there
- *                                is new data in the FIFO.
- * i2400mu_txd()
- *   i2400m_tx_msg_get()
- *   i2400m_tx_msg_sent()
- */
-#include "i2400m-usb.h"
-
-
-#define D_SUBMODULE tx
-#include "usb-debug-levels.h"
-
-
-/*
- * Get the next TX message in the TX FIFO and send it to the device
- *
- * Note that any iteration consumes a message to be sent, no matter if
- * it succeeds or fails (we have no real way to retry or complain).
- *
- * Return: 0 if ok, < 0 errno code on hard error.
- */
-static
-int i2400mu_tx(struct i2400mu *i2400mu, struct i2400m_msg_hdr *tx_msg,
-	       size_t tx_msg_size)
-{
-	int result = 0;
-	struct i2400m *i2400m = &i2400mu->i2400m;
-	struct device *dev = &i2400mu->usb_iface->dev;
-	int usb_pipe, sent_size, do_autopm;
-	struct usb_endpoint_descriptor *epd;
-
-	d_fnstart(4, dev, "(i2400mu %p)\n", i2400mu);
-	do_autopm = atomic_read(&i2400mu->do_autopm);
-	result = do_autopm ?
-		usb_autopm_get_interface(i2400mu->usb_iface) : 0;
-	if (result < 0) {
-		dev_err(dev, "TX: can't get autopm: %d\n", result);
-		do_autopm = 0;
-	}
-	epd = usb_get_epd(i2400mu->usb_iface, i2400mu->endpoint_cfg.bulk_out);
-	usb_pipe = usb_sndbulkpipe(i2400mu->usb_dev, epd->bEndpointAddress);
-retry:
-	result = usb_bulk_msg(i2400mu->usb_dev, usb_pipe,
-			      tx_msg, tx_msg_size, &sent_size, 200);
-	usb_mark_last_busy(i2400mu->usb_dev);
-	switch (result) {
-	case 0:
-		if (sent_size != tx_msg_size) {	/* Too short? drop it */
-			dev_err(dev, "TX: short write (%d B vs %zu "
-				"expected)\n", sent_size, tx_msg_size);
-			result = -EIO;
-		}
-		break;
-	case -EPIPE:
-		/*
-		 * Stall -- maybe the device is choking with our
-		 * requests. Clear it and give it some time. If they
-		 * happen to often, it might be another symptom, so we
-		 * reset.
-		 *
-		 * No error handling for usb_clear_halt(0; if it
-		 * works, the retry works; if it fails, this switch
-		 * does the error handling for us.
-		 */
-		if (edc_inc(&i2400mu->urb_edc,
-			    10 * EDC_MAX_ERRORS, EDC_ERROR_TIMEFRAME)) {
-			dev_err(dev, "BM-CMD: too many stalls in "
-				"URB; resetting device\n");
-			usb_queue_reset_device(i2400mu->usb_iface);
-		} else {
-			usb_clear_halt(i2400mu->usb_dev, usb_pipe);
-			msleep(10);	/* give the device some time */
-			goto retry;
-		}
-		fallthrough;
-	case -EINVAL:			/* while removing driver */
-	case -ENODEV:			/* dev disconnect ... */
-	case -ENOENT:			/* just ignore it */
-	case -ESHUTDOWN:		/* and exit */
-	case -ECONNRESET:
-		result = -ESHUTDOWN;
-		break;
-	default:			/* Some error? */
-		if (edc_inc(&i2400mu->urb_edc,
-			    EDC_MAX_ERRORS, EDC_ERROR_TIMEFRAME)) {
-			dev_err(dev, "TX: maximum errors in URB "
-				"exceeded; resetting device\n");
-			usb_queue_reset_device(i2400mu->usb_iface);
-		} else {
-			dev_err(dev, "TX: cannot send URB; retrying. "
-				"tx_msg @%zu %zu B [%d sent]: %d\n",
-				(void *) tx_msg - i2400m->tx_buf,
-				tx_msg_size, sent_size, result);
-			goto retry;
-		}
-	}
-	if (do_autopm)
-		usb_autopm_put_interface(i2400mu->usb_iface);
-	d_fnend(4, dev, "(i2400mu %p) = result\n", i2400mu);
-	return result;
-}
-
-
-/*
- * Get the next TX message in the TX FIFO and send it to the device
- *
- * Note we exit the loop if i2400mu_tx() fails; that function only
- * fails on hard error (failing to tx a buffer not being one of them,
- * see its doc).
- *
- * Return: 0
- */
-static
-int i2400mu_txd(void *_i2400mu)
-{
-	struct i2400mu *i2400mu = _i2400mu;
-	struct i2400m *i2400m = &i2400mu->i2400m;
-	struct device *dev = &i2400mu->usb_iface->dev;
-	struct i2400m_msg_hdr *tx_msg;
-	size_t tx_msg_size;
-	unsigned long flags;
-
-	d_fnstart(4, dev, "(i2400mu %p)\n", i2400mu);
-
-	spin_lock_irqsave(&i2400m->tx_lock, flags);
-	BUG_ON(i2400mu->tx_kthread != NULL);
-	i2400mu->tx_kthread = current;
-	spin_unlock_irqrestore(&i2400m->tx_lock, flags);
-
-	while (1) {
-		d_printf(2, dev, "TX: waiting for messages\n");
-		tx_msg = NULL;
-		wait_event_interruptible(
-			i2400mu->tx_wq,
-			(kthread_should_stop()	/* check this first! */
-			 || (tx_msg = i2400m_tx_msg_get(i2400m, &tx_msg_size)))
-			);
-		if (kthread_should_stop())
-			break;
-		WARN_ON(tx_msg == NULL);	/* should not happen...*/
-		d_printf(2, dev, "TX: submitting %zu bytes\n", tx_msg_size);
-		d_dump(5, dev, tx_msg, tx_msg_size);
-		/* Yeah, we ignore errors ... not much we can do */
-		i2400mu_tx(i2400mu, tx_msg, tx_msg_size);
-		i2400m_tx_msg_sent(i2400m);	/* ack it, advance the FIFO */
-	}
-
-	spin_lock_irqsave(&i2400m->tx_lock, flags);
-	i2400mu->tx_kthread = NULL;
-	spin_unlock_irqrestore(&i2400m->tx_lock, flags);
-
-	d_fnend(4, dev, "(i2400mu %p)\n", i2400mu);
-	return 0;
-}
-
-
-/*
- * i2400m TX engine notifies us that there is data in the FIFO ready
- * for TX
- *
- * If there is a URB in flight, don't do anything; when it finishes,
- * it will see there is data in the FIFO and send it. Else, just
- * submit a write.
- */
-void i2400mu_bus_tx_kick(struct i2400m *i2400m)
-{
-	struct i2400mu *i2400mu = container_of(i2400m, struct i2400mu, i2400m);
-	struct device *dev = &i2400mu->usb_iface->dev;
-
-	d_fnstart(3, dev, "(i2400m %p) = void\n", i2400m);
-	wake_up_all(&i2400mu->tx_wq);
-	d_fnend(3, dev, "(i2400m %p) = void\n", i2400m);
-}
-
-
-int i2400mu_tx_setup(struct i2400mu *i2400mu)
-{
-	int result = 0;
-	struct i2400m *i2400m = &i2400mu->i2400m;
-	struct device *dev = &i2400mu->usb_iface->dev;
-	struct wimax_dev *wimax_dev = &i2400m->wimax_dev;
-	struct task_struct *kthread;
-
-	kthread = kthread_run(i2400mu_txd, i2400mu, "%s-tx",
-			      wimax_dev->name);
-	/* the kthread function sets i2400mu->tx_thread */
-	if (IS_ERR(kthread)) {
-		result = PTR_ERR(kthread);
-		dev_err(dev, "TX: cannot start thread: %d\n", result);
-	}
-	return result;
-}
-
-void i2400mu_tx_release(struct i2400mu *i2400mu)
-{
-	unsigned long flags;
-	struct i2400m *i2400m = &i2400mu->i2400m;
-	struct device *dev = i2400m_dev(i2400m);
-	struct task_struct *kthread;
-
-	spin_lock_irqsave(&i2400m->tx_lock, flags);
-	kthread = i2400mu->tx_kthread;
-	i2400mu->tx_kthread = NULL;
-	spin_unlock_irqrestore(&i2400m->tx_lock, flags);
-	if (kthread)
-		kthread_stop(kthread);
-	else
-		d_printf(1, dev, "TX: kthread had already exited\n");
-}
diff --git a/drivers/net/wimax/i2400m/usb.c b/drivers/net/wimax/i2400m/usb.c
deleted file mode 100644
index b684e97ac976..000000000000
--- a/drivers/net/wimax/i2400m/usb.c
+++ /dev/null
@@ -1,764 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel Wireless WiMAX Connection 2400m
- * Linux driver model glue for USB device, reset & fw upload
- *
- * Copyright (C) 2007-2008 Intel Corporation <linux-wimax@intel.com>
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- * Yanir Lubetkin <yanirx.lubetkin@intel.com>
- *
- * See i2400m-usb.h for a general description of this driver.
- *
- * This file implements driver model glue, and hook ups for the
- * generic driver to implement the bus-specific functions (device
- * communication setup/tear down, firmware upload and resetting).
- *
- * ROADMAP
- *
- * i2400mu_probe()
- *   alloc_netdev()...
- *     i2400mu_netdev_setup()
- *       i2400mu_init()
- *       i2400m_netdev_setup()
- *   i2400m_setup()...
- *
- * i2400mu_disconnect
- *   i2400m_release()
- *   free_netdev()
- *
- * i2400mu_suspend()
- *   i2400m_cmd_enter_powersave()
- *   i2400mu_notification_release()
- *
- * i2400mu_resume()
- *   i2400mu_notification_setup()
- *
- * i2400mu_bus_dev_start()        Called by i2400m_dev_start() [who is
- *   i2400mu_tx_setup()           called by i2400m_setup()]
- *   i2400mu_rx_setup()
- *   i2400mu_notification_setup()
- *
- * i2400mu_bus_dev_stop()         Called by i2400m_dev_stop() [who is
- *   i2400mu_notification_release()  called by i2400m_release()]
- *   i2400mu_rx_release()
- *   i2400mu_tx_release()
- *
- * i2400mu_bus_reset()            Called by i2400m_reset
- *   __i2400mu_reset()
- *     __i2400mu_send_barker()
- *   usb_reset_device()
- */
-#include "i2400m-usb.h"
-#include <linux/wimax/i2400m.h>
-#include <linux/debugfs.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-
-
-#define D_SUBMODULE usb
-#include "usb-debug-levels.h"
-
-static char i2400mu_debug_params[128];
-module_param_string(debug, i2400mu_debug_params, sizeof(i2400mu_debug_params),
-		    0644);
-MODULE_PARM_DESC(debug,
-		 "String of space-separated NAME:VALUE pairs, where NAMEs "
-		 "are the different debug submodules and VALUE are the "
-		 "initial debug value to set.");
-
-/* Our firmware file name */
-static const char *i2400mu_bus_fw_names_5x50[] = {
-#define I2400MU_FW_FILE_NAME_v1_5 "i2400m-fw-usb-1.5.sbcf"
-	I2400MU_FW_FILE_NAME_v1_5,
-#define I2400MU_FW_FILE_NAME_v1_4 "i2400m-fw-usb-1.4.sbcf"
-	I2400MU_FW_FILE_NAME_v1_4,
-	NULL,
-};
-
-
-static const char *i2400mu_bus_fw_names_6050[] = {
-#define I6050U_FW_FILE_NAME_v1_5 "i6050-fw-usb-1.5.sbcf"
-	I6050U_FW_FILE_NAME_v1_5,
-	NULL,
-};
-
-
-static
-int i2400mu_bus_dev_start(struct i2400m *i2400m)
-{
-	int result;
-	struct i2400mu *i2400mu = container_of(i2400m, struct i2400mu, i2400m);
-	struct device *dev = &i2400mu->usb_iface->dev;
-
-	d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
-	result = i2400mu_tx_setup(i2400mu);
-	if (result < 0)
-		goto error_usb_tx_setup;
-	result = i2400mu_rx_setup(i2400mu);
-	if (result < 0)
-		goto error_usb_rx_setup;
-	result = i2400mu_notification_setup(i2400mu);
-	if (result < 0)
-		goto error_notif_setup;
-	d_fnend(3, dev, "(i2400m %p) = %d\n", i2400m, result);
-	return result;
-
-error_notif_setup:
-	i2400mu_rx_release(i2400mu);
-error_usb_rx_setup:
-	i2400mu_tx_release(i2400mu);
-error_usb_tx_setup:
-	d_fnend(3, dev, "(i2400m %p) = void\n", i2400m);
-	return result;
-}
-
-
-static
-void i2400mu_bus_dev_stop(struct i2400m *i2400m)
-{
-	struct i2400mu *i2400mu = container_of(i2400m, struct i2400mu, i2400m);
-	struct device *dev = &i2400mu->usb_iface->dev;
-
-	d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
-	i2400mu_notification_release(i2400mu);
-	i2400mu_rx_release(i2400mu);
-	i2400mu_tx_release(i2400mu);
-	d_fnend(3, dev, "(i2400m %p) = void\n", i2400m);
-}
-
-
-/*
- * Sends a barker buffer to the device
- *
- * This helper will allocate a kmalloced buffer and use it to transmit
- * (then free it). Reason for this is that other arches cannot use
- * stack/vmalloc/text areas for DMA transfers.
- *
- * Error recovery here is simpler: anything is considered a hard error
- * and will move the reset code to use a last-resort bus-based reset.
- */
-static
-int __i2400mu_send_barker(struct i2400mu *i2400mu,
-			  const __le32 *barker,
-			  size_t barker_size,
-			  unsigned endpoint)
-{
-	struct usb_endpoint_descriptor *epd = NULL;
-	int pipe, actual_len, ret;
-	struct device *dev = &i2400mu->usb_iface->dev;
-	void *buffer;
-	int do_autopm = 1;
-
-	ret = usb_autopm_get_interface(i2400mu->usb_iface);
-	if (ret < 0) {
-		dev_err(dev, "RESET: can't get autopm: %d\n", ret);
-		do_autopm = 0;
-	}
-	ret = -ENOMEM;
-	buffer = kmalloc(barker_size, GFP_KERNEL);
-	if (buffer == NULL)
-		goto error_kzalloc;
-	epd = usb_get_epd(i2400mu->usb_iface, endpoint);
-	pipe = usb_sndbulkpipe(i2400mu->usb_dev, epd->bEndpointAddress);
-	memcpy(buffer, barker, barker_size);
-retry:
-	ret = usb_bulk_msg(i2400mu->usb_dev, pipe, buffer, barker_size,
-			   &actual_len, 200);
-	switch (ret) {
-	case 0:
-		if (actual_len != barker_size) {	/* Too short? drop it */
-			dev_err(dev, "E: %s: short write (%d B vs %zu "
-				"expected)\n",
-				__func__, actual_len, barker_size);
-			ret = -EIO;
-		}
-		break;
-	case -EPIPE:
-		/*
-		 * Stall -- maybe the device is choking with our
-		 * requests. Clear it and give it some time. If they
-		 * happen to often, it might be another symptom, so we
-		 * reset.
-		 *
-		 * No error handling for usb_clear_halt(0; if it
-		 * works, the retry works; if it fails, this switch
-		 * does the error handling for us.
-		 */
-		if (edc_inc(&i2400mu->urb_edc,
-			    10 * EDC_MAX_ERRORS, EDC_ERROR_TIMEFRAME)) {
-			dev_err(dev, "E: %s: too many stalls in "
-				"URB; resetting device\n", __func__);
-			usb_queue_reset_device(i2400mu->usb_iface);
-			/* fallthrough */
-		} else {
-			usb_clear_halt(i2400mu->usb_dev, pipe);
-			msleep(10);	/* give the device some time */
-			goto retry;
-		}
-		fallthrough;
-	case -EINVAL:			/* while removing driver */
-	case -ENODEV:			/* dev disconnect ... */
-	case -ENOENT:			/* just ignore it */
-	case -ESHUTDOWN:		/* and exit */
-	case -ECONNRESET:
-		ret = -ESHUTDOWN;
-		break;
-	default:			/* Some error? */
-		if (edc_inc(&i2400mu->urb_edc,
-			    EDC_MAX_ERRORS, EDC_ERROR_TIMEFRAME)) {
-			dev_err(dev, "E: %s: maximum errors in URB "
-				"exceeded; resetting device\n",
-				__func__);
-			usb_queue_reset_device(i2400mu->usb_iface);
-		} else {
-			dev_warn(dev, "W: %s: cannot send URB: %d\n",
-				 __func__, ret);
-			goto retry;
-		}
-	}
-	kfree(buffer);
-error_kzalloc:
-	if (do_autopm)
-		usb_autopm_put_interface(i2400mu->usb_iface);
-	return ret;
-}
-
-
-/*
- * Reset a device at different levels (warm, cold or bus)
- *
- * @i2400m: device descriptor
- * @reset_type: soft, warm or bus reset (I2400M_RT_WARM/SOFT/BUS)
- *
- * Warm and cold resets get a USB reset if they fail.
- *
- * Warm reset:
- *
- * The device will be fully reset internally, but won't be
- * disconnected from the USB bus (so no reenumeration will
- * happen). Firmware upload will be necessary.
- *
- * The device will send a reboot barker in the notification endpoint
- * that will trigger the driver to reinitialize the state
- * automatically from notif.c:i2400m_notification_grok() into
- * i2400m_dev_bootstrap_delayed().
- *
- * Cold and bus (USB) reset:
- *
- * The device will be fully reset internally, disconnected from the
- * USB bus an a reenumeration will happen. Firmware upload will be
- * necessary. Thus, we don't do any locking or struct
- * reinitialization, as we are going to be fully disconnected and
- * reenumerated.
- *
- * Note we need to return -ENODEV if a warm reset was requested and we
- * had to resort to a bus reset. See i2400m_op_reset(), wimax_reset()
- * and wimax_dev->op_reset.
- *
- * WARNING: no driver state saved/fixed
- */
-static
-int i2400mu_bus_reset(struct i2400m *i2400m, enum i2400m_reset_type rt)
-{
-	int result;
-	struct i2400mu *i2400mu =
-		container_of(i2400m, struct i2400mu, i2400m);
-	struct device *dev = i2400m_dev(i2400m);
-	static const __le32 i2400m_WARM_BOOT_BARKER[4] = {
-		cpu_to_le32(I2400M_WARM_RESET_BARKER),
-		cpu_to_le32(I2400M_WARM_RESET_BARKER),
-		cpu_to_le32(I2400M_WARM_RESET_BARKER),
-		cpu_to_le32(I2400M_WARM_RESET_BARKER),
-	};
-	static const __le32 i2400m_COLD_BOOT_BARKER[4] = {
-		cpu_to_le32(I2400M_COLD_RESET_BARKER),
-		cpu_to_le32(I2400M_COLD_RESET_BARKER),
-		cpu_to_le32(I2400M_COLD_RESET_BARKER),
-		cpu_to_le32(I2400M_COLD_RESET_BARKER),
-	};
-
-	d_fnstart(3, dev, "(i2400m %p rt %u)\n", i2400m, rt);
-	if (rt == I2400M_RT_WARM)
-		result = __i2400mu_send_barker(
-			i2400mu, i2400m_WARM_BOOT_BARKER,
-			sizeof(i2400m_WARM_BOOT_BARKER),
-			i2400mu->endpoint_cfg.bulk_out);
-	else if (rt == I2400M_RT_COLD)
-		result = __i2400mu_send_barker(
-			i2400mu, i2400m_COLD_BOOT_BARKER,
-			sizeof(i2400m_COLD_BOOT_BARKER),
-			i2400mu->endpoint_cfg.reset_cold);
-	else if (rt == I2400M_RT_BUS) {
-		result = usb_reset_device(i2400mu->usb_dev);
-		switch (result) {
-		case 0:
-		case -EINVAL:	/* device is gone */
-		case -ENODEV:
-		case -ENOENT:
-		case -ESHUTDOWN:
-			result = 0;
-			break;	/* We assume the device is disconnected */
-		default:
-			dev_err(dev, "USB reset failed (%d), giving up!\n",
-				result);
-		}
-	} else {
-		result = -EINVAL;	/* shut gcc up in certain arches */
-		BUG();
-	}
-	if (result < 0
-	    && result != -EINVAL	/* device is gone */
-	    && rt != I2400M_RT_BUS) {
-		/*
-		 * Things failed -- resort to lower level reset, that
-		 * we queue in another context; the reason for this is
-		 * that the pre and post reset functionality requires
-		 * the i2400m->init_mutex; RT_WARM and RT_COLD can
-		 * come from areas where i2400m->init_mutex is taken.
-		 */
-		dev_err(dev, "%s reset failed (%d); trying USB reset\n",
-			rt == I2400M_RT_WARM ? "warm" : "cold", result);
-		usb_queue_reset_device(i2400mu->usb_iface);
-		result = -ENODEV;
-	}
-	d_fnend(3, dev, "(i2400m %p rt %u) = %d\n", i2400m, rt, result);
-	return result;
-}
-
-static void i2400mu_get_drvinfo(struct net_device *net_dev,
-                                struct ethtool_drvinfo *info)
-{
-	struct i2400m *i2400m = net_dev_to_i2400m(net_dev);
-	struct i2400mu *i2400mu = container_of(i2400m, struct i2400mu, i2400m);
-	struct usb_device *udev = i2400mu->usb_dev;
-
-	strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
-	strlcpy(info->fw_version, i2400m->fw_name ? : "",
-		sizeof(info->fw_version));
-	usb_make_path(udev, info->bus_info, sizeof(info->bus_info));
-}
-
-static const struct ethtool_ops i2400mu_ethtool_ops = {
-	.get_drvinfo = i2400mu_get_drvinfo,
-	.get_link = ethtool_op_get_link,
-};
-
-static
-void i2400mu_netdev_setup(struct net_device *net_dev)
-{
-	struct i2400m *i2400m = net_dev_to_i2400m(net_dev);
-	struct i2400mu *i2400mu = container_of(i2400m, struct i2400mu, i2400m);
-	i2400mu_init(i2400mu);
-	i2400m_netdev_setup(net_dev);
-	net_dev->ethtool_ops = &i2400mu_ethtool_ops;
-}
-
-
-/*
- * Debug levels control; see debug.h
- */
-struct d_level D_LEVEL[] = {
-	D_SUBMODULE_DEFINE(usb),
-	D_SUBMODULE_DEFINE(fw),
-	D_SUBMODULE_DEFINE(notif),
-	D_SUBMODULE_DEFINE(rx),
-	D_SUBMODULE_DEFINE(tx),
-};
-size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL);
-
-static
-void i2400mu_debugfs_add(struct i2400mu *i2400mu)
-{
-	struct dentry *dentry = i2400mu->i2400m.wimax_dev.debugfs_dentry;
-
-	dentry = debugfs_create_dir("i2400m-usb", dentry);
-	i2400mu->debugfs_dentry = dentry;
-
-	d_level_register_debugfs("dl_", usb, dentry);
-	d_level_register_debugfs("dl_", fw, dentry);
-	d_level_register_debugfs("dl_", notif, dentry);
-	d_level_register_debugfs("dl_", rx, dentry);
-	d_level_register_debugfs("dl_", tx, dentry);
-
-	/* Don't touch these if you don't know what you are doing */
-	debugfs_create_u8("rx_size_auto_shrink", 0600, dentry,
-			  &i2400mu->rx_size_auto_shrink);
-
-	debugfs_create_size_t("rx_size", 0600, dentry, &i2400mu->rx_size);
-}
-
-
-static struct device_type i2400mu_type = {
-	.name	= "wimax",
-};
-
-/*
- * Probe a i2400m interface and register it
- *
- * @iface:   USB interface to link to
- * @id:      USB class/subclass/protocol id
- * @returns: 0 if ok, < 0 errno code on error.
- *
- * Alloc a net device, initialize the bus-specific details and then
- * calls the bus-generic initialization routine. That will register
- * the wimax and netdev devices, upload the firmware [using
- * _bus_bm_*()], call _bus_dev_start() to finalize the setup of the
- * communication with the device and then will start to talk to it to
- * finnish setting it up.
- */
-static
-int i2400mu_probe(struct usb_interface *iface,
-		  const struct usb_device_id *id)
-{
-	int result;
-	struct net_device *net_dev;
-	struct device *dev = &iface->dev;
-	struct i2400m *i2400m;
-	struct i2400mu *i2400mu;
-	struct usb_device *usb_dev = interface_to_usbdev(iface);
-
-	if (iface->cur_altsetting->desc.bNumEndpoints < 4)
-		return -ENODEV;
-
-	if (usb_dev->speed != USB_SPEED_HIGH)
-		dev_err(dev, "device not connected as high speed\n");
-
-	/* Allocate instance [calls i2400m_netdev_setup() on it]. */
-	result = -ENOMEM;
-	net_dev = alloc_netdev(sizeof(*i2400mu), "wmx%d", NET_NAME_UNKNOWN,
-			       i2400mu_netdev_setup);
-	if (net_dev == NULL) {
-		dev_err(dev, "no memory for network device instance\n");
-		goto error_alloc_netdev;
-	}
-	SET_NETDEV_DEV(net_dev, dev);
-	SET_NETDEV_DEVTYPE(net_dev, &i2400mu_type);
-	i2400m = net_dev_to_i2400m(net_dev);
-	i2400mu = container_of(i2400m, struct i2400mu, i2400m);
-	i2400m->wimax_dev.net_dev = net_dev;
-	i2400mu->usb_dev = usb_get_dev(usb_dev);
-	i2400mu->usb_iface = iface;
-	usb_set_intfdata(iface, i2400mu);
-
-	i2400m->bus_tx_block_size = I2400MU_BLK_SIZE;
-	/*
-	 * Room required in the Tx queue for USB message to accommodate
-	 * a smallest payload while allocating header space is 16 bytes.
-	 * Adding this room  for the new tx message increases the
-	 * possibilities of including any payload with size <= 16 bytes.
-	 */
-	i2400m->bus_tx_room_min = I2400MU_BLK_SIZE;
-	i2400m->bus_pl_size_max = I2400MU_PL_SIZE_MAX;
-	i2400m->bus_setup = NULL;
-	i2400m->bus_dev_start = i2400mu_bus_dev_start;
-	i2400m->bus_dev_stop = i2400mu_bus_dev_stop;
-	i2400m->bus_release = NULL;
-	i2400m->bus_tx_kick = i2400mu_bus_tx_kick;
-	i2400m->bus_reset = i2400mu_bus_reset;
-	i2400m->bus_bm_retries = I2400M_USB_BOOT_RETRIES;
-	i2400m->bus_bm_cmd_send = i2400mu_bus_bm_cmd_send;
-	i2400m->bus_bm_wait_for_ack = i2400mu_bus_bm_wait_for_ack;
-	i2400m->bus_bm_mac_addr_impaired = 0;
-
-	switch (id->idProduct) {
-	case USB_DEVICE_ID_I6050:
-	case USB_DEVICE_ID_I6050_2:
-	case USB_DEVICE_ID_I6150:
-	case USB_DEVICE_ID_I6150_2:
-	case USB_DEVICE_ID_I6150_3:
-	case USB_DEVICE_ID_I6250:
-		i2400mu->i6050 = 1;
-		break;
-	default:
-		break;
-	}
-
-	if (i2400mu->i6050) {
-		i2400m->bus_fw_names = i2400mu_bus_fw_names_6050;
-		i2400mu->endpoint_cfg.bulk_out = 0;
-		i2400mu->endpoint_cfg.notification = 3;
-		i2400mu->endpoint_cfg.reset_cold = 2;
-		i2400mu->endpoint_cfg.bulk_in = 1;
-	} else {
-		i2400m->bus_fw_names = i2400mu_bus_fw_names_5x50;
-		i2400mu->endpoint_cfg.bulk_out = 0;
-		i2400mu->endpoint_cfg.notification = 1;
-		i2400mu->endpoint_cfg.reset_cold = 2;
-		i2400mu->endpoint_cfg.bulk_in = 3;
-	}
-#ifdef CONFIG_PM
-	iface->needs_remote_wakeup = 1;		/* autosuspend (15s delay) */
-	device_init_wakeup(dev, 1);
-	pm_runtime_set_autosuspend_delay(&usb_dev->dev, 15000);
-	usb_enable_autosuspend(usb_dev);
-#endif
-
-	result = i2400m_setup(i2400m, I2400M_BRI_MAC_REINIT);
-	if (result < 0) {
-		dev_err(dev, "cannot setup device: %d\n", result);
-		goto error_setup;
-	}
-	i2400mu_debugfs_add(i2400mu);
-	return 0;
-
-error_setup:
-	usb_set_intfdata(iface, NULL);
-	usb_put_dev(i2400mu->usb_dev);
-	free_netdev(net_dev);
-error_alloc_netdev:
-	return result;
-}
-
-
-/*
- * Disconnect a i2400m from the system.
- *
- * i2400m_stop() has been called before, so al the rx and tx contexts
- * have been taken down already. Make sure the queue is stopped,
- * unregister netdev and i2400m, free and kill.
- */
-static
-void i2400mu_disconnect(struct usb_interface *iface)
-{
-	struct i2400mu *i2400mu = usb_get_intfdata(iface);
-	struct i2400m *i2400m = &i2400mu->i2400m;
-	struct net_device *net_dev = i2400m->wimax_dev.net_dev;
-	struct device *dev = &iface->dev;
-
-	d_fnstart(3, dev, "(iface %p i2400m %p)\n", iface, i2400m);
-
-	debugfs_remove_recursive(i2400mu->debugfs_dentry);
-	i2400m_release(i2400m);
-	usb_set_intfdata(iface, NULL);
-	usb_put_dev(i2400mu->usb_dev);
-	free_netdev(net_dev);
-	d_fnend(3, dev, "(iface %p i2400m %p) = void\n", iface, i2400m);
-}
-
-
-/*
- * Get the device ready for USB port or system standby and hibernation
- *
- * USB port and system standby are handled the same.
- *
- * When the system hibernates, the USB device is powered down and then
- * up, so we don't really have to do much here, as it will be seen as
- * a reconnect. Still for simplicity we consider this case the same as
- * suspend, so that the device has a chance to do notify the base
- * station (if connected).
- *
- * So at the end, the three cases require common handling.
- *
- * If at the time of this call the device's firmware is not loaded,
- * nothing has to be done. Note we can be "loose" about not reading
- * i2400m->updown under i2400m->init_mutex. If it happens to change
- * inmediately, other parts of the call flow will fail and effectively
- * catch it.
- *
- * If the firmware is loaded, we need to:
- *
- *  - tell the device to go into host interface power save mode, wait
- *    for it to ack
- *
- *    This is quite more interesting than it is; we need to execute a
- *    command, but this time, we don't want the code in usb-{tx,rx}.c
- *    to call the usb_autopm_get/put_interface() barriers as it'd
- *    deadlock, so we need to decrement i2400mu->do_autopm, that acts
- *    as a poor man's semaphore. Ugly, but it works.
- *
- *    As well, the device might refuse going to sleep for whichever
- *    reason. In this case we just fail. For system suspend/hibernate,
- *    we *can't* fail. We check PMSG_IS_AUTO to see if the
- *    suspend call comes from the USB stack or from the system and act
- *    in consequence.
- *
- *  - stop the notification endpoint polling
- */
-static
-int i2400mu_suspend(struct usb_interface *iface, pm_message_t pm_msg)
-{
-	int result = 0;
-	struct device *dev = &iface->dev;
-	struct i2400mu *i2400mu = usb_get_intfdata(iface);
-	unsigned is_autosuspend = 0;
-	struct i2400m *i2400m = &i2400mu->i2400m;
-
-#ifdef CONFIG_PM
-	if (PMSG_IS_AUTO(pm_msg))
-		is_autosuspend = 1;
-#endif
-
-	d_fnstart(3, dev, "(iface %p pm_msg %u)\n", iface, pm_msg.event);
-	rmb();		/* see i2400m->updown's documentation  */
-	if (i2400m->updown == 0)
-		goto no_firmware;
-	if (i2400m->state == I2400M_SS_DATA_PATH_CONNECTED && is_autosuspend) {
-		/* ugh -- the device is connected and this suspend
-		 * request is an autosuspend one (not a system standby
-		 * / hibernate).
-		 *
-		 * The only way the device can go to standby is if the
-		 * link with the base station is in IDLE mode; that
-		 * were the case, we'd be in status
-		 * I2400M_SS_CONNECTED_IDLE. But we are not.
-		 *
-		 * If we *tell* him to go power save now, it'll reset
-		 * as a precautionary measure, so if this is an
-		 * autosuspend thing, say no and it'll come back
-		 * later, when the link is IDLE
-		 */
-		result = -EBADF;
-		d_printf(1, dev, "fw up, link up, not-idle, autosuspend: "
-			 "not entering powersave\n");
-		goto error_not_now;
-	}
-	d_printf(1, dev, "fw up: entering powersave\n");
-	atomic_dec(&i2400mu->do_autopm);
-	result = i2400m_cmd_enter_powersave(i2400m);
-	atomic_inc(&i2400mu->do_autopm);
-	if (result < 0 && !is_autosuspend) {
-		/* System suspend, can't fail */
-		dev_err(dev, "failed to suspend, will reset on resume\n");
-		result = 0;
-	}
-	if (result < 0)
-		goto error_enter_powersave;
-	i2400mu_notification_release(i2400mu);
-	d_printf(1, dev, "powersave requested\n");
-error_enter_powersave:
-error_not_now:
-no_firmware:
-	d_fnend(3, dev, "(iface %p pm_msg %u) = %d\n",
-		iface, pm_msg.event, result);
-	return result;
-}
-
-
-static
-int i2400mu_resume(struct usb_interface *iface)
-{
-	int ret = 0;
-	struct device *dev = &iface->dev;
-	struct i2400mu *i2400mu = usb_get_intfdata(iface);
-	struct i2400m *i2400m = &i2400mu->i2400m;
-
-	d_fnstart(3, dev, "(iface %p)\n", iface);
-	rmb();		/* see i2400m->updown's documentation  */
-	if (i2400m->updown == 0) {
-		d_printf(1, dev, "fw was down, no resume needed\n");
-		goto out;
-	}
-	d_printf(1, dev, "fw was up, resuming\n");
-	i2400mu_notification_setup(i2400mu);
-	/* USB has flow control, so we don't need to give it time to
-	 * come back; otherwise, we'd use something like a get-state
-	 * command... */
-out:
-	d_fnend(3, dev, "(iface %p) = %d\n", iface, ret);
-	return ret;
-}
-
-
-static
-int i2400mu_reset_resume(struct usb_interface *iface)
-{
-	int result;
-	struct device *dev = &iface->dev;
-	struct i2400mu *i2400mu = usb_get_intfdata(iface);
-	struct i2400m *i2400m = &i2400mu->i2400m;
-
-	d_fnstart(3, dev, "(iface %p)\n", iface);
-	result = i2400m_dev_reset_handle(i2400m, "device reset on resume");
-	d_fnend(3, dev, "(iface %p) = %d\n", iface, result);
-	return result < 0 ? result : 0;
-}
-
-
-/*
- * Another driver or user space is triggering a reset on the device
- * which contains the interface passed as an argument. Cease IO and
- * save any device state you need to restore.
- *
- * If you need to allocate memory here, use GFP_NOIO or GFP_ATOMIC, if
- * you are in atomic context.
- */
-static
-int i2400mu_pre_reset(struct usb_interface *iface)
-{
-	struct i2400mu *i2400mu = usb_get_intfdata(iface);
-	return i2400m_pre_reset(&i2400mu->i2400m);
-}
-
-
-/*
- * The reset has completed.  Restore any saved device state and begin
- * using the device again.
- *
- * If you need to allocate memory here, use GFP_NOIO or GFP_ATOMIC, if
- * you are in atomic context.
- */
-static
-int i2400mu_post_reset(struct usb_interface *iface)
-{
-	struct i2400mu *i2400mu = usb_get_intfdata(iface);
-	return i2400m_post_reset(&i2400mu->i2400m);
-}
-
-
-static
-struct usb_device_id i2400mu_id_table[] = {
-	{ USB_DEVICE(0x8086, USB_DEVICE_ID_I6050) },
-	{ USB_DEVICE(0x8086, USB_DEVICE_ID_I6050_2) },
-	{ USB_DEVICE(0x8087, USB_DEVICE_ID_I6150) },
-	{ USB_DEVICE(0x8087, USB_DEVICE_ID_I6150_2) },
-	{ USB_DEVICE(0x8087, USB_DEVICE_ID_I6150_3) },
-	{ USB_DEVICE(0x8086, USB_DEVICE_ID_I6250) },
-	{ USB_DEVICE(0x8086, 0x0181) },
-	{ USB_DEVICE(0x8086, 0x1403) },
-	{ USB_DEVICE(0x8086, 0x1405) },
-	{ USB_DEVICE(0x8086, 0x0180) },
-	{ USB_DEVICE(0x8086, 0x0182) },
-	{ USB_DEVICE(0x8086, 0x1406) },
-	{ USB_DEVICE(0x8086, 0x1403) },
-	{ },
-};
-MODULE_DEVICE_TABLE(usb, i2400mu_id_table);
-
-
-static
-struct usb_driver i2400mu_driver = {
-	.name = KBUILD_MODNAME,
-	.suspend = i2400mu_suspend,
-	.resume = i2400mu_resume,
-	.reset_resume = i2400mu_reset_resume,
-	.probe = i2400mu_probe,
-	.disconnect = i2400mu_disconnect,
-	.pre_reset = i2400mu_pre_reset,
-	.post_reset = i2400mu_post_reset,
-	.id_table = i2400mu_id_table,
-	.supports_autosuspend = 1,
-};
-
-static
-int __init i2400mu_driver_init(void)
-{
-	d_parse_params(D_LEVEL, D_LEVEL_SIZE, i2400mu_debug_params,
-		       "i2400m_usb.debug");
-	return usb_register(&i2400mu_driver);
-}
-module_init(i2400mu_driver_init);
-
-
-static
-void __exit i2400mu_driver_exit(void)
-{
-	usb_deregister(&i2400mu_driver);
-}
-module_exit(i2400mu_driver_exit);
-
-MODULE_AUTHOR("Intel Corporation <linux-wimax@intel.com>");
-MODULE_DESCRIPTION("Driver for USB based Intel Wireless WiMAX Connection 2400M "
-		   "(5x50 & 6050)");
-MODULE_LICENSE("GPL");
-MODULE_FIRMWARE(I2400MU_FW_FILE_NAME_v1_5);
-MODULE_FIRMWARE(I6050U_FW_FILE_NAME_v1_5);
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index 2d0310448eba..443ca3f3cdf0 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -114,6 +114,8 @@ source "drivers/staging/kpc2000/Kconfig"
 
 source "drivers/staging/qlge/Kconfig"
 
+source "drivers/staging/wimax/Kconfig"
+
 source "drivers/staging/wfx/Kconfig"
 
 source "drivers/staging/hikey9xx/Kconfig"
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index 757a892ab5b9..dc45128ef525 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -47,5 +47,6 @@ obj-$(CONFIG_XIL_AXIS_FIFO)	+= axis-fifo/
 obj-$(CONFIG_FIELDBUS_DEV)     += fieldbus/
 obj-$(CONFIG_KPC2000)		+= kpc2000/
 obj-$(CONFIG_QLGE)		+= qlge/
+obj-$(CONFIG_WIMAX)		+= wimax/
 obj-$(CONFIG_WFX)		+= wfx/
 obj-y				+= hikey9xx/
diff --git a/drivers/staging/wimax/Documentation/i2400m.rst b/drivers/staging/wimax/Documentation/i2400m.rst
new file mode 100644
index 000000000000..194388c0c351
--- /dev/null
+++ b/drivers/staging/wimax/Documentation/i2400m.rst
@@ -0,0 +1,283 @@
+.. include:: <isonum.txt>
+
+====================================================
+Driver for the Intel Wireless Wimax Connection 2400m
+====================================================
+
+:Copyright: |copy| 2008 Intel Corporation < linux-wimax@intel.com >
+
+   This provides a driver for the Intel Wireless WiMAX Connection 2400m
+   and a basic Linux kernel WiMAX stack.
+
+1. Requirements
+===============
+
+     * Linux installation with Linux kernel 2.6.22 or newer (if building
+       from a separate tree)
+     * Intel i2400m Echo Peak or Baxter Peak; this includes the Intel
+       Wireless WiMAX/WiFi Link 5x50 series.
+     * build tools:
+
+          + Linux kernel development package for the target kernel; to
+            build against your currently running kernel, you need to have
+            the kernel development package corresponding to the running
+            image installed (usually if your kernel is named
+            linux-VERSION, the development package is called
+            linux-dev-VERSION or linux-headers-VERSION).
+          + GNU C Compiler, make
+
+2. Compilation and installation
+===============================
+
+2.1. Compilation of the drivers included in the kernel
+------------------------------------------------------
+
+   Configure the kernel; to enable the WiMAX drivers select Drivers >
+   Networking Drivers > WiMAX device support. Enable all of them as
+   modules (easier).
+
+   If USB or SDIO are not enabled in the kernel configuration, the options
+   to build the i2400m USB or SDIO drivers will not show. Enable said
+   subsystems and go back to the WiMAX menu to enable the drivers.
+
+   Compile and install your kernel as usual.
+
+2.2. Compilation of the drivers distributed as a standalone module
+-------------------------------------------------------------------
+
+   To compile::
+
+	$ cd source/directory
+	$ make
+
+   Once built you can load and unload using the provided load.sh script;
+   load.sh will load the modules, load.sh u will unload them.
+
+   To install in the default kernel directories (and enable auto loading
+   when the device is plugged)::
+
+	$ make install
+	$ depmod -a
+
+   If your kernel development files are located in a non standard
+   directory or if you want to build for a kernel that is not the
+   currently running one, set KDIR to the right location::
+
+	$ make KDIR=/path/to/kernel/dev/tree
+
+   For more information, please contact linux-wimax@intel.com.
+
+3. Installing the firmware
+==========================
+
+   The firmware can be obtained from http://linuxwimax.org or might have
+   been supplied with your hardware.
+
+   It has to be installed in the target system::
+
+	$ cp FIRMWAREFILE.sbcf /lib/firmware/i2400m-fw-BUSTYPE-1.3.sbcf
+
+     * NOTE: if your firmware came in an .rpm or .deb file, just install
+       it as normal, with the rpm (rpm -i FIRMWARE.rpm) or dpkg
+       (dpkg -i FIRMWARE.deb) commands. No further action is needed.
+     * BUSTYPE will be usb or sdio, depending on the hardware you have.
+       Each hardware type comes with its own firmware and will not work
+       with other types.
+
+4. Design
+=========
+
+   This package contains two major parts: a WiMAX kernel stack and a
+   driver for the Intel i2400m.
+
+   The WiMAX stack is designed to provide for common WiMAX control
+   services to current and future WiMAX devices from any vendor; please
+   see README.wimax for details.
+
+   The i2400m kernel driver is broken up in two main parts: the bus
+   generic driver and the bus-specific drivers. The bus generic driver
+   forms the drivercore and contains no knowledge of the actual method we
+   use to connect to the device. The bus specific drivers are just the
+   glue to connect the bus-generic driver and the device. Currently only
+   USB and SDIO are supported. See
+   drivers/staging/wimax/i2400m/i2400m.h for more information.
+
+   The bus generic driver is logically broken up in two parts: OS-glue and
+   hardware-glue. The OS-glue interfaces with Linux. The hardware-glue
+   interfaces with the device using an interface provided by the
+   bus-specific driver. The reason for this breakup is to be able to
+   easily reuse the hardware-glue to write drivers for other OSes; note
+   the hardware glue part is written as a native Linux driver; no
+   abstraction layers are used, so to port to another OS, the Linux kernel
+   API calls should be replaced with the target OS's.
+
+5. Usage
+========
+
+   To load the driver, follow the instructions in the install section;
+   once the driver is loaded, plug in the device (unless it is permanently
+   plugged in). The driver will enumerate the device, upload the firmware
+   and output messages in the kernel log (dmesg, /var/log/messages or
+   /var/log/kern.log) such as::
+
+	...
+	i2400m_usb 5-4:1.0: firmware interface version 8.0.0
+	i2400m_usb 5-4:1.0: WiMAX interface wmx0 (00:1d:e1:01:94:2c) ready
+
+   At this point the device is ready to work.
+
+   Current versions require the Intel WiMAX Network Service in userspace
+   to make things work. See the network service's README for instructions
+   on how to scan, connect and disconnect.
+
+5.1. Module parameters
+----------------------
+
+   Module parameters can be set at kernel or module load time or by
+   echoing values::
+
+	$ echo VALUE > /sys/module/MODULENAME/parameters/PARAMETERNAME
+
+   To make changes permanent, for example, for the i2400m module, you can
+   also create a file named /etc/modprobe.d/i2400m containing::
+
+	options i2400m idle_mode_disabled=1
+
+   To find which parameters are supported by a module, run::
+
+	$ modinfo path/to/module.ko
+
+   During kernel bootup (if the driver is linked in the kernel), specify
+   the following to the kernel command line::
+
+	i2400m.PARAMETER=VALUE
+
+5.1.1. i2400m: idle_mode_disabled
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+   The i2400m module supports a parameter to disable idle mode. This
+   parameter, once set, will take effect only when the device is
+   reinitialized by the driver (eg: following a reset or a reconnect).
+
+5.2. Debug operations: debugfs entries
+--------------------------------------
+
+   The driver will register debugfs entries that allow the user to tweak
+   debug settings. There are three main container directories where
+   entries are placed, which correspond to the three blocks a i2400m WiMAX
+   driver has:
+
+     * /sys/kernel/debug/wimax:DEVNAME/ for the generic WiMAX stack
+       controls
+     * /sys/kernel/debug/wimax:DEVNAME/i2400m for the i2400m generic
+       driver controls
+     * /sys/kernel/debug/wimax:DEVNAME/i2400m-usb (or -sdio) for the
+       bus-specific i2400m-usb or i2400m-sdio controls.
+
+   Of course, if debugfs is mounted in a directory other than
+   /sys/kernel/debug, those paths will change.
+
+5.2.1. Increasing debug output
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+   The files named *dl_* indicate knobs for controlling the debug output
+   of different submodules::
+
+	# find /sys/kernel/debug/wimax\:wmx0 -name \*dl_\*
+	/sys/kernel/debug/wimax:wmx0/i2400m-usb/dl_tx
+	/sys/kernel/debug/wimax:wmx0/i2400m-usb/dl_rx
+	/sys/kernel/debug/wimax:wmx0/i2400m-usb/dl_notif
+	/sys/kernel/debug/wimax:wmx0/i2400m-usb/dl_fw
+	/sys/kernel/debug/wimax:wmx0/i2400m-usb/dl_usb
+	/sys/kernel/debug/wimax:wmx0/i2400m/dl_tx
+	/sys/kernel/debug/wimax:wmx0/i2400m/dl_rx
+	/sys/kernel/debug/wimax:wmx0/i2400m/dl_rfkill
+	/sys/kernel/debug/wimax:wmx0/i2400m/dl_netdev
+	/sys/kernel/debug/wimax:wmx0/i2400m/dl_fw
+	/sys/kernel/debug/wimax:wmx0/i2400m/dl_debugfs
+	/sys/kernel/debug/wimax:wmx0/i2400m/dl_driver
+	/sys/kernel/debug/wimax:wmx0/i2400m/dl_control
+	/sys/kernel/debug/wimax:wmx0/wimax_dl_stack
+	/sys/kernel/debug/wimax:wmx0/wimax_dl_op_rfkill
+	/sys/kernel/debug/wimax:wmx0/wimax_dl_op_reset
+	/sys/kernel/debug/wimax:wmx0/wimax_dl_op_msg
+	/sys/kernel/debug/wimax:wmx0/wimax_dl_id_table
+	/sys/kernel/debug/wimax:wmx0/wimax_dl_debugfs
+
+   By reading the file you can obtain the current value of said debug
+   level; by writing to it, you can set it.
+
+   To increase the debug level of, for example, the i2400m's generic TX
+   engine, just write::
+
+	$ echo 3 > /sys/kernel/debug/wimax:wmx0/i2400m/dl_tx
+
+   Increasing numbers yield increasing debug information; for details of
+   what is printed and the available levels, check the source. The code
+   uses 0 for disabled and increasing values until 8.
+
+5.2.2. RX and TX statistics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+   The i2400m/rx_stats and i2400m/tx_stats provide statistics about the
+   data reception/delivery from the device::
+
+	$ cat /sys/kernel/debug/wimax:wmx0/i2400m/rx_stats
+	45 1 3 34 3104 48 480
+
+   The numbers reported are:
+
+     * packets/RX-buffer: total, min, max
+     * RX-buffers: total RX buffers received, accumulated RX buffer size
+       in bytes, min size received, max size received
+
+   Thus, to find the average buffer size received, divide accumulated
+   RX-buffer / total RX-buffers.
+
+   To clear the statistics back to 0, write anything to the rx_stats file::
+
+	$ echo 1 > /sys/kernel/debug/wimax:wmx0/i2400m/rx_stats
+
+   Likewise for TX.
+
+   Note the packets this debug file refers to are not network packets, but
+   packets in the sense of the device-specific protocol for communication
+   to the host. See drivers/staging/wimax/i2400m/tx.c.
+
+5.2.3. Tracing messages received from user space
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+   To echo messages received from user space into the trace pipe that the
+   i2400m driver creates, set the debug file i2400m/trace_msg_from_user to
+   1::
+
+	$ echo 1 > /sys/kernel/debug/wimax:wmx0/i2400m/trace_msg_from_user
+
+5.2.4. Performing a device reset
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+   By writing a 0, a 1 or a 2 to the file
+   /sys/kernel/debug/wimax:wmx0/reset, the driver performs a warm (without
+   disconnecting from the bus), cold (disconnecting from the bus) or bus
+   (bus specific) reset on the device.
+
+5.2.5. Asking the device to enter power saving mode
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+   By writing any value to the /sys/kernel/debug/wimax:wmx0 file, the
+   device will attempt to enter power saving mode.
+
+6. Troubleshooting
+==================
+
+6.1. Driver complains about ``i2400m-fw-usb-1.3.sbcf: request failed``
+----------------------------------------------------------------------
+
+   If upon connecting the device, the following is output in the kernel
+   log::
+
+	i2400m_usb 5-4:1.0: fw i2400m-fw-usb-1.3.sbcf: request failed: -2
+
+   This means that the driver cannot locate the firmware file named
+   /lib/firmware/i2400m-fw-usb-1.3.sbcf. Check that the file is present in
+   the right location.
diff --git a/drivers/staging/wimax/Documentation/index.rst b/drivers/staging/wimax/Documentation/index.rst
new file mode 100644
index 000000000000..fdf7c1f99ff5
--- /dev/null
+++ b/drivers/staging/wimax/Documentation/index.rst
@@ -0,0 +1,19 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+WiMAX subsystem
+===============
+
+.. toctree::
+   :maxdepth: 2
+
+   wimax
+
+   i2400m
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/drivers/staging/wimax/Documentation/wimax.rst b/drivers/staging/wimax/Documentation/wimax.rst
new file mode 100644
index 000000000000..817ee8ba2732
--- /dev/null
+++ b/drivers/staging/wimax/Documentation/wimax.rst
@@ -0,0 +1,89 @@
+.. include:: <isonum.txt>
+
+========================
+Linux kernel WiMAX stack
+========================
+
+:Copyright: |copy| 2008 Intel Corporation < linux-wimax@intel.com >
+
+   This provides a basic Linux kernel WiMAX stack to provide a common
+   control API for WiMAX devices, usable from kernel and user space.
+
+1. Design
+=========
+
+   The WiMAX stack is designed to provide for common WiMAX control
+   services to current and future WiMAX devices from any vendor.
+
+   Because currently there is only one and we don't know what would be the
+   common services, the APIs it currently provides are very minimal.
+   However, it is done in such a way that it is easily extensible to
+   accommodate future requirements.
+
+   The stack works by embedding a struct wimax_dev in your device's
+   control structures. This provides a set of callbacks that the WiMAX
+   stack will call in order to implement control operations requested by
+   the user. As well, the stack provides API functions that the driver
+   calls to notify about changes of state in the device.
+
+   The stack exports the API calls needed to control the device to user
+   space using generic netlink as a marshalling mechanism. You can access
+   them using your own code or use the wrappers provided for your
+   convenience in libwimax (in the wimax-tools package).
+
+   For detailed information on the stack, please see
+   include/linux/wimax.h.
+
+2. Usage
+========
+
+   For usage in a driver (registration, API, etc) please refer to the
+   instructions in the header file include/linux/wimax.h.
+
+   When a device is registered with the WiMAX stack, a set of debugfs
+   files will appear in /sys/kernel/debug/wimax:wmxX that you can tweak
+   for control.
+
+2.1. Obtaining debug information: debugfs entries
+-------------------------------------------------
+
+   The WiMAX stack is compiled, by default, with debug messages that can
+   be used to diagnose issues. By default, said messages are disabled.
+
+   The drivers will register debugfs entries that allow the user to tweak
+   debug settings.
+
+   Each driver, when registering with the stack, will cause a debugfs
+   directory named wimax:DEVICENAME to be created; optionally, it might
+   create more subentries below it.
+
+2.1.1. Increasing debug output
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+   The files named *dl_* indicate knobs for controlling the debug output
+   of different submodules of the WiMAX stack::
+
+	# find /sys/kernel/debug/wimax\:wmx0 -name \*dl_\*
+	/sys/kernel/debug/wimax:wmx0/wimax_dl_stack
+	/sys/kernel/debug/wimax:wmx0/wimax_dl_op_rfkill
+	/sys/kernel/debug/wimax:wmx0/wimax_dl_op_reset
+	/sys/kernel/debug/wimax:wmx0/wimax_dl_op_msg
+	/sys/kernel/debug/wimax:wmx0/wimax_dl_id_table
+	/sys/kernel/debug/wimax:wmx0/wimax_dl_debugfs
+	/sys/kernel/debug/wimax:wmx0/.... # other driver specific files
+
+   NOTE:
+       Of course, if debugfs is mounted in a directory other than
+       /sys/kernel/debug, those paths will change.
+
+   By reading the file you can obtain the current value of said debug
+   level; by writing to it, you can set it.
+
+   To increase the debug level of, for example, the id-table submodule,
+   just write::
+
+	$ echo 3 > /sys/kernel/debug/wimax:wmx0/wimax_dl_id_table
+
+   Increasing numbers yield increasing debug information; for details of
+   what is printed and the available levels, check the source. The code
+   uses 0 for disabled and increasing values until 8.
diff --git a/drivers/staging/wimax/Kconfig b/drivers/staging/wimax/Kconfig
new file mode 100644
index 000000000000..ded8b70b25ee
--- /dev/null
+++ b/drivers/staging/wimax/Kconfig
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# WiMAX LAN device configuration
+#
+
+menuconfig WIMAX
+	tristate "WiMAX Wireless Broadband support"
+	depends on RFKILL || !RFKILL
+	help
+
+	  Select to configure support for devices that provide
+	  wireless broadband connectivity using the WiMAX protocol
+	  (IEEE 802.16).
+
+	  Please note that most of these devices require signing up
+	  for a service plan with a provider.
+
+	  The different WiMAX drivers can be enabled in the menu entry
+
+	  Device Drivers > Network device support > WiMAX Wireless
+	  Broadband devices
+
+	  If unsure, it is safe to select M (module).
+
+if WIMAX
+
+config WIMAX_DEBUG_LEVEL
+	int "WiMAX debug level"
+	depends on WIMAX
+	default 8
+	help
+
+	  Select the maximum debug verbosity level to be compiled into
+	  the WiMAX stack code.
+
+	  By default, debug messages are disabled at runtime and can
+	  be selectively enabled for different parts of the code using
+	  the sysfs debug-levels file.
+
+	  If set at zero, this will compile out all the debug code.
+
+	  It is recommended that it is left at 8.
+
+source "drivers/staging/wimax/i2400m/Kconfig"
+
+endif
diff --git a/drivers/staging/wimax/Makefile b/drivers/staging/wimax/Makefile
new file mode 100644
index 000000000000..0e3f988656aa
--- /dev/null
+++ b/drivers/staging/wimax/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_WIMAX)		+= wimax.o
+
+wimax-y :=		\
+	id-table.o	\
+	op-msg.o	\
+	op-reset.o	\
+	op-rfkill.o	\
+	op-state-get.o	\
+	stack.o
+
+wimax-$(CONFIG_DEBUG_FS) += debugfs.o
+
+obj-$(CONFIG_WIMAX_I2400M)	+= i2400m/
diff --git a/drivers/staging/wimax/TODO b/drivers/staging/wimax/TODO
new file mode 100644
index 000000000000..26e4cb9e9599
--- /dev/null
+++ b/drivers/staging/wimax/TODO
@@ -0,0 +1,18 @@
+There are no known users of this driver as of October 2020, and it will
+be removed unless someone turns out to still need it in future releases.
+
+According to https://en.wikipedia.org/wiki/List_of_WiMAX_networks, there
+have been many public wimax networks, but it appears that many of these
+have migrated to LTE or discontinued their service altogether.  As most
+PCs and phones lack WiMAX hardware support, the remaining networks tend
+to use standalone routers. These almost certainly run Linux, but not a
+modern kernel or the mainline wimax driver stack.
+
+NetworkManager appears to have dropped userspace support in 2015
+(https://bugzilla.gnome.org/show_bug.cgi?id=747846); the www.linuxwimax.org
+site had already shut down earlier.
+
+WiMax is apparently still being deployed on airport campus networks
+("AeroMACS"), but in a frequency band that was not supported by the old
+Intel 2400m (used in Sandy Bridge laptops and earlier), which is the
+only driver using the kernel's wimax stack.
diff --git a/drivers/staging/wimax/debug-levels.h b/drivers/staging/wimax/debug-levels.h
new file mode 100644
index 000000000000..b854802d1d00
--- /dev/null
+++ b/drivers/staging/wimax/debug-levels.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Linux WiMAX Stack
+ * Debug levels control file for the wimax module
+ *
+ * Copyright (C) 2007-2008 Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ */
+#ifndef __debug_levels__h__
+#define __debug_levels__h__
+
+/* Maximum compile and run time debug level for all submodules */
+#define D_MODULENAME wimax
+#define D_MASTER CONFIG_WIMAX_DEBUG_LEVEL
+
+#include "linux-wimax-debug.h"
+
+/* Submodules with their own debug level (each registered in debugfs.c) */
+enum d_module {
+	D_SUBMODULE_DECLARE(debugfs),
+	D_SUBMODULE_DECLARE(id_table),
+	D_SUBMODULE_DECLARE(op_msg),
+	D_SUBMODULE_DECLARE(op_reset),
+	D_SUBMODULE_DECLARE(op_rfkill),
+	D_SUBMODULE_DECLARE(op_state_get),
+	D_SUBMODULE_DECLARE(stack),
+};
+
+#endif /* #ifndef __debug_levels__h__ */
diff --git a/drivers/staging/wimax/debugfs.c b/drivers/staging/wimax/debugfs.c
new file mode 100644
index 000000000000..e11bff61ffcf
--- /dev/null
+++ b/drivers/staging/wimax/debugfs.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Linux WiMAX
+ * Debugfs support
+ *
+ * Copyright (C) 2005-2006 Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ */
+#include <linux/debugfs.h>
+#include "linux-wimax.h"
+#include "wimax-internal.h"
+
+#define D_SUBMODULE debugfs
+#include "debug-levels.h"
+
+void wimax_debugfs_add(struct wimax_dev *wimax_dev)
+{
+	struct net_device *net_dev = wimax_dev->net_dev;
+	struct dentry *dentry;
+	char buf[128];	/* directory name: "wimax:" + netdev name */
+
+	snprintf(buf, sizeof(buf), "wimax:%s", net_dev->name);
+	dentry = debugfs_create_dir(buf, NULL);
+	wimax_dev->debugfs_dentry = dentry;	/* used by wimax_debugfs_rm() */
+	/* One "wimax_dl_"-prefixed debug-level entry per stack submodule */
+	d_level_register_debugfs("wimax_dl_", debugfs, dentry);
+	d_level_register_debugfs("wimax_dl_", id_table, dentry);
+	d_level_register_debugfs("wimax_dl_", op_msg, dentry);
+	d_level_register_debugfs("wimax_dl_", op_reset, dentry);
+	d_level_register_debugfs("wimax_dl_", op_rfkill, dentry);
+	d_level_register_debugfs("wimax_dl_", op_state_get, dentry);
+	d_level_register_debugfs("wimax_dl_", stack, dentry);
+}
+
+void wimax_debugfs_rm(struct wimax_dev *wimax_dev)
+{
+	debugfs_remove_recursive(wimax_dev->debugfs_dentry); /* dir + entries */
+}
diff --git a/drivers/staging/wimax/i2400m/Kconfig b/drivers/staging/wimax/i2400m/Kconfig
new file mode 100644
index 000000000000..843b905a26a3
--- /dev/null
+++ b/drivers/staging/wimax/i2400m/Kconfig
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config WIMAX_I2400M
+	tristate
+	depends on WIMAX
+	select FW_LOADER
+
+comment "Enable USB support to see WiMAX USB drivers"
+	depends on USB = n
+
+config WIMAX_I2400M_USB
+	tristate "Intel Wireless WiMAX Connection 2400 over USB (including 5x50)"
+	depends on WIMAX && USB
+	select WIMAX_I2400M
+	help
+	  Select if you have a device based on the Intel WiMAX
+	  Connection 2400 over USB (like any of the Intel Wireless
+	  WiMAX/WiFi Link 5x50 series).
+
+	  If unsure, it is safe to select M (module).
+
+config WIMAX_I2400M_DEBUG_LEVEL
+	int "WiMAX i2400m debug level"
+	depends on WIMAX_I2400M
+	default 8
+	help
+
+	  Select the maximum debug verbosity level to be compiled into
+	  the WiMAX i2400m driver code.
+
+	  By default, this is disabled at runtime and can be
+	  selectively enabled at runtime for different parts of the
+	  code using the sysfs debug-levels file.
+
+	  If set at zero, this will compile out all the debug code.
+
+	  It is recommended that it is left at 8.
diff --git a/drivers/staging/wimax/i2400m/Makefile b/drivers/staging/wimax/i2400m/Makefile
new file mode 100644
index 000000000000..b1db1eff0648
--- /dev/null
+++ b/drivers/staging/wimax/i2400m/Makefile
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_WIMAX_I2400M) += i2400m.o
+obj-$(CONFIG_WIMAX_I2400M_USB) += i2400m-usb.o
+
+i2400m-y :=		\
+	control.o	\
+	driver.o	\
+	fw.o		\
+	op-rfkill.o	\
+	sysfs.o		\
+	netdev.o	\
+	tx.o		\
+	rx.o
+
+i2400m-$(CONFIG_DEBUG_FS) += debugfs.o
+
+i2400m-usb-y :=			\
+	usb-fw.o		\
+	usb-notif.o		\
+	usb-tx.o		\
+	usb-rx.o		\
+	usb.o
diff --git a/drivers/staging/wimax/i2400m/control.c b/drivers/staging/wimax/i2400m/control.c
new file mode 100644
index 000000000000..fe885aa56cf3
--- /dev/null
+++ b/drivers/staging/wimax/i2400m/control.c
@@ -0,0 +1,1434 @@
+/*
+ * Intel Wireless WiMAX Connection 2400m
+ * Miscellaneous control functions for managing the device
+ *
+ *
+ * Copyright (C) 2007-2008 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *  - Initial implementation
+ *
+ * This is a collection of functions used to control the device (plus
+ * a few helpers).
+ *
+ * There are utilities for handling TLV buffers, hooks on the device's
+ * reports to act on device changes of state [i2400m_report_hook()],
+ * on acks to commands [i2400m_msg_ack_hook()], a helper for sending
+ * commands to the device and blocking until a reply arrives
+ * [i2400m_msg_to_dev()], a few high level commands for manipulating
+ * the device state, powersaving mode and configuration plus the
+ * routines to setup the device once communication is established with
+ * it [i2400m_dev_initialize()].
+ *
+ * ROADMAP
+ *
+ * i2400m_dev_initialize()       Called by i2400m_dev_start()
+ *   i2400m_set_init_config()
+ *   i2400m_cmd_get_state()
+ * i2400m_dev_shutdown()        Called by i2400m_dev_stop()
+ *   i2400m_reset()
+ *
+ * i2400m_{cmd,get,set}_*()
+ *   i2400m_msg_to_dev()
+ *   i2400m_msg_check_status()
+ *
+ * i2400m_report_hook()         Called on reception of an event
+ *   i2400m_report_state_hook()
+ *     i2400m_tlv_buffer_walk()
+ *     i2400m_tlv_match()
+ *     i2400m_report_tlv_system_state()
+ *     i2400m_report_tlv_rf_switches_status()
+ *     i2400m_report_tlv_media_status()
+ *   i2400m_cmd_enter_powersave()
+ *
+ * i2400m_msg_ack_hook()        Called on reception of a reply to a
+ *                              command, get or set
+ */
+
+#include <stdarg.h>
+#include "i2400m.h"
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include "linux-wimax-i2400m.h"
+#include <linux/export.h>
+#include <linux/moduleparam.h>
+
+
+#define D_SUBMODULE control
+#include "debug-levels.h"
+
+static int i2400m_idle_mode_disabled;/* 0 (idle mode enabled) by default */
+module_param_named(idle_mode_disabled, i2400m_idle_mode_disabled, int, 0644);
+MODULE_PARM_DESC(idle_mode_disabled,
+		 "If true, the device will not enable idle mode negotiation "
+		 "with the base station (when connected) to save power.");
+
+/* 0 (power saving enabled) by default */
+static int i2400m_power_save_disabled;
+module_param_named(power_save_disabled, i2400m_power_save_disabled, int, 0644);
+MODULE_PARM_DESC(power_save_disabled,
+		 "If true, the driver will not tell the device to enter "
+		 "power saving mode when it reports it is ready for it. "
+		 "False by default (so the device is told to do power "
+		 "saving).");
+
+static int i2400m_passive_mode;	/* 0 (passive mode disabled) by default */
+module_param_named(passive_mode, i2400m_passive_mode, int, 0644);
+MODULE_PARM_DESC(passive_mode,
+		 "If true, the driver will not do any device setup "
+		 "and leave it up to user space, who must be properly "
+		 "setup.");
+
+
+/*
+ * Check whether a TLV is of a given type and size
+ *
+ * @tlv: pointer to the TLV
+ * @tlv_type: type of the TLV we are looking for
+ * @tlv_size: expected size of the TLV we are looking for (if -1,
+ *            don't check the size). This includes the header
+ * Returns: 0 if the TLV matches
+ *          < 0 if it doesn't match at all
+ *          > 0 total header + payload size of the TLV, if the type
+ *              matches, but not the size (a warning is printed)
+ */
+static
+ssize_t i2400m_tlv_match(const struct i2400m_tlv_hdr *tlv,
+		     enum i2400m_tlv tlv_type, ssize_t tlv_size)
+{
+	if (le16_to_cpu(tlv->type) != tlv_type)	/* Not our type? skip */
+		return -1;
+	if (tlv_size != -1
+	    && le16_to_cpu(tlv->length) + sizeof(*tlv) != tlv_size) {
+		size_t size = le16_to_cpu(tlv->length) + sizeof(*tlv);
+		printk(KERN_WARNING "W: tlv type 0x%x mismatched because of "
+		       "size (got %zu vs %zd expected)\n",
+		       tlv_type, size, tlv_size);
+		return size;
+	}
+	return 0;
+}
+
+
+/*
+ * Given a buffer of TLVs, iterate over them
+ *
+ * @i2400m: device instance
+ * @tlv_buf: pointer to the beginning of the TLV buffer
+ * @buf_size: buffer size in bytes
+ * @tlv_pos: seek position; this is assumed to be a pointer returned
+ *           by i2400m_tlv_buffer_walk() [and thus, validated]. The
+ *           TLV returned will be the one following this one.
+ *
+ * Returns: pointer to the next TLV, whose header and payload fit fully
+ *          in the buffer; NULL when the buffer is exhausted or when the
+ *          next TLV is truncated (an error is logged in that case).
+ *
+ * Usage:
+ *
+ * tlv_itr = NULL;
+ * while (tlv_itr = i2400m_tlv_buffer_walk(i2400m, buf, size, tlv_itr))  {
+ *         // Do stuff with tlv_itr, DON'T MODIFY IT
+ * }
+ */
+static
+const struct i2400m_tlv_hdr *i2400m_tlv_buffer_walk(
+	struct i2400m *i2400m,
+	const void *tlv_buf, size_t buf_size,
+	const struct i2400m_tlv_hdr *tlv_pos)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	const struct i2400m_tlv_hdr *tlv_top = tlv_buf + buf_size;
+	size_t offset, length, avail_size;
+	unsigned type;
+
+	if (tlv_pos == NULL)	/* Take the first one? */
+		tlv_pos = tlv_buf;
+	else			/* Nope, the next one */
+		tlv_pos = (void *) tlv_pos
+			+ le16_to_cpu(tlv_pos->length) + sizeof(*tlv_pos);
+	if (tlv_pos == tlv_top)		/* buffer done */
+		goto error_beyond_end;
+	if (tlv_pos > tlv_top) {
+		WARN_ON(1);
+		goto error_beyond_end;
+	}
+	offset = (void *) tlv_pos - (void *) tlv_buf;
+	avail_size = buf_size - offset;
+	if (avail_size < sizeof(*tlv_pos)) {
+		dev_err(dev, "HW BUG? tlv_buf %p [%zu bytes], tlv @%zu: "
+			"short header\n", tlv_buf, buf_size, offset);
+		goto error_short_header;
+	}
+	type = le16_to_cpu(tlv_pos->type);
+	length = le16_to_cpu(tlv_pos->length);
+	if (avail_size < sizeof(*tlv_pos) + length) {
+		dev_err(dev, "HW BUG? tlv_buf %p [%zu bytes], "
+			"tlv type 0x%04x @%zu: "
+			"short data (%zu bytes vs %zu needed)\n",
+			tlv_buf, buf_size, type, offset, avail_size,
+			sizeof(*tlv_pos) + length);
+		goto error_short_header;
+	}
+	return tlv_pos;		/* header and payload fit in the buffer */
+error_short_header:		/* never hand out a truncated TLV */
+error_beyond_end:
+	return NULL;
+}
+
+
+/*
+ * Find a TLV in a buffer of sequential TLVs
+ *
+ * @i2400m: device descriptor
+ * @tlv_hdr: pointer to the first TLV in the sequence
+ * @size: size of the buffer in bytes; all TLVs are assumed to fit
+ *        fully in the buffer (otherwise we'll complain).
+ * @tlv_type: type of the TLV we are looking for
+ * @tlv_size: expected size of the TLV we are looking for (if -1,
+ *            don't check the size). This includes the header
+ *
+ * Returns: NULL if the TLV is not found, otherwise a pointer to
+ *          it. A TLV of the right type but wrong size is warned about
+ *          and skipped; the walk continues with the next TLV.
+ */
+static
+const struct i2400m_tlv_hdr *i2400m_tlv_find(
+	struct i2400m *i2400m,
+	const struct i2400m_tlv_hdr *tlv_hdr, size_t size,
+	enum i2400m_tlv tlv_type, ssize_t tlv_size)
+{
+	ssize_t match;
+	struct device *dev = i2400m_dev(i2400m);
+	const struct i2400m_tlv_hdr *tlv = NULL;
+	while ((tlv = i2400m_tlv_buffer_walk(i2400m, tlv_hdr, size, tlv))) {
+		match = i2400m_tlv_match(tlv, tlv_type, tlv_size);
+		if (match == 0)		/* found it :) */
+			break;
+		if (match > 0)
+			dev_warn(dev, "TLV type 0x%04x found with size "
+				 "mismatch (%zu vs %zd needed)\n",
+				 tlv_type, match, tlv_size);
+	}
+	return tlv;
+}
+
+
+static const struct
+{
+	char *msg;	/* text i2400m_msg_check_status() formats into @strbuf */
+	int errno;	/* value i2400m_msg_check_status() returns */
+} ms_to_errno[I2400M_MS_MAX] = {	/* looked up by message status code */
+	[I2400M_MS_DONE_OK] = { "", 0 },
+	[I2400M_MS_DONE_IN_PROGRESS] = { "", 0 },
+	[I2400M_MS_INVALID_OP] = { "invalid opcode", -ENOSYS },
+	[I2400M_MS_BAD_STATE] = { "invalid state", -EILSEQ },
+	[I2400M_MS_ILLEGAL_VALUE] = { "illegal value", -EINVAL },
+	[I2400M_MS_MISSING_PARAMS] = { "missing parameters", -ENOMSG },
+	[I2400M_MS_VERSION_ERROR] = { "bad version", -EIO },
+	[I2400M_MS_ACCESSIBILITY_ERROR] = { "accesibility error", -EIO },
+	[I2400M_MS_BUSY] = { "busy", -EBUSY },
+	[I2400M_MS_CORRUPTED_TLV] = { "corrupted TLV", -EILSEQ },
+	[I2400M_MS_UNINITIALIZED] = { "uninitialized", -EILSEQ },
+	[I2400M_MS_UNKNOWN_ERROR] = { "unknown error", -EIO },
+	[I2400M_MS_PRODUCTION_ERROR] = { "production error", -EIO },
+	[I2400M_MS_NO_RF] = { "no RF", -EIO },
+	[I2400M_MS_NOT_READY_FOR_POWERSAVE] =
+		{ "not ready for powersave", -EACCES },
+	[I2400M_MS_THERMAL_CRITICAL] = { "thermal critical", -EL3HLT },
+};
+
+
+/*
+ * i2400m_msg_check_status - map a message's status field to an errno
+ *
+ * @l3l4_hdr: header of the message whose status is examined
+ * @strbuf: where to format a description of the status (may be NULL)
+ * @strbuf_size: room available in @strbuf; snprintf() truncates
+ *     anything that does not fit.
+ *
+ * Returns: 0 right away (leaving @strbuf untouched) when the status is
+ *          zero; otherwise the errno listed for it in ms_to_errno[],
+ *          or -EBADR when the status is beyond the end of that table.
+ */
+int i2400m_msg_check_status(const struct i2400m_l3l4_hdr *l3l4_hdr,
+			    char *strbuf, size_t strbuf_size)
+{
+	enum i2400m_ms code = le16_to_cpu(l3l4_hdr->status);
+	const char *text = "unknown status code";
+	int err = -EBADR;
+
+	if (code == 0)		/* success: nothing to translate */
+		return 0;
+
+	if (code < ARRAY_SIZE(ms_to_errno)) {	/* known code: use the table */
+		text = ms_to_errno[code].msg;
+		err = ms_to_errno[code].errno;
+	}
+
+	if (strbuf != NULL)
+		snprintf(strbuf, strbuf_size, "%s (%d)", text, code);
+
+	return err;
+}
+
+
+/*
+ * Propagate a device-reported System State to the driver and WiMAX stack
+ *
+ * @i2400m: device descriptor
+ * @ss: validated System State TLV
+ */
+static
+void i2400m_report_tlv_system_state(struct i2400m *i2400m,
+				    const struct i2400m_tlv_system_state *ss)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	struct wimax_dev *wimax_dev = &i2400m->wimax_dev;
+	enum i2400m_system_state new_state = le32_to_cpu(ss->state);
+
+	d_fnstart(3, dev, "(i2400m %p ss %p [%u])\n", i2400m, ss, new_state);
+
+	/* Record the state first; wake waiters only on an actual change */
+	if (new_state != i2400m->state) {
+		i2400m->state = new_state;
+		wake_up_all(&i2400m->state_wq);
+	}
+	/* Translate the device state into the WiMAX stack's state */
+	switch (new_state) {
+	case I2400M_SS_UNINITIALIZED:
+	case I2400M_SS_INIT:
+	case I2400M_SS_CONFIG:
+	case I2400M_SS_PRODUCTION:
+		wimax_state_change(wimax_dev, WIMAX_ST_UNINITIALIZED);
+		break;
+
+	case I2400M_SS_RF_OFF:
+	case I2400M_SS_RF_SHUTDOWN:
+		wimax_state_change(wimax_dev, WIMAX_ST_RADIO_OFF);
+		break;
+
+	/* The connecting states are reported as "ready" as well */
+	case I2400M_SS_READY:
+	case I2400M_SS_STANDBY:
+	case I2400M_SS_SLEEPACTIVE:
+	case I2400M_SS_CONNECTING:
+	case I2400M_SS_WIMAX_CONNECTED:
+		wimax_state_change(wimax_dev, WIMAX_ST_READY);
+		break;
+
+	case I2400M_SS_SCAN:
+	case I2400M_SS_OUT_OF_ZONE:
+		wimax_state_change(wimax_dev, WIMAX_ST_SCANNING);
+		break;
+
+	case I2400M_SS_IDLE:
+		d_printf(1, dev, "entering BS-negotiated idle mode\n");
+		fallthrough;
+	case I2400M_SS_DISCONNECTING:
+	case I2400M_SS_DATA_PATH_CONNECTED:
+		wimax_state_change(wimax_dev, WIMAX_ST_CONNECTED);
+		break;
+
+	default:
+		/* Not a state we know about: complain and warm-reset */
+		dev_err(dev, "HW BUG? unknown state %u: shutting down\n",
+			new_state);
+		i2400m_reset(i2400m, I2400M_RT_WARM);
+		break;
+	}
+	d_fnend(3, dev, "(i2400m %p ss %p [%u]) = void\n",
+		i2400m, ss, new_state);
+}
+
+
+/*
+ * Update the net device's carrier from a Media Status TLV
+ *
+ * @i2400m: device descriptor
+ * @ms: validated Media Status TLV
+ *
+ * The carrier is switched on or off according to the link report sent
+ * by the device. This is handled separately from what the WiMAX stack
+ * does with the device's state because a link-renew may be needed at
+ * times (for example, when the BS wants us to renew a DHCP lease).
+ *
+ * Indeed, the documentation says a DHCP negotiation should follow
+ * every link-up...
+ */
+static
+void i2400m_report_tlv_media_status(struct i2400m *i2400m,
+				    const struct i2400m_tlv_media_status *ms)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	struct net_device *net_dev = i2400m->wimax_dev.net_dev;
+	enum i2400m_media_status link = le32_to_cpu(ms->media_status);
+
+	d_fnstart(3, dev, "(i2400m %p ms %p [%u])\n", i2400m, ms, link);
+
+	switch (link) {
+	/*
+	 * A link-renew is the network asking for the DHCP lease to be
+	 * renewed -- so far, we are trusting the WiMAX Network Service
+	 * in user space to pick this up and poke the DHCP client; all
+	 * we do here is keep the carrier on, just like for a link-up.
+	 */
+	case I2400M_MEDIA_STATUS_LINK_RENEW:
+	case I2400M_MEDIA_STATUS_LINK_UP:
+		netif_carrier_on(net_dev);
+		break;
+	case I2400M_MEDIA_STATUS_LINK_DOWN:
+		netif_carrier_off(net_dev);
+		break;
+	default:
+		dev_err(dev, "HW BUG? unknown media status %u\n",
+			link);
+		break;
+	}
+
+	d_fnend(3, dev, "(i2400m %p ms %p [%u]) = void\n",
+		i2400m, ms, link);
+}
+
+
+/*
+ * Dispatch one TLV taken from a 'state report'
+ *
+ * @i2400m: device descriptor
+ * @tlv: header of the TLV to examine; the caller has already checked
+ *     that its size is consistent.
+ * @tag: prefix used in the debug messages
+ *
+ * System State, RF Status and Media Status TLVs go to their handlers.
+ */
+static
+void i2400m_report_state_parse_tlv(struct i2400m *i2400m,
+				   const struct i2400m_tlv_hdr *tlv,
+				   const char *tag)
+{
+	const struct i2400m_tlv_rf_switches_status *rfss;
+	const struct i2400m_tlv_system_state *ss;
+	const struct i2400m_tlv_media_status *ms;
+	struct device *dev = i2400m_dev(i2400m);
+
+	if (!i2400m_tlv_match(tlv, I2400M_TLV_SYSTEM_STATE, sizeof(*ss))) {
+		ss = container_of(tlv, typeof(*ss), hdr);
+		d_printf(2, dev, "%s: system state TLV "
+			 "found (0x%04x), state 0x%08x\n",
+			 tag, I2400M_TLV_SYSTEM_STATE,
+			 le32_to_cpu(ss->state));
+		i2400m_report_tlv_system_state(i2400m, ss);
+	}
+	if (!i2400m_tlv_match(tlv, I2400M_TLV_RF_STATUS, sizeof(*rfss))) {
+		rfss = container_of(tlv, typeof(*rfss), hdr);
+		d_printf(2, dev, "%s: RF status TLV "
+			 "found (0x%04x), sw 0x%02x hw 0x%02x\n",
+			 tag, I2400M_TLV_RF_STATUS,
+			 le32_to_cpu(rfss->sw_rf_switch),
+			 le32_to_cpu(rfss->hw_rf_switch));
+		i2400m_report_tlv_rf_switches_status(i2400m, rfss);
+	}
+	if (!i2400m_tlv_match(tlv, I2400M_TLV_MEDIA_STATUS, sizeof(*ms))) {
+		ms = container_of(tlv, typeof(*ms), hdr);
+		d_printf(2, dev, "%s: Media Status TLV: %u\n",
+			 tag, le32_to_cpu(ms->media_status));
+		i2400m_report_tlv_media_status(i2400m, ms);
+	}
+}
+
+
+/*
+ * Walk the TLVs carried by a 'state report' and act on each of them
+ *
+ * @i2400m: device descriptor
+ * @l3l4_hdr: pointer to message; it has been already validated for
+ *            consistent size.
+ * @size: size of the message; only used in the debug traces here, as
+ *        the TLV area is bounded by the length in @l3l4_hdr.
+ * @tag: passed to i2400m_report_state_parse_tlv() for its messages
+ *
+ * Every TLV found goes through i2400m_report_state_parse_tlv().
+ */
+static
+void i2400m_report_state_hook(struct i2400m *i2400m,
+			      const struct i2400m_l3l4_hdr *l3l4_hdr,
+			      size_t size, const char *tag)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	size_t len = le16_to_cpu(l3l4_hdr->length);
+	const struct i2400m_tlv_hdr *itr;
+
+	d_fnstart(4, dev, "(i2400m %p, l3l4_hdr %p, size %zu, %s)\n",
+		  i2400m, l3l4_hdr, size, tag);
+	itr = i2400m_tlv_buffer_walk(i2400m, &l3l4_hdr->pl, len, NULL);
+	while (itr != NULL) {
+		i2400m_report_state_parse_tlv(i2400m, itr, tag);
+		itr = i2400m_tlv_buffer_walk(i2400m, &l3l4_hdr->pl, len, itr);
+	}
+	d_fnend(4, dev, "(i2400m %p, l3l4_hdr %p, size %zu, %s) = void\n",
+		i2400m, l3l4_hdr, size, tag);
+}
+
+
+/*
+ * i2400m_report_hook - (maybe) act on a report
+ *
+ * @i2400m: device descriptor
+ * @l3l4_hdr: pointer to message; it has been already validated for
+ *            consistent size.
+ * @size: size of the message (header + payload). The header length
+ *        declaration is assumed to be congruent with @size (as in
+ *        sizeof(*l3l4_hdr) + l3l4_hdr->length == size)
+ *
+ * State reports are parsed for the information we need (such as
+ * carrier on/off); a power-save-ready report makes us request it.
+ */
+void i2400m_report_hook(struct i2400m *i2400m,
+			const struct i2400m_l3l4_hdr *l3l4_hdr, size_t size)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	unsigned report;
+
+	d_fnstart(3, dev, "(i2400m %p l3l4_hdr %p size %zu)\n",
+		  i2400m, l3l4_hdr, size);
+
+	/* Chew on the message: only two kinds of report carry
+	 * something we have to act upon; anything else is left
+	 * alone */
+	report = le16_to_cpu(l3l4_hdr->type);
+	if (report == I2400M_MT_REPORT_STATE) {
+		/* carrier detection... */
+		i2400m_report_state_hook(i2400m,
+					 l3l4_hdr, size, "REPORT STATE");
+	} else if (report == I2400M_MT_REPORT_POWERSAVE_READY
+		   && l3l4_hdr->status == cpu_to_le16(I2400M_MS_DONE_OK)) {
+		/* zzzzz: the device is ready for power save, so ask
+		 * it to enter it (unless that has been disabled) */
+		if (i2400m_power_save_disabled) {
+			d_printf(1, dev, "ready for powersave, "
+				 "not requesting (disabled by module "
+				 "parameter)\n");
+		} else {
+			d_printf(1, dev, "ready for powersave, "
+				 "requesting\n");
+			i2400m_cmd_enter_powersave(i2400m);
+		}
+	}
+
+	d_fnend(3, dev, "(i2400m %p l3l4_hdr %p size %zu) = void\n",
+		i2400m, l3l4_hdr, size);
+}
+
+
+/*
+ * i2400m_msg_ack_hook - process cmd/set/get ack for internal status
+ *
+ * @i2400m: device descriptor
+ * @l3l4_hdr: pointer to message; it has been already validated for
+ *            consistent size.
+ * @size: size of the message
+ *
+ * Extract information we might need from acks to commands and act on
+ * it. This is akin to i2400m_report_hook(). Most of this processing
+ * belongs in the function that issues the command; right now the only
+ * case handled here is an example disabled with 'if (0)'.
+ */
+static void i2400m_msg_ack_hook(struct i2400m *i2400m,
+				 const struct i2400m_l3l4_hdr *l3l4_hdr,
+				 size_t size)
+{
+	int result;
+	struct device *dev = i2400m_dev(i2400m);
+	unsigned int ack_type;
+	char strerr[32];
+
+	/* Chew on the message, we might need some information from
+	 * here */
+	ack_type = le16_to_cpu(l3l4_hdr->type);
+	switch (ack_type) {
+	case I2400M_MT_CMD_ENTER_POWERSAVE:
+		/* This is just left here for the sake of example, as
+		 * the processing is done somewhere else. */
+		if (0) {
+			result = i2400m_msg_check_status(
+				l3l4_hdr, strerr, sizeof(strerr));
+			if (result >= 0)	/* @size is a size_t: %zu */
+				d_printf(1, dev, "ready for power save: %zu\n",
+					 size);
+		}
+		break;
+	}
+}
+
+
+/*
+ * i2400m_msg_size_check() - check a message against its header's length
+ *
+ * Returns 0 when @msg_size covers the header plus the payload length it
+ * declares (extra bytes are fine, as there can be padding), else -EIO.
+ */
+int i2400m_msg_size_check(struct i2400m *i2400m,
+			  const struct i2400m_l3l4_hdr *l3l4_hdr,
+			  size_t msg_size)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	size_t needed;
+	int result = -EIO;
+	d_fnstart(4, dev, "(i2400m %p l3l4_hdr %p msg_size %zu)\n",
+		  i2400m, l3l4_hdr, msg_size);
+	/* The length field can only be read if the header is all there */
+	if (msg_size < sizeof(*l3l4_hdr)) {
+		dev_err(dev, "bad size for message header "
+			"(expected at least %zu, got %zu)\n",
+			(size_t) sizeof(*l3l4_hdr), msg_size);
+		goto out;
+	}
+	needed = sizeof(*l3l4_hdr) + le16_to_cpu(l3l4_hdr->length);
+	if (msg_size < needed) {
+		dev_err(dev, "bad size for message code 0x%04x (expected %zu, "
+			"got %zu)\n", le16_to_cpu(l3l4_hdr->type),
+			needed, msg_size);
+		goto out;
+	}
+	result = 0;
+out:
+	d_fnend(4, dev,
+		"(i2400m %p l3l4_hdr %p msg_size %zu) = %d\n",
+		i2400m, l3l4_hdr, msg_size, result);
+	return result;
+}
+
+
+
+/*
+ * Abort the wait for a command's ACK
+ *
+ * @i2400m: device descriptor
+ * @code: [negative] errno code stored in place of the ack (don't use
+ *     -EINPROGRESS)
+ *
+ * An ack skb that was already stored is freed before being replaced.
+ */
+void i2400m_msg_to_dev_cancel_wait(struct i2400m *i2400m, int code)
+{
+	unsigned long flags;
+	struct sk_buff *stale;
+
+	spin_lock_irqsave(&i2400m->rx_lock, flags);
+	stale = i2400m->ack_skb;
+	i2400m->ack_skb = ERR_PTR(code);
+	if (stale && !IS_ERR(stale))
+		kfree_skb(stale);
+	spin_unlock_irqrestore(&i2400m->rx_lock, flags);
+}
+
+
+/**
+ * i2400m_msg_to_dev - Send a control message to the device and get a response
+ *
+ * @i2400m: device descriptor
+ *
+ * @buf: pointer to the buffer containing the message to be sent; it
+ *           has to start with a &struct i2400m_l3l4_hdr and then be
+ *           followed by the payload. Once this function returns, the
+ *           buffer can be reused.
+ *
+ * @buf_len: buffer size
+ *
+ * Returns:
+ *
+ * Pointer to skb containing the ack message. You need to check the
+ * pointer with IS_ERR(), as it might be an error code. Error codes
+ * could happen because:
+ *
+ *  - the message wasn't formatted correctly
+ *  - couldn't send the message
+ *  - failed waiting for a response
+ *  - the ack message wasn't formatted correctly
+ *
+ * The returned skb has been allocated with wimax_msg_to_user_alloc(),
+ * it contains the response in a netlink attribute and is ready to be
+ * passed up to user space with wimax_msg_to_user_send(). To access
+ * the payload and its length, use wimax_msg_{data,len}() on the skb.
+ *
+ * The skb has to be freed with kfree_skb() once done.
+ *
+ * Description:
+ *
+ * This function delivers a message/command to the device and waits
+ * for an ack to be received. The format is described in
+ * linux/wimax/i2400m.h. In summary, a command/get/set is followed by an
+ * ack.
+ *
+ * This function will not check the ack status, that's left up to the
+ * caller.  Once done with the ack skb, it has to be kfree_skb()ed.
+ *
+ * The i2400m handles only one message at the same time, thus we need
+ * the mutex to exclude other players.
+ *
+ * We write the message and then wait for an answer to come back. The
+ * RX path intercepts control messages and handles them in
+ * i2400m_rx_ctl(). Reports (notifications) are (maybe) processed
+ * locally and then forwarded (as needed) to user space on the WiMAX
+ * stack message pipe. Acks are saved and passed back to us through an
+ * skb in i2400m->ack_skb which is ready to be given to generic
+ * netlink if need be.
+ */
+struct sk_buff *i2400m_msg_to_dev(struct i2400m *i2400m,
+				  const void *buf, size_t buf_len)
+{
+	int result;
+	struct device *dev = i2400m_dev(i2400m);
+	const struct i2400m_l3l4_hdr *msg_l3l4_hdr;
+	struct sk_buff *ack_skb;
+	const struct i2400m_l3l4_hdr *ack_l3l4_hdr;
+	size_t ack_len;
+	int ack_timeout;
+	unsigned msg_type;
+	unsigned long flags;
+
+	d_fnstart(3, dev, "(i2400m %p buf %p len %zu)\n",
+		  i2400m, buf, buf_len);
+
+	rmb();		/* Make sure we see what i2400m_dev_reset_handle() set */
+	if (i2400m->boot_mode)
+		return ERR_PTR(-EL3RST);	/* msg_mutex not taken yet */
+
+	msg_l3l4_hdr = buf;
+	/* Check msg & payload consistency */
+	result = i2400m_msg_size_check(i2400m, msg_l3l4_hdr, buf_len);
+	if (result < 0)
+		goto error_bad_msg;
+	msg_type = le16_to_cpu(msg_l3l4_hdr->type);
+	d_printf(1, dev, "CMD/GET/SET 0x%04x %zu bytes\n",
+		 msg_type, buf_len);
+	d_dump(2, dev, buf, buf_len);
+
+	/* Setup the completion, ack_skb ("we are waiting") and send
+	 * the message to the device */
+	mutex_lock(&i2400m->msg_mutex);
+	spin_lock_irqsave(&i2400m->rx_lock, flags);
+	i2400m->ack_skb = ERR_PTR(-EINPROGRESS);
+	spin_unlock_irqrestore(&i2400m->rx_lock, flags);
+	init_completion(&i2400m->msg_completion);
+	result = i2400m_tx(i2400m, buf, buf_len, I2400M_PT_CTRL);
+	if (result < 0) {
+		dev_err(dev, "can't send message 0x%04x: %d\n",
+			le16_to_cpu(msg_l3l4_hdr->type), result);
+		goto error_tx;
+	}
+
+	/* Some commands take longer to execute because of crypto ops,
+	 * so we give them some more leeway on timeout */
+	switch (msg_type) {
+	case I2400M_MT_GET_TLS_OPERATION_RESULT:
+	case I2400M_MT_CMD_SEND_EAP_RESPONSE:
+		ack_timeout = 5 * HZ;
+		break;
+	default:
+		ack_timeout = HZ;
+	}
+
+	if (unlikely(i2400m->trace_msg_from_user))
+		wimax_msg(&i2400m->wimax_dev, "echo", buf, buf_len, GFP_KERNEL);
+	/* The RX path in rx.c will put any response for this message
+	 * in i2400m->ack_skb and wake us up. If we cancel the wait,
+	 * we need to change the value of i2400m->ack_skb to something
+	 * not -EINPROGRESS so RX knows there is no one waiting. */
+	result = wait_for_completion_interruptible_timeout(
+		&i2400m->msg_completion, ack_timeout);
+	if (result == 0) {
+		dev_err(dev, "timeout waiting for reply to message 0x%04x\n",
+			msg_type);
+		result = -ETIMEDOUT;
+		i2400m_msg_to_dev_cancel_wait(i2400m, result);
+		goto error_wait_for_completion;
+	} else if (result < 0) {
+		dev_err(dev, "error waiting for reply to message 0x%04x: %d\n",
+			msg_type, result);
+		i2400m_msg_to_dev_cancel_wait(i2400m, result);
+		goto error_wait_for_completion;
+	}
+
+	/* Pull out the ack data from i2400m->ack_skb -- see if it is
+	 * an error and act accordingly */
+	spin_lock_irqsave(&i2400m->rx_lock, flags);
+	ack_skb = i2400m->ack_skb;
+	if (IS_ERR(ack_skb))
+		result = PTR_ERR(ack_skb);
+	else
+		result = 0;
+	i2400m->ack_skb = NULL;		/* ack (or error) now held in locals */
+	spin_unlock_irqrestore(&i2400m->rx_lock, flags);
+	if (result < 0)
+		goto error_ack_status;
+	ack_l3l4_hdr = wimax_msg_data_len(ack_skb, &ack_len);
+
+	/* Check the ack and deliver it if it is ok */
+	if (unlikely(i2400m->trace_msg_from_user))
+		wimax_msg(&i2400m->wimax_dev, "echo",
+			  ack_l3l4_hdr, ack_len, GFP_KERNEL);
+	result = i2400m_msg_size_check(i2400m, ack_l3l4_hdr, ack_len);
+	if (result < 0) {
+		dev_err(dev, "HW BUG? reply to message 0x%04x: %d\n",
+			msg_type, result);
+		goto error_bad_ack_len;
+	}
+	if (msg_type != le16_to_cpu(ack_l3l4_hdr->type)) {
+		dev_err(dev, "HW BUG? bad reply 0x%04x to message 0x%04x\n",
+			le16_to_cpu(ack_l3l4_hdr->type), msg_type);
+		result = -EIO;
+		goto error_bad_ack_type;
+	}
+	i2400m_msg_ack_hook(i2400m, ack_l3l4_hdr, ack_len);
+	mutex_unlock(&i2400m->msg_mutex);
+	d_fnend(3, dev, "(i2400m %p buf %p len %zu) = %p\n",
+		i2400m, buf, buf_len, ack_skb);
+	return ack_skb;
+
+error_bad_ack_type:
+error_bad_ack_len:
+	kfree_skb(ack_skb);		/* a real skb on these two paths */
+error_ack_status:
+error_wait_for_completion:
+error_tx:
+	mutex_unlock(&i2400m->msg_mutex);
+error_bad_msg:
+	d_fnend(3, dev, "(i2400m %p buf %p len %zu) = %d\n",
+		i2400m, buf, buf_len, result);
+	return ERR_PTR(result);
+}
+
+
+/*
+ * Definitions for the Enter Power Save command
+ *
+ * The Enter Power Save command asks the device to go into power saving
+ * mode; the device acks or naks depending on whether it is ready. On an
+ * ack we tell the USB subsystem to ... [left unfinished in the original].
+ *
+ * The device can also ask for power save itself by sending a
+ * REPORT_POWERSAVE_READY report; i2400m_report_hook() then issues this
+ * command, unless power save has been disabled by module parameter.
+ */
+enum {
+	I2400M_WAKEUP_ENABLED  = 0x01,	/* value i2400m_cmd_enter_powersave() sends */
+	I2400M_WAKEUP_DISABLED = 0x02,
+	I2400M_TLV_TYPE_WAKEUP_MODE = 144,	/* type of the TLV carrying it */
+};
+
+struct i2400m_cmd_enter_power_save {
+	struct i2400m_l3l4_hdr hdr;	/* message header */
+	struct i2400m_tlv_hdr tlv;	/* header of the wake-up mode TLV */
+	__le32 val;			/* payload of that TLV */
+} __packed;
+
+
+/*
+ * Ask the device to enter power save mode
+ *
+ * Mostly run when the device reports, with a REPORT_POWERSAVE_READY,
+ * that it is ready to go into powersave mode.
+ */
+int i2400m_cmd_enter_powersave(struct i2400m *i2400m)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	struct i2400m_cmd_enter_power_save *req;
+	struct sk_buff *ack_skb;
+	char strerr[32];
+	int result;
+
+	result = -ENOMEM;
+	req = kzalloc(sizeof(*req), GFP_KERNEL);
+	if (!req)
+		goto error_alloc;
+	req->hdr.type = cpu_to_le16(I2400M_MT_CMD_ENTER_POWERSAVE);
+	req->hdr.length = cpu_to_le16(sizeof(*req) - sizeof(req->hdr));
+	req->hdr.version = cpu_to_le16(I2400M_L3L4_VERSION);
+	req->tlv.type = cpu_to_le16(I2400M_TLV_TYPE_WAKEUP_MODE);
+	req->tlv.length = cpu_to_le16(sizeof(req->val));
+	req->val = cpu_to_le32(I2400M_WAKEUP_ENABLED);
+
+	ack_skb = i2400m_msg_to_dev(i2400m, req, sizeof(*req));
+	if (IS_ERR(ack_skb)) {
+		result = PTR_ERR(ack_skb);
+		dev_err(dev, "Failed to issue 'Enter power save' command: %d\n",
+			result);
+		goto error_msg_to_dev;
+	}
+	result = i2400m_msg_check_status(wimax_msg_data(ack_skb),
+					 strerr, sizeof(strerr));
+	if (result >= 0)
+		d_printf(1, dev, "device ready to power save\n");
+	else if (result == -EACCES)
+		d_printf(1, dev, "Cannot enter power save mode\n");
+	else
+		dev_err(dev, "'Enter power save' (0x%04x) command failed: "
+			"%d - %s\n", I2400M_MT_CMD_ENTER_POWERSAVE,
+			result, strerr);
+	kfree_skb(ack_skb);
+error_msg_to_dev:
+	kfree(req);
+error_alloc:
+	return result;
+}
+EXPORT_SYMBOL_GPL(i2400m_cmd_enter_powersave);
+
+
+/*
+ * Definitions for getting device information
+ */
+enum {
+	I2400M_TLV_DETAILED_DEVICE_INFO = 140	/* TLV searched for in the ack */
+};
+
+/**
+ * i2400m_get_device_info - Query the device for detailed device information
+ *
+ * @i2400m: device descriptor
+ *
+ * Returns: an skb whose skb->data points to a 'struct
+ *    i2400m_tlv_detailed_device_info'. When done, kfree_skb() it. The
+ *    skb is *guaranteed* to contain the whole TLV data structure.
+ *
+ *    On error, IS_ERR(skb) is true and PTR_ERR(skb) is the error
+ *    code.
+ */
+struct sk_buff *i2400m_get_device_info(struct i2400m *i2400m)
+{
+	int result;
+	struct device *dev = i2400m_dev(i2400m);
+	struct sk_buff *ack_skb;
+	struct i2400m_l3l4_hdr *cmd;
+	const struct i2400m_l3l4_hdr *ack;
+	size_t ack_len;
+	const struct i2400m_tlv_hdr *tlv;
+	const struct i2400m_tlv_detailed_device_info *ddi;	/* sizeof() only */
+	char strerr[32];
+
+	ack_skb = ERR_PTR(-ENOMEM);	/* returned as is if the alloc fails */
+	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
+	if (cmd == NULL)
+		goto error_alloc;
+	cmd->type = cpu_to_le16(I2400M_MT_GET_DEVICE_INFO);
+	cmd->length = 0;
+	cmd->version = cpu_to_le16(I2400M_L3L4_VERSION);
+
+	ack_skb = i2400m_msg_to_dev(i2400m, cmd, sizeof(*cmd));
+	if (IS_ERR(ack_skb)) {
+		dev_err(dev, "Failed to issue 'get device info' command: %ld\n",
+			PTR_ERR(ack_skb));
+		goto error_msg_to_dev;
+	}
+	ack = wimax_msg_data_len(ack_skb, &ack_len);
+	result = i2400m_msg_check_status(ack, strerr, sizeof(strerr));
+	if (result < 0) {
+		dev_err(dev, "'get device info' (0x%04x) command failed: "
+			"%d - %s\n", I2400M_MT_GET_DEVICE_INFO, result,
+			strerr);
+		goto error_cmd_failed;
+	}
+	tlv = i2400m_tlv_find(i2400m, ack->pl, ack_len - sizeof(*ack),
+			      I2400M_TLV_DETAILED_DEVICE_INFO, sizeof(*ddi));
+	if (tlv == NULL) {
+		dev_err(dev, "GET DEVICE INFO: "
+			"detailed device info TLV not found (0x%04x)\n",
+			I2400M_TLV_DETAILED_DEVICE_INFO);
+		result = -EIO;
+		goto error_no_tlv;
+	}
+	skb_pull(ack_skb, (void *) tlv - (void *) ack_skb->data);	/* drop what precedes the TLV */
+error_msg_to_dev:	/* also the success exit: ack_skb is what we return */
+	kfree(cmd);
+error_alloc:
+	return ack_skb;
+
+error_no_tlv:
+error_cmd_failed:
+	kfree_skb(ack_skb);	/* we got an ack, but it is of no use */
+	kfree(cmd);
+	return ERR_PTR(result);
+}
+
+
+/* Firmware interface versions we support */
+enum {
+	I2400M_HDIv_MAJOR = 9,		/* any other major is rejected */
+	I2400M_HDIv_MINOR = 1,		/* lowest minor not warned about */
+	I2400M_HDIv_MINOR_2 = 2,	/* highest minor not warned about */
+};
+
+
+/**
+ * i2400m_firmware_check - check firmware versions are compatible with
+ * the driver
+ *
+ * @i2400m: device descriptor
+ *
+ * Returns: 0 if ok, < 0 errno code on error and a message in the
+ *    kernel log.
+ *
+ * Long function, but quite simple; first chunk launches the command
+ * and double checks the reply for the right TLV. Then we process the
+ * TLV (where the meat is).
+ *
+ * Once we process the TLV that gives us the firmware's interface
+ * version, we encode it and save it in i2400m->fw_version for future
+ * reference.
+ */
+int i2400m_firmware_check(struct i2400m *i2400m)
+{
+	int result;
+	struct device *dev = i2400m_dev(i2400m);
+	struct sk_buff *ack_skb;
+	struct i2400m_l3l4_hdr *cmd;
+	const struct i2400m_l3l4_hdr *ack;
+	size_t ack_len;
+	const struct i2400m_tlv_hdr *tlv;
+	const struct i2400m_tlv_l4_message_versions *l4mv;
+	char strerr[32];
+	unsigned major, minor, branch;
+
+	result = -ENOMEM;
+	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
+	if (cmd == NULL)
+		goto error_alloc;
+	cmd->type = cpu_to_le16(I2400M_MT_GET_LM_VERSION);
+	cmd->length = 0;
+	cmd->version = cpu_to_le16(I2400M_L3L4_VERSION);
+
+	ack_skb = i2400m_msg_to_dev(i2400m, cmd, sizeof(*cmd));
+	if (IS_ERR(ack_skb)) {
+		result = PTR_ERR(ack_skb);
+		dev_err(dev, "Failed to issue 'get lm version' command: %-d\n",
+			result);
+		goto error_msg_to_dev;
+	}
+	ack = wimax_msg_data_len(ack_skb, &ack_len);
+	result = i2400m_msg_check_status(ack, strerr, sizeof(strerr));
+	if (result < 0) {
+		dev_err(dev, "'get lm version' (0x%04x) command failed: "
+			"%d - %s\n", I2400M_MT_GET_LM_VERSION, result,
+			strerr);
+		goto error_cmd_failed;
+	}
+	tlv = i2400m_tlv_find(i2400m, ack->pl, ack_len - sizeof(*ack),
+			      I2400M_TLV_L4_MESSAGE_VERSIONS, sizeof(*l4mv));
+	if (tlv == NULL) {
+		dev_err(dev, "get lm version: TLV not found (0x%04x)\n",
+			I2400M_TLV_L4_MESSAGE_VERSIONS);
+		result = -EIO;
+		goto error_no_tlv;
+	}
+	l4mv = container_of(tlv, typeof(*l4mv), hdr);
+	major = le16_to_cpu(l4mv->major);
+	minor = le16_to_cpu(l4mv->minor);
+	branch = le16_to_cpu(l4mv->branch);
+	result = -EINVAL;	/* in case the major does not match */
+	if (major != I2400M_HDIv_MAJOR) {
+		dev_err(dev, "unsupported major fw version "
+			"%u.%u.%u\n", major, minor, branch);
+		goto error_bad_major;
+	}
+	result = 0;		/* an unexpected minor only gets a warning */
+	if (minor > I2400M_HDIv_MINOR_2 || minor < I2400M_HDIv_MINOR)
+		dev_warn(dev, "untested minor fw version %u.%u.%u\n",
+			 major, minor, branch);
+	/* Yes, we ignore the branch -- we don't have to track it */
+	i2400m->fw_version = major << 16 | minor;
+	dev_info(dev, "firmware interface version %u.%u.%u\n",
+		 major, minor, branch);
+error_bad_major:
+error_no_tlv:
+error_cmd_failed:
+	kfree_skb(ack_skb);	/* success falls through here as well */
+error_msg_to_dev:
+	kfree(cmd);
+error_alloc:
+	return result;
+}
+
+
+/*
+ * Send a DoExitIdle command to the device, asking it to leave
+ * basestation-idle mode.
+ *
+ * @i2400m: device descriptor
+ *
+ * This starts a renegotiation with the basestation that might involve
+ * another crypto handshake with user space.
+ *
+ * Returns: 0 if ok, < 0 errno code on error.
+ */
+int i2400m_cmd_exit_idle(struct i2400m *i2400m)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	struct i2400m_l3l4_hdr *req;
+	struct sk_buff *ack_skb;
+	char strerr[32];
+	int result;
+
+	req = kzalloc(sizeof(*req), GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	/* Header-only command: no payload follows */
+	req->type = cpu_to_le16(I2400M_MT_CMD_EXIT_IDLE);
+	req->length = 0;
+	req->version = cpu_to_le16(I2400M_L3L4_VERSION);
+
+	ack_skb = i2400m_msg_to_dev(i2400m, req, sizeof(*req));
+	if (IS_ERR(ack_skb)) {
+		result = PTR_ERR(ack_skb);
+		dev_err(dev, "Failed to issue 'exit idle' command: %d\n",
+			result);
+	} else {
+		/* strerr gets filled in, but it is not reported here */
+		result = i2400m_msg_check_status(wimax_msg_data(ack_skb),
+						 strerr, sizeof(strerr));
+		kfree_skb(ack_skb);
+	}
+
+	kfree(req);
+	return result;
+}
+
+
+/*
+ * Query the device for its state, update the WiMAX stack's idea of it
+ *
+ * @i2400m: device descriptor
+ *
+ * Returns: 0 if ok, < 0 errno code on error.
+ *
+ * Executes a 'Get State' command and parses the returned TLVs; the
+ * ack skb is released on every path once it has been received.
+ *
+ * Because this is almost identical to a 'Report State', we use
+ * i2400m_report_state_hook() to parse the answer. This will set the
+ * carrier state, as well as the RF Kill switches state.
+ */
+static int i2400m_cmd_get_state(struct i2400m *i2400m)
+{
+	int result;
+	struct device *dev = i2400m_dev(i2400m);
+	struct sk_buff *ack_skb;
+	struct i2400m_l3l4_hdr *cmd;
+	const struct i2400m_l3l4_hdr *ack;
+	size_t ack_len;
+	char strerr[32];
+
+	result = -ENOMEM;
+	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
+	if (cmd == NULL)
+		goto error_alloc;
+	cmd->type = cpu_to_le16(I2400M_MT_GET_STATE);
+	cmd->length = 0;
+	cmd->version = cpu_to_le16(I2400M_L3L4_VERSION);
+
+	ack_skb = i2400m_msg_to_dev(i2400m, cmd, sizeof(*cmd));
+	if (IS_ERR(ack_skb)) {
+		dev_err(dev, "Failed to issue 'get state' command: %ld\n",
+			PTR_ERR(ack_skb));
+		result = PTR_ERR(ack_skb);
+		goto error_msg_to_dev;
+	}
+	ack = wimax_msg_data_len(ack_skb, &ack_len);
+	result = i2400m_msg_check_status(ack, strerr, sizeof(strerr));
+	if (result < 0) {
+		dev_err(dev, "'get state' (0x%04x) command failed: "
+			"%d - %s\n", I2400M_MT_GET_STATE, result, strerr);
+		goto error_cmd_failed;
+	}
+	i2400m_report_state_hook(i2400m, ack, ack_len - sizeof(*ack),
+				 "GET STATE");
+	result = 0;
+error_cmd_failed:
+	kfree_skb(ack_skb);	/* a failed status must not leak the ack */
+error_msg_to_dev:
+	kfree(cmd);
+error_alloc:
+	return result;
+}
+
+/**
+ * i2400m_set_init_config - Set basic configuration settings
+ *
+ * @i2400m: device descriptor
+ * @arg: array of pointers to the TLV headers to send for
+ *     configuration (each followed by its payload).
+ *     TLV headers and payloads must be properly initialized, with the
+ *     right endianess (LE).
+ * @args: number of pointers in the @arg array
+ */
+static int i2400m_set_init_config(struct i2400m *i2400m,
+				  const struct i2400m_tlv_hdr **arg,
+				  size_t args)
+{
+	int result;
+	struct device *dev = i2400m_dev(i2400m);
+	struct sk_buff *ack_skb;
+	struct i2400m_l3l4_hdr *cmd;
+	char strerr[32];
+	unsigned argc, argsize, tlv_size;
+	const struct i2400m_tlv_hdr *tlv_hdr;
+	void *buf, *itr;
+
+	d_fnstart(3, dev, "(i2400m %p arg %p args %zu)\n", i2400m, arg, args);
+	result = 0;
+	if (args == 0)		/* nothing to send: report success */
+		goto none;
+	/* Compute the size of all the TLVs, so we can alloc a
+	 * contiguous command block to copy them. */
+	argsize = 0;
+	for (argc = 0; argc < args; argc++) {
+		tlv_hdr = arg[argc];
+		argsize += sizeof(*tlv_hdr) + le16_to_cpu(tlv_hdr->length);
+	}
+	WARN_ON(argc >= 9);	/* As per hw spec */
+
+	/* Alloc the space for the command and TLVs*/
+	result = -ENOMEM;
+	buf = kzalloc(sizeof(*cmd) + argsize, GFP_KERNEL);
+	if (buf == NULL)
+		goto error_alloc;
+	cmd = buf;
+	cmd->type = cpu_to_le16(I2400M_MT_SET_INIT_CONFIG);
+	cmd->length = cpu_to_le16(argsize);
+	cmd->version = cpu_to_le16(I2400M_L3L4_VERSION);
+
+	/* Copy the TLVs */
+	itr = buf + sizeof(*cmd);	/* TLVs are laid out right after the header */
+	for (argc = 0; argc < args; argc++) {
+		tlv_hdr = arg[argc];
+		tlv_size = sizeof(*tlv_hdr) + le16_to_cpu(tlv_hdr->length);
+		memcpy(itr, tlv_hdr, tlv_size);
+		itr += tlv_size;
+	}
+
+	/* Send the message! */
+	ack_skb = i2400m_msg_to_dev(i2400m, buf, sizeof(*cmd) + argsize);
+	result = PTR_ERR(ack_skb);	/* only meaningful if IS_ERR() below */
+	if (IS_ERR(ack_skb)) {
+		dev_err(dev, "Failed to issue 'init config' command: %d\n",
+			result);
+
+		goto error_msg_to_dev;
+	}
+	result = i2400m_msg_check_status(wimax_msg_data(ack_skb),
+					 strerr, sizeof(strerr));
+	if (result < 0)
+		dev_err(dev, "'init config' (0x%04x) command failed: %d - %s\n",
+			I2400M_MT_SET_INIT_CONFIG, result, strerr);
+	kfree_skb(ack_skb);
+error_msg_to_dev:
+	kfree(buf);
+error_alloc:
+none:
+	d_fnend(3, dev, "(i2400m %p arg %p args %zu) = %d\n",
+		i2400m, arg, args, result);
+	return result;
+
+}
+
+/**
+ * i2400m_set_idle_timeout - Set the device's idle mode timeout
+ *
+ * @i2400m: i2400m device descriptor
+ * @msecs: milliseconds for the timeout to enter idle mode. Between
+ *     100 to 300000 (5m); 0 to disable. In increments of 100.
+ *
+ * After @msecs of the link being idle (no data sent or received), the
+ * device negotiates entering idle mode with the basestation to save
+ * power. The connection is kept, but leaving idle mode (done in tx.c)
+ * needs some negotiation, maybe a crypto re-handshake and DHCP re-lease.
+ *
+ * Only available if fw_version >= 0x00090002.
+ * NOTE(review): the message is sent with type I2400M_MT_GET_STATE,
+ *     which looks odd for a "set" command -- confirm with the HW spec.
+ *
+ * Returns: 0 if ok, < 0 errno code on error.
+ */
+int i2400m_set_idle_timeout(struct i2400m *i2400m, unsigned msecs)
+{
+	int result;
+	struct device *dev = i2400m_dev(i2400m);
+	struct sk_buff *ack_skb;
+	struct {
+		struct i2400m_l3l4_hdr hdr;
+		struct i2400m_tlv_config_idle_timeout cit;
+	} *cmd;
+	const struct i2400m_l3l4_hdr *ack;
+	size_t ack_len;
+	char strerr[32];
+
+	result = -ENOSYS;
+	if (i2400m_le_v1_3(i2400m))
+		goto error_alloc;
+	result = -ENOMEM;
+	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
+	if (cmd == NULL)
+		goto error_alloc;
+	cmd->hdr.type = cpu_to_le16(I2400M_MT_GET_STATE);
+	cmd->hdr.length = cpu_to_le16(sizeof(*cmd) - sizeof(cmd->hdr));
+	cmd->hdr.version = cpu_to_le16(I2400M_L3L4_VERSION);
+
+	cmd->cit.hdr.type =
+		cpu_to_le16(I2400M_TLV_CONFIG_IDLE_TIMEOUT);
+	cmd->cit.hdr.length = cpu_to_le16(sizeof(cmd->cit.timeout));
+	cmd->cit.timeout = cpu_to_le32(msecs);
+
+	ack_skb = i2400m_msg_to_dev(i2400m, cmd, sizeof(*cmd));
+	if (IS_ERR(ack_skb)) {
+		dev_err(dev, "Failed to issue 'set idle timeout' command: "
+			"%ld\n", PTR_ERR(ack_skb));
+		result = PTR_ERR(ack_skb);
+		goto error_msg_to_dev;
+	}
+	ack = wimax_msg_data_len(ack_skb, &ack_len);
+	result = i2400m_msg_check_status(ack, strerr, sizeof(strerr));
+	if (result < 0) {
+		dev_err(dev, "'set idle timeout' (0x%04x) command failed: "
+			"%d - %s\n", I2400M_MT_GET_STATE, result, strerr);
+		goto error_cmd_failed;
+	}
+	result = 0;
+error_cmd_failed:
+	kfree_skb(ack_skb);	/* a failed status must not leak the ack */
+error_msg_to_dev:
+	kfree(cmd);
+error_alloc:
+	return result;
+}
+
+
+/**
+ * i2400m_dev_initialize - Initialize the device once communications are ready
+ *
+ * @i2400m: device descriptor
+ *
+ * Returns: 0 if ok, < 0 errno code on error.
+ *
+ * Configures the device to work the way we like it: unless in passive
+ * mode, sends the init-config TLVs built below; then queries its state.
+ * At the point of this call, the device is registered with the WiMAX
+ * and netdev stacks, firmware is uploaded and we can talk to the
+ * device normally.
+ */
+int i2400m_dev_initialize(struct i2400m *i2400m)
+{
+	int result;
+	struct device *dev = i2400m_dev(i2400m);
+	struct i2400m_tlv_config_idle_parameters idle_params;
+	struct i2400m_tlv_config_idle_timeout idle_timeout;
+	struct i2400m_tlv_config_d2h_data_format df;
+	struct i2400m_tlv_config_dl_host_reorder dlhr;
+	const struct i2400m_tlv_hdr *args[9];	/* TLVs for set_init_config */
+	unsigned argc = 0;	/* number of entries used in args[] */
+
+	d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
+	if (i2400m_passive_mode)
+		goto out_passive;	/* send no config, only query state */
+	/* Disable idle mode? (enabled by default) */
+	if (i2400m_idle_mode_disabled) {
+		if (i2400m_le_v1_3(i2400m)) {
+			idle_params.hdr.type =
+				cpu_to_le16(I2400M_TLV_CONFIG_IDLE_PARAMETERS);
+			idle_params.hdr.length = cpu_to_le16(
+				sizeof(idle_params) - sizeof(idle_params.hdr));
+			idle_params.idle_timeout = 0;
+			idle_params.idle_paging_interval = 0;
+			args[argc++] = &idle_params.hdr;
+		} else {
+			idle_timeout.hdr.type =
+				cpu_to_le16(I2400M_TLV_CONFIG_IDLE_TIMEOUT);
+			idle_timeout.hdr.length = cpu_to_le16(
+				sizeof(idle_timeout) - sizeof(idle_timeout.hdr));
+			idle_timeout.timeout = 0;
+			args[argc++] = &idle_timeout.hdr;
+		}
+	}
+	if (i2400m_ge_v1_4(i2400m)) {
+		/* Enable extended RX data format? */
+		df.hdr.type =
+			cpu_to_le16(I2400M_TLV_CONFIG_D2H_DATA_FORMAT);
+		df.hdr.length = cpu_to_le16(
+			sizeof(df) - sizeof(df.hdr));
+		df.format = 1;
+		args[argc++] = &df.hdr;
+
+		/* Enable RX data reordering?
+		 * (switch flipped in rx.c:i2400m_rx_setup() after fw upload) */
+		if (i2400m->rx_reorder) {
+			dlhr.hdr.type =
+				cpu_to_le16(I2400M_TLV_CONFIG_DL_HOST_REORDER);
+			dlhr.hdr.length = cpu_to_le16(
+				sizeof(dlhr) - sizeof(dlhr.hdr));
+			dlhr.reorder = 1;
+			args[argc++] = &dlhr.hdr;
+		}
+	}
+	/* At most three TLVs are queued above (idle, format, reorder) */
+	result = i2400m_set_init_config(i2400m, args, argc);
+	if (result < 0)
+		goto error;
+out_passive:
+	/*
+	 * Update state: Here it just calls a get state; parsing the
+	 * result (System State TLV and RF Status TLV [done in the rx
+	 * path hooks]) will set the hardware and software RF-Kill
+	 * status.
+	 */
+	result = i2400m_cmd_get_state(i2400m);
+error:
+	if (result < 0)
+		dev_err(dev, "failed to initialize the device: %d\n", result);
+	d_fnend(3, dev, "(i2400m %p) = %d\n", i2400m, result);
+	return result;
+}
+
+
+/**
+ * i2400m_dev_shutdown - Shutdown a running device
+ *
+ * @i2400m: device descriptor
+ *
+ * Meant to release resources acquired while the device was running
+ * and, in theory, tell it to sleep and switch off the radio; in most
+ * cases (driver disconnection, reset handling) we can't even talk to
+ * the device, so the body currently only emits entry/exit traces.
+ */
+void i2400m_dev_shutdown(struct i2400m *i2400m)
+{
+	struct device *dev = i2400m_dev(i2400m);
+
+	d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
+	d_fnend(3, dev, "(i2400m %p) = void\n", i2400m);
+}
diff --git a/drivers/staging/wimax/i2400m/debug-levels.h b/drivers/staging/wimax/i2400m/debug-levels.h
new file mode 100644
index 000000000000..a317e9fbb734
--- /dev/null
+++ b/drivers/staging/wimax/i2400m/debug-levels.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Intel Wireless WiMAX Connection 2400m
+ * Debug levels control file for the i2400m module
+ *
+ * Copyright (C) 2007-2008 Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ */
+#ifndef __debug_levels__h__
+#define __debug_levels__h__
+
+/* Maximum compile and run time debug level for all submodules */
+#define D_MODULENAME i2400m
+#define D_MASTER CONFIG_WIMAX_I2400M_DEBUG_LEVEL
+
+#include "../linux-wimax-debug.h"
+
+/* List of all the debug submodules; each source file picks one via D_SUBMODULE */
+enum d_module {
+	D_SUBMODULE_DECLARE(control),
+	D_SUBMODULE_DECLARE(driver),
+	D_SUBMODULE_DECLARE(debugfs),
+	D_SUBMODULE_DECLARE(fw),
+	D_SUBMODULE_DECLARE(netdev),
+	D_SUBMODULE_DECLARE(rfkill),
+	D_SUBMODULE_DECLARE(rx),
+	D_SUBMODULE_DECLARE(sysfs),	/* NOTE(review): no dl_sysfs in i2400m_debugfs_add() */
+	D_SUBMODULE_DECLARE(tx),
+};
+
+
+#endif /* #ifndef __debug_levels__h__ */
diff --git a/drivers/staging/wimax/i2400m/debugfs.c b/drivers/staging/wimax/i2400m/debugfs.c
new file mode 100644
index 000000000000..1c640b41ea4c
--- /dev/null
+++ b/drivers/staging/wimax/i2400m/debugfs.c
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Intel Wireless WiMAX Connection 2400m
+ * Debugfs interfaces to manipulate driver and device information
+ *
+ * Copyright (C) 2007 Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ */
+
+#include <linux/debugfs.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/export.h>
+#include "i2400m.h"
+
+
+#define D_SUBMODULE debugfs
+#include "debug-levels.h"
+
+static
+int debugfs_netdev_queue_stopped_get(void *data, u64 *val)
+{
+	struct i2400m *i2400m = data;	/* cookie given to debugfs_create_file() */
+	*val = netif_queue_stopped(i2400m->wimax_dev.net_dev);	/* current state */
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(fops_netdev_queue_stopped,
+			debugfs_netdev_queue_stopped_get,
+			NULL, "%llu\n");	/* no setter: getter only */
+
+/*
+ * We don't allow partial reads of this file, as then the reader would
+ * get weirdly confused data as it is updated.
+ *
+ * So you either read it all or nothing; if you try to read with an
+ * offset != 0, we consider you are done reading.
+ */
+static
+ssize_t i2400m_rx_stats_read(struct file *filp, char __user *buffer,
+			     size_t count, loff_t *ppos)
+{
+	struct i2400m *i2400m = filp->private_data;
+	char buf[128];
+	unsigned long flags;
+
+	if (*ppos != 0)
+		return 0;	/* second read: report end of file */
+	if (count < sizeof(buf))
+		return -ENOSPC;	/* insist on room for the whole 128-byte buffer */
+	spin_lock_irqsave(&i2400m->rx_lock, flags);	/* snapshot all counters together */
+	snprintf(buf, sizeof(buf), "%u %u %u %u %u %u %u\n",
+		 i2400m->rx_pl_num, i2400m->rx_pl_min,
+		 i2400m->rx_pl_max, i2400m->rx_num,
+		 i2400m->rx_size_acc,
+		 i2400m->rx_size_min, i2400m->rx_size_max);
+	spin_unlock_irqrestore(&i2400m->rx_lock, flags);
+	return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
+}
+
+
+/* Any write clears the RX stats; the written bytes are never looked at */
+static
+ssize_t i2400m_rx_stats_write(struct file *filp, const char __user *buffer,
+			      size_t count, loff_t *ppos)
+{
+	struct i2400m *i2400m = filp->private_data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&i2400m->rx_lock, flags);
+	i2400m->rx_pl_num = 0;
+	i2400m->rx_pl_max = 0;
+	i2400m->rx_pl_min = UINT_MAX;	/* presumably so the next sample becomes the min */
+	i2400m->rx_num = 0;
+	i2400m->rx_size_acc = 0;
+	i2400m->rx_size_min = UINT_MAX;	/* same convention as rx_pl_min */
+	i2400m->rx_size_max = 0;
+	spin_unlock_irqrestore(&i2400m->rx_lock, flags);
+	return count;	/* report the whole write as consumed */
+}
+
+static
+const struct file_operations i2400m_rx_stats_fops = {	/* "rx_stats" debugfs file */
+	.owner =	THIS_MODULE,
+	.open =		simple_open,
+	.read =		i2400m_rx_stats_read,	/* one-shot dump of the counters */
+	.write =	i2400m_rx_stats_write,	/* any write resets the counters */
+	.llseek =	default_llseek,
+};
+
+
+/* TX counterpart of i2400m_rx_stats_read(): all-or-nothing read at offset 0 */
+static
+ssize_t i2400m_tx_stats_read(struct file *filp, char __user *buffer,
+			     size_t count, loff_t *ppos)
+{
+	struct i2400m *i2400m = filp->private_data;
+	char buf[128];
+	unsigned long flags;
+
+	if (*ppos != 0)
+		return 0;	/* second read: report end of file */
+	if (count < sizeof(buf))
+		return -ENOSPC;	/* insist on room for the whole 128-byte buffer */
+	spin_lock_irqsave(&i2400m->tx_lock, flags);	/* snapshot all counters together */
+	snprintf(buf, sizeof(buf), "%u %u %u %u %u %u %u\n",
+		 i2400m->tx_pl_num, i2400m->tx_pl_min,
+		 i2400m->tx_pl_max, i2400m->tx_num,
+		 i2400m->tx_size_acc,
+		 i2400m->tx_size_min, i2400m->tx_size_max);
+	spin_unlock_irqrestore(&i2400m->tx_lock, flags);
+	return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
+}
+
+/* Any write clears the TX stats; the written bytes are never looked at */
+static
+ssize_t i2400m_tx_stats_write(struct file *filp, const char __user *buffer,
+			      size_t count, loff_t *ppos)
+{
+	struct i2400m *i2400m = filp->private_data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&i2400m->tx_lock, flags);
+	i2400m->tx_pl_num = 0;
+	i2400m->tx_pl_max = 0;
+	i2400m->tx_pl_min = UINT_MAX;	/* presumably so the next sample becomes the min */
+	i2400m->tx_num = 0;
+	i2400m->tx_size_acc = 0;
+	i2400m->tx_size_min = UINT_MAX;	/* same convention as tx_pl_min */
+	i2400m->tx_size_max = 0;
+	spin_unlock_irqrestore(&i2400m->tx_lock, flags);
+	return count;	/* report the whole write as consumed */
+}
+
+static
+const struct file_operations i2400m_tx_stats_fops = {	/* "tx_stats" debugfs file */
+	.owner =	THIS_MODULE,
+	.open =		simple_open,
+	.read =		i2400m_tx_stats_read,	/* one-shot dump of the counters */
+	.write =	i2400m_tx_stats_write,	/* any write resets the counters */
+	.llseek =	default_llseek,
+};
+
+
+/* Any write asks the device to enter powersave; @val is not examined */
+static
+int debugfs_i2400m_suspend_set(void *data, u64 val)
+{
+	struct i2400m *i2400m = data;
+	int rc = i2400m_cmd_enter_powersave(i2400m);
+	/* Non-negative results are collapsed into plain success */
+	if (rc < 0)
+		return rc;
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(fops_i2400m_suspend,
+			NULL, debugfs_i2400m_suspend_set,
+			"%llu\n");
+
+/*
+ * Reset the device from debugfs
+ *
+ * The value written selects the kind of reset: 0 soft, 1 cold, 2 bus
+ * (as defined by enum i2400m_reset_type); anything else is -EINVAL.
+ */
+static
+int debugfs_i2400m_reset_set(void *data, u64 val)
+{
+	struct i2400m *i2400m = data;
+	enum i2400m_reset_type type = val;
+	int rc;
+
+	switch (type) {
+	case I2400M_RT_WARM:
+	case I2400M_RT_COLD:
+	case I2400M_RT_BUS:
+		break;
+	default:
+		return -EINVAL;	/* not a reset type we accept */
+	}
+	rc = i2400m_reset(i2400m, type);
+	/* a non-negative result is reported as plain success */
+	return rc < 0 ? rc : 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(fops_i2400m_reset,
+			NULL, debugfs_i2400m_reset_set,
+			"%llu\n");
+
+void i2400m_debugfs_add(struct i2400m *i2400m)
+{
+	struct dentry *dentry = i2400m->wimax_dev.debugfs_dentry;
+
+	dentry = debugfs_create_dir("i2400m", dentry);	/* child of the wimax_dev dir */
+	i2400m->debugfs_dentry = dentry;	/* kept for i2400m_debugfs_rm() */
+
+	d_level_register_debugfs("dl_", control, dentry);
+	d_level_register_debugfs("dl_", driver, dentry);
+	d_level_register_debugfs("dl_", debugfs, dentry);
+	d_level_register_debugfs("dl_", fw, dentry);
+	d_level_register_debugfs("dl_", netdev, dentry);
+	d_level_register_debugfs("dl_", rfkill, dentry);
+	d_level_register_debugfs("dl_", rx, dentry);
+	d_level_register_debugfs("dl_", tx, dentry);
+
+	debugfs_create_size_t("tx_in", 0400, dentry, &i2400m->tx_in);
+	debugfs_create_size_t("tx_out", 0400, dentry, &i2400m->tx_out);
+	debugfs_create_u32("state", 0600, dentry, &i2400m->state);
+
+	/*
+	 * Trace received messages from user space
+	 *
+	 * In order to tap the bidirectional message stream in the
+	 * 'msg' pipe, user space can read from the 'msg' pipe;
+	 * however, due to limitations in libnl, we can't know what
+	 * the different applications are sending down to the kernel.
+	 *
+	 * So we have this hack where the driver will echo any message
+	 * received on the msg pipe from user space [through a call to
+	 * wimax_dev->op_msg_from_user() into
+	 * i2400m_op_msg_from_user()] into the 'trace' pipe that this
+	 * driver creates.
+	 *
+	 * So then, reading from both the 'trace' and 'msg' pipes in
+	 * user space will provide a full dump of the traffic.
+	 *
+	 * Write 1 to activate, 0 to clear.
+	 *
+	 * It is not really very atomic, but it is also not too
+	 * critical.
+	 */
+	debugfs_create_u8("trace_msg_from_user", 0600, dentry,
+			  &i2400m->trace_msg_from_user);
+
+	debugfs_create_file("netdev_queue_stopped", 0400, dentry, i2400m,
+			    &fops_netdev_queue_stopped);	/* read-only */
+
+	debugfs_create_file("rx_stats", 0600, dentry, i2400m,
+			    &i2400m_rx_stats_fops);	/* read dumps, write clears */
+
+	debugfs_create_file("tx_stats", 0600, dentry, i2400m,
+			    &i2400m_tx_stats_fops);	/* read dumps, write clears */
+
+	debugfs_create_file("suspend", 0200, dentry, i2400m,
+			    &fops_i2400m_suspend);	/* write-only trigger */
+
+	debugfs_create_file("reset", 0200, dentry, i2400m, &fops_i2400m_reset);
+}
+
+void i2400m_debugfs_rm(struct i2400m *i2400m)
+{
+	debugfs_remove_recursive(i2400m->debugfs_dentry);	/* dir saved by i2400m_debugfs_add() */
+}
diff --git a/drivers/staging/wimax/i2400m/driver.c b/drivers/staging/wimax/i2400m/driver.c
new file mode 100644
index 000000000000..dc8939ff78c0
--- /dev/null
+++ b/drivers/staging/wimax/i2400m/driver.c
@@ -0,0 +1,1002 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Intel Wireless WiMAX Connection 2400m
+ * Generic probe/disconnect, reset and message passing
+ *
+ * Copyright (C) 2007-2008 Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * See i2400m.h for driver documentation. This contains helpers for
+ * the driver model glue [_setup()/_release()], handling device resets
+ * [_dev_reset_handle()], and the backends for the WiMAX stack ops
+ * reset [_op_reset()] and message from user [_op_msg_from_user()].
+ *
+ * ROADMAP:
+ *
+ * i2400m_op_msg_from_user()
+ *   i2400m_msg_to_dev()
+ *   wimax_msg_to_user_send()
+ *
+ * i2400m_op_reset()
+ *   i240m->bus_reset()
+ *
+ * i2400m_dev_reset_handle()
+ *   __i2400m_dev_reset_handle()
+ *     __i2400m_dev_stop()
+ *     __i2400m_dev_start()
+ *
+ * i2400m_setup()
+ *   i2400m->bus_setup()
+ *   i2400m_bootrom_init()
+ *   register_netdev()
+ *   wimax_dev_add()
+ *   i2400m_dev_start()
+ *     __i2400m_dev_start()
+ *       i2400m_dev_bootstrap()
+ *       i2400m_tx_setup()
+ *       i2400m->bus_dev_start()
+ *       i2400m_firmware_check()
+ *       i2400m_check_mac_addr()
+ *
+ * i2400m_release()
+ *   i2400m_dev_stop()
+ *     __i2400m_dev_stop()
+ *       i2400m_dev_shutdown()
+ *       i2400m->bus_dev_stop()
+ *       i2400m_tx_release()
+ *   i2400m->bus_release()
+ *   wimax_dev_rm()
+ *   unregister_netdev()
+ */
+#include "i2400m.h"
+#include <linux/etherdevice.h>
+#include "linux-wimax-i2400m.h"
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/suspend.h>
+#include <linux/slab.h>
+
+#define D_SUBMODULE driver
+#include "debug-levels.h"
+
+
+static char i2400m_debug_params[128];	/* backing store for the "debug" parameter */
+module_param_string(debug, i2400m_debug_params, sizeof(i2400m_debug_params),
+		    0644);
+MODULE_PARM_DESC(debug,
+		 "String of space-separated NAME:VALUE pairs, where NAMEs "
+		 "are the different debug submodules and VALUE are the "
+		 "initial debug value to set.");
+
+static char i2400m_barkers_params[128];	/* backing store for the "barkers" parameter */
+module_param_string(barkers, i2400m_barkers_params,
+		    sizeof(i2400m_barkers_params), 0644);
+MODULE_PARM_DESC(barkers,
+		 "String of comma-separated 32-bit values; each is "
+		 "recognized as the value the device sends as a reboot "
+		 "signal; values are appended to a list--setting one value "
+		 "as zero cleans the existing list and starts a new one.");
+
+/*
+ * WiMAX stack operation: relay a message from user space
+ *
+ * @wimax_dev: device descriptor
+ * @pipe_name: named pipe the message is for (not used in this function)
+ * @msg_buf: pointer to the message bytes
+ * @msg_len: length of the buffer
+ * @genl_info: passed by the generic netlink layer
+ *
+ * The WiMAX stack will call this function when a message was received
+ * from user space.
+ *
+ * For the i2400m, this is an L3L4 message, as specified in
+ * include/linux/wimax/i2400m.h, and thus prefixed with a 'struct
+ * i2400m_l3l4_hdr'. Driver (and device) expect the messages to be
+ * coded in Little Endian.
+ *
+ * This function hands the message to i2400m_msg_to_dev() and relays
+ * the ack; NOTE(review): header/payload consistency checks are not
+ * done here, presumably inside i2400m_msg_to_dev() -- confirm.
+ *
+ * In the i2400m, messages are basically commands that will carry an
+ * ack, so we use i2400m_msg_to_dev() and then deliver the ack back to
+ * user space. The rx.c code might intercept the response and use it
+ * to update the driver's state, but then it will pass it on so it can
+ * be relayed back to user space.
+ *
+ * Note that asynchronous events from the device are processed and
+ * sent to user space in rx.c.
+ */
+static
+int i2400m_op_msg_from_user(struct wimax_dev *wimax_dev,
+			    const char *pipe_name,
+			    const void *msg_buf, size_t msg_len,
+			    const struct genl_info *genl_info)
+{
+	int result;
+	struct i2400m *i2400m = wimax_dev_to_i2400m(wimax_dev);
+	struct device *dev = i2400m_dev(i2400m);
+	struct sk_buff *ack_skb;
+
+	d_fnstart(4, dev, "(wimax_dev %p [i2400m %p] msg_buf %p "
+		  "msg_len %zu genl_info %p)\n", wimax_dev, i2400m,
+		  msg_buf, msg_len, genl_info);
+	ack_skb = i2400m_msg_to_dev(i2400m, msg_buf, msg_len);
+	result = PTR_ERR(ack_skb);	/* only meaningful if IS_ERR() below */
+	if (IS_ERR(ack_skb))
+		goto error_msg_to_dev;
+	result = wimax_msg_send(&i2400m->wimax_dev, ack_skb);	/* ack to user space */
+error_msg_to_dev:
+	d_fnend(4, dev, "(wimax_dev %p [i2400m %p] msg_buf %p msg_len %zu "
+		"genl_info %p) = %d\n", wimax_dev, i2400m, msg_buf, msg_len,
+		genl_info, result);
+	return result;
+}
+
+
+/*
+ * Context to wait for a reset to finalize (lives on i2400m_op_reset()'s stack)
+ */
+struct i2400m_reset_ctx {
+	struct completion completion;	/* completed by __i2400m_dev_reset_handle() */
+	int result;	/* outcome stored by the handler before completing */
+};
+
+
+/*
+ * WiMAX stack operation: reset a device
+ *
+ * @wimax_dev: device descriptor
+ *
+ * See the documentation for wimax_reset() and wimax_dev->op_reset for
+ * the requirements of this function. The WiMAX stack guarantees
+ * serialization on calls to this function.
+ *
+ * Do a warm reset on the device; if it fails, resort to a cold reset
+ * and return -ENODEV. On successful warm reset, we block (up to 4s,
+ * else -ETIMEDOUT) until the reset handler completes our context.
+ * i2400m->reset_ctx is cleared again on every exit path.
+ * The bus-driver implementation of reset takes care of falling back
+ * to cold reset if warm fails.
+ */
+static
+int i2400m_op_reset(struct wimax_dev *wimax_dev)
+{
+	int result;
+	struct i2400m *i2400m = wimax_dev_to_i2400m(wimax_dev);
+	struct device *dev = i2400m_dev(i2400m);
+	struct i2400m_reset_ctx ctx = {
+		.completion = COMPLETION_INITIALIZER_ONSTACK(ctx.completion),
+		.result = 0,
+	};
+
+	d_fnstart(4, dev, "(wimax_dev %p)\n", wimax_dev);
+	mutex_lock(&i2400m->init_mutex);
+	i2400m->reset_ctx = &ctx;
+	mutex_unlock(&i2400m->init_mutex);
+	result = i2400m_reset(i2400m, I2400M_RT_WARM);
+	if (result < 0)
+		goto out;
+	result = wait_for_completion_timeout(&ctx.completion, 4*HZ);
+	if (result == 0)
+		result = -ETIMEDOUT;
+	else if (result > 0)
+		result = ctx.result;
+	/* if result < 0, pass it on */
+out:
+	mutex_lock(&i2400m->init_mutex);
+	i2400m->reset_ctx = NULL;	/* ctx is on our stack: never leave it behind */
+	mutex_unlock(&i2400m->init_mutex);
+	d_fnend(4, dev, "(wimax_dev %p) = %d\n", wimax_dev, result);
+	return result;
+}
+
+
+/*
+ * Check the MAC address we got from boot mode is ok
+ *
+ * @i2400m: device descriptor
+ *
+ * Returns: 0 if ok (even after a mismatch), < 0 if the query failed.
+ */
+static
+int i2400m_check_mac_addr(struct i2400m *i2400m)
+{
+	int result;
+	struct device *dev = i2400m_dev(i2400m);
+	struct sk_buff *skb;
+	const struct i2400m_tlv_detailed_device_info *ddi;
+	struct net_device *net_dev = i2400m->wimax_dev.net_dev;
+
+	d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
+	skb = i2400m_get_device_info(i2400m);
+	if (IS_ERR(skb)) {
+		result = PTR_ERR(skb);
+		dev_err(dev, "Cannot verify MAC address, error reading: %d\n",
+			result);
+		goto error;
+	}
+	/* Extract MAC address */
+	ddi = (void *) skb->data;	/* NOTE(review): skb length is not checked here */
+	BUILD_BUG_ON(ETH_ALEN != sizeof(ddi->mac_address));
+	d_printf(2, dev, "GET DEVICE INFO: mac addr %pM\n",
+		 ddi->mac_address);
+	if (!memcmp(net_dev->perm_addr, ddi->mac_address,
+		   sizeof(ddi->mac_address)))
+		goto ok;	/* both sources agree, nothing to update */
+	dev_warn(dev, "warning: device reports a different MAC address "
+		 "to that of boot mode's\n");
+	dev_warn(dev, "device reports     %pM\n", ddi->mac_address);
+	dev_warn(dev, "boot mode reported %pM\n", net_dev->perm_addr);
+	if (is_zero_ether_addr(ddi->mac_address))
+		dev_err(dev, "device reports an invalid MAC address, "
+			"not updating\n");
+	else {
+		dev_warn(dev, "updating MAC address\n");
+		net_dev->addr_len = ETH_ALEN;
+		memcpy(net_dev->perm_addr, ddi->mac_address, ETH_ALEN);
+		memcpy(net_dev->dev_addr, ddi->mac_address, ETH_ALEN);
+	}
+ok:
+	result = 0;	/* a mismatch is only warned about, not an error */
+	kfree_skb(skb);
+error:
+	d_fnend(3, dev, "(i2400m %p) = %d\n", i2400m, result);
+	return result;
+}
+
+
+/**
+ * __i2400m_dev_start - Bring up driver communication with the device
+ *
+ * @i2400m: device descriptor
+ * @flags: boot mode flags
+ *
+ * Returns: 0 if ok, < 0 errno code on error.
+ *
+ * Uploads firmware and brings up all the resources needed to be able
+ * to communicate with the device.
+ *
+ * The workqueue has to be setup early, at least before RX handling
+ * (it's only real user for now) so it can process reports as they
+ * arrive. We also want to destroy it if we retry, to make sure it is
+ * flushed...easier like this.
+ *
+ * TX needs to be setup before the bus-specific code (otherwise on
+ * shutdown, the bus-tx code could try to access it).
+ */
+static
+int __i2400m_dev_start(struct i2400m *i2400m, enum i2400m_bri flags)
+{
+	int result;
+	struct wimax_dev *wimax_dev = &i2400m->wimax_dev;
+	struct net_device *net_dev = wimax_dev->net_dev;
+	struct device *dev = i2400m_dev(i2400m);
+	int times = i2400m->bus_bm_retries;	/* retry budget for -EL3RST failures */
+
+	d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
+retry:
+	result = i2400m_dev_bootstrap(i2400m, flags);
+	if (result < 0) {
+		dev_err(dev, "cannot bootstrap device: %d\n", result);
+		goto error_bootstrap;
+	}
+	result = i2400m_tx_setup(i2400m);
+	if (result < 0)
+		goto error_tx_setup;
+	result = i2400m_rx_setup(i2400m);
+	if (result < 0)
+		goto error_rx_setup;
+	i2400m->work_queue = create_singlethread_workqueue(wimax_dev->name);
+	if (i2400m->work_queue == NULL) {
+		result = -ENOMEM;
+		dev_err(dev, "cannot create workqueue\n");
+		goto error_create_workqueue;
+	}
+	if (i2400m->bus_dev_start) {	/* optional bus-specific hook */
+		result = i2400m->bus_dev_start(i2400m);
+		if (result < 0)
+			goto error_bus_dev_start;
+	}
+	i2400m->ready = 1;
+	wmb();		/* see i2400m->ready's documentation  */
+	/* process pending reports from the device */
+	queue_work(i2400m->work_queue, &i2400m->rx_report_ws);
+	result = i2400m_firmware_check(i2400m);	/* fw versions ok? */
+	if (result < 0)
+		goto error_fw_check;
+	/* At this point is ok to send commands to the device */
+	result = i2400m_check_mac_addr(i2400m);
+	if (result < 0)
+		goto error_check_mac_addr;
+	result = i2400m_dev_initialize(i2400m);
+	if (result < 0)
+		goto error_dev_initialize;
+
+	/* We don't want any additional unwanted error recovery triggered
+	 * from any other context so if anything went wrong before we come
+	 * here, let's keep i2400m->error_recovery untouched and leave it to
+	 * dev_reset_handle(). See dev_reset_handle(). */
+
+	atomic_dec(&i2400m->error_recovery);
+	/* Every thing works so far, ok, now we are ready to
+	 * take error recovery if it's required. */
+
+	/* At this point, reports will come for the device and set it
+	 * to the right state if it is different than UNINITIALIZED */
+	d_fnend(3, dev, "(net_dev %p [i2400m %p]) = %d\n",
+		net_dev, i2400m, result);
+	return result;
+
+	/* Unwind in reverse order of setup; each label undoes the steps above it */
+error_dev_initialize:
+error_check_mac_addr:
+error_fw_check:
+	i2400m->ready = 0;
+	wmb();		/* see i2400m->ready's documentation  */
+	flush_workqueue(i2400m->work_queue);
+	if (i2400m->bus_dev_stop)
+		i2400m->bus_dev_stop(i2400m);
+error_bus_dev_start:
+	destroy_workqueue(i2400m->work_queue);
+error_create_workqueue:
+	i2400m_rx_release(i2400m);
+error_rx_setup:
+	i2400m_tx_release(i2400m);
+error_tx_setup:
+error_bootstrap:
+	if (result == -EL3RST && times-- > 0) {	/* start over from bootstrap */
+		flags = I2400M_BRI_SOFT|I2400M_BRI_MAC_REINIT;
+		goto retry;
+	}
+	d_fnend(3, dev, "(net_dev %p [i2400m %p]) = %d\n",
+		net_dev, i2400m, result);
+	return result;
+}
+
+
+static
+int i2400m_dev_start(struct i2400m *i2400m, enum i2400m_bri bm_flags)
+{
+	int result = 0;	/* returned as-is when the device is already up */
+	mutex_lock(&i2400m->init_mutex);	/* Well, start the device */
+	if (i2400m->updown == 0) {
+		result = __i2400m_dev_start(i2400m, bm_flags);
+		if (result >= 0) {	/* flags only change on a successful start */
+			i2400m->updown = 1;
+			i2400m->alive = 1;
+			wmb();/* see i2400m->updown and i2400m->alive's doc */
+		}
+	}
+	mutex_unlock(&i2400m->init_mutex);
+	return result;
+}
+
+
+/**
+ * __i2400m_dev_stop - Tear down driver communication with the device
+ *
+ * @i2400m: device descriptor
+ *
+ * Returns nothing; callers in this file hold i2400m->init_mutex.
+ *
+ * Releases all the resources allocated to communicate with the
+ * device. Note we cannot destroy the workqueue earlier as until RX is
+ * fully destroyed, it could still try to schedule jobs.
+ */
+static
+void __i2400m_dev_stop(struct i2400m *i2400m)
+{
+	struct wimax_dev *wimax_dev = &i2400m->wimax_dev;
+	struct device *dev = i2400m_dev(i2400m);
+
+	d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
+	wimax_state_change(wimax_dev, __WIMAX_ST_QUIESCING);
+	i2400m_msg_to_dev_cancel_wait(i2400m, -EL3RST);
+	complete(&i2400m->msg_completion);
+	i2400m_net_wake_stop(i2400m);
+	i2400m_dev_shutdown(i2400m);
+	/*
+	 * Make sure no report hooks are running *before* we stop the
+	 * communication infrastructure with the device.
+	 */
+	i2400m->ready = 0;	/* nobody can queue work anymore */
+	wmb();		/* see i2400m->ready's documentation  */
+	flush_workqueue(i2400m->work_queue);
+
+	if (i2400m->bus_dev_stop)	/* optional bus-specific hook */
+		i2400m->bus_dev_stop(i2400m);
+	destroy_workqueue(i2400m->work_queue);
+	i2400m_rx_release(i2400m);
+	i2400m_tx_release(i2400m);
+	wimax_state_change(wimax_dev, WIMAX_ST_DOWN);
+	d_fnend(3, dev, "(i2400m %p) = 0\n", i2400m);
+}
+
+
+/*
+ * Watch out -- we only need to stop if there is a need for it. The
+ * device could have reset itself and failed to come up again (see
+ * __i2400m_dev_reset_handle()).
+ */
+static
+void i2400m_dev_stop(struct i2400m *i2400m)
+{
+	mutex_lock(&i2400m->init_mutex);
+	if (i2400m->updown) {	/* skip the teardown if already down */
+		__i2400m_dev_stop(i2400m);
+		i2400m->updown = 0;
+		i2400m->alive = 0;
+		wmb();	/* see i2400m->updown and i2400m->alive's doc */
+	}
+	mutex_unlock(&i2400m->init_mutex);
+}
+
+
+/*
+ * Listen to PM events to cache the firmware before suspend/hibernation
+ *
+ * When the device comes out of suspend, it might go into reset and
+ * firmware has to be uploaded again. At resume, most of the time, we
+ * can't load firmware images from disk, so we need to cache it.
+ *
+ * i2400m_fw_cache() will allocate a kobject and attach the firmware
+ * to it; that way we don't have to worry too much about the fw loader
+ * hitting a race condition.
+ *
+ * Note: modus operandi stolen from the Orinoco driver; thx.
+ */
+static
+int i2400m_pm_notifier(struct notifier_block *notifier,
+		       unsigned long pm_event,
+		       void *unused)
+{
+	struct i2400m *i2400m =
+		container_of(notifier, struct i2400m, pm_notifier);
+	struct device *dev = i2400m_dev(i2400m);
+
+	d_fnstart(3, dev, "(i2400m %p pm_event %lx)\n", i2400m, pm_event);
+	switch (pm_event) {
+	case PM_HIBERNATION_PREPARE:
+	case PM_SUSPEND_PREPARE:
+		i2400m_fw_cache(i2400m);	/* grab the firmware before sleeping */
+		break;
+	case PM_POST_RESTORE:
+		/* Restore from hibernation failed. We need to clean
+		 * up in exactly the same way, so fall through. */
+	case PM_POST_HIBERNATION:
+	case PM_POST_SUSPEND:
+		i2400m_fw_uncache(i2400m);	/* drop the cached copy again */
+		break;
+
+	case PM_RESTORE_PREPARE:
+	default:
+		break;	/* nothing to do for the remaining events */
+	}
+	d_fnend(3, dev, "(i2400m %p pm_event %lx) = void\n", i2400m, pm_event);
+	return NOTIFY_DONE;	/* same return value for every event */
+}
+
+
+/*
+ * pre-reset is called before a device goes into reset
+ *
+ * This has to be followed by a call to i2400m_post_reset(), otherwise
+ * bad things might happen. Always returns 0.
+ */
+int i2400m_pre_reset(struct i2400m *i2400m)
+{
+	struct device *dev = i2400m_dev(i2400m);
+
+	d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
+	d_printf(1, dev, "pre-reset shut down\n");
+
+	mutex_lock(&i2400m->init_mutex);
+	if (i2400m->updown) {
+		netif_tx_disable(i2400m->wimax_dev.net_dev);
+		__i2400m_dev_stop(i2400m);
+		/* don't set updown to zero -- this way
+		 * post_reset can restore properly */
+	}
+	mutex_unlock(&i2400m->init_mutex);
+	if (i2400m->bus_release)	/* optional hook, run without init_mutex */
+		i2400m->bus_release(i2400m);
+	d_fnend(3, dev, "(i2400m %p) = 0\n", i2400m);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(i2400m_pre_reset);
+
+
+/*
+ * Restore device state after a reset
+ *
+ * Do the work needed after a device reset to bring it up to the same
+ * state as it was before the reset.
+ *
+ * NOTE: this takes i2400m->init_mutex itself around the restart.
+ */
+int i2400m_post_reset(struct i2400m *i2400m)
+{
+	int result = 0;
+	struct device *dev = i2400m_dev(i2400m);
+
+	d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
+	d_printf(1, dev, "post-reset start\n");
+	if (i2400m->bus_setup) {	/* optional bus-specific hook */
+		result = i2400m->bus_setup(i2400m);
+		if (result < 0) {
+			dev_err(dev, "bus-specific setup failed: %d\n",
+				result);
+			goto error_bus_setup;
+		}
+	}
+	mutex_lock(&i2400m->init_mutex);
+	if (i2400m->updown) {	/* left set by i2400m_pre_reset() if it was up */
+		result = __i2400m_dev_start(
+			i2400m, I2400M_BRI_SOFT | I2400M_BRI_MAC_REINIT);
+		if (result < 0)
+			goto error_dev_start;
+	}
+	mutex_unlock(&i2400m->init_mutex);
+	d_fnend(3, dev, "(i2400m %p) = %d\n", i2400m, result);
+	return result;
+
+error_dev_start:
+	if (i2400m->bus_release)	/* undo bus_setup(); init_mutex still held */
+		i2400m->bus_release(i2400m);
+	/* even if the device was up, it could not be recovered, so we
+	 * mark it as down. */
+	i2400m->updown = 0;
+	wmb();		/* see i2400m->updown's documentation  */
+	mutex_unlock(&i2400m->init_mutex);
+error_bus_setup:
+	d_fnend(3, dev, "(i2400m %p) = %d\n", i2400m, result);
+	return result;
+}
+EXPORT_SYMBOL_GPL(i2400m_post_reset);
+
+
+/*
+ * The device has rebooted; fix up the device and the driver
+ *
+ * Tear down the driver communication with the device, reload the
+ * firmware and reinitialize the communication with the device.
+ *
+ * If someone calls a reset when the device's firmware is down, in
+ * theory we won't see it because we are not listening. However, just
+ * in case, leave the code to handle it.
+ *
+ * If there is a reset context, use it; this means someone is waiting
+ * for us to tell him when the reset operation is complete and the
+ * device is ready to rock again. It is sampled with init_mutex held.
+ *
+ * NOTE: if we are in the process of bringing up or down the
+ *       communication with the device [running i2400m_dev_start() or
+ *       _stop()], don't do anything, let it fail and handle it.
+ *
+ * This function is always run in a thread context
+ *
+ * The "reason" why the reset happened (used for messages) is read
+ * from i2400m->reset_reason; the work item is i2400m->reset_ws.
+ */
+static
+void __i2400m_dev_reset_handle(struct work_struct *ws)
+{
+	struct i2400m *i2400m = container_of(ws, struct i2400m, reset_ws);
+	const char *reason = i2400m->reset_reason;
+	struct device *dev = i2400m_dev(i2400m);
+	struct i2400m_reset_ctx *ctx;
+	int result;
+
+	d_fnstart(3, dev, "(ws %p i2400m %p reason %s)\n", ws, i2400m, reason);
+
+	i2400m->boot_mode = 1;
+	wmb();		/* Make sure i2400m_msg_to_dev() sees boot_mode */
+
+	result = 0;
+	if (mutex_trylock(&i2400m->init_mutex) == 0) {
+		/* We are still in i2400m_dev_start() [let it fail] or
+		 * i2400m_dev_stop() [we are shutting down anyway, so
+		 * ignore it] or we are resetting somewhere else. */
+		dev_err(dev, "device rebooted somewhere else?\n");
+		i2400m_msg_to_dev_cancel_wait(i2400m, -EL3RST);
+		complete(&i2400m->msg_completion);
+		goto out;
+	}
+
+	dev_err(dev, "%s: reinitializing driver\n", reason);
+	rmb();
+	if (i2400m->updown) {
+		__i2400m_dev_stop(i2400m);
+		i2400m->updown = 0;
+		wmb();		/* see i2400m->updown's documentation  */
+	}
+
+	if (i2400m->alive) {
+		result = __i2400m_dev_start(i2400m,
+				    I2400M_BRI_SOFT | I2400M_BRI_MAC_REINIT);
+		if (result < 0) {
+			dev_err(dev, "%s: cannot start the device: %d\n",
+				reason, result);
+			result = -EUCLEAN;
+			if (atomic_read(&i2400m->bus_reset_retries)
+					>= I2400M_BUS_RESET_RETRIES) {
+				result = -ENODEV;
+				dev_err(dev, "tried too many times to "
+					"reset the device, giving up\n");
+			}
+		}
+	}
+	ctx = i2400m->reset_ctx;	/* sample only now, with init_mutex held */
+	if (ctx) {
+		ctx->result = result;
+		complete(&ctx->completion);
+	}
+	mutex_unlock(&i2400m->init_mutex);
+	if (result == -EUCLEAN) {
+		/*
+		 * We come here because the reset during operational mode
+		 * wasn't successfully done and need to proceed to a bus
+		 * reset. For the dev_reset_handle() to be able to handle
+		 * the reset event later properly, we restore boot_mode back
+		 * to the state before previous reset. ie: just like we are
+		 * issuing the bus reset for the first time
+		 */
+		i2400m->boot_mode = 0;
+		wmb();
+
+		atomic_inc(&i2400m->bus_reset_retries);
+		/* ops, need to clean up [w/ init_mutex not held] */
+		result = i2400m_reset(i2400m, I2400M_RT_BUS);
+		if (result >= 0)
+			result = -ENODEV;
+	} else {
+		rmb();
+		if (i2400m->alive) {
+			/* great, we expect the device state up and
+			 * dev_start() actually brings the device state up */
+			i2400m->updown = 1;
+			wmb();
+			atomic_set(&i2400m->bus_reset_retries, 0);
+		}
+	}
+out:
+	d_fnend(3, dev, "(ws %p i2400m %p reason %s) = void\n",
+		ws, i2400m, reason);
+}
+
+
+/**
+ * i2400m_dev_reset_handle - Handle a device's reset in a thread context
+ * @i2400m: device descriptor; its reset_ws work item gets scheduled
+ * @reason: text stored in i2400m->reset_reason for the handler's messages
+ *
+ * Defers the reset handling to a work item so this is safe to call
+ * from atomic context. We can't use the i2400m's own queue as it is
+ * destroyed and reinitialized as part of the driver teardown/bringup.
+ * See __i2400m_dev_reset_handle(), which calls bus-specific ops as needed.
+ *
+ * Returns: whatever schedule_work() returns.
+ */
+int i2400m_dev_reset_handle(struct i2400m *i2400m, const char *reason)
+{
+	i2400m->reset_reason = reason;	/* read back by the work handler */
+	return schedule_work(&i2400m->reset_ws);
+}
+EXPORT_SYMBOL_GPL(i2400m_dev_reset_handle);
+
+
+/*
+ * Work handler behind i2400m_error_recovery().
+ *
+ * Recovery currently amounts to a bus reset of the device that owns
+ * the recovery_ws work item; the reset's return value is discarded.
+ */
+static
+void __i2400m_error_recovery(struct work_struct *ws)
+{
+	i2400m_reset(container_of(ws, struct i2400m, recovery_ws),
+		     I2400M_RT_BUS);
+}
+
+/*
+ * Schedule a work struct for error recovery.
+ *
+ * Error recovery exists to bring the device back to a known state when
+ * TX sees -110 (-ETIMEDOUT) while copying data to the device. Such a TX
+ * failure could mean the device bus is stuck, so the current recovery
+ * implementation triggers a bus reset in the hope that it brings the
+ * device back.
+ *
+ * The actual work has to run in a thread context of its own because
+ * recovery is kicked off from the TX thread (i2400ms->tx_workqueue),
+ * which the recovery mechanism (currently a bus reset) destroys.
+ *
+ * There may already be a queue of TX works that all hit the
+ * -ETIMEDOUT condition because the device is stuck. We don't want
+ * consecutive bus resets just because multiple queued TX works hit
+ * the same device erratum, so the "error_recovery" counter gates
+ * the scheduling.
+ *
+ * Only the caller whose increment takes error_recovery from 0 to 1
+ * schedules the work; every other caller undoes its increment and
+ * leaves. Recovery shall only be invoked again once the previous one
+ * has completed and the counter has been brought back to 0 (that
+ * reset is not done in this function).
+ */
+void i2400m_error_recovery(struct i2400m *i2400m)
+{
+	if (atomic_add_return(1, &i2400m->error_recovery) != 1)
+		atomic_dec(&i2400m->error_recovery);
+	else
+		schedule_work(&i2400m->recovery_ws);
+}
+EXPORT_SYMBOL_GPL(i2400m_error_recovery);
+
+/*
+ * Alloc the command and ack buffers for boot mode
+ *
+ * Returns 0 with both buffers set, or -ENOMEM with neither left dangling.
+ */
+static
+int i2400m_bm_buf_alloc(struct i2400m *i2400m)
+{
+	i2400m->bm_cmd_buf = kzalloc(I2400M_BM_CMD_BUF_SIZE, GFP_KERNEL);
+	if (i2400m->bm_cmd_buf == NULL)
+		return -ENOMEM;
+	i2400m->bm_ack_buf = kzalloc(I2400M_BM_ACK_BUF_SIZE, GFP_KERNEL);
+	if (i2400m->bm_ack_buf == NULL)
+		goto error_bm_ack_buf_kzalloc;
+	return 0;
+
+error_bm_ack_buf_kzalloc:
+	kfree(i2400m->bm_cmd_buf);
+	i2400m->bm_cmd_buf = NULL;	/* no stale pointer for a later free */
+	return -ENOMEM;
+}
+
+
+/*
+ * Release what i2400m_bm_buf_alloc() got: the boot mode cmd and ack buffers.
+ */
+static
+void i2400m_bm_buf_free(struct i2400m *i2400m)
+{
+	kfree(i2400m->bm_cmd_buf);
+	kfree(i2400m->bm_ack_buf);
+}
+
+
+/**
+ * i2400m_init - Initialize a 'struct i2400m' from all zeroes
+ * @i2400m: descriptor to set up; expected to be zero-filled by the caller
+ *
+ * This is a bus-generic API call.
+ */
+void i2400m_init(struct i2400m *i2400m)
+{
+	wimax_dev_init(&i2400m->wimax_dev);
+
+	/* plain state: start out in boot mode, not alive, rx_reorder set */
+	i2400m->boot_mode = 1;
+	i2400m->alive = 0;
+	i2400m->rx_reorder = 1;
+
+	/* the TX/RX *_min fields all start out at UINT_MAX */
+	i2400m->tx_pl_min = UINT_MAX;
+	i2400m->tx_size_min = UINT_MAX;
+	i2400m->rx_pl_min = UINT_MAX;
+	i2400m->rx_size_min = UINT_MAX;
+
+	/* locks, wait queue and completion */
+	spin_lock_init(&i2400m->tx_lock);
+	spin_lock_init(&i2400m->rx_lock);
+	mutex_init(&i2400m->msg_mutex);
+	mutex_init(&i2400m->init_mutex);
+	init_waitqueue_head(&i2400m->state_wq);
+	init_completion(&i2400m->msg_completion);
+
+	/* deferred work; wake_tx_ws is initialized in i2400m_tx_setup() */
+	INIT_LIST_HEAD(&i2400m->rx_reports);
+	INIT_WORK(&i2400m->rx_report_ws, i2400m_report_hook_work);
+	INIT_WORK(&i2400m->reset_ws, __i2400m_dev_reset_handle);
+	INIT_WORK(&i2400m->recovery_ws, __i2400m_error_recovery);
+
+	/* error_recovery starts at 1 to denote we are not yet ready
+	 * to take any error recovery */
+	atomic_set(&i2400m->bus_reset_retries, 0);
+	atomic_set(&i2400m->error_recovery, 1);
+}
+EXPORT_SYMBOL_GPL(i2400m_init);
+
+
+int i2400m_reset(struct i2400m *i2400m, enum i2400m_reset_type rt)
+{
+	struct net_device *ndev = i2400m->wimax_dev.net_dev;
+	const int registered = ndev->reg_state == NETREG_REGISTERED;
+
+	/*
+	 * Stop TX and drop the carrier of a registered netdev first, so
+	 * i2400m_wake_tx() and the like cannot schedule work in parallel.
+	 */
+	if (registered) {
+		netif_tx_disable(ndev);
+		netif_carrier_off(ndev);
+	}
+	return i2400m->bus_reset(i2400m, rt);
+}
+EXPORT_SYMBOL_GPL(i2400m_reset);
+
+
+/**
+ * i2400m_setup - bus-generic setup function for the i2400m device
+ * @i2400m: device descriptor (bus-specific parts have been initialized)
+ * @bm_flags: boot mode flags, handed to the bootrom init and device start
+ *
+ * Returns: 0 if ok, < 0 errno code on error.
+ *
+ * Sets up basic device communication infrastructure, boots the ROM to
+ * read the MAC address, registers with the WiMAX and network stacks
+ * and then brings up the device.
+ */
+int i2400m_setup(struct i2400m *i2400m, enum i2400m_bri bm_flags)
+{
+	int result;
+	struct device *dev = i2400m_dev(i2400m);
+	struct wimax_dev *wimax_dev = &i2400m->wimax_dev;
+	struct net_device *net_dev = i2400m->wimax_dev.net_dev;
+
+	d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
+
+	snprintf(wimax_dev->name, sizeof(wimax_dev->name),
+		 "i2400m-%s:%s", dev->bus->name, dev_name(dev));
+
+	result = i2400m_bm_buf_alloc(i2400m);
+	if (result < 0) {
+		dev_err(dev, "cannot allocate bootmode scratch buffers\n");
+		goto error_bm_buf_alloc;
+	}
+
+	if (i2400m->bus_setup) {
+		result = i2400m->bus_setup(i2400m);
+		if (result < 0) {
+			dev_err(dev, "bus-specific setup failed: %d\n",
+				result);
+			goto error_bus_setup;
+		}
+	}
+
+	result = i2400m_bootrom_init(i2400m, bm_flags);
+	if (result < 0) {
+		dev_err(dev, "read mac addr: bootrom init "
+			"failed: %d\n", result);
+		goto error_bootrom_init;
+	}
+	result = i2400m_read_mac_addr(i2400m);
+	if (result < 0)
+		goto error_read_mac_addr;
+	eth_random_addr(i2400m->src_mac_addr);
+
+	i2400m->pm_notifier.notifier_call = i2400m_pm_notifier;
+	register_pm_notifier(&i2400m->pm_notifier);	/* result not checked */
+
+	result = register_netdev(net_dev);	/* Okey dokey, bring it up */
+	if (result < 0) {
+		dev_err(dev, "cannot register i2400m network device: %d\n",
+			result);
+		goto error_register_netdev;
+	}
+	netif_carrier_off(net_dev);
+
+	i2400m->wimax_dev.op_msg_from_user = i2400m_op_msg_from_user;
+	i2400m->wimax_dev.op_rfkill_sw_toggle = i2400m_op_rfkill_sw_toggle;
+	i2400m->wimax_dev.op_reset = i2400m_op_reset;
+
+	result = wimax_dev_add(&i2400m->wimax_dev, net_dev);
+	if (result < 0)
+		goto error_wimax_dev_add;
+
+	/* Now setup all that requires a registered net and wimax device. */
+	result = sysfs_create_group(&net_dev->dev.kobj, &i2400m_dev_attr_group);
+	if (result < 0) {
+		dev_err(dev, "cannot setup i2400m's sysfs: %d\n", result);
+		goto error_sysfs_setup;
+	}
+
+	i2400m_debugfs_add(i2400m);
+
+	result = i2400m_dev_start(i2400m, bm_flags);
+	if (result < 0)
+		goto error_dev_start;
+	d_fnend(3, dev, "(i2400m %p) = %d\n", i2400m, result);
+	return result;
+
+error_dev_start:
+	i2400m_debugfs_rm(i2400m);
+	sysfs_remove_group(&i2400m->wimax_dev.net_dev->dev.kobj,
+			   &i2400m_dev_attr_group);
+error_sysfs_setup:
+	wimax_dev_rm(&i2400m->wimax_dev);
+error_wimax_dev_add:
+	unregister_netdev(net_dev);
+error_register_netdev:
+	unregister_pm_notifier(&i2400m->pm_notifier);
+error_read_mac_addr:	/* jumped to before anything got registered */
+error_bootrom_init:
+	if (i2400m->bus_release)
+		i2400m->bus_release(i2400m);
+error_bus_setup:
+	i2400m_bm_buf_free(i2400m);
+error_bm_buf_alloc:
+	d_fnend(3, dev, "(i2400m %p) = %d\n", i2400m, result);
+	return result;
+}
+EXPORT_SYMBOL_GPL(i2400m_setup);
+
+
+/**
+ * i2400m_release - release the bus-generic driver resources
+ *
+ * Sends a disconnect message and undoes the i2400m_setup() done on @i2400m
+ */
+void i2400m_release(struct i2400m *i2400m)
+{
+	struct device *dev = i2400m_dev(i2400m);
+
+	d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
+	netif_stop_queue(i2400m->wimax_dev.net_dev);
+
+	i2400m_dev_stop(i2400m);
+
+	cancel_work_sync(&i2400m->reset_ws);	/* reset handler work item */
+	cancel_work_sync(&i2400m->recovery_ws);	/* error recovery work item */
+
+	i2400m_debugfs_rm(i2400m);
+	sysfs_remove_group(&i2400m->wimax_dev.net_dev->dev.kobj,
+			   &i2400m_dev_attr_group);
+	wimax_dev_rm(&i2400m->wimax_dev);
+	unregister_netdev(i2400m->wimax_dev.net_dev);
+	unregister_pm_notifier(&i2400m->pm_notifier);
+	if (i2400m->bus_release)
+		i2400m->bus_release(i2400m);
+	i2400m_bm_buf_free(i2400m);
+	d_fnend(3, dev, "(i2400m %p) = void\n", i2400m);
+}
+EXPORT_SYMBOL_GPL(i2400m_release);
+
+
+/*
+ * Per-submodule debug levels, one D_LEVEL[] entry each; see debug.h
+ */
+struct d_level D_LEVEL[] = {
+	D_SUBMODULE_DEFINE(control),
+	D_SUBMODULE_DEFINE(driver),
+	D_SUBMODULE_DEFINE(debugfs),
+	D_SUBMODULE_DEFINE(fw),
+	D_SUBMODULE_DEFINE(netdev),
+	D_SUBMODULE_DEFINE(rfkill),
+	D_SUBMODULE_DEFINE(rx),
+	D_SUBMODULE_DEFINE(sysfs),
+	D_SUBMODULE_DEFINE(tx),
+};
+size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL);	/* entry count of D_LEVEL[] */
+
+
+static
+int __init i2400m_driver_init(void)
+{
+	/* debug levels from i2400m.debug first, then the barker database */
+	d_parse_params(D_LEVEL, D_LEVEL_SIZE, i2400m_debug_params, "i2400m.debug");
+	return i2400m_barker_db_init(i2400m_barkers_params);
+}
+module_init(i2400m_driver_init);
+
+static
+void __exit i2400m_driver_exit(void)
+{
+	i2400m_barker_db_exit();	/* drop the barker database */
+}
+module_exit(i2400m_driver_exit);
+
+MODULE_AUTHOR("Intel Corporation <linux-wimax@intel.com>");
+MODULE_DESCRIPTION("Intel 2400M WiMAX networking bus-generic driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/staging/wimax/i2400m/fw.c b/drivers/staging/wimax/i2400m/fw.c
new file mode 100644
index 000000000000..6c9a41bff2e0
--- /dev/null
+++ b/drivers/staging/wimax/i2400m/fw.c
@@ -0,0 +1,1653 @@
+/*
+ * Intel Wireless WiMAX Connection 2400m
+ * Firmware uploader
+ *
+ *
+ * Copyright (C) 2007-2008 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Intel Corporation <linux-wimax@intel.com>
+ * Yanir Lubetkin <yanirx.lubetkin@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *  - Initial implementation
+ *
+ *
+ * THE PROCEDURE
+ *
+ * The 2400m and derived devices work in two modes: boot-mode or
+ * normal mode. In boot mode we can execute only a handful of commands
+ * targeted at uploading the firmware and launching it.
+ *
+ * The 2400m enters boot mode when it is first connected to the
+ * system, when it crashes and when you ask it to reboot. There are
+ * two submodes of the boot mode: signed and non-signed. Signed takes
+ * firmwares signed with a certain private key, non-signed takes any
+ * firmware. Normal hardware takes only signed firmware.
+ *
+ * On boot mode, in USB, we write to the device using the bulk out
+ * endpoint and read from it in the notification endpoint.
+ *
+ * Upon entrance to boot mode, the device sends (preceded with a few
+ * zero length packets (ZLPs) on the notification endpoint in USB) a
+ * reboot barker (4 le32 words with the same value). We ack it by
+ * sending the same barker to the device. The device acks with a
+ * reboot ack barker (4 le32 words with value I2400M_ACK_BARKER) and
+ * then is fully booted. At this point we can upload the firmware.
+ *
+ * Note that different iterations of the device and EEPROM
+ * configurations will send different [re]boot barkers; these are
+ * collected in i2400m_barker_db along with the firmware
+ * characteristics they require.
+ *
+ * This process is accomplished by the i2400m_bootrom_init()
+ * function. All the device interaction happens through the
+ * i2400m_bm_cmd() [boot mode command]. Special return values will
+ * indicate if the device did reset during the process.
+ *
+ * After this, we read the MAC address and then (if needed)
+ * reinitialize the device. We need to read it ahead of time because
+ * in the future, we might not upload the firmware until userspace
+ * 'ifconfig up's the device.
+ *
+ * We can then upload the firmware file. The file is composed of a BCF
+ * header (basic data, keys and signatures) and a list of write
+ * commands and payloads. Optionally more BCF headers might follow the
+ * main payload. We first upload the header [i2400m_dnload_init()] and
+ * then pass the commands and payloads verbatim to the i2400m_bm_cmd()
+ * function [i2400m_dnload_bcf()]. Then we tell the device to jump to
+ * the new firmware [i2400m_dnload_finalize()].
+ *
+ * Once firmware is uploaded, we are good to go :)
+ *
+ * When we don't know in which mode we are, we first try by sending a
+ * warm reset request that will take us to boot-mode. If we time out
+ * waiting for a reboot barker, that means maybe we are already in
+ * boot mode, so we send a reboot barker.
+ *
+ * COMMAND EXECUTION
+ *
+ * This code (and process) is single threaded; for executing commands,
+ * we post a URB to the notification endpoint, post the command, wait
+ * for data on the notification buffer. We don't need to worry about
+ * others as we know we are the only ones in there.
+ *
+ * BACKEND IMPLEMENTATION
+ *
+ * This code is bus-generic; the bus-specific driver provides back end
+ * implementations to send a boot mode command to the device and to
+ * read an acknowledgement (or an asynchronous notification)
+ * from it.
+ *
+ * FIRMWARE LOADING
+ *
+ * Note that in some cases, we can't just load a firmware file (for
+ * example, when resuming). For that, we might cache the firmware
+ * file. Thus, when doing the bootstrap, if there is a cache firmware
+ * file, it is used; if not, loading from disk is attempted.
+ *
+ * ROADMAP
+ *
+ * i2400m_barker_db_init              Called by i2400m_driver_init()
+ *   i2400m_barker_db_add
+ *
+ * i2400m_barker_db_exit              Called by i2400m_driver_exit()
+ *
+ * i2400m_dev_bootstrap               Called by __i2400m_dev_start()
+ *   request_firmware
+ *   i2400m_fw_bootstrap
+ *     i2400m_fw_check
+ *       i2400m_fw_hdr_check
+ *     i2400m_fw_dnload
+ *   release_firmware
+ *
+ * i2400m_fw_dnload
+ *   i2400m_bootrom_init
+ *     i2400m_bm_cmd
+ *     i2400m_reset
+ *   i2400m_dnload_init
+ *     i2400m_dnload_init_signed
+ *     i2400m_dnload_init_nonsigned
+ *       i2400m_download_chunk
+ *         i2400m_bm_cmd
+ *   i2400m_dnload_bcf
+ *     i2400m_bm_cmd
+ *   i2400m_dnload_finalize
+ *     i2400m_bm_cmd
+ *
+ * i2400m_bm_cmd
+ *   i2400m->bus_bm_cmd_send()
+ *   i2400m->bus_bm_wait_for_ack
+ *   __i2400m_bm_ack_verify
+ *     i2400m_is_boot_barker
+ *
+ * i2400m_bm_cmd_prepare              Used by bus-drivers to prep
+ *                                    commands before sending
+ *
+ * i2400m_pm_notifier                 Called on Power Management events
+ *   i2400m_fw_cache
+ *   i2400m_fw_uncache
+ */
+#include <linux/firmware.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/usb.h>
+#include <linux/export.h>
+#include "i2400m.h"
+
+
+#define D_SUBMODULE fw
+#include "debug-levels.h"
+
+
+static const __le32 i2400m_ACK_BARKER[4] = {	/* reboot ack barker */
+	cpu_to_le32(I2400M_ACK_BARKER),
+	cpu_to_le32(I2400M_ACK_BARKER),
+	cpu_to_le32(I2400M_ACK_BARKER),
+	cpu_to_le32(I2400M_ACK_BARKER)
+};
+
+
+/**
+ * i2400m_bm_cmd_prepare - Prepare a boot-mode command for delivery
+ * @cmd: pointer to bootrom header to prepare (fields in little endian)
+ *
+ * Computes checksum if so needed. After calling this function, DO NOT
+ * modify the command or header as the checksum won't work anymore.
+ * The sum is done on CPU-order values and stored back as little endian.
+ *
+ * We do it from here because sometimes we cannot do it in the original
+ * context the command was sent (it is a const), so when we copy it to
+ * our staging buffer, we add the checksum there.
+ */
+void i2400m_bm_cmd_prepare(struct i2400m_bootrom_header *cmd)
+{
+	if (i2400m_brh_get_use_checksum(cmd)) {
+		u32 i, data_size = le32_to_cpu(cmd->data_size);
+		u32 checksum = le32_to_cpu(cmd->command)
+			+ le32_to_cpu(cmd->target_addr) + data_size;
+		const __le32 *checksum_ptr = (void *) cmd->payload;
+		for (i = 0; i < data_size / 4; i++)
+			checksum += le32_to_cpu(*checksum_ptr++);
+		cmd->block_checksum = cpu_to_le32(checksum);
+	}
+}
+EXPORT_SYMBOL_GPL(i2400m_bm_cmd_prepare);
+
+
+/*
+ * Database of known barkers.
+ *
+ * A barker is what the device sends indicating it is ready to be
+ * bootloaded. Different versions of the device will send different
+ * barkers. Depending on the barker, it might mean the device wants
+ * some kind of firmware or the other.
+ */
+static struct i2400m_barker_db {
+	__le32 data[4];		/* the barker value, repeated in all 4 words */
+} *i2400m_barker_db;
+static size_t i2400m_barker_db_used, i2400m_barker_db_size;
+
+
+/* Double the *_count-element array at *ptr (2 if empty), zeroing new room */
+static
+int i2400m_zrealloc_2x(void **ptr, size_t *_count, size_t el_size,
+		       gfp_t gfp_flags)
+{
+	const size_t had = *_count;
+	const size_t want = had ? 2 * had : 2;
+	const size_t had_bytes = el_size * had;
+	void *grown = krealloc(*ptr, el_size * want, gfp_flags);
+
+	if (grown == NULL)
+		return -ENOMEM;
+	/* an empty array is zeroed whole; otherwise just the new half,
+	 * which is exactly as big as the old one */
+	if (had_bytes == 0)
+		memset(grown, 0, el_size * want);
+	else
+		memset(grown + had_bytes, 0, had_bytes);
+	*_count = want;
+	*ptr = grown;
+	return 0;
+}
+
+
+/*
+ * Add a barker to the database
+ *
+ * @barker_id comes in CPU order and is stored as four little endian words.
+ * Only for use within this module at module_init time; no locking is done.
+ */
+static
+int i2400m_barker_db_add(u32 barker_id)
+{
+	int result;
+	struct i2400m_barker_db *barker;
+
+	if (i2400m_barker_db_used >= i2400m_barker_db_size) {
+		result = i2400m_zrealloc_2x(
+			(void **) &i2400m_barker_db, &i2400m_barker_db_size,
+			sizeof(i2400m_barker_db[0]), GFP_KERNEL);
+		if (result < 0)
+			return result;
+	}
+	barker = i2400m_barker_db + i2400m_barker_db_used++;
+	barker->data[0] = cpu_to_le32(barker_id);
+	barker->data[1] = cpu_to_le32(barker_id);
+	barker->data[2] = cpu_to_le32(barker_id);
+	barker->data[3] = cpu_to_le32(barker_id);
+	return 0;
+}
+
+
+void i2400m_barker_db_exit(void)
+{
+	i2400m_barker_db_used = 0;	/* nothing in use... */
+	i2400m_barker_db_size = 0;	/* ...nothing allocated... */
+	kfree(i2400m_barker_db);
+	i2400m_barker_db = NULL;	/* ...and no stale pointer left */
+}
+
+
+/*
+ * Add all the known stable barkers to the barker database.
+ *
+ * Stops at the first failure and returns its (negative) code; barkers
+ * added before it are left in the database. Returns 0 on success.
+ */
+static
+int i2400m_barker_db_known_barkers(void)
+{
+	static const u32 known[] = {
+		I2400M_NBOOT_BARKER,
+		I2400M_SBOOT_BARKER,
+		I2400M_SBOOT_BARKER_6050,
+	};
+	int result = 0;
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(known) && result >= 0; i++)
+		result = i2400m_barker_db_add(known[i]);
+	return result;
+}
+
+
+/*
+ * Initialize the barker database
+ *
+ * Only for the module_init function of this module (no locking done).
+ * Returns 0 on success; < 0 errno code with the database left empty.
+ *
+ * @_options: command line argument with extra barkers to
+ *     recognize. This is a comma-separated list of 32-bit hex
+ *     numbers. They are appended to the existing list. Setting 0
+ *     cleans the existing list and starts a new one.
+ */
+int i2400m_barker_db_init(const char *_options)
+{
+	int result;
+	char *options = NULL, *options_orig = NULL, *token;
+
+	i2400m_barker_db = NULL;
+	i2400m_barker_db_size = 0;
+	i2400m_barker_db_used = 0;
+
+	result = i2400m_barker_db_known_barkers();
+	if (result < 0)
+		goto error_add;
+	/* parse command line options from i2400m.barkers */
+	if (_options != NULL) {
+		unsigned barker;
+
+		options_orig = kstrdup(_options, GFP_KERNEL);
+		if (options_orig == NULL) {
+			result = -ENOMEM;
+			goto error_parse;
+		}
+		options = options_orig;
+
+		while ((token = strsep(&options, ",")) != NULL) {
+			if (*token == '\0')	/* eat joint commas */
+				continue;
+			if (sscanf(token, "%x", &barker) != 1
+			    || barker > 0xffffffff) {
+				printk(KERN_ERR "%s: can't recognize "
+				       "i2400m.barkers value '%s' as "
+				       "a 32-bit number\n",
+				       __func__, token);
+				result = -EINVAL;
+				goto error_parse;
+			}
+			if (barker == 0) {
+				/* clean list and start new */
+				i2400m_barker_db_exit();
+				continue;
+			}
+			result = i2400m_barker_db_add(barker);
+			if (result < 0)
+				goto error_parse_add;
+		}
+		kfree(options_orig);
+	}
+	return 0;
+
+error_parse_add:
+error_parse:
+	kfree(options_orig);
+error_add:
+	i2400m_barker_db_exit();	/* free and reset, no stale pointer */
+	return result;
+}
+
+
+/*
+ * Recognize a boot barker
+ * @i2400m: device descriptor; its ->barker and ->sboot may get set
+ * @buf: buffer holding the candidate boot barker.
+ * @buf_size: size of the buffer (has to be 16 bytes). It is passed
+ *     here so the function can check it for the caller.
+ *
+ * Note that as a side effect, upon identifying the obtained boot
+ * barker, this function will set i2400m->barker to point to the right
+ * barker database entry. Subsequent calls to the function will result
+ * in verifying that the same type of boot barker is returned when the
+ * device [re]boots (as long as the same device instance is used).
+ *
+ * Return: 0 if @buf matches a known boot barker; -ENOENT if it has the
+ *     wrong size or matches no barker in the database; -EIO if it
+ *     matches a barker other than the one already set for the device.
+ */
+int i2400m_is_boot_barker(struct i2400m *i2400m,
+			  const void *buf, size_t buf_size)
+{
+	int result;
+	struct device *dev = i2400m_dev(i2400m);
+	struct i2400m_barker_db *barker;
+	unsigned int i;
+
+	result = -ENOENT;
+	if (buf_size != sizeof(i2400m_barker_db->data))
+		return result;
+
+	/* Short circuit if we have already discovered the barker
+	 * associated with the device. */
+	if (i2400m->barker &&
+	    !memcmp(buf, i2400m->barker, sizeof(i2400m->barker->data)))
+		return 0;
+
+	for (i = 0; i < i2400m_barker_db_used; i++) {
+		barker = &i2400m_barker_db[i];
+		BUILD_BUG_ON(sizeof(barker->data) != 16);
+		if (memcmp(buf, barker->data, sizeof(barker->data)))
+			continue;
+
+		result = 0;	/* known barker; only a mismatch overrides */
+		if (i2400m->barker == NULL) {
+			i2400m->barker = barker;
+			d_printf(1, dev, "boot barker set to #%u/%08x\n",
+				 i, le32_to_cpu(barker->data[0]));
+			if (barker->data[0] == cpu_to_le32(I2400M_NBOOT_BARKER))
+				i2400m->sboot = 0;
+			else
+				i2400m->sboot = 1;
+		} else if (i2400m->barker != barker) {
+			dev_err(dev, "HW inconsistency: device "
+				"reports a different boot barker "
+				"than set (from %08x to %08x)\n",
+				le32_to_cpu(i2400m->barker->data[0]),
+				le32_to_cpu(barker->data[0]));
+			result = -EIO;
+		} else
+			d_printf(2, dev, "boot barker confirmed #%u/%08x\n",
+				 i, le32_to_cpu(barker->data[0]));
+		break;
+	}
+	return result;
+}
+EXPORT_SYMBOL_GPL(i2400m_is_boot_barker);
+
+
+/*
+ * Verify the ack data received
+ *
+ * Given a reply to a boot mode command, chew it and verify everything
+ * is ok. Unless raw, @ack's header fields are converted in place.
+ * @opcode: opcode which generated this ack (-1: any). For error messages.
+ * @ack: pointer to ack data we received
+ * @ack_size: size of that data buffer
+ * @flags: I2400M_BM_CMD_* flags we called the command with.
+ *
+ * Returns @ack_size if ok, 0 if raw, -ERESTARTSYS on a boot barker,
+ * -EISCONN on a reboot ack barker, -EIO otherwise.
+ */
+static
+ssize_t __i2400m_bm_ack_verify(struct i2400m *i2400m, int opcode,
+			       struct i2400m_bootrom_header *ack,
+			       size_t ack_size, int flags)
+{
+	ssize_t result = -ENOMEM;
+	struct device *dev = i2400m_dev(i2400m);
+
+	d_fnstart(8, dev, "(i2400m %p opcode %d ack %p size %zu)\n",
+		  i2400m, opcode, ack, ack_size);
+	if (ack_size < sizeof(*ack)) {
+		result = -EIO;
+		dev_err(dev, "boot-mode cmd %d: HW BUG? notification didn't "
+			"return enough data (%zu bytes vs %zu expected)\n",
+			opcode, ack_size, sizeof(*ack));
+		goto error_ack_short;
+	}
+	result = i2400m_is_boot_barker(i2400m, ack, ack_size);
+	if (result >= 0) {
+		result = -ERESTARTSYS;
+		d_printf(6, dev, "boot-mode cmd %d: HW boot barker\n", opcode);
+		goto error_reboot;
+	}
+	if (ack_size == sizeof(i2400m_ACK_BARKER)
+		 && memcmp(ack, i2400m_ACK_BARKER, sizeof(*ack)) == 0) {
+		result = -EISCONN;
+		d_printf(3, dev, "boot-mode cmd %d: HW reboot ack barker\n",
+			 opcode);
+		goto error_reboot_ack;
+	}
+	result = 0;
+	if (flags & I2400M_BM_CMD_RAW)
+		goto out_raw;
+	ack->data_size = le32_to_cpu(ack->data_size);
+	ack->target_addr = le32_to_cpu(ack->target_addr);
+	ack->block_checksum = le32_to_cpu(ack->block_checksum);
+	d_printf(5, dev, "boot-mode cmd %d: notification for opcode %u "
+		 "response %u csum %u rr %u da %u\n",
+		 opcode, i2400m_brh_get_opcode(ack),
+		 i2400m_brh_get_response(ack),
+		 i2400m_brh_get_use_checksum(ack),
+		 i2400m_brh_get_response_required(ack),
+		 i2400m_brh_get_direct_access(ack));
+	result = -EIO;
+	if (i2400m_brh_get_signature(ack) != 0xcbbc) {
+		dev_err(dev, "boot-mode cmd %d: HW BUG? wrong signature "
+			"0x%04x\n", opcode, i2400m_brh_get_signature(ack));
+		goto error_ack_signature;
+	}
+	if (opcode != -1 && opcode != i2400m_brh_get_opcode(ack)) {
+		dev_err(dev, "boot-mode cmd %d: HW BUG? "
+			"received response for opcode %u, expected %u\n",
+			opcode, i2400m_brh_get_opcode(ack), opcode);
+		goto error_ack_opcode;
+	}
+	if (i2400m_brh_get_response(ack) != 0) {	/* failed? */
+		dev_err(dev, "boot-mode cmd %d: error; hw response %u\n",
+			opcode, i2400m_brh_get_response(ack));
+		goto error_ack_failed;
+	}
+	if (ack->data_size > ack_size - sizeof(*ack)) {	/* can't wrap */
+		dev_err(dev, "boot-mode cmd %d: SW BUG "
+			"driver provided only %zu bytes for %zu bytes "
+			"of data\n", opcode, ack_size,
+			(size_t) ack->data_size + sizeof(*ack));
+		goto error_ack_short_buffer;
+	}
+	result = ack_size;
+	/* Don't you love this stack of empty targets? Well, I don't
+	 * either, but it helps track exactly who comes in here and
+	 * why :) */
+error_ack_short_buffer:
+error_ack_failed:
+error_ack_opcode:
+error_ack_signature:
+out_raw:
+error_reboot_ack:
+error_reboot:
+error_ack_short:
+	d_fnend(8, dev, "(i2400m %p opcode %d ack %p size %zu) = %d\n",
+		i2400m, opcode, ack, ack_size, (int) result);
+	return result;
+}
+
+
+/**
+ * i2400m_bm_cmd - Execute a boot mode command
+ *
+ * @cmd: buffer containing the command data (pointing at the header).
+ *     This data can be ANYWHERE (for USB, we will copy it to an
+ *     specific buffer). Make sure everything is in proper little
+ *     endian.
+ *
+ *     A raw buffer can be also sent, just cast it and set flags to
+ *     I2400M_BM_CMD_RAW.
+ *
+ *     This function will generate a checksum for you if the
+ *     checksum bit in the command is set (unless I2400M_BM_CMD_RAW
+ *     is set).
+ *
+ *     You can use the i2400m->bm_cmd_buf to stage your commands and
+ *     send them.
+ *
+ *     If NULL, no command is sent (we just wait for an ack).
+ *
+ * @cmd_size: size of the command. Will be auto padded to the
+ *     bus-specific drivers padding requirements.
+ *
+ * @ack: buffer where to place the acknowledgement. If it is a regular
+ *     command response, all fields will be returned with the right,
+ *     native endianness.
+ *
+ *     You *cannot* use i2400m->bm_ack_buf for this buffer.
+ *
+ * @ack_size: size of @ack, 16 aligned; you need to provide at least
+ *     sizeof(*ack) bytes and then enough to contain the return data
+ *     from the command
+ *
+ * @flags: see I2400M_BM_CMD_* above.
+ *
+ * @returns: bytes received by the notification; if < 0, an errno code
+ *     denoting an error or:
+ *
+ *     -ERESTARTSYS  The device has rebooted
+ *
+ * Executes a boot-mode command and waits for a response, doing basic
+ * validation on it; if a zero length response is received, it retries
+ * waiting for a response until a non-zero one is received (timing out
+ * after %I2400M_BOOT_RETRIES retries).
+ */
+static
+ssize_t i2400m_bm_cmd(struct i2400m *i2400m,
+		      const struct i2400m_bootrom_header *cmd, size_t cmd_size,
+		      struct i2400m_bootrom_header *ack, size_t ack_size,
+		      int flags)
+{
+	ssize_t result = -ENOMEM, rx_bytes;
+	struct device *dev = i2400m_dev(i2400m);
+	int opcode = cmd == NULL ? -1 : i2400m_brh_get_opcode(cmd);
+
+	d_fnstart(6, dev, "(i2400m %p cmd %p size %zu ack %p size %zu)\n",
+		  i2400m, cmd, cmd_size, ack, ack_size);
+	BUG_ON(ack_size < sizeof(*ack));	/* need room for a header */
+	BUG_ON(i2400m->boot_mode == 0);		/* boot mode only */
+
+	if (cmd != NULL) {		/* send the command */
+		result = i2400m->bus_bm_cmd_send(i2400m, cmd, cmd_size, flags);
+		if (result < 0)
+			goto error_cmd_send;
+		if ((flags & I2400M_BM_CMD_RAW) == 0)
+			d_printf(5, dev,
+				 "boot-mode cmd %d csum %u rr %u da %u: "
+				 "addr 0x%04x size %u block csum 0x%04x\n",
+				 opcode, i2400m_brh_get_use_checksum(cmd),
+				 i2400m_brh_get_response_required(cmd),
+				 i2400m_brh_get_direct_access(cmd),
+				 cmd->target_addr, cmd->data_size,
+				 cmd->block_checksum);
+	}
+	result = i2400m->bus_bm_wait_for_ack(i2400m, ack, ack_size);
+	if (result < 0) {
+		dev_err(dev, "boot-mode cmd %d: error waiting for an ack: %d\n",
+			opcode, (int) result);	/* bah, %zd doesn't work */
+		goto error_wait_for_ack;
+	}
+	rx_bytes = result;
+	/* verify the ack; on success the value handed back to the caller
+	 * is rx_bytes, the byte count the bus layer reported above */
+	result = __i2400m_bm_ack_verify(i2400m, opcode, ack, ack_size, flags);
+	if (result < 0)
+		goto error_bad_ack;
+	/* Don't you love this stack of empty targets? Well, I don't
+	 * either, but it helps track exactly who comes in here and
+	 * why :) */
+	result = rx_bytes;
+error_bad_ack:
+error_wait_for_ack:
+error_cmd_send:
+	d_fnend(6, dev, "(i2400m %p cmd %p size %zu ack %p size %zu) = %d\n",
+		i2400m, cmd, cmd_size, ack, ack_size, (int) result);
+	return result;
+}
+
+
+/**
+ * i2400m_download_chunk - write a single chunk of data to the device's memory
+ *
+ * @i2400m: device descriptor
+ * @chunk: the data to write
+ * @__chunk_len: length of @chunk in bytes (padded up to I2400M_PL_ALIGN)
+ * @addr: address in the device memory space
+ * @direct: bootrom write mode (only passed on for multiples of 16 bytes)
+ * @do_csum: request checksum validation (only for multiples of 4 bytes)
+ */
+static int i2400m_download_chunk(struct i2400m *i2400m, const void *chunk,
+				 size_t __chunk_len, unsigned long addr,
+				 unsigned int direct, unsigned int do_csum)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	const size_t padded_len = ALIGN(__chunk_len, I2400M_PL_ALIGN);
+	struct i2400m_bootrom_header ack;
+	struct {
+		struct i2400m_bootrom_header cmd;
+		u8 cmd_payload[];
+	} __packed *msg = i2400m->bm_cmd_buf;
+	int ret;
+
+	d_fnstart(5, dev, "(i2400m %p chunk %p __chunk_len %zu addr 0x%08lx "
+		  "direct %u do_csum %u)\n", i2400m, chunk, __chunk_len,
+		  addr, direct, do_csum);
+	/* payload first, then fill the alignment tail with 0xad */
+	memcpy(msg->cmd_payload, chunk, __chunk_len);
+	memset(msg->cmd_payload + __chunk_len, 0xad, padded_len - __chunk_len);
+
+	msg->cmd.command = i2400m_brh_command(I2400M_BRH_WRITE,
+					      (__chunk_len & 0x3) ? 0 : do_csum,
+					      (__chunk_len & 0xf) ? 0 : direct);
+	msg->cmd.target_addr = cpu_to_le32(addr);
+	msg->cmd.data_size = cpu_to_le32(__chunk_len);
+	ret = i2400m_bm_cmd(i2400m, &msg->cmd, sizeof(msg->cmd) + padded_len,
+			    &ack, sizeof(ack), 0);
+	if (ret > 0)	/* byte counts collapse to plain success */
+		ret = 0;
+	d_fnend(5, dev, "(i2400m %p chunk %p __chunk_len %zu addr 0x%08lx "
+		"direct %u do_csum %u) = %d\n", i2400m, chunk, __chunk_len,
+		addr, direct, do_csum, ret);
+	return ret;
+}
+
+
+/*
+ * Download a BCF file's sections to the device
+ *
+ * @i2400m: device descriptor
+ * @bcf: pointer to firmware data (first header followed by payloads).
+ * @bcf_len: length (in bytes) of the @bcf buffer.
+ *
+ * Returns: < 0 errno code on error or the offset to the jump instruction.
+ *
+ * Given a BCF file, downloads each section (a command and a payload)
+ * to the device's address space. Actually, it just executes each
+ * command in the BCF file, stopping at the first JUMP command.
+ *
+ * The section size has to be aligned to 4 bytes AND the padding has
+ * to be taken from the firmware file, as the signature takes it into
+ * account. A section header that does not fit in @bcf is rejected.
+ */
+static
+ssize_t i2400m_dnload_bcf(struct i2400m *i2400m,
+			  const struct i2400m_bcf_hdr *bcf, size_t bcf_len)
+{
+	ssize_t ret;
+	struct device *dev = i2400m_dev(i2400m);
+	size_t offset,		/* iterator offset */
+		data_size,	/* Size of the data payload */
+		section_size,	/* Size of the whole section (cmd + payload) */
+		section = 1;
+	const struct i2400m_bootrom_header *bh;
+	struct i2400m_bootrom_header ack;
+
+	d_fnstart(3, dev, "(i2400m %p bcf %p bcf_len %zu)\n",
+		  i2400m, bcf, bcf_len);
+	offset = le32_to_cpu(bcf->header_len) * sizeof(u32); /* skip header */
+	while (1) {	/* start sending the file */
+		/* Never read a section header that is not fully in @bcf */
+		if (offset > bcf_len || bcf_len - offset < sizeof(*bh)) {
+			dev_err(dev, "fw %s: bad section #%zu, "
+				"header (@%zu) beyond EOF (@%zu)\n",
+				i2400m->fw_name, section, offset, bcf_len);
+			ret = -EINVAL;
+			goto error_section_beyond_eof;
+		}
+		bh = (void *) bcf + offset;
+		data_size = le32_to_cpu(bh->data_size);
+		section_size = ALIGN(sizeof(*bh) + data_size, 4);
+		d_printf(7, dev,
+			 "downloading section #%zu (@%zu %zu B) to 0x%08x\n",
+			 section, offset, sizeof(*bh) + data_size,
+			 le32_to_cpu(bh->target_addr));
+		/* JUMP or SIGNED_JUMP terminates the section list */
+		if (i2400m_brh_get_opcode(bh) == I2400M_BRH_SIGNED_JUMP ||
+			i2400m_brh_get_opcode(bh) == I2400M_BRH_JUMP) {
+			d_printf(5, dev,  "jump found @%zu\n", offset);
+			break;
+		}
+		if (section_size > bcf_len - offset) {	/* can't overflow */
+			dev_err(dev, "fw %s: bad section #%zu, "
+				"end (@%zu) beyond EOF (@%zu)\n",
+				i2400m->fw_name, section,
+				offset + section_size,  bcf_len);
+			ret = -EINVAL;
+			goto error_section_beyond_eof;
+		}
+		__i2400m_msleep(20);
+		ret = i2400m_bm_cmd(i2400m, bh, section_size,
+				    &ack, sizeof(ack), I2400M_BM_CMD_RAW);
+		if (ret < 0) {
+			dev_err(dev, "fw %s: section #%zu (@%zu %zu B) "
+				"failed %d\n", i2400m->fw_name, section,
+				offset, sizeof(*bh) + data_size, (int) ret);
+			goto error_send;
+		}
+		offset += section_size;
+		section++;
+	}
+	ret = offset;
+error_section_beyond_eof:
+error_send:
+	d_fnend(3, dev, "(i2400m %p bcf %p bcf_len %zu) = %d\n",
+		i2400m, bcf, bcf_len, (int) ret);
+	return ret;
+}
+
+
+/*
+ * Tell whether the reboot barker emitted by the device asked for a
+ * "signed boot" (non-zero) or not (zero)
+ */
+static
+unsigned i2400m_boot_is_signed(struct i2400m *i2400m)
+{
+	return likely(i2400m->sboot != 0);
+}
+
+
+/*
+ * Do the final steps of uploading firmware
+ *
+ * @i2400m: device descriptor
+ * @bcf_hdr: BCF header we are actually using
+ * @bcf: pointer to the firmware image (which matches the first header
+ *     that is followed by the actual payloads).
+ * @offset: [byte] offset into @bcf for the command we need to send.
+ *
+ * Sends a plain JUMP (non-signed) or JUMP + signature block (signed).
+ */
+static
+int i2400m_dnload_finalize(struct i2400m *i2400m,
+			   const struct i2400m_bcf_hdr *bcf_hdr,
+			   const struct i2400m_bcf_hdr *bcf, size_t offset)
+{
+	int ret = 0;
+	struct device *dev = i2400m_dev(i2400m);
+	struct i2400m_bootrom_header *cmd, ack;
+	struct {
+		struct i2400m_bootrom_header cmd;
+		u8 cmd_pl[];	/* flexible array: the signature block */
+	} __packed *cmd_buf;
+	size_t signature_block_offset, signature_block_size;
+
+	d_fnstart(3, dev, "offset %zu\n", offset);
+	cmd = (void *) bcf + offset;
+	if (i2400m_boot_is_signed(i2400m) == 0) {
+		/* Send a bare JUMP: opcode forced, payload size zeroed */
+		d_printf(1, dev, "unsecure boot, jumping to 0x%08x\n",
+			le32_to_cpu(cmd->target_addr));
+		cmd_buf = i2400m->bm_cmd_buf;
+		memcpy(&cmd_buf->cmd, cmd, sizeof(*cmd));
+		cmd = &cmd_buf->cmd;
+		/* now cmd points to the actual bootrom_header in cmd_buf */
+		i2400m_brh_set_opcode(cmd, I2400M_BRH_JUMP);
+		cmd->data_size = 0;
+		ret = i2400m_bm_cmd(i2400m, cmd, sizeof(*cmd),
+				    &ack, sizeof(ack), 0);
+	} else {
+		d_printf(1, dev, "secure boot, jumping to 0x%08x\n",
+			 le32_to_cpu(cmd->target_addr));
+		cmd_buf = i2400m->bm_cmd_buf;
+		memcpy(&cmd_buf->cmd, cmd, sizeof(*cmd));
+		signature_block_offset =
+			sizeof(*bcf_hdr)
+			+ le32_to_cpu(bcf_hdr->key_size) * sizeof(u32)
+			+ le32_to_cpu(bcf_hdr->exponent_size) * sizeof(u32);
+		signature_block_size =
+			le32_to_cpu(bcf_hdr->modulus_size) * sizeof(u32);
+		memcpy(cmd_buf->cmd_pl,
+		       (void *) bcf_hdr + signature_block_offset,
+		       signature_block_size);
+		ret = i2400m_bm_cmd(i2400m, &cmd_buf->cmd,
+				    sizeof(cmd_buf->cmd) + signature_block_size,
+				    &ack, sizeof(ack), I2400M_BM_CMD_RAW);
+	}
+	d_fnend(3, dev, "returning %d\n", ret);
+	return ret;
+}
+
+
+/**
+ * i2400m_bootrom_init - Reboots a powered device into boot mode
+ *
+ * @i2400m: device descriptor
+ * @flags:
+ *      I2400M_BRI_SOFT: a reboot barker has been seen
+ *          already, so don't wait for it.
+ *
+ *      I2400M_BRI_NO_REBOOT: Don't send a reboot command, but wait
+ *          for a reboot barker notification. This is a one shot; if
+ *          the state machine needs to send a reboot command it will.
+ *
+ * Returns:
+ *
+ *     < 0 errno code on error, 0 if ok.
+ *
+ * Description:
+ *
+ * Tries hard enough to put the device in boot-mode. There are two
+ * main phases to this:
+ *
+ * a. (1) send a reboot command and (2) get a reboot barker
+ *
+ * b. (1) echo/ack the reboot sending the reboot barker back and (2)
+ *        getting an ack barker in return
+ *
+ * We want to skip (a) in some cases [soft]. The state machine is
+ * horrible, but it is basically: on each phase, send what has to be
+ * sent (if any), wait for the answer and act on the answer. We might
+ * have to backtrack and retry, so we keep a max tries counter for
+ * that.
+ *
+ * It sucks because we don't know ahead of time which is going to be
+ * the reboot barker (the device might send different ones depending
+ * on its EEPROM config) and once the device reboots and waits for the
+ * echo/ack reboot barker being sent back, it doesn't understand
+ * anything else. So we can be left at the point where we don't know
+ * what to send to it -- cold reset and bus reset seem to have little
+ * effect. So the function iterates (in this case) through all the
+ * known barkers and tries them all until an ACK is
+ * received. Otherwise, it gives up.
+ *
+ * If we get a timeout after sending a warm reset, we do it again.
+ */
+int i2400m_bootrom_init(struct i2400m *i2400m, enum i2400m_bri flags)
+{
+	int result;
+	struct device *dev = i2400m_dev(i2400m);
+	struct i2400m_bootrom_header *cmd;
+	struct i2400m_bootrom_header ack;
+	int count = i2400m->bus_bm_retries;	/* reboot/ack attempts left */
+	int ack_timeout_cnt = 1;	/* ack re-sends allowed per reboot */
+	unsigned i;
+
+	BUILD_BUG_ON(sizeof(*cmd) != sizeof(i2400m_barker_db[0].data));
+	BUILD_BUG_ON(sizeof(ack) != sizeof(i2400m_ACK_BARKER));
+
+	d_fnstart(4, dev, "(i2400m %p flags 0x%08x)\n", i2400m, flags);
+	result = -ENOMEM;
+	cmd = i2400m->bm_cmd_buf;
+	if (flags & I2400M_BRI_SOFT)
+		goto do_reboot_ack;
+do_reboot:
+	ack_timeout_cnt = 1;
+	if (--count < 0)
+		goto error_timeout;
+	d_printf(4, dev, "device reboot: reboot command [%d # left]\n",
+		 count);
+	if ((flags & I2400M_BRI_NO_REBOOT) == 0)
+		i2400m_reset(i2400m, I2400M_RT_WARM);
+	result = i2400m_bm_cmd(i2400m, NULL, 0, &ack, sizeof(ack),
+			       I2400M_BM_CMD_RAW);
+	flags &= ~I2400M_BRI_NO_REBOOT;
+	switch (result) {
+	case -ERESTARTSYS:
+		/*
+		 * at this point, i2400m_bm_cmd(), through
+		 * __i2400m_bm_ack_process(), has updated
+		 * i2400m->barker and we are good to go.
+		 */
+		d_printf(4, dev, "device reboot: got reboot barker\n");
+		break;
+	case -EISCONN:	/* we don't know how it got here...but we follow it */
+		d_printf(4, dev, "device reboot: got ack barker - whatever\n");
+		goto do_reboot;
+	case -ETIMEDOUT:
+		/*
+		 * Device has timed out, we might be in boot mode
+		 * already and expecting an ack; if we don't know what
+		 * the barker is, we just send them all. Cold reset
+		 * and bus reset don't work. Beats me.
+		 */
+		if (i2400m->barker != NULL) {
+			dev_err(dev, "device boot: reboot barker timed out, "
+				"trying (set) %08x echo/ack\n",
+				le32_to_cpu(i2400m->barker->data[0]));
+			goto do_reboot_ack;
+		}
+		for (i = 0; i < i2400m_barker_db_used; i++) {
+			struct i2400m_barker_db *barker = &i2400m_barker_db[i];
+			memcpy(cmd, barker->data, sizeof(barker->data));
+			result = i2400m_bm_cmd(i2400m, cmd, sizeof(*cmd),
+					       &ack, sizeof(ack),
+					       I2400M_BM_CMD_RAW);
+			if (result == -EISCONN) {
+				dev_warn(dev, "device boot: got ack barker "
+					 "after sending echo/ack barker "
+					 "#%d/%08x; rebooting j.i.c.\n",
+					 i, le32_to_cpu(barker->data[0]));
+				flags &= ~I2400M_BRI_NO_REBOOT;
+				goto do_reboot;
+			}
+		}
+		dev_err(dev, "device boot: tried all the echo/acks, could "
+			"not get device to respond; giving up\n");
+		result = -ESHUTDOWN;	/* fall through */
+	case -EPROTO:
+	case -ESHUTDOWN:	/* dev is gone */
+	case -EINTR:		/* user cancelled */
+		goto error_dev_gone;
+	default:
+		dev_err(dev, "device reboot: error %d while waiting "
+			"for reboot barker - rebooting\n", result);
+		d_dump(1, dev, &ack, result);
+		goto do_reboot;
+	}
+	/* At this point we ack back with 4 REBOOT barkers and expect
+	 * 4 ACK barkers. This is ugly, as we send a raw command --
+	 * hence the cast. _bm_cmd() will catch the reboot ack
+	 * notification and report it as -EISCONN. */
+do_reboot_ack:
+	d_printf(4, dev, "device reboot ack: sending ack [%d # left]\n", count);
+	memcpy(cmd, i2400m->barker->data, sizeof(i2400m->barker->data));
+	result = i2400m_bm_cmd(i2400m, cmd, sizeof(*cmd),
+			       &ack, sizeof(ack), I2400M_BM_CMD_RAW);
+	switch (result) {
+	case -ERESTARTSYS:
+		d_printf(4, dev, "reboot ack: got reboot barker - retrying\n");
+		if (--count < 0)
+			goto error_timeout;
+		goto do_reboot_ack;
+	case -EISCONN:
+		d_printf(4, dev, "reboot ack: got ack barker - good\n");
+		break;
+	case -ETIMEDOUT:	/* no response, maybe it is the other type? */
+		if (ack_timeout_cnt-- > 0) {	/* budget left: re-send ack */
+			d_printf(4, dev, "reboot ack timedout: retrying\n");
+			goto do_reboot_ack;
+		} else {	/* budget spent: start over with a reboot */
+			dev_err(dev, "reboot ack timedout too long: "
+				"trying reboot\n");
+			goto do_reboot;
+		}
+		break;
+	case -EPROTO:
+	case -ESHUTDOWN:	/* dev is gone */
+		goto error_dev_gone;
+	default:
+		dev_err(dev, "device reboot ack: error %d while waiting for "
+			"reboot ack barker - rebooting\n", result);
+		goto do_reboot;
+	}
+	d_printf(2, dev, "device reboot ack: got ack barker - boot done\n");
+	result = 0;
+exit_timeout:
+error_dev_gone:
+	d_fnend(4, dev, "(i2400m %p flags 0x%08x) = %d\n",
+		i2400m, flags, result);
+	return result;
+
+error_timeout:
+	dev_err(dev, "Timed out waiting for reboot ack\n");
+	result = -ETIMEDOUT;
+	goto exit_timeout;
+}
+
+
+/*
+ * Read the MAC addr
+ *
+ * The address lives at a fixed position in device memory (0x00203fe8)
+ * and is always available, even without firmware.
+ *
+ * Only six bytes are requested, yet the reply buffer has room for 16
+ * because the answer always comes back rounded up.
+ */
+int i2400m_read_mac_addr(struct i2400m *i2400m)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	struct net_device *net_dev = i2400m->wimax_dev.net_dev;
+	struct i2400m_bootrom_header *cmd = i2400m->bm_cmd_buf;
+	struct {
+		struct i2400m_bootrom_header ack;
+		u8 ack_pl[16];
+	} __packed reply;
+	u8 *mac = reply.ack_pl;
+	int result;
+
+	d_fnstart(5, dev, "(i2400m %p)\n", i2400m);
+	cmd->command = i2400m_brh_command(I2400M_BRH_READ, 0, 1);
+	cmd->target_addr = cpu_to_le32(0x00203fe8);
+	cmd->data_size = cpu_to_le32(6);
+	result = i2400m_bm_cmd(i2400m, cmd, sizeof(*cmd),
+			       &reply.ack, sizeof(reply), 0);
+	if (result < 0) {
+		dev_err(dev, "BM: read mac addr failed: %d\n", result);
+		goto error_read_mac;
+	}
+	d_printf(2, dev, "mac addr is %pM\n", mac);
+	if (i2400m->bus_bm_mac_addr_impaired == 1) {
+		mac[0] = 0x00;
+		mac[1] = 0x16;
+		mac[2] = 0xd3;
+		get_random_bytes(&mac[3], 3);	/* random low three bytes */
+		dev_err(dev, "BM is MAC addr impaired, faking MAC addr to "
+			"mac addr is %pM\n", mac);
+		result = 0;
+	}
+	net_dev->addr_len = ETH_ALEN;
+	memcpy(net_dev->dev_addr, mac, ETH_ALEN);
+error_read_mac:
+	d_fnend(5, dev, "(i2400m %p) = %d\n", i2400m, result);
+	return result;
+}
+
+
+/*
+ * Initialize a non signed boot
+ *
+ * Writes every entry of the bus driver's poke table (if one was
+ * provided) to the device's memory, stopping at the first entry with
+ * a zero address or at the first write that fails.
+ */
+static
+int i2400m_dnload_init_nonsigned(struct i2400m *i2400m)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	int ret = 0;
+	unsigned i;
+
+	d_fnstart(5, dev, "(i2400m %p)\n", i2400m);
+	if (i2400m->bus_bm_pokes_table) {
+		for (i = 0; i2400m->bus_bm_pokes_table[i].address; i++) {
+			ret = i2400m_download_chunk(
+				i2400m,
+				&i2400m->bus_bm_pokes_table[i].data,
+				sizeof(i2400m->bus_bm_pokes_table[i].data),
+				i2400m->bus_bm_pokes_table[i].address, 1, 1);
+			if (ret < 0)
+				break;
+		}
+	}
+	d_fnend(5, dev, "(i2400m %p) = %d\n", i2400m, ret);
+	return ret;
+}
+
+
+/*
+ * Initialize the signed boot process
+ *
+ * @i2400m: device descriptor
+ *
+ * @bcf_hdr: pointer to the firmware header; assumes it is fully in
+ *     memory (it has gone through basic validation).
+ *
+ * Returns: 0 if ok, < 0 errno code on error, -ERESTARTSYS if the hw
+ *     rebooted.
+ *
+ * The BCF header is handed to the device as the payload of a
+ * HASH_PAYLOAD_ONLY command.
+ */
+static
+int i2400m_dnload_init_signed(struct i2400m *i2400m,
+			      const struct i2400m_bcf_hdr *bcf_hdr)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	struct i2400m_bootrom_header ack;
+	struct {
+		struct i2400m_bootrom_header cmd;
+		struct i2400m_bcf_hdr cmd_pl;
+	} __packed *msg = i2400m->bm_cmd_buf;
+	int ret;
+
+	d_fnstart(5, dev, "(i2400m %p bcf_hdr %p)\n", i2400m, bcf_hdr);
+	memcpy(&msg->cmd_pl, bcf_hdr, sizeof(*bcf_hdr));
+	msg->cmd.data_size = cpu_to_le32(sizeof(msg->cmd_pl));
+	msg->cmd.target_addr = 0;
+	msg->cmd.command =
+		i2400m_brh_command(I2400M_BRH_HASH_PAYLOAD_ONLY, 0, 0);
+
+	ret = i2400m_bm_cmd(i2400m, &msg->cmd, sizeof(*msg),
+			    &ack, sizeof(ack), 0);
+	if (ret > 0)	/* a byte count just means success here */
+		ret = 0;
+	d_fnend(5, dev, "(i2400m %p bcf_hdr %p) = %d\n", i2400m, bcf_hdr, ret);
+	return ret;
+}
+
+
+/*
+ * Start the firmware download on the device
+ *
+ * Dispatches to the signed or non-signed initializer according to the
+ * device's boot mode; failures other than a device reboot are logged.
+ */
+static
+int i2400m_dnload_init(struct i2400m *i2400m,
+		       const struct i2400m_bcf_hdr *bcf_hdr)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	const unsigned is_signed = i2400m_boot_is_signed(i2400m);
+	int result;
+
+	if (is_signed) {
+		d_printf(1, dev, "signed boot\n");
+		result = i2400m_dnload_init_signed(i2400m, bcf_hdr);
+	} else {
+		/* non-signed: the pokes table (if any) gets written */
+		d_printf(1, dev, "non-signed boot\n");
+		result = i2400m_dnload_init_nonsigned(i2400m);
+	}
+	/* success and device reboots are handed back without a message */
+	if (result >= 0 || result == -ERESTARTSYS)
+		return result;
+	if (is_signed)
+		dev_err(dev, "firmware %s: signed boot download "
+			"initialization failed: %d\n",
+			i2400m->fw_name, result);
+	else
+		dev_err(dev, "firmware %s: non-signed download "
+			"initialization failed: %d\n",
+			i2400m->fw_name, result);
+	return result;
+}
+
+
+/*
+ * Sanity-check a single BCF header of the firmware file
+ *
+ * @i2400m: device descriptor
+ * @bcf_hdr: header to inspect
+ * @index, @offset: position of the header in the file (for messages)
+ *
+ * Returns: 0 if the header is usable, -EBADF if its major version,
+ * module type or vendor are not the expected ones. An old build date
+ * only triggers a warning.
+ */
+static
+int i2400m_fw_hdr_check(struct i2400m *i2400m,
+			const struct i2400m_bcf_hdr *bcf_hdr,
+			size_t index, size_t offset)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	/* header_version: major in the high 16 bits, minor in the low 16 */
+	const unsigned version = le32_to_cpu(bcf_hdr->header_version);
+	const unsigned major_version = version >> 16;
+	const unsigned minor_version = version & 0x0000ffff;
+	const unsigned module_type = le32_to_cpu(bcf_hdr->module_type);
+	const unsigned module_id = le32_to_cpu(bcf_hdr->module_id);
+	const unsigned module_vendor = le32_to_cpu(bcf_hdr->module_vendor);
+	const unsigned date = le32_to_cpu(bcf_hdr->date);
+	/* both length fields are stored as counts of 32-bit words */
+	const unsigned header_len =
+		sizeof(u32) * le32_to_cpu(bcf_hdr->header_len);
+	const unsigned size = sizeof(u32) * le32_to_cpu(bcf_hdr->size);
+
+	d_printf(1, dev, "firmware %s #%zd@%08zx: BCF header "
+		 "type:vendor:id 0x%x:%x:%x v%u.%u (%u/%u B) built %08x\n",
+		 i2400m->fw_name, index, offset,
+		 module_type, module_vendor, module_id,
+		 major_version, minor_version, header_len, size, date);
+
+	/* Hard errors */
+	if (major_version != 1) {
+		dev_err(dev, "firmware %s #%zd@%08zx: major header version "
+			"v%u.%u not supported\n",
+			i2400m->fw_name, index, offset,
+			major_version, minor_version);
+		return -EBADF;
+	}
+
+	if (module_type != 6) {		/* built for the right hardware? */
+		dev_err(dev, "firmware %s #%zd@%08zx: unexpected module "
+			"type 0x%x; aborting\n",
+			i2400m->fw_name, index, offset,
+			module_type);
+		return -EBADF;
+	}
+
+	if (module_vendor != 0x8086) {
+		dev_err(dev, "firmware %s #%zd@%08zx: unexpected module "
+			"vendor 0x%x; aborting\n",
+			i2400m->fw_name, index, offset, module_vendor);
+		return -EBADF;
+	}
+
+	/* Soft error: an old build only gets a warning */
+	if (date < 0x20080300)
+		dev_warn(dev, "firmware %s #%zd@%08zx: build date %08x "
+			 "too old; unsupported\n",
+			 i2400m->fw_name, index, offset, date);
+	return 0;
+}
+
+
+/*
+ * Run consistency tests on the firmware file and load up headers
+ *
+ * Check for the firmware being made for the i2400m device,
+ * etc...These checks are mostly informative, as the device will make
+ * them too; but the driver's response is more informative on what
+ * went wrong.
+ *
+ * Headers that pass are recorded in i2400m->fw_hdrs. Returns 0 if at
+ * least one was recorded, -EBADF if none, < 0 if growing the array fails.
+ */
+static
+int i2400m_fw_check(struct i2400m *i2400m, const void *bcf, size_t bcf_size)
+{
+	int result;
+	struct device *dev = i2400m_dev(i2400m);
+	size_t headers = 0;
+	const struct i2400m_bcf_hdr *bcf_hdr;
+	const void *itr, *next, *top;
+	size_t slots = 0, used_slots = 0;
+
+	for (itr = bcf, top = itr + bcf_size;
+	     itr < top;
+	     headers++, itr = next) {
+		size_t leftover, offset, step;
+
+		leftover = top - itr;
+		offset = itr - bcf;
+		if (leftover <= sizeof(*bcf_hdr)) {
+			dev_err(dev, "firmware %s: %zu B left at @%zx, "
+				"not enough for BCF header\n",
+				i2400m->fw_name, leftover, offset);
+			break;
+		}
+		bcf_hdr = itr;
+		/* Only the first header is supposed to be followed by
+		 * payload, so it is the only one skipped by ->size */
+		step = sizeof(u32) * le32_to_cpu(headers == 0 ?
+				bcf_hdr->size : bcf_hdr->header_len);
+		if (step == 0) {	/* would loop on this header forever */
+			dev_err(dev, "firmware %s: empty BCF header at @%zx\n",
+				i2400m->fw_name, offset);
+			break;
+		}
+		next = itr + step;
+		result = i2400m_fw_hdr_check(i2400m, bcf_hdr, headers, offset);
+		if (result < 0)
+			continue;
+		if (used_slots + 1 >= slots) {
+			/* +1 -> room for this header plus the extra slot
+			 * that always stays NULL */
+			result = i2400m_zrealloc_2x(
+				(void **) &i2400m->fw_hdrs, &slots,
+				sizeof(i2400m->fw_hdrs[0]),
+				GFP_KERNEL);
+			if (result < 0)
+				goto error_zrealloc;
+		}
+		i2400m->fw_hdrs[used_slots] = bcf_hdr;
+		used_slots++;
+	}
+	if (used_slots == 0) {	/* nothing passed i2400m_fw_hdr_check() */
+		dev_err(dev, "firmware %s: no usable headers found\n",
+			i2400m->fw_name);
+		result = -EBADF;
+	} else
+		result = 0;
+error_zrealloc:
+	return result;
+}
+
+
+/*
+ * Match a barker to a BCF header module ID
+ *
+ * The barker sent by the device selects which header of the BCF file
+ * has to be used. Returns 1 if @bcf_hdr is that header, 0 otherwise.
+ */
+static
+unsigned i2400m_bcf_hdr_match(struct i2400m *i2400m,
+			      const struct i2400m_bcf_hdr *bcf_hdr)
+{
+	/* the high bit is used for something else: leave it out */
+	const u32 id_mask = 0x7fffffff;
+	u32 barker = le32_to_cpu(i2400m->barker->data[0]) & id_mask;
+	u32 module_id = le32_to_cpu(bcf_hdr->module_id) & id_mask;
+
+	if (module_id == barker)
+		return 1;
+	/* special case for 5x50 */
+	if (barker == I2400M_SBOOT_BARKER && module_id == 0)
+		return 1;
+	return 0;
+}
+
+static
+const struct i2400m_bcf_hdr *i2400m_bcf_hdr_find(struct i2400m *i2400m)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	const struct i2400m_bcf_hdr *bcf_hdr;
+	u32 barker = le32_to_cpu(i2400m->barker->data[0]);
+	unsigned i;
+
+	d_printf(2, dev, "finding BCF header for barker %08x\n", barker);
+	if (barker == I2400M_NBOOT_BARKER) {
+		bcf_hdr = i2400m->fw_hdrs[0];	/* first one, no matching */
+		d_printf(1, dev, "using BCF header #%u/%08x for non-signed "
+			 "barker\n", 0, le32_to_cpu(bcf_hdr->module_id));
+		return bcf_hdr;
+	}
+	/* fw_hdrs is NULL terminated */
+	for (i = 0; (bcf_hdr = i2400m->fw_hdrs[i]) != NULL; i++) {
+		if (i2400m_bcf_hdr_match(i2400m, bcf_hdr)) {
+			d_printf(1, dev, "hit on BCF hdr #%u/%08x\n",
+				 i, le32_to_cpu(bcf_hdr->module_id));
+			return bcf_hdr;
+		}
+		d_printf(1, dev, "miss on BCF hdr #%u/%08x\n",
+			 i, le32_to_cpu(bcf_hdr->module_id));
+	}
+	dev_err(dev, "cannot find a matching BCF header for barker %08x\n",
+		barker);
+	return NULL;
+}
+
+
+/*
+ * Download the firmware to the device
+ *
+ * @i2400m: device descriptor
+ * @bcf: pointer to loaded (and minimally verified for consistency)
+ *    firmware
+ * @fw_size: size of the @bcf buffer (header plus payloads)
+ * @flags: boot-mode init flags; I2400M_BRI_MAC_REINIT forces a bootrom
+ *    init on the first pass, later passes always do one.
+ *
+ * The process for doing this is described in this file's header.
+ * Returns >= 0 if ok, < 0 errno code on error; if the device keeps
+ * rebooting, retries up to bus_bm_retries times (-ERESTARTSYS after).
+ */
+static
+int i2400m_fw_dnload(struct i2400m *i2400m, const struct i2400m_bcf_hdr *bcf,
+		     size_t fw_size, enum i2400m_bri flags)
+{
+	int ret = 0;
+	struct device *dev = i2400m_dev(i2400m);
+	int count = i2400m->bus_bm_retries;	/* passes left */
+	const struct i2400m_bcf_hdr *bcf_hdr;
+	size_t bcf_size;
+
+	d_fnstart(5, dev, "(i2400m %p bcf %p fw size %zu)\n",
+		  i2400m, bcf, fw_size);
+	i2400m->boot_mode = 1;
+	wmb();		/* Make sure other readers see it */
+hw_reboot:
+	if (count-- == 0) {
+		ret = -ERESTARTSYS;
+		dev_err(dev, "device rebooted too many times, aborting\n");
+		goto error_too_many_reboots;
+	}
+	if (flags & I2400M_BRI_MAC_REINIT) {
+		ret = i2400m_bootrom_init(i2400m, flags);
+		if (ret < 0) {
+			dev_err(dev, "bootrom init failed: %d\n", ret);
+			goto error_bootrom_init;
+		}
+	}
+	flags |= I2400M_BRI_MAC_REINIT;	/* any further pass must re-init */
+
+	/*
+	 * Initialize the download, push the bytes to the device and
+	 * then jump to the new firmware. Note @ret is passed with the
+	 * offset of the jump instruction to _dnload_finalize()
+	 *
+	 * Note we need to use the BCF header in the firmware image
+	 * that matches the barker that the device sent when it
+	 * rebooted, so it has to be passed along.
+	 */
+	ret = -EBADF;
+	bcf_hdr = i2400m_bcf_hdr_find(i2400m);
+	if (bcf_hdr == NULL)
+		goto error_bcf_hdr_find;
+
+	ret = i2400m_dnload_init(i2400m, bcf_hdr);
+	if (ret == -ERESTARTSYS)
+		goto error_dev_rebooted;
+	if (ret < 0)
+		goto error_dnload_init;
+
+	/*
+	 * bcf_size: one header plus the fw sections it declares (extended
+	 * headers at the tail are not counted). NOTE(review): this is not
+	 * compared against @fw_size here -- confirm it cannot exceed it.
+	 */
+	bcf_size = sizeof(u32) * le32_to_cpu(bcf_hdr->size);
+	ret = i2400m_dnload_bcf(i2400m, bcf, bcf_size);
+	if (ret == -ERESTARTSYS)
+		goto error_dev_rebooted;
+	if (ret < 0) {
+		dev_err(dev, "fw %s: download failed: %d\n",
+			i2400m->fw_name, ret);
+		goto error_dnload_bcf;
+	}
+
+	ret = i2400m_dnload_finalize(i2400m, bcf_hdr, bcf, ret);
+	if (ret == -ERESTARTSYS)
+		goto error_dev_rebooted;
+	if (ret < 0) {
+		dev_err(dev, "fw %s: "
+			"download finalization failed: %d\n",
+			i2400m->fw_name, ret);
+		goto error_dnload_finalize;
+	}
+
+	d_printf(2, dev, "fw %s successfully uploaded\n",
+		 i2400m->fw_name);
+	i2400m->boot_mode = 0;
+	wmb();		/* Make sure i2400m_msg_to_dev() sees boot_mode */
+error_dnload_finalize:
+error_dnload_bcf:
+error_dnload_init:
+error_bcf_hdr_find:
+error_bootrom_init:
+error_too_many_reboots:
+	d_fnend(5, dev, "(i2400m %p bcf %p size %zu) = %d\n",
+		i2400m, bcf, fw_size, ret);
+	return ret;
+
+error_dev_rebooted:
+	dev_err(dev, "device rebooted, %d tries left\n", count);
+	/* we got the notification already, no need to wait for it again */
+	flags |= I2400M_BRI_SOFT;
+	goto hw_reboot;
+}
+
+static
+int i2400m_fw_bootstrap(struct i2400m *i2400m, const struct firmware *fw,
+			enum i2400m_bri flags)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	const struct i2400m_bcf_hdr *bcf = (void *) fw->data;
+	int ret;
+
+	d_fnstart(5, dev, "(i2400m %p)\n", i2400m);
+	ret = i2400m_fw_check(i2400m, bcf, fw->size);
+	if (ret >= 0)
+		ret = i2400m_fw_dnload(i2400m, bcf, fw->size, flags);
+	if (ret < 0)
+		dev_err(dev, "%s: cannot use: %d, skipping\n",
+			i2400m->fw_name, ret);
+	/* the header index points into @fw; drop it in any case */
+	kfree(i2400m->fw_hdrs);
+	i2400m->fw_hdrs = NULL;
+	d_fnend(5, dev, "(i2400m %p) = %d\n", i2400m, ret);
+	return ret;
+}
+
+
+/* Refcounted container for firmware data */
+struct i2400m_fw {
+	struct kref kref;		/* released via i2400m_fw_destroy() */
+	const struct firmware *fw;	/* image from request_firmware() */
+};
+
+
+static
+void i2400m_fw_destroy(struct kref *kref)
+{
+	struct i2400m_fw *cached = container_of(kref, struct i2400m_fw, kref);
+
+	release_firmware(cached->fw);	/* the image goes first */
+	kfree(cached);
+}
+
+
+static
+struct i2400m_fw *i2400m_fw_get(struct i2400m_fw *i2400m_fw)
+{
+	if (!(i2400m_fw == NULL || i2400m_fw == (void *) ~0))
+		kref_get(&i2400m_fw->kref);	/* only real objects */
+	return i2400m_fw;
+}
+
+
+static
+void i2400m_fw_put(struct i2400m_fw *i2400m_fw)
+{
+	kref_put(&i2400m_fw->kref, i2400m_fw_destroy); /* no NULL/~0 check */
+}
+
+
+/**
+ * i2400m_dev_bootstrap - Bring the device to a known state and upload firmware
+ *
+ * @i2400m: device descriptor
+ * @flags: boot-mode init flags, passed through to the firmware upload
+ *
+ * Returns: >= 0 if ok, < 0 errno code on error.
+ *
+ * Uses the cached firmware image if there is one; otherwise tries each
+ * name in i2400m->bus_fw_names in order until one loads and uploads.
+ *
+ * Can be called either from probe, or after a warm reset.  Can not be
+ * called from within an interrupt.  All the flow in this code is
+ * single-threaded; all I/Os are synchronous.
+ */
+int i2400m_dev_bootstrap(struct i2400m *i2400m, enum i2400m_bri flags)
+{
+	int ret, itr;
+	struct device *dev = i2400m_dev(i2400m);
+	struct i2400m_fw *i2400m_fw;
+	const struct firmware *fw;
+	const char *fw_name;
+
+	d_fnstart(5, dev, "(i2400m %p)\n", i2400m);
+
+	ret = -ENODEV;	/* what the "can't load firmware now" path returns */
+	spin_lock(&i2400m->rx_lock);
+	i2400m_fw = i2400m_fw_get(i2400m->fw_cached);
+	spin_unlock(&i2400m->rx_lock);
+	if (i2400m_fw == (void *) ~0) {
+		dev_err(dev, "can't load firmware now!");
+		goto out;
+	} else if (i2400m_fw != NULL) {
+		dev_info(dev, "firmware %s: loading from cache\n",
+			 i2400m->fw_name);
+		ret = i2400m_fw_bootstrap(i2400m, i2400m_fw->fw, flags);
+		i2400m_fw_put(i2400m_fw);	/* drop the reference taken above */
+		goto out;
+	}
+
+	/* Load firmware files to memory. */
+	for (itr = 0, ret = -ENOENT; ; itr++) {
+		fw_name = i2400m->bus_fw_names[itr];
+		if (fw_name == NULL) {	/* end of list: ret keeps last error */
+			dev_err(dev, "Could not find a usable firmware image\n");
+			break;
+		}
+		d_printf(1, dev, "trying firmware %s (%d)\n", fw_name, itr);
+		ret = request_firmware(&fw, fw_name, dev);
+		if (ret < 0) {
+			dev_err(dev, "fw %s: cannot load file: %d\n",
+				fw_name, ret);
+			continue;
+		}
+		i2400m->fw_name = fw_name;
+		ret = i2400m_fw_bootstrap(i2400m, fw, flags);
+		release_firmware(fw);
+		if (ret >= 0)	/* firmware loaded successfully */
+			break;
+		i2400m->fw_name = NULL;	/* this one failed: forget its name */
+	}
+out:
+	d_fnend(5, dev, "(i2400m %p) = %d\n", i2400m, ret);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(i2400m_dev_bootstrap);
+
+/* Keep a copy of the firmware named i2400m->fw_name in i2400m->fw_cached */
+void i2400m_fw_cache(struct i2400m *i2400m)
+{
+	int result;
+	struct i2400m_fw *i2400m_fw;
+	struct device *dev = i2400m_dev(i2400m);
+
+	/* if there is anything there, free it -- now, this'd be weird */
+	spin_lock(&i2400m->rx_lock);
+	i2400m_fw = i2400m->fw_cached;
+	spin_unlock(&i2400m->rx_lock);
+	if (i2400m_fw != NULL && i2400m_fw != (void *) ~0) {
+		i2400m_fw_put(i2400m_fw);
+		WARN(1, "%s:%u: still cached fw still present?\n",
+		     __func__, __LINE__);
+	}
+
+	if (i2400m->fw_name == NULL) {
+		dev_err(dev, "firmware n/a: can't cache\n");
+		i2400m_fw = (void *) ~0;	/* marker: nothing usable cached */
+		goto out;
+	}
+
+	i2400m_fw = kzalloc(sizeof(*i2400m_fw), GFP_ATOMIC);
+	if (i2400m_fw == NULL)		/* cache is left as NULL (empty) */
+		goto out;
+	kref_init(&i2400m_fw->kref);
+	result = request_firmware(&i2400m_fw->fw, i2400m->fw_name, dev);
+	if (result < 0) {
+		dev_err(dev, "firmware %s: failed to cache: %d\n",
+			i2400m->fw_name, result);
+		kfree(i2400m_fw);
+		i2400m_fw = (void *) ~0;	/* same marker as above */
+	} else
+		dev_info(dev, "firmware %s: cached\n", i2400m->fw_name);
+out:
+	spin_lock(&i2400m->rx_lock);
+	i2400m->fw_cached = i2400m_fw;	/* object, NULL or the ~0 marker */
+	spin_unlock(&i2400m->rx_lock);
+}
+
+
+void i2400m_fw_uncache(struct i2400m *i2400m)
+{
+	struct i2400m_fw *stale;
+
+	spin_lock(&i2400m->rx_lock);
+	stale = i2400m->fw_cached;
+	i2400m->fw_cached = NULL;
+	spin_unlock(&i2400m->rx_lock);	/* the put happens unlocked */
+	if (stale == NULL || stale == (void *) ~0)
+		return;	/* empty cache or ~0 marker: nothing held */
+	i2400m_fw_put(stale);
+}
+
diff --git a/drivers/staging/wimax/i2400m/i2400m-usb.h b/drivers/staging/wimax/i2400m/i2400m-usb.h
new file mode 100644
index 000000000000..eff4f464a23e
--- /dev/null
+++ b/drivers/staging/wimax/i2400m/i2400m-usb.h
@@ -0,0 +1,275 @@
+/*
+ * Intel Wireless WiMAX Connection 2400m
+ * USB-specific i2400m driver definitions
+ *
+ *
+ * Copyright (C) 2007-2008 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ * Yanir Lubetkin <yanirx.lubetkin@intel.com>
+ *  - Initial implementation
+ *
+ *
+ * This driver implements the bus-specific part of the i2400m for
+ * USB. Check i2400m.h for a generic driver description.
+ *
+ * ARCHITECTURE
+ *
+ * This driver listens to notifications sent from the notification
+ * endpoint (in usb-notif.c); when data is ready to read, the code in
+ * there schedules a read from the device (usb-rx.c) and then passes
+ * the data to the generic RX code (rx.c).
+ *
+ * When the generic driver needs to send data (network or control), it
+ * queues up in the TX FIFO (tx.c) and that will notify the driver
+ * through the i2400m->bus_tx_kick() callback
+ * (usb-tx.c:i2400mu_bus_tx_kick) which will send the items in the
+ * FIFO queue.
+ *
+ * This driver, as well, implements the USB-specific ops for the generic
+ * driver to be able to setup/teardown communication with the device
+ * [i2400m_bus_dev_start() and i2400m_bus_dev_stop()], resetting the
+ * device [i2400m_bus_reset()] and performing firmware upload
+ * [i2400m_bus_bm_cmd() and i2400m_bus_bm_wait_for_ack()].
+ */
+
+#ifndef __I2400M_USB_H__
+#define __I2400M_USB_H__
+
+#include "i2400m.h"
+#include <linux/kthread.h>
+
+
+/*
+ * Error Density Count: cheapo error density (over time) counter
+ *
+ * Originally by Reinette Chatre <reinette.chatre@intel.com>
+ *
+ * Embed a 'struct edc' somewhere. Each time there is a soft or
+ * retryable error, call edc_inc() and check if the error top
+ * watermark has been reached.
+ */
+enum {
+	EDC_MAX_ERRORS = 10,		/* NOTE(review): presumably a max_err value for edc_inc() */
+	EDC_ERROR_TIMEFRAME = HZ,	/* in jiffies; HZ jiffies is one second */
+};
+
+/* error density counter state; driven by edc_init() and edc_inc() */
+struct edc {
+	unsigned long timestart;	/* jiffies when the current window began */
+	u16 errorcount;			/* errors counted in the current window */
+};
+
+struct i2400m_endpoint_cfg {	/* NOTE(review): presumably USB endpoint ids -- confirm */
+	unsigned char bulk_out;
+	unsigned char notification;
+	unsigned char reset_cold;
+	unsigned char bulk_in;
+};
+
+static inline void edc_init(struct edc *edc)
+{
+	edc->timestart = jiffies;	/* only the window start is set; errorcount is untouched */
+}
+
+/**
+ * edc_inc - report a soft error and check if we are over the watermark
+ *
+ * @edc: pointer to error density counter.
+ * @max_err: maximum number of errors we can accept over the timeframe
+ * @timeframe: length of the timeframe (in jiffies).
+ *
+ * Returns: 1 if the maximum acceptable errors per timeframe has been
+ *     exceeded. 0 otherwise.
+ *
+ * This is a way to determine if the number of acceptable errors per time
+ * period has been exceeded. It is not accurate as there are cases in which
+ * this scheme will not work, for example if there are periodic occurrences
+ * of errors that straddle updates to the start time. This scheme is
+ * sufficient for our usage.
+ *
+ * To use, embed a 'struct edc' somewhere, initialize it with
+ * edc_init() and when an error hits:
+ *
+ * if (do_something_fails_with_a_soft_error) {
+ *        if (edc_inc(&my->edc, MAX_ERRORS, MAX_TIMEFRAME))
+ *                 Oops, hard error, do something about it
+ *        else
+ *                 Retry or ignore, depending on whatever
+ * }
+ */
+static inline int edc_inc(struct edc *edc, u16 max_err, u16 timeframe)
+{
+	unsigned long now;
+
+	now = jiffies;
+	if (time_after(now, edc->timestart + timeframe)) {
+		edc->errorcount = 1;	/* window expired: this error opens a new one */
+		edc->timestart = now;
+	} else if (++edc->errorcount > max_err) {
+		edc->errorcount = 0;	/* over the watermark: restart and report it */
+		edc->timestart = now;
+		return 1;
+	}
+	return 0;
+}
+
+/* Host-Device interface constants for USB */
+enum {
+	I2400M_USB_BOOT_RETRIES = 3,
+	I2400MU_MAX_NOTIFICATION_LEN = 256,
+	I2400MU_BLK_SIZE = 16,		/* NOTE(review): presumably feeds bus_tx_block_size */
+	I2400MU_PL_SIZE_MAX = 0x3EFF,	/* NOTE(review): presumably feeds bus_pl_size_max */
+
+	/* USB device IDs */
+	USB_DEVICE_ID_I6050 = 0x0186,
+	USB_DEVICE_ID_I6050_2 = 0x0188,
+	USB_DEVICE_ID_I6150 = 0x07d6,
+	USB_DEVICE_ID_I6150_2 = 0x07d7,
+	USB_DEVICE_ID_I6150_3 = 0x07d9,
+	USB_DEVICE_ID_I6250 = 0x0187,
+};
+
+
+/**
+ * struct i2400mu - descriptor for a USB connected i2400m
+ *
+ * @i2400m: bus-generic i2400m implementation; has to be first (see
+ *     its documentation in i2400m.h).
+ *
+ * @usb_dev: pointer to our USB device
+ *
+ * @usb_iface: pointer to our USB interface
+ *
+ * @urb_edc: error density counter; used to keep a density-on-time tab
+ *     on how many soft (retryable or ignorable) errors we get. If we
+ *     go over the threshold, we consider the bus transport is failing
+ *     too much and reset.
+ *
+ * @notif_urb: URB for receiving notifications from the device.
+ *
+ * @tx_kthread: thread we use for data TX. We use a thread because in
+ *     order to do deep power saving and put the device to sleep, we
+ *     need to call usb_autopm_*() [blocking functions].
+ *
+ * @tx_wq: waitqueue for the TX kthread to sleep when there is no data
+ *     to be sent; when more data is available, it is woken up by
+ *     i2400mu_bus_tx_kick().
+ *
+ * @rx_kthread: thread we use for data RX. We use a thread because in
+ *     order to do deep power saving and put the device to sleep, we
+ *     need to call usb_autopm_*() [blocking functions].
+ *
+ * @rx_wq: waitqueue for the RX kthread to sleep when there is no data
+ *     to receive. When data is available, it is woken up by
+ *     usb-notif.c:i2400mu_notification_grok().
+ *
+ * @rx_pending_count: number of rx-data-ready notifications that were
+ *     still not handled by the RX kthread.
+ *
+ * @rx_size: current RX buffer size that is being used.
+ *
+ * @rx_size_acc: accumulator of the sizes of the previous read
+ *     transactions.
+ *
+ * @rx_size_cnt: number of read transactions accumulated in
+ *     @rx_size_acc.
+ *
+ * @do_autopm: disable(0)/enable(>0) calling the
+ *     usb_autopm_get/put_interface() barriers when executing
+ *     commands. See doc in i2400mu_suspend() for more information.
+ *
+ * @rx_size_auto_shrink: if true, the rx_size is shrunk
+ *     automatically based on the average size of the received
+ *     transactions. This allows the receive code to allocate smaller
+ *     chunks of memory and thus reduce pressure on the memory
+ *     allocator by not wasting so much space. By default it is
+ *     enabled.
+ *
+ * @debugfs_dentry: hookup for debugfs files.
+ *     These have to be in a separate directory, a child of
+ *     (wimax_dev->debugfs_dentry) so they can be removed when the
+ *     module unloads, as we don't keep each dentry.
+ */
+struct i2400mu {
+	struct i2400m i2400m;		/* FIRST! See doc */
+
+	struct usb_device *usb_dev;
+	struct usb_interface *usb_iface;
+	struct edc urb_edc;		/* Error density counter */
+	struct i2400m_endpoint_cfg endpoint_cfg;
+
+	struct urb *notif_urb;
+	struct task_struct *tx_kthread;
+	wait_queue_head_t tx_wq;
+
+	struct task_struct *rx_kthread;
+	wait_queue_head_t rx_wq;
+	atomic_t rx_pending_count;	/* cleared to 0 in i2400mu_init() */
+	size_t rx_size, rx_size_acc, rx_size_cnt;
+	atomic_t do_autopm;		/* 0 disables, >0 enables; see doc */
+	u8 rx_size_auto_shrink;		/* boolean; set to 1 in i2400mu_init() */
+
+	struct dentry *debugfs_dentry;
+	unsigned i6050:1;	/* 1 if this is a 6050 based SKU */
+};
+
+
+static inline
+void i2400mu_init(struct i2400mu *i2400mu)
+{
+	i2400m_init(&i2400mu->i2400m);	/* embedded bus-generic descriptor */
+	edc_init(&i2400mu->urb_edc);
+	init_waitqueue_head(&i2400mu->tx_wq);
+	atomic_set(&i2400mu->rx_pending_count, 0);
+	init_waitqueue_head(&i2400mu->rx_wq);
+	i2400mu->rx_size = PAGE_SIZE - sizeof(struct skb_shared_info);
+	atomic_set(&i2400mu->do_autopm, 1);	/* >0: autopm calls enabled */
+	i2400mu->rx_size_auto_shrink = 1;	/* auto-shrink is on by default */
+}
+
+int i2400mu_notification_setup(struct i2400mu *);
+void i2400mu_notification_release(struct i2400mu *);
+
+int i2400mu_rx_setup(struct i2400mu *);
+void i2400mu_rx_release(struct i2400mu *);
+void i2400mu_rx_kick(struct i2400mu *);
+
+int i2400mu_tx_setup(struct i2400mu *);
+void i2400mu_tx_release(struct i2400mu *);
+void i2400mu_bus_tx_kick(struct i2400m *);
+
+ssize_t i2400mu_bus_bm_cmd_send(struct i2400m *,
+				const struct i2400m_bootrom_header *, size_t,
+				int);
+ssize_t i2400mu_bus_bm_wait_for_ack(struct i2400m *,
+				    struct i2400m_bootrom_header *, size_t);
+#endif /* #ifndef __I2400M_USB_H__ */
diff --git a/drivers/staging/wimax/i2400m/i2400m.h b/drivers/staging/wimax/i2400m/i2400m.h
new file mode 100644
index 000000000000..de22cc6f2c5c
--- /dev/null
+++ b/drivers/staging/wimax/i2400m/i2400m.h
@@ -0,0 +1,970 @@
+/*
+ * Intel Wireless WiMAX Connection 2400m
+ * Declarations for bus-generic internal APIs
+ *
+ *
+ * Copyright (C) 2007-2008 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ * Yanir Lubetkin <yanirx.lubetkin@intel.com>
+ *  - Initial implementation
+ *
+ *
+ * GENERAL DRIVER ARCHITECTURE
+ *
+ * The i2400m driver is split in the following two major parts:
+ *
+ *  - bus specific driver
+ *  - bus generic driver (this part)
+ *
+ * The bus specific driver sets up stuff specific to the bus the
+ * device is connected to (USB, PCI, tam-tam...non-authoritative
+ * nor binding list) which is basically the device-model management
+ * (probe/disconnect, etc), moving data from device to kernel and
+ * back, doing the power saving details and resetting the device.
+ *
+ * For details on each bus-specific driver, see its include file,
+ * i2400m-BUSNAME.h
+ *
+ * The bus-generic functionality break up is:
+ *
+ *  - Firmware upload: fw.c - takes care of uploading firmware to the
+ *        device. bus-specific driver just needs to provides a way to
+ *        execute boot-mode commands and to reset the device.
+ *
+ *  - RX handling: rx.c - receives data from the bus-specific code and
+ *        feeds it to the network or WiMAX stack or uses it to modify
+ *        the driver state. bus-specific driver only has to receive
+ *        frames and pass them to this module.
+ *
+ *  - TX handling: tx.c - manages the TX FIFO queue and provides means
+ *        for the bus-specific TX code to pull data from the FIFO
+ *        queue. bus-specific code just pulls frames from this module
+ *        to sends them to the device.
+ *
+ *  - netdev glue: netdev.c - interface with Linux networking
+ *        stack. Pass around data frames, and configure when the
+ *        device is up and running or shutdown (through ifconfig up /
+ *        down). Bus-generic only.
+ *
+ *  - control ops: control.c - implements various commands for
+ *        controlling the device. bus-generic only.
+ *
+ *  - device model glue: driver.c - implements helpers for the
+ *        device-model glue done by the bus-specific layer
+ *        (setup/release the driver resources), turning the device on
+ *        and off, handling the device reboots/resets and a few simple
+ *        WiMAX stack ops.
+ *
+ * Code is also broken up in linux-glue / device-glue.
+ *
+ * Linux glue contains functions that deal mostly with gluing with the
+ * rest of the Linux kernel.
+ *
+ * Device-glue are functions that deal mostly with the way the device
+ * does things and talk the device's language.
+ *
+ * device-glue code is licensed BSD so other open source OSes can take
+ * it to implement their drivers.
+ *
+ *
+ * APIs AND HEADER FILES
+ *
+ * This bus generic code exports three APIs:
+ *
+ *  - HDI (host-device interface) definitions common to all busses
+ *    (include/linux/wimax/i2400m.h); these can be also used by user
+ *    space code.
+ *  - internal API for the bus-generic code
+ *  - external API for the bus-specific drivers
+ *
+ *
+ * LIFE CYCLE:
+ *
+ * When the bus-specific driver probes, it allocates a network device
+ * with enough space for its data structure, that must contain a
+ * &struct i2400m at the top.
+ *
+ * On probe, it needs to fill the i2400m members marked as [fill], as
+ * well as i2400m->wimax_dev.net_dev and call i2400m_setup(). The
+ * i2400m driver will only register with the WiMAX and network stacks;
+ * the only access done to the device is to read the MAC address so we
+ * can register a network device.
+ *
+ * The high-level call flow is:
+ *
+ * bus_probe()
+ *   i2400m_setup()
+ *     i2400m->bus_setup()
+ *     boot rom initialization / read mac addr
+ *     network / WiMAX stacks registration
+ *     i2400m_dev_start()
+ *       i2400m->bus_dev_start()
+ *       i2400m_dev_initialize()
+ *
+ * The reverse applies for a disconnect() call:
+ *
+ * bus_disconnect()
+ *   i2400m_release()
+ *     i2400m_dev_stop()
+ *       i2400m_dev_shutdown()
+ *       i2400m->bus_dev_stop()
+ *     network / WiMAX stack unregistration
+ *     i2400m->bus_release()
+ *
+ * At this point, control and data communications are possible.
+ *
+ * While the device is up, it might reset. The bus-specific driver has
+ * to catch that situation and call i2400m_dev_reset_handle() to deal
+ * with it (reset the internal driver structures and go back to square
+ * one).
+ */
+
+#ifndef __I2400M_H__
+#define __I2400M_H__
+
+#include <linux/usb.h>
+#include <linux/netdevice.h>
+#include <linux/completion.h>
+#include <linux/rwsem.h>
+#include <linux/atomic.h>
+#include "../net-wimax.h"
+#include "linux-wimax-i2400m.h"
+#include <asm/byteorder.h>
+
+enum {
+/* netdev interface */
+	/*
+	 * Out of NWG spec (R1_v1.2.2), 3.3.3 ASN Bearer Plane MTU Size
+	 *
+	 * The MTU is 1400 or less
+	 */
+	I2400M_MAX_MTU = 1400,	/* upper bound quoted from the spec above */
+};
+
+/* Misc constants */
+enum {
+	/* Size of the Boot Mode Command buffer */
+	I2400M_BM_CMD_BUF_SIZE = 16 * 1024,
+	I2400M_BM_ACK_BUF_SIZE = 256,	/* NOTE(review): presumably sizes bm_ack_buf */
+};
+
+enum {
+	/* Maximum number of times a bus reset can be retried */
+	I2400M_BUS_RESET_RETRIES = 3,
+};
+
+/**
+ * struct i2400m_poke_table - Hardware poke table for the Intel 2400m
+ *
+ * This structure will be used to create a device specific poke table
+ * to put the device in a consistent state at boot time.
+ *
+ * @address: The device address to poke
+ *
+ * @data: The data value to poke to the device address
+ *
+ */
+struct i2400m_poke_table{
+	__le32 address;		/* little-endian; see I2400M_FW_POKE() */
+	__le32 data;		/* little-endian; see I2400M_FW_POKE() */
+};
+
+#define I2400M_FW_POKE(a, d) {		\
+	.address = cpu_to_le32(a),	\
+	.data = cpu_to_le32(d)		\
+}	/* initializer with .address/.data converted via cpu_to_le32() */
+
+
+/**
+ * enum i2400m_reset_type - methods to reset a device
+ *
+ * @I2400M_RT_WARM: Reset without device disconnection, device handles
+ *     are kept valid but state is back to power on, with firmware
+ *     re-uploaded.
+ * @I2400M_RT_COLD: Tell the device to disconnect itself from the bus
+ *     and reconnect. Renders all device handles invalid.
+ * @I2400M_RT_BUS: Tells the bus to reset the device; last measure
+ *     used when both types above don't work.
+ */
+enum i2400m_reset_type {
+	I2400M_RT_WARM,	/* first measure */
+	I2400M_RT_COLD,	/* second measure */
+	I2400M_RT_BUS,	/* call in artillery */
+};
+
+struct i2400m_reset_ctx;
+struct i2400m_roq;
+struct i2400m_barker_db;
+
+/**
+ * struct i2400m - descriptor for an Intel 2400m
+ *
+ * Members marked with [fill] must be filled out/initialized before
+ * calling i2400m_setup().
+ *
+ * Note the @bus_setup/@bus_release, @bus_dev_start/@bus_dev_stop
+ * call pairs are very much doing almost the same, and depending on
+ * the underlying bus, some stuff has to be put in one or the
+ * other. The idea of setup/release is that they setup the minimal
+ * amount needed for loading firmware, whereas dev_start/stop setup
+ * the rest needed to do full data/control traffic.
+ *
+ * @bus_tx_block_size: [fill] USB imposes a 16 block size, but other
+ *     busses will differ.  So we have a tx_blk_size variable that the
+ *     bus layer sets to tell the engine how much of that we need.
+ *
+ * @bus_tx_room_min: [fill] Minimum room required while allocating
+ *     TX queue's buffer space for message header. USB requires
+ *     16 bytes. Refer to bus specific driver code for details.
+ *
+ * @bus_pl_size_max: [fill] Maximum payload size.
+ *
+ * @bus_setup: [optional fill] Function called by the bus-generic code
+ *     [i2400m_setup()] to setup the basic bus-specific communications
+ *     to the device needed to load firmware. See LIFE CYCLE above.
+ *
+ *     NOTE: Doesn't need to upload the firmware, as that is taken
+ *     care of by the bus-generic code.
+ *
+ * @bus_release: [optional fill] Function called by the bus-generic
+ *     code [i2400m_release()] to shutdown the basic bus-specific
+ *     communications to the device needed to load firmware. See
+ *     LIFE CYCLE above.
+ *
+ *     This function does not need to reset the device, just tear down
+ *     all the host resources created to  handle communication with
+ *     the device.
+ *
+ * @bus_dev_start: [optional fill] Function called by the bus-generic
+ *     code [i2400m_dev_start()] to do things needed to start the
+ *     device. See LIFE CYCLE above.
+ *
+ *     NOTE: Doesn't need to upload the firmware, as that is taken
+ *     care of by the bus-generic code.
+ *
+ * @bus_dev_stop: [optional fill] Function called by the bus-generic
+ *     code [i2400m_dev_stop()] to do things needed for stopping the
+ *     device. See LIFE CYCLE above.
+ *
+ *     This function does not need to reset the device, just tear down
+ *     all the host resources created to handle communication with
+ *     the device.
+ *
+ * @bus_tx_kick: [fill] Function called by the bus-generic code to let
+ *     the bus-specific code know that there is data available in the
+ *     TX FIFO for transmission to the device.
+ *
+ *     This function cannot sleep.
+ *
+ * @bus_reset: [fill] Function called by the bus-generic code to reset
+ *     the device in various ways. Doesn't need to wait for the
+ *     reset to finish.
+ *
+ *     If warm or cold reset fail, this function is expected to do a
+ *     bus-specific reset (eg: USB reset) to get the device to a
+ *     working state (even if it implies device disconnection).
+ *
+ *     Note the warm reset is used by the firmware uploader to
+ *     reinitialize the device.
+ *
+ *     IMPORTANT: this is called very early in the device setup
+ *     process, so it cannot rely on common infrastructure being laid
+ *     out.
+ *
+ *     IMPORTANT: don't call reset on RT_BUS with i2400m->init_mutex
+ *     held, as the .pre/.post reset handlers will deadlock.
+ *
+ * @bus_bm_retries: [fill] How many times shall a firmware upload /
+ *     device initialization be retried? Different models of the same
+ *     device might need different values, hence it is set by the
+ *     bus-specific driver. Note this value is used in two places,
+ *     i2400m_fw_dnload() and __i2400m_dev_start(); they won't become
+ *     multiplicative (__i2400m_dev_start() calling N times
+ *     i2400m_fw_dnload() and this trying N times to download the
+ *     firmware), as __i2400m_dev_start() only retries if the
+ *     firmware crashed while initializing the device (not in a
+ *     general case).
+ *
+ * @bus_bm_cmd_send: [fill] Function called to send a boot-mode
+ *     command. Flags are defined in 'enum i2400m_bm_cmd_flags'. This
+ *     is synchronous and has to return 0 if ok or < 0 errno code in
+ *     any error condition.
+ *
+ * @bus_bm_wait_for_ack: [fill] Function called to wait for a
+ *     boot-mode notification (that can be a response to a previously
+ *     issued command or an asynchronous one). Will read until all the
+ *     indicated size is read or timeout. Reading more or less data
+ *     than asked for is an error condition. Return 0 if ok, < 0 errno
+ *     code on error.
+ *
+ *     The caller to this function will check if the response is a
+ *     barker that indicates the device going into reset mode.
+ *
+ * @bus_fw_names: [fill] a NULL-terminated array with the names of the
+ *     firmware images to try loading. This is made a list so we can
+ *     support backward compatibility of firmware releases (eg: if we
+ *     can't find the default v1.4, we try v1.3). In general, the name
+ *     should be i2400m-fw-X-VERSION.sbcf, where X is the bus name.
+ *     The list is tried in order and the first one that loads is
+ *     used. The fw loader will set i2400m->fw_name to point to the
+ *     active firmware image.
+ *
+ * @bus_bm_mac_addr_impaired: [fill] Set to true if the device's MAC
+ *     address provided in boot mode is kind of broken and needs to
+ *     be re-read later on.
+ *
+ * @bus_bm_pokes_table: [fill/optional] A table of device addresses
+ *     and values that will be poked at device init time to move the
+ *     device to the correct state for the type of boot/firmware being
+ *     used.  This table MUST be terminated with (0x000000,
+ *     0x00000000) or bad things will happen.
+ *
+ *
+ * @wimax_dev: WiMAX generic device for linkage into the kernel WiMAX
+ *     stack. Due to the way a net_device is allocated, we need to
+ *     force this to be the first field so that we can get from
+ *     netdev_priv() the right pointer.
+ *
+ * @updown: the device is up and ready for transmitting control and
+ *     data packets. This implies @ready (communication infrastructure
+ *     with the device is ready) and the device's firmware has been
+ *     loaded and the device initialized.
+ *
+ *     Write to it only inside a i2400m->init_mutex protected area
+ *     followed with a wmb(); rmb() before accessing (unless locked
+ *     inside i2400m->init_mutex). Read access can be loose like that
+ *     [just using rmb()] because the paths that use this also do
+ *     other error checks later on.
+ *
+ * @ready: Communication infrastructure with the device is ready, data
+ *     frames can start to be passed around (this is lighter than
+ *     using the WiMAX state for certain hot paths).
+ *
+ *     Write to it only inside a i2400m->init_mutex protected area
+ *     followed with a wmb(); rmb() before accessing (unless locked
+ *     inside i2400m->init_mutex). Read access can be loose like that
+ *     [just using rmb()] because the paths that use this also do
+ *     other error checks later on.
+ *
+ * @rx_reorder: 1 if RX reordering is enabled; this can only be
+ *     set at probe time.
+ *
+ * @state: device's state (as reported by it)
+ *
+ * @state_wq: waitqueue that is woken up whenever the state changes
+ *
+ * @tx_lock: spinlock to protect TX members
+ *
+ * @tx_buf: FIFO buffer for TX; we queue data here
+ *
+ * @tx_in: FIFO index for incoming data. Note this doesn't wrap around
+ *     and it is always greater than @tx_out.
+ *
+ * @tx_out: FIFO index for outgoing data
+ *
+ * @tx_msg: current TX message that is active in the FIFO for
+ *     appending payloads.
+ *
+ * @tx_sequence: current sequence number for TX messages from the
+ *     device to the host.
+ *
+ * @tx_msg_size: size of the current message being transmitted by the
+ *     bus-specific code.
+ *
+ * @tx_pl_num: total number of payloads sent
+ *
+ * @tx_pl_max: maximum number of payloads sent in a TX message
+ *
+ * @tx_pl_min: minimum number of payloads sent in a TX message
+ *
+ * @tx_num: number of TX messages sent
+ *
+ * @tx_size_acc: number of bytes in all TX messages sent
+ *     (this is different to net_dev's statistics as it also counts
+ *     control messages).
+ *
+ * @tx_size_min: smallest TX message sent.
+ *
+ * @tx_size_max: biggest TX message sent.
+ *
+ * @rx_lock: spinlock to protect RX members and rx_roq_refcount.
+ *
+ * @rx_pl_num: total number of payloads received
+ *
+ * @rx_pl_max: maximum number of payloads received in a RX message
+ *
+ * @rx_pl_min: minimum number of payloads received in a RX message
+ *
+ * @rx_num: number of RX messages received
+ *
+ * @rx_size_acc: number of bytes in all RX messages received
+ *     (this is different to net_dev's statistics as it also counts
+ *     control messages).
+ *
+ * @rx_size_min: smallest RX message received.
+ *
+ * @rx_size_max: biggest RX message received.
+ *
+ * @rx_roq: RX ReOrder queues. (fw >= v1.4) When packets are received
+ *     out of order, the device will ask the driver to hold certain
+ *     packets until the ones that are received out of order can be
+ *     delivered. Then the driver can release them to the host. See
+ *     drivers/net/i2400m/rx.c for details.
+ *
+ * @rx_roq_refcount: refcount rx_roq. This refcounts any access to
+ *     rx_roq thus preventing rx_roq being destroyed when rx_roq
+ *     is being accessed. rx_roq_refcount is protected by rx_lock.
+ *
+ * @rx_reports: reports received from the device that couldn't be
+ *     processed because the driver wasn't still ready; when ready,
+ *     they are pulled from here and chewed.
+ *
+ * @rx_reports_ws: Work struct used to kick a scan of the RX reports
+ *     list and to process each.
+ *
+ * @src_mac_addr: MAC address used to make ethernet packets be coming
+ *     from. This is generated at i2400m_setup() time and used during
+ *     the life cycle of the instance. See i2400m_fake_eth_header().
+ *
+ * @init_mutex: Mutex used for serializing the device bringup
+ *     sequence; this way if the device reboots in the middle, we
+ *     don't try to do a bringup again while we are tearing down the
+ *     one that failed.
+ *
+ *     Can't reuse @msg_mutex because from within the bringup sequence
+ *     we need to send messages to the device and thus use @msg_mutex.
+ *
+ * @msg_mutex: mutex used to send control commands to the device (we
+ *     only allow one at a time, per host-device interface design).
+ *
+ * @msg_completion: used to wait for an ack to a control command sent
+ *     to the device.
+ *
+ * @ack_skb: used to store the actual ack to a control command if the
+ *     reception of the command was successful. Otherwise, a ERR_PTR()
+ *     errno code that indicates what failed with the ack reception.
+ *
+ *     Only valid after @msg_completion is woken up. Only updateable
+ *     if @msg_completion is armed. Only touched by
+ *     i2400m_msg_to_dev().
+ *
+ *     Protected by @rx_lock. In theory the command execution flow is
+ *     sequential, but in case the device sends an out-of-phase or
+ *     very delayed response, we need to avoid it trampling current
+ *     execution.
+ *
+ * @bm_cmd_buf: boot mode command buffer for composing firmware upload
+ *     commands.
+ *
+ *     USB can't r/w to stack, vmalloc, etc...as well, we end up
+ *     having to alloc/free a lot to compose commands, so we use these
+ *     for staging and not having to realloc all the time.
+ *
+ *     This assumes the code always runs serialized. Only one thread
+ *     can call i2400m_bm_cmd() at the same time.
+ *
+ * @bm_ack_buf: boot mode acknowledge buffer for staging reception of
+ *     responses to commands.
+ *
+ *     See @bm_cmd_buf.
+ *
+ * @work_queue: work queue for processing device reports. This
+ *     workqueue cannot be used for processing TX or RX to the device,
+ *     as from it we'll process device reports, which might require
+ *     further communication with the device.
+ *
+ * @debugfs_dentry: hookup for debugfs files.
+ *     These have to be in a separate directory, a child of
+ *     (wimax_dev->debugfs_dentry) so they can be removed when the
+ *     module unloads, as we don't keep each dentry.
+ *
+ * @fw_name: name of the firmware image that is currently being used.
+ *
+ * @fw_version: version of the firmware interface, Major.minor,
+ *     encoded in the high word and low word (major << 16 | minor).
+ *
+ * @fw_hdrs: NULL terminated array of pointers to the firmware
+ *     headers. This is only available during firmware load time.
+ *
+ * @fw_cached: Used to cache firmware when the system goes to
+ *     suspend/standby/hibernation (as on resume we can't read it). If
+ *     NULL, no firmware was cached, read it. If ~0, you can't read
+ *     any firmware files (the system still didn't come out of suspend
+ *     and failed to cache one), so abort; otherwise, a valid cached
+ *     firmware to be used. Access to this variable is protected by
+ *     the spinlock i2400m->rx_lock.
+ *
+ * @barker: barker type that the device uses; this is initialized by
+ *     i2400m_is_boot_barker() the first time it is called. Then it
+ *     won't change during the life cycle of the device and every time
+ *     a boot barker is received, it is just verified for it being the
+ *     same.
+ *
+ * @pm_notifier: used to register for PM events
+ *
+ * @bus_reset_retries: counter for the number of bus resets attempted for
+ *	this boot. It's not for tracking the number of bus resets during
+ *	the whole driver life cycle (from insmod to rmmod) but for the
+ *	number of dev_start() executed until dev_start() returns a success
+ *	(ie: a good boot means a dev_stop() followed by a successful
+ *	dev_start()). dev_reset_handle() increments this counter whenever
+ *	it is triggering a bus reset. It checks this counter to decide if a
+ *	subsequent bus reset should be retried. dev_reset_handle() retries
+ *	the bus reset until dev_start() succeeds or the counter reaches
+ *	I2400M_BUS_RESET_RETRIES. The counter is cleared to 0 in
+ *	dev_reset_handle() when dev_start() returns a success,
+ *	ie: a successful boot is completed.
+ *
+ * @alive: flag to denote if the device *should* be alive. This flag is
+ *	everything like @updown (see doc for @updown) except reflecting
+ *	the device state *we expect* rather than the actual state as denoted
+ *	by @updown. It is set 1 whenever @updown is set 1 in dev_start().
+ *	Then the device is expected to be alive all the time
+ *	(i2400m->alive remains 1) until the driver is removed. Therefore
+ *	all the device reboot events detected can be still handled properly
+ *	by either dev_reset_handle() or .pre_reset/.post_reset as long as
+ *	the driver is present. It is set 0 along with @updown in dev_stop().
+ *
+ * @error_recovery: flag to denote if we are ready to take an error recovery.
+ *	0 for ready to take an error recovery; 1 for not ready. It is
+ *	initialized to 1 while probe() since we don't tend to take any error
+ *	recovery during probe(). It is decremented by 1 whenever dev_start()
+ *	succeeds to indicate we are ready to take error recovery from now on.
+ *	It is checked every time we want to schedule an error recovery. If an
+ *	error recovery is already in place (error_recovery was set 1), we
+ *	should not schedule another one until the last one is done.
+ */
+struct i2400m {
+	struct wimax_dev wimax_dev;	/* FIRST! See doc */
+
+	unsigned updown:1;		/* Network device is up or down */
+	unsigned boot_mode:1;		/* is the device in boot mode? */
+	unsigned sboot:1;		/* signed or unsigned fw boot */
+	unsigned ready:1;		/* Device comm infrastructure ready */
+	unsigned rx_reorder:1;		/* RX reorder is enabled */
+	u8 trace_msg_from_user;		/* echo rx msgs to 'trace' pipe */
+					/* typed u8 so /sys/kernel/debug/u8 can tweak */
+	enum i2400m_system_state state;
+	wait_queue_head_t state_wq;	/* Woken up on state updates */
+
+	size_t bus_tx_block_size;
+	size_t bus_tx_room_min;
+	size_t bus_pl_size_max;
+	unsigned bus_bm_retries;
+
+	int (*bus_setup)(struct i2400m *);
+	int (*bus_dev_start)(struct i2400m *);
+	void (*bus_dev_stop)(struct i2400m *);
+	void (*bus_release)(struct i2400m *);
+	void (*bus_tx_kick)(struct i2400m *);
+	int (*bus_reset)(struct i2400m *, enum i2400m_reset_type);
+	ssize_t (*bus_bm_cmd_send)(struct i2400m *,
+				   const struct i2400m_bootrom_header *,
+				   size_t, int flags);
+	ssize_t (*bus_bm_wait_for_ack)(struct i2400m *,
+				       struct i2400m_bootrom_header *, size_t);
+	const char **bus_fw_names;
+	unsigned bus_bm_mac_addr_impaired:1;
+	const struct i2400m_poke_table *bus_bm_pokes_table;
+
+	spinlock_t tx_lock;		/* protect TX state */
+	void *tx_buf;
+	size_t tx_in, tx_out;
+	struct i2400m_msg_hdr *tx_msg;
+	size_t tx_sequence, tx_msg_size;
+	/* TX stats */
+	unsigned tx_pl_num, tx_pl_max, tx_pl_min,
+		tx_num, tx_size_acc, tx_size_min, tx_size_max;
+
+	/* RX stuff */
+	/* protect RX state and rx_roq_refcount */
+	spinlock_t rx_lock;
+	unsigned rx_pl_num, rx_pl_max, rx_pl_min,
+		rx_num, rx_size_acc, rx_size_min, rx_size_max;
+	struct i2400m_roq *rx_roq;	/* access is refcounted */
+	struct kref rx_roq_refcount;	/* refcount access to rx_roq */
+	u8 src_mac_addr[ETH_HLEN];
+	struct list_head rx_reports;	/* under rx_lock! */
+	struct work_struct rx_report_ws;
+
+	struct mutex msg_mutex;		/* serialize command execution */
+	struct completion msg_completion;
+	struct sk_buff *ack_skb;	/* protected by rx_lock */
+
+	void *bm_ack_buf;		/* for receiving acks over USB */
+	void *bm_cmd_buf;		/* for issuing commands over USB */
+
+	struct workqueue_struct *work_queue;
+
+	struct mutex init_mutex;	/* protect bringup seq */
+	struct i2400m_reset_ctx *reset_ctx;	/* protected by init_mutex */
+
+	struct work_struct wake_tx_ws;
+	struct sk_buff *wake_tx_skb;
+
+	struct work_struct reset_ws;
+	const char *reset_reason;
+
+	struct work_struct recovery_ws;
+
+	struct dentry *debugfs_dentry;
+	const char *fw_name;		/* name of the current firmware image */
+	unsigned long fw_version;	/* fw interface: major << 16 | minor */
+	const struct i2400m_bcf_hdr **fw_hdrs;	/* NULL terminated; fw load only */
+	struct i2400m_fw *fw_cached;	/* NULL, ~0 or cached fw; under rx_lock */
+	struct i2400m_barker_db *barker;	/* set by first i2400m_is_boot_barker() */
+
+	struct notifier_block pm_notifier;	/* used to register for PM events */
+
+	/* counting bus reset retries in this boot */
+	atomic_t bus_reset_retries;
+
+	/* if the device is expected to be alive */
+	unsigned alive;
+
+	/* 0 if we are ready for error recovery; 1 if not ready */
+	atomic_t error_recovery;
+
+};
+
+
+/*
+ * Bus-generic internal APIs
+ * -------------------------
+ */
+
+/* Map an embedded wimax_dev back to the struct i2400m that contains it. */
+static inline struct i2400m *wimax_dev_to_i2400m(struct wimax_dev *wimax_dev)
+{
+	return container_of(wimax_dev, struct i2400m, wimax_dev);
+}
+
+static inline struct i2400m *net_dev_to_i2400m(struct net_device *net_dev)
+{
+	struct wimax_dev *wimax_dev = netdev_priv(net_dev);	/* priv data */
+	return container_of(wimax_dev, struct i2400m, wimax_dev);
+}
+
+/*
+ * Boot mode support
+ */
+
+/**
+ * enum i2400m_bm_cmd_flags - flags to i2400m_bm_cmd()
+ *
+ * @I2400M_BM_CMD_RAW: send the command block as-is, without doing any
+ *     extra processing for adding CRC.
+ */
+enum i2400m_bm_cmd_flags {
+	I2400M_BM_CMD_RAW	= 1 << 2,
+};
+
+/**
+ * enum i2400m_bri - Boot-ROM indicators
+ *
+ * Flags for i2400m_bootrom_init() and i2400m_dev_bootstrap() [which
+ * are passed from things like i2400m_setup()]. Can be combined with
+ * bitwise OR (|).
+ *
+ * @I2400M_BRI_SOFT: The device rebooted already and a reboot
+ *     barker received, proceed directly to ack the boot sequence.
+ * @I2400M_BRI_NO_REBOOT: Do not reboot the device and proceed
+ *     directly to wait for a reboot barker from the device.
+ * @I2400M_BRI_MAC_REINIT: We need to reinitialize the boot
+ *     rom after reading the MAC address. This is quite a dirty hack,
+ *     if you ask me -- the device requires the bootrom to be
+ *     initialized after reading the MAC address.
+ */
+enum i2400m_bri {
+	I2400M_BRI_SOFT       = 1 << 1,
+	I2400M_BRI_NO_REBOOT  = 1 << 2,
+	I2400M_BRI_MAC_REINIT = 1 << 3,
+};
+
+void i2400m_bm_cmd_prepare(struct i2400m_bootrom_header *);
+int i2400m_dev_bootstrap(struct i2400m *, enum i2400m_bri);
+int i2400m_read_mac_addr(struct i2400m *);
+int i2400m_bootrom_init(struct i2400m *, enum i2400m_bri);
+int i2400m_is_boot_barker(struct i2400m *, const void *, size_t);
+/* Nonzero when buf starts with the little-endian D2H message barker. */
+static inline int i2400m_is_d2h_barker(const void *buf)
+{
+	__le32 first_word = *(const __le32 *) buf;
+	return I2400M_D2H_MSG_BARKER == le32_to_cpu(first_word);
+}
+void i2400m_unknown_barker(struct i2400m *, const void *, size_t);
+
+/* Make/grok boot-rom header commands */
+
+/* Build the little-endian command word for a boot-mode request. */
+static inline
+__le32 i2400m_brh_command(enum i2400m_brh_opcode opcode, unsigned use_checksum,
+			  unsigned direct_access)
+{
+	unsigned cmd = I2400M_BRH_SIGNATURE | I2400M_BRH_RESPONSE_REQUIRED;
+	cmd |= opcode & I2400M_BRH_OPCODE_MASK;
+	cmd |= direct_access ? I2400M_BRH_DIRECT_ACCESS : 0;
+	cmd |= use_checksum ? I2400M_BRH_USE_CHECKSUM : 0;
+	return cpu_to_le32(cmd);	/* response always required */
+}
+
+static inline
+void i2400m_brh_set_opcode(struct i2400m_bootrom_header *hdr,
+			   enum i2400m_brh_opcode opcode)
+{
+	unsigned cmd = le32_to_cpu(hdr->command) & ~I2400M_BRH_OPCODE_MASK;
+	/* splice the new opcode into the now-cleared opcode bits */
+	hdr->command = cpu_to_le32(cmd | (opcode & I2400M_BRH_OPCODE_MASK));
+}
+
+static inline
+unsigned i2400m_brh_get_opcode(const struct i2400m_bootrom_header *hdr)
+{
+	return I2400M_BRH_OPCODE_MASK & le32_to_cpu(hdr->command); /* opcode bits */
+}
+
+static inline
+unsigned i2400m_brh_get_response(const struct i2400m_bootrom_header *hdr)
+{
+	unsigned cmd = le32_to_cpu(hdr->command);	/* host-endian copy */
+	return (cmd & I2400M_BRH_RESPONSE_MASK) >> I2400M_BRH_RESPONSE_SHIFT;
+}
+
+static inline
+unsigned i2400m_brh_get_use_checksum(const struct i2400m_bootrom_header *hdr)
+{
+	return I2400M_BRH_USE_CHECKSUM & le32_to_cpu(hdr->command); /* raw bit */
+}
+
+/* Returns the raw RESPONSE_REQUIRED bit (zero or the flag value, not 0/1). */
+static inline unsigned
+i2400m_brh_get_response_required(const struct i2400m_bootrom_header *hdr)
+{
+	return I2400M_BRH_RESPONSE_REQUIRED & le32_to_cpu(hdr->command);
+}
+
+static inline
+unsigned i2400m_brh_get_direct_access(const struct i2400m_bootrom_header *hdr)
+{
+	return I2400M_BRH_DIRECT_ACCESS & le32_to_cpu(hdr->command); /* raw bit */
+}
+
+static inline
+unsigned i2400m_brh_get_signature(const struct i2400m_bootrom_header *hdr)
+{
+	unsigned cmd = le32_to_cpu(hdr->command);	/* host-endian copy */
+	return (cmd & I2400M_BRH_SIGNATURE_MASK) >> I2400M_BRH_SIGNATURE_SHIFT;
+}
+
+
+/*
+ * Driver / device setup and internal functions
+ */
+void i2400m_init(struct i2400m *);
+int i2400m_reset(struct i2400m *, enum i2400m_reset_type);
+void i2400m_netdev_setup(struct net_device *net_dev);
+int i2400m_sysfs_setup(struct device_driver *);
+void i2400m_sysfs_release(struct device_driver *);
+int i2400m_tx_setup(struct i2400m *);
+void i2400m_wake_tx_work(struct work_struct *);
+void i2400m_tx_release(struct i2400m *);
+
+int i2400m_rx_setup(struct i2400m *);
+void i2400m_rx_release(struct i2400m *);
+
+void i2400m_fw_cache(struct i2400m *);
+void i2400m_fw_uncache(struct i2400m *);
+
+void i2400m_net_rx(struct i2400m *, struct sk_buff *, unsigned, const void *,
+		   int);
+void i2400m_net_erx(struct i2400m *, struct sk_buff *, enum i2400m_cs);
+void i2400m_net_wake_stop(struct i2400m *);
+enum i2400m_pt;
+int i2400m_tx(struct i2400m *, const void *, size_t, enum i2400m_pt);
+
+#ifdef CONFIG_DEBUG_FS
+void i2400m_debugfs_add(struct i2400m *);
+void i2400m_debugfs_rm(struct i2400m *);
+#else	/* !CONFIG_DEBUG_FS: empty no-op stubs */
+static inline void i2400m_debugfs_add(struct i2400m *i2400m) {}
+static inline void i2400m_debugfs_rm(struct i2400m *i2400m) {}
+#endif	/* CONFIG_DEBUG_FS */
+
+/* Initialize/shutdown the device */
+int i2400m_dev_initialize(struct i2400m *);
+void i2400m_dev_shutdown(struct i2400m *);
+
+extern struct attribute_group i2400m_dev_attr_group;
+
+
+/* HDI message's payload description handling */
+
+/* Payload size: the I2400M_PLD_SIZE_MASK bits of the LE descriptor. */
+static inline size_t i2400m_pld_size(const struct i2400m_pld *pld)
+{
+	return le32_to_cpu(pld->val) & I2400M_PLD_SIZE_MASK;
+}
+
+/* Payload type: the I2400M_PLD_TYPE_MASK field, shifted down to bit 0. */
+static inline enum i2400m_pt i2400m_pld_type(const struct i2400m_pld *pld)
+{
+	unsigned val = le32_to_cpu(pld->val);
+	return (val & I2400M_PLD_TYPE_MASK) >> I2400M_PLD_TYPE_SHIFT;
+}
+
+/* Pack a payload type and size into the little-endian descriptor word. */
+static inline
+void i2400m_pld_set(struct i2400m_pld *pld, size_t size, enum i2400m_pt type)
+{
+	size_t sz = size & I2400M_PLD_SIZE_MASK;
+	size_t pt = (type << I2400M_PLD_TYPE_SHIFT) & I2400M_PLD_TYPE_MASK;
+	pld->val = cpu_to_le32(pt | sz);
+}
+
+
+/*
+ * API for the bus-specific drivers
+ * --------------------------------
+ */
+
+/* dev_hold() the backing net_device, then hand the same i2400m back. */
+static inline struct i2400m *i2400m_get(struct i2400m *i2400m)
+{
+	dev_hold(i2400m->wimax_dev.net_dev);
+	return i2400m;
+}
+
+/* Counterpart of i2400m_get(): dev_put() the backing net_device. */
+static inline void i2400m_put(struct i2400m *i2400m)
+{
+	dev_put(i2400m->wimax_dev.net_dev);
+}
+
+int i2400m_dev_reset_handle(struct i2400m *, const char *);
+int i2400m_pre_reset(struct i2400m *);
+int i2400m_post_reset(struct i2400m *);
+void i2400m_error_recovery(struct i2400m *);
+
+/*
+ * _setup()/_release() are called by the probe/disconnect functions of
+ * the bus-specific drivers.
+ */
+int i2400m_setup(struct i2400m *, enum i2400m_bri bm_flags);
+void i2400m_release(struct i2400m *);
+
+int i2400m_rx(struct i2400m *, struct sk_buff *);
+struct i2400m_msg_hdr *i2400m_tx_msg_get(struct i2400m *, size_t *);
+void i2400m_tx_msg_sent(struct i2400m *);
+
+
+/*
+ * Utility functions
+ */
+
+/* Return the parent struct device of this i2400m's net_device. */
+static inline struct device *i2400m_dev(struct i2400m *i2400m)
+{
+	return i2400m->wimax_dev.net_dev->dev.parent;
+}
+
+int i2400m_msg_check_status(const struct i2400m_l3l4_hdr *, char *, size_t);
+int i2400m_msg_size_check(struct i2400m *, const struct i2400m_l3l4_hdr *,
+			  size_t);
+struct sk_buff *i2400m_msg_to_dev(struct i2400m *, const void *, size_t);
+void i2400m_msg_to_dev_cancel_wait(struct i2400m *, int);
+void i2400m_report_hook(struct i2400m *, const struct i2400m_l3l4_hdr *,
+			size_t);
+void i2400m_report_hook_work(struct work_struct *);
+int i2400m_cmd_enter_powersave(struct i2400m *);
+int i2400m_cmd_exit_idle(struct i2400m *);
+struct sk_buff *i2400m_get_device_info(struct i2400m *);
+int i2400m_firmware_check(struct i2400m *);
+int i2400m_set_idle_timeout(struct i2400m *, unsigned);
+
+static inline struct usb_endpoint_descriptor *
+usb_get_epd(struct usb_interface *iface, int ep)
+{
+	return &iface->cur_altsetting->endpoint[ep].desc;	/* ep unchecked */
+}
+
+int i2400m_op_rfkill_sw_toggle(struct wimax_dev *, enum wimax_rf_state);
+void i2400m_report_tlv_rf_switches_status(struct i2400m *,
+					  const struct i2400m_tlv_rf_switches_status *);
+
+/*
+ * Helpers for firmware backwards compatibility
+ *
+ * As we aim to support at least the firmware version that was
+ * released with the previous kernel/driver release, some code will be
+ * conditionally executed depending on the firmware version. On each
+ * release, the code to support fw releases past the last two ones
+ * will be purged.
+ *
+ * By making it depend on these helpers, it is easier to keep tabs
+ * on what has to go and what does not.
+ */
+/* True when the running fw interface is v1.3 (0x00090001) or lower. */
+static inline unsigned i2400m_le_v1_3(struct i2400m *i2400m)
+{
+	const unsigned long v1_3 = 0x00090001;
+	return !(i2400m->fw_version > v1_3);
+}
+
+/* True when the running fw interface is v1.4 (0x00090002) or higher. */
+static inline unsigned i2400m_ge_v1_4(struct i2400m *i2400m)
+{
+	const unsigned long v1_4 = 0x00090002;
+	return !(i2400m->fw_version < v1_4);
+}
+
+
+/*
+ * Do a millisecond-sleep for allowing wireshark to dump all the data
+ * packets. Used only for debugging.
+ */
+/* Debug aid: the sleep is compiled out; flip the #if below to enable it. */
+static inline
+void __i2400m_msleep(unsigned ms)
+{
+#if 0
+	msleep(ms);
+#endif
+}
+
+
+/* module initialization helpers */
+int i2400m_barker_db_init(const char *);
+void i2400m_barker_db_exit(void);
+
+
+
+#endif /* #ifndef __I2400M_H__ */
diff --git a/drivers/staging/wimax/i2400m/linux-wimax-i2400m.h b/drivers/staging/wimax/i2400m/linux-wimax-i2400m.h
new file mode 100644
index 000000000000..fd198bc24a3c
--- /dev/null
+++ b/drivers/staging/wimax/i2400m/linux-wimax-i2400m.h
@@ -0,0 +1,572 @@
+/*
+ * Intel Wireless WiMax Connection 2400m
+ * Host-Device protocol interface definitions
+ *
+ *
+ * Copyright (C) 2007-2008 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *  - Initial implementation
+ *
+ *
+ * This header defines the data structures and constants used to
+ * communicate with the device.
+ *
+ * BOOTMODE/BOOTROM/FIRMWARE UPLOAD PROTOCOL
+ *
+ * The firmware upload protocol is quite simple and only requires a
+ * handful of commands. See drivers/net/wimax/i2400m/fw.c for more
+ * details.
+ *
+ * The BCF data structure is for the firmware file header.
+ *
+ *
+ * THE DATA / CONTROL PROTOCOL
+ *
+ * This is the normal protocol spoken with the device once the
+ * firmware is uploaded. It transports data payloads and control
+ * messages back and forth.
+ *
+ * It consists of 'messages' that pack one or more payloads each. The
+ * format is described in detail in drivers/net/wimax/i2400m/rx.c and
+ * tx.c.
+ *
+ *
+ * THE L3L4 PROTOCOL
+ *
+ * The term L3L4 refers to Layer 3 (the device), Layer 4 (the
+ * driver/host software).
+ *
+ * This is the control protocol used by the host to control the i2400m
+ * device (scan, connect, disconnect...). This is sent to / received
+ * as control frames. These frames consist of a header and zero or
+ * more TLVs with information. We call each control frame a "message".
+ *
+ * Each message is composed of:
+ *
+ * HEADER
+ * [TLV0 + PAYLOAD0]
+ * [TLV1 + PAYLOAD1]
+ * [...]
+ * [TLVN + PAYLOADN]
+ *
+ * The HEADER is defined by 'struct i2400m_l3l4_hdr'. The payloads are
+ * defined by a TLV structure (Type Length Value) which is a 'header'
+ * (struct i2400m_tlv_hdr) and then the payload.
+ *
+ * All integers are represented as Little Endian.
+ *
+ * - REQUESTS AND EVENTS
+ *
+ * The requests can be classified as follows:
+ *
+ *   COMMAND:  implies a request from the host to the device requesting
+ *             an action being performed. The device will reply with a
+ *             message (with the same type as the command), status and
+ *             no (TLV) payload. Execution of a command might cause
+ *             events (of different type) to be sent later on as
+ *             device's state changes.
+ *
+ *   GET/SET:  similar to COMMAND, but will not cause other
+ *             EVENTs. The reply, in the case of GET, will contain
+ *             TLVs with the requested information.
+ *
+ *   EVENT:    asynchronous messages sent from the device, maybe as a
+ *             consequence of previous COMMANDs but disassociated from
+ *             them.
+ *
+ * Only one request might be pending at the same time (ie: don't
+ * parallelize nor post another GET request before the previous
+ * COMMAND has been acknowledged with its corresponding reply by the
+ * device).
+ *
+ * The different requests and their formats are described below:
+ *
+ *  I2400M_MT_*   Message types
+ *  I2400M_MS_*   Message status (for replies, events)
+ *  i2400m_tlv_*  TLVs
+ *
+ * data types are named 'struct i2400m_msg_OPNAME', OPNAME matching the
+ * operation.
+ */
+
+#ifndef __LINUX__WIMAX__I2400M_H__
+#define __LINUX__WIMAX__I2400M_H__
+
+#include <linux/types.h>
+#include <linux/if_ether.h>
+
+/*
+ * Host Device Interface (HDI) common to all busses
+ */
+
+/* Boot-mode (firmware upload mode) commands */
+
+/* Header for the firmware file */
+struct i2400m_bcf_hdr {
+	__le32 module_type;
+	__le32 header_len;
+	__le32 header_version;
+	__le32 module_id;
+	__le32 module_vendor;
+	__le32 date;		/* BCD YYYYMMDD */
+	__le32 size;            /* in dwords */
+	__le32 key_size;	/* in dwords */
+	__le32 modulus_size;	/* in dwords */
+	__le32 exponent_size;	/* in dwords */
+	__u8 reserved[88];	/* brings the packed header to 128 bytes */
+} __attribute__ ((packed));
+
+/* Boot mode opcodes; every value fits in I2400M_BRH_OPCODE_MASK (0xf) */
+enum i2400m_brh_opcode {
+	I2400M_BRH_READ = 1,
+	I2400M_BRH_WRITE = 2,
+	I2400M_BRH_JUMP = 3,
+	I2400M_BRH_SIGNED_JUMP = 8,
+	I2400M_BRH_HASH_PAYLOAD_ONLY = 9,
+};
+
+/* Boot mode command masks, shifts and flag bits of the 32-bit command word */
+enum i2400m_brh {
+	I2400M_BRH_SIGNATURE = 0xcbbc0000,	/* sits under SIGNATURE_MASK */
+	I2400M_BRH_SIGNATURE_MASK = 0xffff0000,	/* bits 16-31 */
+	I2400M_BRH_SIGNATURE_SHIFT = 16,
+	I2400M_BRH_OPCODE_MASK = 0x0000000f,	/* bits 0-3 */
+	I2400M_BRH_RESPONSE_MASK = 0x000000f0,	/* bits 4-7 */
+	I2400M_BRH_RESPONSE_SHIFT = 4,
+	I2400M_BRH_DIRECT_ACCESS = 0x00000400,	/* bit 10 */
+	I2400M_BRH_RESPONSE_REQUIRED = 0x00000200,	/* bit 9 */
+	I2400M_BRH_USE_CHECKSUM = 0x00000100,	/* bit 8 */
+};
+
+
+/**
+ * struct i2400m_bootrom_header - Header for a boot-mode command
+ *
+ * @command: the above command descriptor
+ * @target_addr: where on the device memory should the action be performed.
+ * @data_size: for read/write, amount of data to be read/written
+ * @block_checksum: checksum value (if applicable)
+ * @payload: the beginning of data attached to this header
+ */
+struct i2400m_bootrom_header {
+	__le32 command;		/* Compose with enum i2400m_brh */
+	__le32 target_addr;
+	__le32 data_size;
+	__le32 block_checksum;
+	char payload[0];
+} __attribute__ ((packed));
+
+
+/*
+ * Data / control protocol
+ */
+
+/* Packet types for the host-device interface (values start at 0) */
+enum i2400m_pt {
+	I2400M_PT_DATA = 0,
+	I2400M_PT_CTRL,
+	I2400M_PT_TRACE,	/* For device debug */
+	I2400M_PT_RESET_WARM,	/* device reset */
+	I2400M_PT_RESET_COLD,	/* USB[transport] reset, like reconnect */
+	I2400M_PT_EDATA,	/* Extended RX data */
+	I2400M_PT_ILLEGAL	/* = 6; comes after every defined type */
+};
+
+
+/*
+ * Payload for a data packet
+ *
+ * This is prefixed to each and every outgoing DATA type payload.
+ */
+struct i2400m_pl_data_hdr {
+	__le32 reserved;	/* the whole header is this one 32-bit word */
+} __attribute__((packed));
+
+
+/*
+ * Payload for an extended data packet
+ *
+ * New in fw v1.4
+ *
+ * @reorder: if this payload has to be reordered or not (and how)
+ * @cs: the type of data in the packet, as defined per (802.16e
+ *     T11.13.19.1). Currently only 2 (IPv4 packet) supported.
+ *
+ * This is prefixed to each and every INCOMING DATA packet.
+ */
+struct i2400m_pl_edata_hdr {
+	__le32 reorder;		/* bits defined in i2400m_ro */
+	__u8 cs;
+	__u8 reserved[11];	/* brings the packed header to 16 bytes */
+} __attribute__((packed));
+
+enum i2400m_cs {	/* NOTE(review): presumably values of edata 'cs' -- confirm */
+	I2400M_CS_IPV4_0 = 0,
+	I2400M_CS_IPV4 = 2,	/* 2: IPv4 packet, per the edata header doc */
+};
+
+enum i2400m_ro {	/* masks and shifts for i2400m_pl_edata_hdr.reorder */
+	I2400M_RO_NEEDED     = 0x01,
+	I2400M_RO_TYPE       = 0x03,	/* 2 bits wide */
+	I2400M_RO_TYPE_SHIFT = 1,
+	I2400M_RO_CIN        = 0x0f,	/* 4 bits wide */
+	I2400M_RO_CIN_SHIFT  = 4,
+	I2400M_RO_FBN        = 0x07ff,	/* 11 bits wide */
+	I2400M_RO_FBN_SHIFT  = 8,
+	I2400M_RO_SN         = 0x07ff,	/* 11 bits wide */
+	I2400M_RO_SN_SHIFT   = 21,	/* NOTE(review): masks look post-shift -- confirm in rx.c */
+};
+
+enum i2400m_ro_type {	/* values 0-3, i.e. within the 0x03 I2400M_RO_TYPE mask */
+	I2400M_RO_TYPE_RESET = 0,
+	I2400M_RO_TYPE_PACKET,
+	I2400M_RO_TYPE_WS,
+	I2400M_RO_TYPE_PACKET_WS,
+};
+
+
+/* Misc constants */
+enum {
+	I2400M_PL_ALIGN = 16,	/* Payload data size alignment */
+	I2400M_PL_SIZE_MAX = 0x3EFF,
+	I2400M_MAX_PLS_IN_MSG = 60,
+	/* protocol barkers: sync sequences; for notifications they
+	 * are sent in groups of four. */
+	I2400M_H2D_PREVIEW_BARKER = 0xcafe900d,
+	I2400M_COLD_RESET_BARKER = 0xc01dc01d,
+	I2400M_WARM_RESET_BARKER = 0x50f750f7,
+	I2400M_NBOOT_BARKER = 0xdeadbeef,
+	I2400M_SBOOT_BARKER = 0x0ff1c1a1,
+	I2400M_SBOOT_BARKER_6050 = 0x80000001,
+	I2400M_ACK_BARKER = 0xfeedbabe,
+	I2400M_D2H_MSG_BARKER = 0xbeefbabe,	/* tested by i2400m_is_d2h_barker() */
+};
+
+
+/*
+ * Hardware payload descriptor
+ *
+ * Bitfields encoded in a struct to enforce typing semantics.
+ *
+ * Look in rx.c and tx.c for a full description of the format.
+ */
+struct i2400m_pld {
+	__le32 val;	/* size and type fields, see the masks below */
+} __attribute__ ((packed));
+
+#define I2400M_PLD_SIZE_MASK 0x00003fff	/* bits 0-13 */
+#define I2400M_PLD_TYPE_SHIFT 16
+#define I2400M_PLD_TYPE_MASK 0x000f0000	/* bits 16-19 */
+
+/*
+ * Header for a TX message or RX message
+ *
+ * @barker: preamble
+ * @size: used for management of the FIFO queue buffer; before
+ *     sending, this is converted to be a real preamble. This
+ *     indicates the real size of the TX message that starts at this
+ *     point. If the highest bit is set, then this message is to be
+ *     skipped.
+ * @sequence: sequence number of this message
+ * @offset: offset where the message itself starts -- see the comments
+ *     in the file header about message header and payload descriptor
+ *     alignment.
+ * @num_pls: number of payloads in this message
+ * @padding: amount of padding bytes at the end of the message to make
+ *           it block-size aligned
+ *
+ * Look in rx.c and tx.c for a full description of the format.
+ */
+struct i2400m_msg_hdr {
+	union {
+		__le32 barker;
+		__u32 size;	/* same size type as barker!! */
+	};
+	union {
+		__le32 sequence;
+		__u32 offset;	/* same size type as sequence!! */
+	};
+	__le16 num_pls;
+	__le16 rsv1;
+	__le16 padding;
+	__le16 rsv2;
+	struct i2400m_pld pld[0];	/* payload descriptors follow the header */
+} __attribute__ ((packed));
+
+
+
+/*
+ * L3/L4 control protocol
+ */
+
+enum {
+	/* Interface version; NOTE(review): presumably for i2400m_l3l4_hdr.version -- confirm */
+	I2400M_L3L4_VERSION             = 0x0100,
+};
+
+/* Message types */
+enum i2400m_mt {
+	I2400M_MT_RESERVED              = 0x0000,
+	I2400M_MT_INVALID               = 0xffff,
+	I2400M_MT_REPORT_MASK		= 0x8000,	/* set in every I2400M_MT_REPORT_* */
+
+	I2400M_MT_GET_SCAN_RESULT  	= 0x4202,
+	I2400M_MT_SET_SCAN_PARAM   	= 0x4402,
+	I2400M_MT_CMD_RF_CONTROL   	= 0x4602,
+	I2400M_MT_CMD_SCAN         	= 0x4603,
+	I2400M_MT_CMD_CONNECT      	= 0x4604,
+	I2400M_MT_CMD_DISCONNECT   	= 0x4605,
+	I2400M_MT_CMD_EXIT_IDLE   	= 0x4606,
+	I2400M_MT_GET_LM_VERSION   	= 0x5201,
+	I2400M_MT_GET_DEVICE_INFO  	= 0x5202,
+	I2400M_MT_GET_LINK_STATUS  	= 0x5203,
+	I2400M_MT_GET_STATISTICS   	= 0x5204,
+	I2400M_MT_GET_STATE        	= 0x5205,
+	I2400M_MT_GET_MEDIA_STATUS	= 0x5206,
+	I2400M_MT_SET_INIT_CONFIG	= 0x5404,
+	I2400M_MT_CMD_INIT	        = 0x5601,
+	I2400M_MT_CMD_TERMINATE		= 0x5602,
+	I2400M_MT_CMD_MODE_OF_OP	= 0x5603,
+	I2400M_MT_CMD_RESET_DEVICE	= 0x5604,
+	I2400M_MT_CMD_MONITOR_CONTROL   = 0x5605,
+	I2400M_MT_CMD_ENTER_POWERSAVE   = 0x5606,
+	I2400M_MT_GET_TLS_OPERATION_RESULT = 0x6201,
+	I2400M_MT_SET_EAP_SUCCESS       = 0x6402,
+	I2400M_MT_SET_EAP_FAIL          = 0x6403,
+	I2400M_MT_SET_EAP_KEY          	= 0x6404,
+	I2400M_MT_CMD_SEND_EAP_RESPONSE = 0x6602,
+	I2400M_MT_REPORT_SCAN_RESULT    = 0xc002,
+	I2400M_MT_REPORT_STATE		= 0xd002,
+	I2400M_MT_REPORT_POWERSAVE_READY = 0xd005,
+	I2400M_MT_REPORT_EAP_REQUEST    = 0xe002,
+	I2400M_MT_REPORT_EAP_RESTART    = 0xe003,
+	I2400M_MT_REPORT_ALT_ACCEPT    	= 0xe004,
+	I2400M_MT_REPORT_KEY_REQUEST 	= 0xe005,
+};
+
+
+/*
+ * Message Ack Status codes
+ *
+ * When a message is replied-to, this status is reported.
+ */
+enum i2400m_ms {
+	I2400M_MS_DONE_OK                  = 0,
+	I2400M_MS_DONE_IN_PROGRESS         = 1,
+	I2400M_MS_INVALID_OP               = 2,
+	I2400M_MS_BAD_STATE                = 3,
+	I2400M_MS_ILLEGAL_VALUE            = 4,
+	I2400M_MS_MISSING_PARAMS           = 5,
+	I2400M_MS_VERSION_ERROR            = 6,
+	I2400M_MS_ACCESSIBILITY_ERROR      = 7,
+	I2400M_MS_BUSY                     = 8,
+	I2400M_MS_CORRUPTED_TLV            = 9,
+	I2400M_MS_UNINITIALIZED            = 10,
+	I2400M_MS_UNKNOWN_ERROR            = 11,
+	I2400M_MS_PRODUCTION_ERROR         = 12,
+	I2400M_MS_NO_RF                    = 13,
+	I2400M_MS_NOT_READY_FOR_POWERSAVE  = 14,
+	I2400M_MS_THERMAL_CRITICAL         = 15,
+	I2400M_MS_MAX	/* = 16, one more than the last status code */
+};
+
+
+/**
+ * enum i2400m_tlv - enumeration of the different types of TLVs
+ *
+ * TLVs stand for type-length-value and are the header for a payload
+ * composed of almost anything. Each payload has a type assigned
+ * and a length.
+ */
+enum i2400m_tlv {
+	I2400M_TLV_L4_MESSAGE_VERSIONS = 129,
+	I2400M_TLV_SYSTEM_STATE = 141,
+	I2400M_TLV_MEDIA_STATUS = 161,
+	I2400M_TLV_RF_OPERATION = 162,
+	I2400M_TLV_RF_STATUS = 163,
+	I2400M_TLV_DEVICE_RESET_TYPE = 132,	/* note: list is not in numeric order */
+	I2400M_TLV_CONFIG_IDLE_PARAMETERS = 601,
+	I2400M_TLV_CONFIG_IDLE_TIMEOUT = 611,
+	I2400M_TLV_CONFIG_D2H_DATA_FORMAT = 614,
+	I2400M_TLV_CONFIG_DL_HOST_REORDER = 615,
+};
+
+
+struct i2400m_tlv_hdr {
+	__le16 type;		/* NOTE(review): presumably an enum i2400m_tlv -- confirm */
+	__le16 length;		/* payload's */
+	__u8   pl[0];		/* payload starts right after the header */
+} __attribute__((packed));
+
+
+struct i2400m_l3l4_hdr {
+	__le16 type;		/* message type, I2400M_MT_* */
+	__le16 length;		/* payload's */
+	__le16 version;
+	__le16 resv1;
+	__le16 status;		/* message status, I2400M_MS_* (replies, events) */
+	__le16 resv2;
+	struct i2400m_tlv_hdr pl[0];	/* zero or more TLVs follow */
+} __attribute__((packed));
+
+
+/**
+ * enum i2400m_system_state - different states of the device
+ */
+enum i2400m_system_state {
+	I2400M_SS_UNINITIALIZED = 1,
+	I2400M_SS_INIT,
+	I2400M_SS_READY,
+	I2400M_SS_SCAN,
+	I2400M_SS_STANDBY,
+	I2400M_SS_CONNECTING,
+	I2400M_SS_WIMAX_CONNECTED,
+	I2400M_SS_DATA_PATH_CONNECTED,
+	I2400M_SS_IDLE,
+	I2400M_SS_DISCONNECTING,
+	I2400M_SS_OUT_OF_ZONE,
+	I2400M_SS_SLEEPACTIVE,
+	I2400M_SS_PRODUCTION,
+	I2400M_SS_CONFIG,
+	I2400M_SS_RF_OFF,
+	I2400M_SS_RF_SHUTDOWN,
+	I2400M_SS_DEVICE_DISCONNECT,
+	I2400M_SS_MAX,	/* = 18, one more than the last state */
+};
+
+
+/**
+ * struct i2400m_tlv_system_state - report on the state of the system
+ *
+ * @state: see enum i2400m_system_state
+ */
+struct i2400m_tlv_system_state {
+	struct i2400m_tlv_hdr hdr;
+	__le32 state;
+} __attribute__((packed));
+
+
+struct i2400m_tlv_l4_message_versions {
+	struct i2400m_tlv_hdr hdr;	/* NOTE(review): presumably I2400M_TLV_L4_MESSAGE_VERSIONS -- confirm */
+	__le16 major;
+	__le16 minor;
+	__le16 branch;
+	__le16 reserved;	/* payload after hdr is 8 bytes in total */
+} __attribute__((packed));
+
+
+struct i2400m_tlv_detailed_device_info {
+	struct i2400m_tlv_hdr hdr;
+	__u8 reserved1[400];	/* contents not described in this header */
+	__u8 mac_address[ETH_ALEN];	/* sits at offset 400 of the payload */
+	__u8 reserved2[2];
+} __attribute__((packed));
+
+
+enum i2400m_rf_switch_status {	/* same 1/2 encoding as the switch fields below */
+	I2400M_RF_SWITCH_ON = 1,
+	I2400M_RF_SWITCH_OFF = 2,
+};
+
+struct i2400m_tlv_rf_switches_status {
+	struct i2400m_tlv_hdr hdr;
+	__u8 sw_rf_switch;	/* 1 ON, 2 OFF */
+	__u8 hw_rf_switch;	/* 1 ON, 2 OFF */
+	__u8 reserved[2];	/* brings the payload to 4 bytes */
+} __attribute__((packed));
+
+
+enum {	/* same 1/2 encoding as i2400m_tlv_rf_operation.status below */
+	i2400m_rf_operation_on = 1,
+	i2400m_rf_operation_off = 2
+};
+
+struct i2400m_tlv_rf_operation {
+	struct i2400m_tlv_hdr hdr;
+	__le32 status;	/* 1 ON, 2 OFF */
+} __attribute__((packed));
+
+
+enum i2400m_tlv_reset_type {
+	I2400M_RESET_TYPE_COLD = 1,
+	I2400M_RESET_TYPE_WARM	/* = 2 */
+};
+
+struct i2400m_tlv_device_reset_type {
+	struct i2400m_tlv_hdr hdr;
+	__le32 reset_type;	/* NOTE(review): presumably an enum i2400m_tlv_reset_type -- confirm */
+} __attribute__((packed));
+
+
+struct i2400m_tlv_config_idle_parameters {
+	struct i2400m_tlv_hdr hdr;
+	__le32 idle_timeout;	/* 100 to 300000 ms [5min], 100 increments;
+				 * 0 means disabled */
+	__le32 idle_paging_interval;	/* frames */
+} __attribute__((packed));
+
+
+enum i2400m_media_status {
+	I2400M_MEDIA_STATUS_LINK_UP = 1,
+	I2400M_MEDIA_STATUS_LINK_DOWN,	/* = 2 */
+	I2400M_MEDIA_STATUS_LINK_RENEW,	/* = 3 */
+};
+
+struct i2400m_tlv_media_status {
+	struct i2400m_tlv_hdr hdr;
+	__le32 media_status;	/* NOTE(review): presumably an enum i2400m_media_status -- confirm */
+} __attribute__((packed));
+
+
+/* New in v1.4 */
+struct i2400m_tlv_config_idle_timeout {
+	struct i2400m_tlv_hdr hdr;
+	__le32 timeout;	/* 100 to 300000 ms [5min], 100 increments;
+			 * 0 means disabled */
+} __attribute__((packed));
+
+/* New in v1.4 -- for backward compat, will be removed */
+struct i2400m_tlv_config_d2h_data_format {
+	struct i2400m_tlv_hdr hdr;
+	__u8 format; 		/* 0 old format, 1 enhanced */
+	__u8 reserved[3];	/* brings the payload to 4 bytes */
+} __attribute__((packed));
+
+/* New in v1.4 */
+struct i2400m_tlv_config_dl_host_reorder {
+	struct i2400m_tlv_hdr hdr;
+	__u8 reorder; 		/* 0 disabled, 1 enabled */
+	__u8 reserved[3];	/* brings the payload to 4 bytes */
+} __attribute__((packed));
+
+
+#endif /* #ifndef __LINUX__WIMAX__I2400M_H__ */
diff --git a/drivers/staging/wimax/i2400m/netdev.c b/drivers/staging/wimax/i2400m/netdev.c
new file mode 100644
index 000000000000..a7fcbceb6e6b
--- /dev/null
+++ b/drivers/staging/wimax/i2400m/netdev.c
@@ -0,0 +1,603 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Intel Wireless WiMAX Connection 2400m
+ * Glue with the networking stack
+ *
+ * Copyright (C) 2007 Intel Corporation <linux-wimax@intel.com>
+ * Yanir Lubetkin <yanirx.lubetkin@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * This implements an ethernet device for the i2400m.
+ *
+ * We fake being an ethernet device to simplify the support from user
+ * space and from the other side. The world is (sadly) configured to
+ * take in only Ethernet devices...
+ *
+ * Because of this, when using firmwares <= v1.3, there is a
+ * copy-each-rxed-packet overhead on the RX path. Each IP packet has
+ * to be reallocated to add an ethernet header (as there is no space
+ * in what we get from the device). This is a known drawback and
+ * firmwares >= 1.4 add header space that can be used to insert the
+ * ethernet header without having to reallocate and copy.
+ *
+ * TX error handling is tricky; because we have to FIFO/queue the
+ * buffers for transmission (as the hardware likes it aggregated), we
+ * just give the skb to the TX subsystem and by the time it is
+ * transmitted, we have long forgotten about it. So we just don't care
+ * too much about it.
+ *
+ * Note that when the device is in idle mode with the basestation, we
+ * need to negotiate coming back up online. That involves negotiation
+ * and possible user space interaction. Thus, we defer to a workqueue
+ * to do all that. By default, we only queue a single packet and drop
+ * the rest, as potentially the time to go back from idle to normal is
+ * long.
+ *
+ * ROADMAP
+ *
+ * i2400m_open         Called on ifconfig up
+ * i2400m_stop         Called on ifconfig down
+ *
+ * i2400m_hard_start_xmit Called by the network stack to send a packet
+ *   i2400m_net_wake_tx	  Wake up device from basestation-IDLE & TX
+ *     i2400m_wake_tx_work
+ *       i2400m_cmd_exit_idle
+ *       i2400m_tx
+ *   i2400m_net_tx        TX a data frame
+ *     i2400m_tx
+ *
+ * i2400m_change_mtu      Called on ifconfig mtu XXX
+ *
+ * i2400m_tx_timeout      Called when the device times out
+ *
+ * i2400m_net_rx          Called by the RX code when a data frame is
+ *                        available (firmware <= 1.3)
+ * i2400m_net_erx         Called by the RX code when a data frame is
+ *                        available (firmware >= 1.4).
+ * i2400m_netdev_setup    Called to setup all the netdev stuff from
+ *                        alloc_netdev.
+ */
+#include <linux/if_arp.h>
+#include <linux/slab.h>
+#include <linux/netdevice.h>
+#include <linux/ethtool.h>
+#include <linux/export.h>
+#include "i2400m.h"
+
+
+#define D_SUBMODULE netdev
+#include "debug-levels.h"
+
+enum {
+/* netdev interface */
+	/* TX watchdog timeout. 20 secs? yep, this is the maximum time the
+	 * device might take to get out of IDLE / negotiate it with the
+	 * base station. We add 1sec for good measure. */
+	I2400M_TX_TIMEOUT = 21 * HZ,
+	/*
+	 * TX queue length. Experimentation has determined 20 to be a
+	 * good value for minimizing the jitter in the throughput.
+	 */
+	I2400M_TX_QLEN = 20,
+};
+
+
+static
+int i2400m_open(struct net_device *net_dev)
+{
+	struct i2400m *i2400m = net_dev_to_i2400m(net_dev);
+	struct device *dev = i2400m_dev(i2400m);
+	int rc;
+
+	d_fnstart(3, dev, "(net_dev %p [i2400m %p])\n", net_dev, i2400m);
+	/*
+	 * Holding init_mutex makes us wait until any init in progress is
+	 * complete; the open is refused unless the device is marked up.
+	 */
+	mutex_lock(&i2400m->init_mutex);
+	rc = i2400m->updown ? 0 : -EBUSY;
+	mutex_unlock(&i2400m->init_mutex);
+	d_fnend(3, dev, "(net_dev %p [i2400m %p]) = %d\n",
+		net_dev, i2400m, rc);
+	return rc;
+}
+
+
+static
+int i2400m_stop(struct net_device *net_dev)
+{
+	struct i2400m *i2400m = net_dev_to_i2400m(net_dev);
+	struct device *dev = i2400m_dev(i2400m);
+
+	d_fnstart(3, dev, "(net_dev %p [i2400m %p])\n", net_dev, i2400m);
+	i2400m_net_wake_stop(i2400m);	/* drop any pending wake&TX work/skb */
+	d_fnend(3, dev, "(net_dev %p [i2400m %p]) = 0\n", net_dev, i2400m);
+	return 0;	/* no failure path here */
+}
+
+
+/*
+ * Wake up the device and transmit a held SKB, then restart the net queue
+ *
+ * When the device goes into basestation-idle mode, we need to tell it
+ * to exit that mode; it will negotiate with the base station, user
+ * space may have to intervene to rehandshake crypto and then tell us
+ * when it is ready to transmit the packet we have "queued". Still we
+ * need to give it some time after it reports being ok.
+ *
+ * On error, there is not much we can do. If the error was on TX, we
+ * still wake the queue up to see if the next packet will be luckier.
+ *
+ * If _cmd_exit_idle() fails...well, it could be many things; most
+ * commonly it is that something else took the device out of IDLE mode
+ * (for example, the base station). In that case we get an -EILSEQ and
+ * we are just going to ignore that one. If the device is back to
+ * connected, then fine -- if it is some other state, the packet will
+ * be dropped anyway.
+ */
+void i2400m_wake_tx_work(struct work_struct *ws)
+{
+	int result;
+	struct i2400m *i2400m = container_of(ws, struct i2400m, wake_tx_ws);
+	struct net_device *net_dev = i2400m->wimax_dev.net_dev;
+	struct device *dev = i2400m_dev(i2400m);
+	struct sk_buff *skb;
+	unsigned long flags;
+
+	spin_lock_irqsave(&i2400m->tx_lock, flags);	/* take over the held skb */
+	skb = i2400m->wake_tx_skb;
+	i2400m->wake_tx_skb = NULL;
+	spin_unlock_irqrestore(&i2400m->tx_lock, flags);
+
+	d_fnstart(3, dev, "(ws %p i2400m %p skb %p)\n", ws, i2400m, skb);
+	result = -EINVAL;
+	if (skb == NULL) {
+		dev_err(dev, "WAKE&TX: skb disappeared!\n");
+		goto out_put;
+	}
+	/* If we have, somehow, lost the connection after this was
+	 * queued, don't do anything; this might be the device got
+	 * reset or just disconnected. */
+	if (unlikely(!netif_carrier_ok(net_dev)))
+		goto out_kfree;	/* note: the queue is not woken on this path */
+	result = i2400m_cmd_exit_idle(i2400m);
+	if (result == -EILSEQ)	/* something else already took it out of idle */
+		result = 0;
+	if (result < 0) {
+		dev_err(dev, "WAKE&TX: device didn't get out of idle: "
+			"%d - resetting\n", result);
+		i2400m_reset(i2400m, I2400M_RT_BUS);
+		goto error;
+	}
+	result = wait_event_timeout(i2400m->state_wq,
+				    i2400m->state != I2400M_SS_IDLE,
+				    net_dev->watchdog_timeo - HZ/2);
+	if (result == 0)	/* still IDLE when the timeout expired */
+		result = -ETIMEDOUT;
+	if (result < 0) {
+		dev_err(dev, "WAKE&TX: error waiting for device to exit IDLE: "
+			"%d - resetting\n", result);
+		i2400m_reset(i2400m, I2400M_RT_BUS);
+		goto error;
+	}
+	msleep(20);	/* device still needs some time or it drops it */
+	result = i2400m_tx(i2400m, skb->data, skb->len, I2400M_PT_DATA);
+error:	/* reached on success too: restart the queue either way */
+	netif_wake_queue(net_dev);
+out_kfree:
+	kfree_skb(skb);	/* refcount transferred by _hard_start_xmit() */
+out_put:
+	i2400m_put(i2400m);	/* pairs with i2400m_get() in i2400m_net_wake_tx() */
+	d_fnend(3, dev, "(ws %p i2400m %p skb %p) = void [%d]\n",
+		ws, i2400m, skb, result);
+}
+
+
+/*
+ * Prepare the data payload TX header
+ *
+ * The i2400m expects a 4 byte header in front of a data packet.
+ *
+ * Because we pretend to be an ethernet device, this packet comes with
+ * an ethernet header. Pull it and push our header.
+ */
+static
+void i2400m_tx_prep_header(struct sk_buff *skb)
+{
+	struct i2400m_pl_data_hdr *hw_hdr;
+	skb_pull(skb, ETH_HLEN);	/* the ethernet header goes away... */
+	hw_hdr = skb_push(skb, sizeof(struct i2400m_pl_data_hdr));
+	hw_hdr->reserved = 0;		/* ...replaced by the device's own */
+}
+
+
+
+/*
+ * Cleanup resources acquired during i2400m_net_wake_tx()
+ *
+ * This is called by __i2400m_dev_stop and means we have to make sure
+ * the workqueue is flushed from any pending work.
+ */
+void i2400m_net_wake_stop(struct i2400m *i2400m)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	struct sk_buff *pending;
+	unsigned long irq_flags;
+
+	d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
+	/*
+	 * i2400m_net_wake_tx() takes a reference on both the i2400m and
+	 * the skb; if the packet is still pending we release them here.
+	 */
+	cancel_work_sync(&i2400m->wake_tx_ws);
+
+	spin_lock_irqsave(&i2400m->tx_lock, irq_flags);
+	pending = i2400m->wake_tx_skb;
+	i2400m->wake_tx_skb = NULL;
+	spin_unlock_irqrestore(&i2400m->tx_lock, irq_flags);
+
+	if (pending != NULL) {
+		i2400m_put(i2400m);
+		kfree_skb(pending);
+	}
+
+	d_fnend(3, dev, "(i2400m %p) = void\n", i2400m);
+}
+
+
+/*
+ * TX an skb to an idle device
+ *
+ * When the device is in basestation-idle mode, we need to wake it up
+ * and then TX. So we queue a work_struct for doing so.
+ *
+ * We need to get an extra ref for the skb (so it is not dropped), as
+ * well as be careful not to queue more than one request (won't help
+ * at all). If more than one request comes or there are errors, we
+ * just drop the packets (see i2400m_hard_start_xmit()).
+ */
+static
+int i2400m_net_wake_tx(struct i2400m *i2400m, struct net_device *net_dev,
+		       struct sk_buff *skb)
+{
+	int result;
+	struct device *dev = i2400m_dev(i2400m);
+	unsigned long flags;
+
+	d_fnstart(3, dev, "(skb %p net_dev %p)\n", skb, net_dev);
+	if (net_ratelimit()) {
+		d_printf(3, dev, "WAKE&NETTX: "
+			 "skb %p sending %d bytes to radio\n",
+			 skb, skb->len);
+		d_dump(4, dev, skb->data, skb->len);
+	}
+	/* We hold a ref count for i2400m and skb, so when
+	 * stopping() the device, we need to cancel that work
+	 * and if pending, release those resources. */
+	result = 0;	/* stays 0 when a wake&TX request is already pending */
+	spin_lock_irqsave(&i2400m->tx_lock, flags);
+	if (!i2400m->wake_tx_skb) {
+		netif_stop_queue(net_dev);
+		i2400m_get(i2400m);	/* put by the work or i2400m_net_wake_stop() */
+		i2400m->wake_tx_skb = skb_get(skb);	/* transfer ref count */
+		i2400m_tx_prep_header(skb);
+		result = schedule_work(&i2400m->wake_tx_ws);
+		WARN_ON(result == 0);	/* 0: the work was already queued */
+	}
+	spin_unlock_irqrestore(&i2400m->tx_lock, flags);
+	if (result == 0) {
+		/* Yes, this happens even if we stopped the
+		 * queue -- blame the queue disciplines that
+		 * queue without looking -- I guess there is a reason
+		 * for that. */
+		if (net_ratelimit())
+			d_printf(1, dev, "NETTX: device exiting idle, "
+				 "dropping skb %p, queue running %d\n",
+				 skb, netif_queue_stopped(net_dev));
+		result = -EBUSY;	/* caller counts this skb as dropped */
+	}
+	d_fnend(3, dev, "(skb %p net_dev %p) = %d\n", skb, net_dev, result);
+	return result;
+}
+
+
+/*
+ * Transmit a packet to the base station on behalf of the network stack.
+ *
+ * Returns: 0 if ok, < 0 errno code on error.
+ *
+ * We need to pull the ethernet header and add the hardware header,
+ * which is currently set to all zeroes and reserved.
+ */
+static
+int i2400m_net_tx(struct i2400m *i2400m, struct net_device *net_dev,
+		  struct sk_buff *skb)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	int rc;
+
+	d_fnstart(3, dev, "(i2400m %p net_dev %p skb %p)\n",
+		  i2400m, net_dev, skb);
+	/* FIXME: check eth hdr, only IPv4 is routed by the device as of now */
+	netif_trans_update(net_dev);
+	i2400m_tx_prep_header(skb);	/* ethernet header out, device header in */
+	d_printf(3, dev, "NETTX: skb %p sending %d bytes to radio\n",
+		 skb, skb->len);
+	d_dump(4, dev, skb->data, skb->len);
+	rc = i2400m_tx(i2400m, skb->data, skb->len, I2400M_PT_DATA);
+	d_fnend(3, dev, "(i2400m %p net_dev %p skb %p) = %d\n",
+		i2400m, net_dev, skb, rc);
+	return rc;
+}
+
+
+/*
+ * Transmit a packet to the base station on behalf of the network stack
+ *
+ *
+ * Returns: NETDEV_TX_OK (always, even in case of error)
+ *
+ * In case of error, we just drop it. Reasons:
+ *
+ *  - we add a hw header to each skb, and if the network stack
+ *    retries, we have no way to know if that skb has it or not.
+ *
+ *  - network protocols have their own drop-recovery mechanisms
+ *
+ *  - there is not much else we can do
+ *
+ * If the device is idle, we need to wake it up; that is an operation
+ * that will sleep. See i2400m_net_wake_tx() for details.
+ */
+static
+netdev_tx_t i2400m_hard_start_xmit(struct sk_buff *skb,
+					 struct net_device *net_dev)
+{
+	struct i2400m *i2400m = net_dev_to_i2400m(net_dev);
+	struct device *dev = i2400m_dev(i2400m);
+	int result = -1;	/* only reported by d_fnend() if we drop early */
+
+	d_fnstart(3, dev, "(skb %p net_dev %p)\n", skb, net_dev);
+
+	if (skb_cow_head(skb, 0))	/* the header is rewritten in place below */
+		goto drop;
+
+	if (i2400m->state == I2400M_SS_IDLE)
+		result = i2400m_net_wake_tx(i2400m, net_dev, skb);
+	else
+		result = i2400m_net_tx(i2400m, net_dev, skb);
+	if (result <  0) {
+drop:
+		net_dev->stats.tx_dropped++;
+	} else {
+		net_dev->stats.tx_packets++;
+		net_dev->stats.tx_bytes += skb->len;	/* length after header swap */
+	}
+	dev_kfree_skb(skb);	/* the wake&TX path holds its own reference */
+	d_fnend(3, dev, "(skb %p net_dev %p) = %d\n", skb, net_dev, result);
+	return NETDEV_TX_OK;
+}
+
+
+static
+void i2400m_tx_timeout(struct net_device *net_dev, unsigned int txqueue)
+{
+	/*
+	 * We might want to kick the device
+	 *
+	 * There is not much we can do though, as the device requires
+	 * that we send the data aggregated. By the time we receive
+	 * this, there might be data pending to be sent or not...
+	 */
+	net_dev->stats.tx_errors++;	/* just account for it; txqueue is unused */
+}
+
+
+/*
+ * Create a fake ethernet header
+ *
+ * For emulating an ethernet device, every received IP header has to
+ * be prefixed with an ethernet header. Fake it with the given
+ * protocol.
+ */
+static
+void i2400m_rx_fake_eth_header(struct net_device *net_dev,
+			       void *_eth_hdr, __be16 protocol)
+{
+	struct ethhdr *hdr = _eth_hdr;
+	struct i2400m *i2400m = net_dev_to_i2400m(net_dev);
+
+	/* from: the address kept in src_mac_addr; to: this interface */
+	memcpy(hdr->h_source, i2400m->src_mac_addr, ETH_ALEN);
+	memcpy(hdr->h_dest, net_dev->dev_addr, ETH_ALEN);
+	hdr->h_proto = protocol;
+}
+
+
+/*
+ * i2400m_net_rx - pass a network packet to the stack
+ *
+ * @i2400m: device instance
+ * @skb_rx: the skb where the buffer pointed to by @buf is
+ * @i: 1 if payload is the only one
+ * @buf: pointer to the buffer containing the data
+ * @buf_len: buffer's length
+ *
+ * This is only used now for the v1.3 firmware. It will be deprecated
+ * in >= 2.6.31.
+ *
+ * Note that due to firmware limitations, we don't have space to add
+ * an ethernet header, so we need to copy each packet. Firmware
+ * versions >= v1.4 fix this [see i2400m_net_erx()].
+ *
+ * We just clone the skb and set it up so that its skb->data pointer
+ * points to "buf" and its length.
+ *
+ * Note that if the payload is the last (or the only one) in a
+ * multi-payload message, we don't clone the SKB but just reuse it.
+ *
+ * This function is normally run from a thread context. However, we
+ * still use netif_rx() instead of netif_receive_skb() as was
+ * recommended in the mailing list. Reason is in some stress tests
+ * when sending/receiving a lot of data we seem to hit a softlock in
+ * the kernel's TCP implementation [around tcp_delay_timer()]. Using
+ * netif_rx() took care of the issue.
+ *
+ * This is, of course, still open to do more research on why running
+ * with netif_receive_skb() hits this softlock. FIXME.
+ *
+ * FIXME: currently we don't do any efforts at distinguishing if what
+ * we got was an IPv4 or IPv6 header, to setup the protocol field
+ * correctly.
+ */
+void i2400m_net_rx(struct i2400m *i2400m, struct sk_buff *skb_rx,
+		   unsigned i, const void *buf, int buf_len)
+{
+	struct net_device *net_dev = i2400m->wimax_dev.net_dev;
+	struct device *dev = i2400m_dev(i2400m);
+	struct sk_buff *skb;
+
+	d_fnstart(2, dev, "(i2400m %p buf %p buf_len %d)\n",
+		  i2400m, buf, buf_len);
+	if (i) {
+		skb = skb_get(skb_rx);	/* reuse skb_rx, taking our own reference */
+		d_printf(2, dev, "RX: reusing first payload skb %p\n", skb);
+		skb_pull(skb, buf - (void *) skb->data);	/* data now starts at buf */
+		skb_trim(skb, (void *) skb_end_pointer(skb) - buf);	/* NOTE(review): not buf_len? confirm */
+	} else {
+		/* Yes, this is bad -- a lot of overhead -- see
+		 * comments at the top of the file */
+		skb = __netdev_alloc_skb(net_dev, buf_len, GFP_KERNEL);
+		if (skb == NULL) {
+			dev_err(dev, "NETRX: no memory to realloc skb\n");
+			net_dev->stats.rx_dropped++;
+			goto error_skb_realloc;
+		}
+		skb_put_data(skb, buf, buf_len);	/* copy the payload over */
+	}
+	i2400m_rx_fake_eth_header(i2400m->wimax_dev.net_dev,
+				  skb->data - ETH_HLEN,
+				  cpu_to_be16(ETH_P_IP));
+	skb_set_mac_header(skb, -ETH_HLEN);	/* fake header sits just before skb->data */
+	skb->dev = i2400m->wimax_dev.net_dev;
+	skb->protocol = htons(ETH_P_IP);	/* always IPv4, see FIXME in the header */
+	net_dev->stats.rx_packets++;
+	net_dev->stats.rx_bytes += buf_len;
+	d_printf(3, dev, "NETRX: receiving %d bytes to network stack\n",
+		buf_len);
+	d_dump(4, dev, buf, buf_len);
+	netif_rx_ni(skb);	/* see notes in function header */
+error_skb_realloc:
+	d_fnend(2, dev, "(i2400m %p buf %p buf_len %d) = void\n",
+		i2400m, buf, buf_len);
+}
+
+
+/*
+ * i2400m_net_erx - pass a network packet to the stack (extended version)
+ *
+ * @i2400m: device descriptor
+ * @skb: the skb where the packet is - the skb should be set to point
+ *     at the IP packet; this function will add ethernet headers if
+ *     needed.
+ * @cs: packet type
+ *
+ * This is only used now for firmware >= v1.4. Note it is quite
+ * similar to i2400m_net_rx() (used only for v1.3 firmware).
+ *
+ * This function is normally run from a thread context. However, we
+ * still use netif_rx() instead of netif_receive_skb() as was
+ * recommended in the mailing list. Reason is in some stress tests
+ * when sending/receiving a lot of data we seem to hit a softlock in
+ * the kernel's TCP implementation [around tcp_delay_timer()]. Using
+ * netif_rx() took care of the issue.
+ *
+ * This is, of course, still open to do more research on why running
+ * with netif_receive_skb() hits this softlock. FIXME.
+ */
+void i2400m_net_erx(struct i2400m *i2400m, struct sk_buff *skb,
+		    enum i2400m_cs cs)
+{
+	struct net_device *net_dev = i2400m->wimax_dev.net_dev;
+	struct device *dev = i2400m_dev(i2400m);
+
+	d_fnstart(2, dev, "(i2400m %p skb %p [%u] cs %d)\n",
+		  i2400m, skb, skb->len, cs);
+	switch(cs) {
+	case I2400M_CS_IPV4_0:
+	case I2400M_CS_IPV4:	/* both IPv4 variants are handled alike */
+		i2400m_rx_fake_eth_header(i2400m->wimax_dev.net_dev,
+					  skb->data - ETH_HLEN,
+					  cpu_to_be16(ETH_P_IP));
+		skb_set_mac_header(skb, -ETH_HLEN);	/* fake header sits just before skb->data */
+		skb->dev = i2400m->wimax_dev.net_dev;
+		skb->protocol = htons(ETH_P_IP);
+		net_dev->stats.rx_packets++;
+		net_dev->stats.rx_bytes += skb->len;
+		break;
+	default:
+		dev_err(dev, "ERX: BUG? CS type %u unsupported\n", cs);
+		goto error;	/* NOTE(review): skb is not freed here; caller's job? confirm */
+
+	}
+	d_printf(3, dev, "ERX: receiving %d bytes to the network stack\n",
+		 skb->len);
+	d_dump(4, dev, skb->data, skb->len);
+	netif_rx_ni(skb);	/* see notes in function header */
+error:
+	d_fnend(2, dev, "(i2400m %p skb %p [%u] cs %d) = void\n",
+		i2400m, skb, skb->len, cs);
+}
+
+static const struct net_device_ops i2400m_netdev_ops = {
+	.ndo_open = i2400m_open,	/* ifconfig up */
+	.ndo_stop = i2400m_stop,	/* ifconfig down */
+	.ndo_start_xmit = i2400m_hard_start_xmit,	/* always NETDEV_TX_OK */
+	.ndo_tx_timeout = i2400m_tx_timeout,	/* only bumps tx_errors */
+};
+
+static void i2400m_get_drvinfo(struct net_device *net_dev,
+			       struct ethtool_drvinfo *info)
+{
+	struct i2400m *i2400m = net_dev_to_i2400m(net_dev);
+	struct device *parent = net_dev->dev.parent;
+	const char *fw = i2400m->fw_name ? i2400m->fw_name : "";
+
+	strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
+	strlcpy(info->fw_version, fw, sizeof(info->fw_version));
+	if (parent)	/* bus_info is left untouched without a parent device */
+		strlcpy(info->bus_info, dev_name(parent), sizeof(info->bus_info));
+}
+
+static const struct ethtool_ops i2400m_ethtool_ops = {
+	.get_drvinfo = i2400m_get_drvinfo,	/* driver, firmware and bus names */
+	.get_link = ethtool_op_get_link,
+};
+
+/**
+ * i2400m_netdev_setup - Set up @net_dev as an i2400m network device
+ *
+ * Called by alloc_netdev()
+ */
+void i2400m_netdev_setup(struct net_device *net_dev)
+{
+	d_fnstart(3, NULL, "(net_dev %p)\n", net_dev);
+	ether_setup(net_dev);
+	net_dev->netdev_ops = &i2400m_netdev_ops;
+	net_dev->ethtool_ops = &i2400m_ethtool_ops;
+	net_dev->watchdog_timeo = I2400M_TX_TIMEOUT;
+	net_dev->tx_queue_len = I2400M_TX_QLEN;
+	net_dev->min_mtu = 0;
+	net_dev->mtu = I2400M_MAX_MTU;
+	net_dev->max_mtu = I2400M_MAX_MTU;
+	net_dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_HIGHDMA;
+	/*
+	 * i2400m is a pure IP, point-to-point device: the assignment drops
+	 * every flag ether_setup() left (so no IFF_BROADCAST and no
+	 * IFF_MULTICAST) and keeps only IFF_NOARP.
+	 */
+	net_dev->flags = IFF_NOARP;
+	d_fnend(3, NULL, "(net_dev %p) = void\n", net_dev);
+}
+EXPORT_SYMBOL_GPL(i2400m_netdev_setup);
+
diff --git a/drivers/staging/wimax/i2400m/op-rfkill.c b/drivers/staging/wimax/i2400m/op-rfkill.c
new file mode 100644
index 000000000000..fbddf2e18c14
--- /dev/null
+++ b/drivers/staging/wimax/i2400m/op-rfkill.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Intel Wireless WiMAX Connection 2400m
+ * Implement backend for the WiMAX stack rfkill support
+ *
+ * Copyright (C) 2007-2008 Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * The WiMAX kernel stack integrates into RF-Kill and keeps the
+ * switches' status. We just need to:
+ *
+ * - report changes in the HW RF Kill switch [with
+ *   wimax_rfkill_{sw,hw}_report(), which happens when we detect those
+ *   indications coming through hardware reports]. We also do it on
+ *   initialization to let the stack know the initial HW state.
+ *
+ * - implement indications from the stack to change the SW RF Kill
+ *   switch (coming from sysfs, the wimax stack or user space).
+ */
+#include "i2400m.h"
+#include "linux-wimax-i2400m.h"
+#include <linux/slab.h>
+
+
+
+#define D_SUBMODULE rfkill
+#include "debug-levels.h"
+
+/*
+ * Return true if the i2400m radio is in the requested wimax_rf_state state
+ *
+ */
+static
+int i2400m_radio_is(struct i2400m *i2400m, enum wimax_rf_state state)
+{
+	switch (state) {
+	case WIMAX_RF_OFF:	/* radio off: RF_OFF or RF_SHUTDOWN */
+		return i2400m->state == I2400M_SS_RF_OFF
+			|| i2400m->state == I2400M_SS_RF_SHUTDOWN;
+	case WIMAX_RF_ON:	/* radio on: any other device state */
+		return !(i2400m->state == I2400M_SS_RF_OFF
+			 || i2400m->state == I2400M_SS_RF_SHUTDOWN);
+	default:
+		BUG();
+		return -EINVAL;	/* shut gcc warnings on certain arches */
+	}
+}
+
+
+/*
+ * WiMAX stack operation: implement SW RFKill toggling
+ *
+ * @wimax_dev: device descriptor
+ * @state: state requested for the radio, WIMAX_RF_ON or WIMAX_RF_OFF;
+ *       any other value hits BUG(). Returns 0 if ok, < 0 errno code
+ *       on error (a timeout waiting for the change is only logged).
+ *
+ * Generic Netlink will call this function when a message is sent from
+ * userspace to change the software RF-Kill switch status.
+ *
+ * This function will set the device's software RF-Kill switch state to
+ * match what is requested.
+ *
+ * NOTE: the i2400m has a strict state machine; we can only set the
+ *       RF-Kill switch when it is on, the HW RF-Kill is on and the
+ *       device is initialized. So we ignore errors stemming from not
+ *       being in the right state (-EILSEQ).
+ */
+int i2400m_op_rfkill_sw_toggle(struct wimax_dev *wimax_dev,
+			       enum wimax_rf_state state)
+{
+	int result;
+	struct i2400m *i2400m = wimax_dev_to_i2400m(wimax_dev);
+	struct device *dev = i2400m_dev(i2400m);
+	struct sk_buff *ack_skb;
+	struct {
+		struct i2400m_l3l4_hdr hdr;
+		struct i2400m_tlv_rf_operation sw_rf;
+	} __packed *cmd;	/* L3L4 header followed by a single TLV */
+	char strerr[32];
+
+	d_fnstart(4, dev, "(wimax_dev %p state %d)\n", wimax_dev, state);
+
+	result = -ENOMEM;
+	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
+	if (cmd == NULL)
+		goto error_alloc;
+	cmd->hdr.type = cpu_to_le16(I2400M_MT_CMD_RF_CONTROL);
+	cmd->hdr.length = cpu_to_le16(sizeof(cmd->sw_rf));	/* LE like type/version */
+	cmd->hdr.version = cpu_to_le16(I2400M_L3L4_VERSION);
+	cmd->sw_rf.hdr.type = cpu_to_le16(I2400M_TLV_RF_OPERATION);
+	cmd->sw_rf.hdr.length = cpu_to_le16(sizeof(cmd->sw_rf.status));
+	switch (state) {
+	case WIMAX_RF_OFF:	/* RFKILL ON, radio OFF */
+		cmd->sw_rf.status = cpu_to_le32(2);
+		break;
+	case WIMAX_RF_ON:	/* RFKILL OFF, radio ON */
+		cmd->sw_rf.status = cpu_to_le32(1);
+		break;
+	default:
+		BUG();
+	}
+
+	ack_skb = i2400m_msg_to_dev(i2400m, cmd, sizeof(*cmd));
+	result = PTR_ERR(ack_skb);	/* only meaningful if IS_ERR() below */
+	if (IS_ERR(ack_skb)) {
+		dev_err(dev, "Failed to issue 'RF Control' command: %d\n",
+			result);
+		goto error_msg_to_dev;
+	}
+	result = i2400m_msg_check_status(wimax_msg_data(ack_skb),
+					 strerr, sizeof(strerr));
+	if (result < 0) {
+		dev_err(dev, "'RF Control' (0x%04x) command failed: %d - %s\n",
+			I2400M_MT_CMD_RF_CONTROL, result, strerr);
+		goto error_cmd;
+	}
+
+	/* Now we wait for the state to change to RADIO_OFF or RADIO_ON */
+	result = wait_event_timeout(
+		i2400m->state_wq, i2400m_radio_is(i2400m, state),
+		5 * HZ);
+	if (result == 0)	/* state had not changed when the 5s ran out */
+		result = -ETIMEDOUT;
+	if (result < 0)
+		dev_err(dev, "Error waiting for device to toggle RF state: "
+			"%d\n", result);
+	result = 0;	/* a slow state change is only logged, not reported */
+error_cmd:
+	kfree_skb(ack_skb);
+error_msg_to_dev:
+error_alloc:
+	d_fnend(4, dev, "(wimax_dev %p state %d) = %d\n",
+		wimax_dev, state, result);
+	kfree(cmd);	/* cmd is NULL on the error_alloc path; kfree() copes */
+	return result;
+}
+
+
+/*
+ * Inform the WiMAX stack of changes in the RF Kill switches reported
+ * by the device
+ *
+ * @i2400m: device descriptor
+ * @rfss: TLV for RF Switches status; already validated
+ *
+ * NOTE: the reports on RF switch status cannot be trusted
+ *       or used until the device is in a state of RADIO_OFF
+ *       or greater.
+ */
+void i2400m_report_tlv_rf_switches_status(
+	struct i2400m *i2400m,
+	const struct i2400m_tlv_rf_switches_status *rfss)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	enum i2400m_rf_switch_status hw, sw;
+	enum wimax_st wimax_state;
+
+	sw = rfss->sw_rf_switch;	/* __u8 fields: nothing to byte-swap */
+	hw = rfss->hw_rf_switch;	/* (a 32-bit swap breaks big endian) */
+
+	d_fnstart(3, dev, "(i2400m %p rfss %p [hw %u sw %u])\n",
+		  i2400m, rfss, hw, sw);
+	/* We only process RF switch events once the device has been
+	 * fully initialized */
+	wimax_state = wimax_state_get(&i2400m->wimax_dev);
+	if (wimax_state < WIMAX_ST_RADIO_OFF) {
+		d_printf(3, dev, "ignoring RF switches report, state %u\n",
+			 wimax_state);
+		goto out;
+	}
+	switch (sw) {
+	case I2400M_RF_SWITCH_ON:	/* RF Kill disabled (radio on) */
+		wimax_report_rfkill_sw(&i2400m->wimax_dev, WIMAX_RF_ON);
+		break;
+	case I2400M_RF_SWITCH_OFF:	/* RF Kill enabled (radio off) */
+		wimax_report_rfkill_sw(&i2400m->wimax_dev, WIMAX_RF_OFF);
+		break;
+	default:	/* logged only; the HW switch is still processed below */
+		dev_err(dev, "HW BUG? Unknown RF SW state 0x%x\n", sw);
+	}
+
+	switch (hw) {
+	case I2400M_RF_SWITCH_ON:	/* RF Kill disabled (radio on) */
+		wimax_report_rfkill_hw(&i2400m->wimax_dev, WIMAX_RF_ON);
+		break;
+	case I2400M_RF_SWITCH_OFF:	/* RF Kill enabled (radio off) */
+		wimax_report_rfkill_hw(&i2400m->wimax_dev, WIMAX_RF_OFF);
+		break;
+	default:	/* logged only; nothing is reported to the stack */
+		dev_err(dev, "HW BUG? Unknown RF HW state 0x%x\n", hw);
+	}
+out:
+	d_fnend(3, dev, "(i2400m %p rfss %p [hw %u sw %u]) = void\n",
+		i2400m, rfss, hw, sw);
+}
diff --git a/drivers/staging/wimax/i2400m/rx.c b/drivers/staging/wimax/i2400m/rx.c
new file mode 100644
index 000000000000..c9fb619a9e01
--- /dev/null
+++ b/drivers/staging/wimax/i2400m/rx.c
@@ -0,0 +1,1395 @@
+/*
+ * Intel Wireless WiMAX Connection 2400m
+ * Handle incoming traffic and deliver it to the control or data planes
+ *
+ *
+ * Copyright (C) 2007-2008 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Intel Corporation <linux-wimax@intel.com>
+ * Yanir Lubetkin <yanirx.lubetkin@intel.com>
+ *  - Initial implementation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *  - Use skb_clone(), break up processing in chunks
+ *  - Split transport/device specific
+ *  - Make buffer size dynamic to exert less memory pressure
+ *  - RX reorder support
+ *
+ * This handles the RX path.
+ *
+ * We receive an RX message from the bus-specific driver, which
+ * contains one or more payloads that have potentially different
+ * destinations (data or control paths).
+ *
+ * So we just take that payload from the transport specific code in
+ * the form of an skb, break it up in chunks (a cloned skb each in the
+ * case of network packets) and pass it to netdev or to the
+ * command/ack handler (and from there to the WiMAX stack).
+ *
+ * PROTOCOL FORMAT
+ *
+ * The format of the buffer is:
+ *
+ * HEADER                      (struct i2400m_msg_hdr)
+ * PAYLOAD DESCRIPTOR 0        (struct i2400m_pld)
+ * PAYLOAD DESCRIPTOR 1
+ * ...
+ * PAYLOAD DESCRIPTOR N
+ * PAYLOAD 0                   (raw bytes)
+ * PAYLOAD 1
+ * ...
+ * PAYLOAD N
+ *
+ * See tx.c for a deeper description on alignment requirements and
+ * other fun facts of it.
+ *
+ * DATA PACKETS
+ *
+ * In firmwares <= v1.3, data packets have no header for RX, but they
+ * do for TX (currently unused).
+ *
+ * In firmware >= 1.4, RX packets have an extended header (16
+ * bytes). This header conveys information for management of host
+ * reordering of packets (the device offloads storage of the packets
+ * for reordering to the host). Read below for more information.
+ *
+ * The header is used as dummy space to emulate an ethernet header and
+ * thus be able to act as an ethernet device without having to reallocate.
+ *
+ * DATA RX REORDERING
+ *
+ * Starting in firmware v1.4, the device can deliver packets for
+ * delivery with special reordering information; this allows it to
+ * more effectively do packet management when some frames were lost in
+ * the radio traffic.
+ *
+ * Thus, for RX packets that come out of order, the device gives the
+ * driver enough information to queue them properly and then at some
+ * point, the signal to deliver the whole (or part) of the queued
+ * packets to the networking stack. There are 16 such queues.
+ *
+ * This only happens when a packet comes in with the "need reorder"
+ * flag set in the RX header. When such bit is set, the following
+ * operations might be indicated:
+ *
+ *  - reset queue: send all queued packets to the OS
+ *
+ *  - queue: queue a packet
+ *
+ *  - update ws: update the queue's window start and deliver queued
+ *    packets that meet the criteria
+ *
+ *  - queue & update ws: queue a packet, update the window start and
+ *    deliver queued packets that meet the criteria
+ *
+ * (delivery criteria: the packet's [normalized] sequence number is
+ * lower than the new [normalized] window start).
+ *
+ * See the i2400m_roq_*() functions for details.
+ *
+ * ROADMAP
+ *
+ * i2400m_rx
+ *   i2400m_rx_msg_hdr_check
+ *   i2400m_rx_pl_descr_check
+ *   i2400m_rx_payload
+ *     i2400m_net_rx
+ *     i2400m_rx_edata
+ *       i2400m_net_erx
+ *       i2400m_roq_reset
+ *         i2400m_net_erx
+ *       i2400m_roq_queue
+ *         __i2400m_roq_queue
+ *       i2400m_roq_update_ws
+ *         __i2400m_roq_update_ws
+ *           i2400m_net_erx
+ *       i2400m_roq_queue_update_ws
+ *         __i2400m_roq_queue
+ *         __i2400m_roq_update_ws
+ *           i2400m_net_erx
+ *     i2400m_rx_ctl
+ *       i2400m_msg_size_check
+ *       i2400m_report_hook_work    [in a workqueue]
+ *         i2400m_report_hook
+ *       wimax_msg_to_user
+ *       i2400m_rx_ctl_ack
+ *         wimax_msg_to_user_alloc
+ *     i2400m_rx_trace
+ *       i2400m_msg_size_check
+ *       wimax_msg
+ */
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/if_arp.h>
+#include <linux/netdevice.h>
+#include <linux/workqueue.h>
+#include <linux/export.h>
+#include <linux/moduleparam.h>
+#include "i2400m.h"
+
+
+#define D_SUBMODULE rx
+#include "debug-levels.h"
+
+static int i2400m_rx_reorder_disabled;	/* non-zero: RX reorder off (default 0) */
+module_param_named(rx_reorder_disabled, i2400m_rx_reorder_disabled, int, 0644);
+MODULE_PARM_DESC(rx_reorder_disabled,
+		 "If true, RX reordering will be disabled.");
+
+struct i2400m_report_hook_args {	/* one queued report */
+	struct sk_buff *skb_rx;		/* referenced skb holding the report */
+	const struct i2400m_l3l4_hdr *l3l4_hdr;	/* the report message */
+	size_t size;			/* size of the message */
+	struct list_head list_node;	/* link in i2400m->rx_reports */
+};
+
+
+/*
+ * Execute i2400m_report_hook in a workqueue
+ *
+ * @ws: the rx_report_ws work struct embedded in the i2400m
+ *
+ * Splices i2400m->rx_reports into a private list, runs the hook on
+ * each report and frees it; repeats until no reports are left.
+ * NOTE: refcounts on i2400m are not needed because we flush the
+ *     workqueue (i2400m->work_queue) before destroying i2400m.
+ */
+void i2400m_report_hook_work(struct work_struct *ws)
+{
+	struct i2400m *i2400m = container_of(ws, struct i2400m, rx_report_ws);
+	struct device *dev = i2400m_dev(i2400m);
+	struct i2400m_report_hook_args *args, *args_next;
+	LIST_HEAD(list);
+	unsigned long flags;
+
+	while (1) {
+		spin_lock_irqsave(&i2400m->rx_lock, flags);
+		list_splice_init(&i2400m->rx_reports, &list);
+		spin_unlock_irqrestore(&i2400m->rx_lock, flags);
+		if (list_empty(&list))
+			break;	/* nothing was queued since the last pass */
+		else
+			d_printf(1, dev, "processing queued reports\n");
+		list_for_each_entry_safe(args, args_next, &list, list_node) {
+			d_printf(2, dev, "processing queued report %p\n", args);
+			i2400m_report_hook(i2400m, args->l3l4_hdr, args->size);
+			kfree_skb(args->skb_rx);	/* drop the queue-time ref */
+			list_del(&args->list_node);
+			kfree(args);
+		}
+	}
+}
+
+
+/*
+ * Flush the list of queued reports, freeing them without processing
+ */
+static
+void i2400m_report_hook_flush(struct i2400m *i2400m)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	struct i2400m_report_hook_args *args, *args_next;
+	LIST_HEAD(list);
+	unsigned long flags;
+
+	d_printf(1, dev, "flushing queued reports\n");
+	spin_lock_irqsave(&i2400m->rx_lock, flags);
+	list_splice_init(&i2400m->rx_reports, &list);	/* take them all */
+	spin_unlock_irqrestore(&i2400m->rx_lock, flags);
+	list_for_each_entry_safe(args, args_next, &list, list_node) {
+		d_printf(2, dev, "flushing queued report %p\n", args);
+		kfree_skb(args->skb_rx);	/* drop the queue-time ref */
+		list_del(&args->list_node);
+		kfree(args);
+	}
+}
+
+
+/*
+ * Queue a report for processing in i2400m_report_hook_work()
+ *
+ * @i2400m: device descriptor
+ * @skb_rx: skb that contains the payload (an extra reference is taken)
+ * @l3l4_hdr: pointer to the control message (report) to queue
+ * @size: size of the message
+ */
+static
+void i2400m_report_hook_queue(struct i2400m *i2400m, struct sk_buff *skb_rx,
+			      const void *l3l4_hdr, size_t size)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	unsigned long flags;
+	struct i2400m_report_hook_args *args;
+
+	args = kzalloc(sizeof(*args), GFP_NOIO);
+	if (args) {
+		args->skb_rx = skb_get(skb_rx);
+		args->l3l4_hdr = l3l4_hdr;
+		args->size = size;
+		spin_lock_irqsave(&i2400m->rx_lock, flags);
+		list_add_tail(&args->list_node, &i2400m->rx_reports);
+		spin_unlock_irqrestore(&i2400m->rx_lock, flags);
+		d_printf(2, dev, "queued report %p\n", args);
+		rmb();		/* see i2400m->ready's documentation  */
+		if (likely(i2400m->ready))	/* only send if up */
+			queue_work(i2400m->work_queue, &i2400m->rx_report_ws);
+	} else  {	/* no memory: the report is not queued */
+		if (printk_ratelimit())
+			dev_err(dev, "%s:%u: Can't allocate %zu B\n",
+				__func__, __LINE__, sizeof(*args));
+	}
+}
+
+
+/*
+ * Process an ack to a command
+ *
+ * @i2400m: device descriptor
+ * @payload: pointer to message
+ * @size: size of the message
+ *
+ * Pass the acknowledgment (in an skb) to the thread that is waiting
+ * for it in i2400m->msg_completion.
+ *
+ * We need to coordinate properly with the thread waiting for the
+ * ack. Check if it is waiting or if it is gone. We drop the spinlock
+ * to avoid allocating on atomic contexts (yeah, could use GFP_ATOMIC,
+ * but this is not so speed critical).
+ */
+static
+void i2400m_rx_ctl_ack(struct i2400m *i2400m,
+		       const void *payload, size_t size)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	struct wimax_dev *wimax_dev = &i2400m->wimax_dev;
+	unsigned long flags;
+	struct sk_buff *ack_skb;
+
+	/* Anyone waiting for an answer? */
+	spin_lock_irqsave(&i2400m->rx_lock, flags);
+	if (i2400m->ack_skb != ERR_PTR(-EINPROGRESS)) {
+		dev_err(dev, "Huh? reply to command with no waiters\n");
+		goto error_no_waiter;
+	}
+	spin_unlock_irqrestore(&i2400m->rx_lock, flags);
+
+	ack_skb = wimax_msg_alloc(wimax_dev, NULL, payload, size, GFP_KERNEL);
+
+	/* Check waiter didn't time out waiting for the answer... */
+	spin_lock_irqsave(&i2400m->rx_lock, flags);
+	if (i2400m->ack_skb != ERR_PTR(-EINPROGRESS)) {
+		d_printf(1, dev, "Huh? waiter for command reply cancelled\n");
+		goto error_waiter_cancelled;
+	}
+	if (IS_ERR(ack_skb))
+		dev_err(dev, "CMD/GET/SET ack: cannot allocate SKB\n");
+	i2400m->ack_skb = ack_skb;	/* may be an ERR_PTR; handed over anyway */
+	spin_unlock_irqrestore(&i2400m->rx_lock, flags);
+	complete(&i2400m->msg_completion);
+	return;
+
+error_waiter_cancelled:
+	if (!IS_ERR(ack_skb))
+		kfree_skb(ack_skb);	/* nobody will consume it */
+error_no_waiter:
+	spin_unlock_irqrestore(&i2400m->rx_lock, flags);
+}
+
+
+/*
+ * Receive and process a control payload
+ *
+ * @i2400m: device descriptor
+ * @skb_rx: skb that contains the payload (for reference counting)
+ * @payload: pointer to message
+ * @size: size of the message
+ *
+ * There are two types of control RX messages: reports (asynchronous,
+ * like your every day interrupts) and 'acks' (responses to a command,
+ * get or set request).
+ *
+ * If it is a report, we run hooks on it (to extract information for
+ * things we need to do in the driver) and then pass it over to the
+ * WiMAX stack to send it to user space.
+ *
+ * NOTE: report processing is done in a workqueue specific to the
+ *     generic driver, to avoid deadlocks in the system.
+ *
+ * If it is not a report, it is an ack to a previously executed
+ * command, set or get, so wake up whoever is waiting for it from
+ * i2400m_msg_to_dev(). i2400m_rx_ctl_ack() takes care of that.
+ *
+ * Note that the sizes we pass to other functions from here are the
+ * sizes of the _l3l4_hdr + payload, not full buffer sizes, as we have
+ * verified in _msg_size_check() that they are congruent.
+ *
+ * For reports: We can't clone the original skb where the data is
+ * because we need to send this up via netlink; netlink has to add
+ * headers and we can't overwrite what's preceding the payload...as
+ * it is another message. So we just dup them.
+ */
+static
+void i2400m_rx_ctl(struct i2400m *i2400m, struct sk_buff *skb_rx,
+		   const void *payload, size_t size)
+{
+	int result;
+	struct device *dev = i2400m_dev(i2400m);
+	const struct i2400m_l3l4_hdr *l3l4_hdr = payload;
+	unsigned msg_type;
+
+	result = i2400m_msg_size_check(i2400m, l3l4_hdr, size);
+	if (result < 0) {
+		dev_err(dev, "HW BUG? device sent a bad message: %d\n",
+			result);
+		goto error_check;
+	}
+	msg_type = le16_to_cpu(l3l4_hdr->type);
+	d_printf(1, dev, "%s 0x%04x: %zu bytes\n",
+		 msg_type & I2400M_MT_REPORT_MASK ? "REPORT" : "CMD/SET/GET",
+		 msg_type, size);
+	d_dump(2, dev, l3l4_hdr, size);
+	if (msg_type & I2400M_MT_REPORT_MASK) {
+		/*
+		 * Process each report
+		 *
+		 * - has to be run serialized as well
+		 *
+		 * - the handling might force the execution of
+		 *   commands. That might cause reentrancy issues with
+		 *   bus-specific subdrivers and workqueues, so we
+		 *   run it in a separate workqueue.
+		 *
+		 * - when the driver is not yet ready to handle them,
+		 *   they are queued and at some point the queue is
+		 *   restarted [NOTE: we can't queue SKBs directly, as
+		 *   this might be a piece of a SKB, not the whole
+		 *   thing, and this is cheaper than cloning the
+		 *   SKB].
+		 *
+		 * Note we don't do refcounting for the device
+		 * structure; this is because before destroying
+		 * 'i2400m', we make sure to flush the
+		 * i2400m->work_queue, so there are no issues.
+		 */
+		i2400m_report_hook_queue(i2400m, skb_rx, l3l4_hdr, size);
+		if (unlikely(i2400m->trace_msg_from_user))
+			wimax_msg(&i2400m->wimax_dev, "echo",
+				  l3l4_hdr, size, GFP_KERNEL);
+		result = wimax_msg(&i2400m->wimax_dev, NULL, l3l4_hdr, size,
+				   GFP_KERNEL);
+		if (result < 0)
+			dev_err(dev, "error sending report to userspace: %d\n",
+				result);
+	} else		/* an ack to a CMD, GET or SET */
+		i2400m_rx_ctl_ack(i2400m, payload, size);
+error_check:
+	return;
+}
+
+
+/*
+ * Receive and send up a trace
+ *
+ * @i2400m: device descriptor
+ * @payload: pointer to the trace message; the skb it lives in is not
+ *     passed to this function
+ * @size: size of the message
+ *
+ * The i2400m might produce trace information (diagnostics) and we
+ * send them through a different kernel-to-user pipe (to avoid
+ * clogging it).
+ *
+ * As in i2400m_rx_ctl(), we can't clone the original skb where the
+ * data is because we need to send this up via netlink; netlink has to
+ * add headers and we can't overwrite what's preceding the
+ * payload...as it is another message. So we just dup them.
+ */
+static
+void i2400m_rx_trace(struct i2400m *i2400m,
+		     const void *payload, size_t size)
+{
+	int result;
+	struct device *dev = i2400m_dev(i2400m);
+	struct wimax_dev *wimax_dev = &i2400m->wimax_dev;
+	const struct i2400m_l3l4_hdr *l3l4_hdr = payload;
+	unsigned msg_type;
+
+	result = i2400m_msg_size_check(i2400m, l3l4_hdr, size);
+	if (result < 0) {
+		dev_err(dev, "HW BUG? device sent a bad trace message: %d\n",
+			result);
+		goto error_check;
+	}
+	msg_type = le16_to_cpu(l3l4_hdr->type);
+	d_printf(1, dev, "Trace %s 0x%04x: %zu bytes\n",
+		 msg_type & I2400M_MT_REPORT_MASK ? "REPORT" : "CMD/SET/GET",
+		 msg_type, size);
+	d_dump(2, dev, l3l4_hdr, size);
+	result = wimax_msg(wimax_dev, "trace", l3l4_hdr, size, GFP_KERNEL);
+	if (result < 0)
+		dev_err(dev, "error sending trace to userspace: %d\n",
+			result);
+error_check:
+	return;
+}
+
+
+/*
+ * Reorder queue data stored on skb->cb while the skb is queued in the
+ * reorder queues.
+ */
+struct i2400m_roq_data {
+	unsigned sn;		/* Sequence number for the skb */
+	enum i2400m_cs cs;	/* packet type for the skb */
+};
+
+
+/*
+ * ReOrder Queue
+ *
+ * @ws: Window Start; sequence number where the current window start
+ *     is for this queue
+ * @queue: the skb queue itself
+ * @log: circular ring buffer used to log information about the
+ *     reorder process in this queue that can be displayed in case of
+ *     error to help diagnose it.
+ *
+ * This is the head for a list of skbs. The skb->cb member of each
+ * skb queued here contains a 'struct i2400m_roq_data' where we
+ * store the sequence number (sn) and the cs (packet type) coming from
+ * the RX payload header from the device.
+ */
+struct i2400m_roq
+{
+	unsigned ws;
+	struct sk_buff_head queue;
+	struct i2400m_roq_log *log;
+};
+
+
+static
+void __i2400m_roq_init(struct i2400m_roq *roq)
+{
+	roq->ws = 0;	/* window start begins at sequence number 0 */
+	skb_queue_head_init(&roq->queue);	/* roq->log is not set up here */
+}
+
+
+static
+unsigned __i2400m_roq_index(struct i2400m *i2400m, struct i2400m_roq *roq)
+{
+	return ((unsigned long) roq - (unsigned long) i2400m->rx_roq)
+		/ sizeof(*roq);	/* byte offset in rx_roq[] -> element index */
+}
+
+
+/*
+ * Normalize a sequence number based on the queue's window start
+ *
+ * nsn = (sn - ws) % 2048, returned in the range [0, 2047]
+ *
+ * Note that if @sn < @roq->ws, we still need a non-negative number;
+ * the % of a negative value can be negative, so we normalize it by
+ * adding 2048 to bring it back into range.
+ */
+static
+unsigned __i2400m_roq_nsn(struct i2400m_roq *roq, unsigned sn)
+{
+	int r;
+	r =  ((int) sn - (int) roq->ws) % 2048;
+	if (r < 0)
+		r += 2048;
+	return r;
+}
+
+
+/*
+ * Circular buffer to keep the last N reorder operations
+ *
+ * In case something fails, dump them to try to come up with what
+ * happened.
+ */
+enum {
+	I2400M_ROQ_LOG_LENGTH = 32,
+};
+
+struct i2400m_roq_log {
+	struct i2400m_roq_log_entry {
+		enum i2400m_ro_type type;	/* which reorder operation */
+		unsigned ws, count, sn, nsn, new_ws;
+	} entry[I2400M_ROQ_LOG_LENGTH];
+	unsigned in, out;	/* free-running counters, used modulo the length */
+};
+
+
+/* Print log entry @e (slot @e_index) of queue @index with dev_err() */
+static
+void i2400m_roq_log_entry_print(struct i2400m *i2400m, unsigned index,
+				unsigned e_index,
+				struct i2400m_roq_log_entry *e)
+{
+	struct device *dev = i2400m_dev(i2400m);
+
+	switch(e->type) {
+	case I2400M_RO_TYPE_RESET:
+		dev_err(dev, "q#%d reset           ws %u cnt %u sn %u/%u"
+			" - new nws %u\n",
+			index, e->ws, e->count, e->sn, e->nsn, e->new_ws);
+		break;
+	case I2400M_RO_TYPE_PACKET:	/* only type printed without new_ws */
+		dev_err(dev, "q#%d queue           ws %u cnt %u sn %u/%u\n",
+			index, e->ws, e->count, e->sn, e->nsn);
+		break;
+	case I2400M_RO_TYPE_WS:
+		dev_err(dev, "q#%d update_ws       ws %u cnt %u sn %u/%u"
+			" - new nws %u\n",
+			index, e->ws, e->count, e->sn, e->nsn, e->new_ws);
+		break;
+	case I2400M_RO_TYPE_PACKET_WS:
+		dev_err(dev, "q#%d queue_update_ws ws %u cnt %u sn %u/%u"
+			" - new nws %u\n",
+			index, e->ws, e->count, e->sn, e->nsn, e->new_ws);
+		break;
+	default:
+		dev_err(dev, "q#%d BUG? entry %u - unknown type %u\n",
+			index, e_index, e->type);
+		break;
+	}
+}
+
+
+static
+void i2400m_roq_log_add(struct i2400m *i2400m,
+			struct i2400m_roq *roq, enum i2400m_ro_type type,
+			unsigned ws, unsigned count, unsigned sn,
+			unsigned nsn, unsigned new_ws)
+{
+	struct i2400m_roq_log_entry *e;
+	unsigned cnt_idx;
+	int index = __i2400m_roq_index(i2400m, roq);
+
+	/* if the ring is full, drop the oldest entry to make room */
+	if (roq->log->in - roq->log->out == I2400M_ROQ_LOG_LENGTH)
+		roq->log->out++;
+	cnt_idx = roq->log->in++ % I2400M_ROQ_LOG_LENGTH;
+	e = &roq->log->entry[cnt_idx];
+
+	e->type = type;
+	e->ws = ws;
+	e->count = count;
+	e->sn = sn;
+	e->nsn = nsn;
+	e->new_ws = new_ws;
+
+	if (d_test(1))	/* presumably: debug level >= 1 -- TODO confirm */
+		i2400m_roq_log_entry_print(i2400m, index, cnt_idx, e);
+}
+
+
+/* Print every entry in the FIFO, oldest first, and reinitialize it */
+static
+void i2400m_roq_log_dump(struct i2400m *i2400m, struct i2400m_roq *roq)
+{
+	unsigned cnt, cnt_idx;
+	struct i2400m_roq_log_entry *e;
+	int index = __i2400m_roq_index(i2400m, roq);
+
+	BUG_ON(roq->log->out > roq->log->in);
+	for (cnt = roq->log->out; cnt < roq->log->in; cnt++) {
+		cnt_idx = cnt % I2400M_ROQ_LOG_LENGTH;
+		e = &roq->log->entry[cnt_idx];
+		i2400m_roq_log_entry_print(i2400m, index, cnt_idx, e);
+		memset(e, 0, sizeof(*e));	/* wipe the entry once printed */
+	}
+	roq->log->in = roq->log->out = 0;
+}
+
+
+/*
+ * Backbone for the queuing of an skb (by normalized sequence number)
+ *
+ * @i2400m: device descriptor
+ * @roq: reorder queue where to add
+ * @skb: the skb to add
+ * @sn: the sequence number of the skb
+ * @nsn: the normalized sequence number of the skb (pre-computed by the
+ *     caller from the @sn and @roq->ws).
+ *
+ * We try first a couple of quick cases:
+ *
+ *   - the queue is empty
+ *   - the skb would be appended to the queue
+ *
+ * These will be the most common operations.
+ *
+ * If these fail, then we have to do a sorted insertion in the queue,
+ * which is the slowest path.
+ *
+ * We don't have to acquire a reference count as we are going to own it.
+ */
+static
+void __i2400m_roq_queue(struct i2400m *i2400m, struct i2400m_roq *roq,
+			struct sk_buff *skb, unsigned sn, unsigned nsn)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	struct sk_buff *skb_itr;
+	struct i2400m_roq_data *roq_data_itr, *roq_data;
+	unsigned nsn_itr;
+
+	d_fnstart(4, dev, "(i2400m %p roq %p skb %p sn %u nsn %u)\n",
+		  i2400m, roq, skb, sn, nsn);
+
+	roq_data = (struct i2400m_roq_data *) &skb->cb;
+	BUILD_BUG_ON(sizeof(*roq_data) > sizeof(skb->cb));
+	roq_data->sn = sn;	/* the cs in skb->cb is left untouched */
+	d_printf(3, dev, "ERX: roq %p [ws %u] nsn %d sn %u\n",
+		 roq, roq->ws, nsn, roq_data->sn);
+
+	/* Queues will be empty on not-so-bad environments, so try
+	 * that first */
+	if (skb_queue_empty(&roq->queue)) {
+		d_printf(2, dev, "ERX: roq %p - first one\n", roq);
+		__skb_queue_head(&roq->queue, skb);
+		goto out;
+	}
+	/* Now try append, as most of the operations will be that */
+	skb_itr = skb_peek_tail(&roq->queue);
+	roq_data_itr = (struct i2400m_roq_data *) &skb_itr->cb;
+	nsn_itr = __i2400m_roq_nsn(roq, roq_data_itr->sn);
+	/* NSN bounds assumed correct (checked when it was queued) */
+	if (nsn >= nsn_itr) {
+		d_printf(2, dev, "ERX: roq %p - appended after %p (nsn %d sn %u)\n",
+			 roq, skb_itr, nsn_itr, roq_data_itr->sn);
+		__skb_queue_tail(&roq->queue, skb);
+		goto out;
+	}
+	/* None of the fast path options worked. Iterate to find the
+	 * right spot where to insert the packet; we know the queue is
+	 * not empty, so we are not the first ones; we also know we
+	 * are not going to be the last ones. The list is sorted, so
+	 * we have to insert before the first skb with an nsn_itr
+	 * greater than our nsn. */
+	skb_queue_walk(&roq->queue, skb_itr) {
+		roq_data_itr = (struct i2400m_roq_data *) &skb_itr->cb;
+		nsn_itr = __i2400m_roq_nsn(roq, roq_data_itr->sn);
+		/* NSN bounds assumed correct (checked when it was queued) */
+		if (nsn_itr > nsn) {
+			d_printf(2, dev, "ERX: roq %p - queued before %p "
+				 "(nsn %d sn %u)\n", roq, skb_itr, nsn_itr,
+				 roq_data_itr->sn);
+			__skb_queue_before(&roq->queue, skb_itr, skb);
+			goto out;
+		}
+	}
+	/* If we get here, that is VERY bad -- print info to help
+	 * diagnose and crash it */
+	dev_err(dev, "SW BUG? failed to insert packet\n");
+	dev_err(dev, "ERX: roq %p [ws %u] skb %p nsn %d sn %u\n",
+		roq, roq->ws, skb, nsn, roq_data->sn);
+	skb_queue_walk(&roq->queue, skb_itr) {
+		roq_data_itr = (struct i2400m_roq_data *) &skb_itr->cb;
+		nsn_itr = __i2400m_roq_nsn(roq, roq_data_itr->sn);
+		/* NSN bounds assumed correct (checked when it was queued) */
+		dev_err(dev, "ERX: roq %p skb_itr %p nsn %d sn %u\n",
+			roq, skb_itr, nsn_itr, roq_data_itr->sn);
+	}
+	BUG();
+out:
+	d_fnend(4, dev, "(i2400m %p roq %p skb %p sn %u nsn %d) = void\n",
+		i2400m, roq, skb, sn, nsn);
+}
+
+
+/*
+ * Backbone for the update window start operation
+ *
+ * @i2400m: device descriptor
+ * @roq: Reorder queue
+ * @sn: New sequence number (becomes the new window start)
+ *
+ * Delivers to the networking stack all the queued skb's whose
+ * normalized sequence number is lower than the new normalized window
+ * start, sets @roq->ws to @sn and returns the new normalized start.
+ */
+static
+unsigned __i2400m_roq_update_ws(struct i2400m *i2400m, struct i2400m_roq *roq,
+				unsigned sn)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	struct sk_buff *skb_itr, *tmp_itr;
+	struct i2400m_roq_data *roq_data_itr;
+	unsigned new_nws, nsn_itr;
+
+	new_nws = __i2400m_roq_nsn(roq, sn);
+	/*
+	 * For type 2(update_window_start) rx messages, there is no
+	 * need to check if the normalized sequence number is greater 1023.
+	 * Simply insert and deliver all packets to the host up to the
+	 * window start.
+	 */
+	skb_queue_walk_safe(&roq->queue, skb_itr, tmp_itr) {
+		roq_data_itr = (struct i2400m_roq_data *) &skb_itr->cb;
+		nsn_itr = __i2400m_roq_nsn(roq, roq_data_itr->sn);
+		/* NSN bounds assumed correct (checked when it was queued) */
+		if (nsn_itr < new_nws) {
+			d_printf(2, dev, "ERX: roq %p - release skb %p "
+				 "(nsn %u/%u new nws %u)\n",
+				 roq, skb_itr, nsn_itr, roq_data_itr->sn,
+				 new_nws);
+			__skb_unlink(skb_itr, &roq->queue);
+			i2400m_net_erx(i2400m, skb_itr, roq_data_itr->cs);
+		}
+		else
+			break;	/* rest of packets all have nsn_itr >= new_nws */
+	}
+	roq->ws = sn;
+	return new_nws;
+}
+
+
+/*
+ * Reset a queue
+ *
+ * @i2400m: device descriptor
+ * @roq: Reorder queue
+ *
+ * Deliver all the packets and reset the window-start to zero. Name is
+ * kind of misleading.
+ */
+static
+void i2400m_roq_reset(struct i2400m *i2400m, struct i2400m_roq *roq)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	struct sk_buff *skb_itr, *tmp_itr;
+	struct i2400m_roq_data *roq_data_itr;
+
+	d_fnstart(2, dev, "(i2400m %p roq %p)\n", i2400m, roq);
+	i2400m_roq_log_add(i2400m, roq, I2400M_RO_TYPE_RESET,
+			     roq->ws, skb_queue_len(&roq->queue),
+			     ~0, ~0, 0);	/* no sn/nsn for a reset; new ws 0 */
+	skb_queue_walk_safe(&roq->queue, skb_itr, tmp_itr) {
+		roq_data_itr = (struct i2400m_roq_data *) &skb_itr->cb;
+		d_printf(2, dev, "ERX: roq %p - release skb %p (sn %u)\n",
+			 roq, skb_itr, roq_data_itr->sn);
+		__skb_unlink(skb_itr, &roq->queue);
+		i2400m_net_erx(i2400m, skb_itr, roq_data_itr->cs);
+	}
+	roq->ws = 0;
+	d_fnend(2, dev, "(i2400m %p roq %p) = void\n", i2400m, roq);
+}
+
+
+/*
+ * Queue a packet
+ *
+ * @i2400m: device descriptor
+ * @roq: Reorder queue
+ * @skb: containing the packet data
+ * @lbn: Last block number of the packet in @skb; used as its
+ *     sequence number in the queue
+ *
+ * The hardware is asking the driver to queue a packet for later
+ * delivery to the networking stack.
+ */
+static
+void i2400m_roq_queue(struct i2400m *i2400m, struct i2400m_roq *roq,
+		      struct sk_buff * skb, unsigned lbn)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	unsigned nsn, len;
+
+	d_fnstart(2, dev, "(i2400m %p roq %p skb %p lbn %u)\n",
+		  i2400m, roq, skb, lbn);
+	len = skb_queue_len(&roq->queue);
+	nsn = __i2400m_roq_nsn(roq, lbn);
+	if (unlikely(nsn >= 1024)) {	/* outside the accepted window */
+		dev_err(dev, "SW BUG? queue nsn %d (lbn %u ws %u)\n",
+			nsn, lbn, roq->ws);
+		i2400m_roq_log_dump(i2400m, roq);
+		i2400m_reset(i2400m, I2400M_RT_WARM);	/* NOTE(review): skb is neither queued nor freed here -- confirm no leak */
+	} else {
+		__i2400m_roq_queue(i2400m, roq, skb, lbn, nsn);
+		i2400m_roq_log_add(i2400m, roq, I2400M_RO_TYPE_PACKET,
+				     roq->ws, len, lbn, nsn, ~0);
+	}
+	d_fnend(2, dev, "(i2400m %p roq %p skb %p lbn %u) = void\n",
+		i2400m, roq, skb, lbn);
+}
+
+
+/*
+ * Update the window start in a reorder queue and deliver all skbs
+ * with a lower normalized sequence number; the operation is logged
+ *
+ * @i2400m: device descriptor
+ * @roq: Reorder queue
+ * @sn: New sequence number (the new window start)
+ */
+static
+void i2400m_roq_update_ws(struct i2400m *i2400m, struct i2400m_roq *roq,
+			  unsigned sn)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	unsigned old_ws, nsn, len;
+
+	d_fnstart(2, dev, "(i2400m %p roq %p sn %u)\n", i2400m, roq, sn);
+	old_ws = roq->ws;	/* sampled before the update, for the log */
+	len = skb_queue_len(&roq->queue);
+	nsn = __i2400m_roq_update_ws(i2400m, roq, sn);
+	i2400m_roq_log_add(i2400m, roq, I2400M_RO_TYPE_WS,
+			     old_ws, len, sn, nsn, roq->ws);
+	d_fnend(2, dev, "(i2400m %p roq %p sn %u) = void\n", i2400m, roq, sn);
+}
+
+
+/*
+ * Queue a packet and update the window start
+ *
+ * @i2400m: device descriptor
+ * @roq: Reorder queue
+ * @skb: containing the packet data; its cb must already carry the
+ *     i2400m_roq_data (the cs is read from it when delivering)
+ * @sn: Last block number of the packet in @skb
+ *
+ * Note that unlike i2400m_roq_update_ws(), which sets the new window
+ * start to @sn, in here we'll set it to @sn + 1.
+ */
+static
+void i2400m_roq_queue_update_ws(struct i2400m *i2400m, struct i2400m_roq *roq,
+				struct sk_buff * skb, unsigned sn)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	unsigned nsn, old_ws, len;
+
+	d_fnstart(2, dev, "(i2400m %p roq %p skb %p sn %u)\n",
+		  i2400m, roq, skb, sn);
+	len = skb_queue_len(&roq->queue);
+	nsn = __i2400m_roq_nsn(roq, sn);
+	/*
+	 * For type 3(queue_update_window_start) rx messages, there is no
+	 * need to check if the normalized sequence number is greater 1023.
+	 * Simply insert and deliver all packets to the host up to the
+	 * window start.
+	 */
+	old_ws = roq->ws;
+	/* If the queue is empty, don't bother as we'd queue
+	 * it and immediately unqueue it -- just deliver it.
+	 */
+	if (len == 0) {
+		struct i2400m_roq_data *roq_data;
+		roq_data = (struct i2400m_roq_data *) &skb->cb;
+		i2400m_net_erx(i2400m, skb, roq_data->cs);
+	} else
+		__i2400m_roq_queue(i2400m, roq, skb, sn, nsn);
+
+	__i2400m_roq_update_ws(i2400m, roq, sn + 1);
+	i2400m_roq_log_add(i2400m, roq, I2400M_RO_TYPE_PACKET_WS,
+			   old_ws, len, sn, nsn, roq->ws);
+
+	d_fnend(2, dev, "(i2400m %p roq %p skb %p sn %u) = void\n",
+		i2400m, roq, skb, sn);
+}
+
+
+/*
+ * This routine destroys the memory allocated for rx_roq, when no
+ * other thread is accessing it. Access to rx_roq is refcounted by
+ * rx_roq_refcount, hence memory allocated must be destroyed when
+ * rx_roq_refcount becomes zero. This routine gets executed when
+ * rx_roq_refcount becomes zero.
+ */
+static void i2400m_rx_roq_destroy(struct kref *ref)
+{
+	unsigned itr;
+	struct i2400m *i2400m
+			= container_of(ref, struct i2400m, rx_roq_refcount);
+	for (itr = 0; itr < I2400M_RO_CIN + 1; itr++)
+		__skb_queue_purge(&i2400m->rx_roq[itr].queue);
+	kfree(i2400m->rx_roq[0].log);	/* NOTE(review): presumably one allocation for all logs -- confirm */
+	kfree(i2400m->rx_roq);
+	i2400m->rx_roq = NULL;	/* i2400m_rx_edata() tests this under rx_lock */
+}
+
+/*
+ * Receive and send up an extended data packet
+ *
+ * @i2400m: device descriptor
+ * @skb_rx: skb that contains the extended data packet
+ * @single_last: 1 if the payload is the only one or the last one of
+ *     the skb.
+ * @payload: pointer to the packet's data inside the skb
+ * @size: size of the payload
+ *
+ * Starting in v1.4 of the i2400m's firmware, the device can send data
+ * packets to the host in an extended format; this includes a 16
+ * byte header (struct i2400m_pl_edata_hdr). Using this header's space
+ * we can fake ethernet headers for ethernet device emulation without
+ * having to copy packets around.
+ *
+ * This function handles said path.
+ *
+ *
+ * Receive and send up an extended data packet that requires no reordering
+ *
+ * @i2400m: device descriptor
+ * @skb_rx: skb that contains the extended data packet
+ * @single_last: 1 if the payload is the only one or the last one of
+ *     the skb.
+ * @payload: pointer to the packet's data (past the actual extended
+ *     data payload header).
+ * @size: size of the payload
+ *
+ * Pass over to the networking stack a data packet that might have
+ * reordering requirements.
+ *
+ * This needs to decide if the skb in which the packet is
+ * contained can be reused or if it needs to be cloned. Then it has to
+ * be trimmed in the edges so that the beginning is the space for eth
+ * header and then pass it to i2400m_net_erx() for the stack
+ *
+ * Assumes the caller has verified the sanity of the payload (size,
+ * etc) already.
+ */
+static
+void i2400m_rx_edata(struct i2400m *i2400m, struct sk_buff *skb_rx,
+		     unsigned single_last, const void *payload, size_t size)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	const struct i2400m_pl_edata_hdr *hdr = payload;
+	struct net_device *net_dev = i2400m->wimax_dev.net_dev;
+	struct sk_buff *skb;
+	enum i2400m_cs cs;
+	u32 reorder;
+	unsigned ro_needed, ro_type, ro_cin, ro_sn;
+	struct i2400m_roq *roq;
+	struct i2400m_roq_data *roq_data;
+	unsigned long flags;
+
+	BUILD_BUG_ON(ETH_HLEN > sizeof(*hdr));
+
+	d_fnstart(2, dev, "(i2400m %p skb_rx %p single %u payload %p "
+		  "size %zu)\n", i2400m, skb_rx, single_last, payload, size);
+	if (size < sizeof(*hdr)) {
+		dev_err(dev, "ERX: HW BUG? message with short header (%zu "
+			"vs %zu bytes expected)\n", size, sizeof(*hdr));
+		goto error;
+	}
+
+	if (single_last) {
+		skb = skb_get(skb_rx);
+		d_printf(3, dev, "ERX: skb %p reusing\n", skb);
+	} else {
+		skb = skb_clone(skb_rx, GFP_KERNEL);
+		if (skb == NULL) {
+			dev_err(dev, "ERX: no memory to clone skb\n");
+			net_dev->stats.rx_dropped++;
+			goto error_skb_clone;
+		}
+		d_printf(3, dev, "ERX: skb %p cloned from %p\n", skb, skb_rx);
+	}
+	/* now we have to pull and trim so that the skb points to the
+	 * beginning of the IP packet; the netdev part will add the
+	 * ethernet header as needed - we know there is enough space
+	 * because we checked in i2400m_rx_edata(). */
+	skb_pull(skb, payload + sizeof(*hdr) - (void *) skb->data);
+	skb_trim(skb, (void *) skb_end_pointer(skb) - payload - sizeof(*hdr));
+
+	reorder = le32_to_cpu(hdr->reorder);
+	ro_needed = reorder & I2400M_RO_NEEDED;
+	cs = hdr->cs;
+	if (ro_needed) {
+		ro_type = (reorder >> I2400M_RO_TYPE_SHIFT) & I2400M_RO_TYPE;
+		ro_cin = (reorder >> I2400M_RO_CIN_SHIFT) & I2400M_RO_CIN;
+		ro_sn = (reorder >> I2400M_RO_SN_SHIFT) & I2400M_RO_SN;
+
+		spin_lock_irqsave(&i2400m->rx_lock, flags);
+		if (i2400m->rx_roq == NULL) {
+			kfree_skb(skb);	/* rx_roq is already destroyed */
+			spin_unlock_irqrestore(&i2400m->rx_lock, flags);
+			goto error;
+		}
+		roq = &i2400m->rx_roq[ro_cin];
+		kref_get(&i2400m->rx_roq_refcount);
+		spin_unlock_irqrestore(&i2400m->rx_lock, flags);
+
+		roq_data = (struct i2400m_roq_data *) &skb->cb;
+		roq_data->sn = ro_sn;
+		roq_data->cs = cs;
+		d_printf(2, dev, "ERX: reorder needed: "
+			 "type %u cin %u [ws %u] sn %u/%u len %zuB\n",
+			 ro_type, ro_cin, roq->ws, ro_sn,
+			 __i2400m_roq_nsn(roq, ro_sn), size);
+		d_dump(2, dev, payload, size);
+		switch(ro_type) {
+		case I2400M_RO_TYPE_RESET:
+			i2400m_roq_reset(i2400m, roq);
+			kfree_skb(skb);	/* no data here */
+			break;
+		case I2400M_RO_TYPE_PACKET:
+			i2400m_roq_queue(i2400m, roq, skb, ro_sn);
+			break;
+		case I2400M_RO_TYPE_WS:
+			i2400m_roq_update_ws(i2400m, roq, ro_sn);
+			kfree_skb(skb);	/* no data here */
+			break;
+		case I2400M_RO_TYPE_PACKET_WS:
+			i2400m_roq_queue_update_ws(i2400m, roq, skb, ro_sn);
+			break;
+		default:
+			dev_err(dev, "HW BUG? unknown reorder type %u\n", ro_type);
+		}
+
+		spin_lock_irqsave(&i2400m->rx_lock, flags);
+		kref_put(&i2400m->rx_roq_refcount, i2400m_rx_roq_destroy);
+		spin_unlock_irqrestore(&i2400m->rx_lock, flags);
+	}
+	else
+		i2400m_net_erx(i2400m, skb, cs);
+error_skb_clone:
+error:
+	d_fnend(2, dev, "(i2400m %p skb_rx %p single %u payload %p "
+		"size %zu) = void\n", i2400m, skb_rx, single_last, payload, size);
+}
+
+
+/*
+ * Act on a received payload
+ *
+ * @i2400m: device instance
+ * @skb_rx: skb where the transaction was received
+ * @single_last: 1 this is the only payload or the last one (so the
+ *     skb can be reused instead of cloned).
+ * @pld: payload descriptor
+ * @payload: payload data
+ *
+ * Look at the payload's type in its descriptor and dispatch it to the
+ * data, control, trace or extended data handler; data handlers get
+ * @single_last so they can reference and modify @skb_rx instead of
+ * cloning it. Other types are logged (rate-limited) and ignored.
+ */
+static
+void i2400m_rx_payload(struct i2400m *i2400m, struct sk_buff *skb_rx,
+		       unsigned single_last, const struct i2400m_pld *pld,
+		       const void *payload)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	size_t pl_size = i2400m_pld_size(pld);
+	enum i2400m_pt pl_type = i2400m_pld_type(pld);
+
+	d_printf(7, dev, "RX: received payload type %u, %zu bytes\n",
+		 pl_type, pl_size);
+	d_dump(8, dev, payload, pl_size);
+
+	switch (pl_type) {
+	case I2400M_PT_DATA:
+		d_printf(3, dev, "RX: data payload %zu bytes\n", pl_size);
+		i2400m_net_rx(i2400m, skb_rx, single_last, payload, pl_size);
+		break;
+	case I2400M_PT_CTRL:
+		i2400m_rx_ctl(i2400m, skb_rx, payload, pl_size);
+		break;
+	case I2400M_PT_TRACE:
+		i2400m_rx_trace(i2400m, payload, pl_size);
+		break;
+	case I2400M_PT_EDATA:
+		d_printf(3, dev, "ERX: data payload %zu bytes\n", pl_size);
+		i2400m_rx_edata(i2400m, skb_rx, single_last, payload, pl_size);
+		break;
+	default:	/* Anything else shouldn't come to the host */
+		if (printk_ratelimit())
+			dev_err(dev, "RX: HW BUG? unexpected payload type %u\n",
+				pl_type);
+	}
+}
+
+
+/*
+ * Check a received transaction's message header
+ *
+ * @i2400m: device descriptor
+ * @msg_hdr: message header
+ * @buf_size: size of the received buffer
+ *
+ * Returns 0 if the header fits in @buf_size, carries the D2H barker and
+ * declares from 1 to I2400M_MAX_PLS_IN_MSG payloads; -EIO otherwise.
+ */
+static
+int i2400m_rx_msg_hdr_check(struct i2400m *i2400m,
+			    const struct i2400m_msg_hdr *msg_hdr,
+			    size_t buf_size)
+{
+	int result = -EIO;
+	struct device *dev = i2400m_dev(i2400m);
+	if (buf_size < sizeof(*msg_hdr)) {
+		dev_err(dev, "RX: HW BUG? message with short header (%zu "
+			"vs %zu bytes expected)\n", buf_size, sizeof(*msg_hdr));
+		goto error;
+	}
+	if (msg_hdr->barker != cpu_to_le32(I2400M_D2H_MSG_BARKER)) {
+		dev_err(dev, "RX: HW BUG? message received with unknown "
+			"barker 0x%08x (buf_size %zu bytes)\n",
+			le32_to_cpu(msg_hdr->barker), buf_size);
+		goto error;
+	}
+	if (msg_hdr->num_pls == 0) {
+		dev_err(dev, "RX: HW BUG? zero payload packets in message\n");
+		goto error;
+	}
+	if (le16_to_cpu(msg_hdr->num_pls) > I2400M_MAX_PLS_IN_MSG) {
+		dev_err(dev, "RX: HW BUG? message contains more payload "
+			"than maximum; ignoring.\n");
+		goto error;
+	}
+	result = 0;
+error:
+	return result;
+}
+
+
+/*
+ * Check a payload descriptor against the received data
+ *
+ * @i2400m: device descriptor
+ * @pld: payload descriptor
+ * @pl_itr: offset (in bytes) in the received buffer the payload is
+ *          located
+ * @buf_size: size of the received buffer
+ *
+ * Returns 0 if the payload is no bigger than i2400m->bus_pl_size_max,
+ * fits in the buffer and has a legal type; -EIO (after logging) if not.
+ */
+static
+int i2400m_rx_pl_descr_check(struct i2400m *i2400m,
+			      const struct i2400m_pld *pld,
+			      size_t pl_itr, size_t buf_size)
+{
+	int result = -EIO;
+	struct device *dev = i2400m_dev(i2400m);
+	size_t pl_size = i2400m_pld_size(pld);
+	enum i2400m_pt pl_type = i2400m_pld_type(pld);
+
+	if (pl_size > i2400m->bus_pl_size_max) {
+		dev_err(dev, "RX: HW BUG? payload @%zu: size %zu is "
+			"bigger than maximum %zu; ignoring message\n",
+			pl_itr, pl_size, i2400m->bus_pl_size_max);
+		goto error;
+	}
+	if (pl_itr + pl_size > buf_size) {	/* enough? */
+		dev_err(dev, "RX: HW BUG? payload @%zu: size %zu "
+			"goes beyond the received buffer "
+			"size (%zu bytes); ignoring message\n",
+			pl_itr, pl_size, buf_size);
+		goto error;
+	}
+	if (pl_type >= I2400M_PT_ILLEGAL) {
+		dev_err(dev, "RX: HW BUG? illegal payload type %u; "
+			"ignoring message\n", pl_type);
+		goto error;
+	}
+	result = 0;
+error:
+	return result;
+}
+
+
+/**
+ * i2400m_rx - Receive a buffer of data from the device
+ *
+ * @i2400m: device descriptor
+ * @skb: skbuff where the data has been received
+ *
+ * Parse in a buffer of data that contains an RX message sent from the
+ * device. See the file header for the format. Run all checks on the
+ * buffer header, then run over each payload's descriptors, verify
+ * their consistency and act on each payload's contents.  If
+ * everything is successful, update the device's statistics.
+ *
+ * Note: You need to set the skb to contain only the length of the
+ * received buffer; for that, use skb_trim(skb, RECEIVED_SIZE).
+ *
+ * Returns: 0 if ok, < 0 errno on error
+ *
+ * If ok, this function now owns the skb and the caller DOESN'T have
+ * to run kfree_skb() on it. However, on error, the caller still owns
+ * the skb and it is responsible for releasing it. Note that on error
+ * the payloads that precede the bad descriptor have already been
+ * acted upon.
+ */
+int i2400m_rx(struct i2400m *i2400m, struct sk_buff *skb)
+{
+	int i, result;
+	struct device *dev = i2400m_dev(i2400m);
+	const struct i2400m_msg_hdr *msg_hdr;
+	size_t pl_itr, pl_size;
+	unsigned long flags;
+	unsigned num_pls, single_last, skb_len;
+
+	skb_len = skb->len;
+	d_fnstart(4, dev, "(i2400m %p skb %p [size %u])\n",
+		  i2400m, skb, skb_len);
+	msg_hdr = (void *) skb->data;
+	result = i2400m_rx_msg_hdr_check(i2400m, msg_hdr, skb_len);
+	if (result < 0)
+		goto error_msg_hdr_check;
+	result = -EIO;
+	num_pls = le16_to_cpu(msg_hdr->num_pls);
+	/* Check payload descriptor(s) */
+	pl_itr = struct_size(msg_hdr, pld, num_pls);
+	pl_itr = ALIGN(pl_itr, I2400M_PL_ALIGN);
+	if (pl_itr > skb_len) {	/* got all the payload descriptors? */
+		dev_err(dev, "RX: HW BUG? message too short (%u bytes) for "
+			"%u payload descriptors (%zu each, total %zu)\n",
+			skb_len, num_pls, sizeof(msg_hdr->pld[0]), pl_itr);
+		goto error_pl_descr_short;
+	}
+	/* Walk each payload--check we really got it, then act on it */
+	for (i = 0; i < num_pls; i++) {
+		/* work around old gcc warnings */
+		pl_size = i2400m_pld_size(&msg_hdr->pld[i]);
+		result = i2400m_rx_pl_descr_check(i2400m, &msg_hdr->pld[i],
+						  pl_itr, skb_len);
+		if (result < 0)
+			goto error_pl_descr_check;
+		single_last = num_pls == 1 || i == num_pls - 1;
+		i2400m_rx_payload(i2400m, skb, single_last, &msg_hdr->pld[i],
+				  skb->data + pl_itr);
+		pl_itr += ALIGN(pl_size, I2400M_PL_ALIGN);
+		cond_resched();		/* Don't monopolize */
+	}
+	kfree_skb(skb);
+	/* Update device statistics */
+	spin_lock_irqsave(&i2400m->rx_lock, flags);
+	i2400m->rx_pl_num += i;
+	if (i > i2400m->rx_pl_max)
+		i2400m->rx_pl_max = i;
+	if (i < i2400m->rx_pl_min)
+		i2400m->rx_pl_min = i;
+	i2400m->rx_num++;
+	i2400m->rx_size_acc += skb_len;
+	if (skb_len < i2400m->rx_size_min)
+		i2400m->rx_size_min = skb_len;
+	if (skb_len > i2400m->rx_size_max)
+		i2400m->rx_size_max = skb_len;
+	spin_unlock_irqrestore(&i2400m->rx_lock, flags);
+error_pl_descr_check:
+error_pl_descr_short:
+error_msg_hdr_check:
+	d_fnend(4, dev, "(i2400m %p skb %p [size %u]) = %d\n",
+		i2400m, skb, skb_len, result);
+	return result;
+}
+EXPORT_SYMBOL_GPL(i2400m_rx);
+
+/* Complain about an unknown barker and hex dump up to 64 bytes of @buf */
+void i2400m_unknown_barker(struct i2400m *i2400m,
+			   const void *buf, size_t size)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	char prefix[64];
+	const __le32 *barker = buf;
+	dev_err(dev, "RX: HW BUG? unknown barker %08x, "
+		"dropping %zu bytes\n", le32_to_cpu(*barker), size);
+	snprintf(prefix, sizeof(prefix), "%s %s: ",
+		 dev_driver_string(dev), dev_name(dev));
+	if (size > 64) {
+		print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET,
+			       8, 4, buf, 64, 0);
+		printk(KERN_ERR "%s... (only first 64 bytes "
+		       "dumped)\n", prefix);
+	} else
+		print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET,
+			       8, 4, buf, size, 0);
+}
+EXPORT_SYMBOL(i2400m_unknown_barker);
+
+
+/*
+ * Initialize the RX queue and infrastructure
+ *
+ * This sets up all the RX reordering infrastructures, which will not
+ * be used if reordering is not enabled or if the firmware does not
+ * support it. The device is told to do reordering in
+ * i2400m_dev_initialize(), where it also looks at the value of the
+ * i2400m->rx_reorder switch before taking a decision.
+ *
+ * Note we allocate the roq queues in one chunk and their logs in
+ * another one and then point each queue to its log. Returns 0 if ok,
+ * -ENOMEM (with i2400m->rx_roq reset to NULL) if out of memory.
+ */
+int i2400m_rx_setup(struct i2400m *i2400m)
+{
+	int result = 0;
+
+	i2400m->rx_reorder = i2400m_rx_reorder_disabled? 0 : 1;
+	if (i2400m->rx_reorder) {
+		unsigned itr;
+		struct i2400m_roq_log *rd;
+
+		result = -ENOMEM;
+
+		i2400m->rx_roq = kcalloc(I2400M_RO_CIN + 1,
+					 sizeof(i2400m->rx_roq[0]), GFP_KERNEL);
+		if (i2400m->rx_roq == NULL)
+			goto error_roq_alloc;
+
+		rd = kcalloc(I2400M_RO_CIN + 1, sizeof(*i2400m->rx_roq[0].log),
+			     GFP_KERNEL);
+		if (rd == NULL)
+			goto error_roq_log_alloc;
+
+		for (itr = 0; itr < I2400M_RO_CIN + 1; itr++) {
+			__i2400m_roq_init(&i2400m->rx_roq[itr]);
+			i2400m->rx_roq[itr].log = &rd[itr];
+		}
+		kref_init(&i2400m->rx_roq_refcount);
+	}
+	return 0;
+
+error_roq_log_alloc:
+	kfree(i2400m->rx_roq);
+	/* don't leave a stale pointer; RX tests rx_roq against NULL */
+	i2400m->rx_roq = NULL;
+error_roq_alloc:
+	return result;
+}
+
+
+/* Tear down the RX queue and infrastructure; pairs with i2400m_rx_setup() */
+void i2400m_rx_release(struct i2400m *i2400m)
+{
+	unsigned long flags;
+
+	if (i2400m->rx_reorder) {	/* roqs exist only with reordering on */
+		spin_lock_irqsave(&i2400m->rx_lock, flags);
+		kref_put(&i2400m->rx_roq_refcount, i2400m_rx_roq_destroy);
+		spin_unlock_irqrestore(&i2400m->rx_lock, flags);
+	}
+	/* at this point, nothing can be received... */
+	i2400m_report_hook_flush(i2400m);
+}
diff --git a/drivers/staging/wimax/i2400m/sysfs.c b/drivers/staging/wimax/i2400m/sysfs.c
new file mode 100644
index 000000000000..895ee265909b
--- /dev/null
+++ b/drivers/staging/wimax/i2400m/sysfs.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Intel Wireless WiMAX Connection 2400m
+ * Sysfs interfaces to show driver and device information
+ *
+ * Copyright (C) 2007 Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ */
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include "i2400m.h"
+
+
+#define D_SUBMODULE sysfs
+#include "debug-levels.h"
+
+
+/*
+ * Set the idle timeout (msecs): 0, or 100 to 300000 in steps of 100;
+ * returns @size if accepted, < 0 errno (-EINVAL on bad input) if not.
+ * FIXME: eventually this should be a common WiMAX stack method, but
+ * would like to wait to see how other devices manage it.
+ */
+static
+ssize_t i2400m_idle_timeout_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	ssize_t result;
+	struct i2400m *i2400m = net_dev_to_i2400m(to_net_dev(dev));
+	unsigned val;
+
+	result = -EINVAL;
+	if (sscanf(buf, "%u\n", &val) != 1)
+		goto error_no_unsigned;
+	if (val != 0 && (val < 100 || val > 300000 || val % 100 != 0)) {
+		dev_err(dev, "idle_timeout: %u: invalid msecs specification; "
+			"valid values are 0, 100-300000 in 100 increments\n",
+			val);
+		goto error_bad_value;
+	}
+	result = i2400m_set_idle_timeout(i2400m, val);
+	if (result >= 0)
+		result = size;
+error_no_unsigned:
+error_bad_value:
+	return result;
+}
+/* Write-only sysfs attribute; its store is i2400m_idle_timeout_store() */
+static
+DEVICE_ATTR_WO(i2400m_idle_timeout);
+
+static
+struct attribute *i2400m_dev_attrs[] = {
+	&dev_attr_i2400m_idle_timeout.attr,
+	NULL,
+};
+
+struct attribute_group i2400m_dev_attr_group = {
+	.name = NULL,		/* we want them in the same directory */
+	.attrs = i2400m_dev_attrs,
+};
diff --git a/drivers/staging/wimax/i2400m/tx.c b/drivers/staging/wimax/i2400m/tx.c
new file mode 100644
index 000000000000..1255302e251e
--- /dev/null
+++ b/drivers/staging/wimax/i2400m/tx.c
@@ -0,0 +1,1011 @@
+/*
+ * Intel Wireless WiMAX Connection 2400m
+ * Generic (non-bus specific) TX handling
+ *
+ *
+ * Copyright (C) 2007-2008 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Intel Corporation <linux-wimax@intel.com>
+ * Yanir Lubetkin <yanirx.lubetkin@intel.com>
+ *  - Initial implementation
+ *
+ * Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *  - Rewritten to use a single FIFO to lower the memory allocation
+ *    pressure and optimize cache hits when copying to the queue, as
+ *    well as splitting out bus-specific code.
+ *
+ *
+ * Implements data transmission to the device; this is done through a
+ * software FIFO, as data/control frames can be coalesced (while the
+ * device is reading the previous tx transaction, others accumulate).
+ *
+ * A FIFO is used because at the end it is resource-cheaper than trying
+ * to implement scatter/gather over USB. As well, most traffic is going
+ * to be download (vs upload).
+ *
+ * The format for sending/receiving data to/from the i2400m is
+ * described in detail in rx.c:PROTOCOL FORMAT. In here we implement
+ * the transmission of that. This is split between a bus-independent
+ * part that just prepares everything and a bus-specific part that
+ * does the actual transmission over the bus to the device (in the
+ * bus-specific driver).
+ *
+ *
+ * The general format of a device-host transaction is MSG-HDR, PLD1,
+ * PLD2...PLDN, PL1, PL2,...PLN, PADDING.
+ *
+ * Because we need to send payload descriptors and then payloads and
+ * because it is kind of expensive to do scatterlists in USB (one URB
+ * per node), it becomes cheaper to append all the data to a FIFO
+ * (copying to a FIFO potentially in cache is cheaper).
+ *
+ * Then the bus-specific code takes the parts of that FIFO that are
+ * written and passes them to the device.
+ *
+ * So the concepts to keep in mind there are:
+ *
+ * We use a FIFO to queue the data in a linear buffer. We first append
+ * a MSG-HDR, space for I2400M_TX_PLD_MAX payload descriptors and then
+ * go appending payloads until we run out of space or of payload
+ * descriptors. Then we append padding to make the whole transaction a
+ * multiple of i2400m->bus_tx_block_size (as defined by the bus layer).
+ *
+ * - A TX message: a combination of a message header, payload
+ *   descriptors and payloads.
+ *
+ *     Open: it is marked as active (i2400m->tx_msg is valid) and we
+ *       can keep adding payloads to it.
+ *
+ *     Closed: we are not appending more payloads to this TX message
+ *       (exhausted space in the queue, too many payloads or
+ *       whichever).  We have appended padding so the whole message
+ *       length is aligned to i2400m->bus_tx_block_size (as set by the
+ *       bus/transport layer).
+ *
+ * - Most of the time we keep a TX message open to which we append
+ *   payloads.
+ *
+ * - If we are going to append and there is no more space (we are at
+ *   the end of the FIFO), we close the message, mark the rest of the
+ *   FIFO space unusable (skip_tail), create a new message at the
+ *   beginning of the FIFO (if there is space) and append the message
+ *   there.
+ *
+ *   This is because we need to give linear TX messages to the bus
+ *   engine. So we don't write a message to the remaining FIFO space
+ *   until the tail and continue at the head of it.
+ *
+ * - We overload one of the fields in the message header to use it as
+ *   'size' of the TX message, so we can iterate over them. It also
+ *   contains a flag that indicates if we have to skip it or not.
+ *   When we send the buffer, we update that to its real on-the-wire
+ *   value.
+ *
+ * - The MSG-HDR PLD1...PLDN stuff has to be a size multiple of 16.
+ *
+ *   It follows that if MSG-HDR says we have N payloads, the whole
+ *   header + descriptors is 16 + 4*N; for those to be a multiple of
+ *   16, it follows that N can be 4, 8, 12, ... (32, 48, 64, 80...
+ *   bytes).
+ *
+ *   So if we have only 1 payload, we have to submit a header that in
+ *   all truth has space for 4.
+ *
+ *   The implication is that we reserve space for 12 (64 bytes); but
+ *   if we fill up only (eg) 2, our header becomes 32 bytes only. So
+ *   the TX engine has to shift those 32 bytes of msg header and 2
+ *   payloads and padding so that right after it the payloads start
+ *   and the TX engine has to know about that.
+ *
+ *   It is cheaper to move the header up than the whole payloads down.
+ *
+ *   We do this in i2400m_tx_close(). See 'i2400m_msg_hdr->offset'.
+ *
+ * - Each payload has to be size-padded to 16 bytes; before appending
+ *   it, we just do it.
+ *
+ * - The whole message has to be padded to i2400m->bus_tx_block_size;
+ *   we do this at close time. Thus, when reserving space for the
+ *   payload, we always make sure there is also free space for this
+ *   padding that sooner or later will happen.
+ *
+ * When we append a message, we tell the bus specific code to kick in
+ * TXs. It will TX (in parallel) until the buffer is exhausted--hence
+ * the locking we do. The TX code will only send one TX message at a
+ * time (which remember, might contain more than one payload). Of
+ * course, when the bus-specific driver attempts to TX a message that
+ * is still open, it gets closed first.
+ *
+ * Gee, this is messy; well a picture. In the example below we have a
+ * partially full FIFO, with a closed message ready to be delivered
+ * (with a moved message header to make sure it is size-aligned to
+ * 16), TAIL room that was unusable (and thus is marked with a message
+ * header that says 'skip this') and at the head of the buffer, an
+ * incomplete message with a couple of payloads.
+ *
+ * N   ___________________________________________________
+ *    |                                                   |
+ *    |     TAIL room                                     |
+ *    |                                                   |
+ *    |  msg_hdr to skip (size |= 0x80000000)             |
+ *    |---------------------------------------------------|-------
+ *    |                                                   |  /|\
+ *    |                                                   |   |
+ *    |  TX message padding                               |   |
+ *    |                                                   |   |
+ *    |                                                   |   |
+ *    |- - - - - - - - - - - - - - - - - - - - - - - - - -|   |
+ *    |                                                   |   |
+ *    |  payload 1                                        |   |
+ *    |                                                   | N * tx_block_size
+ *    |                                                   |   |
+ *    |- - - - - - - - - - - - - - - - - - - - - - - - - -|   |
+ *    |                                                   |   |
+ *    |  payload 0                                        |
+ *    |                                                   |   |
+ *    |                                                   |   |
+ *    |- - - - - - - - - - - - - - - - - - - - - - - - - -|- -|- - - -
+ *    |  padding 3                  /|\                   |   |   /|\
+ *    |  padding 2                   |                    |   |    |
+ *    |  pld 1                32 bytes (2 * 16)           |   |    |
+ *    |  pld 0                       |                    |   |    |
+ *    |  moved msg_hdr              \|/                   |  \|/   |
+ *    |- - - - - - - - - - - - - - - - - - - - - - - - - -|- - -   |
+ *    |                                                   |    _PLD_SIZE
+ *    |  unused                                           |        |
+ *    |                                                   |        |
+ *    |- - - - - - - - - - - - - - - - - - - - - - - - - -|        |
+ *    |  msg_hdr (size X)       [this message is closed]  |       \|/
+ *    |===================================================|========== <=== OUT
+ *    |                                                   |
+ *    |                                                   |
+ *    |                                                   |
+ *    |          Free room                                |
+ *    |                                                   |
+ *    |                                                   |
+ *    |                                                   |
+ *    |                                                   |
+ *    |                                                   |
+ *    |                                                   |
+ *    |                                                   |
+ *    |                                                   |
+ *    |                                                   |
+ *    |===================================================|========== <=== IN
+ *    |                                                   |
+ *    |                                                   |
+ *    |                                                   |
+ *    |                                                   |
+ *    |  payload 1                                        |
+ *    |                                                   |
+ *    |                                                   |
+ *    |- - - - - - - - - - - - - - - - - - - - - - - - - -|
+ *    |                                                   |
+ *    |  payload 0                                        |
+ *    |                                                   |
+ *    |                                                   |
+ *    |- - - - - - - - - - - - - - - - - - - - - - - - - -|
+ *    |  pld 11                     /|\                   |
+ *    |  ...                         |                    |
+ *    |  pld 1                64 bytes (4 * 16)           |
+ *    |  pld 0                       |                    |
+ *    |  msg_hdr (size X)           \|/ [message is open] |
+ * 0   ---------------------------------------------------
+ *
+ *
+ * ROADMAP
+ *
+ * i2400m_tx_setup()           Called by i2400m_setup
+ * i2400m_tx_release()         Called by i2400m_release()
+ *
+ *  i2400m_tx()                 Called to send data or control frames
+ *    i2400m_tx_fifo_push()     Allocates append-space in the FIFO
+ *    i2400m_tx_new()           Opens a new message in the FIFO
+ *    i2400m_tx_fits()          Checks if a new payload fits in the message
+ *    i2400m_tx_close()         Closes an open message in the FIFO
+ *    i2400m_tx_skip_tail()     Marks unusable FIFO tail space
+ *    i2400m->bus_tx_kick()
+ *
+ * Now i2400m->bus_tx_kick() is the bus-specific driver backend
+ * implementation; that would do:
+ *
+ * i2400m->bus_tx_kick()
+ *   i2400m_tx_msg_get()	Gets first message ready to go
+ *   ...sends it...
+ *   i2400m_tx_msg_sent()       Ack the message is sent; repeat from
+ *                              _tx_msg_get() until it returns NULL
+ *                               (FIFO empty).
+ */
+#include <linux/netdevice.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include "i2400m.h"
+
+
+#define D_SUBMODULE tx
+#include "debug-levels.h"
+
+enum {
+	/**
+	 * TX Buffer size
+	 *
+	 * Doc says maximum transaction is 16KiB. If we had 16KiB en
+	 * route and 16KiB being queued, it boils down to needing
+	 * 32KiB.
+	 * 32KiB is insufficient for 1400 MTU, hence increasing
+	 * tx buffer size to 64KiB.
+	 */
+	I2400M_TX_BUF_SIZE = 65536,
+	/**
+	 * Maximum number of payloads (descriptors) in a TX message
+	 *
+	 * Message header and payload descriptors have to be 16
+	 * aligned (16 + 4 * N = 16 * M). With packets averaging MTU
+	 * size (~1400-~1500), at most 10-11 payloads fit in one
+	 * transaction, which rounds up to 12 (64 bytes). We always
+	 * leave space for the maximum; if at the end there are less,
+	 * we pad up to the nearest multiple of 16.
+	 */
+	/*
+	 * According to Intel Wimax i3200, i5x50 and i6x50 specification
+	 * documents, the maximum number of payloads per message can be
+	 * up to 60. Increasing the number of payloads to 60 per message
+	 * helps to accommodate smaller payloads in a single transaction.
+	 */
+	I2400M_TX_PLD_MAX = 60,
+	I2400M_TX_PLD_SIZE = sizeof(struct i2400m_msg_hdr)
+	+ I2400M_TX_PLD_MAX * sizeof(struct i2400m_pld),	/* hdr + plds */
+	I2400M_TX_SKIP = 0x80000000,	/* size field flag: skip this message */
+	/*
+	 * According to Intel Wimax i3200, i5x50 and i6x50 specification
+	 * documents, the maximum size of each message can be up to 16KiB.
+	 */
+	I2400M_TX_MSG_SIZE = 16384,
+};
+/* i2400m_tx_fifo_push() result: no room at the tail, but there is at the head */
+#define TAIL_FULL ((void *)~(unsigned long)NULL)
+
+/*
+ * Calculate how much tail room is available
+ *
+ * Note the trick here. This path is ONLY called for Case A (see
+ * i2400m_tx_fifo_push() below), where we have:
+ *
+ *       Case A
+ * N  ___________
+ *   | tail room |
+ *   |           |
+ *   |<-  IN   ->|
+ *   |           |
+ *   |   data    |
+ *   |           |
+ *   |<-  OUT  ->|
+ *   |           |
+ *   | head room |
+ * 0  -----------
+ *
+ * When calculating the tail_room, tx_in might get to be zero if
+ * i2400m->tx_in is right at the end of the buffer (really full
+ * buffer) if there is no head room. In this case, tail_room would be
+ * I2400M_TX_BUF_SIZE, although it is actually zero. Hence the final
+ * mod (%) operation. However, when doing this kind of optimization,
+ * i2400m->tx_in being zero would fail, so we treat it as a special
+ * case.
+ */
+static inline
+size_t __i2400m_tx_tail_room(struct i2400m *i2400m)
+{
+	size_t tail_room;
+	size_t tx_in;
+
+	if (unlikely(i2400m->tx_in == 0))
+		return I2400M_TX_BUF_SIZE;
+	tx_in = i2400m->tx_in % I2400M_TX_BUF_SIZE;
+	tail_room = I2400M_TX_BUF_SIZE - tx_in;
+	tail_room %= I2400M_TX_BUF_SIZE;
+	return tail_room;
+}
+
+
+/*
+ * Allocate @size bytes in the TX fifo, return a pointer to it
+ *
+ * @i2400m: device descriptor
+ * @size: size of the buffer we need to allocate
+ * @padding: ensure that there is at least this many bytes of free
+ *     contiguous space in the fifo. This is needed because later on
+ *     we might need to add padding.
+ * @try_head: specify either to allocate head room or tail room space
+ *     in the TX FIFO. This boolean is required to avoid a system hang
+ *     due to an infinite loop caused by i2400m_tx_fifo_push().
+ *     The caller must always try to allocate tail room space first by
+ *     calling this routine with try_head = 0. In case if there
+ *     is not enough tail room space but there is enough head room space,
+ *     (i2400m_tx_fifo_push() returns TAIL_FULL) try to allocate head
+ *     room space, by calling this routine again with try_head = 1.
+ *
+ * Returns:
+ *
+ *     Pointer to the allocated space. NULL if there is no
+ *     space. TAIL_FULL if there is no space at the tail but there is at
+ *     the head (Case B below).
+ *
+ * These are the two basic cases we need to keep an eye for -- it is
+ * much better explained in linux/kernel/kfifo.c, but this code
+ * basically does the same. No rocket science here.
+ *
+ *       Case A               Case B
+ * N  ___________          ___________
+ *   | tail room |        |   data    |
+ *   |           |        |           |
+ *   |<-  IN   ->|        |<-  OUT  ->|
+ *   |           |        |           |
+ *   |   data    |        |   room    |
+ *   |           |        |           |
+ *   |<-  OUT  ->|        |<-  IN   ->|
+ *   |           |        |           |
+ *   | head room |        |   data    |
+ * 0  -----------          -----------
+ *
+ * We allocate only *contiguous* space.
+ *
+ * We can allocate only from 'room'. In Case B, it is simple; in case
+ * A, we only try from the tail room; if it is not enough, we just
+ * fail and return TAIL_FULL and let the caller figure out if it wants to
+ * skip the tail room and try to allocate from the head.
+ *
+ * There is a corner case, wherein i2400m_tx_new() can get into
+ * an infinite loop calling i2400m_tx_fifo_push().
+ * In certain situations, tx_in would have reached on the top of TX FIFO
+ * and i2400m_tx_tail_room() returns 0, as described below:
+ *
+ * N  ___________ tail room is zero
+ *   |<-  IN   ->|
+ *   |           |
+ *   |           |
+ *   |           |
+ *   |   data    |
+ *   |<-  OUT  ->|
+ *   |           |
+ *   |           |
+ *   | head room |
+ * 0  -----------
+ * During such a time, where tail room is zero in the TX FIFO and if there
+ * is a request to add a payload to TX FIFO, which calls:
+ * i2400m_tx()
+ *         ->calls i2400m_tx_close()
+ *         ->calls i2400m_tx_skip_tail()
+ *         goto try_new;
+ *         ->calls i2400m_tx_new()
+ *                    |----> [try_head:]
+ *     infinite loop  |     ->calls i2400m_tx_fifo_push()
+ *                    |                if (tail_room < needed)
+ *                    |                   if (head_room >= needed)
+ *                    |                       return TAIL_FULL;
+ *                    |<----  goto try_head;
+ *
+ * i2400m_tx() calls i2400m_tx_close() to close the message, since there
+ * is no tail room to accommodate the payload and calls
+ * i2400m_tx_skip_tail() to skip the tail space. Now i2400m_tx() calls
+ * i2400m_tx_new() to allocate space for new message header calling
+ * i2400m_tx_fifo_push() that returns TAIL_FULL, since there is no tail space
+ * to accommodate the message header, but there is enough head space.
+ * The i2400m_tx_new() keeps retrying by calling i2400m_tx_fifo_push()
+ * ending up in a loop causing system freeze.
+ *
+ * This corner case is avoided by using a try_head boolean,
+ * as an argument to i2400m_tx_fifo_push().
+ *
+ * Note:
+ *
+ *     Assumes i2400m->tx_lock is taken, and we use that as a barrier
+ *
+ *     The indexes keep increasing and we reset them to zero when we
+ *     pop data off the queue
+ */
+static
+void *i2400m_tx_fifo_push(struct i2400m *i2400m, size_t size,
+			  size_t padding, bool try_head)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	size_t room, tail_room, needed_size;
+	void *ptr;
+
+	needed_size = size + padding;
+	room = I2400M_TX_BUF_SIZE - (i2400m->tx_in - i2400m->tx_out);
+	if (room < needed_size)	{ /* this takes care of Case B */
+		d_printf(2, dev, "fifo push %zu/%zu: no space\n",
+			 size, padding);
+		return NULL;
+	}
+	/* Is there space at the tail? (check skipped when try_head is set) */
+	tail_room = __i2400m_tx_tail_room(i2400m);
+	if (!try_head && tail_room < needed_size) {
+		/*
+		 * If the tail room space is not enough to push the message
+		 * in the TX FIFO, then there are two possibilities:
+		 * 1. There is enough head room space to accommodate
+		 * this message in the TX FIFO.
+		 * 2. There is not enough space in the head room and
+		 * in tail room of the TX FIFO to accommodate the message.
+		 * In case (1), return TAIL_FULL so that the caller
+		 * can decide whether it wants to push the message
+		 * into the head room space.
+		 * In case (2), return NULL, indicating that the TX FIFO
+		 * cannot accommodate the message.
+		 */
+		if (room - tail_room >= needed_size) {
+			d_printf(2, dev, "fifo push %zu/%zu: tail full\n",
+				 size, padding);
+			return TAIL_FULL;	/* There might be head space */
+		} else {
+			d_printf(2, dev, "fifo push %zu/%zu: no head space\n",
+				 size, padding);
+			return NULL;	/* There is no space */
+		}
+	}
+	ptr = i2400m->tx_buf + i2400m->tx_in % I2400M_TX_BUF_SIZE;
+	d_printf(2, dev, "fifo push %zu/%zu: at @%zu\n", size, padding,
+		 i2400m->tx_in % I2400M_TX_BUF_SIZE);
+	i2400m->tx_in += size;	/* only @size is consumed, not @padding */
+	return ptr;
+}
+
+
+/*
+ * Mark the tail of the FIFO buffer as 'to-skip'
+ *
+ * We should never hit the BUG_ON() because all the sizes we push to
+ * the FIFO are padded to be a multiple of 16 -- the size of *msg
+ * (I2400M_PL_PAD for the payloads, I2400M_TX_PLD_SIZE for the
+ * header).
+ *
+ * Tail room can get to be zero if a message was opened when there was
+ * space only for a header. _tx_close() will mark it as to-skip (as it
+ * will have no payloads) and there will be no more space to flush, so
+ * nothing has to be done here. This is probably cheaper than ensuring
+ * in _tx_new() that there is some space for payloads...as we could
+ * always possibly hit the same problem if the payload wouldn't fit.
+ *
+ * Note:
+ *
+ *     Assumes i2400m->tx_lock is taken, and we use that as a barrier
+ *
+ *     This path is only taken for Case A FIFO situations [see
+ *     i2400m_tx_fifo_push()]
+ */
+static
+void i2400m_tx_skip_tail(struct i2400m *i2400m)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	size_t room = __i2400m_tx_tail_room(i2400m);
+	size_t offset = i2400m->tx_in % I2400M_TX_BUF_SIZE;
+	struct i2400m_msg_hdr *skip_hdr;
+	if (unlikely(room == 0))	/* no tail left, nothing to mark */
+		return;
+	skip_hdr = i2400m->tx_buf + offset;
+	BUG_ON(room < sizeof(*skip_hdr));
+	skip_hdr->size = room | I2400M_TX_SKIP;
+	d_printf(2, dev, "skip tail: skipping %zu bytes @%zu\n", room, offset);
+	i2400m->tx_in += room;
+}
+
+
+/*
+ * Check if a skb will fit in the TX queue's current active TX
+ * message (if there are still descriptors left unused).
+ *
+ * Returns:
+ *     0 if the message won't fit, 1 if it will.
+ *
+ * Note:
+ *
+ *     Assumes a TX message is active (i2400m->tx_msg).
+ *
+ *     Assumes i2400m->tx_lock is taken, and we use that as a barrier
+ */
+static
+unsigned i2400m_tx_fits(struct i2400m *i2400m)
+{
+	const struct i2400m_msg_hdr *open_msg = i2400m->tx_msg;
+	/* Fits while at least one payload descriptor slot is unused */
+	return I2400M_TX_PLD_MAX > le16_to_cpu(open_msg->num_pls);
+}
+
+
+/*
+ * Start a new TX message header in the queue.
+ *
+ * Reserve memory from the base FIFO engine and then just initialize
+ * the message header.
+ *
+ * We allocate the biggest TX message header we might need (one that'd
+ * fit I2400M_TX_PLD_MAX payloads) -- when it is closed it will be
+ * 'ironed out' and the unneeded parts removed.
+ *
+ * NOTE:
+ *
+ *     Assumes that the previous message is CLOSED (eg: either
+ *     there was none or 'i2400m_tx_close()' was called on it).
+ *
+ *     Assumes i2400m->tx_lock is taken, and we use that as a barrier
+ */
+static
+void i2400m_tx_new(struct i2400m *i2400m)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	struct i2400m_msg_hdr *tx_msg;
+	bool try_head = false;
+	BUG_ON(i2400m->tx_msg != NULL);
+	/*
+	 * In certain situations, TX queue might have enough space to
+	 * accommodate the new message header I2400M_TX_PLD_SIZE, but
+	 * might not have enough space to accommodate the payloads.
+	 * Adding bus_tx_room_min padding while allocating a new TX message
+	 * increases the possibilities of including at least one payload of the
+	 * size <= bus_tx_room_min.
+	 */
+try_head:
+	tx_msg = i2400m_tx_fifo_push(i2400m, I2400M_TX_PLD_SIZE,
+				     i2400m->bus_tx_room_min, try_head);
+	if (tx_msg == NULL)		/* no room at all in the FIFO */
+		goto out;
+	else if (tx_msg == TAIL_FULL) {
+		i2400m_tx_skip_tail(i2400m);
+		d_printf(2, dev, "new TX message: tail full, trying head\n");
+		try_head = true;	/* next push skips the tail check */
+		goto try_head;
+	}
+	memset(tx_msg, 0, I2400M_TX_PLD_SIZE);
+	tx_msg->size = I2400M_TX_PLD_SIZE;
+out:
+	i2400m->tx_msg = tx_msg;	/* NULL when the allocation failed */
+	d_printf(2, dev, "new TX message: %p @%zu\n",
+		 tx_msg, (void *) tx_msg - i2400m->tx_buf);
+}
+
+
+/*
+ * Finalize the current TX message header
+ *
+ * Sets the message header to be at the proper location depending on
+ * how many descriptors we have (check documentation at the file's
+ * header for more info on that).
+ *
+ * Appends padding bytes to make sure the whole TX message (counting
+ * from the 'relocated' message header) is aligned to
+ * tx_block_size. We assume the _append() code has left enough space
+ * in the FIFO for that. If there are no payloads, just pass, as it
+ * won't be transferred.
+ *
+ * The amount of padding bytes depends on how many payloads are in the
+ * TX message, as the "msg header and payload descriptors" will be
+ * shifted up in the buffer.
+ */
+static
+void i2400m_tx_close(struct i2400m *i2400m)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	struct i2400m_msg_hdr *tx_msg = i2400m->tx_msg;
+	struct i2400m_msg_hdr *tx_msg_moved;
+	size_t aligned_size, padding, hdr_size;
+	void *pad_buf;
+	unsigned num_pls;
+
+	if (tx_msg->size & I2400M_TX_SKIP)	/* a skipper? nothing to do */
+		goto out;
+	num_pls = le16_to_cpu(tx_msg->num_pls);
+	/* We can get this situation when a new message was started
+	 * and there was no space to add payloads before hitting the
+	 * tail (and taking padding into consideration). */
+	if (num_pls == 0) {
+		tx_msg->size |= I2400M_TX_SKIP;
+		goto out;
+	}
+	/* Relocate the message header
+	 *
+	 * Find the current header size, align it to 16 and if we need
+	 * to move it so the tail is next to the payloads, move it and
+	 * set the offset.
+	 *
+	 * If it moved, this header is good only for transmission; the
+	 * original one (it is kept if we moved) is still used to
+	 * figure out where the next TX message starts (and where the
+	 * offset to the moved header is).
+	 */
+	hdr_size = struct_size(tx_msg, pld, le16_to_cpu(tx_msg->num_pls));
+	hdr_size = ALIGN(hdr_size, I2400M_PL_ALIGN);
+	tx_msg->offset = I2400M_TX_PLD_SIZE - hdr_size;
+	tx_msg_moved = (void *) tx_msg + tx_msg->offset;
+	memmove(tx_msg_moved, tx_msg, hdr_size);
+	tx_msg_moved->size -= tx_msg->offset;
+	/*
+	 * Now figure out how much we have to add to the (moved!)
+	 * message so the size is a multiple of i2400m->bus_tx_block_size.
+	 */
+	aligned_size = ALIGN(tx_msg_moved->size, i2400m->bus_tx_block_size);
+	padding = aligned_size - tx_msg_moved->size;
+	if (padding > 0) {
+		pad_buf = i2400m_tx_fifo_push(i2400m, padding, 0, 0);
+		if (WARN_ON(pad_buf == NULL || pad_buf == TAIL_FULL)) {
+			/* This should not happen -- append should verify
+			 * there is always space left at least to append
+			 * tx_block_size */
+			dev_err(dev,
+				"SW BUG! Possible data leakage from memory the "
+				"device should not read for padding - "
+				"size %lu aligned_size %zu tx_buf %p in "
+				"%zu out %zu\n",
+				(unsigned long) tx_msg_moved->size,
+				aligned_size, i2400m->tx_buf, i2400m->tx_in,
+				i2400m->tx_out);
+		} else
+			memset(pad_buf, 0xad, padding);
+	}
+	tx_msg_moved->padding = cpu_to_le16(padding);
+	tx_msg_moved->size += padding;
+	if (tx_msg != tx_msg_moved)
+		tx_msg->size += padding;
+out:
+	i2400m->tx_msg = NULL;	/* no TX message is open anymore */
+}
+
+
+/**
+ * i2400m_tx - send the data in a buffer to the device
+ * @i2400m: device descriptor
+ * @buf: pointer to the buffer to transmit
+ *
+ * @buf_len: buffer size
+ *
+ * @pl_type: type of the payload we are sending.
+ *
+ * Returns:
+ *     0 if ok, < 0 errno code on error (-ENOSPC, if there is no more
+ *     room for the message in the queue).
+ *
+ * Appends the buffer to the TX FIFO and notifies the bus-specific
+ * part of the driver that there is new data ready to transmit.
+ * Once this function returns, the buffer has been copied, so it can
+ * be reused.
+ *
+ * The steps followed to append are explained in detail in the file
+ * header.
+ *
+ * Whenever we write to a message, we increase msg->size, so it
+ * reflects exactly how big the message is. This is needed so that if
+ * we concatenate two messages before they can be sent, the code that
+ * sends the messages can find the boundaries (and it will replace the
+ * size with the real barker before sending).
+ *
+ * Note:
+ *
+ *     Cold and warm reset payloads need to be sent as a single
+ *     payload, so we handle that.
+ */
+int i2400m_tx(struct i2400m *i2400m, const void *buf, size_t buf_len,
+	      enum i2400m_pt pl_type)
+{
+	int result = -ENOSPC;
+	struct device *dev = i2400m_dev(i2400m);
+	unsigned long flags;
+	size_t padded_len;
+	void *ptr;
+	bool try_head = false;
+	unsigned is_singleton = pl_type == I2400M_PT_RESET_WARM
+		|| pl_type == I2400M_PT_RESET_COLD;
+
+	d_fnstart(3, dev, "(i2400m %p skb %p [%zu bytes] pt %u)\n",
+		  i2400m, buf, buf_len, pl_type);
+	padded_len = ALIGN(buf_len, I2400M_PL_ALIGN);
+	d_printf(5, dev, "padded_len %zu buf_len %zu\n", padded_len, buf_len);
+	/* If there is no current TX message, create one; if the
+	 * current one is out of payload slots or we have a singleton,
+	 * close it and start a new one */
+	spin_lock_irqsave(&i2400m->tx_lock, flags);
+	/* If tx_buf is NULL, device is shutdown */
+	if (i2400m->tx_buf == NULL) {
+		result = -ESHUTDOWN;
+		goto error_tx_new;
+	}
+try_new:
+	if (unlikely(i2400m->tx_msg == NULL))
+		i2400m_tx_new(i2400m);
+	else if (unlikely(!i2400m_tx_fits(i2400m)
+			  || (is_singleton && i2400m->tx_msg->num_pls != 0))) {
+		d_printf(2, dev, "closing TX message (fits %u singleton "
+			 "%u num_pls %u)\n", i2400m_tx_fits(i2400m),
+			 is_singleton, le16_to_cpu(i2400m->tx_msg->num_pls));
+		i2400m_tx_close(i2400m);
+		i2400m_tx_new(i2400m);
+	}
+	if (i2400m->tx_msg == NULL)	/* i2400m_tx_new() found no room */
+		goto error_tx_new;
+	/*
+	 * Check if this skb will fit in the TX queue's current active
+	 * TX message. The total message size must not exceed the maximum
+	 * size of each message I2400M_TX_MSG_SIZE. If it exceeds,
+	 * close the current message and push this skb into the new message.
+	 */
+	if (i2400m->tx_msg->size + padded_len > I2400M_TX_MSG_SIZE) {
+		d_printf(2, dev, "TX: message too big, going new\n");
+		i2400m_tx_close(i2400m);
+		i2400m_tx_new(i2400m);
+	}
+	if (i2400m->tx_msg == NULL)
+		goto error_tx_new;
+	/* So we have a current message header; now append space for
+	 * the message -- if there is not enough, try the head */
+	ptr = i2400m_tx_fifo_push(i2400m, padded_len,
+				  i2400m->bus_tx_block_size, try_head);
+	if (ptr == TAIL_FULL) {	/* Tail is full, try head */
+		d_printf(2, dev, "pl append: tail full\n");
+		i2400m_tx_close(i2400m);
+		i2400m_tx_skip_tail(i2400m);
+		try_head = true;
+		goto try_new;
+	} else if (ptr == NULL) {	/* All full */
+		result = -ENOSPC;
+		d_printf(2, dev, "pl append: all full\n");
+	} else {			/* Got space, copy it, set padding */
+		struct i2400m_msg_hdr *tx_msg = i2400m->tx_msg;
+		unsigned num_pls = le16_to_cpu(tx_msg->num_pls);
+		memcpy(ptr, buf, buf_len);
+		memset(ptr + buf_len, 0xad, padded_len - buf_len);
+		i2400m_pld_set(&tx_msg->pld[num_pls], buf_len, pl_type);
+		d_printf(3, dev, "pld 0x%08x (type 0x%1x len 0x%04zx)\n",
+			 le32_to_cpu(tx_msg->pld[num_pls].val),
+			 pl_type, buf_len);
+		/* num_pls is kept little-endian in the message header */
+		tx_msg->num_pls = cpu_to_le16(num_pls + 1);
+		d_printf(2, dev, "TX: appended %zu b (up to %u b) pl #%u\n",
+			padded_len, tx_msg->size + padded_len, num_pls+1);
+		tx_msg->size += padded_len;
+		d_printf(2, dev,
+			 "TX: appended hdr @%zu %zu b pl #%u @%zu %zu/%zu b\n",
+			 (void *)tx_msg - i2400m->tx_buf, (size_t)tx_msg->size,
+			 num_pls+1, ptr - i2400m->tx_buf, buf_len, padded_len);
+		result = 0;
+		if (is_singleton)	/* resets travel alone in their message */
+			i2400m_tx_close(i2400m);
+	}
+error_tx_new:
+	spin_unlock_irqrestore(&i2400m->tx_lock, flags);
+	/* kick in most cases, except when the TX subsys is down, as
+	 * it might free space */
+	if (likely(result != -ESHUTDOWN))
+		i2400m->bus_tx_kick(i2400m);
+	d_fnend(3, dev, "(i2400m %p skb %p [%zu bytes] pt %u) = %d\n",
+		i2400m, buf, buf_len, pl_type, result);
+	return result;
+}
+EXPORT_SYMBOL_GPL(i2400m_tx);
+
+
+/**
+ * i2400m_tx_msg_get - Get the first TX message in the FIFO to start sending it
+ *
+ * @i2400m: device descriptors
+ * @bus_size: where to place the size of the TX message
+ *
+ * Called by the bus-specific driver to get the first TX message at
+ * the FIFO that is ready for transmission.
+ *
+ * It sets the state in @i2400m to indicate the bus-specific driver is
+ * transferring that message (i2400m->tx_msg_size).
+ *
+ * Once the transfer is completed, call i2400m_tx_msg_sent().
+ *
+ * Notes:
+ *
+ *     The size of the TX message to be transmitted might be smaller than
+ *     that of the TX message in the FIFO (in case the header was
+ *     shorter). Hence, we copy it in @bus_size, for the bus layer to
+ *     use. We keep the message's size in i2400m->tx_msg_size so that
+ *     when the bus layer is done transferring we know how much to
+ *     advance the fifo.
+ *
+ *     We collect statistics here as all the data is available and we
+ *     assume it is going to work [see i2400m_tx_msg_sent()].
+ */
+struct i2400m_msg_hdr *i2400m_tx_msg_get(struct i2400m *i2400m,
+					 size_t *bus_size)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	struct i2400m_msg_hdr *tx_msg, *tx_msg_moved;
+	unsigned long flags, pls;
+
+	d_fnstart(3, dev, "(i2400m %p bus_size %p)\n", i2400m, bus_size);
+	spin_lock_irqsave(&i2400m->tx_lock, flags);
+	tx_msg_moved = NULL;
+	if (i2400m->tx_buf == NULL)	/* TX FIFO not set up / released */
+		goto out_unlock;
+skip:
+	tx_msg_moved = NULL;
+	if (i2400m->tx_in == i2400m->tx_out) {	/* Empty FIFO? */
+		i2400m->tx_in = 0;
+		i2400m->tx_out = 0;
+		d_printf(2, dev, "TX: FIFO empty: resetting\n");
+		goto out_unlock;
+	}
+	tx_msg = i2400m->tx_buf + i2400m->tx_out % I2400M_TX_BUF_SIZE;
+	if (tx_msg->size & I2400M_TX_SKIP) {	/* skip? */
+		d_printf(2, dev, "TX: skip: msg @%zu (%zu b)\n",
+			 i2400m->tx_out % I2400M_TX_BUF_SIZE,
+			 (size_t) tx_msg->size & ~I2400M_TX_SKIP);
+		i2400m->tx_out += tx_msg->size & ~I2400M_TX_SKIP;
+		goto skip;
+	}
+
+	if (tx_msg->num_pls == 0) {		/* No payloads? */
+		if (tx_msg == i2400m->tx_msg) {	/* open, we are done */
+			d_printf(2, dev,
+				 "TX: FIFO empty: open msg w/o payloads @%zu\n",
+				 (void *) tx_msg - i2400m->tx_buf);
+			tx_msg = NULL;
+			goto out_unlock;
+		} else {			/* closed, skip it */
+			d_printf(2, dev,
+				 "TX: skip msg w/o payloads @%zu (%zu b)\n",
+				 (void *) tx_msg - i2400m->tx_buf,
+				 (size_t) tx_msg->size);
+			i2400m->tx_out += tx_msg->size & ~I2400M_TX_SKIP;
+			goto skip;
+		}
+	}
+	if (tx_msg == i2400m->tx_msg)		/* open msg? */
+		i2400m_tx_close(i2400m);
+
+	/* Now we have a valid TX message (with payloads) to TX */
+	tx_msg_moved = (void *) tx_msg + tx_msg->offset;
+	i2400m->tx_msg_size = tx_msg->size;
+	*bus_size = tx_msg_moved->size;
+	d_printf(2, dev, "TX: pid %d msg hdr at @%zu offset +@%zu "
+		 "size %zu bus_size %zu\n",
+		 current->pid, (void *) tx_msg - i2400m->tx_buf,
+		 (size_t) tx_msg->offset, (size_t) tx_msg->size,
+		 (size_t) tx_msg_moved->size);
+	/* Header fields go out little-endian: convert from CPU order */
+	tx_msg_moved->barker = cpu_to_le32(I2400M_H2D_PREVIEW_BARKER);
+	tx_msg_moved->sequence = cpu_to_le32(i2400m->tx_sequence++);
+	/* num_pls is a 16-bit little-endian field */
+	pls = le16_to_cpu(tx_msg_moved->num_pls);
+	i2400m->tx_pl_num += pls;		/* Update stats */
+	if (pls > i2400m->tx_pl_max)
+		i2400m->tx_pl_max = pls;
+	if (pls < i2400m->tx_pl_min)
+		i2400m->tx_pl_min = pls;
+	i2400m->tx_num++;
+	i2400m->tx_size_acc += *bus_size;
+	if (*bus_size < i2400m->tx_size_min)
+		i2400m->tx_size_min = *bus_size;
+	if (*bus_size > i2400m->tx_size_max)
+		i2400m->tx_size_max = *bus_size;
+out_unlock:
+	spin_unlock_irqrestore(&i2400m->tx_lock, flags);
+	d_fnend(3, dev, "(i2400m %p bus_size %p [%zu]) = %p\n",
+		i2400m, bus_size, *bus_size, tx_msg_moved);
+	return tx_msg_moved;
+}
+EXPORT_SYMBOL_GPL(i2400m_tx_msg_get);
+
+
+/**
+ * i2400m_tx_msg_sent - indicate the transmission of a TX message
+ *
+ * @i2400m: device descriptor
+ *
+ * Called by the bus-specific driver when a message has been sent;
+ * this pops it from the FIFO; and as there is space, start the queue
+ * in case it was stopped.
+ *
+ * Should be called even if the message send failed and we are
+ * dropping this TX message.
+ */
+void i2400m_tx_msg_sent(struct i2400m *i2400m)
+{
+	struct device *dev = i2400m_dev(i2400m);
+	unsigned long irq_flags;
+	unsigned wraps;
+
+	d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
+	spin_lock_irqsave(&i2400m->tx_lock, irq_flags);
+	if (i2400m->tx_buf != NULL) {	/* NULL: TX FIFO is gone, no-op */
+		i2400m->tx_out += i2400m->tx_msg_size;
+		d_printf(2, dev, "TX: sent %zu b\n",
+			 (size_t) i2400m->tx_msg_size);
+		i2400m->tx_msg_size = 0;
+		BUG_ON(i2400m->tx_out > i2400m->tx_in);
+		/* Fold whole-buffer multiples out of both FIFO indexes */
+		wraps = i2400m->tx_out / I2400M_TX_BUF_SIZE;
+		i2400m->tx_out %= I2400M_TX_BUF_SIZE;
+		i2400m->tx_in -= wraps * I2400M_TX_BUF_SIZE;
+	}
+	spin_unlock_irqrestore(&i2400m->tx_lock, irq_flags);
+	d_fnend(3, dev, "(i2400m %p) = void\n", i2400m);
+}
+EXPORT_SYMBOL_GPL(i2400m_tx_msg_sent);
+
+
+/**
+ * i2400m_tx_setup - Initialize the TX queue and infrastructure
+ *
+ * Make sure we reset the TX sequence to zero, as when this function
+ * is called, the firmware has been just restarted. Same rationale
+ * for tx_in, tx_out, tx_msg_size and tx_msg. We reset them since
+ * the memory for TX queue is reallocated.
+ */
+int i2400m_tx_setup(struct i2400m *i2400m)
+{
+	unsigned long irq_flags;
+	void *fifo;
+
+	/*
+	 * The work struct is initialized here, only once; doing it from
+	 * i2400m_hard_start_xmit() would cause race conditions if the
+	 * work was scheduled on another CPU.
+	 */
+	INIT_WORK(&i2400m->wake_tx_ws, i2400m_wake_tx_work);
+
+	/*
+	 * Build-time check: the TX FIFO must hold at least two maximum
+	 * size messages [one being delivered while one is constructed].
+	 */
+	BUILD_BUG_ON(2 * I2400M_TX_MSG_SIZE > I2400M_TX_BUF_SIZE);
+
+	fifo = kmalloc(I2400M_TX_BUF_SIZE, GFP_ATOMIC);
+	if (fifo == NULL)
+		return -ENOMEM;
+
+	/* Install the new buffer and reset all FIFO state under the lock */
+	spin_lock_irqsave(&i2400m->tx_lock, irq_flags);
+	i2400m->tx_buf = fifo;
+	i2400m->tx_msg = NULL;
+	i2400m->tx_msg_size = 0;
+	i2400m->tx_out = 0;
+	i2400m->tx_in = 0;
+	i2400m->tx_sequence = 0;
+	spin_unlock_irqrestore(&i2400m->tx_lock, irq_flags);
+
+	/* The bus layer has to have defined a TX block size */
+	BUG_ON(i2400m->bus_tx_block_size == 0);
+	return 0;
+}
+
+
+/**
+ * i2400m_tx_release - Tear down the TX queue and infrastructure
+ */
+void i2400m_tx_release(struct i2400m *i2400m)
+{
+	unsigned long irq_flags;	/* saved IRQ state for tx_lock */
+	spin_lock_irqsave(&i2400m->tx_lock, irq_flags);
+	kfree(i2400m->tx_buf);
+	i2400m->tx_buf = NULL;	/* TX paths bail out when they see NULL */
+	spin_unlock_irqrestore(&i2400m->tx_lock, irq_flags);
+}
diff --git a/drivers/staging/wimax/i2400m/usb-debug-levels.h b/drivers/staging/wimax/i2400m/usb-debug-levels.h
new file mode 100644
index 000000000000..8fd0111560f6
--- /dev/null
+++ b/drivers/staging/wimax/i2400m/usb-debug-levels.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Intel Wireless WiMAX Connection 2400m
+ * Debug levels control file for the i2400m-usb module
+ *
+ * Copyright (C) 2007-2008 Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ */
+#ifndef __debug_levels__h__
+#define __debug_levels__h__
+
+/* Maximum compile and run time debug level for all submodules */
+#define D_MODULENAME i2400m_usb
+#define D_MASTER CONFIG_WIMAX_I2400M_DEBUG_LEVEL
+
+#include "../linux-wimax-debug.h"
+
+/* Debug submodules of i2400m-usb; a .c file picks one via D_SUBMODULE */
+enum d_module {
+	D_SUBMODULE_DECLARE(usb),
+	D_SUBMODULE_DECLARE(fw),
+	D_SUBMODULE_DECLARE(notif),
+	D_SUBMODULE_DECLARE(rx),
+	D_SUBMODULE_DECLARE(tx),
+};
+
+
+#endif /* #ifndef __debug_levels__h__ */
diff --git a/drivers/staging/wimax/i2400m/usb-fw.c b/drivers/staging/wimax/i2400m/usb-fw.c
new file mode 100644
index 000000000000..27ab233650d5
--- /dev/null
+++ b/drivers/staging/wimax/i2400m/usb-fw.c
@@ -0,0 +1,365 @@
+/*
+ * Intel Wireless WiMAX Connection 2400m
+ * Firmware uploader's USB specifics
+ *
+ *
+ * Copyright (C) 2007-2008 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Intel Corporation <linux-wimax@intel.com>
+ * Yanir Lubetkin <yanirx.lubetkin@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *  - Initial implementation
+ *
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *  - bus generic/specific split
+ *
+ * THE PROCEDURE
+ *
+ * See fw.c for the generic description of this procedure.
+ *
+ * This file implements only the USB specifics. It boils down to how
+ * to send a command and waiting for an acknowledgement from the
+ * device.
+ *
+ * This code (and process) is single threaded. It assumes it is the
+ * only thread poking around (guaranteed by fw.c).
+ *
+ * COMMAND EXECUTION
+ *
+ * A write URB is posted with the buffer to the bulk output endpoint.
+ *
+ * ACK RECEPTION
+ *
+ * We just post a URB to the notification endpoint and wait for
+ * data. We repeat until we get all the data we expect (as indicated
+ * by the call from the bus generic code).
+ *
+ * The data is not read from the bulk in endpoint for boot mode.
+ *
+ * ROADMAP
+ *
+ * i2400mu_bus_bm_cmd_send
+ *   i2400m_bm_cmd_prepare...
+ *   i2400mu_tx_bulk_out
+ *
+ * i2400mu_bus_bm_wait_for_ack
+ *   i2400m_notif_submit
+ */
+#include <linux/usb.h>
+#include <linux/gfp.h>
+#include "i2400m-usb.h"
+
+
+#define D_SUBMODULE fw
+#include "usb-debug-levels.h"
+
+
+/*
+ * Synchronous write to the device
+ *
+ * Takes care of updating EDC counts and thus, handle device errors.
+ */
+static
+ssize_t i2400mu_tx_bulk_out(struct i2400mu *i2400mu, void *buf, size_t buf_size)
+{
+	int result;
+	struct device *dev = &i2400mu->usb_iface->dev;
+	int len;
+	struct usb_endpoint_descriptor *epd;
+	int pipe, do_autopm = 1;
+
+	result = usb_autopm_get_interface(i2400mu->usb_iface);
+	if (result < 0) {
+		dev_err(dev, "BM-CMD: can't get autopm: %d\n", result);
+		do_autopm = 0;	/* carry on, but skip the matching put */
+	}
+	epd = usb_get_epd(i2400mu->usb_iface, i2400mu->endpoint_cfg.bulk_out);
+	pipe = usb_sndbulkpipe(i2400mu->usb_dev, epd->bEndpointAddress);
+retry:
+	result = usb_bulk_msg(i2400mu->usb_dev, pipe, buf, buf_size, &len, 200);
+	switch (result) {
+	case 0:
+		if (len != buf_size) {
+			dev_err(dev, "BM-CMD: short write (%u B vs %zu "
+				"expected)\n", len, buf_size);
+			result = -EIO;
+			break;
+		}
+		result = len;
+		break;
+	case -EPIPE:
+		/*
+		 * Stall -- maybe the device is choking with our
+		 * requests. Clear it and give it some time. If they
+		 * happen too often, it might be another symptom, so we
+		 * reset.
+		 *
+		 * No error handling for usb_clear_halt(); if it
+		 * works, the retry works; if it fails, this switch
+		 * does the error handling for us.
+		 */
+		if (edc_inc(&i2400mu->urb_edc,
+			    10 * EDC_MAX_ERRORS, EDC_ERROR_TIMEFRAME)) {
+			dev_err(dev, "BM-CMD: too many stalls in "
+				"URB; resetting device\n");
+			usb_queue_reset_device(i2400mu->usb_iface);
+		} else {
+			usb_clear_halt(i2400mu->usb_dev, pipe);
+			msleep(10);	/* give the device some time */
+			goto retry;
+		}
+		fallthrough;
+	case -EINVAL:			/* while removing driver */
+	case -ENODEV:			/* dev disconnect ... */
+	case -ENOENT:			/* just ignore it */
+	case -ESHUTDOWN:		/* and exit */
+	case -ECONNRESET:
+		result = -ESHUTDOWN;
+		break;
+	case -ETIMEDOUT:			/* returned as is, no retry */
+		break;
+	default:				/* any other? */
+		if (edc_inc(&i2400mu->urb_edc,
+			    EDC_MAX_ERRORS, EDC_ERROR_TIMEFRAME)) {
+				dev_err(dev, "BM-CMD: maximum errors in "
+					"URB exceeded; resetting device\n");
+				usb_queue_reset_device(i2400mu->usb_iface);
+				result = -ENODEV;
+				break;
+		}
+		dev_err(dev, "BM-CMD: URB error %d, retrying\n",
+			result);
+		goto retry;
+	}
+	if (do_autopm)
+		usb_autopm_put_interface(i2400mu->usb_iface);
+	return result;
+}
+
+
+/*
+ * Send a boot-mode command over the bulk-out pipe
+ *
+ * Command can be a raw command, which requires no preparation (and
+ * which might not even be following the command format). Checks that
+ * the right amount of data was transferred.
+ *
+ * To satisfy USB requirements (no onstack, vmalloc or in data segment
+ * buffers), we copy the command to i2400m->bm_cmd_buf and send it from
+ * there.
+ *
+ * @flags: pass thru from i2400m_bm_cmd()
+ * @return: cmd_size if ok, < 0 errno code on error.
+ */
+ssize_t i2400mu_bus_bm_cmd_send(struct i2400m *i2400m,
+				const struct i2400m_bootrom_header *_cmd,
+				size_t cmd_size, int flags)
+{
+	struct i2400mu *i2400mu = container_of(i2400m, struct i2400mu, i2400m);
+	struct device *dev = i2400m_dev(i2400m);
+	/* Only used in error messages; -1 when no command was given */
+	int opcode = _cmd == NULL ? -1 : i2400m_brh_get_opcode(_cmd);
+	size_t aligned = ALIGN(cmd_size, 16);	/* USB restriction */
+	struct i2400m_bootrom_header *cmd;
+	ssize_t result = -E2BIG;
+
+	d_fnstart(8, dev, "(i2400m %p cmd %p size %zu)\n",
+		  i2400m, _cmd, cmd_size);
+	if (cmd_size > I2400M_BM_CMD_BUF_SIZE)
+		goto out;
+	/* Stage the command in bm_cmd_buf unless it already lives there */
+	if (_cmd != i2400m->bm_cmd_buf)
+		memmove(i2400m->bm_cmd_buf, _cmd, cmd_size);
+	cmd = i2400m->bm_cmd_buf;
+	if (aligned > cmd_size)				/* Zero pad space */
+		memset(i2400m->bm_cmd_buf + cmd_size, 0, aligned - cmd_size);
+	/*
+	 * Non-raw commands go through i2400m_bm_cmd_prepare(); one that
+	 * does not request a response is flagged as a software bug.
+	 */
+	if (!(flags & I2400M_BM_CMD_RAW)) {
+		if (WARN_ON(i2400m_brh_get_response_required(cmd) == 0))
+			dev_warn(dev, "SW BUG: response_required == 0\n");
+		i2400m_bm_cmd_prepare(cmd);
+	}
+	result = i2400mu_tx_bulk_out(i2400mu, i2400m->bm_cmd_buf, cmd_size);
+	if (result < 0) {
+		dev_err(dev, "boot-mode cmd %d: cannot send: %zd\n",
+			opcode, result);
+	} else if (result != cmd_size) {	/* all was transferred? */
+		dev_err(dev, "boot-mode cmd %d: incomplete transfer "
+			"(%zd vs %zu submitted)\n",  opcode, result, cmd_size);
+		result = -EIO;
+	}
+out:
+	d_fnend(8, dev, "(i2400m %p cmd %p size %zu) = %zd\n",
+		i2400m, _cmd, cmd_size, result);
+	return result;
+}
+
+
+/* URB completion handler: wake whoever waits on urb->context */
+static void __i2400mu_bm_notif_cb(struct urb *urb)
+{
+	complete(urb->context);
+}
+
+
+/*
+ * submit a read to the notification endpoint
+ *
+ * @i2400mu: device descriptor
+ * @urb: urb to use
+ * @completion: completion variable to complete when done
+ *
+ * Data is always read to i2400m->bm_ack_buf
+ */
+static
+int i2400mu_notif_submit(struct i2400mu *i2400mu, struct urb *urb,
+			 struct completion *completion)
+{
+	struct usb_endpoint_descriptor *notif_epd;
+	int rx_pipe;
+
+	notif_epd = usb_get_epd(i2400mu->usb_iface,
+				i2400mu->endpoint_cfg.notification);
+	rx_pipe = usb_rcvintpipe(i2400mu->usb_dev, notif_epd->bEndpointAddress);
+	/* Receive into bm_ack_buf; the callback completes @completion */
+	usb_fill_int_urb(urb, i2400mu->usb_dev, rx_pipe,
+			 i2400mu->i2400m.bm_ack_buf, I2400M_BM_ACK_BUF_SIZE,
+			 __i2400mu_bm_notif_cb, completion,
+			 notif_epd->bInterval);
+	return usb_submit_urb(urb, GFP_KERNEL);
+}
+
+
+/*
+ * Read an ack from the notification endpoint
+ *
+ * @i2400m: device descriptor
+ * @_ack: pointer to where to store the read data
+ * @ack_size: how many bytes we should read
+ *
+ * Returns: < 0 errno code on error; otherwise, amount of received bytes.
+ *
+ * Submits a notification read, appends the read data to the given ack
+ * buffer and then repeats (until @ack_size bytes have been
+ * received).
+ */
+ssize_t i2400mu_bus_bm_wait_for_ack(struct i2400m *i2400m,
+				    struct i2400m_bootrom_header *_ack,
+				    size_t ack_size)
+{
+	ssize_t result = -ENOMEM;
+	struct device *dev = i2400m_dev(i2400m);
+	struct i2400mu *i2400mu = container_of(i2400m, struct i2400mu, i2400m);
+	struct urb notif_urb;
+	void *ack = _ack;
+	size_t offset, len;
+	long val;
+	int do_autopm = 1;
+	DECLARE_COMPLETION_ONSTACK(notif_completion);
+
+	d_fnstart(8, dev, "(i2400m %p ack %p size %zu)\n",
+		  i2400m, ack, ack_size);
+	BUG_ON(_ack == i2400m->bm_ack_buf);	/* it is the memcpy() source */
+	result = usb_autopm_get_interface(i2400mu->usb_iface);
+	if (result < 0) {
+		dev_err(dev, "BM-ACK: can't get autopm: %d\n", (int) result);
+		do_autopm = 0;	/* carry on, but skip the matching put */
+	}
+	usb_init_urb(&notif_urb);	/* ready notifications */
+	usb_get_urb(&notif_urb);
+	offset = 0;
+	while (offset < ack_size) {
+		init_completion(&notif_completion);
+		result = i2400mu_notif_submit(i2400mu, &notif_urb,
+					      &notif_completion);
+		if (result < 0)
+			goto error_notif_urb_submit;
+		val = wait_for_completion_interruptible_timeout(
+			&notif_completion, HZ);
+		if (val == 0) {
+			result = -ETIMEDOUT;
+			usb_kill_urb(&notif_urb);	/* Timedout */
+			goto error_notif_wait;
+		}
+		if (val == -ERESTARTSYS) {
+			result = -EINTR;		/* Interrupted */
+			usb_kill_urb(&notif_urb);
+			goto error_notif_wait;
+		}
+		result = notif_urb.status;		/* How was the ack? */
+		switch (result) {
+		case 0:
+			break;
+		case -EINVAL:			/* while removing driver */
+		case -ENODEV:			/* dev disconnect ... */
+		case -ENOENT:			/* just ignore it */
+		case -ESHUTDOWN:		/* and exit */
+		case -ECONNRESET:
+			result = -ESHUTDOWN;
+			goto error_dev_gone;
+		default:				/* any other? */
+			usb_kill_urb(&notif_urb);	/* URB failed */
+			if (edc_inc(&i2400mu->urb_edc,
+				    EDC_MAX_ERRORS, EDC_ERROR_TIMEFRAME))
+				goto error_exceeded;
+			dev_err(dev, "BM-ACK: URB error %d, "
+				"retrying\n", notif_urb.status);
+			continue;	/* retry */
+		}
+		if (notif_urb.actual_length == 0) {
+			d_printf(6, dev, "ZLP received, retrying\n");
+			continue;
+		}
+		/* Got data, append it to the buffer (never past @ack_size) */
+		len = min(ack_size - offset, (size_t) notif_urb.actual_length);
+		memcpy(ack + offset, i2400m->bm_ack_buf, len);
+		offset += len;
+	}
+	result = offset;
+error_notif_urb_submit:
+error_notif_wait:
+error_dev_gone:
+out:
+	if (do_autopm)
+		usb_autopm_put_interface(i2400mu->usb_iface);
+	d_fnend(8, dev, "(i2400m %p ack %p size %zu) = %ld\n",
+		i2400m, ack, ack_size, (long) result);
+	usb_put_urb(&notif_urb);
+	return result;
+
+error_exceeded:
+	dev_err(dev, "bm: maximum errors in notification URB exceeded; "
+		"resetting device\n");
+	usb_queue_reset_device(i2400mu->usb_iface);
+	goto out;
+}
diff --git a/drivers/staging/wimax/i2400m/usb-notif.c b/drivers/staging/wimax/i2400m/usb-notif.c
new file mode 100644
index 000000000000..5d429f816125
--- /dev/null
+++ b/drivers/staging/wimax/i2400m/usb-notif.c
@@ -0,0 +1,258 @@
+/*
+ * Intel Wireless WiMAX Connection 2400m over USB
+ * Notification handling
+ *
+ *
+ * Copyright (C) 2007-2008 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Intel Corporation <linux-wimax@intel.com>
+ * Yanir Lubetkin <yanirx.lubetkin@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *  - Initial implementation
+ *
+ *
+ * The notification endpoint is active when the device is not in boot
+ * mode; in here we just read and get notifications; based on those,
+ * we act to either reinitialize the device after a reboot or to
+ * submit a RX request.
+ *
+ * ROADMAP
+ *
+ * i2400mu_notification_setup()
+ *
+ * i2400mu_notification_release()
+ *
+ * i2400mu_notification_cb()	Called when a URB is ready
+ *   i2400mu_notification_grok()
+ *     i2400m_is_boot_barker()
+ *     i2400m_dev_reset_handle()
+ *     i2400mu_rx_kick()
+ */
+#include <linux/usb.h>
+#include <linux/slab.h>
+#include "i2400m-usb.h"
+
+
+#define D_SUBMODULE notif
+#include "usb-debug-levels.h"
+
+
+/*
+ * Block of zeroes that incoming notification payloads are compared
+ * against (see i2400mu_notification_grok()).
+ */
+static const
+__le32 i2400m_ZERO_BARKER[4] = { 0, 0, 0, 0 };
+
+
+/*
+ * Decode a payload received on the notification endpoint
+ *
+ * @i2400mu: USB device descriptor
+ * @buf: payload as received from the device
+ * @buf_len: number of bytes in @buf
+ *
+ * Three outcomes are handled here:
+ *
+ *   - payload shorter than a barker: nothing is processed and -EIO
+ *     is returned.
+ *
+ *   - payload starts with a block of zeroes: the RX path is kicked
+ *     with i2400mu_rx_kick() and 0 is returned.
+ *
+ *   - anything else is checked with i2400m_is_boot_barker(); on a
+ *     non-negative answer the result of i2400m_dev_reset_handle() is
+ *     returned, otherwise the payload is handed to
+ *     i2400m_unknown_barker() and the negative answer is returned.
+ */
+static
+int i2400mu_notification_grok(struct i2400mu *i2400mu, const void *buf,
+				 size_t buf_len)
+{
+	struct i2400m *i2400m = &i2400mu->i2400m;
+	struct device *dev = &i2400mu->usb_iface->dev;
+	int ret;
+
+	d_fnstart(4, dev, "(i2400m %p buf %p buf_len %zu)\n",
+		  i2400mu, buf, buf_len);
+	if (buf_len < sizeof(i2400m_ZERO_BARKER)) {
+		/* Too short to be anything we know about */
+		ret = -EIO;
+	} else if (memcmp(i2400m_ZERO_BARKER, buf,
+			  sizeof(i2400m_ZERO_BARKER)) == 0) {
+		/* Block of zeroes: there is data to read */
+		i2400mu_rx_kick(i2400mu);
+		ret = 0;
+	} else {
+		ret = i2400m_is_boot_barker(i2400m, buf, buf_len);
+		if (unlikely(ret >= 0))
+			ret = i2400m_dev_reset_handle(i2400m,
+						      "device rebooted");
+		else	/* Unknown or unexpected data in the notif message */
+			i2400m_unknown_barker(i2400m, buf, buf_len);
+	}
+	d_fnend(4, dev, "(i2400m %p buf %p buf_len %zu) = %d\n",
+		i2400mu, buf, buf_len, ret);
+	return ret;
+}
+
+
+/*
+ * URB callback for the notification endpoint
+ *
+ * @urb: the urb received from the notification endpoint; its context
+ *     pointer is the struct i2400mu it was set up for
+ *
+ * This function will just process the USB side of the transaction,
+ * checking everything is fine, pass the processing to
+ * i2400mu_notification_grok() and resubmit the URB.
+ *
+ * A device reset is queued when the error counter in
+ * i2400mu->urb_edc trips, when processing reports -ENOMEM or when
+ * the URB cannot be resubmitted.
+ */
+static
+void i2400mu_notification_cb(struct urb *urb)
+{
+	int ret;
+	struct i2400mu *i2400mu = urb->context;
+	struct device *dev = &i2400mu->usb_iface->dev;
+
+	d_fnstart(4, dev, "(urb %p status %d actual_length %d)\n",
+		  urb, urb->status, urb->actual_length);
+	ret = urb->status;
+	switch (ret) {
+	case 0:
+		/* Transfer completed fine, look at what we got */
+		ret = i2400mu_notification_grok(i2400mu, urb->transfer_buffer,
+						urb->actual_length);
+		/* -EIO only counts against the error budget */
+		if (ret == -EIO && edc_inc(&i2400mu->urb_edc, EDC_MAX_ERRORS,
+					   EDC_ERROR_TIMEFRAME))
+			goto error_exceeded;
+		if (ret == -ENOMEM)	/* uff...power cycle? shutdown? */
+			goto error_exceeded;
+		break;
+	case -EINVAL:			/* while removing driver */
+	case -ENODEV:			/* dev disconnect ... */
+	case -ENOENT:			/* ditto */
+	case -ESHUTDOWN:		/* URB killed */
+	case -ECONNRESET:		/* disconnection */
+		/* Leave without resubmitting the URB */
+		goto out;		/* Notify around */
+	default:			/* Some error? */
+		if (edc_inc(&i2400mu->urb_edc,
+			    EDC_MAX_ERRORS, EDC_ERROR_TIMEFRAME))
+			goto error_exceeded;
+		dev_err(dev, "notification: URB error %d, retrying\n",
+			urb->status);
+	}
+	/* Re-arm the notification URB for the next message */
+	usb_mark_last_busy(i2400mu->usb_dev);
+	ret = usb_submit_urb(i2400mu->notif_urb, GFP_ATOMIC);
+	switch (ret) {
+	case 0:
+	case -EINVAL:			/* while removing driver */
+	case -ENODEV:			/* dev disconnect ... */
+	case -ENOENT:			/* ditto */
+	case -ESHUTDOWN:		/* URB killed */
+	case -ECONNRESET:		/* disconnection */
+		break;			/* just ignore */
+	default:			/* Some error? */
+		dev_err(dev, "notification: cannot submit URB: %d\n", ret);
+		goto error_submit;
+	}
+	d_fnend(4, dev, "(urb %p status %d actual_length %d) = void\n",
+		urb, urb->status, urb->actual_length);
+	return;
+
+error_exceeded:
+	dev_err(dev, "maximum errors in notification URB exceeded; "
+		"resetting device\n");
+	/* fall into the reset shared with the resubmit failure path */
+error_submit:
+	usb_queue_reset_device(i2400mu->usb_iface);
+out:
+	d_fnend(4, dev, "(urb %p status %d actual_length %d) = void\n",
+		urb, urb->status, urb->actual_length);
+}
+
+
+/*
+ * Set up the notification endpoint
+ *
+ * @i2400mu: USB device descriptor
+ *
+ * Allocates the transfer buffer and the interrupt URB used for
+ * receiving unsolicited barkers from the device and submits the URB;
+ * completions are handled by i2400mu_notification_cb().
+ *
+ * Returns: 0 if ok, < 0 errno code on error. When submitting fails
+ * the URB and its buffer are freed and i2400mu->notif_urb is reset to
+ * NULL, so i2400mu_notification_release() will not touch them again.
+ */
+int i2400mu_notification_setup(struct i2400mu *i2400mu)
+{
+	struct device *dev = &i2400mu->usb_iface->dev;
+	int usb_pipe, ret = 0;
+	struct usb_endpoint_descriptor *epd;
+	char *buf;
+
+	d_fnstart(4, dev, "(i2400m %p)\n", i2400mu);
+	buf = kmalloc(I2400MU_MAX_NOTIFICATION_LEN, GFP_KERNEL | GFP_DMA);
+	if (buf == NULL) {
+		ret = -ENOMEM;
+		goto error_buf_alloc;
+	}
+
+	i2400mu->notif_urb = usb_alloc_urb(0, GFP_KERNEL);
+	if (!i2400mu->notif_urb) {
+		ret = -ENOMEM;
+		goto error_alloc_urb;
+	}
+	epd = usb_get_epd(i2400mu->usb_iface,
+			  i2400mu->endpoint_cfg.notification);
+	usb_pipe = usb_rcvintpipe(i2400mu->usb_dev, epd->bEndpointAddress);
+	usb_fill_int_urb(i2400mu->notif_urb, i2400mu->usb_dev, usb_pipe,
+			 buf, I2400MU_MAX_NOTIFICATION_LEN,
+			 i2400mu_notification_cb, i2400mu, epd->bInterval);
+	ret = usb_submit_urb(i2400mu->notif_urb, GFP_KERNEL);
+	if (ret != 0) {
+		dev_err(dev, "notification: cannot submit URB: %d\n", ret);
+		goto error_submit;
+	}
+	d_fnend(4, dev, "(i2400m %p) = %d\n", i2400mu, ret);
+	return ret;
+
+error_submit:
+	usb_free_urb(i2400mu->notif_urb);
+	/*
+	 * Don't leave a dangling pointer behind: _release() checks
+	 * notif_urb against NULL before killing and freeing it.
+	 */
+	i2400mu->notif_urb = NULL;
+error_alloc_urb:
+	kfree(buf);
+error_buf_alloc:
+	d_fnend(4, dev, "(i2400m %p) = %d\n", i2400mu, ret);
+	return ret;
+}
+
+
+/*
+ * Tear down the notification mechanism
+ *
+ * @i2400mu: USB device descriptor
+ *
+ * Kills the interrupt endpoint URB, frees its transfer buffer and
+ * the URB itself.
+ *
+ * The URB pointer is cleared once released and checked on entry, so
+ * calling this again is harmless -- for example _suspend() calls
+ * this and, when hibernating, a _disconnect() can follow.
+ */
+void i2400mu_notification_release(struct i2400mu *i2400mu)
+{
+	struct device *dev = &i2400mu->usb_iface->dev;
+	struct urb *notif_urb;
+
+	d_fnstart(4, dev, "(i2400mu %p)\n", i2400mu);
+	notif_urb = i2400mu->notif_urb;
+	if (notif_urb) {
+		usb_kill_urb(notif_urb);
+		kfree(notif_urb->transfer_buffer);
+		usb_free_urb(notif_urb);
+		i2400mu->notif_urb = NULL;
+	}
+	d_fnend(4, dev, "(i2400mu %p)\n", i2400mu);
+}
diff --git a/drivers/staging/wimax/i2400m/usb-rx.c b/drivers/staging/wimax/i2400m/usb-rx.c
new file mode 100644
index 000000000000..5b64bda7d9e7
--- /dev/null
+++ b/drivers/staging/wimax/i2400m/usb-rx.c
@@ -0,0 +1,462 @@
+/*
+ * Intel Wireless WiMAX Connection 2400m
+ * USB RX handling
+ *
+ *
+ * Copyright (C) 2007-2008 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Intel Corporation <linux-wimax@intel.com>
+ * Yanir Lubetkin <yanirx.lubetkin@intel.com>
+ *  - Initial implementation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *  - Use skb_clone(), break up processing in chunks
+ *  - Split transport/device specific
+ *  - Make buffer size dynamic to exert less memory pressure
+ *
+ *
+ * This handles the RX path on USB.
+ *
+ * When a notification is received that says 'there is RX data ready',
+ * we call i2400mu_rx_kick(); that wakes up the RX kthread, which
+ * reads a buffer from USB and passes it to i2400m_rx() in the generic
+ * handling code. The RX buffer has a specific format that is
+ * described in rx.c.
+ *
+ * We use a kernel thread in a loop because:
+ *
+ *  - we want to be able to call the USB power management get/put
+ *    functions (blocking) before each transaction.
+ *
+ *  - We might get a lot of notifications and we don't want to submit
+ *    a zillion reads; by serializing, we are throttling.
+ *
+ *  - RX data processing can get heavy enough so that it is not
+ *    appropriate for doing it in the USB callback; thus we run it in a
+ *    process context.
+ *
+ * We provide a read buffer of an arbitrary size (short of a page); if
+ * the callback reports -EOVERFLOW, it means it was too small, so we
+ * just double the size and retry (being careful to append, as
+ * sometimes the device provided some data). Every now and then we
+ * check if the average packet size is smaller than the current packet
+ * size and if so, we halve it. At the end, the size of the
+ * preallocated buffer should be following the average received
+ * transaction size, adapting dynamically to it.
+ *
+ * ROADMAP
+ *
+ * i2400mu_rx_kick()		   Called from notif.c when we get a
+ *   			           'data ready' notification
+ * i2400mu_rxd()                   Kernel RX daemon
+ *   i2400mu_rx()                  Receive USB data
+ *   i2400m_rx()                   Send data to generic i2400m RX handling
+ *
+ * i2400mu_rx_setup()              called from i2400mu_bus_dev_start()
+ *
+ * i2400mu_rx_release()            called from i2400mu_bus_dev_stop()
+ */
+#include <linux/workqueue.h>
+#include <linux/slab.h>
+#include <linux/usb.h>
+#include "i2400m-usb.h"
+
+
+#define D_SUBMODULE rx
+#include "usb-debug-levels.h"
+
+/*
+ * Dynamic RX size
+ *
+ * We can't let the rx_size be a multiple of 512 bytes (the RX
+ * endpoint's max packet size). On some USB host controllers (we
+ * haven't been able to fully characterize which), if the device is
+ * about to send (for example) X bytes and we only post a buffer to
+ * receive n*512, it will fail to mark that as babble (so that
+ * i2400mu_rx() [case -EOVERFLOW] can resize the buffer and get the
+ * rest).
+ *
+ * So on growing or shrinking, if it is a multiple of the
+ * maxpacketsize, we remove some (instead of increasing some, so in a
+ * buddy allocator we try to waste less space).
+ *
+ * Note we also need a hook for this on i2400mu_rx() -- when we do the
+ * first read, we are sure we won't hit this spot because
+ * i2400mu->rx_size has been set properly. However, if we have to
+ * double because of -EOVERFLOW, when we launch the read to get the
+ * rest of the data, we *have* to make sure that also is not a
+ * multiple of the max_pkt_size.
+ */
+
+/*
+ * Compute the next (doubled) RX buffer size
+ *
+ * @i2400mu: USB device descriptor
+ *
+ * Returns twice the current i2400mu->rx_size, minus 8 bytes when the
+ * doubled value would be a multiple of the 512 byte max packet
+ * size. i2400mu->rx_size itself is not modified here.
+ */
+static
+size_t i2400mu_rx_size_grow(struct i2400mu *i2400mu)
+{
+	const size_t max_pkt_size = 512;
+	struct device *dev = &i2400mu->usb_iface->dev;
+	size_t rx_size = 2 * i2400mu->rx_size;
+
+	if (rx_size % max_pkt_size != 0) {
+		d_printf(1, dev,
+			 "RX: expected size grew to %zu from %zu\n",
+			 rx_size, i2400mu->rx_size);
+		return rx_size;
+	}
+	/* Steer away from a multiple of the max packet size */
+	rx_size -= 8;
+	d_printf(1, dev,
+		 "RX: expected size grew to %zu [adjusted -8] "
+		 "from %zu\n",
+		 rx_size, i2400mu->rx_size);
+	return rx_size;
+}
+
+
+/*
+ * Halve the RX buffer size if recent messages were small enough
+ *
+ * @i2400mu: USB device descriptor
+ *
+ * Does nothing unless at least 100 messages have been accounted and
+ * auto-shrink is enabled. Then, if the average accounted size is
+ * below half of the current rx_size, rx_size is halved (minus 8 if
+ * the half is a multiple of the 512 byte max packet size) and the
+ * statistics are restarted.
+ */
+static
+void i2400mu_rx_size_maybe_shrink(struct i2400mu *i2400mu)
+{
+	struct device *dev = &i2400mu->usb_iface->dev;
+	const size_t max_pkt_size = 512;
+	size_t avg_rx_size, new_rx_size;
+
+	if (likely(i2400mu->rx_size_cnt < 100
+		   || !i2400mu->rx_size_auto_shrink))
+		return;
+
+	avg_rx_size = i2400mu->rx_size_acc / i2400mu->rx_size_cnt;
+	new_rx_size = i2400mu->rx_size / 2;
+	if (avg_rx_size >= new_rx_size)
+		return;		/* still making good use of the buffer */
+
+	if (new_rx_size % max_pkt_size != 0) {
+		d_printf(1, dev,
+			 "RX: expected size shrank to %zu "
+			 "from %zu\n",
+			 new_rx_size, i2400mu->rx_size);
+	} else {
+		new_rx_size -= 8;
+		d_printf(1, dev,
+			 "RX: expected size shrank to %zu "
+			 "[adjusted -8] from %zu\n",
+			 new_rx_size, i2400mu->rx_size);
+	}
+	i2400mu->rx_size = new_rx_size;
+	i2400mu->rx_size_cnt = 0;
+	i2400mu->rx_size_acc = i2400mu->rx_size;
+}
+
+/*
+ * Receive a message with payloads from the USB bus into an skb
+ *
+ * @i2400mu: USB device descriptor
+ * @rx_skb: skb where to place the received message
+ *
+ * Deals with all the USB-specifics of receiving, dynamically
+ * increasing the buffer size if so needed. Returns the payload in the
+ * skb, ready to process. On a zero-length packet, we retry.
+ *
+ * On soft USB errors, we retry (until they become too frequent and
+ * then are promoted to hard); on hard USB errors, we reset the
+ * device. On other errors (skb reallocation), we just drop it and
+ * hope for the next invocation to solve it.
+ *
+ * If the buffer would have to grow past 64 KiB to fit what the
+ * device is sending, we stop growing and return the skb as it is.
+ *
+ * Returns: pointer to the skb if ok, ERR_PTR on error.
+ *   NOTE: this function might realloc the skb (if it is too small),
+ *   so always update with the one returned.
+ *   ERR_PTR() is < 0 on error.
+ *   Will return NULL if it cannot reallocate -- this can be
+ *   considered a transient retryable error.
+ */
+static
+struct sk_buff *i2400mu_rx(struct i2400mu *i2400mu, struct sk_buff *rx_skb)
+{
+	int result = 0;
+	struct device *dev = &i2400mu->usb_iface->dev;
+	int usb_pipe, read_size, rx_size, do_autopm;
+	struct usb_endpoint_descriptor *epd;
+	const size_t max_pkt_size = 512;
+
+	d_fnstart(4, dev, "(i2400mu %p)\n", i2400mu);
+	do_autopm = atomic_read(&i2400mu->do_autopm);
+	result = do_autopm ?
+		usb_autopm_get_interface(i2400mu->usb_iface) : 0;
+	if (result < 0) {
+		dev_err(dev, "RX: can't get autopm: %d\n", result);
+		do_autopm = 0;	/* nothing to put back on exit */
+	}
+	epd = usb_get_epd(i2400mu->usb_iface, i2400mu->endpoint_cfg.bulk_in);
+	usb_pipe = usb_rcvbulkpipe(i2400mu->usb_dev, epd->bEndpointAddress);
+retry:
+	/* Read into whatever room is left at the tail of the skb */
+	rx_size = skb_end_pointer(rx_skb) - rx_skb->data - rx_skb->len;
+	if (unlikely(rx_size % max_pkt_size == 0)) {
+		rx_size -= 8;
+		d_printf(1, dev, "RX: rx_size adapted to %d [-8]\n", rx_size);
+	}
+	result = usb_bulk_msg(
+		i2400mu->usb_dev, usb_pipe, rx_skb->data + rx_skb->len,
+		rx_size, &read_size, 200);
+	usb_mark_last_busy(i2400mu->usb_dev);
+	switch (result) {
+	case 0:
+		if (read_size == 0)
+			goto retry;	/* ZLP, just resubmit */
+		skb_put(rx_skb, read_size);
+		break;
+	case -EPIPE:
+		/*
+		 * Stall -- maybe the device is choking with our
+		 * requests. Clear it and give it some time. If they
+		 * happen too often, it might be another symptom, so we
+		 * reset.
+		 *
+		 * No error handling for usb_clear_halt(); if it
+		 * works, the retry works; if it fails, this switch
+		 * does the error handling for us.
+		 */
+		if (edc_inc(&i2400mu->urb_edc,
+			    10 * EDC_MAX_ERRORS, EDC_ERROR_TIMEFRAME)) {
+			dev_err(dev, "BM-CMD: too many stalls in "
+				"URB; resetting device\n");
+			goto do_reset;
+		}
+		usb_clear_halt(i2400mu->usb_dev, usb_pipe);
+		msleep(10);	/* give the device some time */
+		goto retry;
+	case -EINVAL:			/* while removing driver */
+	case -ENODEV:			/* dev disconnect ... */
+	case -ENOENT:			/* just ignore it */
+	case -ESHUTDOWN:
+	case -ECONNRESET:
+		break;
+	case -EOVERFLOW: {		/* too small, reallocate */
+		struct sk_buff *new_skb;
+		rx_size = i2400mu_rx_size_grow(i2400mu);
+		if (rx_size > (1 << 16)) {	/* cap it */
+			/*
+			 * Only the message is rate limited; giving up
+			 * must not depend on whether it got printed.
+			 */
+			if (printk_ratelimit())
+				dev_err(dev, "BUG? rx_size up to %d\n",
+					rx_size);
+			result = -EINVAL;
+			goto out;
+		}
+		i2400mu->rx_size = rx_size;
+		/* Keep what was received and append the rest to it */
+		skb_put(rx_skb, read_size);
+		new_skb = skb_copy_expand(rx_skb, 0, rx_size - rx_skb->len,
+					  GFP_KERNEL);
+		if (new_skb == NULL) {
+			kfree_skb(rx_skb);
+			rx_skb = NULL;
+			goto out;	/* drop it...*/
+		}
+		kfree_skb(rx_skb);
+		rx_skb = new_skb;
+		i2400mu->rx_size_cnt = 0;
+		i2400mu->rx_size_acc = i2400mu->rx_size;
+		d_printf(1, dev, "RX: size changed to %d, received %d, "
+			 "copied %d, capacity %ld\n",
+			 rx_size, read_size, rx_skb->len,
+			 (long) skb_end_offset(new_skb));
+		goto retry;
+	}
+		/* In most cases, it happens due to the hardware scheduling a
+		 * read when there was no data - unfortunately, we have no way
+		 * to tell this timeout from a USB timeout. So we just ignore
+		 * it. */
+	case -ETIMEDOUT:
+		dev_err(dev, "RX: timeout: %d\n", result);
+		result = 0;
+		break;
+	default:			/* Any error */
+		if (edc_inc(&i2400mu->urb_edc,
+			    EDC_MAX_ERRORS, EDC_ERROR_TIMEFRAME))
+			goto error_reset;
+		dev_err(dev, "RX: error receiving URB: %d, retrying\n", result);
+		goto retry;
+	}
+out:
+	if (do_autopm)
+		usb_autopm_put_interface(i2400mu->usb_iface);
+	d_fnend(4, dev, "(i2400mu %p) = %p\n", i2400mu, rx_skb);
+	return rx_skb;
+
+error_reset:
+	dev_err(dev, "RX: maximum errors in URB exceeded; "
+		"resetting device\n");
+do_reset:
+	usb_queue_reset_device(i2400mu->usb_iface);
+	/* hand the USB error back to the caller as an ERR_PTR */
+	rx_skb = ERR_PTR(result);
+	goto out;
+}
+
+
+/*
+ * Kernel thread for USB reception of data
+ *
+ * @_i2400mu: the struct i2400mu this thread serves (passed as void *)
+ *
+ * This thread waits for a kick; once kicked, it will allocate an skb
+ * and receive a single message to it from USB (using
+ * i2400mu_rx()). Once received, it is passed to the generic i2400m RX
+ * code for processing.
+ *
+ * When done processing, it runs some dirty statistics to verify if
+ * the last 100 messages received were smaller than half of the
+ * current RX buffer size. In that case, the RX buffer size is
+ * halved. This helps lower the pressure on the memory allocator.
+ *
+ * Hard errors force the thread to exit.
+ *
+ * Returns: 0 when asked to stop, the error from i2400mu_rx() or
+ * i2400m_rx() when exiting on a hard error.
+ */
+static
+int i2400mu_rxd(void *_i2400mu)
+{
+	int result = 0;
+	struct i2400mu *i2400mu = _i2400mu;
+	struct i2400m *i2400m = &i2400mu->i2400m;
+	struct device *dev = &i2400mu->usb_iface->dev;
+	struct net_device *net_dev = i2400m->wimax_dev.net_dev;
+	size_t pending;
+	int rx_size;
+	struct sk_buff *rx_skb;
+	unsigned long flags;
+
+	d_fnstart(4, dev, "(i2400mu %p)\n", i2400mu);
+	/* Publish ourselves so i2400mu_rx_release() can find the thread */
+	spin_lock_irqsave(&i2400m->rx_lock, flags);
+	BUG_ON(i2400mu->rx_kthread != NULL);
+	i2400mu->rx_kthread = current;
+	spin_unlock_irqrestore(&i2400m->rx_lock, flags);
+	while (1) {
+		d_printf(2, dev, "RX: waiting for messages\n");
+		pending = 0;
+		/* the wait condition also samples the pending count */
+		wait_event_interruptible(
+			i2400mu->rx_wq,
+			(kthread_should_stop()	/* check this first! */
+			 || (pending = atomic_read(&i2400mu->rx_pending_count)))
+			);
+		if (kthread_should_stop())
+			break;
+		if (pending == 0)
+			continue;
+		rx_size = i2400mu->rx_size;
+		d_printf(2, dev, "RX: reading up to %d bytes\n", rx_size);
+		rx_skb = __netdev_alloc_skb(net_dev, rx_size, GFP_KERNEL);
+		if (rx_skb == NULL) {
+			dev_err(dev, "RX: can't allocate skb [%d bytes]\n",
+				rx_size);
+			msleep(50);	/* give it some time? */
+			continue;
+		}
+
+		/* Receive the message with the payloads */
+		rx_skb = i2400mu_rx(i2400mu, rx_skb);
+		result = PTR_ERR(rx_skb);
+		if (IS_ERR(rx_skb))
+			goto out;
+		/* one pending read has been served, error pointers aside */
+		atomic_dec(&i2400mu->rx_pending_count);
+		if (rx_skb == NULL || rx_skb->len == 0) {
+			/* some "ignorable" condition */
+			kfree_skb(rx_skb);
+			continue;
+		}
+
+		/* Deliver the message to the generic i2400m code */
+		i2400mu->rx_size_cnt++;
+		i2400mu->rx_size_acc += rx_skb->len;
+		result = i2400m_rx(i2400m, rx_skb);
+		/* -EIO only turns into a reset once the error budget is spent */
+		if (result == -EIO
+		    && edc_inc(&i2400mu->urb_edc,
+			       EDC_MAX_ERRORS, EDC_ERROR_TIMEFRAME)) {
+			goto error_reset;
+		}
+
+		/* Maybe adjust RX buffer size */
+		i2400mu_rx_size_maybe_shrink(i2400mu);
+	}
+	result = 0;
+out:
+	/* Unpublish the thread pointer before exiting */
+	spin_lock_irqsave(&i2400m->rx_lock, flags);
+	i2400mu->rx_kthread = NULL;
+	spin_unlock_irqrestore(&i2400m->rx_lock, flags);
+	d_fnend(4, dev, "(i2400mu %p) = %d\n", i2400mu, result);
+	return result;
+
+error_reset:
+	dev_err(dev, "RX: maximum errors in received buffer exceeded; "
+		"resetting device\n");
+	usb_queue_reset_device(i2400mu->usb_iface);
+	goto out;
+}
+
+
+/*
+ * Signal that there is data to read from the device
+ *
+ * @i2400mu: USB device descriptor
+ *
+ * Increments the count of pending reads and wakes up everybody
+ * sleeping on the RX wait queue.
+ */
+void i2400mu_rx_kick(struct i2400mu *i2400mu)
+{
+	struct device *dev = &i2400mu->usb_iface->dev;
+
+	d_fnstart(3, dev, "(i2400mu %p)\n", &i2400mu->i2400m);
+	atomic_inc(&i2400mu->rx_pending_count);
+	wake_up_all(&i2400mu->rx_wq);
+	d_fnend(3, dev, "(i2400m %p) = void\n", &i2400mu->i2400m);
+}
+
+
+/*
+ * Start the RX kernel thread
+ *
+ * @i2400mu: USB device descriptor
+ *
+ * Launches i2400mu_rxd() in a thread named after the wimax device
+ * ("<name>-rx").
+ *
+ * Returns: 0 if ok, < 0 errno code if the thread could not be started.
+ */
+int i2400mu_rx_setup(struct i2400mu *i2400mu)
+{
+	struct device *dev = &i2400mu->usb_iface->dev;
+	struct task_struct *rx_task;
+	int result;
+
+	/* the kthread function sets i2400mu->rx_kthread */
+	rx_task = kthread_run(i2400mu_rxd, i2400mu, "%s-rx",
+			      i2400mu->i2400m.wimax_dev.name);
+	if (!IS_ERR(rx_task))
+		return 0;
+
+	result = PTR_ERR(rx_task);
+	dev_err(dev, "RX: cannot start thread: %d\n", result);
+	return result;
+}
+
+
+/*
+ * Stop the RX kernel thread
+ *
+ * @i2400mu: USB device descriptor
+ *
+ * Takes the thread pointer out of i2400mu->rx_kthread under rx_lock
+ * and, if there was one, stops it with kthread_stop().
+ */
+void i2400mu_rx_release(struct i2400mu *i2400mu)
+{
+	struct i2400m *i2400m = &i2400mu->i2400m;
+	struct device *dev = i2400m_dev(i2400m);
+	struct task_struct *rx_task;
+	unsigned long flags;
+
+	spin_lock_irqsave(&i2400m->rx_lock, flags);
+	rx_task = i2400mu->rx_kthread;
+	i2400mu->rx_kthread = NULL;
+	spin_unlock_irqrestore(&i2400m->rx_lock, flags);
+
+	if (rx_task == NULL) {
+		d_printf(1, dev, "RX: kthread had already exited\n");
+		return;
+	}
+	kthread_stop(rx_task);
+}
+
diff --git a/drivers/staging/wimax/i2400m/usb-tx.c b/drivers/staging/wimax/i2400m/usb-tx.c
new file mode 100644
index 000000000000..3ba9d70cca1b
--- /dev/null
+++ b/drivers/staging/wimax/i2400m/usb-tx.c
@@ -0,0 +1,273 @@
+/*
+ * Intel Wireless WiMAX Connection 2400m
+ * USB specific TX handling
+ *
+ *
+ * Copyright (C) 2007-2008 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Intel Corporation <linux-wimax@intel.com>
+ * Yanir Lubetkin <yanirx.lubetkin@intel.com>
+ *  - Initial implementation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *  - Split transport/device specific
+ *
+ *
+ * Takes the TX messages in the i2400m's driver TX FIFO and sends them
+ * to the device until there are no more.
+ *
+ * If we fail sending the message, we just drop it. There isn't much
+ * we can do at this point. We could also retry, but the USB stack has
+ * already retried and still failed, so there is not much of a
+ * point. As well, most of the traffic is network, which has recovery
+ * methods for dropped packets.
+ *
+ * For sending we just obtain a FIFO buffer to send, send it to the
+ * USB bulk out, tell the TX FIFO code we have sent it; query for
+ * another one, etc... until done.
+ *
+ * We use a thread so we can call usb_autopm_get_interface() and
+ * usb_autopm_put_interface() for each transaction; this way when the
+ * device goes idle, it will suspend. It also has less overhead than a
+ * dedicated workqueue, as it is being used for a single task.
+ *
+ * ROADMAP
+ *
+ * i2400mu_tx_setup()
+ * i2400mu_tx_release()
+ *
+ * i2400mu_bus_tx_kick()	- Called by the tx.c code when there
+ *                                is new data in the FIFO.
+ * i2400mu_txd()
+ *   i2400m_tx_msg_get()
+ *   i2400m_tx_msg_sent()
+ */
+#include "i2400m-usb.h"
+
+
+#define D_SUBMODULE tx
+#include "usb-debug-levels.h"
+
+
+/*
+ * Send a TX message to the device over the bulk-out endpoint
+ *
+ * @i2400mu: USB device descriptor
+ * @tx_msg: message to send
+ * @tx_msg_size: size of @tx_msg in bytes
+ *
+ * Note that any invocation consumes the message to be sent, no matter
+ * if it succeeds or fails (we have no real way to retry or complain).
+ *
+ * Stalls are cleared and the transfer retried; other USB errors are
+ * retried too. Once either happens too often the device is queued
+ * for reset.
+ *
+ * Return: 0 if ok, < 0 errno code on error: -EIO on a short write,
+ * -ESHUTDOWN when the device is going away or had to be reset because
+ * of stalls, otherwise the USB error that exhausted the retries.
+ */
+static
+int i2400mu_tx(struct i2400mu *i2400mu, struct i2400m_msg_hdr *tx_msg,
+	       size_t tx_msg_size)
+{
+	int result = 0;
+	struct i2400m *i2400m = &i2400mu->i2400m;
+	struct device *dev = &i2400mu->usb_iface->dev;
+	/*
+	 * sent_size is reported in the error path below; usb_bulk_msg()
+	 * can fail without having written it, so start from zero.
+	 */
+	int usb_pipe, sent_size = 0, do_autopm;
+	struct usb_endpoint_descriptor *epd;
+
+	d_fnstart(4, dev, "(i2400mu %p)\n", i2400mu);
+	do_autopm = atomic_read(&i2400mu->do_autopm);
+	result = do_autopm ?
+		usb_autopm_get_interface(i2400mu->usb_iface) : 0;
+	if (result < 0) {
+		dev_err(dev, "TX: can't get autopm: %d\n", result);
+		do_autopm = 0;	/* nothing to put back on exit */
+	}
+	epd = usb_get_epd(i2400mu->usb_iface, i2400mu->endpoint_cfg.bulk_out);
+	usb_pipe = usb_sndbulkpipe(i2400mu->usb_dev, epd->bEndpointAddress);
+retry:
+	result = usb_bulk_msg(i2400mu->usb_dev, usb_pipe,
+			      tx_msg, tx_msg_size, &sent_size, 200);
+	usb_mark_last_busy(i2400mu->usb_dev);
+	switch (result) {
+	case 0:
+		if (sent_size != tx_msg_size) {	/* Too short? drop it */
+			dev_err(dev, "TX: short write (%d B vs %zu "
+				"expected)\n", sent_size, tx_msg_size);
+			result = -EIO;
+		}
+		break;
+	case -EPIPE:
+		/*
+		 * Stall -- maybe the device is choking with our
+		 * requests. Clear it and give it some time. If they
+		 * happen too often, it might be another symptom, so we
+		 * reset.
+		 *
+		 * No error handling for usb_clear_halt(); if it
+		 * works, the retry works; if it fails, this switch
+		 * does the error handling for us.
+		 */
+		if (edc_inc(&i2400mu->urb_edc,
+			    10 * EDC_MAX_ERRORS, EDC_ERROR_TIMEFRAME)) {
+			dev_err(dev, "BM-CMD: too many stalls in "
+				"URB; resetting device\n");
+			usb_queue_reset_device(i2400mu->usb_iface);
+		} else {
+			usb_clear_halt(i2400mu->usb_dev, usb_pipe);
+			msleep(10);	/* give the device some time */
+			goto retry;
+		}
+		fallthrough;
+	case -EINVAL:			/* while removing driver */
+	case -ENODEV:			/* dev disconnect ... */
+	case -ENOENT:			/* just ignore it */
+	case -ESHUTDOWN:		/* and exit */
+	case -ECONNRESET:
+		result = -ESHUTDOWN;
+		break;
+	default:			/* Some error? */
+		if (edc_inc(&i2400mu->urb_edc,
+			    EDC_MAX_ERRORS, EDC_ERROR_TIMEFRAME)) {
+			dev_err(dev, "TX: maximum errors in URB "
+				"exceeded; resetting device\n");
+			usb_queue_reset_device(i2400mu->usb_iface);
+		} else {
+			dev_err(dev, "TX: cannot send URB; retrying. "
+				"tx_msg @%zu %zu B [%d sent]: %d\n",
+				(void *) tx_msg - i2400m->tx_buf,
+				tx_msg_size, sent_size, result);
+			goto retry;
+		}
+	}
+	if (do_autopm)
+		usb_autopm_put_interface(i2400mu->usb_iface);
+	d_fnend(4, dev, "(i2400mu %p) = %d\n", i2400mu, result);
+	return result;
+}
+
+
+/*
+ * Kernel thread that drains the TX FIFO to the device
+ *
+ * @_i2400mu: the struct i2400mu this thread serves (passed as void *)
+ *
+ * Sleeps on i2400mu->tx_wq until there is a message in the TX FIFO
+ * (or the thread is asked to stop), sends it with i2400mu_tx() and
+ * acknowledges it to the FIFO with i2400m_tx_msg_sent() -- whether
+ * sending worked or not.
+ *
+ * The loop is left only when kthread_should_stop() says so.
+ *
+ * Return: 0
+ */
+static
+int i2400mu_txd(void *_i2400mu)
+{
+	struct i2400mu *i2400mu = _i2400mu;
+	struct i2400m *i2400m = &i2400mu->i2400m;
+	struct device *dev = &i2400mu->usb_iface->dev;
+	struct i2400m_msg_hdr *tx_msg;
+	size_t tx_msg_size;
+	unsigned long flags;
+
+	d_fnstart(4, dev, "(i2400mu %p)\n", i2400mu);
+
+	/* Publish ourselves so i2400mu_tx_release() can find the thread */
+	spin_lock_irqsave(&i2400m->tx_lock, flags);
+	BUG_ON(i2400mu->tx_kthread != NULL);
+	i2400mu->tx_kthread = current;
+	spin_unlock_irqrestore(&i2400m->tx_lock, flags);
+
+	while (1) {
+		d_printf(2, dev, "TX: waiting for messages\n");
+		tx_msg = NULL;
+		/* the wait condition itself fetches the next message */
+		wait_event_interruptible(
+			i2400mu->tx_wq,
+			(kthread_should_stop()	/* check this first! */
+			 || (tx_msg = i2400m_tx_msg_get(i2400m, &tx_msg_size)))
+			);
+		if (kthread_should_stop())
+			break;
+		WARN_ON(tx_msg == NULL);	/* should not happen...*/
+		d_printf(2, dev, "TX: submitting %zu bytes\n", tx_msg_size);
+		d_dump(5, dev, tx_msg, tx_msg_size);
+		/* Yeah, we ignore errors ... not much we can do */
+		i2400mu_tx(i2400mu, tx_msg, tx_msg_size);
+		i2400m_tx_msg_sent(i2400m);	/* ack it, advance the FIFO */
+	}
+
+	/* Unpublish the thread pointer before exiting */
+	spin_lock_irqsave(&i2400m->tx_lock, flags);
+	i2400mu->tx_kthread = NULL;
+	spin_unlock_irqrestore(&i2400m->tx_lock, flags);
+
+	d_fnend(4, dev, "(i2400mu %p)\n", i2400mu);
+	return 0;
+}
+
+
+/*
+ * i2400m TX engine notifies us that there is data in the FIFO ready
+ * for TX
+ *
+ * @i2400m: device descriptor (embedded in a struct i2400mu)
+ *
+ * Wakes up everybody sleeping on the TX wait queue; nothing is sent
+ * from here.
+ */
+void i2400mu_bus_tx_kick(struct i2400m *i2400m)
+{
+	struct i2400mu *i2400mu;
+	struct device *dev;
+
+	i2400mu = container_of(i2400m, struct i2400mu, i2400m);
+	dev = &i2400mu->usb_iface->dev;
+
+	d_fnstart(3, dev, "(i2400m %p) = void\n", i2400m);
+	wake_up_all(&i2400mu->tx_wq);
+	d_fnend(3, dev, "(i2400m %p) = void\n", i2400m);
+}
+
+
+/*
+ * Start the TX kernel thread
+ *
+ * @i2400mu: USB device descriptor
+ *
+ * Launches i2400mu_txd() in a thread named after the wimax device
+ * ("<name>-tx").
+ *
+ * Returns: 0 if ok, < 0 errno code if the thread could not be started.
+ */
+int i2400mu_tx_setup(struct i2400mu *i2400mu)
+{
+	struct device *dev = &i2400mu->usb_iface->dev;
+	struct task_struct *tx_task;
+	int result;
+
+	/* the kthread function sets i2400mu->tx_kthread */
+	tx_task = kthread_run(i2400mu_txd, i2400mu, "%s-tx",
+			      i2400mu->i2400m.wimax_dev.name);
+	if (!IS_ERR(tx_task))
+		return 0;
+
+	result = PTR_ERR(tx_task);
+	dev_err(dev, "TX: cannot start thread: %d\n", result);
+	return result;
+}
+
+/*
+ * Stop the TX kernel thread
+ *
+ * @i2400mu: USB device descriptor
+ *
+ * Takes the thread pointer out of i2400mu->tx_kthread under tx_lock
+ * and, if there was one, stops it with kthread_stop().
+ */
+void i2400mu_tx_release(struct i2400mu *i2400mu)
+{
+	struct i2400m *i2400m = &i2400mu->i2400m;
+	struct device *dev = i2400m_dev(i2400m);
+	struct task_struct *tx_task;
+	unsigned long flags;
+
+	spin_lock_irqsave(&i2400m->tx_lock, flags);
+	tx_task = i2400mu->tx_kthread;
+	i2400mu->tx_kthread = NULL;
+	spin_unlock_irqrestore(&i2400m->tx_lock, flags);
+
+	if (tx_task == NULL) {
+		d_printf(1, dev, "TX: kthread had already exited\n");
+		return;
+	}
+	kthread_stop(tx_task);
+}
diff --git a/drivers/staging/wimax/i2400m/usb.c b/drivers/staging/wimax/i2400m/usb.c
new file mode 100644
index 000000000000..3b84dd7b5567
--- /dev/null
+++ b/drivers/staging/wimax/i2400m/usb.c
@@ -0,0 +1,764 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Intel Wireless WiMAX Connection 2400m
+ * Linux driver model glue for USB device, reset & fw upload
+ *
+ * Copyright (C) 2007-2008 Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ * Yanir Lubetkin <yanirx.lubetkin@intel.com>
+ *
+ * See i2400m-usb.h for a general description of this driver.
+ *
+ * This file implements driver model glue, and hook ups for the
+ * generic driver to implement the bus-specific functions (device
+ * communication setup/tear down, firmware upload and resetting).
+ *
+ * ROADMAP
+ *
+ * i2400mu_probe()
+ *   alloc_netdev()...
+ *     i2400mu_netdev_setup()
+ *       i2400mu_init()
+ *       i2400m_netdev_setup()
+ *   i2400m_setup()...
+ *
+ * i2400mu_disconnect
+ *   i2400m_release()
+ *   free_netdev()
+ *
+ * i2400mu_suspend()
+ *   i2400m_cmd_enter_powersave()
+ *   i2400mu_notification_release()
+ *
+ * i2400mu_resume()
+ *   i2400mu_notification_setup()
+ *
+ * i2400mu_bus_dev_start()        Called by i2400m_dev_start() [who is
+ *   i2400mu_tx_setup()           called by i2400m_setup()]
+ *   i2400mu_rx_setup()
+ *   i2400mu_notification_setup()
+ *
+ * i2400mu_bus_dev_stop()         Called by i2400m_dev_stop() [who is
+ *   i2400mu_notification_release()  called by i2400m_release()]
+ *   i2400mu_rx_release()
+ *   i2400mu_tx_release()
+ *
+ * i2400mu_bus_reset()            Called by i2400m_reset
+ *   __i2400mu_reset()
+ *     __i2400mu_send_barker()
+ *   usb_reset_device()
+ */
+#include "i2400m-usb.h"
+#include "linux-wimax-i2400m.h"
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+
+
+#define D_SUBMODULE usb
+#include "usb-debug-levels.h"
+
+static char i2400mu_debug_params[128];	/* parsed in i2400mu_driver_init() */
+module_param_string(debug, i2400mu_debug_params, sizeof(i2400mu_debug_params),
+		    0644);
+MODULE_PARM_DESC(debug,
+		 "String of space-separated NAME:VALUE pairs, where NAMEs "
+		 "are the different debug submodules and VALUE are the "
+		 "initial debug value to set.");
+
+/* Firmware file names for the non-6050 devices; NULL-terminated list */
+static const char *i2400mu_bus_fw_names_5x50[] = {
+#define I2400MU_FW_FILE_NAME_v1_5 "i2400m-fw-usb-1.5.sbcf"
+	I2400MU_FW_FILE_NAME_v1_5,
+#define I2400MU_FW_FILE_NAME_v1_4 "i2400m-fw-usb-1.4.sbcf"
+	I2400MU_FW_FILE_NAME_v1_4,
+	NULL,
+};
+
+/* Firmware file names for the 6050-family devices; NULL-terminated list */
+static const char *i2400mu_bus_fw_names_6050[] = {
+#define I6050U_FW_FILE_NAME_v1_5 "i6050-fw-usb-1.5.sbcf"
+	I6050U_FW_FILE_NAME_v1_5,
+	NULL,
+};
+
+/* Set up TX, RX and notifications; on error undo what was set up so far */
+static
+int i2400mu_bus_dev_start(struct i2400m *i2400m)
+{
+	int result;
+	struct i2400mu *i2400mu = container_of(i2400m, struct i2400mu, i2400m);
+	struct device *dev = &i2400mu->usb_iface->dev;
+
+	d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
+	result = i2400mu_tx_setup(i2400mu);
+	if (result < 0)
+		goto error_usb_tx_setup;
+	result = i2400mu_rx_setup(i2400mu);
+	if (result < 0)
+		goto error_usb_rx_setup;
+	result = i2400mu_notification_setup(i2400mu);
+	if (result < 0)
+		goto error_notif_setup;
+	d_fnend(3, dev, "(i2400m %p) = %d\n", i2400m, result);
+	return result;
+
+error_notif_setup:
+	i2400mu_rx_release(i2400mu);
+error_usb_rx_setup:
+	i2400mu_tx_release(i2400mu);
+error_usb_tx_setup:
+	d_fnend(3, dev, "(i2400m %p) = %d\n", i2400m, result);
+	return result;
+}
+
+/* Release notifications, RX and TX: i2400mu_bus_dev_start() in reverse */
+static
+void i2400mu_bus_dev_stop(struct i2400m *i2400m)
+{
+	struct i2400mu *i2400mu = container_of(i2400m, struct i2400mu, i2400m);
+	struct device *dev = &i2400mu->usb_iface->dev;
+
+	d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
+	i2400mu_notification_release(i2400mu);
+	i2400mu_rx_release(i2400mu);
+	i2400mu_tx_release(i2400mu);
+	d_fnend(3, dev, "(i2400m %p) = void\n", i2400m);
+}
+
+
+/*
+ * Send a barker buffer to the device over the given bulk-out endpoint
+ *
+ * The barker is copied to a kmalloc()ed bounce buffer (freed before
+ * returning), since other arches cannot use stack/vmalloc/text areas
+ * for DMA transfers.
+ *
+ * Returns 0 if the whole barker was sent, -EIO on a short write,
+ * -ESHUTDOWN if the device is going away, -ENOMEM or the bulk errno.
+ */
+static
+int __i2400mu_send_barker(struct i2400mu *i2400mu,
+			  const __le32 *barker,
+			  size_t barker_size,
+			  unsigned endpoint)
+{
+	struct usb_endpoint_descriptor *epd = NULL;
+	int pipe, actual_len, ret;
+	struct device *dev = &i2400mu->usb_iface->dev;
+	void *buffer;
+	int do_autopm = 1;
+
+	ret = usb_autopm_get_interface(i2400mu->usb_iface);
+	if (ret < 0) {
+		dev_err(dev, "RESET: can't get autopm: %d\n", ret);
+		do_autopm = 0;
+	}
+	ret = -ENOMEM;
+	buffer = kmalloc(barker_size, GFP_KERNEL);
+	if (buffer == NULL)
+		goto error_kzalloc;
+	epd = usb_get_epd(i2400mu->usb_iface, endpoint);
+	pipe = usb_sndbulkpipe(i2400mu->usb_dev, epd->bEndpointAddress);
+	memcpy(buffer, barker, barker_size);
+retry:
+	ret = usb_bulk_msg(i2400mu->usb_dev, pipe, buffer, barker_size,
+			   &actual_len, 200);
+	switch (ret) {
+	case 0:
+		if (actual_len != barker_size) {	/* Too short? drop it */
+			dev_err(dev, "E: %s: short write (%d B vs %zu "
+				"expected)\n",
+				__func__, actual_len, barker_size);
+			ret = -EIO;
+		}
+		break;
+	case -EPIPE:
+		/*
+		 * Stall -- maybe the device is choking with our
+		 * requests. Clear it and give it some time. If they
+		 * happen too often, it might be another symptom, so we
+		 * reset.
+		 *
+		 * No error handling for usb_clear_halt(); if it
+		 * works, the retry works; if it fails, this switch
+		 * does the error handling for us.
+		 */
+		if (edc_inc(&i2400mu->urb_edc,
+			    10 * EDC_MAX_ERRORS, EDC_ERROR_TIMEFRAME)) {
+			dev_err(dev, "E: %s: too many stalls in "
+				"URB; resetting device\n", __func__);
+			usb_queue_reset_device(i2400mu->usb_iface);
+			/* fallthrough */
+		} else {
+			usb_clear_halt(i2400mu->usb_dev, pipe);
+			msleep(10);	/* give the device some time */
+			goto retry;
+		}
+		fallthrough;
+	case -EINVAL:			/* while removing driver */
+	case -ENODEV:			/* dev disconnect ... */
+	case -ENOENT:			/* just ignore it */
+	case -ESHUTDOWN:		/* and exit */
+	case -ECONNRESET:
+		ret = -ESHUTDOWN;
+		break;
+	default:			/* Some error? */
+		if (edc_inc(&i2400mu->urb_edc,
+			    EDC_MAX_ERRORS, EDC_ERROR_TIMEFRAME)) {
+			dev_err(dev, "E: %s: maximum errors in URB "
+				"exceeded; resetting device\n",
+				__func__);
+			usb_queue_reset_device(i2400mu->usb_iface);
+		} else {
+			dev_warn(dev, "W: %s: cannot send URB: %d\n",
+				 __func__, ret);
+			goto retry;
+		}
+	}
+	kfree(buffer);
+error_kzalloc:
+	if (do_autopm)
+		usb_autopm_put_interface(i2400mu->usb_iface);
+	return ret;
+}
+
+
+/*
+ * Reset a device at different levels (warm, cold or bus)
+ *
+ * @i2400m: device descriptor
+ * @rt: warm, cold or bus reset (I2400M_RT_WARM/COLD/BUS)
+ *
+ * Warm and cold resets get a USB reset if they fail.
+ *
+ * Warm reset:
+ *
+ * The device will be fully reset internally, but won't be
+ * disconnected from the USB bus (so no reenumeration will
+ * happen). Firmware upload will be necessary.
+ *
+ * The device will send a reboot barker in the notification endpoint
+ * that will trigger the driver to reinitialize the state
+ * automatically from notif.c:i2400m_notification_grok() into
+ * i2400m_dev_bootstrap_delayed().
+ *
+ * Cold and bus (USB) reset:
+ *
+ * The device will be fully reset internally, disconnected from the
+ * USB bus and a reenumeration will happen. Firmware upload will be
+ * necessary. Thus, we don't do any locking or struct
+ * reinitialization, as we are going to be fully disconnected and
+ * reenumerated.
+ *
+ * Note we need to return -ENODEV if a warm reset was requested and we
+ * had to resort to a bus reset. See i2400m_op_reset(), wimax_reset()
+ * and wimax_dev->op_reset.
+ *
+ * WARNING: no driver state saved/fixed
+ */
+static
+int i2400mu_bus_reset(struct i2400m *i2400m, enum i2400m_reset_type rt)
+{
+	int result;
+	struct i2400mu *i2400mu =
+		container_of(i2400m, struct i2400mu, i2400m);
+	struct device *dev = i2400m_dev(i2400m);
+	static const __le32 i2400m_WARM_BOOT_BARKER[4] = {
+		cpu_to_le32(I2400M_WARM_RESET_BARKER),
+		cpu_to_le32(I2400M_WARM_RESET_BARKER),
+		cpu_to_le32(I2400M_WARM_RESET_BARKER),
+		cpu_to_le32(I2400M_WARM_RESET_BARKER),
+	};
+	static const __le32 i2400m_COLD_BOOT_BARKER[4] = {
+		cpu_to_le32(I2400M_COLD_RESET_BARKER),
+		cpu_to_le32(I2400M_COLD_RESET_BARKER),
+		cpu_to_le32(I2400M_COLD_RESET_BARKER),
+		cpu_to_le32(I2400M_COLD_RESET_BARKER),
+	};
+
+	d_fnstart(3, dev, "(i2400m %p rt %u)\n", i2400m, rt);
+	if (rt == I2400M_RT_WARM)
+		result = __i2400mu_send_barker(
+			i2400mu, i2400m_WARM_BOOT_BARKER,
+			sizeof(i2400m_WARM_BOOT_BARKER),
+			i2400mu->endpoint_cfg.bulk_out);
+	else if (rt == I2400M_RT_COLD)
+		result = __i2400mu_send_barker(
+			i2400mu, i2400m_COLD_BOOT_BARKER,
+			sizeof(i2400m_COLD_BOOT_BARKER),
+			i2400mu->endpoint_cfg.reset_cold);
+	else if (rt == I2400M_RT_BUS) {
+		result = usb_reset_device(i2400mu->usb_dev);
+		switch (result) {
+		case 0:
+		case -EINVAL:	/* device is gone */
+		case -ENODEV:
+		case -ENOENT:
+		case -ESHUTDOWN:
+			result = 0;
+			break;	/* We assume the device is disconnected */
+		default:
+			dev_err(dev, "USB reset failed (%d), giving up!\n",
+				result);
+		}
+	} else {
+		result = -EINVAL;	/* shut gcc up in certain arches */
+		BUG();
+	}
+	if (result < 0
+	    && result != -EINVAL	/* device is gone */
+	    && rt != I2400M_RT_BUS) {
+		/*
+		 * Things failed -- resort to lower level reset, that
+		 * we queue in another context; the reason for this is
+		 * that the pre and post reset functionality requires
+		 * the i2400m->init_mutex; RT_WARM and RT_COLD can
+		 * come from areas where i2400m->init_mutex is taken.
+		 */
+		dev_err(dev, "%s reset failed (%d); trying USB reset\n",
+			rt == I2400M_RT_WARM ? "warm" : "cold", result);
+		usb_queue_reset_device(i2400mu->usb_iface);
+		result = -ENODEV;
+	}
+	d_fnend(3, dev, "(i2400m %p rt %u) = %d\n", i2400m, rt, result);
+	return result;
+}
+
+static void i2400mu_get_drvinfo(struct net_device *net_dev,
+				struct ethtool_drvinfo *info)
+{
+	struct i2400m *i2400m = net_dev_to_i2400m(net_dev);
+	struct i2400mu *i2400mu = container_of(i2400m, struct i2400mu, i2400m);
+
+	/* ethtool drvinfo: driver name, fw file name (if any), USB path */
+	strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
+	strscpy(info->fw_version, i2400m->fw_name ? : "",
+		sizeof(info->fw_version));
+	usb_make_path(i2400mu->usb_dev, info->bus_info, sizeof(info->bus_info));
+}
+
+static const struct ethtool_ops i2400mu_ethtool_ops = {
+	.get_drvinfo = i2400mu_get_drvinfo,
+	.get_link = ethtool_op_get_link,
+};	/* installed on the netdev by i2400mu_netdev_setup() */
+
+static
+void i2400mu_netdev_setup(struct net_device *net_dev)	/* alloc_netdev() cb */
+{
+	struct i2400m *i2400m = net_dev_to_i2400m(net_dev);
+	struct i2400mu *i2400mu = container_of(i2400m, struct i2400mu, i2400m);
+	i2400mu_init(i2400mu);		/* USB-specific state first */
+	i2400m_netdev_setup(net_dev);
+	net_dev->ethtool_ops = &i2400mu_ethtool_ops;
+}
+
+
+/*
+ * Debug levels control; see linux-wimax-debug.h
+ */
+struct d_level D_LEVEL[] = {
+	D_SUBMODULE_DEFINE(usb),
+	D_SUBMODULE_DEFINE(fw),
+	D_SUBMODULE_DEFINE(notif),
+	D_SUBMODULE_DEFINE(rx),
+	D_SUBMODULE_DEFINE(tx),
+};
+size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL);
+
+static
+void i2400mu_debugfs_add(struct i2400mu *i2400mu)
+{
+	struct dentry *dentry = i2400mu->i2400m.wimax_dev.debugfs_dentry;
+
+	dentry = debugfs_create_dir("i2400m-usb", dentry);
+	i2400mu->debugfs_dentry = dentry; /* removed in i2400mu_disconnect() */
+	/* One debug-level entry for every submodule listed in D_LEVEL */
+	d_level_register_debugfs("dl_", usb, dentry);
+	d_level_register_debugfs("dl_", fw, dentry);
+	d_level_register_debugfs("dl_", notif, dentry);
+	d_level_register_debugfs("dl_", rx, dentry);
+	d_level_register_debugfs("dl_", tx, dentry);
+
+	/* Don't touch these if you don't know what you are doing */
+	debugfs_create_u8("rx_size_auto_shrink", 0600, dentry,
+			  &i2400mu->rx_size_auto_shrink);
+
+	debugfs_create_size_t("rx_size", 0600, dentry, &i2400mu->rx_size);
+}
+
+/* Device type set on the netdev by i2400mu_probe() */
+static struct device_type i2400mu_type = {
+	.name	= "wimax",
+};
+
+/*
+ * Probe an i2400m interface and register it
+ *
+ * @iface:   USB interface to link to
+ * @id:      USB class/subclass/protocol id
+ * @returns: 0 if ok, < 0 errno code on error.
+ *
+ * Alloc a net device, initialize the bus-specific details and then
+ * calls the bus-generic initialization routine. That will register
+ * the wimax and netdev devices, upload the firmware [using
+ * _bus_bm_*()], call _bus_dev_start() to finalize the setup of the
+ * communication with the device and then will start to talk to it to
+ * finish setting it up.
+ */
+static
+int i2400mu_probe(struct usb_interface *iface,
+		  const struct usb_device_id *id)
+{
+	int result;
+	struct net_device *net_dev;
+	struct device *dev = &iface->dev;
+	struct i2400m *i2400m;
+	struct i2400mu *i2400mu;
+	struct usb_device *usb_dev = interface_to_usbdev(iface);
+
+	if (iface->cur_altsetting->desc.bNumEndpoints < 4)
+		return -ENODEV;
+
+	if (usb_dev->speed != USB_SPEED_HIGH)
+		dev_err(dev, "device not connected as high speed\n");
+
+	/* Allocate instance [calls i2400mu_netdev_setup() on it]. */
+	result = -ENOMEM;
+	net_dev = alloc_netdev(sizeof(*i2400mu), "wmx%d", NET_NAME_UNKNOWN,
+			       i2400mu_netdev_setup);
+	if (net_dev == NULL) {
+		dev_err(dev, "no memory for network device instance\n");
+		goto error_alloc_netdev;
+	}
+	SET_NETDEV_DEV(net_dev, dev);
+	SET_NETDEV_DEVTYPE(net_dev, &i2400mu_type);
+	i2400m = net_dev_to_i2400m(net_dev);
+	i2400mu = container_of(i2400m, struct i2400mu, i2400m);
+	i2400m->wimax_dev.net_dev = net_dev;
+	i2400mu->usb_dev = usb_get_dev(usb_dev);
+	i2400mu->usb_iface = iface;
+	usb_set_intfdata(iface, i2400mu);
+
+	i2400m->bus_tx_block_size = I2400MU_BLK_SIZE;
+	/*
+	 * Room required in the Tx queue for USB message to accommodate
+	 * a smallest payload while allocating header space is 16 bytes.
+	 * Adding this room for the new tx message increases the
+	 * possibilities of including any payload with size <= 16 bytes.
+	 */
+	i2400m->bus_tx_room_min = I2400MU_BLK_SIZE;
+	i2400m->bus_pl_size_max = I2400MU_PL_SIZE_MAX;
+	i2400m->bus_setup = NULL;
+	i2400m->bus_dev_start = i2400mu_bus_dev_start;
+	i2400m->bus_dev_stop = i2400mu_bus_dev_stop;
+	i2400m->bus_release = NULL;
+	i2400m->bus_tx_kick = i2400mu_bus_tx_kick;
+	i2400m->bus_reset = i2400mu_bus_reset;
+	i2400m->bus_bm_retries = I2400M_USB_BOOT_RETRIES;
+	i2400m->bus_bm_cmd_send = i2400mu_bus_bm_cmd_send;
+	i2400m->bus_bm_wait_for_ack = i2400mu_bus_bm_wait_for_ack;
+	i2400m->bus_bm_mac_addr_impaired = 0;
+
+	switch (id->idProduct) {
+	case USB_DEVICE_ID_I6050:
+	case USB_DEVICE_ID_I6050_2:
+	case USB_DEVICE_ID_I6150:
+	case USB_DEVICE_ID_I6150_2:
+	case USB_DEVICE_ID_I6150_3:
+	case USB_DEVICE_ID_I6250:
+		i2400mu->i6050 = 1;
+		break;
+	default:
+		break;
+	}
+
+	if (i2400mu->i6050) {
+		i2400m->bus_fw_names = i2400mu_bus_fw_names_6050;
+		i2400mu->endpoint_cfg.bulk_out = 0;
+		i2400mu->endpoint_cfg.notification = 3;
+		i2400mu->endpoint_cfg.reset_cold = 2;
+		i2400mu->endpoint_cfg.bulk_in = 1;
+	} else {
+		i2400m->bus_fw_names = i2400mu_bus_fw_names_5x50;
+		i2400mu->endpoint_cfg.bulk_out = 0;
+		i2400mu->endpoint_cfg.notification = 1;
+		i2400mu->endpoint_cfg.reset_cold = 2;
+		i2400mu->endpoint_cfg.bulk_in = 3;
+	}
+#ifdef CONFIG_PM
+	iface->needs_remote_wakeup = 1;		/* autosuspend (15s delay) */
+	device_init_wakeup(dev, 1);
+	pm_runtime_set_autosuspend_delay(&usb_dev->dev, 15000);
+	usb_enable_autosuspend(usb_dev);
+#endif
+
+	result = i2400m_setup(i2400m, I2400M_BRI_MAC_REINIT);
+	if (result < 0) {
+		dev_err(dev, "cannot setup device: %d\n", result);
+		goto error_setup;
+	}
+	i2400mu_debugfs_add(i2400mu);
+	return 0;
+
+error_setup:
+	usb_set_intfdata(iface, NULL);
+	usb_put_dev(i2400mu->usb_dev);
+	free_netdev(net_dev);
+error_alloc_netdev:
+	return result;
+}
+
+
+/*
+ * Disconnect an i2400m from the system.
+ *
+ * i2400m_stop() has been called before, so all the rx and tx contexts
+ * have been taken down already. Make sure the queue is stopped,
+ * unregister netdev and i2400m, free and kill.
+ */
+static
+void i2400mu_disconnect(struct usb_interface *iface)
+{
+	struct i2400mu *i2400mu = usb_get_intfdata(iface);
+	struct i2400m *i2400m = &i2400mu->i2400m;
+	struct net_device *net_dev = i2400m->wimax_dev.net_dev;
+	struct device *dev = &iface->dev;
+
+	d_fnstart(3, dev, "(iface %p i2400m %p)\n", iface, i2400m);
+
+	debugfs_remove_recursive(i2400mu->debugfs_dentry);
+	i2400m_release(i2400m);
+	usb_set_intfdata(iface, NULL);
+	usb_put_dev(i2400mu->usb_dev);
+	free_netdev(net_dev);
+	d_fnend(3, dev, "(iface %p i2400m %p) = void\n", iface, i2400m);
+}
+
+
+/*
+ * Get the device ready for USB port or system standby and hibernation
+ *
+ * USB port and system standby are handled the same.
+ *
+ * When the system hibernates, the USB device is powered down and then
+ * up, so we don't really have to do much here, as it will be seen as
+ * a reconnect. Still for simplicity we consider this case the same as
+ * suspend, so that the device has a chance to notify the base
+ * station (if connected).
+ *
+ * So at the end, the three cases require common handling.
+ *
+ * If at the time of this call the device's firmware is not loaded,
+ * nothing has to be done. Note we can be "loose" about not reading
+ * i2400m->updown under i2400m->init_mutex. If it happens to change
+ * immediately, other parts of the call flow will fail and effectively
+ * catch it.
+ *
+ * If the firmware is loaded, we need to:
+ *
+ *  - tell the device to go into host interface power save mode, wait
+ *    for it to ack
+ *
+ *    This is quite more interesting than it is; we need to execute a
+ *    command, but this time, we don't want the code in usb-{tx,rx}.c
+ *    to call the usb_autopm_get/put_interface() barriers as it'd
+ *    deadlock, so we need to decrement i2400mu->do_autopm, that acts
+ *    as a poor man's semaphore. Ugly, but it works.
+ *
+ *    As well, the device might refuse going to sleep for whichever
+ *    reason. In this case we just fail. For system suspend/hibernate,
+ *    we *can't* fail. We check PMSG_IS_AUTO to see if the
+ *    suspend call comes from the USB stack or from the system and act
+ *    in consequence.
+ *
+ *  - stop the notification endpoint polling
+ */
+static
+int i2400mu_suspend(struct usb_interface *iface, pm_message_t pm_msg)
+{
+	int result = 0;
+	struct device *dev = &iface->dev;
+	struct i2400mu *i2400mu = usb_get_intfdata(iface);
+	unsigned is_autosuspend = 0;
+	struct i2400m *i2400m = &i2400mu->i2400m;
+
+#ifdef CONFIG_PM
+	if (PMSG_IS_AUTO(pm_msg))
+		is_autosuspend = 1;
+#endif
+
+	d_fnstart(3, dev, "(iface %p pm_msg %u)\n", iface, pm_msg.event);
+	rmb();		/* see i2400m->updown's documentation  */
+	if (i2400m->updown == 0)
+		goto no_firmware;
+	if (i2400m->state == I2400M_SS_DATA_PATH_CONNECTED && is_autosuspend) {
+		/* ugh -- the device is connected and this suspend
+		 * request is an autosuspend one (not a system standby
+		 * / hibernate).
+		 *
+		 * The only way the device can go to standby is if the
+		 * link with the base station is in IDLE mode; if that
+		 * were the case, we'd be in status
+		 * I2400M_SS_CONNECTED_IDLE. But we are not.
+		 *
+		 * If we *tell* him to go power save now, it'll reset
+		 * as a precautionary measure, so if this is an
+		 * autosuspend thing, say no and it'll come back
+		 * later, when the link is IDLE
+		 */
+		result = -EBADF;
+		d_printf(1, dev, "fw up, link up, not-idle, autosuspend: "
+			 "not entering powersave\n");
+		goto error_not_now;
+	}
+	d_printf(1, dev, "fw up: entering powersave\n");
+	atomic_dec(&i2400mu->do_autopm);
+	result = i2400m_cmd_enter_powersave(i2400m);
+	atomic_inc(&i2400mu->do_autopm);
+	if (result < 0 && !is_autosuspend) {
+		/* System suspend, can't fail */
+		dev_err(dev, "failed to suspend, will reset on resume\n");
+		result = 0;
+	}
+	if (result < 0)
+		goto error_enter_powersave;
+	i2400mu_notification_release(i2400mu);
+	d_printf(1, dev, "powersave requested\n");
+error_enter_powersave:
+error_not_now:
+no_firmware:
+	d_fnend(3, dev, "(iface %p pm_msg %u) = %d\n",
+		iface, pm_msg.event, result);
+	return result;
+}
+
+/* Resume: set the notification endpoint up again if the fw was up */
+static
+int i2400mu_resume(struct usb_interface *iface)
+{
+	int ret = 0;
+	struct device *dev = &iface->dev;
+	struct i2400mu *i2400mu = usb_get_intfdata(iface);
+	struct i2400m *i2400m = &i2400mu->i2400m;
+
+	d_fnstart(3, dev, "(iface %p)\n", iface);
+	rmb();		/* see i2400m->updown's documentation  */
+	if (i2400m->updown == 0) {
+		d_printf(1, dev, "fw was down, no resume needed\n");
+		goto out;
+	}
+	d_printf(1, dev, "fw was up, resuming\n");
+	i2400mu_notification_setup(i2400mu);
+	/* NOTE(review): the setup result is dropped, ret stays 0. USB
+	 * has flow control, so we don't need to give it time to come
+	 * back; otherwise, we'd use something like a get-state command */
+out:
+	d_fnend(3, dev, "(iface %p) = %d\n", iface, ret);
+	return ret;
+}
+
+/* reset_resume callback: hand over to i2400m_dev_reset_handle() */
+static
+int i2400mu_reset_resume(struct usb_interface *iface)
+{
+	int result;
+	struct device *dev = &iface->dev;
+	struct i2400mu *i2400mu = usb_get_intfdata(iface);
+	struct i2400m *i2400m = &i2400mu->i2400m;
+
+	d_fnstart(3, dev, "(iface %p)\n", iface);
+	result = i2400m_dev_reset_handle(i2400m, "device reset on resume");
+	d_fnend(3, dev, "(iface %p) = %d\n", iface, result);
+	return result < 0 ? result : 0;	/* positive results count as success */
+}
+
+
+/*
+ * USB pre_reset callback: another driver or user space is triggering
+ * a reset on the device which contains @iface.
+ *
+ * All the work is delegated to i2400m_pre_reset(), whose result is
+ * returned as-is. If memory has to be allocated on this path, use
+ * GFP_NOIO, or GFP_ATOMIC if in atomic context.
+ */
+static
+int i2400mu_pre_reset(struct usb_interface *iface)
+{
+	struct i2400mu *i2400mu = usb_get_intfdata(iface);
+	return i2400m_pre_reset(&i2400mu->i2400m);
+}
+
+
+/*
+ * USB post_reset callback: the reset of the device which contains
+ * @iface has completed and the device can be used again.
+ *
+ * All the work is delegated to i2400m_post_reset(), whose result is
+ * returned as-is. Use GFP_NOIO (GFP_ATOMIC if atomic) to allocate.
+ */
+static
+int i2400mu_post_reset(struct usb_interface *iface)
+{
+	struct i2400mu *i2400mu = usb_get_intfdata(iface);
+	return i2400m_post_reset(&i2400mu->i2400m);
+}
+
+
+static
+const struct usb_device_id i2400mu_id_table[] = {
+	/* the USB_DEVICE_ID_* entries get i6050 handling in i2400mu_probe() */
+	{ USB_DEVICE(0x8086, USB_DEVICE_ID_I6050) },
+	{ USB_DEVICE(0x8086, USB_DEVICE_ID_I6050_2) },
+	{ USB_DEVICE(0x8087, USB_DEVICE_ID_I6150) },
+	{ USB_DEVICE(0x8087, USB_DEVICE_ID_I6150_2) },
+	{ USB_DEVICE(0x8087, USB_DEVICE_ID_I6150_3) },
+	{ USB_DEVICE(0x8086, USB_DEVICE_ID_I6250) },
+	{ USB_DEVICE(0x8086, 0x0181) },
+	{ USB_DEVICE(0x8086, 0x1403) },
+	{ USB_DEVICE(0x8086, 0x1405) },
+	{ USB_DEVICE(0x8086, 0x0180) },
+	{ USB_DEVICE(0x8086, 0x0182) },
+	{ USB_DEVICE(0x8086, 0x1406) },
+	{ },
+};
+MODULE_DEVICE_TABLE(usb, i2400mu_id_table);
+
+/* USB driver: probe/disconnect, PM and reset hooks; autosuspend capable */
+static
+struct usb_driver i2400mu_driver = {
+	.name = KBUILD_MODNAME,
+	.suspend = i2400mu_suspend,
+	.resume = i2400mu_resume,
+	.reset_resume = i2400mu_reset_resume,
+	.probe = i2400mu_probe,
+	.disconnect = i2400mu_disconnect,
+	.pre_reset = i2400mu_pre_reset,
+	.post_reset = i2400mu_post_reset,
+	.id_table = i2400mu_id_table,
+	.supports_autosuspend = 1,
+};
+
+static
+int __init i2400mu_driver_init(void)
+{
+	d_parse_params(D_LEVEL, D_LEVEL_SIZE, i2400mu_debug_params,
+		       "i2400m_usb.debug");	/* parse the "debug" param */
+	return usb_register(&i2400mu_driver);
+}
+module_init(i2400mu_driver_init);
+
+/* Module exit: deregister the USB driver registered at init time */
+static
+void __exit i2400mu_driver_exit(void)
+{
+	usb_deregister(&i2400mu_driver);
+}
+module_exit(i2400mu_driver_exit);
+
+MODULE_AUTHOR("Intel Corporation <linux-wimax@intel.com>");
+MODULE_DESCRIPTION("Driver for USB based Intel Wireless WiMAX Connection 2400M "
+		   "(5x50 & 6050)");
+MODULE_LICENSE("GPL");
+MODULE_FIRMWARE(I2400MU_FW_FILE_NAME_v1_5);
+MODULE_FIRMWARE(I6050U_FW_FILE_NAME_v1_5);
diff --git a/drivers/staging/wimax/id-table.c b/drivers/staging/wimax/id-table.c
new file mode 100644
index 000000000000..0e6f4aa87bc9
--- /dev/null
+++ b/drivers/staging/wimax/id-table.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Linux WiMAX
+ * Mapping of generic netlink family IDs to net devices
+ *
+ * Copyright (C) 2005-2006 Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * We assign a single generic netlink family ID to each device (to
+ * simplify lookup).
+ *
+ * We need a way to map family ID to a wimax_dev pointer.
+ *
+ * The idea is to use a very simple lookup. Using a netlink attribute
+ * with (for example) the interface name implies a heavier search over
+ * all the network devices; seemed kind of a waste given that we know
+ * we are looking for a WiMAX device and that most systems will have
+ * just a single WiMAX adapter.
+ *
+ * We put all the WiMAX devices in the system in a linked list and
+ * match the generic link family ID against the list.
+ *
+ * By using a linked list, the case of a single adapter in the system
+ * becomes (almost) no overhead, while still working for many more. If
+ * it ever goes beyond two, I'll be surprised.
+ */
+#include <linux/device.h>
+#include <net/genetlink.h>
+#include <linux/netdevice.h>
+#include <linux/list.h>
+#include "linux-wimax.h"
+#include "wimax-internal.h"
+
+
+#define D_SUBMODULE id_table
+#include "debug-levels.h"
+
+
+static DEFINE_SPINLOCK(wimax_id_table_lock);	/* guards wimax_id_table */
+static LIST_HEAD(wimax_id_table);		/* list of wimax_dev */
+
+
+/*
+ * wimax_id_table_add - add a wimax_dev to the lookup list
+ *
+ * @wimax_dev: WiMAX device descriptor to add; it is linked through
+ *     its id_table_node member.
+ *
+ * The descriptor is inserted at the head of wimax_id_table, under
+ * wimax_id_table_lock.
+ */
+void wimax_id_table_add(struct wimax_dev *wimax_dev)
+{
+	d_fnstart(3, NULL, "(wimax_dev %p)\n", wimax_dev);
+	spin_lock(&wimax_id_table_lock);
+	list_add(&wimax_dev->id_table_node, &wimax_id_table);
+	spin_unlock(&wimax_id_table_lock);
+	d_fnend(3, NULL, "(wimax_dev %p)\n", wimax_dev);
+}
+
+
+/*
+ * wimax_dev_get_by_genl_info - look up a wimax_dev by interface index
+ *
+ * @info: generic netlink info; only used for debug tracing here
+ * @ifindex: index of the network device to look for
+ *
+ * Returns the matching wimax_dev with a reference held on its net_dev
+ * (drop it with 'dev_put(wimax_dev->net_dev)'), or NULL if no device
+ * in wimax_id_table has that ifindex.
+ */
+struct wimax_dev *wimax_dev_get_by_genl_info(
+	struct genl_info *info, int ifindex)
+{
+	struct wimax_dev *wimax_dev = NULL;
+
+	d_fnstart(3, NULL, "(info %p ifindex %d)\n", info, ifindex);
+	spin_lock(&wimax_id_table_lock);
+	list_for_each_entry(wimax_dev, &wimax_id_table, id_table_node) {
+		if (wimax_dev->net_dev->ifindex == ifindex) {
+			dev_hold(wimax_dev->net_dev);
+			goto found;
+		}
+	}
+	wimax_dev = NULL;
+	d_printf(1, NULL, "wimax: no devices found with ifindex %d\n",
+		 ifindex);
+found:
+	spin_unlock(&wimax_id_table_lock);
+	d_fnend(3, NULL, "(info %p ifindex %d) = %p\n",
+		info, ifindex, wimax_dev);
+	return wimax_dev;
+}
+
+
+/*
+ * wimax_id_table_rm - remove a wimax_dev from the lookup list
+ *
+ * @wimax_dev: WiMAX device descriptor to unlink from wimax_id_table
+ */
+void wimax_id_table_rm(struct wimax_dev *wimax_dev)
+{
+	spin_lock(&wimax_id_table_lock);
+	list_del_init(&wimax_dev->id_table_node);
+	spin_unlock(&wimax_id_table_lock);
+}
+
+
+/*
+ * wimax_id_table_release - check that the lookup list is empty
+ *
+ * Nothing is freed here; every wimax_dev still on wimax_id_table is
+ * reported with pr_err() and WARN_ON(). The code is always compiled to
+ * keep it from bit rotting, but returns early without CONFIG_BUG.
+ */
+void wimax_id_table_release(void)
+{
+	struct wimax_dev *wimax_dev;
+
+#ifndef CONFIG_BUG
+	return;
+#endif
+	spin_lock(&wimax_id_table_lock);
+	list_for_each_entry(wimax_dev, &wimax_id_table, id_table_node) {
+		pr_err("BUG: %s wimax_dev %p ifindex %d not cleared\n",
+		       __func__, wimax_dev, wimax_dev->net_dev->ifindex);
+		WARN_ON(1);
+	}
+	spin_unlock(&wimax_id_table_lock);
+}
diff --git a/drivers/staging/wimax/linux-wimax-debug.h b/drivers/staging/wimax/linux-wimax-debug.h
new file mode 100644
index 000000000000..5b5ec405143b
--- /dev/null
+++ b/drivers/staging/wimax/linux-wimax-debug.h
@@ -0,0 +1,491 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Linux WiMAX
+ * Collection of tools to manage debug operations.
+ *
+ * Copyright (C) 2005-2007 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * Don't #include this file directly, read on!
+ *
+ * EXECUTING DEBUGGING ACTIONS OR NOT
+ *
+ * The main thing this framework provides is decision power to take a
+ * debug action (like printing a message) if the current debug level
+ * allows it.
+ *
+ * The decision power is at two levels: at compile-time (what does
+ * not make it is compiled out) and at run-time. The run-time
+ * selection is done per-submodule (as they are declared by the user
+ * of the framework).
+ *
+ * A call to d_test(L) (L being the target debug level) returns true
+ * if the action should be taken because the current debug levels
+ * allow it (both compile and run time).
+ *
+ * It follows that a call to d_test() that can be determined to be
+ * always false at compile time will get the code depending on it
+ * compiled out by optimization.
+ *
+ * DEBUG LEVELS
+ *
+ * It is up to the caller to define how much a debugging level is.
+ *
+ * Convention sets 0 as "no debug" (so an action marked as debug level 0
+ * will always be taken). The increasing debug levels are used for
+ * increased verbosity.
+ *
+ * USAGE
+ *
+ * Group the code in modules and submodules inside each module [which
+ * in most cases maps to Linux modules and .c files that compose
+ * those].
+ *
+ * For each module, there is:
+ *
+ *  - a MODULENAME (single word, legal C identifier)
+ *
+ *  - a debug-levels.h header file that declares the list of
+ *    submodules and that is included by all .c files that use
+ *    the debugging tools. The file name can be anything.
+ *
+ *  - some (optional) .c code to manipulate the runtime debug levels
+ *    through debugfs.
+ *
+ * The debug-levels.h file would look like:
+ *
+ *     #ifndef __debug_levels__h__
+ *     #define __debug_levels__h__
+ *
+ *     #define D_MODULENAME modulename
+ *     #define D_MASTER 10
+ *
+ *     #include "linux-wimax-debug.h"
+ *
+ *     enum d_module {
+ *             D_SUBMODULE_DECLARE(submodule_1),
+ *             D_SUBMODULE_DECLARE(submodule_2),
+ *             ...
+ *             D_SUBMODULE_DECLARE(submodule_N)
+ *     };
+ *
+ *     #endif
+ *
+ * D_MASTER is the maximum compile-time debug level; any debug actions
+ * above this will be out. D_MODULENAME is the module name (legal C
+ * identifier), which has to be unique for each module (to avoid
+ * namespace collisions during linkage). Note those #defines need to
+ * be done before #including linux-wimax-debug.h
+ *
+ * We declare N different submodules whose debug level can be
+ * independently controlled during runtime.
+ *
+ * In a .c file of the module (and only in one of them), define the
+ * following code:
+ *
+ *     struct d_level D_LEVEL[] = {
+ *             D_SUBMODULE_DEFINE(submodule_1),
+ *             D_SUBMODULE_DEFINE(submodule_2),
+ *             ...
+ *             D_SUBMODULE_DEFINE(submodule_N),
+ *     };
+ *     size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL);
+ *
+ * Externs for d_level_MODULENAME and d_level_size_MODULENAME are used
+ * and declared in this file using the D_LEVEL and D_LEVEL_SIZE macros
+ * #defined also in this file.
+ *
+ * To manipulate from user space the levels, create a debugfs dentry
+ * and then register each submodule with:
+ *
+ *     d_level_register_debugfs("PREFIX_", submodule_X, parent);
+ *
+ * Where PREFIX_ is a name of your choosing. This will create a debugfs
+ * file with a single numeric value that can be used to tweak it. To
+ * remove the entries, just use debugfs_remove_recursive() on 'parent'.
+ *
+ * NOTE: remember that even if this will show attached to some
+ *     particular instance of a device, the settings are *global*.
+ *
+ * On each submodule (for example, .c files), the debug infrastructure
+ * should be included like this:
+ *
+ *     #define D_SUBMODULE submodule_x     // matches one in debug-levels.h
+ *     #include "debug-levels.h"
+ *
+ * after #including all your include files.
+ *
+ * Now you can use the d_*() macros below [d_test(), d_fnstart(),
+ * d_fnend(), d_printf(), d_dump()].
+ *
+ * If their debug level is greater than D_MASTER, they will be
+ * compiled out.
+ *
+ * If their debug level is lower or equal than D_MASTER but greater
+ * than the current debug level of their submodule, they'll be
+ * ignored.
+ *
+ * Otherwise, the action will be performed.
+ */
+#ifndef __debug__h__
+#define __debug__h__
+
+#include <linux/types.h>
+#include <linux/slab.h>
+
+struct device;
+
+/* Backend stuff */
+
+/*
+ * Debug backend: generate a message header from a 'struct device'
+ *
+ * @head: buffer where to place the header (always NUL-terminated on return)
+ * @head_size: length of @head
+ * @dev: pointer to device used to generate a header from. If NULL or
+ *     corrupt (< 4096), an empty ("") header is generated.
+ */
+static inline
+void __d_head(char *head, size_t head_size,
+	      struct device *dev)
+{
+	/* Start empty: callers print @head with %s even on the error path */
+	head[0] = 0;
+	if (dev != NULL && (unsigned long)dev < 4096) {
+		printk(KERN_ERR "E: Corrupt dev %p\n", dev);
+		WARN_ON(1);
+	} else if (dev != NULL)
+		snprintf(head, head_size, "%s %s: ",
+			 dev_driver_string(dev), dev_name(dev));
+}
+
+
+/*
+ * Debug backend: log some message if debugging is enabled
+ *
+ * @l: intended debug level
+ * @tag: tag to prefix the message with
+ * @dev: 'struct device' associated to this message
+ * @f: printf-like format and arguments
+ *
+ * Bails out unless d_test(@l) passes; otherwise prints, at KERN_ERR, a
+ * header built from @dev, the calling function's name, @tag and the
+ * message. Always compiled, even when the compile-time check optimizes
+ * it out, so the printf-like formats and variables keep being checked.
+ */
+#define _d_printf(l, tag, dev, f, a...)					\
+do {									\
+	char head[64];							\
+	if (!d_test(l))							\
+		break;							\
+	__d_head(head, sizeof(head), dev);				\
+	printk(KERN_ERR "%s%s%s: " f, head, __func__, tag, ##a);	\
+} while (0)
+
+
+/*
+ * CPP sugar to build A_B symbol names when an argument is itself a
+ * #define: the extra macro level expands it before ## pastes it.
+ */
+#define __D_PASTE__(varname, modulename) varname##_##modulename
+#define __D_PASTE(varname, modulename) (__D_PASTE__(varname, modulename))
+#define _D_SUBMODULE_INDEX(_name) (D_SUBMODULE_DECLARE(_name))
+
+
+/*
+ * Store a submodule's runtime debug level and name
+ */
+struct d_level {
+	u8 level;		/* runtime level; d_test() compares against it */
+	const char *name;	/* matched by strcmp() in d_submodule_set() */
+};
+
+
+/*
+ * List of available submodules and their debug levels
+ *
+ * We call them d_level_MODULENAME and d_level_size_MODULENAME; the
+ * macros D_LEVEL and D_LEVEL_SIZE contain the name already for
+ * convenience.
+ *
+ * This array and the size are defined on some .c file that is part of
+ * the current module.
+ */
+#define D_LEVEL __D_PASTE(d_level, D_MODULENAME)
+#define D_LEVEL_SIZE __D_PASTE(d_level_size, D_MODULENAME)
+
+extern struct d_level D_LEVEL[];
+extern size_t D_LEVEL_SIZE;
+
+
+/*
+ * Frontend stuff
+ *
+ *
+ * Stuff you need to declare prior to using the actual "debug" actions
+ * (defined below).
+ */
+
+#ifndef D_MODULENAME
+#error D_MODULENAME is not defined in your debug-levels.h file
+/**
+ * D_MODULENAME - Name of the current module
+ *
+ * #define in your module's debug-levels.h, making sure it is
+ * unique. This has to be a legal C identifier.
+ */
+#define D_MODULENAME undefined_modulename
+#endif
+
+
+#ifndef D_MASTER
+#warning D_MASTER not defined, but debug.h included! [see docs]
+/**
+ * D_MASTER - Compile time maximum debug level
+ *
+ * #define in your debug-levels.h file to the maximum debug level the
+ * runtime code will be allowed to have. This allows you to provide a
+ * main knob.
+ *
+ * Anything above that level will be optimized out of the compile.
+ *
+ * Defaults to zero (no debug code compiled in).
+ *
+ * Maximum one definition per module (at the debug-levels.h file).
+ */
+#define D_MASTER 0
+#endif
+
+#ifndef D_SUBMODULE
+#error D_SUBMODULE not defined, but debug.h included! [see docs]
+/**
+ * D_SUBMODULE - Name of the current submodule
+ *
+ * #define in your submodule .c file before #including debug-levels.h
+ * to the name of the current submodule as previously declared and
+ * defined with D_SUBMODULE_DECLARE() (in your module's
+ * debug-levels.h) and D_SUBMODULE_DEFINE().
+ *
+ * This is used to provide runtime-control over the debug levels.
+ *
+ * Maximum one per .c file! Can be shared among different .c files
+ * (meaning they belong to the same submodule categorization).
+ */
+#define D_SUBMODULE undefined_module
+#endif
+
+
+/**
+ * D_SUBMODULE_DECLARE - Declare a submodule for runtime debug level control
+ *
+ * @_name: name of the submodule, restricted to the chars that make up a
+ *     valid C identifier ([a-zA-Z0-9_]).
+ *
+ * Declare in the module's debug-levels.h header file as:
+ *
+ * enum d_module {
+ *         D_SUBMODULE_DECLARE(submodule_1),
+ *         D_SUBMODULE_DECLARE(submodule_2),
+ *         D_SUBMODULE_DECLARE(submodule_3),
+ * };
+ *
+ * Some corresponding .c file needs to have a matching
+ * D_SUBMODULE_DEFINE().
+ */
+#define D_SUBMODULE_DECLARE(_name) __D_SUBMODULE_##_name
+
+
+/**
+ * D_SUBMODULE_DEFINE - Define a submodule for runtime debug level control
+ *
+ * @_name: name of the submodule, restricted to the chars that make up a
+ *     valid C identifier ([a-zA-Z0-9_]).
+ *
+ * Use once per module (in some .c file; not static, as this header
+ * declares both symbols extern) as:
+ *
+ * struct d_level d_level_MODULENAME[] = {
+ *         D_SUBMODULE_DEFINE(submodule_1),
+ *         D_SUBMODULE_DEFINE(submodule_2),
+ *         D_SUBMODULE_DEFINE(submodule_3),
+ * };
+ * size_t d_level_size_MODULENAME = ARRAY_SIZE(d_level_MODULENAME);
+ *
+ * Matching D_SUBMODULE_DECLARE()s have to be present in a
+ * debug-levels.h header file.
+ */
+#define D_SUBMODULE_DEFINE(_name)		\
+[__D_SUBMODULE_##_name] = {			\
+	.level = 0,				\
+	.name = #_name				\
+}
+
+
+
+/* The actual "debug" operations */
+
+
+/**
+ * d_test - Returns true if debugging should be enabled
+ *
+ * @l: intended debug level (unsigned); evaluated exactly once
+ *
+ * True when both the compile-time ceiling D_MASTER and the runtime
+ * level of the current D_SUBMODULE are higher or equal to the
+ * requested level; only then should debug output/actions happen.
+ *
+ * NOTE:
+ *
+ * This needs to be coded so that it can be evaluated in compile
+ * time; this is why the ugly BUG_ON() is placed in there, so the
+ * D_MASTER evaluation compiles all out if it is compile-time false.
+ */
+#define d_test(l)							\
+({									\
+	unsigned __l = l;	/* type enforcer */			\
+	(D_MASTER) >= __l						\
+	&& ({								\
+		BUG_ON(_D_SUBMODULE_INDEX(D_SUBMODULE) >= D_LEVEL_SIZE);\
+		D_LEVEL[_D_SUBMODULE_INDEX(D_SUBMODULE)].level >= __l;	\
+	});								\
+})
+
+
+/**
+ * d_fnstart - log message at function start if debugging enabled
+ * @l: intended debug level
+ * @_dev: 'struct device' pointer, NULL if none (for context)
+ * @f: printf-like format string
+ * @a: arguments for @f
+ */
+#define d_fnstart(l, _dev, f, a...) _d_printf(l, " FNSTART", _dev, f, ## a)
+
+
+/**
+ * d_fnend - log message at function end if debugging enabled
+ * @l: intended debug level
+ * @_dev: 'struct device' pointer, NULL if none (for context)
+ * @f: printf-like format string
+ * @a: arguments for @f
+ */
+#define d_fnend(l, _dev, f, a...) _d_printf(l, " FNEND", _dev, f, ## a)
+
+
+/**
+ * d_printf - log message if debugging enabled
+ * @l: intended debug level
+ * @_dev: 'struct device' pointer, NULL if none (for context)
+ * @f: printf-like format string
+ * @a: arguments for @f
+ */
+#define d_printf(l, _dev, f, a...) _d_printf(l, "", _dev, f, ## a)
+
+
+/**
+ * d_dump - log buffer hex dump if debugging enabled
+ * @l: intended debug level
+ * @dev: 'struct device' pointer, NULL if none (for context)
+ * @ptr: start of the buffer to dump
+ * @size: number of bytes to dump
+ */
+#define d_dump(l, dev, ptr, size)			\
+do {							\
+	char head[64];					\
+	if (!d_test(l))					\
+		break;					\
+	__d_head(head, sizeof(head), dev);		\
+	print_hex_dump(KERN_ERR, head, 0, 16, 1,	\
+		       ((void *)(ptr)), (size), 0);	\
+} while (0)
+
+
+/**
+ * d_level_register_debugfs - Export a submodule's level as PREFIXNAME
+ *
+ * @prefix: string literal to prefix the file name with
+ * @name: name of submodule (not a string, just the name)
+ * @parent: debugfs parent dentry
+ *
+ * For removing, just use debugfs_remove_recursive() on the parent.
+ */
+#define d_level_register_debugfs(prefix, name, parent)			\
+({									\
+	debugfs_create_u8(						\
+		prefix #name, 0600, parent,				\
+		&(D_LEVEL[__D_SUBMODULE_ ## name].level));		\
+})
+
+
+/* Set the runtime level of the submodule named @submodule; an unknown
+ * name is only reported through printk() */
+static inline
+void d_submodule_set(struct d_level *d_level, size_t d_level_size,
+		     const char *submodule, u8 level, const char *tag)
+{
+	size_t idx;
+
+	for (idx = 0; idx < d_level_size; idx++) {
+		struct d_level *entry = &d_level[idx];
+
+		if (entry->name == NULL) {
+			printk(KERN_ERR "%s: itr->name NULL?? (%p, #%d)\n",
+			       tag, entry, (int)idx);
+		} else if (strcmp(entry->name, submodule) == 0) {
+			entry->level = level;
+			return;
+		}
+	}
+	printk(KERN_ERR "%s: unknown submodule %s\n", tag, submodule);
+}
+
+
+/**
+ * d_parse_params - Parse a string with debug parameters from the
+ * command line
+ *
+ * @d_level: level structure (D_LEVEL)
+ * @d_level_size: number of items in the level structure
+ *     (D_LEVEL_SIZE).
+ * @_params: string with the parameters; this is a space (not tab!)
+ *     separated list of NAME:VALUE, where value is the debug level
+ *     and NAME is the submodule name (at most 129 characters).
+ * @tag: string for error messages (example: MODULE.ARGNAME).
+ */
+static inline
+void d_parse_params(struct d_level *d_level, size_t d_level_size,
+		    const char *_params, const char *tag)
+{
+	char submodule[130], *params, *params_orig, *token, *colon;
+	unsigned level, tokens;
+
+	if (_params == NULL)
+		return;
+	params_orig = kstrdup(_params, GFP_KERNEL);
+	params = params_orig;
+	while (1) {
+		token = strsep(&params, " ");
+		if (token == NULL)
+			break;
+		if (*token == '\0')	/* eat joint spaces */
+			continue;
+		/* kernel's sscanf %s eats until whitespace, so turn the
+		 * ':' into '\n' to split NAME from VALUE; the %129s width
+		 * keeps a long NAME from overflowing submodule[] */
+		colon = strchr(token, ':');
+		if (colon != NULL)
+			*colon = '\n';
+		tokens = sscanf(token, "%129s\n%u", submodule, &level);
+		if (colon != NULL)
+			*colon = ':';	/* set back, for error messages */
+		if (tokens == 2)
+			d_submodule_set(d_level, d_level_size,
+					submodule, level, tag);
+		else
+			printk(KERN_ERR "%s: can't parse '%s' as a "
+			       "SUBMODULE:LEVEL (%d tokens)\n",
+			       tag, token, tokens);
+	}
+	kfree(params_orig);
+}
+
+#endif /* #ifndef __debug__h__ */
diff --git a/drivers/staging/wimax/linux-wimax.h b/drivers/staging/wimax/linux-wimax.h
new file mode 100644
index 000000000000..9f6b77af2f6d
--- /dev/null
+++ b/drivers/staging/wimax/linux-wimax.h
@@ -0,0 +1,239 @@
+/*
+ * Linux WiMax
+ * API for user space
+ *
+ *
+ * Copyright (C) 2007-2008 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *  - Initial implementation
+ *
+ *
+ * This file declares the user/kernel protocol that is spoken over
+ * Generic Netlink, as well as any type declaration that is to be used
+ * by kernel and user space.
+ *
+ * It is intended for user space to clone it verbatim to use it as a
+ * primary reference for definitions.
+ *
+ * Stuff intended for kernel usage as well as full protocol and stack
+ * documentation is rooted in include/net/wimax.h.
+ */
+
+#ifndef __LINUX__WIMAX_H__
+#define __LINUX__WIMAX_H__
+
+#include <linux/types.h>
+
+enum {
+	/**
+	 * Version of the interface (unsigned decimal, MMm, max 25.5)
+	 * M - Major: change if removing or modifying an existing call.
+	 * m - minor: change when adding a new call
+	 */
+	WIMAX_GNL_VERSION = 01,
+	/* Generic NetLink attributes */
+	WIMAX_GNL_ATTR_INVALID = 0x00,
+	WIMAX_GNL_ATTR_MAX = 10,
+};
+
+
+/*
+ * Generic NetLink operations
+ *
+ * Most of these map to an API call; _OP_ stands for operation, _RP_
+ * for reply and _RE_ for report (aka: signal).
+ */
+enum {
+	WIMAX_GNL_OP_MSG_FROM_USER,	/* User to kernel message */
+	WIMAX_GNL_OP_MSG_TO_USER,	/* Kernel to user message */
+	WIMAX_GNL_OP_RFKILL,	/* Run wimax_rfkill() */
+	WIMAX_GNL_OP_RESET,	/* Run wimax_reset() */
+	WIMAX_GNL_RE_STATE_CHANGE,	/* Report: status change */
+	WIMAX_GNL_OP_STATE_GET,		/* Request for current state */
+};
+
+
+/* Message from user / to user */
+enum {
+	WIMAX_GNL_MSG_IFIDX = 1,
+	WIMAX_GNL_MSG_PIPE_NAME,
+	WIMAX_GNL_MSG_DATA,
+};
+
+
+/*
+ * wimax_rfkill()
+ *
+ * The state of the radio (ON/OFF) is mapped to the rfkill subsystem's
+ * switch state (DISABLED/ENABLED).
+ */
+enum wimax_rf_state {
+	WIMAX_RF_OFF = 0,	/* Radio is off, rfkill on/enabled */
+	WIMAX_RF_ON = 1,	/* Radio is on, rfkill off/disabled */
+	WIMAX_RF_QUERY = 2,
+};
+
+/* Attributes */
+enum {
+	WIMAX_GNL_RFKILL_IFIDX = 1,
+	WIMAX_GNL_RFKILL_STATE,
+};
+
+
+/* Attributes for wimax_reset() */
+enum {
+	WIMAX_GNL_RESET_IFIDX = 1,
+};
+
+/* Attributes for wimax_state_get() */
+enum {
+	WIMAX_GNL_STGET_IFIDX = 1,
+};
+
+/*
+ * Attributes for the Report State Change
+ *
+ * For now we just have the old and new states; new attributes might
+ * be added later on.
+ */
+enum {
+	WIMAX_GNL_STCH_IFIDX = 1,
+	WIMAX_GNL_STCH_STATE_OLD,
+	WIMAX_GNL_STCH_STATE_NEW,
+};
+
+
+/**
+ * enum wimax_st - The different states of a WiMAX device
+ * @__WIMAX_ST_NULL: The device structure has been allocated and zeroed,
+ *     but still wimax_dev_add() hasn't been called. There is no state.
+ *
+ * @WIMAX_ST_DOWN: The device has been registered with the WiMAX and
+ *     networking stacks, but it is not initialized (normally that is
+ *     done with 'ifconfig DEV up' [or equivalent], which can upload
+ *     firmware and enable communications with the device).
+ *     In this state, the device is powered down and using as little
+ *     power as possible.
+ *     This state is the default after a call to wimax_dev_add(). It
+ *     is ok to have drivers move directly to %WIMAX_ST_UNINITIALIZED
+ *     or %WIMAX_ST_RADIO_OFF in _probe() after the call to
+ *     wimax_dev_add().
+ *     It is recommended that the driver leaves this state when
+ *     calling 'ifconfig DEV up' and enters it back on 'ifconfig DEV
+ *     down'.
+ *
+ * @__WIMAX_ST_QUIESCING: The device is being torn down, so no API
+ *     operations are allowed to proceed except the ones needed to
+ *     complete the device clean up process.
+ *
+ * @WIMAX_ST_UNINITIALIZED: [optional] Communication with the device
+ *     is setup, but the device still requires some configuration
+ *     before being operational.
+ *     Some WiMAX API calls might work.
+ *
+ * @WIMAX_ST_RADIO_OFF: The device is fully up; radio is off (whether
+ *     by hardware or software switches).
+ *     It is recommended to always leave the device in this state
+ *     after initialization.
+ *
+ * @WIMAX_ST_READY: The device is fully up and radio is on.
+ *
+ * @WIMAX_ST_SCANNING: [optional] The device has been instructed to
+ *     scan. In this state, the device cannot be actively connected to
+ *     a network.
+ *
+ * @WIMAX_ST_CONNECTING: The device is connecting to a network. This
+ *     state exists because in some devices, the connect process can
+ *     include a number of negotiations between user space, kernel
+ *     space and the device. User space needs to know what the device
+ *     is doing. If the connect sequence in a device is atomic and
+ *     fast, the device can transition directly to CONNECTED
+ *
+ * @WIMAX_ST_CONNECTED: The device is connected to a network.
+ *
+ * @__WIMAX_ST_INVALID: This is an invalid state used to mark the
+ *     maximum numeric value of states.
+ *
+ * Description:
+ *
+ * Transitions from one state to another one are atomic and can only
+ * be caused in kernel space with wimax_state_change(). To read the
+ * state, use wimax_state_get().
+ *
+ * States starting with __ are internal and shall not be used or
+ * referred to by drivers or userspace. They look ugly, but that's the
+ * point -- if any use is made non-internal to the stack, it is easier
+ * to catch on review.
+ *
+ * All API operations [with well defined exceptions] will take the
+ * device mutex before starting and then check the state. If the state
+ * is %__WIMAX_ST_NULL, %WIMAX_ST_DOWN, %WIMAX_ST_UNINITIALIZED or
+ * %__WIMAX_ST_QUIESCING, it will drop the lock and quit with
+ * -%EINVAL, -%ENOMEDIUM, -%ENOTCONN or -%ESHUTDOWN.
+ *
+ * The order of the definitions is important, so we can do numerical
+ * comparisons (eg: < %WIMAX_ST_RADIO_OFF means the device is not ready
+ * to operate).
+ */
+/*
+ * The allowed state transitions are described in the table below
+ * (states in rows can go to states in columns where there is an X):
+ *
+ *                                  UNINI   RADIO READY SCAN CONNEC CONNEC
+ *             NULL DOWN QUIESCING TIALIZED  OFF        NING  TING   TED
+ * NULL         -    x
+ * DOWN              -      x        x       x
+ * QUIESCING         x      -
+ * UNINITIALIZED            x        -       x
+ * RADIO_OFF                x                -     x
+ * READY                    x                x     -     x     x      x
+ * SCANNING                 x                x     x     -     x      x
+ * CONNECTING               x                x     x     x     -      x
+ * CONNECTED                x                x     x                  -
+ *
+ * This table not available in kernel-doc because the formatting messes it up.
+ */
+ enum wimax_st {
+	__WIMAX_ST_NULL = 0,
+	WIMAX_ST_DOWN,
+	__WIMAX_ST_QUIESCING,
+	WIMAX_ST_UNINITIALIZED,
+	WIMAX_ST_RADIO_OFF,
+	WIMAX_ST_READY,
+	WIMAX_ST_SCANNING,
+	WIMAX_ST_CONNECTING,
+	WIMAX_ST_CONNECTED,
+	__WIMAX_ST_INVALID			/* Always keep last */
+};
+
+
+#endif /* #ifndef __LINUX__WIMAX_H__ */
diff --git a/drivers/staging/wimax/net-wimax.h b/drivers/staging/wimax/net-wimax.h
new file mode 100644
index 000000000000..f578e345e2bd
--- /dev/null
+++ b/drivers/staging/wimax/net-wimax.h
@@ -0,0 +1,503 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Linux WiMAX
+ * Kernel space API for accessing WiMAX devices
+ *
+ * Copyright (C) 2007-2008 Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * The WiMAX stack provides an API for controlling and managing the
+ * system's WiMAX devices. This API affects the control plane; the
+ * data plane is accessed via the network stack (netdev).
+ *
+ * Parts of the WiMAX stack API and notifications are exported to
+ * user space via Generic Netlink. In user space, libwimax (part of
+ * the wimax-tools package) provides a shim layer for accessing those
+ * calls.
+ *
+ * The API is standardized for all WiMAX devices and different drivers
+ * implement the backend support for it. However, device-specific
+ * messaging pipes are provided that can be used to issue commands and
+ * receive notifications in free form.
+ *
+ * Currently the messaging pipes are the only means of control as it
+ * is not known (due to the lack of more devices in the market) what
+ * will be a good abstraction layer. Expect this to change as more
+ * devices show in the market. This API is designed to be growable in
+ * order to address this problem.
+ *
+ * USAGE
+ *
+ * Embed a `struct wimax_dev` at the beginning of the device's
+ * private structure, initialize and register it. For details, see
+ * `struct wimax_dev`s documentation.
+ *
+ * Once this is done, wimax-tools's libwimaxll can be used to
+ * communicate with the driver from user space. Your user space
+ * application is not forced to use libwimaxll and can talk
+ * the generic netlink protocol directly if desired.
+ *
+ * Remember this is a very low level API that will to provide all of
+ * WiMAX features. Other daemons and services running in user space
+ * are the expected clients of it. They offer a higher level API that
+ * applications should use (an example of this is the Intel's WiMAX
+ * Network Service for the i2400m).
+ *
+ * DESIGN
+ *
+ * Although not set in stone, this very basic interface is
+ * mostly completed. Remember this is meant to grow as new common
+ * operations are decided upon. New operations will be added to the
+ * interface, intent being on keeping backwards compatibility as much
+ * as possible.
+ *
+ * This layer implements a set of calls to control a WiMAX device,
+ * exposing a frontend to the rest of the kernel and user space (via
+ * generic netlink) and a backend implementation in the driver through
+ * function pointers.
+ *
+ * WiMAX devices have a state, and a kernel-only API allows the
+ * drivers to manipulate that state. State transitions are atomic, and
+ * only some of them are allowed (see `enum wimax_st`).
+ *
+ * Most API calls will set the state automatically; in most cases
+ * drivers have to only report state changes due to external
+ * conditions.
+ *
+ * All API operations are 'atomic', serialized through a mutex in the
+ * `struct wimax_dev`.
+ *
+ * EXPORTING TO USER SPACE THROUGH GENERIC NETLINK
+ *
+ * The API is exported to user space using generic netlink (other
+ * methods can be added as needed).
+ *
+ * There is a Generic Netlink Family named "WiMAX", where interfaces
+ * supporting the WiMAX interface receive commands and broadcast their
+ * signals over a multicast group named "msg".
+ *
+ * Mapping to the source/destination interface is done by an interface
+ * index attribute.
+ *
+ * For user-to-kernel traffic (commands) we use a function call
+ * marshalling mechanism, where a message X with attributes A, B, C
+ * sent from user space to kernel space means executing the WiMAX API
+ * call wimax_X(A, B, C), sending the results back as a message.
+ *
+ * Kernel-to-user (notifications or signals) communication is sent
+ * over multicast groups. This allows to have multiple applications
+ * monitoring them.
+ *
+ * Each command/signal gets assigned its own attribute policy. This
+ * way the validator will verify that all the attributes in there are
+ * only the ones that should be for each command/signal. Think of an
+ * attribute mapping to a type+argumentname for each command/signal.
+ *
+ * If we had a single policy for *all* commands/signals, after running
+ * the validator we'd have to check "does this attribute belong in
+ * here"?  for each one. It can be done manually, but it's just easier
+ * to have the validator do that job with multiple policies. As well,
+ * it makes it easier to later expand each command/signal signature
+ * without affecting others and keeping the namespace more or less
+ * sane. Not that it is too complicated, but it makes it even easier.
+ *
+ * No state information is maintained in the kernel for each user
+ * space connection (the connection is stateless).
+ *
+ * TESTING FOR THE INTERFACE AND VERSIONING
+ *
+ * If network interface X is a WiMAX device, there will be a Generic
+ * Netlink family named "WiMAX X" and the device will present a
+ * "wimax" directory in its network sysfs directory
+ * (/sys/class/net/DEVICE/wimax) [used by HAL].
+ *
+ * The inexistence of any of these means the device does not support
+ * this WiMAX API.
+ *
+ * By querying the generic netlink controller, versioning information
+ * and the multicast groups available can be found. Applications using
+ * the interface can either rely on that or use the generic netlink
+ * controller to figure out which generic netlink commands/signals are
+ * supported.
+ *
+ * NOTE: this versioning is a last resort to avoid hard
+ *    incompatibilities. It is the intention of the design of this
+ *    stack not to introduce backward incompatible changes.
+ *
+ * The version code has to fit in one byte (restrictions imposed by
+ * generic netlink); we use `version / 10` for the major version and
+ * `version % 10` for the minor. This gives 9 minors for each major
+ * and 25 majors.
+ *
+ * The version change protocol is as follows:
+ *
+ * - Major versions: needs to be increased if an existing message/API
+ *   call is changed or removed. Doesn't need to be changed if a new
+ *   message is added.
+ *
+ * - Minor version: needs to be increased if new messages/API calls are
+ *   being added or some other consideration that doesn't impact the
+ *   user-kernel interface too much (like some kind of bug fix) and
+ *   that is kind of left up in the air to common sense.
+ *
+ * User space code should not try to work if the major version it was
+ * compiled for differs from what the kernel offers. As well, if the
+ * minor version of the kernel interface is lower than the one user
+ * space is expecting (the one it was compiled for), the kernel
+ * might be missing API calls; user space shall be ready to handle
+ * said condition. Use the generic netlink controller operations to
+ * find which ones are supported and which not.
+ *
+ * libwimaxll:wimaxll_open() takes care of checking versions.
+ *
+ * THE OPERATIONS:
+ *
+ * Each operation is defined in its own file (drivers/net/wimax/op-*.c)
+ * for clarity. The parts needed for an operation are:
+ *
+ *  - a function pointer in `struct wimax_dev`: optional, as the
+ *    operation might be implemented by the stack and not by the
+ *    driver.
+ *
+ *    All function pointers are named wimax_dev->op_*(), and drivers
+ *    must implement them except where noted otherwise.
+ *
+ *  - When exported to user space, a `struct nla_policy` to define the
+ *    attributes of the generic netlink command and a `struct genl_ops`
+ *    to define the operation.
+ *
+ * All the declarations for the operation codes (WIMAX_GNL_OP_<NAME>)
+ * and generic netlink attributes (WIMAX_GNL_<NAME>_*) are declared in
+ * include/linux/wimax.h; this file is intended to be cloned by user
+ * space to gain access to those declarations.
+ *
+ * A few caveats to remember:
+ *
+ *  - Need to define attribute numbers starting in 1; otherwise it
+ *    fails.
+ *
+ *  - the `struct genl_family` requires a maximum attribute id; when
+ *    defining the `struct nla_policy` for each message, it has to have
+ *    an array size of WIMAX_GNL_ATTR_MAX+1.
+ *
+ * The op_*() function pointers will not be called if the wimax_dev is
+ * in a state <= %WIMAX_ST_UNINITIALIZED. The exception is:
+ *
+ * - op_reset: can be called at any time after wimax_dev_add() has
+ *   been called.
+ *
+ * THE PIPE INTERFACE:
+ *
+ * This interface is kept intentionally simple. The driver can send
+ * and receive free-form messages to/from user space through a
+ * pipe. See drivers/net/wimax/op-msg.c for details.
+ *
+ * The kernel-to-user messages are sent with
+ * wimax_msg(). user-to-kernel messages are delivered via
+ * wimax_dev->op_msg_from_user().
+ *
+ * RFKILL:
+ *
+ * RFKILL support is built into the wimax_dev layer; the driver just
+ * needs to call wimax_report_rfkill_{hw,sw}() to inform of changes in
+ * the hardware or software RF kill switches. When the stack wants to
+ * turn the radio off, it will call wimax_dev->op_rfkill_sw_toggle(),
+ * which the driver implements.
+ *
+ * User space can set the software RF Kill switch by calling
+ * wimax_rfkill().
+ *
+ * The code for now only supports devices that don't require polling;
+ * If the device needs to be polled, create a self-rearming delayed
+ * work struct for polling or look into adding polled support to the
+ * WiMAX stack.
+ *
+ * When initializing the hardware (_probe), after calling
+ * wimax_dev_add(), query the device for its RF Kill switches status
+ * and feed it back to the WiMAX stack using
+ * wimax_report_rfkill_{hw,sw}(). If any switch is missing, always
+ * report it as ON.
+ *
+ * NOTE: the wimax stack uses an inverted terminology to that of the
+ * RFKILL subsystem:
+ *
+ *  - ON: radio is ON, RFKILL is DISABLED or OFF.
+ *  - OFF: radio is OFF, RFKILL is ENABLED or ON.
+ *
+ * MISCELLANEOUS OPS:
+ *
+ * wimax_reset() can be used to reset the device to power on state; by
+ * default it issues a warm reset that maintains the same device
+ * node. If that is not possible, it falls back to a cold reset
+ * (device reconnect). The driver implements the backend to this
+ * through wimax_dev->op_reset().
+ */
+
+#ifndef __NET__WIMAX_H__
+#define __NET__WIMAX_H__
+
+#include "linux-wimax.h"
+#include <net/genetlink.h>
+#include <linux/netdevice.h>
+
+struct net_device;
+struct genl_info;
+struct wimax_dev;
+
+/**
+ * struct wimax_dev - Generic WiMAX device
+ *
+ * @net_dev: [fill] Pointer to the &struct net_device this WiMAX
+ *     device implements.
+ *
+ * @op_msg_from_user: [fill] Driver-specific operation to
+ *     handle a raw message from user space to the driver. The
+ *     driver can send messages to user space using
+ *     wimax_msg().
+ *
+ * @op_rfkill_sw_toggle: [fill] Driver-specific operation to act on
+ *     userspace (or any other agent) requesting the WiMAX device to
+ *     change the RF Kill software switch (WIMAX_RF_ON or
+ *     WIMAX_RF_OFF).
+ *     If such hardware support is not present, it is assumed the
+ *     radio cannot be switched off and it is always on (and the stack
+ *     will error out when trying to switch it off). In such case,
+ *     this function pointer can be left as NULL.
+ *
+ * @op_reset: [fill] Driver specific operation to reset the
+ *     device.
+ *     This operation should always attempt first a warm reset that
+ *     does not disconnect the device from the bus and return 0.
+ *     If that fails, it should resort to some sort of cold or bus
+ *     reset (even if it implies a bus disconnection and device
+ *     disappearance). In that case, -ENODEV should be returned to
+ *     indicate the device is gone.
+ *     This operation has to be synchronous, and return only when the
+ *     reset is complete. In case of having had to resort to bus/cold
+ *     reset implying a device disconnection, the call is allowed to
+ *     return immediately.
+ *     NOTE: wimax_dev->mutex is NOT locked when this op is being
+ *     called; however, wimax_dev->mutex_reset IS locked to ensure
+ *     serialization of calls to wimax_reset().
+ *     See wimax_reset()'s documentation.
+ *
+ * @name: [fill] A way to identify this device. We need to register a
+ *     name with many subsystems (rfkill, workqueue creation, etc).
+ *     We can't use the network device name as that
+ *     might change and in some instances we don't know it yet (until
+ *     we don't call register_netdev()). So we generate an unique one
+ *     using the driver name and device bus id, place it here and use
+ *     it across the board. Recommended naming:
+ *     DRIVERNAME-BUSNAME:BUSID (dev->bus->name, dev->bus_id).
+ *
+ * @id_table_node: [private] link to the list of wimax devices kept by
+ *     id-table.c. Protected by its own spinlock.
+ *
+ * @mutex: [private] Serializes all concurrent access and execution of
+ *     operations.
+ *
+ * @mutex_reset: [private] Serializes reset operations. Needs to be a
+ *     different mutex because as part of the reset operation, the
+ *     driver has to call back into the stack to do things such as
+ *     state change, that require wimax_dev->mutex.
+ *
+ * @state: [private] Current state of the WiMAX device.
+ *
+ * @rfkill: [private] integration into the RF-Kill infrastructure.
+ *
+ * @rf_sw: [private] State of the software radio switch (OFF/ON)
+ *
+ * @rf_hw: [private] State of the hardware radio switch (OFF/ON)
+ *
+ * @debugfs_dentry: [private] Used to hook up a debugfs entry. This
+ *     shows up in the debugfs root as wimax\:DEVICENAME.
+ *
+ * Description:
+ * This structure defines a common interface to access all WiMAX
+ * devices from different vendors and provides a common API as well as
+ * a free-form device-specific messaging channel.
+ *
+ * Usage:
+ *  1. Embed a &struct wimax_dev at *the beginning* of the network
+ *     device structure so that netdev_priv() points to it.
+ *
+ *  2. memset() it to zero
+ *
+ *  3. Initialize with wimax_dev_init(). This will leave the WiMAX
+ *     device in the %__WIMAX_ST_NULL state.
+ *
+ *  4. Fill all the fields marked with [fill]; once called
+ *     wimax_dev_add(), those fields CANNOT be modified.
+ *
+ *  5. Call wimax_dev_add() *after* registering the network
+ *     device. This will leave the WiMAX device in the %WIMAX_ST_DOWN
+ *     state.
+ *     Protect the driver's net_device->open() against succeeding if
+ *     the wimax device state is lower than %WIMAX_ST_DOWN.
+ *
+ *  6. Select when the device is going to be turned on/initialized;
+ *     for example, it could be initialized on 'ifconfig up' (when the
+ *     netdev op 'open()' is called on the driver).
+ *
+ * When the device is initialized (at `ifconfig up` time, or right
+ * after calling wimax_dev_add() from _probe(), make sure the
+ * following steps are taken
+ *
+ *  a. Move the device to %WIMAX_ST_UNINITIALIZED. This is needed so
+ *     some API calls that shouldn't work until the device is ready
+ *     can be blocked.
+ *
+ *  b. Initialize the device. Make sure to turn the SW radio switch
+ *     off and move the device to state %WIMAX_ST_RADIO_OFF when
+ *     done. When just initialized, a device should be left in RADIO
+ *     OFF state until user space decides to turn it on.
+ *
+ *  c. Query the device for the state of the hardware rfkill switch
+ *     and call wimax_report_rfkill_hw() and wimax_report_rfkill_sw()
+ *     as needed. See below.
+ *
+ * wimax_dev_rm() undoes before unregistering the network device. Once
+ * wimax_dev_add() is called, the driver can get called on the
+ * wimax_dev->op_* function pointers
+ *
+ * CONCURRENCY:
+ *
+ * The stack provides a mutex for each device that will disallow API
+ * calls happening concurrently; thus, op calls into the driver
+ * through the wimax_dev->op*() function pointers will always be
+ * serialized and *never* concurrent.
+ *
+ * For locking, wimax_dev->mutex is taken; (most) operations in
+ * the API have to check for wimax_dev_is_ready() to return 0 before
+ * continuing (this is done internally).
+ *
+ * REFERENCE COUNTING:
+ *
+ * The WiMAX device is reference counted by the associated network
+ * device. The only operation that can be used to reference the device
+ * is wimax_dev_get_by_genl_info(), and the reference it acquires has
+ * to be released with dev_put(wimax_dev->net_dev).
+ *
+ * RFKILL:
+ *
+ * At startup, both HW and SW radio switches are assumed to be off.
+ *
+ * At initialization time [after calling wimax_dev_add()], have the
+ * driver query the device for the status of the software and hardware
+ * RF kill switches and call wimax_report_rfkill_hw() and
+ * wimax_report_rfkill_sw() to indicate their state. If any is
+ * missing, just call it to indicate it is ON (radio always on).
+ *
+ * Whenever the driver detects a change in the state of the RF kill
+ * switches, it should call wimax_report_rfkill_hw() or
+ * wimax_report_rfkill_sw() to report it to the stack.
+ */
+struct wimax_dev {
+	struct net_device *net_dev;	/* [fill] netdev this device implements */
+	struct list_head id_table_node;	/* [private] link in id-table.c's list */
+	struct mutex mutex;		/* Protects all members and API calls */
+	struct mutex mutex_reset;	/* [private] serializes reset operations */
+	enum wimax_st state;		/* [private] current device state */
+
+	int (*op_msg_from_user)(struct wimax_dev *wimax_dev,
+				const char *,	/* pipe name, may be NULL */
+				const void *, size_t,	/* message and its length */
+				const struct genl_info *info);
+	int (*op_rfkill_sw_toggle)(struct wimax_dev *wimax_dev,
+				   enum wimax_rf_state);
+	int (*op_reset)(struct wimax_dev *wimax_dev);
+
+	struct rfkill *rfkill;		/* [private] RF-Kill integration */
+	unsigned int rf_hw;		/* [private] hardware radio switch state */
+	unsigned int rf_sw;		/* [private] software radio switch state */
+	char name[32];			/* [fill] unique device identifier */
+
+	struct dentry *debugfs_dentry;	/* [private] debugfs hook */
+};
+
+
+
+/*
+ * WiMAX stack public API for device drivers (not exported to user space)
+ *
+ * wimax_dev_init() leaves the device in %__WIMAX_ST_NULL; wimax_dev_add(),
+ * called after registering the net_device, moves it to %WIMAX_ST_DOWN.
+ */
+void wimax_dev_init(struct wimax_dev *);
+int wimax_dev_add(struct wimax_dev *, struct net_device *);
+void wimax_dev_rm(struct wimax_dev *);
+
+static inline struct wimax_dev *
+net_dev_to_wimax(struct net_device *net_dev)
+{
+	return netdev_priv(net_dev);	/* wimax_dev leads the private area */
+}
+
+static inline struct device *
+wimax_dev_to_dev(struct wimax_dev *wimax_dev)
+{
+	return wimax_dev->net_dev->dev.parent;	/* parent of the net_device */
+}
+
+void wimax_state_change(struct wimax_dev *, enum wimax_st);
+enum wimax_st wimax_state_get(struct wimax_dev *);
+
+/*
+ * Radio Switch state reporting; drivers call these on RF kill changes.
+ *
+ * enum wimax_rf_state is declared in linux/wimax.h so the exports
+ * to user space can use it.
+ */
+void wimax_report_rfkill_hw(struct wimax_dev *, enum wimax_rf_state);
+void wimax_report_rfkill_sw(struct wimax_dev *, enum wimax_rf_state);
+
+
+/*
+ * Free-form messaging to/from user space
+ *
+ * Sending a message:
+ *
+ *   wimax_msg(wimax_dev, pipe_name, buf, buf_size, GFP_KERNEL);
+ *
+ * Broken up:
+ *
+ *   skb = wimax_msg_alloc(wimax_dev, pipe_name, buf, buf_size, GFP_KERNEL);
+ *   ...fill up skb...
+ *   wimax_msg_send(wimax_dev, skb);
+ *
+ * Be sure not to modify skb->data in the middle (ie: don't use
+ * skb_push()/skb_pull()/skb_reserve() on the skb).
+ *
+ * "pipe_name" is any string that can be interpreted as the name of
+ * the pipe or recipient; the interpretation of it is driver
+ * specific, so the recipient can multiplex it as wished. It can be
+ * NULL, in which case it is not sent. An example is using a
+ * "diagnostics" tag to send diagnostics information that a
+ * device-specific diagnostics tool would be interested in.
+ */
+struct sk_buff *wimax_msg_alloc(struct wimax_dev *, const char *, const void *,
+				size_t, gfp_t);
+int wimax_msg_send(struct wimax_dev *, struct sk_buff *);
+int wimax_msg(struct wimax_dev *, const char *, const void *, size_t, gfp_t);
+
+const void *wimax_msg_data_len(struct sk_buff *, size_t *);
+const void *wimax_msg_data(struct sk_buff *);
+ssize_t wimax_msg_len(struct sk_buff *);
+
+
+/*
+ * WiMAX stack user space API
+ * --------------------------
+ *
+ * This API is what gets exported to user space for general
+ * operations. These calls can also be made from within the kernel
+ * (with a properly referenced `struct wimax_dev`).
+ *
+ * Properly referenced means: the 'struct net_device' that embeds the
+ * device's control structure and (as such) the 'struct wimax_dev' is
+ * referenced by the caller.
+ */
+int wimax_rfkill(struct wimax_dev *, enum wimax_rf_state);
+int wimax_reset(struct wimax_dev *);
+
+#endif /* #ifndef __NET__WIMAX_H__ */
diff --git a/drivers/staging/wimax/op-msg.c b/drivers/staging/wimax/op-msg.c
new file mode 100644
index 000000000000..e20ac7d84e82
--- /dev/null
+++ b/drivers/staging/wimax/op-msg.c
@@ -0,0 +1,391 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Linux WiMAX
+ * Generic messaging interface between userspace and driver/device
+ *
+ * Copyright (C) 2007-2008 Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * This implements a direct communication channel between user space and
+ * the driver/device, by which free form messages can be sent back and
+ * forth.
+ *
+ * This is intended for device-specific features, vendor quirks, etc.
+ *
+ * See include/net/wimax.h
+ *
+ * GENERIC NETLINK ENCODING AND CAPACITY
+ *
+ * A destination "pipe name" is added to each message; it is up to the
+ * drivers to assign or use those names (if using them at all).
+ *
+ * Messages are encoded as a binary netlink attribute using nla_put()
+ * using type NLA_UNSPEC (as some versions of libnl still in
+ * deployment don't yet understand NLA_BINARY).
+ *
+ * The maximum capacity of this transport is PAGESIZE per message (so
+ * the actual payload will be a bit smaller depending on the
+ * netlink/generic netlink attributes and headers).
+ *
+ * RECEPTION OF MESSAGES
+ *
+ * When a message is received from user space, it is passed verbatim
+ * to the driver calling wimax_dev->op_msg_from_user(). The return
+ * value from this function is passed back to user space as an ack
+ * over the generic netlink protocol.
+ *
+ * The stack doesn't do any processing or interpretation of these
+ * messages.
+ *
+ * SENDING MESSAGES
+ *
+ * Messages can be sent with wimax_msg().
+ *
+ * If the message delivery needs to happen on a different context to
+ * that of its creation, wimax_msg_alloc() can be used to get a
+ * pointer to the message that can be delivered later on with
+ * wimax_msg_send().
+ *
+ * ROADMAP
+ *
+ * wimax_gnl_doit_msg_from_user()    Process a message from user space
+ *   wimax_dev_get_by_genl_info()
+ *   wimax_dev->op_msg_from_user()   Delivery of message to the driver
+ *
+ * wimax_msg()                       Send a message to user space
+ *   wimax_msg_alloc()
+ *   wimax_msg_send()
+ */
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <net/genetlink.h>
+#include <linux/netdevice.h>
+#include "linux-wimax.h"
+#include <linux/security.h>
+#include <linux/export.h>
+#include "wimax-internal.h"
+
+
+#define D_SUBMODULE op_msg
+#include "debug-levels.h"
+
+
+/**
+ * wimax_msg_alloc - Create a new skb for sending a message to userspace
+ *
+ * @wimax_dev: WiMAX device descriptor
+ * @pipe_name: "named pipe" the message will be sent to; may be NULL
+ * @msg: pointer to the message data to send
+ * @size: size of the message to send (in bytes), including the header.
+ * @gfp_flags: flags for memory allocation.
+ *
+ * Returns: the new skb if ok, an ERR_PTR()-encoded negative errno on error
+ *
+ * Description:
+ *
+ * Allocates an skb that will contain the message to send to user
+ * space over the messaging pipe and initializes it, copying the
+ * payload.
+ *
+ * Once this call is done, you can deliver it with
+ * wimax_msg_send().
+ *
+ * IMPORTANT:
+ *
+ * Don't use skb_push()/skb_pull()/skb_reserve() on the skb, as
+ * wimax_msg_send() depends on skb->data being placed at the
+ * beginning of the user message.
+ *
+ * Unlike other WiMAX stack calls, this call can be used way early,
+ * even before wimax_dev_add() is called, as long as the
+ * wimax_dev->net_dev pointer is set to point to a proper
+ * net_dev. This is so that drivers can use it early in case they need
+ * to send stuff around or communicate with user space.
+ */
+struct sk_buff *wimax_msg_alloc(struct wimax_dev *wimax_dev,
+				const char *pipe_name,
+				const void *msg, size_t size,
+				gfp_t gfp_flags)
+{
+	int result;
+	struct device *dev = wimax_dev_to_dev(wimax_dev);
+	size_t msg_size;
+	void *genl_msg;
+	struct sk_buff *skb;
+
+	/* nla_put_string() also copies the terminating NUL: count it */
+	msg_size = nla_total_size(size) + nla_total_size(sizeof(u32))
+		+ (pipe_name ? nla_total_size(strlen(pipe_name) + 1) : 0);
+	result = -ENOMEM;
+	skb = genlmsg_new(msg_size, gfp_flags);
+	if (skb == NULL)
+		goto error_new;
+	genl_msg = genlmsg_put(skb, 0, 0, &wimax_gnl_family,
+			       0, WIMAX_GNL_OP_MSG_TO_USER);
+	if (genl_msg == NULL) {
+		dev_err(dev, "no memory to create generic netlink message\n");
+		goto error_genlmsg_put;
+	}
+	result = nla_put_u32(skb, WIMAX_GNL_MSG_IFIDX,
+			     wimax_dev->net_dev->ifindex);
+	if (result < 0) {
+		dev_err(dev, "no memory to add ifindex attribute\n");
+		goto error_nla_put;
+	}
+	if (pipe_name) {
+		result = nla_put_string(skb, WIMAX_GNL_MSG_PIPE_NAME,
+					pipe_name);
+		if (result < 0) {
+			dev_err(dev, "no memory to add pipe_name attribute\n");
+			goto error_nla_put;
+		}
+	}
+	result = nla_put(skb, WIMAX_GNL_MSG_DATA, size, msg);
+	if (result < 0) {
+		dev_err(dev, "no memory to add payload (msg %p size %zu) in "
+			"attribute: %d\n", msg, size, result);
+		goto error_nla_put;
+	}
+	genlmsg_end(skb, genl_msg);
+	return skb;
+
+error_nla_put:
+error_genlmsg_put:
+error_new:
+	nlmsg_free(skb);	/* skb is NULL on error_new; freeing NULL is a no-op */
+	return ERR_PTR(result);
+}
+EXPORT_SYMBOL_GPL(wimax_msg_alloc);
+
+
+/**
+ * wimax_msg_data_len - Get the payload pointer and length of a message
+ *
+ * @msg: skb built by wimax_msg_alloc()
+ * @size: filled with the payload length; left untouched on failure
+ *
+ * Returns: the payload, or NULL if the WIMAX_GNL_MSG_DATA attribute is missing.
+ */
+const void *wimax_msg_data_len(struct sk_buff *msg, size_t *size)
+{
+	const struct nlmsghdr *hdr = (void *) msg->head;
+	const struct nlattr *payload = nlmsg_find_attr(hdr,
+						sizeof(struct genlmsghdr),
+						WIMAX_GNL_MSG_DATA);
+
+	if (payload) {
+		*size = nla_len(payload);
+		return nla_data(payload);
+	}
+	pr_err("Cannot find attribute WIMAX_GNL_MSG_DATA\n");
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(wimax_msg_data_len);
+
+
+/**
+ * wimax_msg_data - Get the payload pointer of a message
+ * @msg: skb built by wimax_msg_alloc()
+ *
+ * Returns: the payload, or NULL if WIMAX_GNL_MSG_DATA is missing.
+ */
+const void *wimax_msg_data(struct sk_buff *msg)
+{
+	const struct nlmsghdr *hdr = (void *) msg->head;
+	const struct nlattr *payload = nlmsg_find_attr(hdr,
+						sizeof(struct genlmsghdr),
+						WIMAX_GNL_MSG_DATA);
+
+	if (payload)
+		return nla_data(payload);
+	pr_err("Cannot find attribute WIMAX_GNL_MSG_DATA\n");
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(wimax_msg_data);
+
+
+/**
+ * wimax_msg_len - Get the payload length of a message
+ * @msg: skb built by wimax_msg_alloc()
+ *
+ * Returns: the length, or -EINVAL if WIMAX_GNL_MSG_DATA is missing.
+ */
+ssize_t wimax_msg_len(struct sk_buff *msg)
+{
+	const struct nlmsghdr *hdr = (void *) msg->head;
+	const struct nlattr *payload = nlmsg_find_attr(hdr,
+						sizeof(struct genlmsghdr),
+						WIMAX_GNL_MSG_DATA);
+
+	if (payload)
+		return nla_len(payload);
+	pr_err("Cannot find attribute WIMAX_GNL_MSG_DATA\n");
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(wimax_msg_len);
+
+
+/**
+ * wimax_msg_send - Multicast a message built by wimax_msg_alloc()
+ *
+ * @wimax_dev: WiMAX device descriptor
+ *
+ * @skb: &struct sk_buff obtained from wimax_msg_alloc(). Ownership
+ *     of @skb passes to this function.
+ *
+ * Returns: 0; the outcome of the multicast itself is not reported
+ *
+ * Description:
+ *
+ * Hands a free-form message, preallocated with wimax_msg_alloc()
+ * and filled up, to the generic netlink multicast machinery.
+ *
+ * After the skb has been passed in, the caller must consider it
+ * gone: this function owns it from then on.
+ *
+ * IMPORTANT:
+ *
+ * skb->data has to stay at the beginning of the user message, so
+ * never call skb_push()/skb_pull()/skb_reserve() on the skb before
+ * sending it.
+ *
+ * Like wimax_msg_alloc(), and unlike other WiMAX stack calls, this
+ * can be used very early, even before wimax_dev_add(), provided
+ * wimax_dev->net_dev already points to a proper net_dev. Drivers
+ * may need that to send stuff around or talk to user space early
+ * on.
+ */
+int wimax_msg_send(struct wimax_dev *wimax_dev, struct sk_buff *skb)
+{
+	struct device *dev = wimax_dev_to_dev(wimax_dev);
+	size_t payload_len = skb->len;
+	void *payload = skb->data;
+
+	might_sleep();
+	d_printf(1, dev, "CTX: wimax msg, %zu bytes\n", payload_len);
+	d_dump(2, dev, payload, payload_len);
+	genlmsg_multicast(&wimax_gnl_family, skb, 0, 0, GFP_KERNEL);
+	d_printf(1, dev, "CTX: genl multicast done\n");
+	return 0;
+}
+EXPORT_SYMBOL_GPL(wimax_msg_send);
+
+
+/**
+ * wimax_msg - Build a message and send it to user space
+ *
+ * @wimax_dev: WiMAX device descriptor (properly referenced)
+ * @pipe_name: "named pipe" the message is addressed to
+ * @buf: the message to send
+ * @size: number of bytes in @buf
+ * @gfp_flags: memory allocation flags
+ *
+ * Returns: %0 if ok, negative errno code on error.
+ *
+ * Description:
+ *
+ * Sends a free-form message to user space on behalf of @wimax_dev.
+ *
+ * NOTES:
+ *
+ * This is wimax_msg_alloc() followed by wimax_msg_send(); the skb
+ * allocated in between is handed over to wimax_msg_send().
+ */
+int wimax_msg(struct wimax_dev *wimax_dev, const char *pipe_name,
+	      const void *buf, size_t size, gfp_t gfp_flags)
+{
+	struct sk_buff *skb;
+
+	skb = wimax_msg_alloc(wimax_dev, pipe_name, buf, size, gfp_flags);
+	/* Allocation failures come back ERR_PTR()-encoded. */
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+
+	/* The skb now belongs to wimax_msg_send(). */
+	return wimax_msg_send(wimax_dev, skb);
+}
+EXPORT_SYMBOL_GPL(wimax_msg);
+
+/*
+ * Relays a message from user space to the driver
+ *
+ * The payload and optional pipe name are unpacked from the generic
+ * netlink attributes and handed to wimax_dev->op_msg_from_user().
+ *
+ * This call will block while handling/relaying the message.
+ */
+int wimax_gnl_doit_msg_from_user(struct sk_buff *skb, struct genl_info *info)
+{
+	int result, ifindex;
+	struct wimax_dev *wimax_dev;
+	struct device *dev;
+	struct nlmsghdr *nlh = info->nlhdr;
+	char *pipe_name;
+	void *msg_buf;
+	size_t msg_len;
+
+	might_sleep();
+	d_fnstart(3, NULL, "(skb %p info %p)\n", skb, info);
+	result = -ENODEV;
+	if (info->attrs[WIMAX_GNL_MSG_IFIDX] == NULL) {
+		pr_err("WIMAX_GNL_MSG_FROM_USER: can't find IFIDX attribute\n");
+		goto error_no_wimax_dev;
+	}
+	ifindex = nla_get_u32(info->attrs[WIMAX_GNL_MSG_IFIDX]);
+	wimax_dev = wimax_dev_get_by_genl_info(info, ifindex);
+	if (wimax_dev == NULL)
+		goto error_no_wimax_dev;
+	dev = wimax_dev_to_dev(wimax_dev);
+
+	/* Unpack arguments */
+	result = -EINVAL;
+	if (info->attrs[WIMAX_GNL_MSG_DATA] == NULL) {
+		dev_err(dev, "WIMAX_GNL_MSG_FROM_USER: can't find MSG_DATA "
+			"attribute\n");
+		goto error_no_data;
+	}
+	msg_buf = nla_data(info->attrs[WIMAX_GNL_MSG_DATA]);
+	msg_len = nla_len(info->attrs[WIMAX_GNL_MSG_DATA]);
+
+	if (info->attrs[WIMAX_GNL_MSG_PIPE_NAME] == NULL)
+		pipe_name = NULL;
+	else {
+		struct nlattr *attr = info->attrs[WIMAX_GNL_MSG_PIPE_NAME];
+		size_t attr_len = nla_len(attr);
+		/* libnl-1.1 does not yet support NLA_NUL_STRING: copy at most
+		 * attr_len bytes; kstrndup() NUL-terminates the copy itself */
+		result = -ENOMEM;
+		pipe_name = kstrndup(nla_data(attr), attr_len, GFP_KERNEL);
+		if (pipe_name == NULL)
+			goto error_alloc;
+	}
+	mutex_lock(&wimax_dev->mutex);
+	result = wimax_dev_is_ready(wimax_dev);
+	if (result == -ENOMEDIUM)
+		result = 0;	/* -ENOMEDIUM is tolerated for messaging */
+	if (result < 0)
+		goto error_not_ready;
+	result = -ENOSYS;
+	if (wimax_dev->op_msg_from_user == NULL)
+		goto error_noop;
+
+	d_printf(1, dev,
+		 "CRX: nlmsghdr len %u type %u flags 0x%04x seq 0x%x pid %u\n",
+		 nlh->nlmsg_len, nlh->nlmsg_type, nlh->nlmsg_flags,
+		 nlh->nlmsg_seq, nlh->nlmsg_pid);
+	d_printf(1, dev, "CRX: wimax message %zu bytes\n", msg_len);
+	d_dump(2, dev, msg_buf, msg_len);
+
+	result = wimax_dev->op_msg_from_user(wimax_dev, pipe_name,
+					     msg_buf, msg_len, info);
+error_noop:
+error_not_ready:
+	mutex_unlock(&wimax_dev->mutex);
+error_alloc:
+	kfree(pipe_name);
+error_no_data:
+	dev_put(wimax_dev->net_dev);	/* ref from wimax_dev_get_by_genl_info() */
+error_no_wimax_dev:
+	d_fnend(3, NULL, "(skb %p info %p) = %d\n", skb, info, result);
+	return result;
+}
diff --git a/drivers/staging/wimax/op-reset.c b/drivers/staging/wimax/op-reset.c
new file mode 100644
index 000000000000..b3f000cbe112
--- /dev/null
+++ b/drivers/staging/wimax/op-reset.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Linux WiMAX
+ * Implement and export a method for resetting a WiMAX device
+ *
+ * Copyright (C) 2008 Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * This implements a simple synchronous call to reset a WiMAX device.
+ *
+ * Resets aim at being warm, keeping the device handles active;
+ * however, when that fails, it falls back to a cold reset (that will
+ * disconnect and reconnect the device).
+ */
+
+#include "net-wimax.h"
+#include <net/genetlink.h>
+#include "linux-wimax.h"
+#include <linux/security.h>
+#include <linux/export.h>
+#include "wimax-internal.h"
+
+#define D_SUBMODULE op_reset
+#include "debug-levels.h"
+
+
+/**
+ * wimax_reset - Reset a WiMAX device
+ *
+ * @wimax_dev: WiMAX device descriptor
+ *
+ * Returns:
+ *
+ * %0 if ok and a warm reset was done (the device still exists in
+ * the system).
+ *
+ * -%ENODEV if a cold/bus reset had to be done (device has
+ * disconnected and reconnected, so current handle is not valid
+ * any more).
+ *
+ * -%EINVAL if the device is not even registered.
+ *
+ * Any other negative error code shall be considered as
+ * non-recoverable.
+ *
+ * Description:
+ *
+ * Called when wanting to reset the device for any reason. Device is
+ * taken back to power on status.
+ *
+ * This call blocks; on successful return, the device has completed the
+ * reset process and is ready to operate.
+ */
+int wimax_reset(struct wimax_dev *wimax_dev)
+{
+	struct device *dev = wimax_dev_to_dev(wimax_dev);
+	enum wimax_st snapshot;
+	int result = -EINVAL;	/* reported when the device is not registered */
+
+	might_sleep();
+	d_fnstart(3, dev, "(wimax_dev %p)\n", wimax_dev);
+	/* Pin the net device and sample the state under the device mutex */
+	mutex_lock(&wimax_dev->mutex);
+	dev_hold(wimax_dev->net_dev);
+	snapshot = wimax_dev->state;
+	mutex_unlock(&wimax_dev->mutex);
+	if (snapshot >= WIMAX_ST_DOWN) {	/* serialized by mutex_reset */
+		mutex_lock(&wimax_dev->mutex_reset);
+		result = wimax_dev->op_reset(wimax_dev);
+		mutex_unlock(&wimax_dev->mutex_reset);
+	}
+	dev_put(wimax_dev->net_dev);
+
+	d_fnend(3, dev, "(wimax_dev %p) = %d\n", wimax_dev, result);
+	return result;
+}
+EXPORT_SYMBOL(wimax_reset);
+
+
+/*
+ * Exporting to user space over generic netlink
+ *
+ * Parse the reset command from user space, return error code.
+ *
+ * No attributes.
+ */
+int wimax_gnl_doit_reset(struct sk_buff *skb, struct genl_info *info)
+{
+	int result, ifindex;
+	struct wimax_dev *wimax_dev;
+
+	d_fnstart(3, NULL, "(skb %p info %p)\n", skb, info);
+	result = -ENODEV;	/* returned when no device can be resolved */
+	if (info->attrs[WIMAX_GNL_RESET_IFIDX] == NULL) {
+		pr_err("WIMAX_GNL_OP_RESET: can't find IFIDX attribute\n");
+		goto error_no_wimax_dev;
+	}
+	ifindex = nla_get_u32(info->attrs[WIMAX_GNL_RESET_IFIDX]);
+	wimax_dev = wimax_dev_get_by_genl_info(info, ifindex);
+	if (wimax_dev == NULL)
+		goto error_no_wimax_dev;
+	/* Execute the operation and send the result back to user space */
+	result = wimax_reset(wimax_dev);
+	dev_put(wimax_dev->net_dev);	/* presumably the lookup's ref */
+error_no_wimax_dev:
+	d_fnend(3, NULL, "(skb %p info %p) = %d\n", skb, info, result);
+	return result;
+}
diff --git a/drivers/staging/wimax/op-rfkill.c b/drivers/staging/wimax/op-rfkill.c
new file mode 100644
index 000000000000..78b294481a59
--- /dev/null
+++ b/drivers/staging/wimax/op-rfkill.c
@@ -0,0 +1,431 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Linux WiMAX
+ * RF-kill framework integration
+ *
+ * Copyright (C) 2008 Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * This integrates into the Linux Kernel rfkill subsystem so that the
+ * drivers just have to do the bare minimal work, which is providing a
+ * method to set the software RF-Kill switch and to report changes in
+ * the software and hardware switch status.
+ *
+ * A non-polled generic rfkill device is embedded into the WiMAX
+ * subsystem's representation of a device.
+ *
+ * FIXME: Need polled support? Let drivers provide a poll routine
+ *	  and hand it to rfkill ops then?
+ *
+ * All device drivers have to do is after wimax_dev_init(), call
+ * wimax_report_rfkill_hw() and wimax_report_rfkill_sw() to update
+ * initial state and then every time it changes. See wimax.h:struct
+ * wimax_dev for more information.
+ *
+ * ROADMAP
+ *
+ * wimax_gnl_doit_rfkill()      User space calling wimax_rfkill()
+ *   wimax_rfkill()             Kernel calling wimax_rfkill()
+ *     __wimax_rf_toggle_radio()
+ *
+ * wimax_rfkill_set_radio_block()  RF-Kill subsystem calling
+ *   __wimax_rf_toggle_radio()
+ *
+ * __wimax_rf_toggle_radio()
+ *   wimax_dev->op_rfkill_sw_toggle() Driver backend
+ *   __wimax_state_change()
+ *
+ * wimax_report_rfkill_sw()     Driver reports state change
+ *   __wimax_state_change()
+ *
+ * wimax_report_rfkill_hw()     Driver reports state change
+ *   __wimax_state_change()
+ *
+ * wimax_rfkill_add()           Initialize/shutdown rfkill support
+ * wimax_rfkill_rm()            [called by wimax_dev_add/rm()]
+ */
+
+#include "net-wimax.h"
+#include <net/genetlink.h>
+#include "linux-wimax.h"
+#include <linux/security.h>
+#include <linux/rfkill.h>
+#include <linux/export.h>
+#include "wimax-internal.h"
+
+#define D_SUBMODULE op_rfkill
+#include "debug-levels.h"
+
+/**
+ * wimax_report_rfkill_hw - Reports changes in the hardware RF switch
+ *
+ * @wimax_dev: WiMAX device descriptor
+ *
+ * @state: New state of the RF Kill switch. %WIMAX_RF_ON radio on,
+ *     %WIMAX_RF_OFF radio off.
+ *
+ * When the device detects a change in the state of the hardware RF
+ * switch, it must call this function to let the WiMAX kernel stack
+ * know that the state has changed so it can be properly propagated.
+ *
+ * The WiMAX stack caches the state (the driver doesn't need to). As
+ * well, as the change is propagated it will come back as a request to
+ * change the software state to mirror the hardware state.
+ *
+ * If the device doesn't have a hardware kill switch, just report
+ * it on initialization as always on (%WIMAX_RF_ON, radio on).
+ */
+void wimax_report_rfkill_hw(struct wimax_dev *wimax_dev,
+			    enum wimax_rf_state state)
+{
+	struct device *dev = wimax_dev_to_dev(wimax_dev);
+	enum wimax_st next_st;
+	int result;
+
+	d_fnstart(3, dev, "(wimax_dev %p state %u)\n", wimax_dev, state);
+	/* Only a definite on/off report makes sense for the switch */
+	BUG_ON(state == WIMAX_RF_QUERY);
+	BUG_ON(state != WIMAX_RF_ON && state != WIMAX_RF_OFF);
+
+	mutex_lock(&wimax_dev->mutex);
+	result = wimax_dev_is_ready(wimax_dev);
+	if (result < 0)
+		goto error_not_ready;
+	if (wimax_dev->rf_hw == state)	/* nothing changed, nothing to do */
+		goto error_not_ready;
+
+	/* Cache the new switch position; READY needs both switches on */
+	wimax_dev->rf_hw = state;
+	if (wimax_dev->rf_hw == WIMAX_RF_ON && wimax_dev->rf_sw == WIMAX_RF_ON)
+		next_st = WIMAX_ST_READY;
+	else
+		next_st = WIMAX_ST_RADIO_OFF;
+
+	/* Tell rfkill first, then move the WiMAX state machine */
+	result = rfkill_set_hw_state(wimax_dev->rfkill, state == WIMAX_RF_OFF);
+	__wimax_state_change(wimax_dev, next_st);
+error_not_ready:
+	mutex_unlock(&wimax_dev->mutex);
+	d_fnend(3, dev, "(wimax_dev %p state %u) = void [%d]\n",
+		wimax_dev, state, result);
+}
+EXPORT_SYMBOL_GPL(wimax_report_rfkill_hw);
+
+
+/**
+ * wimax_report_rfkill_sw - Reports changes in the software RF switch
+ *
+ * @wimax_dev: WiMAX device descriptor
+ *
+ * @state: New state of the RF kill switch. %WIMAX_RF_ON radio on,
+ *     %WIMAX_RF_OFF radio off.
+ *
+ * Reports changes in the software RF switch state to the WiMAX stack.
+ *
+ * The main use is during initialization, so the driver can query the
+ * device for its current software radio kill switch state and feed it
+ * to the system.
+ *
+ * In principle, the device does not change the software state by
+ * itself. In practice, this can happen, as the device might decide to
+ * switch (in software) the radio off for different reasons.
+ */
+void wimax_report_rfkill_sw(struct wimax_dev *wimax_dev,
+			    enum wimax_rf_state state)
+{
+	struct device *dev = wimax_dev_to_dev(wimax_dev);
+	enum wimax_st target;
+	bool radio_on;
+	int result;
+
+	d_fnstart(3, dev, "(wimax_dev %p state %u)\n", wimax_dev, state);
+	BUG_ON(state == WIMAX_RF_QUERY);
+	BUG_ON(state != WIMAX_RF_ON && state != WIMAX_RF_OFF);
+
+	mutex_lock(&wimax_dev->mutex);
+	result = wimax_dev_is_ready(wimax_dev);
+	if (result < 0)
+		goto error_not_ready;
+	if (wimax_dev->rf_sw == state)	/* already cached: nothing to report */
+		goto error_not_ready;
+
+	/* Cache the switch; READY needs both the HW and SW switches on */
+	wimax_dev->rf_sw = state;
+	radio_on = wimax_dev->rf_hw == WIMAX_RF_ON &&
+		wimax_dev->rf_sw == WIMAX_RF_ON;
+	target = radio_on ? WIMAX_ST_READY : WIMAX_ST_RADIO_OFF;
+	__wimax_state_change(wimax_dev, target);
+	rfkill_set_sw_state(wimax_dev->rfkill, state == WIMAX_RF_OFF);
+error_not_ready:
+	mutex_unlock(&wimax_dev->mutex);
+	d_fnend(3, dev, "(wimax_dev %p state %u) = void [%d]\n",
+		wimax_dev, state, result);
+}
+EXPORT_SYMBOL_GPL(wimax_report_rfkill_sw);
+
+
+/*
+ * Callback for the RF Kill toggle operation
+ *
+ * This function is called by:
+ *
+ * - The rfkill subsystem when the RF-Kill key is pressed in the
+ *   hardware and the driver notifies through
+ *   wimax_report_rfkill_hw(). The rfkill subsystem ends up calling back
+ *   here so the software RF Kill switch state is changed to reflect
+ *   the hardware switch state.
+ *
+ * - When the user sets the state through sysfs' rfkill/state file
+ *
+ * - When the user calls wimax_rfkill().
+ *
+ * This call blocks!
+ *
+ * WARNING! When we call rfkill_unregister(), this will be called with
+ * state 0!
+ *
+ * WARNING: wimax_dev must be locked
+ */
+static
+int __wimax_rf_toggle_radio(struct wimax_dev *wimax_dev,
+			    enum wimax_rf_state state)
+{
+	struct device *dev = wimax_dev_to_dev(wimax_dev);
+	int result = 0;
+
+	might_sleep();
+	d_fnstart(3, dev, "(wimax_dev %p state %u)\n", wimax_dev, state);
+	if (wimax_dev->rf_sw == state)
+		goto out_no_change;
+
+	/* Without a driver toggle op the radio cannot be switched off;
+	 * switching it on is then a no-op (should never happen, though). */
+	if (wimax_dev->op_rfkill_sw_toggle != NULL)
+		result = wimax_dev->op_rfkill_sw_toggle(wimax_dev, state);
+	else
+		result = state == WIMAX_RF_OFF ? -ENXIO : 0;
+	if (result < 0)
+		goto out_no_change;
+	/* Any non-negative driver return is success: normalize and commit */
+	result = 0;
+	wimax_dev->rf_sw = state;
+	__wimax_state_change(wimax_dev, state == WIMAX_RF_ON ?
+			     WIMAX_ST_READY : WIMAX_ST_RADIO_OFF);
+out_no_change:
+	d_fnend(3, dev, "(wimax_dev %p state %u) = %d\n",
+		wimax_dev, state, result);
+	return result;
+}
+
+
+/*
+ * Translate from rfkill state to wimax state
+ *
+ * NOTE: Special state handling rules here
+ *
+ *     Just pretend the call didn't happen if we are in a state where
+ *     we know for sure it cannot be handled (WIMAX_ST_DOWN or
+ *     __WIMAX_ST_QUIESCING). rfkill() needs it to register and
+ *     unregister, as it will run this path.
+ *
+ * NOTE: This call will block until the operation is completed.
+ */
+static int wimax_rfkill_set_radio_block(void *data, bool blocked)
+{
+	struct wimax_dev *wimax_dev = data;
+	struct device *dev = wimax_dev_to_dev(wimax_dev);
+	/* rfkill "blocked" maps onto the radio being switched off */
+	enum wimax_rf_state rf_state = blocked ? WIMAX_RF_OFF : WIMAX_RF_ON;
+	int result = 0;
+
+	d_fnstart(3, dev, "(wimax_dev %p blocked %u)\n", wimax_dev, blocked);
+	mutex_lock(&wimax_dev->mutex);
+	/*
+	 * At or below __WIMAX_ST_QUIESCING the request is silently
+	 * accepted without touching the device.
+	 */
+	if (wimax_dev->state > __WIMAX_ST_QUIESCING)
+		result = __wimax_rf_toggle_radio(wimax_dev, rf_state);
+	mutex_unlock(&wimax_dev->mutex);
+	d_fnend(3, dev, "(wimax_dev %p blocked %u) = %d\n",
+		wimax_dev, blocked, result);
+	return result;
+}
+
+static const struct rfkill_ops wimax_rfkill_ops = {
+	.set_block = wimax_rfkill_set_radio_block, /* sole rfkill callback */
+};
+
+/**
+ * wimax_rfkill - Set the software RF switch state for a WiMAX device
+ *
+ * @wimax_dev: WiMAX device descriptor
+ *
+ * @state: New RF state.
+ *
+ * Returns:
+ *
+ * >= 0 toggle state if ok, < 0 errno code on error. The toggle state
+ * is returned as a bitmap, bit 0 being the hardware RF state, bit 1
+ * the software RF state.
+ *
+ * In each bit, 0 means the kill switch is disabled (%WIMAX_RF_ON, radio
+ * on) and 1 means it is enabled (%WIMAX_RF_OFF, radio off).
+ *
+ * Description:
+ *
+ * Called by the user when he wants to request the WiMAX radio to be
+ * switched on (%WIMAX_RF_ON) or off (%WIMAX_RF_OFF). With
+ * %WIMAX_RF_QUERY, just the current state is returned.
+ *
+ * NOTE:
+ *
+ * This call will block until the operation is complete.
+ */
+int wimax_rfkill(struct wimax_dev *wimax_dev, enum wimax_rf_state state)
+{
+	struct device *dev = wimax_dev_to_dev(wimax_dev);
+	int result;
+
+	d_fnstart(3, dev, "(wimax_dev %p state %u)\n", wimax_dev, state);
+	mutex_lock(&wimax_dev->mutex);
+	result = wimax_dev_is_ready(wimax_dev);
+	if (result < 0) {
+		/*
+		 * wimax-tools older than 1.4.3 probe for a valid WiMAX
+		 * device with this call while it is still initializing;
+		 * answer such queries as if both radios were off.
+		 */
+		if (result == -ENOMEDIUM && state == WIMAX_RF_QUERY)
+			result = WIMAX_RF_OFF << 1 | WIMAX_RF_OFF;
+		goto out_unlock;
+	}
+
+	if (state == WIMAX_RF_ON || state == WIMAX_RF_OFF) {
+		result = __wimax_rf_toggle_radio(wimax_dev, state);
+		if (result < 0)
+			goto out_unlock;
+		/* Mirror the new software state into the rfkill core */
+		rfkill_set_sw_state(wimax_dev->rfkill, state == WIMAX_RF_OFF);
+	} else if (state != WIMAX_RF_QUERY) {
+		result = -EINVAL;	/* not a request we know about */
+		goto out_unlock;
+	}
+
+	/* The toggle state bitmap: bit 1 is the software switch,
+	 * bit 0 the hardware one. */
+	result = wimax_dev->rf_sw << 1 | wimax_dev->rf_hw;
+out_unlock:
+	mutex_unlock(&wimax_dev->mutex);
+	d_fnend(3, dev, "(wimax_dev %p state %u) = %d\n",
+		wimax_dev, state, result);
+	return result;
+}
+EXPORT_SYMBOL(wimax_rfkill);
+
+
+/*
+ * Register a new WiMAX device's RF Kill support
+ *
+ * WARNING: wimax_dev->mutex must be unlocked
+ */
+int wimax_rfkill_add(struct wimax_dev *wimax_dev)
+{
+	struct device *dev = wimax_dev_to_dev(wimax_dev);
+	struct rfkill *rfk;
+	int result = -ENOMEM;
+
+	d_fnstart(3, dev, "(wimax_dev %p)\n", wimax_dev);
+
+	/* Allocate the rfkill object; wimax_dev is the callback cookie */
+	rfk = rfkill_alloc(wimax_dev->name, dev, RFKILL_TYPE_WIMAX,
+			   &wimax_rfkill_ops, wimax_dev);
+	if (!rfk)
+		goto error_rfkill_allocate;
+
+	d_printf(1, dev, "rfkill %p\n", rfk);
+	wimax_dev->rfkill = rfk;
+
+	/* Start out software-blocked, then publish the switch */
+	rfkill_init_sw_state(rfk, 1);
+	result = rfkill_register(rfk);
+	if (result < 0)
+		goto error_rfkill_register;
+
+	/* If there is no SW toggle op, SW RFKill is always on */
+	if (!wimax_dev->op_rfkill_sw_toggle)
+		wimax_dev->rf_sw = WIMAX_RF_ON;
+
+	d_fnend(3, dev, "(wimax_dev %p) = 0\n", wimax_dev);
+	return 0;
+
+error_rfkill_register:
+	rfkill_destroy(rfk);
+error_rfkill_allocate:
+	d_fnend(3, dev, "(wimax_dev %p) = %d\n", wimax_dev, result);
+	return result;
+}
+
+
+/*
+ * Deregister a WiMAX device's RF Kill support
+ *
+ * Ick, we can't call rfkill_free() after rfkill_unregister()...oh
+ * well.
+ *
+ * WARNING: wimax_dev->mutex must be unlocked
+ */
+void wimax_rfkill_rm(struct wimax_dev *wimax_dev)
+{
+	struct device *dev = wimax_dev_to_dev(wimax_dev);
+	d_fnstart(3, dev, "(wimax_dev %p)\n", wimax_dev);
+	rfkill_unregister(wimax_dev->rfkill);	/* may call back set_block */
+	rfkill_destroy(wimax_dev->rfkill);	/* pointer is not cleared */
+	d_fnend(3, dev, "(wimax_dev %p)\n", wimax_dev);
+}
+
+
+/*
+ * Exporting to user space over generic netlink
+ *
+ * Parse the rfkill command from user space, return a combination
+ * value that describe the states of the different toggles.
+ *
+ * Only one attribute: the new state requested (on, off or no change,
+ * just query).
+ */
+
+int wimax_gnl_doit_rfkill(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **attrs = info->attrs;
+	struct wimax_dev *wimax_dev;
+	struct device *dev;
+	int result = -ENODEV;
+
+	d_fnstart(3, NULL, "(skb %p info %p)\n", skb, info);
+	if (attrs[WIMAX_GNL_RFKILL_IFIDX] == NULL) {
+		pr_err("WIMAX_GNL_OP_RFKILL: can't find IFIDX attribute\n");
+		goto error_no_wimax_dev;
+	}
+	wimax_dev = wimax_dev_get_by_genl_info(
+		info, nla_get_u32(attrs[WIMAX_GNL_RFKILL_IFIDX]));
+	if (wimax_dev == NULL)
+		goto error_no_wimax_dev;
+	dev = wimax_dev_to_dev(wimax_dev);
+	/* Device found: from here on every exit goes through dev_put() */
+	result = -EINVAL;
+	if (attrs[WIMAX_GNL_RFKILL_STATE] == NULL) {
+		dev_err(dev, "WIMAX_GNL_RFKILL: can't find RFKILL_STATE "
+			"attribute\n");
+		goto error_no_state;
+	}
+
+	/* Execute the operation and send the result back to user space */
+	result = wimax_rfkill(wimax_dev,
+			      nla_get_u32(attrs[WIMAX_GNL_RFKILL_STATE]));
+error_no_state:
+	dev_put(wimax_dev->net_dev);
+error_no_wimax_dev:
+	d_fnend(3, NULL, "(skb %p info %p) = %d\n", skb, info, result);
+	return result;
+}
diff --git a/drivers/staging/wimax/op-state-get.c b/drivers/staging/wimax/op-state-get.c
new file mode 100644
index 000000000000..c5bfbed505f5
--- /dev/null
+++ b/drivers/staging/wimax/op-state-get.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Linux WiMAX
+ * Implement and export a method for getting a WiMAX device current state
+ *
+ * Copyright (C) 2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * Based on previous WiMAX core work by:
+ *  Copyright (C) 2008 Intel Corporation <linux-wimax@intel.com>
+ *  Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ */
+
+#include "net-wimax.h"
+#include <net/genetlink.h>
+#include "linux-wimax.h"
+#include <linux/security.h>
+#include "wimax-internal.h"
+
+#define D_SUBMODULE op_state_get
+#include "debug-levels.h"
+
+
+/*
+ * Exporting to user space over generic netlink
+ *
+ * Parse the state get command from user space, return a combination
+ * value that describe the current state.
+ *
+ * No attributes.
+ */
+int wimax_gnl_doit_state_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr *ifidx_attr = info->attrs[WIMAX_GNL_STGET_IFIDX];
+	struct wimax_dev *wimax_dev;
+	int result = -ENODEV;
+
+	d_fnstart(3, NULL, "(skb %p info %p)\n", skb, info);
+	if (ifidx_attr == NULL) {
+		pr_err("WIMAX_GNL_OP_STATE_GET: can't find IFIDX attribute\n");
+		goto error_no_wimax_dev;
+	}
+	wimax_dev = wimax_dev_get_by_genl_info(info, nla_get_u32(ifidx_attr));
+	if (wimax_dev == NULL)
+		goto error_no_wimax_dev;
+
+	/* The handler's return value is the device state itself */
+	result = wimax_state_get(wimax_dev);
+	dev_put(wimax_dev->net_dev);
+error_no_wimax_dev:
+	d_fnend(3, NULL, "(skb %p info %p) = %d\n", skb, info, result);
+	return result;
+}
diff --git a/drivers/staging/wimax/stack.c b/drivers/staging/wimax/stack.c
new file mode 100644
index 000000000000..ace24a6dfd2d
--- /dev/null
+++ b/drivers/staging/wimax/stack.c
@@ -0,0 +1,616 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Linux WiMAX
+ * Initialization, addition and removal of wimax devices
+ *
+ * Copyright (C) 2005-2006 Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * This implements:
+ *
+ *   - basic life cycle of 'struct wimax_dev' [wimax_dev_*()]; on
+ *     addition/registration initialize all subfields and allocate
+ *     generic netlink resources for user space communication. On
+ *     removal/unregistration, undo all that.
+ *
+ *   - device state machine [wimax_state_change()] and support to send
+ *     reports to user space when the state changes
+ *     [wimax_gnl_re_state_change*()].
+ *
+ * See include/net/wimax.h for rationales and design.
+ *
+ * ROADMAP
+ *
+ * [__]wimax_state_change()     Called by drivers to update device's state
+ *   wimax_gnl_re_state_change_alloc()
+ *   wimax_gnl_re_state_change_send()
+ *
+ * wimax_dev_init()	        Init a device
+ * wimax_dev_add()              Register
+ *   wimax_rfkill_add()
+ *   wimax_gnl_add()            Register all the generic netlink resources.
+ *   wimax_id_table_add()
+ * wimax_dev_rm()               Unregister
+ *   wimax_id_table_rm()
+ *   wimax_gnl_rm()
+ *   wimax_rfkill_rm()
+ */
+#include <linux/device.h>
+#include <linux/gfp.h>
+#include <net/genetlink.h>
+#include <linux/netdevice.h>
+#include "linux-wimax.h"
+#include <linux/module.h>
+#include "wimax-internal.h"
+
+
+#define D_SUBMODULE stack
+#include "debug-levels.h"
+
+static char wimax_debug_params[128];	/* text of the "debug" parameter */
+module_param_string(debug, wimax_debug_params, sizeof(wimax_debug_params),
+		    0644);
+MODULE_PARM_DESC(debug,
+		 "String of space-separated NAME:VALUE pairs, where NAMEs "
+		 "are the different debug submodules and VALUE are the "
+		 "initial debug value to set.");
+
+/*
+ * Authoritative source for the RE_STATE_CHANGE attribute policy
+ *
+ * We don't really use it here, but /me likes to keep the definition
+ * close to where the data is generated.
+ */
+/*
+static const struct nla_policy wimax_gnl_re_status_change[WIMAX_GNL_ATTR_MAX + 1] = {
+	[WIMAX_GNL_STCH_STATE_OLD] = { .type = NLA_U8 },
+	[WIMAX_GNL_STCH_STATE_NEW] = { .type = NLA_U8 },
+};
+*/
+
+
+/*
+ * Allocate a Report State Change message
+ *
+ * @header: save it, you need it for _send()
+ *
+ * Creates and fills a basic state change message; different code
+ * paths can then add more attributes to the message as needed.
+ *
+ * Use wimax_gnl_re_state_change_send() to send the returned skb.
+ *
+ * Returns: skb with the genl message if ok, IS_ERR() ptr on error
+ *     with an errno code.
+ */
+static
+struct sk_buff *wimax_gnl_re_state_change_alloc(
+	struct wimax_dev *wimax_dev,
+	enum wimax_st new_state, enum wimax_st old_state,
+	void **header)
+{
+	struct device *dev = wimax_dev_to_dev(wimax_dev);
+	struct sk_buff *skb;
+	void *hdr;
+	int result = -ENOMEM;	/* for the nlmsg/genlmsg failures */
+
+	d_fnstart(3, dev, "(wimax_dev %p new_state %u old_state %u)\n",
+		  wimax_dev, new_state, old_state);
+	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!skb) {
+		dev_err(dev, "RE_STCH: can't create message\n");
+		goto error_new;
+	}
+	/* FIXME: sending a group ID as the seq is wrong */
+	hdr = genlmsg_put(skb, 0, wimax_gnl_family.mcgrp_offset,
+			  &wimax_gnl_family, 0, WIMAX_GNL_RE_STATE_CHANGE);
+	if (!hdr) {
+		dev_err(dev, "RE_STCH: can't put data into message\n");
+		goto error_put;
+	}
+	*header = hdr;	/* the caller needs it again for _send() */
+
+	/* Attributes: old state, new state, then the interface index */
+	result = nla_put_u8(skb, WIMAX_GNL_STCH_STATE_OLD, old_state);
+	if (result < 0) {
+		dev_err(dev, "RE_STCH: Error adding OLD attr: %d\n", result);
+		goto error_put;
+	}
+	result = nla_put_u8(skb, WIMAX_GNL_STCH_STATE_NEW, new_state);
+	if (result < 0) {
+		dev_err(dev, "RE_STCH: Error adding NEW attr: %d\n", result);
+		goto error_put;
+	}
+	result = nla_put_u32(skb, WIMAX_GNL_STCH_IFIDX,
+			     wimax_dev->net_dev->ifindex);
+	if (result < 0) {
+		dev_err(dev, "RE_STCH: Error adding IFINDEX attribute\n");
+		goto error_put;
+	}
+	d_fnend(3, dev, "(wimax_dev %p new_state %u old_state %u) = %p\n",
+		wimax_dev, new_state, old_state, skb);
+	return skb;
+
+error_put:
+	nlmsg_free(skb);
+error_new:
+	d_fnend(3, dev, "(wimax_dev %p new_state %u old_state %u) = %d\n",
+		wimax_dev, new_state, old_state, result);
+	return ERR_PTR(result);
+}
+
+
+/*
+ * Send a Report State Change message (as created with _alloc).
+ *
+ * @report_skb: as returned by wimax_gnl_re_state_change_alloc()
+ * @header: as returned by wimax_gnl_re_state_change_alloc()
+ *
+ * Returns: 0 if ok, < 0 errno code on error.
+ *
+ * If the message is NULL, nothing is sent and -ENOMEM is returned.
+ */
+static
+int wimax_gnl_re_state_change_send(
+	struct wimax_dev *wimax_dev, struct sk_buff *report_skb,
+	void *header)
+{
+	struct device *dev = wimax_dev_to_dev(wimax_dev);
+	int result = -ENOMEM;	/* what a missing message is reported as */
+	d_fnstart(3, dev, "(wimax_dev %p report_skb %p)\n",
+		  wimax_dev, report_skb);
+	if (report_skb != NULL) {
+		/* Close the message and multicast it; result is not checked */
+		genlmsg_end(report_skb, header);
+		genlmsg_multicast(&wimax_gnl_family, report_skb, 0, 0,
+				  GFP_KERNEL);
+		result = 0;
+	}
+	d_fnend(3, dev, "(wimax_dev %p report_skb %p) = %d\n",
+		wimax_dev, report_skb, result);
+	return result;
+}
+
+
+static void __check_new_state(enum wimax_st old_state, enum wimax_st new_state,
+			      unsigned int allowed_states_bm)
+{
+	bool forbidden = ((1 << new_state) & allowed_states_bm) == 0;
+
+	if (WARN_ON(forbidden))	/* warn first, then log the transition */
+		pr_err("SW BUG! Forbidden state change %u -> %u\n",
+		       old_state, new_state);
+}
+
+
+/*
+ * Set the current state of a WiMAX device [non-locking version of
+ * wimax_state_change(); callers take wimax_dev->mutex first].
+ */
+void __wimax_state_change(struct wimax_dev *wimax_dev, enum wimax_st new_state)
+{
+	struct device *dev = wimax_dev_to_dev(wimax_dev);
+	enum wimax_st old_state = wimax_dev->state;
+	struct sk_buff *stch_skb;
+	void *header = NULL;	/* filled in by _alloc(); NULL keeps gcc quiet */
+
+	d_fnstart(3, dev, "(wimax_dev %p new_state %u [old %u])\n",
+		  wimax_dev, new_state, old_state);
+
+	if (WARN_ON(new_state >= __WIMAX_ST_INVALID)) {
+		dev_err(dev, "SW BUG: requesting invalid state %u\n",
+			new_state);
+		goto out;
+	}
+	if (old_state == new_state)
+		goto out;
+	stch_skb = wimax_gnl_re_state_change_alloc(
+		wimax_dev, new_state, old_state, &header);
+
+	/* Verify the state transition and do exit-from-state actions */
+	switch (old_state) {
+	case __WIMAX_ST_NULL:
+		__check_new_state(old_state, new_state, 1 << WIMAX_ST_DOWN);
+		break;
+	case WIMAX_ST_DOWN:
+		__check_new_state(old_state, new_state,
+				  1 << __WIMAX_ST_QUIESCING
+				  | 1 << WIMAX_ST_UNINITIALIZED
+				  | 1 << WIMAX_ST_RADIO_OFF);
+		break;
+	case __WIMAX_ST_QUIESCING:
+		__check_new_state(old_state, new_state, 1 << WIMAX_ST_DOWN);
+		break;
+	case WIMAX_ST_UNINITIALIZED:
+		__check_new_state(old_state, new_state,
+				  1 << __WIMAX_ST_QUIESCING
+				  | 1 << WIMAX_ST_RADIO_OFF);
+		break;
+	case WIMAX_ST_RADIO_OFF:
+		__check_new_state(old_state, new_state,
+				  1 << __WIMAX_ST_QUIESCING
+				  | 1 << WIMAX_ST_READY);
+		break;
+	case WIMAX_ST_READY:
+		__check_new_state(old_state, new_state,
+				  1 << __WIMAX_ST_QUIESCING
+				  | 1 << WIMAX_ST_RADIO_OFF
+				  | 1 << WIMAX_ST_SCANNING
+				  | 1 << WIMAX_ST_CONNECTING
+				  | 1 << WIMAX_ST_CONNECTED);
+		break;
+	case WIMAX_ST_SCANNING:
+		__check_new_state(old_state, new_state,
+				  1 << __WIMAX_ST_QUIESCING
+				  | 1 << WIMAX_ST_RADIO_OFF
+				  | 1 << WIMAX_ST_READY
+				  | 1 << WIMAX_ST_CONNECTING
+				  | 1 << WIMAX_ST_CONNECTED);
+		break;
+	case WIMAX_ST_CONNECTING:
+		__check_new_state(old_state, new_state,
+				  1 << __WIMAX_ST_QUIESCING
+				  | 1 << WIMAX_ST_RADIO_OFF
+				  | 1 << WIMAX_ST_READY
+				  | 1 << WIMAX_ST_SCANNING
+				  | 1 << WIMAX_ST_CONNECTED);
+		break;
+	case WIMAX_ST_CONNECTED:
+		__check_new_state(old_state, new_state,
+				  1 << __WIMAX_ST_QUIESCING
+				  | 1 << WIMAX_ST_RADIO_OFF
+				  | 1 << WIMAX_ST_READY);
+		netif_tx_disable(wimax_dev->net_dev);	/* leaving CONNECTED */
+		netif_carrier_off(wimax_dev->net_dev);
+		break;
+	case __WIMAX_ST_INVALID:
+	default:
+		dev_err(dev, "SW BUG: wimax_dev %p is in unknown state %u\n",
+			wimax_dev, wimax_dev->state);
+		WARN_ON(1);
+		if (!IS_ERR(stch_skb))	/* never sent: don't leak the report */
+			nlmsg_free(stch_skb);
+		goto out;
+	}
+
+	/* Execute the actions of entry to the new state */
+	switch (new_state) {
+	case __WIMAX_ST_NULL:
+		dev_err(dev, "SW BUG: wimax_dev %p entering NULL state "
+			"from %u\n", wimax_dev, wimax_dev->state);
+		WARN_ON(1);		/* Nobody can enter this state */
+		break;
+	case WIMAX_ST_DOWN:
+		break;
+	case __WIMAX_ST_QUIESCING:
+		break;
+	case WIMAX_ST_UNINITIALIZED:
+		break;
+	case WIMAX_ST_RADIO_OFF:
+		break;
+	case WIMAX_ST_READY:
+		break;
+	case WIMAX_ST_SCANNING:
+		break;
+	case WIMAX_ST_CONNECTING:
+		break;
+	case WIMAX_ST_CONNECTED:
+		netif_carrier_on(wimax_dev->net_dev);	/* entering CONNECTED */
+		netif_wake_queue(wimax_dev->net_dev);
+		break;
+	case __WIMAX_ST_INVALID:
+	default:
+		BUG();
+	}
+	__wimax_state_set(wimax_dev, new_state);
+	if (!IS_ERR(stch_skb))	/* skip the report if its alloc failed */
+		wimax_gnl_re_state_change_send(wimax_dev, stch_skb, header);
+out:
+	d_fnend(3, dev, "(wimax_dev %p new_state %u [old %u]) = void\n",
+		wimax_dev, new_state, old_state);
+}
+
+
+/**
+ * wimax_state_change - Set the current state of a WiMAX device
+ *
+ * @wimax_dev: WiMAX device descriptor (properly referenced)
+ * @new_state: New state to switch to
+ *
+ * This implements the state changes for the wimax devices. It will
+ *
+ * - verify that the state transition is legal (for now it'll just
+ *   print a warning if not) according to the table in
+ *   linux/wimax.h's documentation for 'enum wimax_st'.
+ *
+ * - perform the actions needed for leaving the current state and
+ *   whichever are needed for entering the new state.
+ *
+ * - issue a report to user space indicating the new state (and an
+ *   optional payload with information about the new state).
+ *
+ * NOTE: takes @wimax_dev->mutex itself; do not call with it held
+ */
+void wimax_state_change(struct wimax_dev *wimax_dev, enum wimax_st new_state)
+{
+	/*
+	 * A driver cannot take the wimax_dev out of the __WIMAX_ST_NULL
+	 * state other than by calling wimax_dev_add(). While the state
+	 * is still NULL the device has not been registered yet, so any
+	 * request to change its state is ignored.
+	 *
+	 * There is no need to complain about it, as routines that
+	 * call this might be shared between code paths that run
+	 * before or after wimax_dev_add() has done its job.
+	 *
+	 * The check and the change both run under wimax_dev->mutex.
+	 */
+	mutex_lock(&wimax_dev->mutex);
+	if (wimax_dev->state > __WIMAX_ST_NULL)
+		__wimax_state_change(wimax_dev, new_state);
+	mutex_unlock(&wimax_dev->mutex);
+}
+EXPORT_SYMBOL_GPL(wimax_state_change);
+
+
+/**
+ * wimax_state_get() - Return the current state of a WiMAX device
+ *
+ * @wimax_dev: WiMAX device descriptor
+ *
+ * Returns: Current state of the device according to its driver.
+ */
+enum wimax_st wimax_state_get(struct wimax_dev *wimax_dev)
+{
+	enum wimax_st snapshot;	/* state as sampled under the mutex */
+	mutex_lock(&wimax_dev->mutex);
+	snapshot = wimax_dev->state;
+	mutex_unlock(&wimax_dev->mutex);
+	return snapshot;
+}
+EXPORT_SYMBOL_GPL(wimax_state_get);
+
+
+/**
+ * wimax_dev_init - initialize a newly allocated instance
+ *
+ * @wimax_dev: WiMAX device descriptor to initialize.
+ *
+ * Initializes fields of a freshly allocated @wimax_dev instance. This
+ * function assumes that after allocation, the memory occupied by
+ * @wimax_dev was zeroed.
+ */
+void wimax_dev_init(struct wimax_dev *wimax_dev)
+{
+	INIT_LIST_HEAD(&wimax_dev->id_table_node);	/* unlinked node */
+	__wimax_state_set(wimax_dev, __WIMAX_ST_NULL);	/* i.e. unregistered */
+	mutex_init(&wimax_dev->mutex);		/* guards state and rf_* */
+	mutex_init(&wimax_dev->mutex_reset);	/* serializes op_reset() */
+}
+EXPORT_SYMBOL_GPL(wimax_dev_init);
+
+/*
+ * There are multiple enums reusing the same values, adding
+ * others is only possible if they use a compatible policy.
+ */
+static const struct nla_policy wimax_gnl_policy[WIMAX_GNL_ATTR_MAX + 1] = {
+	/*
+	 * WIMAX_GNL_RESET_IFIDX, WIMAX_GNL_RFKILL_IFIDX,
+	 * WIMAX_GNL_STGET_IFIDX, WIMAX_GNL_MSG_IFIDX
+	 */
+	[1] = { .type = NLA_U32, },
+	/* WIMAX_GNL_RFKILL_STATE, WIMAX_GNL_MSG_PIPE_NAME
+	 * NOTE(review): the pipe name is consumed as a string, yet it is
+	 * checked against this U32 policy -- confirm that is intended. */
+	[2] = { .type = NLA_U32, }, /* enum wimax_rf_state */
+	/*
+	 * WIMAX_GNL_MSG_DATA
+	 */
+	[3] = { .type = NLA_UNSPEC, }, /* libnl doesn't grok BINARY yet */
+};
+
+static const struct genl_small_ops wimax_gnl_ops[] = {
+	{	/* message from user space for the driver's op_msg_from_user */
+		.cmd = WIMAX_GNL_OP_MSG_FROM_USER,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.flags = GENL_ADMIN_PERM,
+		.doit = wimax_gnl_doit_msg_from_user,
+	},
+	{	/* reset the device, see wimax_reset() */
+		.cmd = WIMAX_GNL_OP_RESET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.flags = GENL_ADMIN_PERM,
+		.doit = wimax_gnl_doit_reset,
+	},
+	{	/* set or query the RF switches, see wimax_rfkill() */
+		.cmd = WIMAX_GNL_OP_RFKILL,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.flags = GENL_ADMIN_PERM,
+		.doit = wimax_gnl_doit_rfkill,
+	},
+	{	/* read the device state, see wimax_state_get() */
+		.cmd = WIMAX_GNL_OP_STATE_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.flags = GENL_ADMIN_PERM,
+		.doit = wimax_gnl_doit_state_get,
+	},
+};
+
+
+static size_t wimax_addr_scnprint(char *addr_str, size_t addr_str_size,
+				  unsigned char *addr, size_t addr_len)
+{
+	unsigned int idx, pos = 0;
+	for (idx = 0; idx < addr_len; idx++) {
+		/* ':' between octets; the last octet gets a NUL instead */
+		char sep = idx == addr_len - 1 ? '\0' : ':';
+		pos += scnprintf(addr_str + pos, addr_str_size - pos,
+				 "%02x%c", addr[idx], sep);
+	}
+	return pos;
+}
+
+
+/**
+ * wimax_dev_add - Register a new WiMAX device
+ *
+ * @wimax_dev: WiMAX device descriptor (as embedded in your @net_dev's
+ *     priv data). You must have called wimax_dev_init() on it before.
+ *
+ * @net_dev: net device the @wimax_dev is associated with. The
+ *     function expects SET_NETDEV_DEV() and register_netdev() were
+ *     already called on it.
+ *
+ * Registers the new WiMAX device, sets up the user-kernel control
+ * interface (generic netlink) and common WiMAX infrastructure.
+ *
+ * Note that the parts that will allow interaction with user space are
+ * setup at the very end, when the rest is in place, as once that
+ * happens, the driver might get user space control requests via
+ * netlink or from debugfs that might translate into calls into
+ * wimax_dev->op_*().
+ */
+int wimax_dev_add(struct wimax_dev *wimax_dev, struct net_device *net_dev)
+{
+	int result;
+	struct device *dev = net_dev->dev.parent;
+	char addr_str[32] = "";	/* stays empty if addr_len is 0 */
+
+	d_fnstart(3, dev, "(wimax_dev %p net_dev %p)\n", wimax_dev, net_dev);
+
+	/* Do the RFKILL setup before locking, as RFKILL will call
+	 * into our functions.
+	 */
+	wimax_dev->net_dev = net_dev;
+	result = wimax_rfkill_add(wimax_dev);
+	if (result < 0)
+		goto error_rfkill_add;
+
+	/* Set up user-space interaction */
+	mutex_lock(&wimax_dev->mutex);
+	wimax_id_table_add(wimax_dev);
+	wimax_debugfs_add(wimax_dev);
+
+	__wimax_state_set(wimax_dev, WIMAX_ST_DOWN);
+	mutex_unlock(&wimax_dev->mutex);
+
+	wimax_addr_scnprint(addr_str, sizeof(addr_str),
+			    net_dev->dev_addr, net_dev->addr_len);
+	dev_info(dev, "WiMAX interface %s (%s) ready\n",
+		 net_dev->name, addr_str);
+	d_fnend(3, dev, "(wimax_dev %p net_dev %p) = 0\n", wimax_dev, net_dev);
+	return 0;
+
+error_rfkill_add:
+	d_fnend(3, dev, "(wimax_dev %p net_dev %p) = %d\n",
+		wimax_dev, net_dev, result);
+	return result;
+}
+EXPORT_SYMBOL_GPL(wimax_dev_add);
+
+
+/**
+ * wimax_dev_rm - Unregister an existing WiMAX device
+ *
+ * @wimax_dev: WiMAX device descriptor
+ *
+ * Unregisters a WiMAX device previously registered for use with
+ * wimax_dev_add().
+ *
+ * IMPORTANT! Must call before calling unregister_netdev().
+ *
+ * After this function returns, you will not get any more user space
+ * control requests (via netlink or debugfs) and thus to wimax_dev->ops.
+ *
+ * Reentrancy control is ensured by setting the state to
+ * %__WIMAX_ST_QUIESCING. rfkill operations coming through
+ * wimax_*rfkill*() will be stopped by the quiescing state; ops coming
+ * from the rfkill subsystem will be stopped by the support being
+ * removed by wimax_rfkill_rm().
+ */
+void wimax_dev_rm(struct wimax_dev *wimax_dev)
+{
+	d_fnstart(3, NULL, "(wimax_dev %p)\n", wimax_dev);
+
+	mutex_lock(&wimax_dev->mutex);
+	__wimax_state_change(wimax_dev, __WIMAX_ST_QUIESCING);
+	wimax_debugfs_rm(wimax_dev);
+	wimax_id_table_rm(wimax_dev);
+	__wimax_state_change(wimax_dev, WIMAX_ST_DOWN);
+	mutex_unlock(&wimax_dev->mutex);
+	wimax_rfkill_rm(wimax_dev);
+	d_fnend(3, NULL, "(wimax_dev %p) = void\n", wimax_dev);
+}
+EXPORT_SYMBOL_GPL(wimax_dev_rm);
+
+
+/* Debug framework control of debug levels */
+struct d_level D_LEVEL[] = {
+	D_SUBMODULE_DEFINE(debugfs),
+	D_SUBMODULE_DEFINE(id_table),
+	D_SUBMODULE_DEFINE(op_msg),
+	D_SUBMODULE_DEFINE(op_reset),
+	D_SUBMODULE_DEFINE(op_rfkill),
+	D_SUBMODULE_DEFINE(op_state_get),
+	D_SUBMODULE_DEFINE(stack),
+};
+size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL);
+
+
+static const struct genl_multicast_group wimax_gnl_mcgrps[] = {
+	{ .name = "msg", },
+};
+
+struct genl_family wimax_gnl_family __ro_after_init = {
+	.name = "WiMAX",
+	.version = WIMAX_GNL_VERSION,
+	.hdrsize = 0,
+	.maxattr = WIMAX_GNL_ATTR_MAX,
+	.policy = wimax_gnl_policy,
+	.module = THIS_MODULE,
+	.small_ops = wimax_gnl_ops,
+	.n_small_ops = ARRAY_SIZE(wimax_gnl_ops),
+	.mcgrps = wimax_gnl_mcgrps,
+	.n_mcgrps = ARRAY_SIZE(wimax_gnl_mcgrps),
+};
+
+
+
+/* Initialize the wimax stack */
+static
+int __init wimax_subsys_init(void)
+{
+	int result;
+
+	d_fnstart(4, NULL, "()\n");
+	d_parse_params(D_LEVEL, D_LEVEL_SIZE, wimax_debug_params,
+		       "wimax.debug");
+
+	result = genl_register_family(&wimax_gnl_family);
+	if (unlikely(result < 0)) {
+		pr_err("cannot register generic netlink family: %d\n", result);
+		goto error_register_family;
+	}
+
+	d_fnend(4, NULL, "() = 0\n");
+	return 0;
+
+error_register_family:
+	d_fnend(4, NULL, "() = %d\n", result);
+	return result;
+
+}
+module_init(wimax_subsys_init);
+
+
+/* Shutdown the wimax stack */
+static
+void __exit wimax_subsys_exit(void)
+{
+	wimax_id_table_release();
+	genl_unregister_family(&wimax_gnl_family);
+}
+module_exit(wimax_subsys_exit);
+
+MODULE_AUTHOR("Intel Corporation <linux-wimax@intel.com>");
+MODULE_DESCRIPTION("Linux WiMAX stack");
+MODULE_LICENSE("GPL");
diff --git a/drivers/staging/wimax/wimax-internal.h b/drivers/staging/wimax/wimax-internal.h
new file mode 100644
index 000000000000..a6b6990642a1
--- /dev/null
+++ b/drivers/staging/wimax/wimax-internal.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Linux WiMAX
+ * Internal API for kernel space WiMAX stack
+ *
+ * Copyright (C) 2007 Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * This header file is for declarations and definitions internal to
+ * the WiMAX stack. For public APIs and documentation, see
+ * include/net/wimax.h and include/linux/wimax.h.
+ */
+
+#ifndef __WIMAX_INTERNAL_H__
+#define __WIMAX_INTERNAL_H__
+#ifdef __KERNEL__
+
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/device.h>
+#include "net-wimax.h"
+
+
+/*
+ * Decide if a (locked) device is ready for use
+ *
+ * Before using the device structure, it must be locked
+ * (wimax_dev->mutex). As well, most operations need to call this
+ * function to check if the state is the right one.
+ *
+ * An error value will be returned if the state is not the right
+ * one. In that case, the caller should not attempt to use the device
+ * and just unlock it.
+ */
+static inline __must_check
+int wimax_dev_is_ready(struct wimax_dev *wimax_dev)
+{
+	if (wimax_dev->state == __WIMAX_ST_NULL)
+		return -EINVAL;	/* Device is not even registered! */
+	if (wimax_dev->state == WIMAX_ST_DOWN)
+		return -ENOMEDIUM;
+	if (wimax_dev->state == __WIMAX_ST_QUIESCING)
+		return -ESHUTDOWN;
+	return 0;
+}
+
+
+static inline
+void __wimax_state_set(struct wimax_dev *wimax_dev, enum wimax_st state)
+{
+	wimax_dev->state = state;
+}
+void __wimax_state_change(struct wimax_dev *, enum wimax_st);
+
+#ifdef CONFIG_DEBUG_FS
+void wimax_debugfs_add(struct wimax_dev *);
+void wimax_debugfs_rm(struct wimax_dev *);
+#else
+static inline void wimax_debugfs_add(struct wimax_dev *wimax_dev) {}
+static inline void wimax_debugfs_rm(struct wimax_dev *wimax_dev) {}
+#endif
+
+void wimax_id_table_add(struct wimax_dev *);
+struct wimax_dev *wimax_dev_get_by_genl_info(struct genl_info *, int);
+void wimax_id_table_rm(struct wimax_dev *);
+void wimax_id_table_release(void);
+
+int wimax_rfkill_add(struct wimax_dev *);
+void wimax_rfkill_rm(struct wimax_dev *);
+
+/* generic netlink */
+extern struct genl_family wimax_gnl_family;
+
+/* ops */
+int wimax_gnl_doit_msg_from_user(struct sk_buff *skb, struct genl_info *info);
+int wimax_gnl_doit_reset(struct sk_buff *skb, struct genl_info *info);
+int wimax_gnl_doit_rfkill(struct sk_buff *skb, struct genl_info *info);
+int wimax_gnl_doit_state_get(struct sk_buff *skb, struct genl_info *info);
+
+#endif /* #ifdef __KERNEL__ */
+#endif /* #ifndef __WIMAX_INTERNAL_H__ */
diff --git a/include/linux/wimax/debug.h b/include/linux/wimax/debug.h
deleted file mode 100644
index cdae052bcdcd..000000000000
--- a/include/linux/wimax/debug.h
+++ /dev/null
@@ -1,491 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Linux WiMAX
- * Collection of tools to manage debug operations.
- *
- * Copyright (C) 2005-2007 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * Don't #include this file directly, read on!
- *
- * EXECUTING DEBUGGING ACTIONS OR NOT
- *
- * The main thing this framework provides is decission power to take a
- * debug action (like printing a message) if the current debug level
- * allows it.
- *
- * The decission power is at two levels: at compile-time (what does
- * not make it is compiled out) and at run-time. The run-time
- * selection is done per-submodule (as they are declared by the user
- * of the framework).
- *
- * A call to d_test(L) (L being the target debug level) returns true
- * if the action should be taken because the current debug levels
- * allow it (both compile and run time).
- *
- * It follows that a call to d_test() that can be determined to be
- * always false at compile time will get the code depending on it
- * compiled out by optimization.
- *
- * DEBUG LEVELS
- *
- * It is up to the caller to define how much a debugging level is.
- *
- * Convention sets 0 as "no debug" (so an action marked as debug level 0
- * will always be taken). The increasing debug levels are used for
- * increased verbosity.
- *
- * USAGE
- *
- * Group the code in modules and submodules inside each module [which
- * in most cases maps to Linux modules and .c files that compose
- * those].
- *
- * For each module, there is:
- *
- *  - a MODULENAME (single word, legal C identifier)
- *
- *  - a debug-levels.h header file that declares the list of
- *    submodules and that is included by all .c files that use
- *    the debugging tools. The file name can be anything.
- *
- *  - some (optional) .c code to manipulate the runtime debug levels
- *    through debugfs.
- *
- * The debug-levels.h file would look like:
- *
- *     #ifndef __debug_levels__h__
- *     #define __debug_levels__h__
- *
- *     #define D_MODULENAME modulename
- *     #define D_MASTER 10
- *
- *     #include <linux/wimax/debug.h>
- *
- *     enum d_module {
- *             D_SUBMODULE_DECLARE(submodule_1),
- *             D_SUBMODULE_DECLARE(submodule_2),
- *             ...
- *             D_SUBMODULE_DECLARE(submodule_N)
- *     };
- *
- *     #endif
- *
- * D_MASTER is the maximum compile-time debug level; any debug actions
- * above this will be out. D_MODULENAME is the module name (legal C
- * identifier), which has to be unique for each module (to avoid
- * namespace collisions during linkage). Note those #defines need to
- * be done before #including debug.h
- *
- * We declare N different submodules whose debug level can be
- * independently controlled during runtime.
- *
- * In a .c file of the module (and only in one of them), define the
- * following code:
- *
- *     struct d_level D_LEVEL[] = {
- *             D_SUBMODULE_DEFINE(submodule_1),
- *             D_SUBMODULE_DEFINE(submodule_2),
- *             ...
- *             D_SUBMODULE_DEFINE(submodule_N),
- *     };
- *     size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL);
- *
- * Externs for d_level_MODULENAME and d_level_size_MODULENAME are used
- * and declared in this file using the D_LEVEL and D_LEVEL_SIZE macros
- * #defined also in this file.
- *
- * To manipulate from user space the levels, create a debugfs dentry
- * and then register each submodule with:
- *
- *     d_level_register_debugfs("PREFIX_", submodule_X, parent);
- *
- * Where PREFIX_ is a name of your chosing. This will create debugfs
- * file with a single numeric value that can be use to tweak it. To
- * remove the entires, just use debugfs_remove_recursive() on 'parent'.
- *
- * NOTE: remember that even if this will show attached to some
- *     particular instance of a device, the settings are *global*.
- *
- * On each submodule (for example, .c files), the debug infrastructure
- * should be included like this:
- *
- *     #define D_SUBMODULE submodule_x     // matches one in debug-levels.h
- *     #include "debug-levels.h"
- *
- * after #including all your include files.
- *
- * Now you can use the d_*() macros below [d_test(), d_fnstart(),
- * d_fnend(), d_printf(), d_dump()].
- *
- * If their debug level is greater than D_MASTER, they will be
- * compiled out.
- *
- * If their debug level is lower or equal than D_MASTER but greater
- * than the current debug level of their submodule, they'll be
- * ignored.
- *
- * Otherwise, the action will be performed.
- */
-#ifndef __debug__h__
-#define __debug__h__
-
-#include <linux/types.h>
-#include <linux/slab.h>
-
-struct device;
-
-/* Backend stuff */
-
-/*
- * Debug backend: generate a message header from a 'struct device'
- *
- * @head: buffer where to place the header
- * @head_size: length of @head
- * @dev: pointer to device used to generate a header from. If NULL,
- *     an empty ("") header is generated.
- */
-static inline
-void __d_head(char *head, size_t head_size,
-	      struct device *dev)
-{
-	if (dev == NULL)
-		head[0] = 0;
-	else if ((unsigned long)dev < 4096) {
-		printk(KERN_ERR "E: Corrupt dev %p\n", dev);
-		WARN_ON(1);
-	} else
-		snprintf(head, head_size, "%s %s: ",
-			 dev_driver_string(dev), dev_name(dev));
-}
-
-
-/*
- * Debug backend: log some message if debugging is enabled
- *
- * @l: intended debug level
- * @tag: tag to prefix the message with
- * @dev: 'struct device' associated to this message
- * @f: printf-like format and arguments
- *
- * Note this is optimized out if it doesn't pass the compile-time
- * check; however, it is *always* compiled. This is useful to make
- * sure the printf-like formats and variables are always checked and
- * they don't get bit rot if you have all the debugging disabled.
- */
-#define _d_printf(l, tag, dev, f, a...)					\
-do {									\
-	char head[64];							\
-	if (!d_test(l))							\
-		break;							\
-	__d_head(head, sizeof(head), dev);				\
-	printk(KERN_ERR "%s%s%s: " f, head, __func__, tag, ##a);	\
-} while (0)
-
-
-/*
- * CPP syntactic sugar to generate A_B like symbol names when one of
- * the arguments is a preprocessor #define.
- */
-#define __D_PASTE__(varname, modulename) varname##_##modulename
-#define __D_PASTE(varname, modulename) (__D_PASTE__(varname, modulename))
-#define _D_SUBMODULE_INDEX(_name) (D_SUBMODULE_DECLARE(_name))
-
-
-/*
- * Store a submodule's runtime debug level and name
- */
-struct d_level {
-	u8 level;
-	const char *name;
-};
-
-
-/*
- * List of available submodules and their debug levels
- *
- * We call them d_level_MODULENAME and d_level_size_MODULENAME; the
- * macros D_LEVEL and D_LEVEL_SIZE contain the name already for
- * convenience.
- *
- * This array and the size are defined on some .c file that is part of
- * the current module.
- */
-#define D_LEVEL __D_PASTE(d_level, D_MODULENAME)
-#define D_LEVEL_SIZE __D_PASTE(d_level_size, D_MODULENAME)
-
-extern struct d_level D_LEVEL[];
-extern size_t D_LEVEL_SIZE;
-
-
-/*
- * Frontend stuff
- *
- *
- * Stuff you need to declare prior to using the actual "debug" actions
- * (defined below).
- */
-
-#ifndef D_MODULENAME
-#error D_MODULENAME is not defined in your debug-levels.h file
-/**
- * D_MODULE - Name of the current module
- *
- * #define in your module's debug-levels.h, making sure it is
- * unique. This has to be a legal C identifier.
- */
-#define D_MODULENAME undefined_modulename
-#endif
-
-
-#ifndef D_MASTER
-#warning D_MASTER not defined, but debug.h included! [see docs]
-/**
- * D_MASTER - Compile time maximum debug level
- *
- * #define in your debug-levels.h file to the maximum debug level the
- * runtime code will be allowed to have. This allows you to provide a
- * main knob.
- *
- * Anything above that level will be optimized out of the compile.
- *
- * Defaults to zero (no debug code compiled in).
- *
- * Maximum one definition per module (at the debug-levels.h file).
- */
-#define D_MASTER 0
-#endif
-
-#ifndef D_SUBMODULE
-#error D_SUBMODULE not defined, but debug.h included! [see docs]
-/**
- * D_SUBMODULE - Name of the current submodule
- *
- * #define in your submodule .c file before #including debug-levels.h
- * to the name of the current submodule as previously declared and
- * defined with D_SUBMODULE_DECLARE() (in your module's
- * debug-levels.h) and D_SUBMODULE_DEFINE().
- *
- * This is used to provide runtime-control over the debug levels.
- *
- * Maximum one per .c file! Can be shared among different .c files
- * (meaning they belong to the same submodule categorization).
- */
-#define D_SUBMODULE undefined_module
-#endif
-
-
-/**
- * D_SUBMODULE_DECLARE - Declare a submodule for runtime debug level control
- *
- * @_name: name of the submodule, restricted to the chars that make up a
- *     valid C identifier ([a-zA-Z0-9_]).
- *
- * Declare in the module's debug-levels.h header file as:
- *
- * enum d_module {
- *         D_SUBMODULE_DECLARE(submodule_1),
- *         D_SUBMODULE_DECLARE(submodule_2),
- *         D_SUBMODULE_DECLARE(submodule_3),
- * };
- *
- * Some corresponding .c file needs to have a matching
- * D_SUBMODULE_DEFINE().
- */
-#define D_SUBMODULE_DECLARE(_name) __D_SUBMODULE_##_name
-
-
-/**
- * D_SUBMODULE_DEFINE - Define a submodule for runtime debug level control
- *
- * @_name: name of the submodule, restricted to the chars that make up a
- *     valid C identifier ([a-zA-Z0-9_]).
- *
- * Use once per module (in some .c file) as:
- *
- * static
- * struct d_level d_level_SUBMODULENAME[] = {
- *         D_SUBMODULE_DEFINE(submodule_1),
- *         D_SUBMODULE_DEFINE(submodule_2),
- *         D_SUBMODULE_DEFINE(submodule_3),
- * };
- * size_t d_level_size_SUBDMODULENAME = ARRAY_SIZE(d_level_SUBDMODULENAME);
- *
- * Matching D_SUBMODULE_DECLARE()s have to be present in a
- * debug-levels.h header file.
- */
-#define D_SUBMODULE_DEFINE(_name)		\
-[__D_SUBMODULE_##_name] = {			\
-	.level = 0,				\
-	.name = #_name				\
-}
-
-
-
-/* The actual "debug" operations */
-
-
-/**
- * d_test - Returns true if debugging should be enabled
- *
- * @l: intended debug level (unsigned)
- *
- * If the master debug switch is enabled and the current settings are
- * higher or equal to the requested level, then debugging
- * output/actions should be enabled.
- *
- * NOTE:
- *
- * This needs to be coded so that it can be evaluated in compile
- * time; this is why the ugly BUG_ON() is placed in there, so the
- * D_MASTER evaluation compiles all out if it is compile-time false.
- */
-#define d_test(l)							\
-({									\
-	unsigned __l = l;	/* type enforcer */			\
-	(D_MASTER) >= __l						\
-	&& ({								\
-		BUG_ON(_D_SUBMODULE_INDEX(D_SUBMODULE) >= D_LEVEL_SIZE);\
-		D_LEVEL[_D_SUBMODULE_INDEX(D_SUBMODULE)].level >= __l;	\
-	});								\
-})
-
-
-/**
- * d_fnstart - log message at function start if debugging enabled
- *
- * @l: intended debug level
- * @_dev: 'struct device' pointer, NULL if none (for context)
- * @f: printf-like format and arguments
- */
-#define d_fnstart(l, _dev, f, a...) _d_printf(l, " FNSTART", _dev, f, ## a)
-
-
-/**
- * d_fnend - log message at function end if debugging enabled
- *
- * @l: intended debug level
- * @_dev: 'struct device' pointer, NULL if none (for context)
- * @f: printf-like format and arguments
- */
-#define d_fnend(l, _dev, f, a...) _d_printf(l, " FNEND", _dev, f, ## a)
-
-
-/**
- * d_printf - log message if debugging enabled
- *
- * @l: intended debug level
- * @_dev: 'struct device' pointer, NULL if none (for context)
- * @f: printf-like format and arguments
- */
-#define d_printf(l, _dev, f, a...) _d_printf(l, "", _dev, f, ## a)
-
-
-/**
- * d_dump - log buffer hex dump if debugging enabled
- *
- * @l: intended debug level
- * @_dev: 'struct device' pointer, NULL if none (for context)
- * @f: printf-like format and arguments
- */
-#define d_dump(l, dev, ptr, size)			\
-do {							\
-	char head[64];					\
-	if (!d_test(l))					\
-		break;					\
-	__d_head(head, sizeof(head), dev);		\
-	print_hex_dump(KERN_ERR, head, 0, 16, 1,	\
-		       ((void *) ptr), (size), 0);	\
-} while (0)
-
-
-/**
- * Export a submodule's debug level over debugfs as PREFIXSUBMODULE
- *
- * @prefix: string to prefix the name with
- * @submodule: name of submodule (not a string, just the name)
- * @dentry: debugfs parent dentry
- *
- * For removing, just use debugfs_remove_recursive() on the parent.
- */
-#define d_level_register_debugfs(prefix, name, parent)			\
-({									\
-	debugfs_create_u8(						\
-		prefix #name, 0600, parent,				\
-		&(D_LEVEL[__D_SUBMODULE_ ## name].level));		\
-})
-
-
-static inline
-void d_submodule_set(struct d_level *d_level, size_t d_level_size,
-		     const char *submodule, u8 level, const char *tag)
-{
-	struct d_level *itr, *top;
-	int index = -1;
-
-	for (itr = d_level, top = itr + d_level_size; itr < top; itr++) {
-		index++;
-		if (itr->name == NULL) {
-			printk(KERN_ERR "%s: itr->name NULL?? (%p, #%d)\n",
-			       tag, itr, index);
-			continue;
-		}
-		if (!strcmp(itr->name, submodule)) {
-			itr->level = level;
-			return;
-		}
-	}
-	printk(KERN_ERR "%s: unknown submodule %s\n", tag, submodule);
-}
-
-
-/**
- * d_parse_params - Parse a string with debug parameters from the
- * command line
- *
- * @d_level: level structure (D_LEVEL)
- * @d_level_size: number of items in the level structure
- *     (D_LEVEL_SIZE).
- * @_params: string with the parameters; this is a space (not tab!)
- *     separated list of NAME:VALUE, where value is the debug level
- *     and NAME is the name of the submodule.
- * @tag: string for error messages (example: MODULE.ARGNAME).
- */
-static inline
-void d_parse_params(struct d_level *d_level, size_t d_level_size,
-		    const char *_params, const char *tag)
-{
-	char submodule[130], *params, *params_orig, *token, *colon;
-	unsigned level, tokens;
-
-	if (_params == NULL)
-		return;
-	params_orig = kstrdup(_params, GFP_KERNEL);
-	params = params_orig;
-	while (1) {
-		token = strsep(&params, " ");
-		if (token == NULL)
-			break;
-		if (*token == '\0')	/* eat joint spaces */
-			continue;
-		/* kernel's sscanf %s eats until whitespace, so we
-		 * replace : by \n so it doesn't get eaten later by
-		 * strsep */
-		colon = strchr(token, ':');
-		if (colon != NULL)
-			*colon = '\n';
-		tokens = sscanf(token, "%s\n%u", submodule, &level);
-		if (colon != NULL)
-			*colon = ':';	/* set back, for error messages */
-		if (tokens == 2)
-			d_submodule_set(d_level, d_level_size,
-					submodule, level, tag);
-		else
-			printk(KERN_ERR "%s: can't parse '%s' as a "
-			       "SUBMODULE:LEVEL (%d tokens)\n",
-			       tag, token, tokens);
-	}
-	kfree(params_orig);
-}
-
-#endif /* #ifndef __debug__h__ */
diff --git a/include/net/wimax.h b/include/net/wimax.h
deleted file mode 100644
index f6e31d2f47aa..000000000000
--- a/include/net/wimax.h
+++ /dev/null
@@ -1,503 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Linux WiMAX
- * Kernel space API for accessing WiMAX devices
- *
- * Copyright (C) 2007-2008 Intel Corporation <linux-wimax@intel.com>
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * The WiMAX stack provides an API for controlling and managing the
- * system's WiMAX devices. This API affects the control plane; the
- * data plane is accessed via the network stack (netdev).
- *
- * Parts of the WiMAX stack API and notifications are exported to
- * user space via Generic Netlink. In user space, libwimax (part of
- * the wimax-tools package) provides a shim layer for accessing those
- * calls.
- *
- * The API is standarized for all WiMAX devices and different drivers
- * implement the backend support for it. However, device-specific
- * messaging pipes are provided that can be used to issue commands and
- * receive notifications in free form.
- *
- * Currently the messaging pipes are the only means of control as it
- * is not known (due to the lack of more devices in the market) what
- * will be a good abstraction layer. Expect this to change as more
- * devices show in the market. This API is designed to be growable in
- * order to address this problem.
- *
- * USAGE
- *
- * Embed a `struct wimax_dev` at the beginning of the device's
- * private structure, initialize and register it. For details, see
- * `struct wimax_dev`s documentation.
- *
- * Once this is done, wimax-tools's libwimaxll can be used to
- * communicate with the driver from user space. You user space
- * application does not have to forcibily use libwimaxll and can talk
- * the generic netlink protocol directly if desired.
- *
- * Remember this is a very low level API that will to provide all of
- * WiMAX features. Other daemons and services running in user space
- * are the expected clients of it. They offer a higher level API that
- * applications should use (an example of this is the Intel's WiMAX
- * Network Service for the i2400m).
- *
- * DESIGN
- *
- * Although not set on final stone, this very basic interface is
- * mostly completed. Remember this is meant to grow as new common
- * operations are decided upon. New operations will be added to the
- * interface, intent being on keeping backwards compatibility as much
- * as possible.
- *
- * This layer implements a set of calls to control a WiMAX device,
- * exposing a frontend to the rest of the kernel and user space (via
- * generic netlink) and a backend implementation in the driver through
- * function pointers.
- *
- * WiMAX devices have a state, and a kernel-only API allows the
- * drivers to manipulate that state. State transitions are atomic, and
- * only some of them are allowed (see `enum wimax_st`).
- *
- * Most API calls will set the state automatically; in most cases
- * drivers have to only report state changes due to external
- * conditions.
- *
- * All API operations are 'atomic', serialized through a mutex in the
- * `struct wimax_dev`.
- *
- * EXPORTING TO USER SPACE THROUGH GENERIC NETLINK
- *
- * The API is exported to user space using generic netlink (other
- * methods can be added as needed).
- *
- * There is a Generic Netlink Family named "WiMAX", where interfaces
- * supporting the WiMAX interface receive commands and broadcast their
- * signals over a multicast group named "msg".
- *
- * Mapping to the source/destination interface is done by an interface
- * index attribute.
- *
- * For user-to-kernel traffic (commands) we use a function call
- * marshalling mechanism, where a message X with attributes A, B, C
- * sent from user space to kernel space means executing the WiMAX API
- * call wimax_X(A, B, C), sending the results back as a message.
- *
- * Kernel-to-user (notifications or signals) communication is sent
- * over multicast groups. This allows to have multiple applications
- * monitoring them.
- *
- * Each command/signal gets assigned it's own attribute policy. This
- * way the validator will verify that all the attributes in there are
- * only the ones that should be for each command/signal. Thing of an
- * attribute mapping to a type+argumentname for each command/signal.
- *
- * If we had a single policy for *all* commands/signals, after running
- * the validator we'd have to check "does this attribute belong in
- * here"?  for each one. It can be done manually, but it's just easier
- * to have the validator do that job with multiple policies. As well,
- * it makes it easier to later expand each command/signal signature
- * without affecting others and keeping the namespace more or less
- * sane. Not that it is too complicated, but it makes it even easier.
- *
- * No state information is maintained in the kernel for each user
- * space connection (the connection is stateless).
- *
- * TESTING FOR THE INTERFACE AND VERSIONING
- *
- * If network interface X is a WiMAX device, there will be a Generic
- * Netlink family named "WiMAX X" and the device will present a
- * "wimax" directory in it's network sysfs directory
- * (/sys/class/net/DEVICE/wimax) [used by HAL].
- *
- * The inexistence of any of these means the device does not support
- * this WiMAX API.
- *
- * By querying the generic netlink controller, versioning information
- * and the multicast groups available can be found. Applications using
- * the interface can either rely on that or use the generic netlink
- * controller to figure out which generic netlink commands/signals are
- * supported.
- *
- * NOTE: this versioning is a last resort to avoid hard
- *    incompatibilities. It is the intention of the design of this
- *    stack not to introduce backward incompatible changes.
- *
- * The version code has to fit in one byte (restrictions imposed by
- * generic netlink); we use `version / 10` for the major version and
- * `version % 10` for the minor. This gives 9 minors for each major
- * and 25 majors.
- *
- * The version change protocol is as follow:
- *
- * - Major versions: needs to be increased if an existing message/API
- *   call is changed or removed. Doesn't need to be changed if a new
- *   message is added.
- *
- * - Minor version: needs to be increased if new messages/API calls are
- *   being added or some other consideration that doesn't impact the
- *   user-kernel interface too much (like some kind of bug fix) and
- *   that is kind of left up in the air to common sense.
- *
- * User space code should not try to work if the major version it was
- * compiled for differs from what the kernel offers. As well, if the
- * minor version of the kernel interface is lower than the one user
- * space is expecting (the one it was compiled for), the kernel
- * might be missing API calls; user space shall be ready to handle
- * said condition. Use the generic netlink controller operations to
- * find which ones are supported and which not.
- *
- * libwimaxll:wimaxll_open() takes care of checking versions.
- *
- * THE OPERATIONS:
- *
- * Each operation is defined in its on file (drivers/net/wimax/op-*.c)
- * for clarity. The parts needed for an operation are:
- *
- *  - a function pointer in `struct wimax_dev`: optional, as the
- *    operation might be implemented by the stack and not by the
- *    driver.
- *
- *    All function pointers are named wimax_dev->op_*(), and drivers
- *    must implement them except where noted otherwise.
- *
- *  - When exported to user space, a `struct nla_policy` to define the
- *    attributes of the generic netlink command and a `struct genl_ops`
- *    to define the operation.
- *
- * All the declarations for the operation codes (WIMAX_GNL_OP_<NAME>)
- * and generic netlink attributes (WIMAX_GNL_<NAME>_*) are declared in
- * include/linux/wimax.h; this file is intended to be cloned by user
- * space to gain access to those declarations.
- *
- * A few caveats to remember:
- *
- *  - Need to define attribute numbers starting in 1; otherwise it
- *    fails.
- *
- *  - the `struct genl_family` requires a maximum attribute id; when
- *    defining the `struct nla_policy` for each message, it has to have
- *    an array size of WIMAX_GNL_ATTR_MAX+1.
- *
- * The op_*() function pointers will not be called if the wimax_dev is
- * in a state <= %WIMAX_ST_UNINITIALIZED. The exception is:
- *
- * - op_reset: can be called at any time after wimax_dev_add() has
- *   been called.
- *
- * THE PIPE INTERFACE:
- *
- * This interface is kept intentionally simple. The driver can send
- * and receive free-form messages to/from user space through a
- * pipe. See drivers/net/wimax/op-msg.c for details.
- *
- * The kernel-to-user messages are sent with
- * wimax_msg(). user-to-kernel messages are delivered via
- * wimax_dev->op_msg_from_user().
- *
- * RFKILL:
- *
- * RFKILL support is built into the wimax_dev layer; the driver just
- * needs to call wimax_report_rfkill_{hw,sw}() to inform of changes in
- * the hardware or software RF kill switches. When the stack wants to
- * turn the radio off, it will call wimax_dev->op_rfkill_sw_toggle(),
- * which the driver implements.
- *
- * User space can set the software RF Kill switch by calling
- * wimax_rfkill().
- *
- * The code for now only supports devices that don't require polling;
- * If the device needs to be polled, create a self-rearming delayed
- * work struct for polling or look into adding polled support to the
- * WiMAX stack.
- *
- * When initializing the hardware (_probe), after calling
- * wimax_dev_add(), query the device for it's RF Kill switches status
- * and feed it back to the WiMAX stack using
- * wimax_report_rfkill_{hw,sw}(). If any switch is missing, always
- * report it as ON.
- *
- * NOTE: the wimax stack uses an inverted terminology to that of the
- * RFKILL subsystem:
- *
- *  - ON: radio is ON, RFKILL is DISABLED or OFF.
- *  - OFF: radio is OFF, RFKILL is ENABLED or ON.
- *
- * MISCELLANEOUS OPS:
- *
- * wimax_reset() can be used to reset the device to power on state; by
- * default it issues a warm reset that maintains the same device
- * node. If that is not possible, it falls back to a cold reset
- * (device reconnect). The driver implements the backend to this
- * through wimax_dev->op_reset().
- */
-
-#ifndef __NET__WIMAX_H__
-#define __NET__WIMAX_H__
-
-#include <linux/wimax.h>
-#include <net/genetlink.h>
-#include <linux/netdevice.h>
-
-struct net_device;
-struct genl_info;
-struct wimax_dev;
-
-/**
- * struct wimax_dev - Generic WiMAX device
- *
- * @net_dev: [fill] Pointer to the &struct net_device this WiMAX
- *     device implements.
- *
- * @op_msg_from_user: [fill] Driver-specific operation to
- *     handle a raw message from user space to the driver. The
- *     driver can send messages to user space using with
- *     wimax_msg_to_user().
- *
- * @op_rfkill_sw_toggle: [fill] Driver-specific operation to act on
- *     userspace (or any other agent) requesting the WiMAX device to
- *     change the RF Kill software switch (WIMAX_RF_ON or
- *     WIMAX_RF_OFF).
- *     If such hardware support is not present, it is assumed the
- *     radio cannot be switched off and it is always on (and the stack
- *     will error out when trying to switch it off). In such case,
- *     this function pointer can be left as NULL.
- *
- * @op_reset: [fill] Driver specific operation to reset the
- *     device.
- *     This operation should always attempt first a warm reset that
- *     does not disconnect the device from the bus and return 0.
- *     If that fails, it should resort to some sort of cold or bus
- *     reset (even if it implies a bus disconnection and device
- *     disappearance). In that case, -ENODEV should be returned to
- *     indicate the device is gone.
- *     This operation has to be synchronous, and return only when the
- *     reset is complete. In case of having had to resort to bus/cold
- *     reset implying a device disconnection, the call is allowed to
- *     return immediately.
- *     NOTE: wimax_dev->mutex is NOT locked when this op is being
- *     called; however, wimax_dev->mutex_reset IS locked to ensure
- *     serialization of calls to wimax_reset().
- *     See wimax_reset()'s documentation.
- *
- * @name: [fill] A way to identify this device. We need to register a
- *     name with many subsystems (rfkill, workqueue creation, etc).
- *     We can't use the network device name as that
- *     might change and in some instances we don't know it yet (until
- *     we don't call register_netdev()). So we generate an unique one
- *     using the driver name and device bus id, place it here and use
- *     it across the board. Recommended naming:
- *     DRIVERNAME-BUSNAME:BUSID (dev->bus->name, dev->bus_id).
- *
- * @id_table_node: [private] link to the list of wimax devices kept by
- *     id-table.c. Protected by it's own spinlock.
- *
- * @mutex: [private] Serializes all concurrent access and execution of
- *     operations.
- *
- * @mutex_reset: [private] Serializes reset operations. Needs to be a
- *     different mutex because as part of the reset operation, the
- *     driver has to call back into the stack to do things such as
- *     state change, that require wimax_dev->mutex.
- *
- * @state: [private] Current state of the WiMAX device.
- *
- * @rfkill: [private] integration into the RF-Kill infrastructure.
- *
- * @rf_sw: [private] State of the software radio switch (OFF/ON)
- *
- * @rf_hw: [private] State of the hardware radio switch (OFF/ON)
- *
- * @debugfs_dentry: [private] Used to hook up a debugfs entry. This
- *     shows up in the debugfs root as wimax\:DEVICENAME.
- *
- * Description:
- * This structure defines a common interface to access all WiMAX
- * devices from different vendors and provides a common API as well as
- * a free-form device-specific messaging channel.
- *
- * Usage:
- *  1. Embed a &struct wimax_dev at *the beginning* the network
- *     device structure so that netdev_priv() points to it.
- *
- *  2. memset() it to zero
- *
- *  3. Initialize with wimax_dev_init(). This will leave the WiMAX
- *     device in the %__WIMAX_ST_NULL state.
- *
- *  4. Fill all the fields marked with [fill]; once called
- *     wimax_dev_add(), those fields CANNOT be modified.
- *
- *  5. Call wimax_dev_add() *after* registering the network
- *     device. This will leave the WiMAX device in the %WIMAX_ST_DOWN
- *     state.
- *     Protect the driver's net_device->open() against succeeding if
- *     the wimax device state is lower than %WIMAX_ST_DOWN.
- *
- *  6. Select when the device is going to be turned on/initialized;
- *     for example, it could be initialized on 'ifconfig up' (when the
- *     netdev op 'open()' is called on the driver).
- *
- * When the device is initialized (at `ifconfig up` time, or right
- * after calling wimax_dev_add() from _probe(), make sure the
- * following steps are taken
- *
- *  a. Move the device to %WIMAX_ST_UNINITIALIZED. This is needed so
- *     some API calls that shouldn't work until the device is ready
- *     can be blocked.
- *
- *  b. Initialize the device. Make sure to turn the SW radio switch
- *     off and move the device to state %WIMAX_ST_RADIO_OFF when
- *     done. When just initialized, a device should be left in RADIO
- *     OFF state until user space devices to turn it on.
- *
- *  c. Query the device for the state of the hardware rfkill switch
- *     and call wimax_rfkill_report_hw() and wimax_rfkill_report_sw()
- *     as needed. See below.
- *
- * wimax_dev_rm() undoes before unregistering the network device. Once
- * wimax_dev_add() is called, the driver can get called on the
- * wimax_dev->op_* function pointers
- *
- * CONCURRENCY:
- *
- * The stack provides a mutex for each device that will disallow API
- * calls happening concurrently; thus, op calls into the driver
- * through the wimax_dev->op*() function pointers will always be
- * serialized and *never* concurrent.
- *
- * For locking, take wimax_dev->mutex is taken; (most) operations in
- * the API have to check for wimax_dev_is_ready() to return 0 before
- * continuing (this is done internally).
- *
- * REFERENCE COUNTING:
- *
- * The WiMAX device is reference counted by the associated network
- * device. The only operation that can be used to reference the device
- * is wimax_dev_get_by_genl_info(), and the reference it acquires has
- * to be released with dev_put(wimax_dev->net_dev).
- *
- * RFKILL:
- *
- * At startup, both HW and SW radio switchess are assumed to be off.
- *
- * At initialization time [after calling wimax_dev_add()], have the
- * driver query the device for the status of the software and hardware
- * RF kill switches and call wimax_report_rfkill_hw() and
- * wimax_rfkill_report_sw() to indicate their state. If any is
- * missing, just call it to indicate it is ON (radio always on).
- *
- * Whenever the driver detects a change in the state of the RF kill
- * switches, it should call wimax_report_rfkill_hw() or
- * wimax_report_rfkill_sw() to report it to the stack.
- */
-struct wimax_dev {
-	struct net_device *net_dev;
-	struct list_head id_table_node;
-	struct mutex mutex;		/* Protects all members and API calls */
-	struct mutex mutex_reset;
-	enum wimax_st state;
-
-	int (*op_msg_from_user)(struct wimax_dev *wimax_dev,
-				const char *,
-				const void *, size_t,
-				const struct genl_info *info);
-	int (*op_rfkill_sw_toggle)(struct wimax_dev *wimax_dev,
-				   enum wimax_rf_state);
-	int (*op_reset)(struct wimax_dev *wimax_dev);
-
-	struct rfkill *rfkill;
-	unsigned int rf_hw;
-	unsigned int rf_sw;
-	char name[32];
-
-	struct dentry *debugfs_dentry;
-};
-
-
-
-/*
- * WiMAX stack public API for device drivers
- * -----------------------------------------
- *
- * These functions are not exported to user space.
- */
-void wimax_dev_init(struct wimax_dev *);
-int wimax_dev_add(struct wimax_dev *, struct net_device *);
-void wimax_dev_rm(struct wimax_dev *);
-
-static inline
-struct wimax_dev *net_dev_to_wimax(struct net_device *net_dev)
-{
-	return netdev_priv(net_dev);
-}
-
-static inline
-struct device *wimax_dev_to_dev(struct wimax_dev *wimax_dev)
-{
-	return wimax_dev->net_dev->dev.parent;
-}
-
-void wimax_state_change(struct wimax_dev *, enum wimax_st);
-enum wimax_st wimax_state_get(struct wimax_dev *);
-
-/*
- * Radio Switch state reporting.
- *
- * enum wimax_rf_state is declared in linux/wimax.h so the exports
- * to user space can use it.
- */
-void wimax_report_rfkill_hw(struct wimax_dev *, enum wimax_rf_state);
-void wimax_report_rfkill_sw(struct wimax_dev *, enum wimax_rf_state);
-
-
-/*
- * Free-form messaging to/from user space
- *
- * Sending a message:
- *
- *   wimax_msg(wimax_dev, pipe_name, buf, buf_size, GFP_KERNEL);
- *
- * Broken up:
- *
- *   skb = wimax_msg_alloc(wimax_dev, pipe_name, buf_size, GFP_KERNEL);
- *   ...fill up skb...
- *   wimax_msg_send(wimax_dev, pipe_name, skb);
- *
- * Be sure not to modify skb->data in the middle (ie: don't use
- * skb_push()/skb_pull()/skb_reserve() on the skb).
- *
- * "pipe_name" is any string, that can be interpreted as the name of
- * the pipe or recipient; the interpretation of it is driver
- * specific, so the recipient can multiplex it as wished. It can be
- * NULL, it won't be used - an example is using a "diagnostics" tag to
- * send diagnostics information that a device-specific diagnostics
- * tool would be interested in.
- */
-struct sk_buff *wimax_msg_alloc(struct wimax_dev *, const char *, const void *,
-				size_t, gfp_t);
-int wimax_msg_send(struct wimax_dev *, struct sk_buff *);
-int wimax_msg(struct wimax_dev *, const char *, const void *, size_t, gfp_t);
-
-const void *wimax_msg_data_len(struct sk_buff *, size_t *);
-const void *wimax_msg_data(struct sk_buff *);
-ssize_t wimax_msg_len(struct sk_buff *);
-
-
-/*
- * WiMAX stack user space API
- * --------------------------
- *
- * This API is what gets exported to user space for general
- * operations. As well, they can be called from within the kernel,
- * (with a properly referenced `struct wimax_dev`).
- *
- * Properly referenced means: the 'struct net_device' that embeds the
- * device's control structure and (as such) the 'struct wimax_dev' is
- * referenced by the caller.
- */
-int wimax_rfkill(struct wimax_dev *, enum wimax_rf_state);
-int wimax_reset(struct wimax_dev *);
-
-#endif /* #ifndef __NET__WIMAX_H__ */
diff --git a/include/uapi/linux/wimax.h b/include/uapi/linux/wimax.h
deleted file mode 100644
index 9f6b77af2f6d..000000000000
--- a/include/uapi/linux/wimax.h
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- * Linux WiMax
- * API for user space
- *
- *
- * Copyright (C) 2007-2008 Intel Corporation. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * Intel Corporation <linux-wimax@intel.com>
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *  - Initial implementation
- *
- *
- * This file declares the user/kernel protocol that is spoken over
- * Generic Netlink, as well as any type declaration that is to be used
- * by kernel and user space.
- *
- * It is intended for user space to clone it verbatim to use it as a
- * primary reference for definitions.
- *
- * Stuff intended for kernel usage as well as full protocol and stack
- * documentation is rooted in include/net/wimax.h.
- */
-
-#ifndef __LINUX__WIMAX_H__
-#define __LINUX__WIMAX_H__
-
-#include <linux/types.h>
-
-enum {
-	/**
-	 * Version of the interface (unsigned decimal, MMm, max 25.5)
-	 * M - Major: change if removing or modifying an existing call.
-	 * m - minor: change when adding a new call
-	 */
-	WIMAX_GNL_VERSION = 01,
-	/* Generic NetLink attributes */
-	WIMAX_GNL_ATTR_INVALID = 0x00,
-	WIMAX_GNL_ATTR_MAX = 10,
-};
-
-
-/*
- * Generic NetLink operations
- *
- * Most of these map to an API call; _OP_ stands for operation, _RP_
- * for reply and _RE_ for report (aka: signal).
- */
-enum {
-	WIMAX_GNL_OP_MSG_FROM_USER,	/* User to kernel message */
-	WIMAX_GNL_OP_MSG_TO_USER,	/* Kernel to user message */
-	WIMAX_GNL_OP_RFKILL,	/* Run wimax_rfkill() */
-	WIMAX_GNL_OP_RESET,	/* Run wimax_rfkill() */
-	WIMAX_GNL_RE_STATE_CHANGE,	/* Report: status change */
-	WIMAX_GNL_OP_STATE_GET,		/* Request for current state */
-};
-
-
-/* Message from user / to user */
-enum {
-	WIMAX_GNL_MSG_IFIDX = 1,
-	WIMAX_GNL_MSG_PIPE_NAME,
-	WIMAX_GNL_MSG_DATA,
-};
-
-
-/*
- * wimax_rfkill()
- *
- * The state of the radio (ON/OFF) is mapped to the rfkill subsystem's
- * switch state (DISABLED/ENABLED).
- */
-enum wimax_rf_state {
-	WIMAX_RF_OFF = 0,	/* Radio is off, rfkill on/enabled */
-	WIMAX_RF_ON = 1,	/* Radio is on, rfkill off/disabled */
-	WIMAX_RF_QUERY = 2,
-};
-
-/* Attributes */
-enum {
-	WIMAX_GNL_RFKILL_IFIDX = 1,
-	WIMAX_GNL_RFKILL_STATE,
-};
-
-
-/* Attributes for wimax_reset() */
-enum {
-	WIMAX_GNL_RESET_IFIDX = 1,
-};
-
-/* Attributes for wimax_state_get() */
-enum {
-	WIMAX_GNL_STGET_IFIDX = 1,
-};
-
-/*
- * Attributes for the Report State Change
- *
- * For now we just have the old and new states; new attributes might
- * be added later on.
- */
-enum {
-	WIMAX_GNL_STCH_IFIDX = 1,
-	WIMAX_GNL_STCH_STATE_OLD,
-	WIMAX_GNL_STCH_STATE_NEW,
-};
-
-
-/**
- * enum wimax_st - The different states of a WiMAX device
- * @__WIMAX_ST_NULL: The device structure has been allocated and zeroed,
- *     but still wimax_dev_add() hasn't been called. There is no state.
- *
- * @WIMAX_ST_DOWN: The device has been registered with the WiMAX and
- *     networking stacks, but it is not initialized (normally that is
- *     done with 'ifconfig DEV up' [or equivalent], which can upload
- *     firmware and enable communications with the device).
- *     In this state, the device is powered down and using as less
- *     power as possible.
- *     This state is the default after a call to wimax_dev_add(). It
- *     is ok to have drivers move directly to %WIMAX_ST_UNINITIALIZED
- *     or %WIMAX_ST_RADIO_OFF in _probe() after the call to
- *     wimax_dev_add().
- *     It is recommended that the driver leaves this state when
- *     calling 'ifconfig DEV up' and enters it back on 'ifconfig DEV
- *     down'.
- *
- * @__WIMAX_ST_QUIESCING: The device is being torn down, so no API
- *     operations are allowed to proceed except the ones needed to
- *     complete the device clean up process.
- *
- * @WIMAX_ST_UNINITIALIZED: [optional] Communication with the device
- *     is setup, but the device still requires some configuration
- *     before being operational.
- *     Some WiMAX API calls might work.
- *
- * @WIMAX_ST_RADIO_OFF: The device is fully up; radio is off (wether
- *     by hardware or software switches).
- *     It is recommended to always leave the device in this state
- *     after initialization.
- *
- * @WIMAX_ST_READY: The device is fully up and radio is on.
- *
- * @WIMAX_ST_SCANNING: [optional] The device has been instructed to
- *     scan. In this state, the device cannot be actively connected to
- *     a network.
- *
- * @WIMAX_ST_CONNECTING: The device is connecting to a network. This
- *     state exists because in some devices, the connect process can
- *     include a number of negotiations between user space, kernel
- *     space and the device. User space needs to know what the device
- *     is doing. If the connect sequence in a device is atomic and
- *     fast, the device can transition directly to CONNECTED
- *
- * @WIMAX_ST_CONNECTED: The device is connected to a network.
- *
- * @__WIMAX_ST_INVALID: This is an invalid state used to mark the
- *     maximum numeric value of states.
- *
- * Description:
- *
- * Transitions from one state to another one are atomic and can only
- * be caused in kernel space with wimax_state_change(). To read the
- * state, use wimax_state_get().
- *
- * States starting with __ are internal and shall not be used or
- * referred to by drivers or userspace. They look ugly, but that's the
- * point -- if any use is made non-internal to the stack, it is easier
- * to catch on review.
- *
- * All API operations [with well defined exceptions] will take the
- * device mutex before starting and then check the state. If the state
- * is %__WIMAX_ST_NULL, %WIMAX_ST_DOWN, %WIMAX_ST_UNINITIALIZED or
- * %__WIMAX_ST_QUIESCING, it will drop the lock and quit with
- * -%EINVAL, -%ENOMEDIUM, -%ENOTCONN or -%ESHUTDOWN.
- *
- * The order of the definitions is important, so we can do numerical
- * comparisons (eg: < %WIMAX_ST_RADIO_OFF means the device is not ready
- * to operate).
- */
-/*
- * The allowed state transitions are described in the table below
- * (states in rows can go to states in columns where there is an X):
- *
- *                                  UNINI   RADIO READY SCAN CONNEC CONNEC
- *             NULL DOWN QUIESCING TIALIZED  OFF        NING  TING   TED
- * NULL         -    x
- * DOWN              -      x        x       x
- * QUIESCING         x      -
- * UNINITIALIZED            x        -       x
- * RADIO_OFF                x                -     x
- * READY                    x                x     -     x     x      x
- * SCANNING                 x                x     x     -     x      x
- * CONNECTING               x                x     x     x     -      x
- * CONNECTED                x                x     x                  -
- *
- * This table not available in kernel-doc because the formatting messes it up.
- */
- enum wimax_st {
-	__WIMAX_ST_NULL = 0,
-	WIMAX_ST_DOWN,
-	__WIMAX_ST_QUIESCING,
-	WIMAX_ST_UNINITIALIZED,
-	WIMAX_ST_RADIO_OFF,
-	WIMAX_ST_READY,
-	WIMAX_ST_SCANNING,
-	WIMAX_ST_CONNECTING,
-	WIMAX_ST_CONNECTED,
-	__WIMAX_ST_INVALID			/* Always keep last */
-};
-
-
-#endif /* #ifndef __LINUX__WIMAX_H__ */
diff --git a/include/uapi/linux/wimax/i2400m.h b/include/uapi/linux/wimax/i2400m.h
deleted file mode 100644
index fd198bc24a3c..000000000000
--- a/include/uapi/linux/wimax/i2400m.h
+++ /dev/null
@@ -1,572 +0,0 @@
-/*
- * Intel Wireless WiMax Connection 2400m
- * Host-Device protocol interface definitions
- *
- *
- * Copyright (C) 2007-2008 Intel Corporation. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * Intel Corporation <linux-wimax@intel.com>
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *  - Initial implementation
- *
- *
- * This header defines the data structures and constants used to
- * communicate with the device.
- *
- * BOOTMODE/BOOTROM/FIRMWARE UPLOAD PROTOCOL
- *
- * The firmware upload protocol is quite simple and only requires a
- * handful of commands. See drivers/net/wimax/i2400m/fw.c for more
- * details.
- *
- * The BCF data structure is for the firmware file header.
- *
- *
- * THE DATA / CONTROL PROTOCOL
- *
- * This is the normal protocol spoken with the device once the
- * firmware is uploaded. It transports data payloads and control
- * messages back and forth.
- *
- * It consists 'messages' that pack one or more payloads each. The
- * format is described in detail in drivers/net/wimax/i2400m/rx.c and
- * tx.c.
- *
- *
- * THE L3L4 PROTOCOL
- *
- * The term L3L4 refers to Layer 3 (the device), Layer 4 (the
- * driver/host software).
- *
- * This is the control protocol used by the host to control the i2400m
- * device (scan, connect, disconnect...). This is sent to / received
- * as control frames. These frames consist of a header and zero or
- * more TLVs with information. We call each control frame a "message".
- *
- * Each message is composed of:
- *
- * HEADER
- * [TLV0 + PAYLOAD0]
- * [TLV1 + PAYLOAD1]
- * [...]
- * [TLVN + PAYLOADN]
- *
- * The HEADER is defined by 'struct i2400m_l3l4_hdr'. The payloads are
- * defined by a TLV structure (Type Length Value) which is a 'header'
- * (struct i2400m_tlv_hdr) and then the payload.
- *
- * All integers are represented as Little Endian.
- *
- * - REQUESTS AND EVENTS
- *
- * The requests can be clasified as follows:
- *
- *   COMMAND:  implies a request from the host to the device requesting
- *             an action being performed. The device will reply with a
- *             message (with the same type as the command), status and
- *             no (TLV) payload. Execution of a command might cause
- *             events (of different type) to be sent later on as
- *             device's state changes.
- *
- *   GET/SET:  similar to COMMAND, but will not cause other
- *             EVENTs. The reply, in the case of GET, will contain
- *             TLVs with the requested information.
- *
- *   EVENT:    asynchronous messages sent from the device, maybe as a
- *             consequence of previous COMMANDs but disassociated from
- *             them.
- *
- * Only one request might be pending at the same time (ie: don't
- * parallelize nor post another GET request before the previous
- * COMMAND has been acknowledged with it's corresponding reply by the
- * device).
- *
- * The different requests and their formats are described below:
- *
- *  I2400M_MT_*   Message types
- *  I2400M_MS_*   Message status (for replies, events)
- *  i2400m_tlv_*  TLVs
- *
- * data types are named 'struct i2400m_msg_OPNAME', OPNAME matching the
- * operation.
- */
-
-#ifndef __LINUX__WIMAX__I2400M_H__
-#define __LINUX__WIMAX__I2400M_H__
-
-#include <linux/types.h>
-#include <linux/if_ether.h>
-
-/*
- * Host Device Interface (HDI) common to all busses
- */
-
-/* Boot-mode (firmware upload mode) commands */
-
-/* Header for the firmware file */
-struct i2400m_bcf_hdr {
-	__le32 module_type;
-	__le32 header_len;
-	__le32 header_version;
-	__le32 module_id;
-	__le32 module_vendor;
-	__le32 date;		/* BCD YYYMMDD */
-	__le32 size;            /* in dwords */
-	__le32 key_size;	/* in dwords */
-	__le32 modulus_size;	/* in dwords */
-	__le32 exponent_size;	/* in dwords */
-	__u8 reserved[88];
-} __attribute__ ((packed));
-
-/* Boot mode opcodes */
-enum i2400m_brh_opcode {
-	I2400M_BRH_READ = 1,
-	I2400M_BRH_WRITE = 2,
-	I2400M_BRH_JUMP = 3,
-	I2400M_BRH_SIGNED_JUMP = 8,
-	I2400M_BRH_HASH_PAYLOAD_ONLY = 9,
-};
-
-/* Boot mode command masks and stuff */
-enum i2400m_brh {
-	I2400M_BRH_SIGNATURE = 0xcbbc0000,
-	I2400M_BRH_SIGNATURE_MASK = 0xffff0000,
-	I2400M_BRH_SIGNATURE_SHIFT = 16,
-	I2400M_BRH_OPCODE_MASK = 0x0000000f,
-	I2400M_BRH_RESPONSE_MASK = 0x000000f0,
-	I2400M_BRH_RESPONSE_SHIFT = 4,
-	I2400M_BRH_DIRECT_ACCESS = 0x00000400,
-	I2400M_BRH_RESPONSE_REQUIRED = 0x00000200,
-	I2400M_BRH_USE_CHECKSUM = 0x00000100,
-};
-
-
-/**
- * i2400m_bootrom_header - Header for a boot-mode command
- *
- * @cmd: the above command descriptor
- * @target_addr: where on the device memory should the action be performed.
- * @data_size: for read/write, amount of data to be read/written
- * @block_checksum: checksum value (if applicable)
- * @payload: the beginning of data attached to this header
- */
-struct i2400m_bootrom_header {
-	__le32 command;		/* Compose with enum i2400_brh */
-	__le32 target_addr;
-	__le32 data_size;
-	__le32 block_checksum;
-	char payload[0];
-} __attribute__ ((packed));
-
-
-/*
- * Data / control protocol
- */
-
-/* Packet types for the host-device interface */
-enum i2400m_pt {
-	I2400M_PT_DATA = 0,
-	I2400M_PT_CTRL,
-	I2400M_PT_TRACE,	/* For device debug */
-	I2400M_PT_RESET_WARM,	/* device reset */
-	I2400M_PT_RESET_COLD,	/* USB[transport] reset, like reconnect */
-	I2400M_PT_EDATA,	/* Extended RX data */
-	I2400M_PT_ILLEGAL
-};
-
-
-/*
- * Payload for a data packet
- *
- * This is prefixed to each and every outgoing DATA type.
- */
-struct i2400m_pl_data_hdr {
-	__le32 reserved;
-} __attribute__((packed));
-
-
-/*
- * Payload for an extended data packet
- *
- * New in fw v1.4
- *
- * @reorder: if this payload has to be reorder or not (and how)
- * @cs: the type of data in the packet, as defined per (802.16e
- *     T11.13.19.1). Currently only 2 (IPv4 packet) supported.
- *
- * This is prefixed to each and every INCOMING DATA packet.
- */
-struct i2400m_pl_edata_hdr {
-	__le32 reorder;		/* bits defined in i2400m_ro */
-	__u8 cs;
-	__u8 reserved[11];
-} __attribute__((packed));
-
-enum i2400m_cs {
-	I2400M_CS_IPV4_0 = 0,
-	I2400M_CS_IPV4 = 2,
-};
-
-enum i2400m_ro {
-	I2400M_RO_NEEDED     = 0x01,
-	I2400M_RO_TYPE       = 0x03,
-	I2400M_RO_TYPE_SHIFT = 1,
-	I2400M_RO_CIN        = 0x0f,
-	I2400M_RO_CIN_SHIFT  = 4,
-	I2400M_RO_FBN        = 0x07ff,
-	I2400M_RO_FBN_SHIFT  = 8,
-	I2400M_RO_SN         = 0x07ff,
-	I2400M_RO_SN_SHIFT   = 21,
-};
-
-enum i2400m_ro_type {
-	I2400M_RO_TYPE_RESET = 0,
-	I2400M_RO_TYPE_PACKET,
-	I2400M_RO_TYPE_WS,
-	I2400M_RO_TYPE_PACKET_WS,
-};
-
-
-/* Misc constants */
-enum {
-	I2400M_PL_ALIGN = 16,	/* Payload data size alignment */
-	I2400M_PL_SIZE_MAX = 0x3EFF,
-	I2400M_MAX_PLS_IN_MSG = 60,
-	/* protocol barkers: sync sequences; for notifications they
-	 * are sent in groups of four. */
-	I2400M_H2D_PREVIEW_BARKER = 0xcafe900d,
-	I2400M_COLD_RESET_BARKER = 0xc01dc01d,
-	I2400M_WARM_RESET_BARKER = 0x50f750f7,
-	I2400M_NBOOT_BARKER = 0xdeadbeef,
-	I2400M_SBOOT_BARKER = 0x0ff1c1a1,
-	I2400M_SBOOT_BARKER_6050 = 0x80000001,
-	I2400M_ACK_BARKER = 0xfeedbabe,
-	I2400M_D2H_MSG_BARKER = 0xbeefbabe,
-};
-
-
-/*
- * Hardware payload descriptor
- *
- * Bitfields encoded in a struct to enforce typing semantics.
- *
- * Look in rx.c and tx.c for a full description of the format.
- */
-struct i2400m_pld {
-	__le32 val;
-} __attribute__ ((packed));
-
-#define I2400M_PLD_SIZE_MASK 0x00003fff
-#define I2400M_PLD_TYPE_SHIFT 16
-#define I2400M_PLD_TYPE_MASK 0x000f0000
-
-/*
- * Header for a TX message or RX message
- *
- * @barker: preamble
- * @size: used for management of the FIFO queue buffer; before
- *     sending, this is converted to be a real preamble. This
- *     indicates the real size of the TX message that starts at this
- *     point. If the highest bit is set, then this message is to be
- *     skipped.
- * @sequence: sequence number of this message
- * @offset: offset where the message itself starts -- see the comments
- *     in the file header about message header and payload descriptor
- *     alignment.
- * @num_pls: number of payloads in this message
- * @padding: amount of padding bytes at the end of the message to make
- *           it be of block-size aligned
- *
- * Look in rx.c and tx.c for a full description of the format.
- */
-struct i2400m_msg_hdr {
-	union {
-		__le32 barker;
-		__u32 size;	/* same size type as barker!! */
-	};
-	union {
-		__le32 sequence;
-		__u32 offset;	/* same size type as barker!! */
-	};
-	__le16 num_pls;
-	__le16 rsv1;
-	__le16 padding;
-	__le16 rsv2;
-	struct i2400m_pld pld[0];
-} __attribute__ ((packed));
-
-
-
-/*
- * L3/L4 control protocol
- */
-
-enum {
-	/* Interface version */
-	I2400M_L3L4_VERSION             = 0x0100,
-};
-
-/* Message types */
-enum i2400m_mt {
-	I2400M_MT_RESERVED              = 0x0000,
-	I2400M_MT_INVALID               = 0xffff,
-	I2400M_MT_REPORT_MASK		= 0x8000,
-
-	I2400M_MT_GET_SCAN_RESULT  	= 0x4202,
-	I2400M_MT_SET_SCAN_PARAM   	= 0x4402,
-	I2400M_MT_CMD_RF_CONTROL   	= 0x4602,
-	I2400M_MT_CMD_SCAN         	= 0x4603,
-	I2400M_MT_CMD_CONNECT      	= 0x4604,
-	I2400M_MT_CMD_DISCONNECT   	= 0x4605,
-	I2400M_MT_CMD_EXIT_IDLE   	= 0x4606,
-	I2400M_MT_GET_LM_VERSION   	= 0x5201,
-	I2400M_MT_GET_DEVICE_INFO  	= 0x5202,
-	I2400M_MT_GET_LINK_STATUS  	= 0x5203,
-	I2400M_MT_GET_STATISTICS   	= 0x5204,
-	I2400M_MT_GET_STATE        	= 0x5205,
-	I2400M_MT_GET_MEDIA_STATUS	= 0x5206,
-	I2400M_MT_SET_INIT_CONFIG	= 0x5404,
-	I2400M_MT_CMD_INIT	        = 0x5601,
-	I2400M_MT_CMD_TERMINATE		= 0x5602,
-	I2400M_MT_CMD_MODE_OF_OP	= 0x5603,
-	I2400M_MT_CMD_RESET_DEVICE	= 0x5604,
-	I2400M_MT_CMD_MONITOR_CONTROL   = 0x5605,
-	I2400M_MT_CMD_ENTER_POWERSAVE   = 0x5606,
-	I2400M_MT_GET_TLS_OPERATION_RESULT = 0x6201,
-	I2400M_MT_SET_EAP_SUCCESS       = 0x6402,
-	I2400M_MT_SET_EAP_FAIL          = 0x6403,
-	I2400M_MT_SET_EAP_KEY          	= 0x6404,
-	I2400M_MT_CMD_SEND_EAP_RESPONSE = 0x6602,
-	I2400M_MT_REPORT_SCAN_RESULT    = 0xc002,
-	I2400M_MT_REPORT_STATE		= 0xd002,
-	I2400M_MT_REPORT_POWERSAVE_READY = 0xd005,
-	I2400M_MT_REPORT_EAP_REQUEST    = 0xe002,
-	I2400M_MT_REPORT_EAP_RESTART    = 0xe003,
-	I2400M_MT_REPORT_ALT_ACCEPT    	= 0xe004,
-	I2400M_MT_REPORT_KEY_REQUEST 	= 0xe005,
-};
-
-
-/*
- * Message Ack Status codes
- *
- * When a message is replied-to, this status is reported.
- */
-enum i2400m_ms {
-	I2400M_MS_DONE_OK                  = 0,
-	I2400M_MS_DONE_IN_PROGRESS         = 1,
-	I2400M_MS_INVALID_OP               = 2,
-	I2400M_MS_BAD_STATE                = 3,
-	I2400M_MS_ILLEGAL_VALUE            = 4,
-	I2400M_MS_MISSING_PARAMS           = 5,
-	I2400M_MS_VERSION_ERROR            = 6,
-	I2400M_MS_ACCESSIBILITY_ERROR      = 7,
-	I2400M_MS_BUSY                     = 8,
-	I2400M_MS_CORRUPTED_TLV            = 9,
-	I2400M_MS_UNINITIALIZED            = 10,
-	I2400M_MS_UNKNOWN_ERROR            = 11,
-	I2400M_MS_PRODUCTION_ERROR         = 12,
-	I2400M_MS_NO_RF                    = 13,
-	I2400M_MS_NOT_READY_FOR_POWERSAVE  = 14,
-	I2400M_MS_THERMAL_CRITICAL         = 15,
-	I2400M_MS_MAX
-};
-
-
-/**
- * i2400m_tlv - enumeration of the different types of TLVs
- *
- * TLVs stand for type-length-value and are the header for a payload
- * composed of almost anything. Each payload has a type assigned
- * and a length.
- */
-enum i2400m_tlv {
-	I2400M_TLV_L4_MESSAGE_VERSIONS = 129,
-	I2400M_TLV_SYSTEM_STATE = 141,
-	I2400M_TLV_MEDIA_STATUS = 161,
-	I2400M_TLV_RF_OPERATION = 162,
-	I2400M_TLV_RF_STATUS = 163,
-	I2400M_TLV_DEVICE_RESET_TYPE = 132,
-	I2400M_TLV_CONFIG_IDLE_PARAMETERS = 601,
-	I2400M_TLV_CONFIG_IDLE_TIMEOUT = 611,
-	I2400M_TLV_CONFIG_D2H_DATA_FORMAT = 614,
-	I2400M_TLV_CONFIG_DL_HOST_REORDER = 615,
-};
-
-
-struct i2400m_tlv_hdr {
-	__le16 type;
-	__le16 length;		/* payload's */
-	__u8   pl[0];
-} __attribute__((packed));
-
-
-struct i2400m_l3l4_hdr {
-	__le16 type;
-	__le16 length;		/* payload's */
-	__le16 version;
-	__le16 resv1;
-	__le16 status;
-	__le16 resv2;
-	struct i2400m_tlv_hdr pl[0];
-} __attribute__((packed));
-
-
-/**
- * i2400m_system_state - different states of the device
- */
-enum i2400m_system_state {
-	I2400M_SS_UNINITIALIZED = 1,
-	I2400M_SS_INIT,
-	I2400M_SS_READY,
-	I2400M_SS_SCAN,
-	I2400M_SS_STANDBY,
-	I2400M_SS_CONNECTING,
-	I2400M_SS_WIMAX_CONNECTED,
-	I2400M_SS_DATA_PATH_CONNECTED,
-	I2400M_SS_IDLE,
-	I2400M_SS_DISCONNECTING,
-	I2400M_SS_OUT_OF_ZONE,
-	I2400M_SS_SLEEPACTIVE,
-	I2400M_SS_PRODUCTION,
-	I2400M_SS_CONFIG,
-	I2400M_SS_RF_OFF,
-	I2400M_SS_RF_SHUTDOWN,
-	I2400M_SS_DEVICE_DISCONNECT,
-	I2400M_SS_MAX,
-};
-
-
-/**
- * i2400m_tlv_system_state - report on the state of the system
- *
- * @state: see enum i2400m_system_state
- */
-struct i2400m_tlv_system_state {
-	struct i2400m_tlv_hdr hdr;
-	__le32 state;
-} __attribute__((packed));
-
-
-struct i2400m_tlv_l4_message_versions {
-	struct i2400m_tlv_hdr hdr;
-	__le16 major;
-	__le16 minor;
-	__le16 branch;
-	__le16 reserved;
-} __attribute__((packed));
-
-
-struct i2400m_tlv_detailed_device_info {
-	struct i2400m_tlv_hdr hdr;
-	__u8 reserved1[400];
-	__u8 mac_address[ETH_ALEN];
-	__u8 reserved2[2];
-} __attribute__((packed));
-
-
-enum i2400m_rf_switch_status {
-	I2400M_RF_SWITCH_ON = 1,
-	I2400M_RF_SWITCH_OFF = 2,
-};
-
-struct i2400m_tlv_rf_switches_status {
-	struct i2400m_tlv_hdr hdr;
-	__u8 sw_rf_switch;	/* 1 ON, 2 OFF */
-	__u8 hw_rf_switch;	/* 1 ON, 2 OFF */
-	__u8 reserved[2];
-} __attribute__((packed));
-
-
-enum {
-	i2400m_rf_operation_on = 1,
-	i2400m_rf_operation_off = 2
-};
-
-struct i2400m_tlv_rf_operation {
-	struct i2400m_tlv_hdr hdr;
-	__le32 status;	/* 1 ON, 2 OFF */
-} __attribute__((packed));
-
-
-enum i2400m_tlv_reset_type {
-	I2400M_RESET_TYPE_COLD = 1,
-	I2400M_RESET_TYPE_WARM
-};
-
-struct i2400m_tlv_device_reset_type {
-	struct i2400m_tlv_hdr hdr;
-	__le32 reset_type;
-} __attribute__((packed));
-
-
-struct i2400m_tlv_config_idle_parameters {
-	struct i2400m_tlv_hdr hdr;
-	__le32 idle_timeout;	/* 100 to 300000 ms [5min], 100 increments
-				 * 0 disabled */
-	__le32 idle_paging_interval;	/* frames */
-} __attribute__((packed));
-
-
-enum i2400m_media_status {
-	I2400M_MEDIA_STATUS_LINK_UP = 1,
-	I2400M_MEDIA_STATUS_LINK_DOWN,
-	I2400M_MEDIA_STATUS_LINK_RENEW,
-};
-
-struct i2400m_tlv_media_status {
-	struct i2400m_tlv_hdr hdr;
-	__le32 media_status;
-} __attribute__((packed));
-
-
-/* New in v1.4 */
-struct i2400m_tlv_config_idle_timeout {
-	struct i2400m_tlv_hdr hdr;
-	__le32 timeout;	/* 100 to 300000 ms [5min], 100 increments
-			 * 0 disabled */
-} __attribute__((packed));
-
-/* New in v1.4 -- for backward compat, will be removed */
-struct i2400m_tlv_config_d2h_data_format {
-	struct i2400m_tlv_hdr hdr;
-	__u8 format; 		/* 0 old format, 1 enhanced */
-	__u8 reserved[3];
-} __attribute__((packed));
-
-/* New in v1.4 */
-struct i2400m_tlv_config_dl_host_reorder {
-	struct i2400m_tlv_hdr hdr;
-	__u8 reorder; 		/* 0 disabled, 1 enabled */
-	__u8 reserved[3];
-} __attribute__((packed));
-
-
-#endif /* #ifndef __LINUX__WIMAX__I2400M_H__ */
diff --git a/net/Kconfig b/net/Kconfig
index d6567162c1cf..f4c32d982af6 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -386,8 +386,6 @@ source "net/mac80211/Kconfig"
 
 endif # WIRELESS
 
-source "net/wimax/Kconfig"
-
 source "net/rfkill/Kconfig"
 source "net/9p/Kconfig"
 source "net/caif/Kconfig"
diff --git a/net/Makefile b/net/Makefile
index 5744bf1997fd..d96b0aa8f39f 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -66,7 +66,6 @@ obj-$(CONFIG_MAC802154)		+= mac802154/
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_SYSCTL)		+= sysctl_net.o
 endif
-obj-$(CONFIG_WIMAX)		+= wimax/
 obj-$(CONFIG_DNS_RESOLVER)	+= dns_resolver/
 obj-$(CONFIG_CEPH_LIB)		+= ceph/
 obj-$(CONFIG_BATMAN_ADV)	+= batman-adv/
diff --git a/net/wimax/Kconfig b/net/wimax/Kconfig
deleted file mode 100644
index d13762bc4abc..000000000000
--- a/net/wimax/Kconfig
+++ /dev/null
@@ -1,40 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# WiMAX LAN device configuration
-#
-
-menuconfig WIMAX
-	tristate "WiMAX Wireless Broadband support"
-	depends on RFKILL || !RFKILL
-	help
-
-	  Select to configure support for devices that provide
-	  wireless broadband connectivity using the WiMAX protocol
-	  (IEEE 802.16).
-
-	  Please note that most of these devices require signing up
-	  for a service plan with a provider.
-
-	  The different WiMAX drivers can be enabled in the menu entry
-
-	  Device Drivers > Network device support > WiMAX Wireless
-	  Broadband devices
-
-	  If unsure, it is safe to select M (module).
-
-config WIMAX_DEBUG_LEVEL
-	int "WiMAX debug level"
-	depends on WIMAX
-	default 8
-	help
-
-	  Select the maximum debug verbosity level to be compiled into
-	  the WiMAX stack code.
-
-	  By default, debug messages are disabled at runtime and can
-	  be selectively enabled for different parts of the code using
-	  the sysfs debug-levels file.
-
-	  If set at zero, this will compile out all the debug code.
-
-	  It is recommended that it is left at 8.
diff --git a/net/wimax/Makefile b/net/wimax/Makefile
deleted file mode 100644
index c2a71ae487ac..000000000000
--- a/net/wimax/Makefile
+++ /dev/null
@@ -1,13 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-obj-$(CONFIG_WIMAX)		+= wimax.o
-
-wimax-y :=		\
-	id-table.o	\
-	op-msg.o	\
-	op-reset.o	\
-	op-rfkill.o	\
-	op-state-get.o	\
-	stack.o
-
-wimax-$(CONFIG_DEBUG_FS) += debugfs.o
diff --git a/net/wimax/debug-levels.h b/net/wimax/debug-levels.h
deleted file mode 100644
index ebc287cde336..000000000000
--- a/net/wimax/debug-levels.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Linux WiMAX Stack
- * Debug levels control file for the wimax module
- *
- * Copyright (C) 2007-2008 Intel Corporation <linux-wimax@intel.com>
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- */
-#ifndef __debug_levels__h__
-#define __debug_levels__h__
-
-/* Maximum compile and run time debug level for all submodules */
-#define D_MODULENAME wimax
-#define D_MASTER CONFIG_WIMAX_DEBUG_LEVEL
-
-#include <linux/wimax/debug.h>
-
-/* List of all the enabled modules */
-enum d_module {
-	D_SUBMODULE_DECLARE(debugfs),
-	D_SUBMODULE_DECLARE(id_table),
-	D_SUBMODULE_DECLARE(op_msg),
-	D_SUBMODULE_DECLARE(op_reset),
-	D_SUBMODULE_DECLARE(op_rfkill),
-	D_SUBMODULE_DECLARE(op_state_get),
-	D_SUBMODULE_DECLARE(stack),
-};
-
-#endif /* #ifndef __debug_levels__h__ */
diff --git a/net/wimax/debugfs.c b/net/wimax/debugfs.c
deleted file mode 100644
index 3c54bb6b925a..000000000000
--- a/net/wimax/debugfs.c
+++ /dev/null
@@ -1,38 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Linux WiMAX
- * Debugfs support
- *
- * Copyright (C) 2005-2006 Intel Corporation <linux-wimax@intel.com>
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- */
-#include <linux/debugfs.h>
-#include <linux/wimax.h>
-#include "wimax-internal.h"
-
-#define D_SUBMODULE debugfs
-#include "debug-levels.h"
-
-void wimax_debugfs_add(struct wimax_dev *wimax_dev)
-{
-	struct net_device *net_dev = wimax_dev->net_dev;
-	struct dentry *dentry;
-	char buf[128];
-
-	snprintf(buf, sizeof(buf), "wimax:%s", net_dev->name);
-	dentry = debugfs_create_dir(buf, NULL);
-	wimax_dev->debugfs_dentry = dentry;
-
-	d_level_register_debugfs("wimax_dl_", debugfs, dentry);
-	d_level_register_debugfs("wimax_dl_", id_table, dentry);
-	d_level_register_debugfs("wimax_dl_", op_msg, dentry);
-	d_level_register_debugfs("wimax_dl_", op_reset, dentry);
-	d_level_register_debugfs("wimax_dl_", op_rfkill, dentry);
-	d_level_register_debugfs("wimax_dl_", op_state_get, dentry);
-	d_level_register_debugfs("wimax_dl_", stack, dentry);
-}
-
-void wimax_debugfs_rm(struct wimax_dev *wimax_dev)
-{
-	debugfs_remove_recursive(wimax_dev->debugfs_dentry);
-}
diff --git a/net/wimax/id-table.c b/net/wimax/id-table.c
deleted file mode 100644
index 02eee37b7e31..000000000000
--- a/net/wimax/id-table.c
+++ /dev/null
@@ -1,130 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Linux WiMAX
- * Mappping of generic netlink family IDs to net devices
- *
- * Copyright (C) 2005-2006 Intel Corporation <linux-wimax@intel.com>
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * We assign a single generic netlink family ID to each device (to
- * simplify lookup).
- *
- * We need a way to map family ID to a wimax_dev pointer.
- *
- * The idea is to use a very simple lookup. Using a netlink attribute
- * with (for example) the interface name implies a heavier search over
- * all the network devices; seemed kind of a waste given that we know
- * we are looking for a WiMAX device and that most systems will have
- * just a single WiMAX adapter.
- *
- * We put all the WiMAX devices in the system in a linked list and
- * match the generic link family ID against the list.
- *
- * By using a linked list, the case of a single adapter in the system
- * becomes (almost) no overhead, while still working for many more. If
- * it ever goes beyond two, I'll be surprised.
- */
-#include <linux/device.h>
-#include <net/genetlink.h>
-#include <linux/netdevice.h>
-#include <linux/list.h>
-#include <linux/wimax.h>
-#include "wimax-internal.h"
-
-
-#define D_SUBMODULE id_table
-#include "debug-levels.h"
-
-
-static DEFINE_SPINLOCK(wimax_id_table_lock);
-static struct list_head wimax_id_table = LIST_HEAD_INIT(wimax_id_table);
-
-
-/*
- * wimax_id_table_add - add a gennetlink familiy ID / wimax_dev mapping
- *
- * @wimax_dev: WiMAX device descriptor to associate to the Generic
- *     Netlink family ID.
- *
- * Look for an empty spot in the ID table; if none found, double the
- * table's size and get the first spot.
- */
-void wimax_id_table_add(struct wimax_dev *wimax_dev)
-{
-	d_fnstart(3, NULL, "(wimax_dev %p)\n", wimax_dev);
-	spin_lock(&wimax_id_table_lock);
-	list_add(&wimax_dev->id_table_node, &wimax_id_table);
-	spin_unlock(&wimax_id_table_lock);
-	d_fnend(3, NULL, "(wimax_dev %p)\n", wimax_dev);
-}
-
-
-/*
- * wimax_get_netdev_by_info - lookup a wimax_dev from the gennetlink info
- *
- * The generic netlink family ID has been filled out in the
- * nlmsghdr->nlmsg_type field, so we pull it from there, look it up in
- * the mapping table and reference the wimax_dev.
- *
- * When done, the reference should be dropped with
- * 'dev_put(wimax_dev->net_dev)'.
- */
-struct wimax_dev *wimax_dev_get_by_genl_info(
-	struct genl_info *info, int ifindex)
-{
-	struct wimax_dev *wimax_dev = NULL;
-
-	d_fnstart(3, NULL, "(info %p ifindex %d)\n", info, ifindex);
-	spin_lock(&wimax_id_table_lock);
-	list_for_each_entry(wimax_dev, &wimax_id_table, id_table_node) {
-		if (wimax_dev->net_dev->ifindex == ifindex) {
-			dev_hold(wimax_dev->net_dev);
-			goto found;
-		}
-	}
-	wimax_dev = NULL;
-	d_printf(1, NULL, "wimax: no devices found with ifindex %d\n",
-		 ifindex);
-found:
-	spin_unlock(&wimax_id_table_lock);
-	d_fnend(3, NULL, "(info %p ifindex %d) = %p\n",
-		info, ifindex, wimax_dev);
-	return wimax_dev;
-}
-
-
-/*
- * wimax_id_table_rm - Remove a gennetlink familiy ID / wimax_dev mapping
- *
- * @id: family ID to remove from the table
- */
-void wimax_id_table_rm(struct wimax_dev *wimax_dev)
-{
-	spin_lock(&wimax_id_table_lock);
-	list_del_init(&wimax_dev->id_table_node);
-	spin_unlock(&wimax_id_table_lock);
-}
-
-
-/*
- * Release the gennetlink family id / mapping table
- *
- * On debug, verify that the table is empty upon removal. We want the
- * code always compiled, to ensure it doesn't bit rot. It will be
- * compiled out if CONFIG_BUG is disabled.
- */
-void wimax_id_table_release(void)
-{
-	struct wimax_dev *wimax_dev;
-
-#ifndef CONFIG_BUG
-	return;
-#endif
-	spin_lock(&wimax_id_table_lock);
-	list_for_each_entry(wimax_dev, &wimax_id_table, id_table_node) {
-		pr_err("BUG: %s wimax_dev %p ifindex %d not cleared\n",
-		       __func__, wimax_dev, wimax_dev->net_dev->ifindex);
-		WARN_ON(1);
-	}
-	spin_unlock(&wimax_id_table_lock);
-}
diff --git a/net/wimax/op-msg.c b/net/wimax/op-msg.c
deleted file mode 100644
index 6460b5785758..000000000000
--- a/net/wimax/op-msg.c
+++ /dev/null
@@ -1,391 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Linux WiMAX
- * Generic messaging interface between userspace and driver/device
- *
- * Copyright (C) 2007-2008 Intel Corporation <linux-wimax@intel.com>
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * This implements a direct communication channel between user space and
- * the driver/device, by which free form messages can be sent back and
- * forth.
- *
- * This is intended for device-specific features, vendor quirks, etc.
- *
- * See include/net/wimax.h
- *
- * GENERIC NETLINK ENCODING AND CAPACITY
- *
- * A destination "pipe name" is added to each message; it is up to the
- * drivers to assign or use those names (if using them at all).
- *
- * Messages are encoded as a binary netlink attribute using nla_put()
- * using type NLA_UNSPEC (as some versions of libnl still in
- * deployment don't yet understand NLA_BINARY).
- *
- * The maximum capacity of this transport is PAGESIZE per message (so
- * the actual payload will be bit smaller depending on the
- * netlink/generic netlink attributes and headers).
- *
- * RECEPTION OF MESSAGES
- *
- * When a message is received from user space, it is passed verbatim
- * to the driver calling wimax_dev->op_msg_from_user(). The return
- * value from this function is passed back to user space as an ack
- * over the generic netlink protocol.
- *
- * The stack doesn't do any processing or interpretation of these
- * messages.
- *
- * SENDING MESSAGES
- *
- * Messages can be sent with wimax_msg().
- *
- * If the message delivery needs to happen on a different context to
- * that of its creation, wimax_msg_alloc() can be used to get a
- * pointer to the message that can be delivered later on with
- * wimax_msg_send().
- *
- * ROADMAP
- *
- * wimax_gnl_doit_msg_from_user()    Process a message from user space
- *   wimax_dev_get_by_genl_info()
- *   wimax_dev->op_msg_from_user()   Delivery of message to the driver
- *
- * wimax_msg()                       Send a message to user space
- *   wimax_msg_alloc()
- *   wimax_msg_send()
- */
-#include <linux/device.h>
-#include <linux/slab.h>
-#include <net/genetlink.h>
-#include <linux/netdevice.h>
-#include <linux/wimax.h>
-#include <linux/security.h>
-#include <linux/export.h>
-#include "wimax-internal.h"
-
-
-#define D_SUBMODULE op_msg
-#include "debug-levels.h"
-
-
-/**
- * wimax_msg_alloc - Create a new skb for sending a message to userspace
- *
- * @wimax_dev: WiMAX device descriptor
- * @pipe_name: "named pipe" the message will be sent to
- * @msg: pointer to the message data to send
- * @size: size of the message to send (in bytes), including the header.
- * @gfp_flags: flags for memory allocation.
- *
- * Returns: %0 if ok, negative errno code on error
- *
- * Description:
- *
- * Allocates an skb that will contain the message to send to user
- * space over the messaging pipe and initializes it, copying the
- * payload.
- *
- * Once this call is done, you can deliver it with
- * wimax_msg_send().
- *
- * IMPORTANT:
- *
- * Don't use skb_push()/skb_pull()/skb_reserve() on the skb, as
- * wimax_msg_send() depends on skb->data being placed at the
- * beginning of the user message.
- *
- * Unlike other WiMAX stack calls, this call can be used way early,
- * even before wimax_dev_add() is called, as long as the
- * wimax_dev->net_dev pointer is set to point to a proper
- * net_dev. This is so that drivers can use it early in case they need
- * to send stuff around or communicate with user space.
- */
-struct sk_buff *wimax_msg_alloc(struct wimax_dev *wimax_dev,
-				const char *pipe_name,
-				const void *msg, size_t size,
-				gfp_t gfp_flags)
-{
-	int result;
-	struct device *dev = wimax_dev_to_dev(wimax_dev);
-	size_t msg_size;
-	void *genl_msg;
-	struct sk_buff *skb;
-
-	msg_size = nla_total_size(size)
-		+ nla_total_size(sizeof(u32))
-		+ (pipe_name ? nla_total_size(strlen(pipe_name)) : 0);
-	result = -ENOMEM;
-	skb = genlmsg_new(msg_size, gfp_flags);
-	if (skb == NULL)
-		goto error_new;
-	genl_msg = genlmsg_put(skb, 0, 0, &wimax_gnl_family,
-			       0, WIMAX_GNL_OP_MSG_TO_USER);
-	if (genl_msg == NULL) {
-		dev_err(dev, "no memory to create generic netlink message\n");
-		goto error_genlmsg_put;
-	}
-	result = nla_put_u32(skb, WIMAX_GNL_MSG_IFIDX,
-			     wimax_dev->net_dev->ifindex);
-	if (result < 0) {
-		dev_err(dev, "no memory to add ifindex attribute\n");
-		goto error_nla_put;
-	}
-	if (pipe_name) {
-		result = nla_put_string(skb, WIMAX_GNL_MSG_PIPE_NAME,
-					pipe_name);
-		if (result < 0) {
-			dev_err(dev, "no memory to add pipe_name attribute\n");
-			goto error_nla_put;
-		}
-	}
-	result = nla_put(skb, WIMAX_GNL_MSG_DATA, size, msg);
-	if (result < 0) {
-		dev_err(dev, "no memory to add payload (msg %p size %zu) in "
-			"attribute: %d\n", msg, size, result);
-		goto error_nla_put;
-	}
-	genlmsg_end(skb, genl_msg);
-	return skb;
-
-error_nla_put:
-error_genlmsg_put:
-error_new:
-	nlmsg_free(skb);
-	return ERR_PTR(result);
-}
-EXPORT_SYMBOL_GPL(wimax_msg_alloc);
-
-
-/**
- * wimax_msg_data_len - Return a pointer and size of a message's payload
- *
- * @msg: Pointer to a message created with wimax_msg_alloc()
- * @size: Pointer to where to store the message's size
- *
- * Returns the pointer to the message data.
- */
-const void *wimax_msg_data_len(struct sk_buff *msg, size_t *size)
-{
-	struct nlmsghdr *nlh = (void *) msg->head;
-	struct nlattr *nla;
-
-	nla = nlmsg_find_attr(nlh, sizeof(struct genlmsghdr),
-			      WIMAX_GNL_MSG_DATA);
-	if (nla == NULL) {
-		pr_err("Cannot find attribute WIMAX_GNL_MSG_DATA\n");
-		return NULL;
-	}
-	*size = nla_len(nla);
-	return nla_data(nla);
-}
-EXPORT_SYMBOL_GPL(wimax_msg_data_len);
-
-
-/**
- * wimax_msg_data - Return a pointer to a message's payload
- *
- * @msg: Pointer to a message created with wimax_msg_alloc()
- */
-const void *wimax_msg_data(struct sk_buff *msg)
-{
-	struct nlmsghdr *nlh = (void *) msg->head;
-	struct nlattr *nla;
-
-	nla = nlmsg_find_attr(nlh, sizeof(struct genlmsghdr),
-			      WIMAX_GNL_MSG_DATA);
-	if (nla == NULL) {
-		pr_err("Cannot find attribute WIMAX_GNL_MSG_DATA\n");
-		return NULL;
-	}
-	return nla_data(nla);
-}
-EXPORT_SYMBOL_GPL(wimax_msg_data);
-
-
-/**
- * wimax_msg_len - Return a message's payload length
- *
- * @msg: Pointer to a message created with wimax_msg_alloc()
- */
-ssize_t wimax_msg_len(struct sk_buff *msg)
-{
-	struct nlmsghdr *nlh = (void *) msg->head;
-	struct nlattr *nla;
-
-	nla = nlmsg_find_attr(nlh, sizeof(struct genlmsghdr),
-			      WIMAX_GNL_MSG_DATA);
-	if (nla == NULL) {
-		pr_err("Cannot find attribute WIMAX_GNL_MSG_DATA\n");
-		return -EINVAL;
-	}
-	return nla_len(nla);
-}
-EXPORT_SYMBOL_GPL(wimax_msg_len);
-
-
-/**
- * wimax_msg_send - Send a pre-allocated message to user space
- *
- * @wimax_dev: WiMAX device descriptor
- *
- * @skb: &struct sk_buff returned by wimax_msg_alloc(). Note the
- *     ownership of @skb is transferred to this function.
- *
- * Returns: 0 if ok, < 0 errno code on error
- *
- * Description:
- *
- * Sends a free-form message that was preallocated with
- * wimax_msg_alloc() and filled up.
- *
- * Assumes that once you pass an skb to this function for sending, it
- * owns it and will release it when done (on success).
- *
- * IMPORTANT:
- *
- * Don't use skb_push()/skb_pull()/skb_reserve() on the skb, as
- * wimax_msg_send() depends on skb->data being placed at the
- * beginning of the user message.
- *
- * Unlike other WiMAX stack calls, this call can be used way early,
- * even before wimax_dev_add() is called, as long as the
- * wimax_dev->net_dev pointer is set to point to a proper
- * net_dev. This is so that drivers can use it early in case they need
- * to send stuff around or communicate with user space.
- */
-int wimax_msg_send(struct wimax_dev *wimax_dev, struct sk_buff *skb)
-{
-	struct device *dev = wimax_dev_to_dev(wimax_dev);
-	void *msg = skb->data;
-	size_t size = skb->len;
-	might_sleep();
-
-	d_printf(1, dev, "CTX: wimax msg, %zu bytes\n", size);
-	d_dump(2, dev, msg, size);
-	genlmsg_multicast(&wimax_gnl_family, skb, 0, 0, GFP_KERNEL);
-	d_printf(1, dev, "CTX: genl multicast done\n");
-	return 0;
-}
-EXPORT_SYMBOL_GPL(wimax_msg_send);
-
-
-/**
- * wimax_msg - Send a message to user space
- *
- * @wimax_dev: WiMAX device descriptor (properly referenced)
- * @pipe_name: "named pipe" the message will be sent to
- * @buf: pointer to the message to send.
- * @size: size of the buffer pointed to by @buf (in bytes).
- * @gfp_flags: flags for memory allocation.
- *
- * Returns: %0 if ok, negative errno code on error.
- *
- * Description:
- *
- * Sends a free-form message to user space on the device @wimax_dev.
- *
- * NOTES:
- *
- * Once the @skb is given to this function, who will own it and will
- * release it when done (unless it returns error).
- */
-int wimax_msg(struct wimax_dev *wimax_dev, const char *pipe_name,
-	      const void *buf, size_t size, gfp_t gfp_flags)
-{
-	int result = -ENOMEM;
-	struct sk_buff *skb;
-
-	skb = wimax_msg_alloc(wimax_dev, pipe_name, buf, size, gfp_flags);
-	if (IS_ERR(skb))
-		result = PTR_ERR(skb);
-	else
-		result = wimax_msg_send(wimax_dev, skb);
-	return result;
-}
-EXPORT_SYMBOL_GPL(wimax_msg);
-
-/*
- * Relays a message from user space to the driver
- *
- * The skb is passed to the driver-specific function with the netlink
- * and generic netlink headers already stripped.
- *
- * This call will block while handling/relaying the message.
- */
-int wimax_gnl_doit_msg_from_user(struct sk_buff *skb, struct genl_info *info)
-{
-	int result, ifindex;
-	struct wimax_dev *wimax_dev;
-	struct device *dev;
-	struct nlmsghdr *nlh = info->nlhdr;
-	char *pipe_name;
-	void *msg_buf;
-	size_t msg_len;
-
-	might_sleep();
-	d_fnstart(3, NULL, "(skb %p info %p)\n", skb, info);
-	result = -ENODEV;
-	if (info->attrs[WIMAX_GNL_MSG_IFIDX] == NULL) {
-		pr_err("WIMAX_GNL_MSG_FROM_USER: can't find IFIDX attribute\n");
-		goto error_no_wimax_dev;
-	}
-	ifindex = nla_get_u32(info->attrs[WIMAX_GNL_MSG_IFIDX]);
-	wimax_dev = wimax_dev_get_by_genl_info(info, ifindex);
-	if (wimax_dev == NULL)
-		goto error_no_wimax_dev;
-	dev = wimax_dev_to_dev(wimax_dev);
-
-	/* Unpack arguments */
-	result = -EINVAL;
-	if (info->attrs[WIMAX_GNL_MSG_DATA] == NULL) {
-		dev_err(dev, "WIMAX_GNL_MSG_FROM_USER: can't find MSG_DATA "
-			"attribute\n");
-		goto error_no_data;
-	}
-	msg_buf = nla_data(info->attrs[WIMAX_GNL_MSG_DATA]);
-	msg_len = nla_len(info->attrs[WIMAX_GNL_MSG_DATA]);
-
-	if (info->attrs[WIMAX_GNL_MSG_PIPE_NAME] == NULL)
-		pipe_name = NULL;
-	else {
-		struct nlattr *attr = info->attrs[WIMAX_GNL_MSG_PIPE_NAME];
-		size_t attr_len = nla_len(attr);
-		/* libnl-1.1 does not yet support NLA_NUL_STRING */
-		result = -ENOMEM;
-		pipe_name = kstrndup(nla_data(attr), attr_len + 1, GFP_KERNEL);
-		if (pipe_name == NULL)
-			goto error_alloc;
-		pipe_name[attr_len] = 0;
-	}
-	mutex_lock(&wimax_dev->mutex);
-	result = wimax_dev_is_ready(wimax_dev);
-	if (result == -ENOMEDIUM)
-		result = 0;
-	if (result < 0)
-		goto error_not_ready;
-	result = -ENOSYS;
-	if (wimax_dev->op_msg_from_user == NULL)
-		goto error_noop;
-
-	d_printf(1, dev,
-		 "CRX: nlmsghdr len %u type %u flags 0x%04x seq 0x%x pid %u\n",
-		 nlh->nlmsg_len, nlh->nlmsg_type, nlh->nlmsg_flags,
-		 nlh->nlmsg_seq, nlh->nlmsg_pid);
-	d_printf(1, dev, "CRX: wimax message %zu bytes\n", msg_len);
-	d_dump(2, dev, msg_buf, msg_len);
-
-	result = wimax_dev->op_msg_from_user(wimax_dev, pipe_name,
-					     msg_buf, msg_len, info);
-error_noop:
-error_not_ready:
-	mutex_unlock(&wimax_dev->mutex);
-error_alloc:
-	kfree(pipe_name);
-error_no_data:
-	dev_put(wimax_dev->net_dev);
-error_no_wimax_dev:
-	d_fnend(3, NULL, "(skb %p info %p) = %d\n", skb, info, result);
-	return result;
-}
diff --git a/net/wimax/op-reset.c b/net/wimax/op-reset.c
deleted file mode 100644
index 9899b2e56721..000000000000
--- a/net/wimax/op-reset.c
+++ /dev/null
@@ -1,108 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Linux WiMAX
- * Implement and export a method for resetting a WiMAX device
- *
- * Copyright (C) 2008 Intel Corporation <linux-wimax@intel.com>
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * This implements a simple synchronous call to reset a WiMAX device.
- *
- * Resets aim at being warm, keeping the device handles active;
- * however, when that fails, it falls back to a cold reset (that will
- * disconnect and reconnect the device).
- */
-
-#include <net/wimax.h>
-#include <net/genetlink.h>
-#include <linux/wimax.h>
-#include <linux/security.h>
-#include <linux/export.h>
-#include "wimax-internal.h"
-
-#define D_SUBMODULE op_reset
-#include "debug-levels.h"
-
-
-/**
- * wimax_reset - Reset a WiMAX device
- *
- * @wimax_dev: WiMAX device descriptor
- *
- * Returns:
- *
- * %0 if ok and a warm reset was done (the device still exists in
- * the system).
- *
- * -%ENODEV if a cold/bus reset had to be done (device has
- * disconnected and reconnected, so current handle is not valid
- * any more).
- *
- * -%EINVAL if the device is not even registered.
- *
- * Any other negative error code shall be considered as
- * non-recoverable.
- *
- * Description:
- *
- * Called when wanting to reset the device for any reason. Device is
- * taken back to power on status.
- *
- * This call blocks; on successful return, the device has completed the
- * reset process and is ready to operate.
- */
-int wimax_reset(struct wimax_dev *wimax_dev)
-{
-	int result = -EINVAL;
-	struct device *dev = wimax_dev_to_dev(wimax_dev);
-	enum wimax_st state;
-
-	might_sleep();
-	d_fnstart(3, dev, "(wimax_dev %p)\n", wimax_dev);
-	mutex_lock(&wimax_dev->mutex);
-	dev_hold(wimax_dev->net_dev);
-	state = wimax_dev->state;
-	mutex_unlock(&wimax_dev->mutex);
-
-	if (state >= WIMAX_ST_DOWN) {
-		mutex_lock(&wimax_dev->mutex_reset);
-		result = wimax_dev->op_reset(wimax_dev);
-		mutex_unlock(&wimax_dev->mutex_reset);
-	}
-	dev_put(wimax_dev->net_dev);
-
-	d_fnend(3, dev, "(wimax_dev %p) = %d\n", wimax_dev, result);
-	return result;
-}
-EXPORT_SYMBOL(wimax_reset);
-
-
-/*
- * Exporting to user space over generic netlink
- *
- * Parse the reset command from user space, return error code.
- *
- * No attributes.
- */
-int wimax_gnl_doit_reset(struct sk_buff *skb, struct genl_info *info)
-{
-	int result, ifindex;
-	struct wimax_dev *wimax_dev;
-
-	d_fnstart(3, NULL, "(skb %p info %p)\n", skb, info);
-	result = -ENODEV;
-	if (info->attrs[WIMAX_GNL_RESET_IFIDX] == NULL) {
-		pr_err("WIMAX_GNL_OP_RFKILL: can't find IFIDX attribute\n");
-		goto error_no_wimax_dev;
-	}
-	ifindex = nla_get_u32(info->attrs[WIMAX_GNL_RESET_IFIDX]);
-	wimax_dev = wimax_dev_get_by_genl_info(info, ifindex);
-	if (wimax_dev == NULL)
-		goto error_no_wimax_dev;
-	/* Execute the operation and send the result back to user space */
-	result = wimax_reset(wimax_dev);
-	dev_put(wimax_dev->net_dev);
-error_no_wimax_dev:
-	d_fnend(3, NULL, "(skb %p info %p) = %d\n", skb, info, result);
-	return result;
-}
diff --git a/net/wimax/op-rfkill.c b/net/wimax/op-rfkill.c
deleted file mode 100644
index 248d10b60b05..000000000000
--- a/net/wimax/op-rfkill.c
+++ /dev/null
@@ -1,431 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Linux WiMAX
- * RF-kill framework integration
- *
- * Copyright (C) 2008 Intel Corporation <linux-wimax@intel.com>
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * This integrates into the Linux Kernel rfkill susbystem so that the
- * drivers just have to do the bare minimal work, which is providing a
- * method to set the software RF-Kill switch and to report changes in
- * the software and hardware switch status.
- *
- * A non-polled generic rfkill device is embedded into the WiMAX
- * subsystem's representation of a device.
- *
- * FIXME: Need polled support? Let drivers provide a poll routine
- *	  and hand it to rfkill ops then?
- *
- * All device drivers have to do is after wimax_dev_init(), call
- * wimax_report_rfkill_hw() and wimax_report_rfkill_sw() to update
- * initial state and then every time it changes. See wimax.h:struct
- * wimax_dev for more information.
- *
- * ROADMAP
- *
- * wimax_gnl_doit_rfkill()      User space calling wimax_rfkill()
- *   wimax_rfkill()             Kernel calling wimax_rfkill()
- *     __wimax_rf_toggle_radio()
- *
- * wimax_rfkill_set_radio_block()  RF-Kill subsystem calling
- *   __wimax_rf_toggle_radio()
- *
- * __wimax_rf_toggle_radio()
- *   wimax_dev->op_rfkill_sw_toggle() Driver backend
- *   __wimax_state_change()
- *
- * wimax_report_rfkill_sw()     Driver reports state change
- *   __wimax_state_change()
- *
- * wimax_report_rfkill_hw()     Driver reports state change
- *   __wimax_state_change()
- *
- * wimax_rfkill_add()           Initialize/shutdown rfkill support
- * wimax_rfkill_rm()            [called by wimax_dev_add/rm()]
- */
-
-#include <net/wimax.h>
-#include <net/genetlink.h>
-#include <linux/wimax.h>
-#include <linux/security.h>
-#include <linux/rfkill.h>
-#include <linux/export.h>
-#include "wimax-internal.h"
-
-#define D_SUBMODULE op_rfkill
-#include "debug-levels.h"
-
-/**
- * wimax_report_rfkill_hw - Reports changes in the hardware RF switch
- *
- * @wimax_dev: WiMAX device descriptor
- *
- * @state: New state of the RF Kill switch. %WIMAX_RF_ON radio on,
- *     %WIMAX_RF_OFF radio off.
- *
- * When the device detects a change in the state of thehardware RF
- * switch, it must call this function to let the WiMAX kernel stack
- * know that the state has changed so it can be properly propagated.
- *
- * The WiMAX stack caches the state (the driver doesn't need to). As
- * well, as the change is propagated it will come back as a request to
- * change the software state to mirror the hardware state.
- *
- * If the device doesn't have a hardware kill switch, just report
- * it on initialization as always on (%WIMAX_RF_ON, radio on).
- */
-void wimax_report_rfkill_hw(struct wimax_dev *wimax_dev,
-			    enum wimax_rf_state state)
-{
-	int result;
-	struct device *dev = wimax_dev_to_dev(wimax_dev);
-	enum wimax_st wimax_state;
-
-	d_fnstart(3, dev, "(wimax_dev %p state %u)\n", wimax_dev, state);
-	BUG_ON(state == WIMAX_RF_QUERY);
-	BUG_ON(state != WIMAX_RF_ON && state != WIMAX_RF_OFF);
-
-	mutex_lock(&wimax_dev->mutex);
-	result = wimax_dev_is_ready(wimax_dev);
-	if (result < 0)
-		goto error_not_ready;
-
-	if (state != wimax_dev->rf_hw) {
-		wimax_dev->rf_hw = state;
-		if (wimax_dev->rf_hw == WIMAX_RF_ON &&
-		    wimax_dev->rf_sw == WIMAX_RF_ON)
-			wimax_state = WIMAX_ST_READY;
-		else
-			wimax_state = WIMAX_ST_RADIO_OFF;
-
-		result = rfkill_set_hw_state(wimax_dev->rfkill,
-					     state == WIMAX_RF_OFF);
-
-		__wimax_state_change(wimax_dev, wimax_state);
-	}
-error_not_ready:
-	mutex_unlock(&wimax_dev->mutex);
-	d_fnend(3, dev, "(wimax_dev %p state %u) = void [%d]\n",
-		wimax_dev, state, result);
-}
-EXPORT_SYMBOL_GPL(wimax_report_rfkill_hw);
-
-
-/**
- * wimax_report_rfkill_sw - Reports changes in the software RF switch
- *
- * @wimax_dev: WiMAX device descriptor
- *
- * @state: New state of the RF kill switch. %WIMAX_RF_ON radio on,
- *     %WIMAX_RF_OFF radio off.
- *
- * Reports changes in the software RF switch state to the WiMAX stack.
- *
- * The main use is during initialization, so the driver can query the
- * device for its current software radio kill switch state and feed it
- * to the system.
- *
- * On the side, the device does not change the software state by
- * itself. In practice, this can happen, as the device might decide to
- * switch (in software) the radio off for different reasons.
- */
-void wimax_report_rfkill_sw(struct wimax_dev *wimax_dev,
-			    enum wimax_rf_state state)
-{
-	int result;
-	struct device *dev = wimax_dev_to_dev(wimax_dev);
-	enum wimax_st wimax_state;
-
-	d_fnstart(3, dev, "(wimax_dev %p state %u)\n", wimax_dev, state);
-	BUG_ON(state == WIMAX_RF_QUERY);
-	BUG_ON(state != WIMAX_RF_ON && state != WIMAX_RF_OFF);
-
-	mutex_lock(&wimax_dev->mutex);
-	result = wimax_dev_is_ready(wimax_dev);
-	if (result < 0)
-		goto error_not_ready;
-
-	if (state != wimax_dev->rf_sw) {
-		wimax_dev->rf_sw = state;
-		if (wimax_dev->rf_hw == WIMAX_RF_ON &&
-		    wimax_dev->rf_sw == WIMAX_RF_ON)
-			wimax_state = WIMAX_ST_READY;
-		else
-			wimax_state = WIMAX_ST_RADIO_OFF;
-		__wimax_state_change(wimax_dev, wimax_state);
-		rfkill_set_sw_state(wimax_dev->rfkill, state == WIMAX_RF_OFF);
-	}
-error_not_ready:
-	mutex_unlock(&wimax_dev->mutex);
-	d_fnend(3, dev, "(wimax_dev %p state %u) = void [%d]\n",
-		wimax_dev, state, result);
-}
-EXPORT_SYMBOL_GPL(wimax_report_rfkill_sw);
-
-
-/*
- * Callback for the RF Kill toggle operation
- *
- * This function is called by:
- *
- * - The rfkill subsystem when the RF-Kill key is pressed in the
- *   hardware and the driver notifies through
- *   wimax_report_rfkill_hw(). The rfkill subsystem ends up calling back
- *   here so the software RF Kill switch state is changed to reflect
- *   the hardware switch state.
- *
- * - When the user sets the state through sysfs' rfkill/state file
- *
- * - When the user calls wimax_rfkill().
- *
- * This call blocks!
- *
- * WARNING! When we call rfkill_unregister(), this will be called with
- * state 0!
- *
- * WARNING: wimax_dev must be locked
- */
-static
-int __wimax_rf_toggle_radio(struct wimax_dev *wimax_dev,
-			    enum wimax_rf_state state)
-{
-	int result = 0;
-	struct device *dev = wimax_dev_to_dev(wimax_dev);
-	enum wimax_st wimax_state;
-
-	might_sleep();
-	d_fnstart(3, dev, "(wimax_dev %p state %u)\n", wimax_dev, state);
-	if (wimax_dev->rf_sw == state)
-		goto out_no_change;
-	if (wimax_dev->op_rfkill_sw_toggle != NULL)
-		result = wimax_dev->op_rfkill_sw_toggle(wimax_dev, state);
-	else if (state == WIMAX_RF_OFF)	/* No op? can't turn off */
-		result = -ENXIO;
-	else				/* No op? can turn on */
-		result = 0;		/* should never happen tho */
-	if (result >= 0) {
-		result = 0;
-		wimax_dev->rf_sw = state;
-		wimax_state = state == WIMAX_RF_ON ?
-			WIMAX_ST_READY : WIMAX_ST_RADIO_OFF;
-		__wimax_state_change(wimax_dev, wimax_state);
-	}
-out_no_change:
-	d_fnend(3, dev, "(wimax_dev %p state %u) = %d\n",
-		wimax_dev, state, result);
-	return result;
-}
-
-
-/*
- * Translate from rfkill state to wimax state
- *
- * NOTE: Special state handling rules here
- *
- *     Just pretend the call didn't happen if we are in a state where
- *     we know for sure it cannot be handled (WIMAX_ST_DOWN or
- *     __WIMAX_ST_QUIESCING). rfkill() needs it to register and
- *     unregister, as it will run this path.
- *
- * NOTE: This call will block until the operation is completed.
- */
-static int wimax_rfkill_set_radio_block(void *data, bool blocked)
-{
-	int result;
-	struct wimax_dev *wimax_dev = data;
-	struct device *dev = wimax_dev_to_dev(wimax_dev);
-	enum wimax_rf_state rf_state;
-
-	d_fnstart(3, dev, "(wimax_dev %p blocked %u)\n", wimax_dev, blocked);
-	rf_state = WIMAX_RF_ON;
-	if (blocked)
-		rf_state = WIMAX_RF_OFF;
-	mutex_lock(&wimax_dev->mutex);
-	if (wimax_dev->state <= __WIMAX_ST_QUIESCING)
-		result = 0;
-	else
-		result = __wimax_rf_toggle_radio(wimax_dev, rf_state);
-	mutex_unlock(&wimax_dev->mutex);
-	d_fnend(3, dev, "(wimax_dev %p blocked %u) = %d\n",
-		wimax_dev, blocked, result);
-	return result;
-}
-
-static const struct rfkill_ops wimax_rfkill_ops = {
-	.set_block = wimax_rfkill_set_radio_block,
-};
-
-/**
- * wimax_rfkill - Set the software RF switch state for a WiMAX device
- *
- * @wimax_dev: WiMAX device descriptor
- *
- * @state: New RF state.
- *
- * Returns:
- *
- * >= 0 toggle state if ok, < 0 errno code on error. The toggle state
- * is returned as a bitmap, bit 0 being the hardware RF state, bit 1
- * the software RF state.
- *
- * 0 means disabled (%WIMAX_RF_ON, radio on), 1 means enabled radio
- * off (%WIMAX_RF_OFF).
- *
- * Description:
- *
- * Called by the user when he wants to request the WiMAX radio to be
- * switched on (%WIMAX_RF_ON) or off (%WIMAX_RF_OFF). With
- * %WIMAX_RF_QUERY, just the current state is returned.
- *
- * NOTE:
- *
- * This call will block until the operation is complete.
- */
-int wimax_rfkill(struct wimax_dev *wimax_dev, enum wimax_rf_state state)
-{
-	int result;
-	struct device *dev = wimax_dev_to_dev(wimax_dev);
-
-	d_fnstart(3, dev, "(wimax_dev %p state %u)\n", wimax_dev, state);
-	mutex_lock(&wimax_dev->mutex);
-	result = wimax_dev_is_ready(wimax_dev);
-	if (result < 0) {
-		/* While initializing, < 1.4.3 wimax-tools versions use
-		 * this call to check if the device is a valid WiMAX
-		 * device; so we allow it to proceed always,
-		 * considering the radios are all off. */
-		if (result == -ENOMEDIUM && state == WIMAX_RF_QUERY)
-			result = WIMAX_RF_OFF << 1 | WIMAX_RF_OFF;
-		goto error_not_ready;
-	}
-	switch (state) {
-	case WIMAX_RF_ON:
-	case WIMAX_RF_OFF:
-		result = __wimax_rf_toggle_radio(wimax_dev, state);
-		if (result < 0)
-			goto error;
-		rfkill_set_sw_state(wimax_dev->rfkill, state == WIMAX_RF_OFF);
-		break;
-	case WIMAX_RF_QUERY:
-		break;
-	default:
-		result = -EINVAL;
-		goto error;
-	}
-	result = wimax_dev->rf_sw << 1 | wimax_dev->rf_hw;
-error:
-error_not_ready:
-	mutex_unlock(&wimax_dev->mutex);
-	d_fnend(3, dev, "(wimax_dev %p state %u) = %d\n",
-		wimax_dev, state, result);
-	return result;
-}
-EXPORT_SYMBOL(wimax_rfkill);
-
-
-/*
- * Register a new WiMAX device's RF Kill support
- *
- * WARNING: wimax_dev->mutex must be unlocked
- */
-int wimax_rfkill_add(struct wimax_dev *wimax_dev)
-{
-	int result;
-	struct rfkill *rfkill;
-	struct device *dev = wimax_dev_to_dev(wimax_dev);
-
-	d_fnstart(3, dev, "(wimax_dev %p)\n", wimax_dev);
-	/* Initialize RF Kill */
-	result = -ENOMEM;
-	rfkill = rfkill_alloc(wimax_dev->name, dev, RFKILL_TYPE_WIMAX,
-			      &wimax_rfkill_ops, wimax_dev);
-	if (rfkill == NULL)
-		goto error_rfkill_allocate;
-
-	d_printf(1, dev, "rfkill %p\n", rfkill);
-
-	wimax_dev->rfkill = rfkill;
-
-	rfkill_init_sw_state(rfkill, 1);
-	result = rfkill_register(wimax_dev->rfkill);
-	if (result < 0)
-		goto error_rfkill_register;
-
-	/* If there is no SW toggle op, SW RFKill is always on */
-	if (wimax_dev->op_rfkill_sw_toggle == NULL)
-		wimax_dev->rf_sw = WIMAX_RF_ON;
-
-	d_fnend(3, dev, "(wimax_dev %p) = 0\n", wimax_dev);
-	return 0;
-
-error_rfkill_register:
-	rfkill_destroy(wimax_dev->rfkill);
-error_rfkill_allocate:
-	d_fnend(3, dev, "(wimax_dev %p) = %d\n", wimax_dev, result);
-	return result;
-}
-
-
-/*
- * Deregister a WiMAX device's RF Kill support
- *
- * Ick, we can't call rfkill_free() after rfkill_unregister()...oh
- * well.
- *
- * WARNING: wimax_dev->mutex must be unlocked
- */
-void wimax_rfkill_rm(struct wimax_dev *wimax_dev)
-{
-	struct device *dev = wimax_dev_to_dev(wimax_dev);
-	d_fnstart(3, dev, "(wimax_dev %p)\n", wimax_dev);
-	rfkill_unregister(wimax_dev->rfkill);
-	rfkill_destroy(wimax_dev->rfkill);
-	d_fnend(3, dev, "(wimax_dev %p)\n", wimax_dev);
-}
-
-
-/*
- * Exporting to user space over generic netlink
- *
- * Parse the rfkill command from user space, return a combination
- * value that describe the states of the different toggles.
- *
- * Only one attribute: the new state requested (on, off or no change,
- * just query).
- */
-
-int wimax_gnl_doit_rfkill(struct sk_buff *skb, struct genl_info *info)
-{
-	int result, ifindex;
-	struct wimax_dev *wimax_dev;
-	struct device *dev;
-	enum wimax_rf_state new_state;
-
-	d_fnstart(3, NULL, "(skb %p info %p)\n", skb, info);
-	result = -ENODEV;
-	if (info->attrs[WIMAX_GNL_RFKILL_IFIDX] == NULL) {
-		pr_err("WIMAX_GNL_OP_RFKILL: can't find IFIDX attribute\n");
-		goto error_no_wimax_dev;
-	}
-	ifindex = nla_get_u32(info->attrs[WIMAX_GNL_RFKILL_IFIDX]);
-	wimax_dev = wimax_dev_get_by_genl_info(info, ifindex);
-	if (wimax_dev == NULL)
-		goto error_no_wimax_dev;
-	dev = wimax_dev_to_dev(wimax_dev);
-	result = -EINVAL;
-	if (info->attrs[WIMAX_GNL_RFKILL_STATE] == NULL) {
-		dev_err(dev, "WIMAX_GNL_RFKILL: can't find RFKILL_STATE "
-			"attribute\n");
-		goto error_no_pid;
-	}
-	new_state = nla_get_u32(info->attrs[WIMAX_GNL_RFKILL_STATE]);
-
-	/* Execute the operation and send the result back to user space */
-	result = wimax_rfkill(wimax_dev, new_state);
-error_no_pid:
-	dev_put(wimax_dev->net_dev);
-error_no_wimax_dev:
-	d_fnend(3, NULL, "(skb %p info %p) = %d\n", skb, info, result);
-	return result;
-}
diff --git a/net/wimax/op-state-get.c b/net/wimax/op-state-get.c
deleted file mode 100644
index 5bc712de1563..000000000000
--- a/net/wimax/op-state-get.c
+++ /dev/null
@@ -1,52 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Linux WiMAX
- * Implement and export a method for getting a WiMAX device current state
- *
- * Copyright (C) 2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
- *
- * Based on previous WiMAX core work by:
- *  Copyright (C) 2008 Intel Corporation <linux-wimax@intel.com>
- *  Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- */
-
-#include <net/wimax.h>
-#include <net/genetlink.h>
-#include <linux/wimax.h>
-#include <linux/security.h>
-#include "wimax-internal.h"
-
-#define D_SUBMODULE op_state_get
-#include "debug-levels.h"
-
-
-/*
- * Exporting to user space over generic netlink
- *
- * Parse the state get command from user space, return a combination
- * value that describe the current state.
- *
- * No attributes.
- */
-int wimax_gnl_doit_state_get(struct sk_buff *skb, struct genl_info *info)
-{
-	int result, ifindex;
-	struct wimax_dev *wimax_dev;
-
-	d_fnstart(3, NULL, "(skb %p info %p)\n", skb, info);
-	result = -ENODEV;
-	if (info->attrs[WIMAX_GNL_STGET_IFIDX] == NULL) {
-		pr_err("WIMAX_GNL_OP_STATE_GET: can't find IFIDX attribute\n");
-		goto error_no_wimax_dev;
-	}
-	ifindex = nla_get_u32(info->attrs[WIMAX_GNL_STGET_IFIDX]);
-	wimax_dev = wimax_dev_get_by_genl_info(info, ifindex);
-	if (wimax_dev == NULL)
-		goto error_no_wimax_dev;
-	/* Execute the operation and send the result back to user space */
-	result = wimax_state_get(wimax_dev);
-	dev_put(wimax_dev->net_dev);
-error_no_wimax_dev:
-	d_fnend(3, NULL, "(skb %p info %p) = %d\n", skb, info, result);
-	return result;
-}
diff --git a/net/wimax/stack.c b/net/wimax/stack.c
deleted file mode 100644
index 3a62af3f80bf..000000000000
--- a/net/wimax/stack.c
+++ /dev/null
@@ -1,616 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Linux WiMAX
- * Initialization, addition and removal of wimax devices
- *
- * Copyright (C) 2005-2006 Intel Corporation <linux-wimax@intel.com>
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * This implements:
- *
- *   - basic life cycle of 'struct wimax_dev' [wimax_dev_*()]; on
- *     addition/registration initialize all subfields and allocate
- *     generic netlink resources for user space communication. On
- *     removal/unregistration, undo all that.
- *
- *   - device state machine [wimax_state_change()] and support to send
- *     reports to user space when the state changes
- *     [wimax_gnl_re_state_change*()].
- *
- * See include/net/wimax.h for rationales and design.
- *
- * ROADMAP
- *
- * [__]wimax_state_change()     Called by drivers to update device's state
- *   wimax_gnl_re_state_change_alloc()
- *   wimax_gnl_re_state_change_send()
- *
- * wimax_dev_init()	        Init a device
- * wimax_dev_add()              Register
- *   wimax_rfkill_add()
- *   wimax_gnl_add()            Register all the generic netlink resources.
- *   wimax_id_table_add()
- * wimax_dev_rm()               Unregister
- *   wimax_id_table_rm()
- *   wimax_gnl_rm()
- *   wimax_rfkill_rm()
- */
-#include <linux/device.h>
-#include <linux/gfp.h>
-#include <net/genetlink.h>
-#include <linux/netdevice.h>
-#include <linux/wimax.h>
-#include <linux/module.h>
-#include "wimax-internal.h"
-
-
-#define D_SUBMODULE stack
-#include "debug-levels.h"
-
-static char wimax_debug_params[128];
-module_param_string(debug, wimax_debug_params, sizeof(wimax_debug_params),
-		    0644);
-MODULE_PARM_DESC(debug,
-		 "String of space-separated NAME:VALUE pairs, where NAMEs "
-		 "are the different debug submodules and VALUE are the "
-		 "initial debug value to set.");
-
-/*
- * Authoritative source for the RE_STATE_CHANGE attribute policy
- *
- * We don't really use it here, but /me likes to keep the definition
- * close to where the data is generated.
- */
-/*
-static const struct nla_policy wimax_gnl_re_status_change[WIMAX_GNL_ATTR_MAX + 1] = {
-	[WIMAX_GNL_STCH_STATE_OLD] = { .type = NLA_U8 },
-	[WIMAX_GNL_STCH_STATE_NEW] = { .type = NLA_U8 },
-};
-*/
-
-
-/*
- * Allocate a Report State Change message
- *
- * @header: save it, you need it for _send()
- *
- * Creates and fills a basic state change message; different code
- * paths can then add more attributes to the message as needed.
- *
- * Use wimax_gnl_re_state_change_send() to send the returned skb.
- *
- * Returns: skb with the genl message if ok, IS_ERR() ptr on error
- *     with an errno code.
- */
-static
-struct sk_buff *wimax_gnl_re_state_change_alloc(
-	struct wimax_dev *wimax_dev,
-	enum wimax_st new_state, enum wimax_st old_state,
-	void **header)
-{
-	int result;
-	struct device *dev = wimax_dev_to_dev(wimax_dev);
-	void *data;
-	struct sk_buff *report_skb;
-
-	d_fnstart(3, dev, "(wimax_dev %p new_state %u old_state %u)\n",
-		  wimax_dev, new_state, old_state);
-	result = -ENOMEM;
-	report_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (report_skb == NULL) {
-		dev_err(dev, "RE_STCH: can't create message\n");
-		goto error_new;
-	}
-	/* FIXME: sending a group ID as the seq is wrong */
-	data = genlmsg_put(report_skb, 0, wimax_gnl_family.mcgrp_offset,
-			   &wimax_gnl_family, 0, WIMAX_GNL_RE_STATE_CHANGE);
-	if (data == NULL) {
-		dev_err(dev, "RE_STCH: can't put data into message\n");
-		goto error_put;
-	}
-	*header = data;
-
-	result = nla_put_u8(report_skb, WIMAX_GNL_STCH_STATE_OLD, old_state);
-	if (result < 0) {
-		dev_err(dev, "RE_STCH: Error adding OLD attr: %d\n", result);
-		goto error_put;
-	}
-	result = nla_put_u8(report_skb, WIMAX_GNL_STCH_STATE_NEW, new_state);
-	if (result < 0) {
-		dev_err(dev, "RE_STCH: Error adding NEW attr: %d\n", result);
-		goto error_put;
-	}
-	result = nla_put_u32(report_skb, WIMAX_GNL_STCH_IFIDX,
-			     wimax_dev->net_dev->ifindex);
-	if (result < 0) {
-		dev_err(dev, "RE_STCH: Error adding IFINDEX attribute\n");
-		goto error_put;
-	}
-	d_fnend(3, dev, "(wimax_dev %p new_state %u old_state %u) = %p\n",
-		wimax_dev, new_state, old_state, report_skb);
-	return report_skb;
-
-error_put:
-	nlmsg_free(report_skb);
-error_new:
-	d_fnend(3, dev, "(wimax_dev %p new_state %u old_state %u) = %d\n",
-		wimax_dev, new_state, old_state, result);
-	return ERR_PTR(result);
-}
-
-
-/*
- * Send a Report State Change message (as created with _alloc).
- *
- * @report_skb: as returned by wimax_gnl_re_state_change_alloc()
- * @header: as returned by wimax_gnl_re_state_change_alloc()
- *
- * Returns: 0 if ok, < 0 errno code on error.
- *
- * If the message is  NULL, pretend it didn't happen.
- */
-static
-int wimax_gnl_re_state_change_send(
-	struct wimax_dev *wimax_dev, struct sk_buff *report_skb,
-	void *header)
-{
-	int result = 0;
-	struct device *dev = wimax_dev_to_dev(wimax_dev);
-	d_fnstart(3, dev, "(wimax_dev %p report_skb %p)\n",
-		  wimax_dev, report_skb);
-	if (report_skb == NULL) {
-		result = -ENOMEM;
-		goto out;
-	}
-	genlmsg_end(report_skb, header);
-	genlmsg_multicast(&wimax_gnl_family, report_skb, 0, 0, GFP_KERNEL);
-out:
-	d_fnend(3, dev, "(wimax_dev %p report_skb %p) = %d\n",
-		wimax_dev, report_skb, result);
-	return result;
-}
-
-
-static
-void __check_new_state(enum wimax_st old_state, enum wimax_st new_state,
-		       unsigned int allowed_states_bm)
-{
-	if (WARN_ON(((1 << new_state) & allowed_states_bm) == 0)) {
-		pr_err("SW BUG! Forbidden state change %u -> %u\n",
-		       old_state, new_state);
-	}
-}
-
-
-/*
- * Set the current state of a WiMAX device [unlocking version of
- * wimax_state_change().
- */
-void __wimax_state_change(struct wimax_dev *wimax_dev, enum wimax_st new_state)
-{
-	struct device *dev = wimax_dev_to_dev(wimax_dev);
-	enum wimax_st old_state = wimax_dev->state;
-	struct sk_buff *stch_skb;
-	void *header;
-
-	d_fnstart(3, dev, "(wimax_dev %p new_state %u [old %u])\n",
-		  wimax_dev, new_state, old_state);
-
-	if (WARN_ON(new_state >= __WIMAX_ST_INVALID)) {
-		dev_err(dev, "SW BUG: requesting invalid state %u\n",
-			new_state);
-		goto out;
-	}
-	if (old_state == new_state)
-		goto out;
-	header = NULL;	/* gcc complains? can't grok why */
-	stch_skb = wimax_gnl_re_state_change_alloc(
-		wimax_dev, new_state, old_state, &header);
-
-	/* Verify the state transition and do exit-from-state actions */
-	switch (old_state) {
-	case __WIMAX_ST_NULL:
-		__check_new_state(old_state, new_state,
-				  1 << WIMAX_ST_DOWN);
-		break;
-	case WIMAX_ST_DOWN:
-		__check_new_state(old_state, new_state,
-				  1 << __WIMAX_ST_QUIESCING
-				  | 1 << WIMAX_ST_UNINITIALIZED
-				  | 1 << WIMAX_ST_RADIO_OFF);
-		break;
-	case __WIMAX_ST_QUIESCING:
-		__check_new_state(old_state, new_state, 1 << WIMAX_ST_DOWN);
-		break;
-	case WIMAX_ST_UNINITIALIZED:
-		__check_new_state(old_state, new_state,
-				  1 << __WIMAX_ST_QUIESCING
-				  | 1 << WIMAX_ST_RADIO_OFF);
-		break;
-	case WIMAX_ST_RADIO_OFF:
-		__check_new_state(old_state, new_state,
-				  1 << __WIMAX_ST_QUIESCING
-				  | 1 << WIMAX_ST_READY);
-		break;
-	case WIMAX_ST_READY:
-		__check_new_state(old_state, new_state,
-				  1 << __WIMAX_ST_QUIESCING
-				  | 1 << WIMAX_ST_RADIO_OFF
-				  | 1 << WIMAX_ST_SCANNING
-				  | 1 << WIMAX_ST_CONNECTING
-				  | 1 << WIMAX_ST_CONNECTED);
-		break;
-	case WIMAX_ST_SCANNING:
-		__check_new_state(old_state, new_state,
-				  1 << __WIMAX_ST_QUIESCING
-				  | 1 << WIMAX_ST_RADIO_OFF
-				  | 1 << WIMAX_ST_READY
-				  | 1 << WIMAX_ST_CONNECTING
-				  | 1 << WIMAX_ST_CONNECTED);
-		break;
-	case WIMAX_ST_CONNECTING:
-		__check_new_state(old_state, new_state,
-				  1 << __WIMAX_ST_QUIESCING
-				  | 1 << WIMAX_ST_RADIO_OFF
-				  | 1 << WIMAX_ST_READY
-				  | 1 << WIMAX_ST_SCANNING
-				  | 1 << WIMAX_ST_CONNECTED);
-		break;
-	case WIMAX_ST_CONNECTED:
-		__check_new_state(old_state, new_state,
-				  1 << __WIMAX_ST_QUIESCING
-				  | 1 << WIMAX_ST_RADIO_OFF
-				  | 1 << WIMAX_ST_READY);
-		netif_tx_disable(wimax_dev->net_dev);
-		netif_carrier_off(wimax_dev->net_dev);
-		break;
-	case __WIMAX_ST_INVALID:
-	default:
-		dev_err(dev, "SW BUG: wimax_dev %p is in unknown state %u\n",
-			wimax_dev, wimax_dev->state);
-		WARN_ON(1);
-		goto out;
-	}
-
-	/* Execute the actions of entry to the new state */
-	switch (new_state) {
-	case __WIMAX_ST_NULL:
-		dev_err(dev, "SW BUG: wimax_dev %p entering NULL state "
-			"from %u\n", wimax_dev, wimax_dev->state);
-		WARN_ON(1);		/* Nobody can enter this state */
-		break;
-	case WIMAX_ST_DOWN:
-		break;
-	case __WIMAX_ST_QUIESCING:
-		break;
-	case WIMAX_ST_UNINITIALIZED:
-		break;
-	case WIMAX_ST_RADIO_OFF:
-		break;
-	case WIMAX_ST_READY:
-		break;
-	case WIMAX_ST_SCANNING:
-		break;
-	case WIMAX_ST_CONNECTING:
-		break;
-	case WIMAX_ST_CONNECTED:
-		netif_carrier_on(wimax_dev->net_dev);
-		netif_wake_queue(wimax_dev->net_dev);
-		break;
-	case __WIMAX_ST_INVALID:
-	default:
-		BUG();
-	}
-	__wimax_state_set(wimax_dev, new_state);
-	if (!IS_ERR(stch_skb))
-		wimax_gnl_re_state_change_send(wimax_dev, stch_skb, header);
-out:
-	d_fnend(3, dev, "(wimax_dev %p new_state %u [old %u]) = void\n",
-		wimax_dev, new_state, old_state);
-}
-
-
-/**
- * wimax_state_change - Set the current state of a WiMAX device
- *
- * @wimax_dev: WiMAX device descriptor (properly referenced)
- * @new_state: New state to switch to
- *
- * This implements the state changes for the wimax devices. It will
- *
- * - verify that the state transition is legal (for now it'll just
- *   print a warning if not) according to the table in
- *   linux/wimax.h's documentation for 'enum wimax_st'.
- *
- * - perform the actions needed for leaving the current state and
- *   whichever are needed for entering the new state.
- *
- * - issue a report to user space indicating the new state (and an
- *   optional payload with information about the new state).
- *
- * NOTE: @wimax_dev must be locked
- */
-void wimax_state_change(struct wimax_dev *wimax_dev, enum wimax_st new_state)
-{
-	/*
-	 * A driver cannot take the wimax_dev out of the
-	 * __WIMAX_ST_NULL state unless by calling wimax_dev_add(). If
-	 * the wimax_dev's state is still NULL, we ignore any request
-	 * to change its state because it means it hasn't been yet
-	 * registered.
-	 *
-	 * There is no need to complain about it, as routines that
-	 * call this might be shared from different code paths that
-	 * are called before or after wimax_dev_add() has done its
-	 * job.
-	 */
-	mutex_lock(&wimax_dev->mutex);
-	if (wimax_dev->state > __WIMAX_ST_NULL)
-		__wimax_state_change(wimax_dev, new_state);
-	mutex_unlock(&wimax_dev->mutex);
-}
-EXPORT_SYMBOL_GPL(wimax_state_change);
-
-
-/**
- * wimax_state_get() - Return the current state of a WiMAX device
- *
- * @wimax_dev: WiMAX device descriptor
- *
- * Returns: Current state of the device according to its driver.
- */
-enum wimax_st wimax_state_get(struct wimax_dev *wimax_dev)
-{
-	enum wimax_st state;
-	mutex_lock(&wimax_dev->mutex);
-	state = wimax_dev->state;
-	mutex_unlock(&wimax_dev->mutex);
-	return state;
-}
-EXPORT_SYMBOL_GPL(wimax_state_get);
-
-
-/**
- * wimax_dev_init - initialize a newly allocated instance
- *
- * @wimax_dev: WiMAX device descriptor to initialize.
- *
- * Initializes fields of a freshly allocated @wimax_dev instance. This
- * function assumes that after allocation, the memory occupied by
- * @wimax_dev was zeroed.
- */
-void wimax_dev_init(struct wimax_dev *wimax_dev)
-{
-	INIT_LIST_HEAD(&wimax_dev->id_table_node);
-	__wimax_state_set(wimax_dev, __WIMAX_ST_NULL);
-	mutex_init(&wimax_dev->mutex);
-	mutex_init(&wimax_dev->mutex_reset);
-}
-EXPORT_SYMBOL_GPL(wimax_dev_init);
-
-/*
- * There are multiple enums reusing the same values, adding
- * others is only possible if they use a compatible policy.
- */
-static const struct nla_policy wimax_gnl_policy[WIMAX_GNL_ATTR_MAX + 1] = {
-	/*
-	 * WIMAX_GNL_RESET_IFIDX, WIMAX_GNL_RFKILL_IFIDX,
-	 * WIMAX_GNL_STGET_IFIDX, WIMAX_GNL_MSG_IFIDX
-	 */
-	[1] = { .type = NLA_U32, },
-	/*
-	 * WIMAX_GNL_RFKILL_STATE, WIMAX_GNL_MSG_PIPE_NAME
-	 */
-	[2] = { .type = NLA_U32, }, /* enum wimax_rf_state */
-	/*
-	 * WIMAX_GNL_MSG_DATA
-	 */
-	[3] = { .type = NLA_UNSPEC, }, /* libnl doesn't grok BINARY yet */
-};
-
-static const struct genl_small_ops wimax_gnl_ops[] = {
-	{
-		.cmd = WIMAX_GNL_OP_MSG_FROM_USER,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.flags = GENL_ADMIN_PERM,
-		.doit = wimax_gnl_doit_msg_from_user,
-	},
-	{
-		.cmd = WIMAX_GNL_OP_RESET,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.flags = GENL_ADMIN_PERM,
-		.doit = wimax_gnl_doit_reset,
-	},
-	{
-		.cmd = WIMAX_GNL_OP_RFKILL,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.flags = GENL_ADMIN_PERM,
-		.doit = wimax_gnl_doit_rfkill,
-	},
-	{
-		.cmd = WIMAX_GNL_OP_STATE_GET,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.flags = GENL_ADMIN_PERM,
-		.doit = wimax_gnl_doit_state_get,
-	},
-};
-
-
-static
-size_t wimax_addr_scnprint(char *addr_str, size_t addr_str_size,
-			   unsigned char *addr, size_t addr_len)
-{
-	unsigned int cnt, total;
-
-	for (total = cnt = 0; cnt < addr_len; cnt++)
-		total += scnprintf(addr_str + total, addr_str_size - total,
-				   "%02x%c", addr[cnt],
-				   cnt == addr_len - 1 ? '\0' : ':');
-	return total;
-}
-
-
-/**
- * wimax_dev_add - Register a new WiMAX device
- *
- * @wimax_dev: WiMAX device descriptor (as embedded in your @net_dev's
- *     priv data). You must have called wimax_dev_init() on it before.
- *
- * @net_dev: net device the @wimax_dev is associated with. The
- *     function expects SET_NETDEV_DEV() and register_netdev() were
- *     already called on it.
- *
- * Registers the new WiMAX device, sets up the user-kernel control
- * interface (generic netlink) and common WiMAX infrastructure.
- *
- * Note that the parts that will allow interaction with user space are
- * setup at the very end, when the rest is in place, as once that
- * happens, the driver might get user space control requests via
- * netlink or from debugfs that might translate into calls into
- * wimax_dev->op_*().
- */
-int wimax_dev_add(struct wimax_dev *wimax_dev, struct net_device *net_dev)
-{
-	int result;
-	struct device *dev = net_dev->dev.parent;
-	char addr_str[32];
-
-	d_fnstart(3, dev, "(wimax_dev %p net_dev %p)\n", wimax_dev, net_dev);
-
-	/* Do the RFKILL setup before locking, as RFKILL will call
-	 * into our functions.
-	 */
-	wimax_dev->net_dev = net_dev;
-	result = wimax_rfkill_add(wimax_dev);
-	if (result < 0)
-		goto error_rfkill_add;
-
-	/* Set up user-space interaction */
-	mutex_lock(&wimax_dev->mutex);
-	wimax_id_table_add(wimax_dev);
-	wimax_debugfs_add(wimax_dev);
-
-	__wimax_state_set(wimax_dev, WIMAX_ST_DOWN);
-	mutex_unlock(&wimax_dev->mutex);
-
-	wimax_addr_scnprint(addr_str, sizeof(addr_str),
-			    net_dev->dev_addr, net_dev->addr_len);
-	dev_err(dev, "WiMAX interface %s (%s) ready\n",
-		net_dev->name, addr_str);
-	d_fnend(3, dev, "(wimax_dev %p net_dev %p) = 0\n", wimax_dev, net_dev);
-	return 0;
-
-error_rfkill_add:
-	d_fnend(3, dev, "(wimax_dev %p net_dev %p) = %d\n",
-		wimax_dev, net_dev, result);
-	return result;
-}
-EXPORT_SYMBOL_GPL(wimax_dev_add);
-
-
-/**
- * wimax_dev_rm - Unregister an existing WiMAX device
- *
- * @wimax_dev: WiMAX device descriptor
- *
- * Unregisters a WiMAX device previously registered for use with
- * wimax_add_rm().
- *
- * IMPORTANT! Must call before calling unregister_netdev().
- *
- * After this function returns, you will not get any more user space
- * control requests (via netlink or debugfs) and thus to wimax_dev->ops.
- *
- * Reentrancy control is ensured by setting the state to
- * %__WIMAX_ST_QUIESCING. rfkill operations coming through
- * wimax_*rfkill*() will be stopped by the quiescing state; ops coming
- * from the rfkill subsystem will be stopped by the support being
- * removed by wimax_rfkill_rm().
- */
-void wimax_dev_rm(struct wimax_dev *wimax_dev)
-{
-	d_fnstart(3, NULL, "(wimax_dev %p)\n", wimax_dev);
-
-	mutex_lock(&wimax_dev->mutex);
-	__wimax_state_change(wimax_dev, __WIMAX_ST_QUIESCING);
-	wimax_debugfs_rm(wimax_dev);
-	wimax_id_table_rm(wimax_dev);
-	__wimax_state_change(wimax_dev, WIMAX_ST_DOWN);
-	mutex_unlock(&wimax_dev->mutex);
-	wimax_rfkill_rm(wimax_dev);
-	d_fnend(3, NULL, "(wimax_dev %p) = void\n", wimax_dev);
-}
-EXPORT_SYMBOL_GPL(wimax_dev_rm);
-
-
-/* Debug framework control of debug levels */
-struct d_level D_LEVEL[] = {
-	D_SUBMODULE_DEFINE(debugfs),
-	D_SUBMODULE_DEFINE(id_table),
-	D_SUBMODULE_DEFINE(op_msg),
-	D_SUBMODULE_DEFINE(op_reset),
-	D_SUBMODULE_DEFINE(op_rfkill),
-	D_SUBMODULE_DEFINE(op_state_get),
-	D_SUBMODULE_DEFINE(stack),
-};
-size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL);
-
-
-static const struct genl_multicast_group wimax_gnl_mcgrps[] = {
-	{ .name = "msg", },
-};
-
-struct genl_family wimax_gnl_family __ro_after_init = {
-	.name = "WiMAX",
-	.version = WIMAX_GNL_VERSION,
-	.hdrsize = 0,
-	.maxattr = WIMAX_GNL_ATTR_MAX,
-	.policy = wimax_gnl_policy,
-	.module = THIS_MODULE,
-	.small_ops = wimax_gnl_ops,
-	.n_small_ops = ARRAY_SIZE(wimax_gnl_ops),
-	.mcgrps = wimax_gnl_mcgrps,
-	.n_mcgrps = ARRAY_SIZE(wimax_gnl_mcgrps),
-};
-
-
-
-/* Shutdown the wimax stack */
-static
-int __init wimax_subsys_init(void)
-{
-	int result;
-
-	d_fnstart(4, NULL, "()\n");
-	d_parse_params(D_LEVEL, D_LEVEL_SIZE, wimax_debug_params,
-		       "wimax.debug");
-
-	result = genl_register_family(&wimax_gnl_family);
-	if (unlikely(result < 0)) {
-		pr_err("cannot register generic netlink family: %d\n", result);
-		goto error_register_family;
-	}
-
-	d_fnend(4, NULL, "() = 0\n");
-	return 0;
-
-error_register_family:
-	d_fnend(4, NULL, "() = %d\n", result);
-	return result;
-
-}
-module_init(wimax_subsys_init);
-
-
-/* Shutdown the wimax stack */
-static
-void __exit wimax_subsys_exit(void)
-{
-	wimax_id_table_release();
-	genl_unregister_family(&wimax_gnl_family);
-}
-module_exit(wimax_subsys_exit);
-
-MODULE_AUTHOR("Intel Corporation <linux-wimax@intel.com>");
-MODULE_DESCRIPTION("Linux WiMAX stack");
-MODULE_LICENSE("GPL");
diff --git a/net/wimax/wimax-internal.h b/net/wimax/wimax-internal.h
deleted file mode 100644
index 40751207296c..000000000000
--- a/net/wimax/wimax-internal.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Linux WiMAX
- * Internal API for kernel space WiMAX stack
- *
- * Copyright (C) 2007 Intel Corporation <linux-wimax@intel.com>
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * This header file is for declarations and definitions internal to
- * the WiMAX stack. For public APIs and documentation, see
- * include/net/wimax.h and include/linux/wimax.h.
- */
-
-#ifndef __WIMAX_INTERNAL_H__
-#define __WIMAX_INTERNAL_H__
-#ifdef __KERNEL__
-
-#ifdef pr_fmt
-#undef pr_fmt
-#endif
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/device.h>
-#include <net/wimax.h>
-
-
-/*
- * Decide if a (locked) device is ready for use
- *
- * Before using the device structure, it must be locked
- * (wimax_dev->mutex). As well, most operations need to call this
- * function to check if the state is the right one.
- *
- * An error value will be returned if the state is not the right
- * one. In that case, the caller should not attempt to use the device
- * and just unlock it.
- */
-static inline __must_check
-int wimax_dev_is_ready(struct wimax_dev *wimax_dev)
-{
-	if (wimax_dev->state == __WIMAX_ST_NULL)
-		return -EINVAL;	/* Device is not even registered! */
-	if (wimax_dev->state == WIMAX_ST_DOWN)
-		return -ENOMEDIUM;
-	if (wimax_dev->state == __WIMAX_ST_QUIESCING)
-		return -ESHUTDOWN;
-	return 0;
-}
-
-
-static inline
-void __wimax_state_set(struct wimax_dev *wimax_dev, enum wimax_st state)
-{
-	wimax_dev->state = state;
-}
-void __wimax_state_change(struct wimax_dev *, enum wimax_st);
-
-#ifdef CONFIG_DEBUG_FS
-void wimax_debugfs_add(struct wimax_dev *);
-void wimax_debugfs_rm(struct wimax_dev *);
-#else
-static inline void wimax_debugfs_add(struct wimax_dev *wimax_dev) {}
-static inline void wimax_debugfs_rm(struct wimax_dev *wimax_dev) {}
-#endif
-
-void wimax_id_table_add(struct wimax_dev *);
-struct wimax_dev *wimax_dev_get_by_genl_info(struct genl_info *, int);
-void wimax_id_table_rm(struct wimax_dev *);
-void wimax_id_table_release(void);
-
-int wimax_rfkill_add(struct wimax_dev *);
-void wimax_rfkill_rm(struct wimax_dev *);
-
-/* generic netlink */
-extern struct genl_family wimax_gnl_family;
-
-/* ops */
-int wimax_gnl_doit_msg_from_user(struct sk_buff *skb, struct genl_info *info);
-int wimax_gnl_doit_reset(struct sk_buff *skb, struct genl_info *info);
-int wimax_gnl_doit_rfkill(struct sk_buff *skb, struct genl_info *info);
-int wimax_gnl_doit_state_get(struct sk_buff *skb, struct genl_info *info);
-
-#endif /* #ifdef __KERNEL__ */
-#endif /* #ifndef __WIMAX_INTERNAL_H__ */
-- 
cgit v1.2.3


From fbaedb4129838252570410c65abb2036b5505cbd Mon Sep 17 00:00:00 2001
From: Henrik Bjoernlund <henrik.bjoernlund@microchip.com>
Date: Tue, 27 Oct 2020 10:02:44 +0000
Subject: bridge: uapi: cfm: Added EtherType used by the CFM protocol.

This EtherType is used by all CFM protocol frames transmitted
according to 802.1Q section 12.14.

Signed-off-by: Henrik Bjoernlund  <henrik.bjoernlund@microchip.com>
Reviewed-by: Horatiu Vultur  <horatiu.vultur@microchip.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/if_ether.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index d6de2b167448..a0b637911d3c 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -99,6 +99,7 @@
 #define ETH_P_1588	0x88F7		/* IEEE 1588 Timesync */
 #define ETH_P_NCSI	0x88F8		/* NCSI protocol		*/
 #define ETH_P_PRP	0x88FB		/* IEC 62439-3 PRP/HSRv0	*/
+#define ETH_P_CFM	0x8902		/* Connectivity Fault Management */
 #define ETH_P_FCOE	0x8906		/* Fibre Channel over Ethernet  */
 #define ETH_P_IBOE	0x8915		/* Infiniband over Ethernet	*/
 #define ETH_P_TDLS	0x890D          /* TDLS */
-- 
cgit v1.2.3


From 86a14b79e1d0fa023f82d7c2dde888fa64af2c65 Mon Sep 17 00:00:00 2001
From: Henrik Bjoernlund <henrik.bjoernlund@microchip.com>
Date: Tue, 27 Oct 2020 10:02:45 +0000
Subject: bridge: cfm: Kernel space implementation of CFM. MEP create/delete.

This is the first commit of the implementation of the CFM protocol
according to 802.1Q section 12.14.

It contains MEP instance create, delete and configuration.

Connectivity Fault Management (CFM) comprises capabilities for
detecting, verifying, and isolating connectivity failures in
Virtual Bridged Networks. These capabilities can be used in
networks operated by multiple independent organizations, each
with restricted management access to each other's equipment.

CFM functions are partitioned as follows:
    - Path discovery
    - Fault detection
    - Fault verification and isolation
    - Fault notification
    - Fault recovery

Interface consists of these functions:
br_cfm_mep_create()
br_cfm_mep_delete()
br_cfm_mep_config_set()
br_cfm_cc_config_set()
br_cfm_cc_peer_mep_add()
br_cfm_cc_peer_mep_remove()

A MEP instance is created by br_cfm_mep_create()
    -It is the Maintenance association End Point
     described in 802.1Q section 19.2.
    -It is created on a specific level (1-7) and is assuring
     that no CFM frames are passing through this MEP on lower levels.
    -It initiates and validates CFM frames on its level.
    -It can only exist on a port that is related to a bridge.
    -Attributes given cannot be changed until the instance is
     deleted.

A MEP instance can be deleted by br_cfm_mep_delete().

A created MEP instance has attributes that can be
configured by br_cfm_mep_config_set().

A MEP Continuity Check feature can be configured by
br_cfm_cc_config_set()
    The Continuity Check Receiver state machine can be
    enabled and disabled.
    According to 802.1Q section 19.2.8

A MEP can have Peer MEPs added and removed by
br_cfm_cc_peer_mep_add() and br_cfm_cc_peer_mep_remove()
    The Continuity Check feature can maintain connectivity
    status on each added Peer MEP.

Signed-off-by: Henrik Bjoernlund  <henrik.bjoernlund@microchip.com>
Reviewed-by: Horatiu Vultur  <horatiu.vultur@microchip.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/cfm_bridge.h |  23 ++++
 net/bridge/Makefile             |   2 +
 net/bridge/br_cfm.c             | 260 ++++++++++++++++++++++++++++++++++++++++
 net/bridge/br_if.c              |   1 +
 net/bridge/br_private.h         |  10 ++
 net/bridge/br_private_cfm.h     |  61 ++++++++++
 6 files changed, 357 insertions(+)
 create mode 100644 include/uapi/linux/cfm_bridge.h
 create mode 100644 net/bridge/br_cfm.c
 create mode 100644 net/bridge/br_private_cfm.h

(limited to 'include/uapi')

diff --git a/include/uapi/linux/cfm_bridge.h b/include/uapi/linux/cfm_bridge.h
new file mode 100644
index 000000000000..a262a8c0e085
--- /dev/null
+++ b/include/uapi/linux/cfm_bridge.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+
+#ifndef _UAPI_LINUX_CFM_BRIDGE_H_
+#define _UAPI_LINUX_CFM_BRIDGE_H_
+
+#include <linux/types.h>
+#include <linux/if_ether.h>
+
+#define CFM_MAID_LENGTH		48
+
+/* MEP domain */
+enum br_cfm_domain {
+	BR_CFM_PORT,
+	BR_CFM_VLAN,
+};
+
+/* MEP direction */
+enum br_cfm_mep_direction {
+	BR_CFM_MEP_DIRECTION_DOWN,
+	BR_CFM_MEP_DIRECTION_UP,
+};
+
+#endif
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index ccb394236fbd..ddc0a9192348 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -27,3 +27,5 @@ bridge-$(CONFIG_NET_SWITCHDEV) += br_switchdev.o
 obj-$(CONFIG_NETFILTER) += netfilter/
 
 bridge-$(CONFIG_BRIDGE_MRP)	+= br_mrp_switchdev.o br_mrp.o br_mrp_netlink.o
+
+bridge-$(CONFIG_BRIDGE_CFM)	+= br_cfm.o
diff --git a/net/bridge/br_cfm.c b/net/bridge/br_cfm.c
new file mode 100644
index 000000000000..42f35109681a
--- /dev/null
+++ b/net/bridge/br_cfm.c
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/cfm_bridge.h>
+#include <uapi/linux/cfm_bridge.h>
+#include "br_private_cfm.h"
+
+static struct br_cfm_mep *br_mep_find(struct net_bridge *br, u32 instance)
+{
+	struct br_cfm_mep *mep;
+
+	hlist_for_each_entry(mep, &br->mep_list, head)
+		if (mep->instance == instance)
+			return mep;
+
+	return NULL;
+}
+
+static struct br_cfm_mep *br_mep_find_ifindex(struct net_bridge *br,
+					      u32 ifindex)
+{
+	struct br_cfm_mep *mep;
+
+	hlist_for_each_entry_rcu(mep, &br->mep_list, head,
+				 lockdep_rtnl_is_held())
+		if (mep->create.ifindex == ifindex)
+			return mep;
+
+	return NULL;
+}
+
+static struct br_cfm_peer_mep *br_peer_mep_find(struct br_cfm_mep *mep,
+						u32 mepid)
+{
+	struct br_cfm_peer_mep *peer_mep;
+
+	hlist_for_each_entry_rcu(peer_mep, &mep->peer_mep_list, head,
+				 lockdep_rtnl_is_held())
+		if (peer_mep->mepid == mepid)
+			return peer_mep;
+
+	return NULL;
+}
+
+static struct net_bridge_port *br_mep_get_port(struct net_bridge *br,
+					       u32 ifindex)
+{
+	struct net_bridge_port *port;
+
+	list_for_each_entry(port, &br->port_list, list)
+		if (port->dev->ifindex == ifindex)
+			return port;
+
+	return NULL;
+}
+
+int br_cfm_mep_create(struct net_bridge *br,
+		      const u32 instance,
+		      struct br_cfm_mep_create *const create,
+		      struct netlink_ext_ack *extack)
+{
+	struct net_bridge_port *p;
+	struct br_cfm_mep *mep;
+
+	ASSERT_RTNL();
+
+	if (create->domain == BR_CFM_VLAN) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "VLAN domain not supported");
+		return -EINVAL;
+	}
+	if (create->domain != BR_CFM_PORT) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Invalid domain value");
+		return -EINVAL;
+	}
+	if (create->direction == BR_CFM_MEP_DIRECTION_UP) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Up-MEP not supported");
+		return -EINVAL;
+	}
+	if (create->direction != BR_CFM_MEP_DIRECTION_DOWN) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Invalid direction value");
+		return -EINVAL;
+	}
+	p = br_mep_get_port(br, create->ifindex);
+	if (!p) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Port is not related to bridge");
+		return -EINVAL;
+	}
+	mep = br_mep_find(br, instance);
+	if (mep) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "MEP instance already exists");
+		return -EEXIST;
+	}
+
+	/* In PORT domain only one instance can be created per port */
+	if (create->domain == BR_CFM_PORT) {
+		mep = br_mep_find_ifindex(br, create->ifindex);
+		if (mep) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Only one Port MEP on a port allowed");
+			return -EINVAL;
+		}
+	}
+
+	mep = kzalloc(sizeof(*mep), GFP_KERNEL);
+	if (!mep)
+		return -ENOMEM;
+
+	mep->create = *create;
+	mep->instance = instance;
+	rcu_assign_pointer(mep->b_port, p);
+
+	INIT_HLIST_HEAD(&mep->peer_mep_list);
+
+	hlist_add_tail_rcu(&mep->head, &br->mep_list);
+
+	return 0;
+}
+
+static void mep_delete_implementation(struct net_bridge *br,
+				      struct br_cfm_mep *mep)
+{
+	struct br_cfm_peer_mep *peer_mep;
+	struct hlist_node *n_store;
+
+	ASSERT_RTNL();
+
+	/* Empty and free peer MEP list */
+	hlist_for_each_entry_safe(peer_mep, n_store, &mep->peer_mep_list, head) {
+		hlist_del_rcu(&peer_mep->head);
+		kfree_rcu(peer_mep, rcu);
+	}
+
+	RCU_INIT_POINTER(mep->b_port, NULL);
+	hlist_del_rcu(&mep->head);
+	kfree_rcu(mep, rcu);
+}
+
+int br_cfm_mep_delete(struct net_bridge *br,
+		      const u32 instance,
+		      struct netlink_ext_ack *extack)
+{
+	struct br_cfm_mep *mep;
+
+	ASSERT_RTNL();
+
+	mep = br_mep_find(br, instance);
+	if (!mep) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "MEP instance does not exists");
+		return -ENOENT;
+	}
+
+	mep_delete_implementation(br, mep);
+
+	return 0;
+}
+
+int br_cfm_mep_config_set(struct net_bridge *br,
+			  const u32 instance,
+			  const struct br_cfm_mep_config *const config,
+			  struct netlink_ext_ack *extack)
+{
+	struct br_cfm_mep *mep;
+
+	ASSERT_RTNL();
+
+	mep = br_mep_find(br, instance);
+	if (!mep) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "MEP instance does not exists");
+		return -ENOENT;
+	}
+
+	mep->config = *config;
+
+	return 0;
+}
+
+int br_cfm_cc_peer_mep_add(struct net_bridge *br, const u32 instance,
+			   u32 mepid,
+			   struct netlink_ext_ack *extack)
+{
+	struct br_cfm_peer_mep *peer_mep;
+	struct br_cfm_mep *mep;
+
+	ASSERT_RTNL();
+
+	mep = br_mep_find(br, instance);
+	if (!mep) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "MEP instance does not exists");
+		return -ENOENT;
+	}
+
+	peer_mep = br_peer_mep_find(mep, mepid);
+	if (peer_mep) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Peer MEP-ID already exists");
+		return -EEXIST;
+	}
+
+	peer_mep = kzalloc(sizeof(*peer_mep), GFP_KERNEL);
+	if (!peer_mep)
+		return -ENOMEM;
+
+	peer_mep->mepid = mepid;
+	peer_mep->mep = mep;
+
+	hlist_add_tail_rcu(&peer_mep->head, &mep->peer_mep_list);
+
+	return 0;
+}
+
+int br_cfm_cc_peer_mep_remove(struct net_bridge *br, const u32 instance,
+			      u32 mepid,
+			      struct netlink_ext_ack *extack)
+{
+	struct br_cfm_peer_mep *peer_mep;
+	struct br_cfm_mep *mep;
+
+	ASSERT_RTNL();
+
+	mep = br_mep_find(br, instance);
+	if (!mep) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "MEP instance does not exists");
+		return -ENOENT;
+	}
+
+	peer_mep = br_peer_mep_find(mep, mepid);
+	if (!peer_mep) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Peer MEP-ID does not exists");
+		return -ENOENT;
+	}
+
+	hlist_del_rcu(&peer_mep->head);
+	kfree_rcu(peer_mep, rcu);
+
+	return 0;
+}
+
+/* Deletes the CFM instances on a specific bridge port
+ */
+void br_cfm_port_del(struct net_bridge *br, struct net_bridge_port *port)
+{
+	struct hlist_node *n_store;
+	struct br_cfm_mep *mep;
+
+	ASSERT_RTNL();
+
+	hlist_for_each_entry_safe(mep, n_store, &br->mep_list, head)
+		if (mep->create.ifindex == port->dev->ifindex)
+			mep_delete_implementation(br, mep);
+}
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index a0e9a7937412..f7d2f472ae24 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -334,6 +334,7 @@ static void del_nbp(struct net_bridge_port *p)
 	spin_unlock_bh(&br->lock);
 
 	br_mrp_port_del(br, p);
+	br_cfm_port_del(br, p);
 
 	br_ifinfo_notify(RTM_DELLINK, NULL, p);
 
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 90ead48fa762..f7c41380de4d 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -1459,6 +1459,16 @@ static inline int br_mrp_fill_info(struct sk_buff *skb, struct net_bridge *br)
 
 #endif
 
+/* br_cfm.c */
+#if IS_ENABLED(CONFIG_BRIDGE_CFM)
+void br_cfm_port_del(struct net_bridge *br, struct net_bridge_port *p);
+#else
+static inline void br_cfm_port_del(struct net_bridge *br,
+				   struct net_bridge_port *p)
+{
+}
+#endif
+
 /* br_netlink.c */
 extern struct rtnl_link_ops br_link_ops;
 int br_netlink_init(void);
diff --git a/net/bridge/br_private_cfm.h b/net/bridge/br_private_cfm.h
new file mode 100644
index 000000000000..40fe982added
--- /dev/null
+++ b/net/bridge/br_private_cfm.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef _BR_PRIVATE_CFM_H_
+#define _BR_PRIVATE_CFM_H_
+
+#include "br_private.h"
+#include <uapi/linux/cfm_bridge.h>
+
+struct br_cfm_mep_create {
+	enum br_cfm_domain domain; /* Domain for this MEP */
+	enum br_cfm_mep_direction direction; /* Up or Down MEP direction */
+	u32 ifindex; /* Residence port */
+};
+
+int br_cfm_mep_create(struct net_bridge *br,
+		      const u32 instance,
+		      struct br_cfm_mep_create *const create,
+		      struct netlink_ext_ack *extack);
+
+int br_cfm_mep_delete(struct net_bridge *br,
+		      const u32 instance,
+		      struct netlink_ext_ack *extack);
+
+struct br_cfm_mep_config {
+	u32 mdlevel;
+	u32 mepid; /* MEPID for this MEP */
+	struct mac_addr unicast_mac; /* The MEP unicast MAC */
+};
+
+int br_cfm_mep_config_set(struct net_bridge *br,
+			  const u32 instance,
+			  const struct br_cfm_mep_config *const config,
+			  struct netlink_ext_ack *extack);
+
+int br_cfm_cc_peer_mep_add(struct net_bridge *br, const u32 instance,
+			   u32 peer_mep_id,
+			   struct netlink_ext_ack *extack);
+int br_cfm_cc_peer_mep_remove(struct net_bridge *br, const u32 instance,
+			      u32 peer_mep_id,
+			      struct netlink_ext_ack *extack);
+
+struct br_cfm_mep {
+	/* list header of MEP instances */
+	struct hlist_node		head;
+	u32				instance;
+	struct br_cfm_mep_create	create;
+	struct br_cfm_mep_config	config;
+	/* List of multiple peer MEPs */
+	struct hlist_head		peer_mep_list;
+	struct net_bridge_port __rcu	*b_port;
+	struct rcu_head			rcu;
+};
+
+struct br_cfm_peer_mep {
+	struct hlist_node		head;
+	struct br_cfm_mep		*mep;
+	u32				mepid;
+	struct rcu_head			rcu;
+};
+
+#endif /* _BR_PRIVATE_CFM_H_ */
-- 
cgit v1.2.3


From a806ad8ee2aa7826b279c3f92c67956eb101ae42 Mon Sep 17 00:00:00 2001
From: Henrik Bjoernlund <henrik.bjoernlund@microchip.com>
Date: Tue, 27 Oct 2020 10:02:46 +0000
Subject: bridge: cfm: Kernel space implementation of CFM. CCM frame TX added.

This is the second commit of the implementation of the CFM protocol
according to 802.1Q section 12.14.

Functionality is extended with CCM frame transmission.

Interface is extended with these functions:
br_cfm_cc_rdi_set()
br_cfm_cc_ccm_tx()
br_cfm_cc_config_set()

A MEP Continuity Check feature can be configured by
br_cfm_cc_config_set()
    The Continuity Check parameters can be configured to be used when
    transmitting CCM.

A MEP can be configured to start or stop transmission of CCM frames by
br_cfm_cc_ccm_tx()
    The CCM will be transmitted for a selected period in seconds.
    Must call this function before timeout to keep transmission alive.

A MEP transmitting CCM can be configured with inserted RDI in PDU by
br_cfm_cc_rdi_set()

Signed-off-by: Henrik Bjoernlund  <henrik.bjoernlund@microchip.com>
Reviewed-by: Horatiu Vultur  <horatiu.vultur@microchip.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/cfm_bridge.h |  39 +++++-
 net/bridge/br_cfm.c             | 285 ++++++++++++++++++++++++++++++++++++++++
 net/bridge/br_private_cfm.h     |  54 ++++++++
 3 files changed, 377 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/cfm_bridge.h b/include/uapi/linux/cfm_bridge.h
index a262a8c0e085..84a3817da90b 100644
--- a/include/uapi/linux/cfm_bridge.h
+++ b/include/uapi/linux/cfm_bridge.h
@@ -6,7 +6,32 @@
 #include <linux/types.h>
 #include <linux/if_ether.h>
 
-#define CFM_MAID_LENGTH		48
+#define ETHER_HEADER_LENGTH		(6+6+4+2)
+#define CFM_MAID_LENGTH			48
+#define CFM_CCM_PDU_LENGTH		75
+#define CFM_PORT_STATUS_TLV_LENGTH	4
+#define CFM_IF_STATUS_TLV_LENGTH	4
+#define CFM_IF_STATUS_TLV_TYPE		4
+#define CFM_PORT_STATUS_TLV_TYPE	2
+#define CFM_ENDE_TLV_TYPE		0
+#define CFM_CCM_MAX_FRAME_LENGTH	(ETHER_HEADER_LENGTH+\
+					 CFM_CCM_PDU_LENGTH+\
+					 CFM_PORT_STATUS_TLV_LENGTH+\
+					 CFM_IF_STATUS_TLV_LENGTH)
+#define CFM_FRAME_PRIO			7
+#define CFM_CCM_TLV_OFFSET		70
+#define CFM_CCM_ITU_RESERVED_SIZE	16
+
+struct br_cfm_common_hdr {
+	__u8 mdlevel_version;
+	__u8 opcode;
+	__u8 flags;
+	__u8 tlv_offset;
+};
+
+enum br_cfm_opcodes {
+	BR_CFM_OPCODE_CCM = 0x1,
+};
 
 /* MEP domain */
 enum br_cfm_domain {
@@ -20,4 +45,16 @@ enum br_cfm_mep_direction {
 	BR_CFM_MEP_DIRECTION_UP,
 };
 
+/* CCM interval supported. */
+enum br_cfm_ccm_interval {
+	BR_CFM_CCM_INTERVAL_NONE,
+	BR_CFM_CCM_INTERVAL_3_3_MS,
+	BR_CFM_CCM_INTERVAL_10_MS,
+	BR_CFM_CCM_INTERVAL_100_MS,
+	BR_CFM_CCM_INTERVAL_1_SEC,
+	BR_CFM_CCM_INTERVAL_10_SEC,
+	BR_CFM_CCM_INTERVAL_1_MIN,
+	BR_CFM_CCM_INTERVAL_10_MIN,
+};
+
 #endif
diff --git a/net/bridge/br_cfm.c b/net/bridge/br_cfm.c
index 42f35109681a..382e003f5b92 100644
--- a/net/bridge/br_cfm.c
+++ b/net/bridge/br_cfm.c
@@ -53,6 +53,185 @@ static struct net_bridge_port *br_mep_get_port(struct net_bridge *br,
 	return NULL;
 }
 
+/* Calculate the CCM interval in us. */
+static u32 interval_to_us(enum br_cfm_ccm_interval interval)
+{
+	switch (interval) {
+	case BR_CFM_CCM_INTERVAL_NONE:
+		return 0;
+	case BR_CFM_CCM_INTERVAL_3_3_MS:
+		return 3300;
+	case BR_CFM_CCM_INTERVAL_10_MS:
+		return 10 * 1000;
+	case BR_CFM_CCM_INTERVAL_100_MS:
+		return 100 * 1000;
+	case BR_CFM_CCM_INTERVAL_1_SEC:
+		return 1000 * 1000;
+	case BR_CFM_CCM_INTERVAL_10_SEC:
+		return 10 * 1000 * 1000;
+	case BR_CFM_CCM_INTERVAL_1_MIN:
+		return 60 * 1000 * 1000;
+	case BR_CFM_CCM_INTERVAL_10_MIN:
+		return 10 * 60 * 1000 * 1000;
+	}
+	return 0;
+}
+
+/* Convert the interface interval to CCM PDU value. */
+static u32 interval_to_pdu(enum br_cfm_ccm_interval interval)
+{
+	switch (interval) {
+	case BR_CFM_CCM_INTERVAL_NONE:
+		return 0;
+	case BR_CFM_CCM_INTERVAL_3_3_MS:
+		return 1;
+	case BR_CFM_CCM_INTERVAL_10_MS:
+		return 2;
+	case BR_CFM_CCM_INTERVAL_100_MS:
+		return 3;
+	case BR_CFM_CCM_INTERVAL_1_SEC:
+		return 4;
+	case BR_CFM_CCM_INTERVAL_10_SEC:
+		return 5;
+	case BR_CFM_CCM_INTERVAL_1_MIN:
+		return 6;
+	case BR_CFM_CCM_INTERVAL_10_MIN:
+		return 7;
+	}
+	return 0;
+}
+
+static struct sk_buff *ccm_frame_build(struct br_cfm_mep *mep,
+				       const struct br_cfm_cc_ccm_tx_info *const tx_info)
+
+{
+	struct br_cfm_common_hdr *common_hdr;
+	struct net_bridge_port *b_port;
+	struct br_cfm_maid *maid;
+	u8 *itu_reserved, *e_tlv;
+	struct ethhdr *eth_hdr;
+	struct sk_buff *skb;
+	__be32 *status_tlv;
+	__be32 *snumber;
+	__be16 *mepid;
+
+	skb = dev_alloc_skb(CFM_CCM_MAX_FRAME_LENGTH);
+	if (!skb)
+		return NULL;
+
+	rcu_read_lock();
+	b_port = rcu_dereference(mep->b_port);
+	if (!b_port) {
+		kfree_skb(skb);
+		rcu_read_unlock();
+		return NULL;
+	}
+	skb->dev = b_port->dev;
+	rcu_read_unlock();
+	/* The device cannot be deleted until the work_queue function has
+	 * completed. This function is called from ccm_tx_work_expired(),
+	 * which is a work_queue function.
+	 */
+
+	skb->protocol = htons(ETH_P_CFM);
+	skb->priority = CFM_FRAME_PRIO;
+
+	/* Ethernet header */
+	eth_hdr = skb_put(skb, sizeof(*eth_hdr));
+	ether_addr_copy(eth_hdr->h_dest, tx_info->dmac.addr);
+	ether_addr_copy(eth_hdr->h_source, mep->config.unicast_mac.addr);
+	eth_hdr->h_proto = htons(ETH_P_CFM);
+
+	/* Common CFM Header */
+	common_hdr = skb_put(skb, sizeof(*common_hdr));
+	common_hdr->mdlevel_version = mep->config.mdlevel << 5;
+	common_hdr->opcode = BR_CFM_OPCODE_CCM;
+	common_hdr->flags = (mep->rdi << 7) |
+			    interval_to_pdu(mep->cc_config.exp_interval);
+	common_hdr->tlv_offset = CFM_CCM_TLV_OFFSET;
+
+	/* Sequence number */
+	snumber = skb_put(skb, sizeof(*snumber));
+	if (tx_info->seq_no_update) {
+		*snumber = cpu_to_be32(mep->ccm_tx_snumber);
+		mep->ccm_tx_snumber += 1;
+	} else {
+		*snumber = 0;
+	}
+
+	mepid = skb_put(skb, sizeof(*mepid));
+	*mepid = cpu_to_be16((u16)mep->config.mepid);
+
+	maid = skb_put(skb, sizeof(*maid));
+	memcpy(maid->data, mep->cc_config.exp_maid.data, sizeof(maid->data));
+
+	/* ITU reserved (CFM_CCM_ITU_RESERVED_SIZE octets) */
+	itu_reserved = skb_put(skb, CFM_CCM_ITU_RESERVED_SIZE);
+	memset(itu_reserved, 0, CFM_CCM_ITU_RESERVED_SIZE);
+
+	/* General CFM TLV format:
+	 * TLV type:		one byte
+	 * TLV value length:	two bytes
+	 * TLV value:		'TLV value length' bytes
+	 */
+
+	/* Port status TLV. The value length is 1. Total of 4 bytes. */
+	if (tx_info->port_tlv) {
+		status_tlv = skb_put(skb, sizeof(*status_tlv));
+		*status_tlv = cpu_to_be32((CFM_PORT_STATUS_TLV_TYPE << 24) |
+					  (1 << 8) |	/* Value length */
+					  (tx_info->port_tlv_value & 0xFF));
+	}
+
+	/* Interface status TLV. The value length is 1. Total of 4 bytes. */
+	if (tx_info->if_tlv) {
+		status_tlv = skb_put(skb, sizeof(*status_tlv));
+		*status_tlv = cpu_to_be32((CFM_IF_STATUS_TLV_TYPE << 24) |
+					  (1 << 8) |	/* Value length */
+					  (tx_info->if_tlv_value & 0xFF));
+	}
+
+	/* End TLV */
+	e_tlv = skb_put(skb, sizeof(*e_tlv));
+	*e_tlv = CFM_ENDE_TLV_TYPE;
+
+	return skb;
+}
+
+static void ccm_frame_tx(struct sk_buff *skb)
+{
+	skb_reset_network_header(skb);
+	dev_queue_xmit(skb);
+}
+
+/* This function is called with the configured CC 'expected_interval'
+ * in order to drive CCM transmission when enabled.
+ */
+static void ccm_tx_work_expired(struct work_struct *work)
+{
+	struct delayed_work *del_work;
+	struct br_cfm_mep *mep;
+	struct sk_buff *skb;
+	u32 interval_us;
+
+	del_work = to_delayed_work(work);
+	mep = container_of(del_work, struct br_cfm_mep, ccm_tx_dwork);
+
+	if (time_before_eq(mep->ccm_tx_end, jiffies)) {
+		/* Transmission period has ended */
+		mep->cc_ccm_tx_info.period = 0;
+		return;
+	}
+
+	skb = ccm_frame_build(mep, &mep->cc_ccm_tx_info);
+	if (skb)
+		ccm_frame_tx(skb);
+
+	interval_us = interval_to_us(mep->cc_config.exp_interval);
+	queue_delayed_work(system_wq, &mep->ccm_tx_dwork,
+			   usecs_to_jiffies(interval_us));
+}
+
 int br_cfm_mep_create(struct net_bridge *br,
 		      const u32 instance,
 		      struct br_cfm_mep_create *const create,
@@ -115,6 +294,7 @@ int br_cfm_mep_create(struct net_bridge *br,
 	rcu_assign_pointer(mep->b_port, p);
 
 	INIT_HLIST_HEAD(&mep->peer_mep_list);
+	INIT_DELAYED_WORK(&mep->ccm_tx_dwork, ccm_tx_work_expired);
 
 	hlist_add_tail_rcu(&mep->head, &br->mep_list);
 
@@ -135,6 +315,8 @@ static void mep_delete_implementation(struct net_bridge *br,
 		kfree_rcu(peer_mep, rcu);
 	}
 
+	cancel_delayed_work_sync(&mep->ccm_tx_dwork);
+
 	RCU_INIT_POINTER(mep->b_port, NULL);
 	hlist_del_rcu(&mep->head);
 	kfree_rcu(mep, rcu);
@@ -181,6 +363,32 @@ int br_cfm_mep_config_set(struct net_bridge *br,
 	return 0;
 }
 
+int br_cfm_cc_config_set(struct net_bridge *br,
+			 const u32 instance,
+			 const struct br_cfm_cc_config *const config,
+			 struct netlink_ext_ack *extack)
+{
+	struct br_cfm_mep *mep;
+
+	ASSERT_RTNL();
+
+	mep = br_mep_find(br, instance);
+	if (!mep) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "MEP instance does not exists");
+		return -ENOENT;
+	}
+
+	/* Check for no change in configuration */
+	if (memcmp(config, &mep->cc_config, sizeof(*config)) == 0)
+		return 0;
+
+	mep->cc_config = *config;
+	mep->ccm_tx_snumber = 1;
+
+	return 0;
+}
+
 int br_cfm_cc_peer_mep_add(struct net_bridge *br, const u32 instance,
 			   u32 mepid,
 			   struct netlink_ext_ack *extack)
@@ -245,6 +453,83 @@ int br_cfm_cc_peer_mep_remove(struct net_bridge *br, const u32 instance,
 	return 0;
 }
 
+int br_cfm_cc_rdi_set(struct net_bridge *br, const u32 instance,
+		      const bool rdi, struct netlink_ext_ack *extack)
+{
+	struct br_cfm_mep *mep;
+
+	ASSERT_RTNL();
+
+	mep = br_mep_find(br, instance);
+	if (!mep) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "MEP instance does not exists");
+		return -ENOENT;
+	}
+
+	mep->rdi = rdi;
+
+	return 0;
+}
+
+int br_cfm_cc_ccm_tx(struct net_bridge *br, const u32 instance,
+		     const struct br_cfm_cc_ccm_tx_info *const tx_info,
+		     struct netlink_ext_ack *extack)
+{
+	struct br_cfm_mep *mep;
+
+	ASSERT_RTNL();
+
+	mep = br_mep_find(br, instance);
+	if (!mep) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "MEP instance does not exists");
+		return -ENOENT;
+	}
+
+	if (memcmp(tx_info, &mep->cc_ccm_tx_info, sizeof(*tx_info)) == 0) {
+		/* No change in tx_info. */
+		if (mep->cc_ccm_tx_info.period == 0)
+			/* Transmission is not enabled - just return */
+			return 0;
+
+		/* Transmission is ongoing, the end time is recalculated */
+		mep->ccm_tx_end = jiffies +
+				  usecs_to_jiffies(tx_info->period * 1000000);
+		return 0;
+	}
+
+	if (tx_info->period == 0 && mep->cc_ccm_tx_info.period == 0)
+		/* Some change in info and transmission is not ongoing */
+		goto save;
+
+	if (tx_info->period != 0 && mep->cc_ccm_tx_info.period != 0) {
+		/* Some change in info and transmission is ongoing
+		 * The end time is recalculated
+		 */
+		mep->ccm_tx_end = jiffies +
+				  usecs_to_jiffies(tx_info->period * 1000000);
+
+		goto save;
+	}
+
+	if (tx_info->period == 0 && mep->cc_ccm_tx_info.period != 0) {
+		cancel_delayed_work_sync(&mep->ccm_tx_dwork);
+		goto save;
+	}
+
+	/* Start delayed work to transmit CCM frames. It is done with zero delay
+	 * to send first frame immediately
+	 */
+	mep->ccm_tx_end = jiffies + usecs_to_jiffies(tx_info->period * 1000000);
+	queue_delayed_work(system_wq, &mep->ccm_tx_dwork, 0);
+
+save:
+	mep->cc_ccm_tx_info = *tx_info;
+
+	return 0;
+}
+
 /* Deletes the CFM instances on a specific bridge port
  */
 void br_cfm_port_del(struct net_bridge *br, struct net_bridge_port *port)
diff --git a/net/bridge/br_private_cfm.h b/net/bridge/br_private_cfm.h
index 40fe982added..8d1b449acfbf 100644
--- a/net/bridge/br_private_cfm.h
+++ b/net/bridge/br_private_cfm.h
@@ -32,6 +32,24 @@ int br_cfm_mep_config_set(struct net_bridge *br,
 			  const struct br_cfm_mep_config *const config,
 			  struct netlink_ext_ack *extack);
 
+struct br_cfm_maid {
+	u8 data[CFM_MAID_LENGTH];
+};
+
+struct br_cfm_cc_config {
+	/* Expected received CCM PDU MAID. */
+	struct br_cfm_maid exp_maid;
+
+	/* Expected received CCM PDU interval. */
+	/* Transmitting CCM PDU interval when CCM tx is enabled. */
+	enum br_cfm_ccm_interval exp_interval;
+};
+
+int br_cfm_cc_config_set(struct net_bridge *br,
+			 const u32 instance,
+			 const struct br_cfm_cc_config *const config,
+			 struct netlink_ext_ack *extack);
+
 int br_cfm_cc_peer_mep_add(struct net_bridge *br, const u32 instance,
 			   u32 peer_mep_id,
 			   struct netlink_ext_ack *extack);
@@ -39,15 +57,51 @@ int br_cfm_cc_peer_mep_remove(struct net_bridge *br, const u32 instance,
 			      u32 peer_mep_id,
 			      struct netlink_ext_ack *extack);
 
+/* Transmitted CCM Remote Defect Indication status set.
+ * This RDI is inserted in transmitted CCM PDUs if CCM transmission is enabled.
+ * See br_cfm_cc_ccm_tx() with interval != BR_CFM_CCM_INTERVAL_NONE
+ */
+int br_cfm_cc_rdi_set(struct net_bridge *br, const u32 instance,
+		      const bool rdi, struct netlink_ext_ack *extack);
+
+/* OAM PDU Tx information */
+struct br_cfm_cc_ccm_tx_info {
+	struct mac_addr dmac;
+	/* The CCM will be transmitted for this period in seconds.
+	 * Call br_cfm_cc_ccm_tx before timeout to keep transmission alive.
+	 * When period is zero any ongoing transmission will be stopped.
+	 */
+	u32 period;
+
+	bool seq_no_update; /* Update Tx CCM sequence number */
+	bool if_tlv; /* Insert Interface Status TLV */
+	u8 if_tlv_value; /* Interface Status TLV value */
+	bool port_tlv; /* Insert Port Status TLV */
+	u8 port_tlv_value; /* Port Status TLV value */
+	/* Sender ID TLV ??
+	 * Organization-Specific TLV ??
+	 */
+};
+
+int br_cfm_cc_ccm_tx(struct net_bridge *br, const u32 instance,
+		     const struct br_cfm_cc_ccm_tx_info *const tx_info,
+		     struct netlink_ext_ack *extack);
+
 struct br_cfm_mep {
 	/* list header of MEP instances */
 	struct hlist_node		head;
 	u32				instance;
 	struct br_cfm_mep_create	create;
 	struct br_cfm_mep_config	config;
+	struct br_cfm_cc_config		cc_config;
+	struct br_cfm_cc_ccm_tx_info	cc_ccm_tx_info;
 	/* List of multiple peer MEPs */
 	struct hlist_head		peer_mep_list;
 	struct net_bridge_port __rcu	*b_port;
+	unsigned long			ccm_tx_end;
+	struct delayed_work		ccm_tx_dwork;
+	u32				ccm_tx_snumber;
+	bool				rdi;
 	struct rcu_head			rcu;
 };
 
-- 
cgit v1.2.3


From dc32cbb3dbd7da38c700d6e0fc6354df24920525 Mon Sep 17 00:00:00 2001
From: Henrik Bjoernlund <henrik.bjoernlund@microchip.com>
Date: Tue, 27 Oct 2020 10:02:47 +0000
Subject: bridge: cfm: Kernel space implementation of CFM. CCM frame RX added.

This is the third commit of the implementation of the CFM protocol
according to 802.1Q section 12.14.

Functionality is extended with CCM frame reception.
The MEP instance now contains CCM based status information.
Most important is the CCM defect status indicating if correct
CCM frames are received with the expected interval.

Signed-off-by: Henrik Bjoernlund  <henrik.bjoernlund@microchip.com>
Reviewed-by: Horatiu Vultur  <horatiu.vultur@microchip.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/cfm_bridge.h |   4 +
 net/bridge/br_cfm.c             | 269 ++++++++++++++++++++++++++++++++++++++++
 net/bridge/br_private_cfm.h     |  32 +++++
 3 files changed, 305 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/cfm_bridge.h b/include/uapi/linux/cfm_bridge.h
index 84a3817da90b..3c1cbd1db2f5 100644
--- a/include/uapi/linux/cfm_bridge.h
+++ b/include/uapi/linux/cfm_bridge.h
@@ -20,6 +20,10 @@
 					 CFM_IF_STATUS_TLV_LENGTH)
 #define CFM_FRAME_PRIO			7
 #define CFM_CCM_TLV_OFFSET		70
+#define CFM_CCM_PDU_MAID_OFFSET		10
+#define CFM_CCM_PDU_MEPID_OFFSET	8
+#define CFM_CCM_PDU_SEQNR_OFFSET	4
+#define CFM_CCM_PDU_TLV_OFFSET		74
 #define CFM_CCM_ITU_RESERVED_SIZE	16
 
 struct br_cfm_common_hdr {
diff --git a/net/bridge/br_cfm.c b/net/bridge/br_cfm.c
index 382e003f5b92..6331f731024f 100644
--- a/net/bridge/br_cfm.c
+++ b/net/bridge/br_cfm.c
@@ -101,6 +101,56 @@ static u32 interval_to_pdu(enum br_cfm_ccm_interval interval)
 	return 0;
 }
 
+/* Convert the CCM PDU value to interval on interface. */
+static u32 pdu_to_interval(u32 value)
+{
+	switch (value) {
+	case 0:
+		return BR_CFM_CCM_INTERVAL_NONE;
+	case 1:
+		return BR_CFM_CCM_INTERVAL_3_3_MS;
+	case 2:
+		return BR_CFM_CCM_INTERVAL_10_MS;
+	case 3:
+		return BR_CFM_CCM_INTERVAL_100_MS;
+	case 4:
+		return BR_CFM_CCM_INTERVAL_1_SEC;
+	case 5:
+		return BR_CFM_CCM_INTERVAL_10_SEC;
+	case 6:
+		return BR_CFM_CCM_INTERVAL_1_MIN;
+	case 7:
+		return BR_CFM_CCM_INTERVAL_10_MIN;
+	}
+	return BR_CFM_CCM_INTERVAL_NONE;
+}
+
+static void ccm_rx_timer_start(struct br_cfm_peer_mep *peer_mep)
+{
+	u32 interval_us;
+
+	interval_us = interval_to_us(peer_mep->mep->cc_config.exp_interval);
+	/* Function ccm_rx_dwork must be called with 1/4
+	 * of the configured CC 'expected_interval'
+	 * in order to detect CCM defect after 3.25 interval.
+	 */
+	queue_delayed_work(system_wq, &peer_mep->ccm_rx_dwork,
+			   usecs_to_jiffies(interval_us / 4));
+}
+
+static void cc_peer_enable(struct br_cfm_peer_mep *peer_mep)
+{
+	memset(&peer_mep->cc_status, 0, sizeof(peer_mep->cc_status));
+	peer_mep->ccm_rx_count_miss = 0;
+
+	ccm_rx_timer_start(peer_mep);
+}
+
+static void cc_peer_disable(struct br_cfm_peer_mep *peer_mep)
+{
+	cancel_delayed_work_sync(&peer_mep->ccm_rx_dwork);
+}
+
 static struct sk_buff *ccm_frame_build(struct br_cfm_mep *mep,
 				       const struct br_cfm_cc_ccm_tx_info *const tx_info)
 
@@ -232,6 +282,200 @@ static void ccm_tx_work_expired(struct work_struct *work)
 			   usecs_to_jiffies(interval_us));
 }
 
+/* This function is called with 1/4 of the configured CC 'expected_interval'
+ * in order to detect CCM defect after 3.25 interval.
+ */
+static void ccm_rx_work_expired(struct work_struct *work)
+{
+	struct br_cfm_peer_mep *peer_mep;
+	struct delayed_work *del_work;
+
+	del_work = to_delayed_work(work);
+	peer_mep = container_of(del_work, struct br_cfm_peer_mep, ccm_rx_dwork);
+
+	/* After 13 counts (4 * 3,25) then 3.25 intervals are expired */
+	if (peer_mep->ccm_rx_count_miss < 13) {
+		/* 3.25 intervals are NOT expired without CCM reception */
+		peer_mep->ccm_rx_count_miss++;
+
+		/* Start timer again */
+		ccm_rx_timer_start(peer_mep);
+	} else {
+		/* 3.25 intervals are expired without CCM reception.
+		 * CCM defect detected
+		 */
+		peer_mep->cc_status.ccm_defect = true;
+	}
+}
+
+static u32 ccm_tlv_extract(struct sk_buff *skb, u32 index,
+			   struct br_cfm_peer_mep *peer_mep)
+{
+	__be32 *s_tlv;
+	__be32 _s_tlv;
+	u32 h_s_tlv;
+	u8 *e_tlv;
+	u8 _e_tlv;
+
+	e_tlv = skb_header_pointer(skb, index, sizeof(_e_tlv), &_e_tlv);
+	if (!e_tlv)
+		return 0;
+
+	/* TLV is present - get the status TLV */
+	s_tlv = skb_header_pointer(skb,
+				   index,
+				   sizeof(_s_tlv), &_s_tlv);
+	if (!s_tlv)
+		return 0;
+
+	h_s_tlv = ntohl(*s_tlv);
+	if ((h_s_tlv >> 24) == CFM_IF_STATUS_TLV_TYPE) {
+		/* Interface status TLV */
+		peer_mep->cc_status.tlv_seen = true;
+		peer_mep->cc_status.if_tlv_value = (h_s_tlv & 0xFF);
+	}
+
+	if ((h_s_tlv >> 24) == CFM_PORT_STATUS_TLV_TYPE) {
+		/* Port status TLV */
+		peer_mep->cc_status.tlv_seen = true;
+		peer_mep->cc_status.port_tlv_value = (h_s_tlv & 0xFF);
+	}
+
+	/* The Sender ID TLV is not handled */
+	/* The Organization-Specific TLV is not handled */
+
+	/* Return the length of this tlv.
+	 * This is the length of the value field plus 3 bytes for size of type
+	 * field and length field
+	 */
+	return ((h_s_tlv >> 8) & 0xFFFF) + 3;
+}
+
+/* note: already called with rcu_read_lock */
+static int br_cfm_frame_rx(struct net_bridge_port *port, struct sk_buff *skb)
+{
+	u32 mdlevel, interval, size, index, max;
+	const struct br_cfm_common_hdr *hdr;
+	struct br_cfm_peer_mep *peer_mep;
+	const struct br_cfm_maid *maid;
+	struct br_cfm_common_hdr _hdr;
+	struct br_cfm_maid _maid;
+	struct br_cfm_mep *mep;
+	struct net_bridge *br;
+	__be32 *snumber;
+	__be32 _snumber;
+	__be16 *mepid;
+	__be16 _mepid;
+
+	if (port->state == BR_STATE_DISABLED)
+		return 0;
+
+	hdr = skb_header_pointer(skb, 0, sizeof(_hdr), &_hdr);
+	if (!hdr)
+		return 1;
+
+	br = port->br;
+	mep = br_mep_find_ifindex(br, port->dev->ifindex);
+	if (unlikely(!mep))
+		/* No MEP on this port - must be forwarded */
+		return 0;
+
+	mdlevel = hdr->mdlevel_version >> 5;
+	if (mdlevel > mep->config.mdlevel)
+		/* The level is above this MEP level - must be forwarded */
+		return 0;
+
+	if ((hdr->mdlevel_version & 0x1F) != 0) {
+		/* Invalid version */
+		mep->status.version_unexp_seen = true;
+		return 1;
+	}
+
+	if (mdlevel < mep->config.mdlevel) {
+		/* The level is below this MEP level */
+		mep->status.rx_level_low_seen = true;
+		return 1;
+	}
+
+	if (hdr->opcode == BR_CFM_OPCODE_CCM) {
+		/* CCM PDU received. */
+		/* MA ID is after common header + sequence number + MEP ID */
+		maid = skb_header_pointer(skb,
+					  CFM_CCM_PDU_MAID_OFFSET,
+					  sizeof(_maid), &_maid);
+		if (!maid)
+			return 1;
+		if (memcmp(maid->data, mep->cc_config.exp_maid.data,
+			   sizeof(maid->data)))
+			/* MA ID not as expected */
+			return 1;
+
+		/* MEP ID is after common header + sequence number */
+		mepid = skb_header_pointer(skb,
+					   CFM_CCM_PDU_MEPID_OFFSET,
+					   sizeof(_mepid), &_mepid);
+		if (!mepid)
+			return 1;
+		peer_mep = br_peer_mep_find(mep, (u32)ntohs(*mepid));
+		if (!peer_mep)
+			return 1;
+
+		/* Interval is in common header flags */
+		interval = hdr->flags & 0x07;
+		if (mep->cc_config.exp_interval != pdu_to_interval(interval))
+			/* Interval not as expected */
+			return 1;
+
+		/* A valid CCM frame is received */
+		if (peer_mep->cc_status.ccm_defect) {
+			peer_mep->cc_status.ccm_defect = false;
+
+			/* Start CCM RX timer */
+			ccm_rx_timer_start(peer_mep);
+		}
+
+		peer_mep->cc_status.seen = true;
+		peer_mep->ccm_rx_count_miss = 0;
+
+		/* RDI is in common header flags */
+		peer_mep->cc_status.rdi = (hdr->flags & 0x80) ? true : false;
+
+		/* Sequence number is after common header */
+		snumber = skb_header_pointer(skb,
+					     CFM_CCM_PDU_SEQNR_OFFSET,
+					     sizeof(_snumber), &_snumber);
+		if (!snumber)
+			return 1;
+		if (ntohl(*snumber) != (mep->ccm_rx_snumber + 1))
+			/* Unexpected sequence number */
+			peer_mep->cc_status.seq_unexp_seen = true;
+
+		mep->ccm_rx_snumber = ntohl(*snumber);
+
+		/* TLV end is after common header + sequence number + MEP ID +
+		 * MA ID + ITU reserved
+		 */
+		index = CFM_CCM_PDU_TLV_OFFSET;
+		max = 0;
+		do { /* Handle all TLVs */
+			size = ccm_tlv_extract(skb, index, peer_mep);
+			index += size;
+			max += 1;
+		} while (size != 0 && max < 4); /* Max four TLVs possible */
+
+		return 1;
+	}
+
+	mep->status.opcode_unexp_seen = true;
+
+	return 1;
+}
+
+static struct br_frame_type cfm_frame_type __read_mostly = {
+	.type = cpu_to_be16(ETH_P_CFM),
+	.frame_handler = br_cfm_frame_rx,
+};
+
 int br_cfm_mep_create(struct net_bridge *br,
 		      const u32 instance,
 		      struct br_cfm_mep_create *const create,
@@ -296,6 +540,9 @@ int br_cfm_mep_create(struct net_bridge *br,
 	INIT_HLIST_HEAD(&mep->peer_mep_list);
 	INIT_DELAYED_WORK(&mep->ccm_tx_dwork, ccm_tx_work_expired);
 
+	if (hlist_empty(&br->mep_list))
+		br_add_frame(br, &cfm_frame_type);
+
 	hlist_add_tail_rcu(&mep->head, &br->mep_list);
 
 	return 0;
@@ -311,6 +558,7 @@ static void mep_delete_implementation(struct net_bridge *br,
 
 	/* Empty and free peer MEP list */
 	hlist_for_each_entry_safe(peer_mep, n_store, &mep->peer_mep_list, head) {
+		cancel_delayed_work_sync(&peer_mep->ccm_rx_dwork);
 		hlist_del_rcu(&peer_mep->head);
 		kfree_rcu(peer_mep, rcu);
 	}
@@ -320,6 +568,9 @@ static void mep_delete_implementation(struct net_bridge *br,
 	RCU_INIT_POINTER(mep->b_port, NULL);
 	hlist_del_rcu(&mep->head);
 	kfree_rcu(mep, rcu);
+
+	if (hlist_empty(&br->mep_list))
+		br_del_frame(br, &cfm_frame_type);
 }
 
 int br_cfm_mep_delete(struct net_bridge *br,
@@ -368,6 +619,7 @@ int br_cfm_cc_config_set(struct net_bridge *br,
 			 const struct br_cfm_cc_config *const config,
 			 struct netlink_ext_ack *extack)
 {
+	struct br_cfm_peer_mep *peer_mep;
 	struct br_cfm_mep *mep;
 
 	ASSERT_RTNL();
@@ -383,7 +635,18 @@ int br_cfm_cc_config_set(struct net_bridge *br,
 	if (memcmp(config, &mep->cc_config, sizeof(*config)) == 0)
 		return 0;
 
+	if (config->enable && !mep->cc_config.enable)
+		/* CC is enabled */
+		hlist_for_each_entry(peer_mep, &mep->peer_mep_list, head)
+			cc_peer_enable(peer_mep);
+
+	if (!config->enable && mep->cc_config.enable)
+		/* CC is disabled */
+		hlist_for_each_entry(peer_mep, &mep->peer_mep_list, head)
+			cc_peer_disable(peer_mep);
+
 	mep->cc_config = *config;
+	mep->ccm_rx_snumber = 0;
 	mep->ccm_tx_snumber = 1;
 
 	return 0;
@@ -418,6 +681,10 @@ int br_cfm_cc_peer_mep_add(struct net_bridge *br, const u32 instance,
 
 	peer_mep->mepid = mepid;
 	peer_mep->mep = mep;
+	INIT_DELAYED_WORK(&peer_mep->ccm_rx_dwork, ccm_rx_work_expired);
+
+	if (mep->cc_config.enable)
+		cc_peer_enable(peer_mep);
 
 	hlist_add_tail_rcu(&peer_mep->head, &mep->peer_mep_list);
 
@@ -447,6 +714,8 @@ int br_cfm_cc_peer_mep_remove(struct net_bridge *br, const u32 instance,
 		return -ENOENT;
 	}
 
+	cc_peer_disable(peer_mep);
+
 	hlist_del_rcu(&peer_mep->head);
 	kfree_rcu(peer_mep, rcu);
 
diff --git a/net/bridge/br_private_cfm.h b/net/bridge/br_private_cfm.h
index 8d1b449acfbf..a43a5e7fa2c3 100644
--- a/net/bridge/br_private_cfm.h
+++ b/net/bridge/br_private_cfm.h
@@ -43,6 +43,8 @@ struct br_cfm_cc_config {
 	/* Expected received CCM PDU interval. */
 	/* Transmitting CCM PDU interval when CCM tx is enabled. */
 	enum br_cfm_ccm_interval exp_interval;
+
+	bool enable; /* Enable/disable CCM PDU handling */
 };
 
 int br_cfm_cc_config_set(struct net_bridge *br,
@@ -87,6 +89,31 @@ int br_cfm_cc_ccm_tx(struct net_bridge *br, const u32 instance,
 		     const struct br_cfm_cc_ccm_tx_info *const tx_info,
 		     struct netlink_ext_ack *extack);
 
+struct br_cfm_mep_status {
+	/* Indications that an OAM PDU has been seen. */
+	bool opcode_unexp_seen; /* RX of OAM PDU with unexpected opcode */
+	bool version_unexp_seen; /* RX of OAM PDU with unexpected version */
+	bool rx_level_low_seen; /* Rx of OAM PDU with level low */
+};
+
+struct br_cfm_cc_peer_status {
+	/* This CCM related status is based on the latest received CCM PDU. */
+	u8 port_tlv_value; /* Port Status TLV value */
+	u8 if_tlv_value; /* Interface Status TLV value */
+
+	/* CCM has not been received for 3.25 intervals */
+	u8 ccm_defect:1;
+
+	/* (RDI == 1) for last received CCM PDU */
+	u8 rdi:1;
+
+	/* Indications that a CCM PDU has been seen. */
+	u8 seen:1; /* CCM PDU received */
+	u8 tlv_seen:1; /* CCM PDU with TLV received */
+	/* CCM PDU with unexpected sequence number received */
+	u8 seq_unexp_seen:1;
+};
+
 struct br_cfm_mep {
 	/* list header of MEP instances */
 	struct hlist_node		head;
@@ -101,6 +128,8 @@ struct br_cfm_mep {
 	unsigned long			ccm_tx_end;
 	struct delayed_work		ccm_tx_dwork;
 	u32				ccm_tx_snumber;
+	u32				ccm_rx_snumber;
+	struct br_cfm_mep_status	status;
 	bool				rdi;
 	struct rcu_head			rcu;
 };
@@ -108,7 +137,10 @@ struct br_cfm_mep {
 struct br_cfm_peer_mep {
 	struct hlist_node		head;
 	struct br_cfm_mep		*mep;
+	struct delayed_work		ccm_rx_dwork;
 	u32				mepid;
+	struct br_cfm_cc_peer_status	cc_status;
+	u32				ccm_rx_count_miss;
 	struct rcu_head			rcu;
 };
 
-- 
cgit v1.2.3


From 2be665c3940d367e0a2a8128eb4985ce323f99a3 Mon Sep 17 00:00:00 2001
From: Henrik Bjoernlund <henrik.bjoernlund@microchip.com>
Date: Tue, 27 Oct 2020 10:02:48 +0000
Subject: bridge: cfm: Netlink SET configuration Interface.

This is the implementation of the CFM netlink interface for
setting configuration information.

Add new nested netlink attributes. These attributes are used by the
user space to create/delete/configure CFM instances.

SETLINK:
    IFLA_BRIDGE_CFM:
        Indicates that the following attributes are CFM attributes.

    IFLA_BRIDGE_CFM_MEP_CREATE:
        This indicates that a MEP instance must be created.
    IFLA_BRIDGE_CFM_MEP_DELETE:
        This indicates that a MEP instance must be deleted.
    IFLA_BRIDGE_CFM_MEP_CONFIG:
        This indicates that a MEP instance must be configured.
    IFLA_BRIDGE_CFM_CC_CONFIG:
        This indicates that a MEP instance Continuity Check (CC)
        functionality must be configured.
    IFLA_BRIDGE_CFM_CC_PEER_MEP_ADD:
        This indicates that a CC Peer MEP must be added.
    IFLA_BRIDGE_CFM_CC_PEER_MEP_REMOVE:
        This indicates that a CC Peer MEP must be removed.
    IFLA_BRIDGE_CFM_CC_CCM_TX:
        This indicates that the CC transmitted CCM PDU must be configured.
    IFLA_BRIDGE_CFM_CC_RDI:
        This indicates that the CC transmitted CCM PDU RDI must be
        configured.

CFM nested attribute has the following attributes in next level.

SETLINK RTEXT_FILTER_CFM_CONFIG:
    IFLA_BRIDGE_CFM_MEP_CREATE_INSTANCE:
        The created MEP instance number.
        The type is u32.
    IFLA_BRIDGE_CFM_MEP_CREATE_DOMAIN:
        The created MEP domain.
        The type is u32 (br_cfm_domain).
        It must be BR_CFM_PORT.
        This means that CFM frames are transmitted and received
        directly on the port - untagged. Not in a VLAN.
    IFLA_BRIDGE_CFM_MEP_CREATE_DIRECTION:
        The created MEP direction.
        The type is u32 (br_cfm_mep_direction).
        It must be BR_CFM_MEP_DIRECTION_DOWN.
        This means that CFM frames are transmitted and received on
        the port. Not in the bridge.
    IFLA_BRIDGE_CFM_MEP_CREATE_IFINDEX:
        The created MEP residence port ifindex.
        The type is u32 (ifindex).

    IFLA_BRIDGE_CFM_MEP_DELETE_INSTANCE:
        The deleted MEP instance number.
        The type is u32.

    IFLA_BRIDGE_CFM_MEP_CONFIG_INSTANCE:
        The configured MEP instance number.
        The type is u32.
    IFLA_BRIDGE_CFM_MEP_CONFIG_UNICAST_MAC:
        The configured MEP unicast MAC address.
        The type is 6*u8 (array).
        This is used as SMAC in all transmitted CFM frames.
    IFLA_BRIDGE_CFM_MEP_CONFIG_MDLEVEL:
        The configured MEP MD level.
        The type is u32.
        It must be in the range 1-7.
        No CFM frames are passing through this MEP on lower levels.
    IFLA_BRIDGE_CFM_MEP_CONFIG_MEPID:
        The configured MEP ID.
        The type is u32.
        It must be in the range 0-0x1FFF.
        This MEP ID is inserted in any transmitted CCM frame.

    IFLA_BRIDGE_CFM_CC_CONFIG_INSTANCE:
        The configured MEP instance number.
        The type is u32.
    IFLA_BRIDGE_CFM_CC_CONFIG_ENABLE:
        The Continuity Check (CC) functionality is enabled or disabled.
        The type is u32 (bool).
    IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL:
        The CC expected receive interval of CCM frames.
        The type is u32 (br_cfm_ccm_interval).
        This is also the transmission interval of CCM frames when enabled.
    IFLA_BRIDGE_CFM_CC_CONFIG_EXP_MAID:
        The CC expected receive MAID in CCM frames.
        The type is CFM_MAID_LENGTH*u8.
        This MAID is also inserted in transmitted CCM frames.

    IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE:
        The configured MEP instance number.
        The type is u32.
    IFLA_BRIDGE_CFM_CC_PEER_MEPID:
        The CC Peer MEP ID added.
        The type is u32.
        When a Peer MEP ID is added and CC is enabled it is expected to
        receive CCM frames from that Peer MEP.

    IFLA_BRIDGE_CFM_CC_RDI_INSTANCE:
        The configured MEP instance number.
        The type is u32.
    IFLA_BRIDGE_CFM_CC_RDI_RDI:
        The RDI that is inserted in transmitted CCM PDU.
        The type is u32 (bool).

    IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE:
        The configured MEP instance number.
        The type is u32.
    IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC:
        The transmitted CCM frame destination MAC address.
        The type is 6*u8 (array).
        This is used as DMAC in all transmitted CFM frames.
    IFLA_BRIDGE_CFM_CC_CCM_TX_SEQ_NO_UPDATE:
        The transmitted CCM frame update (increment) of sequence
        number is enabled or disabled.
        The type is u32 (bool).
    IFLA_BRIDGE_CFM_CC_CCM_TX_PERIOD:
        The period of time during which CCM frames are transmitted.
        The type is u32.
        The time is given in seconds. SETLINK IFLA_BRIDGE_CFM_CC_CCM_TX
        must be done before timeout to keep transmission alive.
        When period is zero any ongoing CCM frame transmission
        will be stopped.
    IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV:
        The transmitted CCM frame update with Interface Status TLV
        is enabled or disabled.
        The type is u32 (bool).
    IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV_VALUE:
        The transmitted Interface Status TLV value field.
        The type is u8.
    IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV:
        The transmitted CCM frame update with Port Status TLV is enabled
        or disabled.
        The type is u32 (bool).
    IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV_VALUE:
        The transmitted Port Status TLV value field.
        The type is u8.

Signed-off-by: Henrik Bjoernlund  <henrik.bjoernlund@microchip.com>
Reviewed-by: Horatiu Vultur  <horatiu.vultur@microchip.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/if_bridge.h |  90 ++++++++
 include/uapi/linux/rtnetlink.h |   1 +
 net/bridge/Makefile            |   2 +-
 net/bridge/br_cfm.c            |   5 +
 net/bridge/br_cfm_netlink.c    | 453 +++++++++++++++++++++++++++++++++++++++++
 net/bridge/br_netlink.c        |   5 +
 net/bridge/br_private.h        |  17 +-
 7 files changed, 571 insertions(+), 2 deletions(-)
 create mode 100644 net/bridge/br_cfm_netlink.c

(limited to 'include/uapi')

diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index 4c687686aa8f..94cc9444d749 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -121,6 +121,7 @@ enum {
 	IFLA_BRIDGE_VLAN_INFO,
 	IFLA_BRIDGE_VLAN_TUNNEL_INFO,
 	IFLA_BRIDGE_MRP,
+	IFLA_BRIDGE_CFM,
 	__IFLA_BRIDGE_MAX,
 };
 #define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1)
@@ -328,6 +329,95 @@ struct br_mrp_start_in_test {
 	__u16 in_id;
 };
 
+enum {
+	IFLA_BRIDGE_CFM_UNSPEC,
+	IFLA_BRIDGE_CFM_MEP_CREATE,
+	IFLA_BRIDGE_CFM_MEP_DELETE,
+	IFLA_BRIDGE_CFM_MEP_CONFIG,
+	IFLA_BRIDGE_CFM_CC_CONFIG,
+	IFLA_BRIDGE_CFM_CC_PEER_MEP_ADD,
+	IFLA_BRIDGE_CFM_CC_PEER_MEP_REMOVE,
+	IFLA_BRIDGE_CFM_CC_RDI,
+	IFLA_BRIDGE_CFM_CC_CCM_TX,
+	__IFLA_BRIDGE_CFM_MAX,
+};
+
+#define IFLA_BRIDGE_CFM_MAX (__IFLA_BRIDGE_CFM_MAX - 1)
+
+enum {
+	IFLA_BRIDGE_CFM_MEP_CREATE_UNSPEC,
+	IFLA_BRIDGE_CFM_MEP_CREATE_INSTANCE,
+	IFLA_BRIDGE_CFM_MEP_CREATE_DOMAIN,
+	IFLA_BRIDGE_CFM_MEP_CREATE_DIRECTION,
+	IFLA_BRIDGE_CFM_MEP_CREATE_IFINDEX,
+	__IFLA_BRIDGE_CFM_MEP_CREATE_MAX,
+};
+
+#define IFLA_BRIDGE_CFM_MEP_CREATE_MAX (__IFLA_BRIDGE_CFM_MEP_CREATE_MAX - 1)
+
+enum {
+	IFLA_BRIDGE_CFM_MEP_DELETE_UNSPEC,
+	IFLA_BRIDGE_CFM_MEP_DELETE_INSTANCE,
+	__IFLA_BRIDGE_CFM_MEP_DELETE_MAX,
+};
+
+#define IFLA_BRIDGE_CFM_MEP_DELETE_MAX (__IFLA_BRIDGE_CFM_MEP_DELETE_MAX - 1)
+
+enum {
+	IFLA_BRIDGE_CFM_MEP_CONFIG_UNSPEC,
+	IFLA_BRIDGE_CFM_MEP_CONFIG_INSTANCE,
+	IFLA_BRIDGE_CFM_MEP_CONFIG_UNICAST_MAC,
+	IFLA_BRIDGE_CFM_MEP_CONFIG_MDLEVEL,
+	IFLA_BRIDGE_CFM_MEP_CONFIG_MEPID,
+	__IFLA_BRIDGE_CFM_MEP_CONFIG_MAX,
+};
+
+#define IFLA_BRIDGE_CFM_MEP_CONFIG_MAX (__IFLA_BRIDGE_CFM_MEP_CONFIG_MAX - 1)
+
+enum {
+	IFLA_BRIDGE_CFM_CC_CONFIG_UNSPEC,
+	IFLA_BRIDGE_CFM_CC_CONFIG_INSTANCE,
+	IFLA_BRIDGE_CFM_CC_CONFIG_ENABLE,
+	IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL,
+	IFLA_BRIDGE_CFM_CC_CONFIG_EXP_MAID,
+	__IFLA_BRIDGE_CFM_CC_CONFIG_MAX,
+};
+
+#define IFLA_BRIDGE_CFM_CC_CONFIG_MAX (__IFLA_BRIDGE_CFM_CC_CONFIG_MAX - 1)
+
+enum {
+	IFLA_BRIDGE_CFM_CC_PEER_MEP_UNSPEC,
+	IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE,
+	IFLA_BRIDGE_CFM_CC_PEER_MEPID,
+	__IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX,
+};
+
+#define IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX (__IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX - 1)
+
+enum {
+	IFLA_BRIDGE_CFM_CC_RDI_UNSPEC,
+	IFLA_BRIDGE_CFM_CC_RDI_INSTANCE,
+	IFLA_BRIDGE_CFM_CC_RDI_RDI,
+	__IFLA_BRIDGE_CFM_CC_RDI_MAX,
+};
+
+#define IFLA_BRIDGE_CFM_CC_RDI_MAX (__IFLA_BRIDGE_CFM_CC_RDI_MAX - 1)
+
+enum {
+	IFLA_BRIDGE_CFM_CC_CCM_TX_UNSPEC,
+	IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE,
+	IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC,
+	IFLA_BRIDGE_CFM_CC_CCM_TX_SEQ_NO_UPDATE,
+	IFLA_BRIDGE_CFM_CC_CCM_TX_PERIOD,
+	IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV,
+	IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV_VALUE,
+	IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV,
+	IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV_VALUE,
+	__IFLA_BRIDGE_CFM_CC_CCM_TX_MAX,
+};
+
+#define IFLA_BRIDGE_CFM_CC_CCM_TX_MAX (__IFLA_BRIDGE_CFM_CC_CCM_TX_MAX - 1)
+
 struct bridge_stp_xstats {
 	__u64 transition_blk;
 	__u64 transition_fwd;
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 9b814c92de12..ffc9ca1f2bdb 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -779,6 +779,7 @@ enum {
 #define RTEXT_FILTER_BRVLAN_COMPRESSED	(1 << 2)
 #define	RTEXT_FILTER_SKIP_STATS	(1 << 3)
 #define RTEXT_FILTER_MRP	(1 << 4)
+#define RTEXT_FILTER_CFM_CONFIG	(1 << 5)
 
 /* End of information exported to user level */
 
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index ddc0a9192348..4702702a74d3 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -28,4 +28,4 @@ obj-$(CONFIG_NETFILTER) += netfilter/
 
 bridge-$(CONFIG_BRIDGE_MRP)	+= br_mrp_switchdev.o br_mrp.o br_mrp_netlink.o
 
-bridge-$(CONFIG_BRIDGE_CFM)	+= br_cfm.o
+bridge-$(CONFIG_BRIDGE_CFM)	+= br_cfm.o br_cfm_netlink.o
diff --git a/net/bridge/br_cfm.c b/net/bridge/br_cfm.c
index 6331f731024f..3912fedfd289 100644
--- a/net/bridge/br_cfm.c
+++ b/net/bridge/br_cfm.c
@@ -799,6 +799,11 @@ save:
 	return 0;
 }
 
+bool br_cfm_created(struct net_bridge *br)
+{
+	return !hlist_empty(&br->mep_list);
+}
+
 /* Deletes the CFM instances on a specific bridge port
  */
 void br_cfm_port_del(struct net_bridge *br, struct net_bridge_port *port)
diff --git a/net/bridge/br_cfm_netlink.c b/net/bridge/br_cfm_netlink.c
new file mode 100644
index 000000000000..c75f4c788eac
--- /dev/null
+++ b/net/bridge/br_cfm_netlink.c
@@ -0,0 +1,453 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <net/genetlink.h>
+
+#include "br_private.h"
+#include "br_private_cfm.h"
+
+static const struct nla_policy
+br_cfm_mep_create_policy[IFLA_BRIDGE_CFM_MEP_CREATE_MAX + 1] = {
+	[IFLA_BRIDGE_CFM_MEP_CREATE_UNSPEC]	= { .type = NLA_REJECT },
+	[IFLA_BRIDGE_CFM_MEP_CREATE_INSTANCE]	= { .type = NLA_U32 },
+	[IFLA_BRIDGE_CFM_MEP_CREATE_DOMAIN]	= { .type = NLA_U32 },
+	[IFLA_BRIDGE_CFM_MEP_CREATE_DIRECTION]	= { .type = NLA_U32 },
+	[IFLA_BRIDGE_CFM_MEP_CREATE_IFINDEX]	= { .type = NLA_U32 },
+};
+
+static const struct nla_policy
+br_cfm_mep_delete_policy[IFLA_BRIDGE_CFM_MEP_DELETE_MAX + 1] = {
+	[IFLA_BRIDGE_CFM_MEP_DELETE_UNSPEC]	= { .type = NLA_REJECT },
+	[IFLA_BRIDGE_CFM_MEP_DELETE_INSTANCE]	= { .type = NLA_U32 },
+};
+
+static const struct nla_policy
+br_cfm_mep_config_policy[IFLA_BRIDGE_CFM_MEP_CONFIG_MAX + 1] = {
+	[IFLA_BRIDGE_CFM_MEP_CONFIG_UNSPEC]	 = { .type = NLA_REJECT },
+	[IFLA_BRIDGE_CFM_MEP_CONFIG_INSTANCE]	 = { .type = NLA_U32 },
+	[IFLA_BRIDGE_CFM_MEP_CONFIG_UNICAST_MAC] = NLA_POLICY_ETH_ADDR,
+	[IFLA_BRIDGE_CFM_MEP_CONFIG_MDLEVEL]	 = NLA_POLICY_MAX(NLA_U32, 7),
+	[IFLA_BRIDGE_CFM_MEP_CONFIG_MEPID]	 = NLA_POLICY_MAX(NLA_U32, 0x1FFF),
+};
+
+static const struct nla_policy
+br_cfm_cc_config_policy[IFLA_BRIDGE_CFM_CC_CONFIG_MAX + 1] = {
+	[IFLA_BRIDGE_CFM_CC_CONFIG_UNSPEC]	 = { .type = NLA_REJECT },
+	[IFLA_BRIDGE_CFM_CC_CONFIG_INSTANCE]	 = { .type = NLA_U32 },
+	[IFLA_BRIDGE_CFM_CC_CONFIG_ENABLE]	 = { .type = NLA_U32 },
+	[IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL] = { .type = NLA_U32 },
+	[IFLA_BRIDGE_CFM_CC_CONFIG_EXP_MAID]	 = {
+	.type = NLA_BINARY, .len = CFM_MAID_LENGTH },
+};
+
+static const struct nla_policy
+br_cfm_cc_peer_mep_policy[IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX + 1] = {
+	[IFLA_BRIDGE_CFM_CC_PEER_MEP_UNSPEC]	= { .type = NLA_REJECT },
+	[IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE]	= { .type = NLA_U32 },
+	[IFLA_BRIDGE_CFM_CC_PEER_MEPID]		= NLA_POLICY_MAX(NLA_U32, 0x1FFF),
+};
+
+static const struct nla_policy
+br_cfm_cc_rdi_policy[IFLA_BRIDGE_CFM_CC_RDI_MAX + 1] = {
+	[IFLA_BRIDGE_CFM_CC_RDI_UNSPEC]		= { .type = NLA_REJECT },
+	[IFLA_BRIDGE_CFM_CC_RDI_INSTANCE]	= { .type = NLA_U32 },
+	[IFLA_BRIDGE_CFM_CC_RDI_RDI]		= { .type = NLA_U32 },
+};
+
+static const struct nla_policy
+br_cfm_cc_ccm_tx_policy[IFLA_BRIDGE_CFM_CC_CCM_TX_MAX + 1] = {
+	[IFLA_BRIDGE_CFM_CC_CCM_TX_UNSPEC]	   = { .type = NLA_REJECT },
+	[IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE]	   = { .type = NLA_U32 },
+	[IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC]	   = NLA_POLICY_ETH_ADDR,
+	[IFLA_BRIDGE_CFM_CC_CCM_TX_SEQ_NO_UPDATE]  = { .type = NLA_U32 },
+	[IFLA_BRIDGE_CFM_CC_CCM_TX_PERIOD]	   = { .type = NLA_U32 },
+	[IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV]	   = { .type = NLA_U32 },
+	[IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV_VALUE]   = { .type = NLA_U8 },
+	[IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV]	   = { .type = NLA_U32 },
+	[IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV_VALUE] = { .type = NLA_U8 },
+};
+
+static const struct nla_policy
+br_cfm_policy[IFLA_BRIDGE_CFM_MAX + 1] = {
+	[IFLA_BRIDGE_CFM_UNSPEC]		= { .type = NLA_REJECT },
+	[IFLA_BRIDGE_CFM_MEP_CREATE]		=
+				NLA_POLICY_NESTED(br_cfm_mep_create_policy),
+	[IFLA_BRIDGE_CFM_MEP_DELETE]		=
+				NLA_POLICY_NESTED(br_cfm_mep_delete_policy),
+	[IFLA_BRIDGE_CFM_MEP_CONFIG]		=
+				NLA_POLICY_NESTED(br_cfm_mep_config_policy),
+	[IFLA_BRIDGE_CFM_CC_CONFIG]		=
+				NLA_POLICY_NESTED(br_cfm_cc_config_policy),
+	[IFLA_BRIDGE_CFM_CC_PEER_MEP_ADD]	=
+				NLA_POLICY_NESTED(br_cfm_cc_peer_mep_policy),
+	[IFLA_BRIDGE_CFM_CC_PEER_MEP_REMOVE]	=
+				NLA_POLICY_NESTED(br_cfm_cc_peer_mep_policy),
+	[IFLA_BRIDGE_CFM_CC_RDI]		=
+				NLA_POLICY_NESTED(br_cfm_cc_rdi_policy),
+	[IFLA_BRIDGE_CFM_CC_CCM_TX]		=
+				NLA_POLICY_NESTED(br_cfm_cc_ccm_tx_policy),
+};
+
+static int br_mep_create_parse(struct net_bridge *br, struct nlattr *attr,
+			       struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[IFLA_BRIDGE_CFM_MEP_CREATE_MAX + 1];
+	struct br_cfm_mep_create create;
+	u32 instance;
+	int err;
+
+	err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_MEP_CREATE_MAX, attr,
+			       br_cfm_mep_create_policy, extack);
+	if (err)
+		return err;
+
+	if (!tb[IFLA_BRIDGE_CFM_MEP_CREATE_INSTANCE]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing INSTANCE attribute");
+		return -EINVAL;
+	}
+	if (!tb[IFLA_BRIDGE_CFM_MEP_CREATE_DOMAIN]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing DOMAIN attribute");
+		return -EINVAL;
+	}
+	if (!tb[IFLA_BRIDGE_CFM_MEP_CREATE_DIRECTION]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing DIRECTION attribute");
+		return -EINVAL;
+	}
+	if (!tb[IFLA_BRIDGE_CFM_MEP_CREATE_IFINDEX]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing IFINDEX attribute");
+		return -EINVAL;
+	}
+
+	memset(&create, 0, sizeof(create));
+
+	instance =  nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_CREATE_INSTANCE]);
+	create.domain = nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_CREATE_DOMAIN]);
+	create.direction = nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_CREATE_DIRECTION]);
+	create.ifindex = nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_CREATE_IFINDEX]);
+
+	return br_cfm_mep_create(br, instance, &create, extack);
+}
+
+static int br_mep_delete_parse(struct net_bridge *br, struct nlattr *attr,
+			       struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[IFLA_BRIDGE_CFM_MEP_DELETE_MAX + 1];
+	u32 instance;
+	int err;
+
+	err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_MEP_DELETE_MAX, attr,
+			       br_cfm_mep_delete_policy, extack);
+	if (err)
+		return err;
+
+	if (!tb[IFLA_BRIDGE_CFM_MEP_DELETE_INSTANCE]) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Missing INSTANCE attribute");
+		return -EINVAL;
+	}
+
+	instance =  nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_DELETE_INSTANCE]);
+
+	return br_cfm_mep_delete(br, instance, extack);
+}
+
+static int br_mep_config_parse(struct net_bridge *br, struct nlattr *attr,
+			       struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[IFLA_BRIDGE_CFM_MEP_CONFIG_MAX + 1];
+	struct br_cfm_mep_config config;
+	u32 instance;
+	int err;
+
+	err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_MEP_CONFIG_MAX, attr,
+			       br_cfm_mep_config_policy, extack);
+	if (err)
+		return err;
+
+	if (!tb[IFLA_BRIDGE_CFM_MEP_CONFIG_INSTANCE]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing INSTANCE attribute");
+		return -EINVAL;
+	}
+	if (!tb[IFLA_BRIDGE_CFM_MEP_CONFIG_UNICAST_MAC]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing UNICAST_MAC attribute");
+		return -EINVAL;
+	}
+	if (!tb[IFLA_BRIDGE_CFM_MEP_CONFIG_MDLEVEL]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing MDLEVEL attribute");
+		return -EINVAL;
+	}
+	if (!tb[IFLA_BRIDGE_CFM_MEP_CONFIG_MEPID]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing MEPID attribute");
+		return -EINVAL;
+	}
+
+	memset(&config, 0, sizeof(config));
+
+	instance =  nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_CONFIG_INSTANCE]);
+	nla_memcpy(&config.unicast_mac.addr,
+		   tb[IFLA_BRIDGE_CFM_MEP_CONFIG_UNICAST_MAC],
+		   sizeof(config.unicast_mac.addr));
+	config.mdlevel = nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_CONFIG_MDLEVEL]);
+	config.mepid = nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_CONFIG_MEPID]);
+
+	return br_cfm_mep_config_set(br, instance, &config, extack);
+}
+
+static int br_cc_config_parse(struct net_bridge *br, struct nlattr *attr,
+			      struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[IFLA_BRIDGE_CFM_CC_CONFIG_MAX + 1];
+	struct br_cfm_cc_config config;
+	u32 instance;
+	int err;
+
+	err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_CC_CONFIG_MAX, attr,
+			       br_cfm_cc_config_policy, extack);
+	if (err)
+		return err;
+
+	if (!tb[IFLA_BRIDGE_CFM_CC_CONFIG_INSTANCE]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing INSTANCE attribute");
+		return -EINVAL;
+	}
+	if (!tb[IFLA_BRIDGE_CFM_CC_CONFIG_ENABLE]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing ENABLE attribute");
+		return -EINVAL;
+	}
+	if (!tb[IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing INTERVAL attribute");
+		return -EINVAL;
+	}
+	if (!tb[IFLA_BRIDGE_CFM_CC_CONFIG_EXP_MAID]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing MAID attribute");
+		return -EINVAL;
+	}
+
+	memset(&config, 0, sizeof(config));
+
+	instance =  nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CONFIG_INSTANCE]);
+	config.enable = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CONFIG_ENABLE]);
+	config.exp_interval = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL]);
+	nla_memcpy(&config.exp_maid.data, tb[IFLA_BRIDGE_CFM_CC_CONFIG_EXP_MAID],
+		   sizeof(config.exp_maid.data));
+
+	return br_cfm_cc_config_set(br, instance, &config, extack);
+}
+
+static int br_cc_peer_mep_add_parse(struct net_bridge *br, struct nlattr *attr,
+				    struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX + 1];
+	u32 instance, peer_mep_id;
+	int err;
+
+	err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX, attr,
+			       br_cfm_cc_peer_mep_policy, extack);
+	if (err)
+		return err;
+
+	if (!tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing INSTANCE attribute");
+		return -EINVAL;
+	}
+	if (!tb[IFLA_BRIDGE_CFM_CC_PEER_MEPID]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing PEER_MEP_ID attribute");
+		return -EINVAL;
+	}
+
+	instance =  nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE]);
+	peer_mep_id =  nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_PEER_MEPID]);
+
+	return br_cfm_cc_peer_mep_add(br, instance, peer_mep_id, extack);
+}
+
+/* Handle an IFLA_BRIDGE_CFM_CC_PEER_MEP_REMOVE request.
+ *
+ * Uses the same attribute space and policy as the peer MEP add request:
+ * INSTANCE and PEER_MEPID are mandatory, and a missing one sets an extack
+ * message and returns -EINVAL. Otherwise the result of
+ * br_cfm_cc_peer_mep_remove() is returned.
+ */
+static int br_cc_peer_mep_remove_parse(struct net_bridge *br, struct nlattr *attr,
+				       struct netlink_ext_ack *extack)
+{
+	struct nlattr *attrs[IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX + 1];
+	int ret;
+
+	ret = nla_parse_nested(attrs, IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX, attr,
+			       br_cfm_cc_peer_mep_policy, extack);
+	if (ret)
+		return ret;
+
+	if (!attrs[IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing INSTANCE attribute");
+		return -EINVAL;
+	}
+	if (!attrs[IFLA_BRIDGE_CFM_CC_PEER_MEPID]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing PEER_MEP_ID attribute");
+		return -EINVAL;
+	}
+
+	/* Both attributes are present, so they can be read unconditionally */
+	return br_cfm_cc_peer_mep_remove(br,
+			nla_get_u32(attrs[IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE]),
+			nla_get_u32(attrs[IFLA_BRIDGE_CFM_CC_PEER_MEPID]),
+			extack);
+}
+
+/* Handle an IFLA_BRIDGE_CFM_CC_RDI request.
+ *
+ * The nest is validated against br_cfm_cc_rdi_policy. The INSTANCE and RDI
+ * attributes are both required: when one is absent an extack message is set
+ * and -EINVAL is returned. Otherwise the two u32 values are handed to
+ * br_cfm_cc_rdi_set() and its result is returned.
+ */
+static int br_cc_rdi_parse(struct net_bridge *br, struct nlattr *attr,
+			   struct netlink_ext_ack *extack)
+{
+	struct nlattr *attrs[IFLA_BRIDGE_CFM_CC_RDI_MAX + 1];
+	int ret;
+
+	ret = nla_parse_nested(attrs, IFLA_BRIDGE_CFM_CC_RDI_MAX, attr,
+			       br_cfm_cc_rdi_policy, extack);
+	if (ret)
+		return ret;
+
+	if (!attrs[IFLA_BRIDGE_CFM_CC_RDI_INSTANCE]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing INSTANCE attribute");
+		return -EINVAL;
+	}
+	if (!attrs[IFLA_BRIDGE_CFM_CC_RDI_RDI]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing RDI attribute");
+		return -EINVAL;
+	}
+
+	/* Both attributes are present, so they can be read unconditionally */
+	return br_cfm_cc_rdi_set(br,
+				 nla_get_u32(attrs[IFLA_BRIDGE_CFM_CC_RDI_INSTANCE]),
+				 nla_get_u32(attrs[IFLA_BRIDGE_CFM_CC_RDI_RDI]),
+				 extack);
+}
+
+/* Handle an IFLA_BRIDGE_CFM_CC_CCM_TX request.
+ *
+ * The nest is validated against br_cfm_cc_ccm_tx_policy and every attribute
+ * (INSTANCE, DMAC, SEQ_NO_UPDATE, PERIOD, IF_TLV, IF_TLV_VALUE, PORT_TLV and
+ * PORT_TLV_VALUE) is mandatory; a missing one sets an extack message and
+ * yields -EINVAL. The values are collected in a br_cfm_cc_ccm_tx_info and
+ * passed to br_cfm_cc_ccm_tx(), whose result is returned.
+ */
+static int br_cc_ccm_tx_parse(struct net_bridge *br, struct nlattr *attr,
+			      struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[IFLA_BRIDGE_CFM_CC_CCM_TX_MAX + 1];
+	struct br_cfm_cc_ccm_tx_info tx_info;
+	u32 instance;
+	int err;
+
+	err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_CC_CCM_TX_MAX, attr,
+			       br_cfm_cc_ccm_tx_policy, extack);
+	if (err)
+		return err;
+
+	if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing INSTANCE attribute");
+		return -EINVAL;
+	}
+	if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing DMAC attribute");
+		return -EINVAL;
+	}
+	if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_SEQ_NO_UPDATE]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing SEQ_NO_UPDATE attribute");
+		return -EINVAL;
+	}
+	if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_PERIOD]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing PERIOD attribute");
+		return -EINVAL;
+	}
+	if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing IF_TLV attribute");
+		return -EINVAL;
+	}
+	if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV_VALUE]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing IF_TLV_VALUE attribute");
+		return -EINVAL;
+	}
+	if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing PORT_TLV attribute");
+		return -EINVAL;
+	}
+	if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV_VALUE]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing PORT_TLV_VALUE attribute");
+		return -EINVAL;
+	}
+
+	/* Zero the whole structure so that no uninitialised byte is handed
+	 * to br_cfm_cc_ccm_tx().
+	 */
+	memset(&tx_info, 0, sizeof(tx_info));
+
+	/* tb[] was parsed in the CCM_TX attribute space, so it must be
+	 * indexed with the CCM_TX instance id that was checked above.
+	 */
+	instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE]);
+	nla_memcpy(&tx_info.dmac.addr,
+		   tb[IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC],
+		   sizeof(tx_info.dmac.addr));
+	tx_info.seq_no_update = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_SEQ_NO_UPDATE]);
+	tx_info.period = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_PERIOD]);
+	tx_info.if_tlv = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV]);
+	tx_info.if_tlv_value = nla_get_u8(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV_VALUE]);
+	tx_info.port_tlv = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV]);
+	tx_info.port_tlv_value = nla_get_u8(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV_VALUE]);
+
+	return br_cfm_cc_ccm_tx(br, instance, &tx_info, extack);
+}
+
+/* Dispatch every sub-command found in an IFLA_BRIDGE_CFM nest.
+ *
+ * The nest is validated against br_cfm_policy. The sub-commands that are
+ * present are then handled in a fixed order (MEP create, MEP delete, MEP
+ * config, CC config, CC peer MEP add, CC peer MEP remove, CC RDI, CC CCM TX)
+ * and processing stops at the first one that fails.
+ *
+ * @cmd is accepted but not used here.
+ *
+ * Returns 0 when every present sub-command succeeded, otherwise the first
+ * error encountered.
+ */
+int br_cfm_parse(struct net_bridge *br, struct net_bridge_port *p,
+		 struct nlattr *attr, int cmd, struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[IFLA_BRIDGE_CFM_MAX + 1];
+	int err;
+
+	/* A request issued on a port carries no valid br pointer, so use the
+	 * bridge the port belongs to.
+	 */
+	if (p)
+		br = p->br;
+
+	err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_MAX, attr,
+			       br_cfm_policy, extack);
+
+	/* Each step below runs only while no earlier step has failed; tb[]
+	 * is therefore never read after a parse error.
+	 */
+	if (!err && tb[IFLA_BRIDGE_CFM_MEP_CREATE])
+		err = br_mep_create_parse(br, tb[IFLA_BRIDGE_CFM_MEP_CREATE],
+					  extack);
+
+	if (!err && tb[IFLA_BRIDGE_CFM_MEP_DELETE])
+		err = br_mep_delete_parse(br, tb[IFLA_BRIDGE_CFM_MEP_DELETE],
+					  extack);
+
+	if (!err && tb[IFLA_BRIDGE_CFM_MEP_CONFIG])
+		err = br_mep_config_parse(br, tb[IFLA_BRIDGE_CFM_MEP_CONFIG],
+					  extack);
+
+	if (!err && tb[IFLA_BRIDGE_CFM_CC_CONFIG])
+		err = br_cc_config_parse(br, tb[IFLA_BRIDGE_CFM_CC_CONFIG],
+					 extack);
+
+	if (!err && tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_ADD])
+		err = br_cc_peer_mep_add_parse(br, tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_ADD],
+					       extack);
+
+	if (!err && tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_REMOVE])
+		err = br_cc_peer_mep_remove_parse(br, tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_REMOVE],
+						  extack);
+
+	if (!err && tb[IFLA_BRIDGE_CFM_CC_RDI])
+		err = br_cc_rdi_parse(br, tb[IFLA_BRIDGE_CFM_CC_RDI],
+				      extack);
+
+	if (!err && tb[IFLA_BRIDGE_CFM_CC_CCM_TX])
+		err = br_cc_ccm_tx_parse(br, tb[IFLA_BRIDGE_CFM_CC_CCM_TX],
+					 extack);
+
+	return err;
+}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 92d64abffa87..431ee2b06dc1 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -700,6 +700,11 @@ static int br_afspec(struct net_bridge *br,
 			if (err)
 				return err;
 			break;
+		case IFLA_BRIDGE_CFM:
+			err = br_cfm_parse(br, p, attr, cmd, extack);
+			if (err)
+				return err;
+			break;
 		}
 	}
 
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index f7c41380de4d..6a5db0553f19 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -1459,10 +1459,25 @@ static inline int br_mrp_fill_info(struct sk_buff *skb, struct net_bridge *br)
 
 #endif
 
-/* br_mrp.c */
+/* br_cfm.c */
 #if IS_ENABLED(CONFIG_BRIDGE_CFM)
+int br_cfm_parse(struct net_bridge *br, struct net_bridge_port *p,
+		 struct nlattr *attr, int cmd, struct netlink_ext_ack *extack);
+bool br_cfm_created(struct net_bridge *br);
 void br_cfm_port_del(struct net_bridge *br, struct net_bridge_port *p);
 #else
+/* Stub used when CONFIG_BRIDGE_CFM is not enabled: an IFLA_BRIDGE_CFM
+ * request cannot be served, so it is rejected with -EOPNOTSUPP.
+ */
+static inline int br_cfm_parse(struct net_bridge *br, struct net_bridge_port *p,
+			       struct nlattr *attr, int cmd,
+			       struct netlink_ext_ack *extack)
+{
+	return -EOPNOTSUPP;
+}
+
+/* Stub used when CONFIG_BRIDGE_CFM is not enabled; always returns false. */
+static inline bool br_cfm_created(struct net_bridge *br)
+{
+	return false;
+}
+
 static inline void br_cfm_port_del(struct net_bridge *br,
 				   struct net_bridge_port *p)
 {
-- 
cgit v1.2.3


From 5e312fc0e7fbd11716c0b976d83f4392522c4f83 Mon Sep 17 00:00:00 2001
From: Henrik Bjoernlund <henrik.bjoernlund@microchip.com>
Date: Tue, 27 Oct 2020 10:02:49 +0000
Subject: bridge: cfm: Netlink GET configuration Interface.

This is the implementation of CFM netlink configuration
get information interface.

Add new nested netlink attributes. These attributes are used by the
user space to get configuration information.

GETLINK:
    Request filter RTEXT_FILTER_CFM_CONFIG:
    Indicating that CFM configuration information must be delivered.

    IFLA_BRIDGE_CFM:
        Points to the CFM information.

    IFLA_BRIDGE_CFM_MEP_CREATE_INFO:
        This indicates that MEP instance create parameters are following.
    IFLA_BRIDGE_CFM_MEP_CONFIG_INFO:
        This indicates that MEP instance config parameters are following.
    IFLA_BRIDGE_CFM_CC_CONFIG_INFO:
        This indicates that MEP instance CC functionality
        parameters are following.
    IFLA_BRIDGE_CFM_CC_RDI_INFO:
        This indicates that CC transmitted CCM PDU RDI
        parameters are following.
    IFLA_BRIDGE_CFM_CC_CCM_TX_INFO:
        This indicates that CC transmitted CCM PDU parameters are
        following.
    IFLA_BRIDGE_CFM_CC_PEER_MEP_INFO:
        This indicates that the added peer MEP IDs are following.

CFM nested attribute has the following attributes in next level.

GETLINK RTEXT_FILTER_CFM_CONFIG:
    IFLA_BRIDGE_CFM_MEP_CREATE_INSTANCE:
        The created MEP instance number.
        The type is u32.
    IFLA_BRIDGE_CFM_MEP_CREATE_DOMAIN:
        The created MEP domain.
        The type is u32 (br_cfm_domain).
        It must be BR_CFM_PORT.
        This means that CFM frames are transmitted and received
        directly on the port - untagged. Not in a VLAN.
    IFLA_BRIDGE_CFM_MEP_CREATE_DIRECTION:
        The created MEP direction.
        The type is u32 (br_cfm_mep_direction).
        It must be BR_CFM_MEP_DIRECTION_DOWN.
        This means that CFM frames are transmitted and received on
        the port. Not in the bridge.
    IFLA_BRIDGE_CFM_MEP_CREATE_IFINDEX:
        The created MEP residence port ifindex.
        The type is u32 (ifindex).

    IFLA_BRIDGE_CFM_MEP_DELETE_INSTANCE:
        The deleted MEP instance number.
        The type is u32.

    IFLA_BRIDGE_CFM_MEP_CONFIG_INSTANCE:
        The configured MEP instance number.
        The type is u32.
    IFLA_BRIDGE_CFM_MEP_CONFIG_UNICAST_MAC:
        The configured MEP unicast MAC address.
        The type is 6*u8 (array).
        This is used as SMAC in all transmitted CFM frames.
    IFLA_BRIDGE_CFM_MEP_CONFIG_MDLEVEL:
        The configured MEP unicast MD level.
        The type is u32.
        It must be in the range 1-7.
        No CFM frames are passing through this MEP on lower levels.
    IFLA_BRIDGE_CFM_MEP_CONFIG_MEPID:
        The configured MEP ID.
        The type is u32.
        It must be in the range 0-0x1FFF.
        This MEP ID is inserted in any transmitted CCM frame.

    IFLA_BRIDGE_CFM_CC_CONFIG_INSTANCE:
        The configured MEP instance number.
        The type is u32.
    IFLA_BRIDGE_CFM_CC_CONFIG_ENABLE:
        The Continuity Check (CC) functionality is enabled or disabled.
        The type is u32 (bool).
    IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL:
        The CC expected receive interval of CCM frames.
        The type is u32 (br_cfm_ccm_interval).
        This is also the transmission interval of CCM frames when enabled.
    IFLA_BRIDGE_CFM_CC_CONFIG_EXP_MAID:
        The CC expected receive MAID in CCM frames.
        The type is CFM_MAID_LENGTH*u8.
        This MAID is also inserted in transmitted CCM frames.

    IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE:
        The configured MEP instance number.
        The type is u32.
    IFLA_BRIDGE_CFM_CC_PEER_MEPID:
        The CC Peer MEP ID added.
        The type is u32.
        When a Peer MEP ID is added and CC is enabled it is expected to
        receive CCM frames from that Peer MEP.

    IFLA_BRIDGE_CFM_CC_RDI_INSTANCE:
        The configured MEP instance number.
        The type is u32.
    IFLA_BRIDGE_CFM_CC_RDI_RDI:
        The RDI that is inserted in transmitted CCM PDU.
        The type is u32 (bool).

    IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE:
        The configured MEP instance number.
        The type is u32.
    IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC:
        The transmitted CCM frame destination MAC address.
        The type is 6*u8 (array).
        This is used as DMAC in all transmitted CFM frames.
    IFLA_BRIDGE_CFM_CC_CCM_TX_SEQ_NO_UPDATE:
        The transmitted CCM frame update (increment) of sequence
        number is enabled or disabled.
        The type is u32 (bool).
    IFLA_BRIDGE_CFM_CC_CCM_TX_PERIOD:
        The period of time during which CCM frames are transmitted.
        The type is u32.
        The time is given in seconds. SETLINK IFLA_BRIDGE_CFM_CC_CCM_TX
        must be done before timeout to keep transmission alive.
        When period is zero any ongoing CCM frame transmission
        will be stopped.
    IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV:
        The transmitted CCM frame update with Interface Status TLV
        is enabled or disabled.
        The type is u32 (bool).
    IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV_VALUE:
        The transmitted Interface Status TLV value field.
        The type is u8.
    IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV:
        The transmitted CCM frame update with Port Status TLV is enabled
        or disabled.
        The type is u32 (bool).
    IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV_VALUE:
        The transmitted Port Status TLV value field.
        The type is u8.

Signed-off-by: Henrik Bjoernlund  <henrik.bjoernlund@microchip.com>
Reviewed-by: Horatiu Vultur  <horatiu.vultur@microchip.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/if_bridge.h |   6 ++
 net/bridge/br_cfm_netlink.c    | 161 +++++++++++++++++++++++++++++++++++++++++
 net/bridge/br_netlink.c        |  29 +++++++-
 net/bridge/br_private.h        |   6 ++
 4 files changed, 200 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index 94cc9444d749..b8b4491922d9 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -339,6 +339,12 @@ enum {
 	IFLA_BRIDGE_CFM_CC_PEER_MEP_REMOVE,
 	IFLA_BRIDGE_CFM_CC_RDI,
 	IFLA_BRIDGE_CFM_CC_CCM_TX,
+	IFLA_BRIDGE_CFM_MEP_CREATE_INFO,
+	IFLA_BRIDGE_CFM_MEP_CONFIG_INFO,
+	IFLA_BRIDGE_CFM_CC_CONFIG_INFO,
+	IFLA_BRIDGE_CFM_CC_RDI_INFO,
+	IFLA_BRIDGE_CFM_CC_CCM_TX_INFO,
+	IFLA_BRIDGE_CFM_CC_PEER_MEP_INFO,
 	__IFLA_BRIDGE_CFM_MAX,
 };
 
diff --git a/net/bridge/br_cfm_netlink.c b/net/bridge/br_cfm_netlink.c
index c75f4c788eac..dee1e0dea39e 100644
--- a/net/bridge/br_cfm_netlink.c
+++ b/net/bridge/br_cfm_netlink.c
@@ -451,3 +451,164 @@ int br_cfm_parse(struct net_bridge *br, struct net_bridge_port *p,
 
 	return 0;
 }
+
+/* Dump the configuration of every MEP of @br into @skb.
+ *
+ * For each MEP on br->mep_list one nest is emitted per configuration group
+ * (MEP_CREATE_INFO, MEP_CONFIG_INFO, CC_CONFIG_INFO, CC_RDI_INFO and
+ * CC_CCM_TX_INFO), followed by one CC_PEER_MEP_INFO nest per entry of the
+ * MEP's peer_mep_list. Every nest carries the MEP instance number so that
+ * user space can tie the groups together.
+ *
+ * Both lists are walked with hlist_for_each_entry_rcu(); br_fill_ifinfo()
+ * calls this function between rcu_read_lock() and rcu_read_unlock().
+ *
+ * Returns 0 on success or -EMSGSIZE when @skb has no room left; in that
+ * case the nest that was being filled is cancelled.
+ */
+int br_cfm_config_fill_info(struct sk_buff *skb, struct net_bridge *br)
+{
+	struct br_cfm_peer_mep *peer_mep;
+	struct br_cfm_mep *mep;
+	struct nlattr *tb;
+
+	hlist_for_each_entry_rcu(mep, &br->mep_list, head) {
+		/* Parameters the MEP was created with */
+		tb = nla_nest_start(skb, IFLA_BRIDGE_CFM_MEP_CREATE_INFO);
+		if (!tb)
+			goto nla_info_failure;
+
+		if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_CREATE_INSTANCE,
+				mep->instance) ||
+		    nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_CREATE_DOMAIN,
+				mep->create.domain) ||
+		    nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_CREATE_DIRECTION,
+				mep->create.direction) ||
+		    nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_CREATE_IFINDEX,
+				mep->create.ifindex))
+			goto nla_put_failure;
+
+		nla_nest_end(skb, tb);
+
+		/* MEP configuration */
+		tb = nla_nest_start(skb, IFLA_BRIDGE_CFM_MEP_CONFIG_INFO);
+		if (!tb)
+			goto nla_info_failure;
+
+		if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_CONFIG_INSTANCE,
+				mep->instance) ||
+		    nla_put(skb, IFLA_BRIDGE_CFM_MEP_CONFIG_UNICAST_MAC,
+			    sizeof(mep->config.unicast_mac.addr),
+			    mep->config.unicast_mac.addr) ||
+		    nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_CONFIG_MDLEVEL,
+				mep->config.mdlevel) ||
+		    nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_CONFIG_MEPID,
+				mep->config.mepid))
+			goto nla_put_failure;
+
+		nla_nest_end(skb, tb);
+
+		/* Continuity Check configuration */
+		tb = nla_nest_start(skb, IFLA_BRIDGE_CFM_CC_CONFIG_INFO);
+		if (!tb)
+			goto nla_info_failure;
+
+		if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CONFIG_INSTANCE,
+				mep->instance) ||
+		    nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CONFIG_ENABLE,
+				mep->cc_config.enable) ||
+		    nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL,
+				mep->cc_config.exp_interval) ||
+		    nla_put(skb, IFLA_BRIDGE_CFM_CC_CONFIG_EXP_MAID,
+			    sizeof(mep->cc_config.exp_maid.data),
+			    mep->cc_config.exp_maid.data))
+			goto nla_put_failure;
+
+		nla_nest_end(skb, tb);
+
+		/* RDI value inserted in transmitted CCM PDUs */
+		tb = nla_nest_start(skb, IFLA_BRIDGE_CFM_CC_RDI_INFO);
+		if (!tb)
+			goto nla_info_failure;
+
+		if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_RDI_INSTANCE,
+				mep->instance) ||
+		    nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_RDI_RDI,
+				mep->rdi))
+			goto nla_put_failure;
+
+		nla_nest_end(skb, tb);
+
+		/* CCM transmission parameters */
+		tb = nla_nest_start(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_INFO);
+		if (!tb)
+			goto nla_info_failure;
+
+		/* The DMAC length is taken from the array that is copied, the
+		 * same way the unicast MAC is sized above.
+		 */
+		if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE,
+				mep->instance) ||
+		    nla_put(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC,
+			    sizeof(mep->cc_ccm_tx_info.dmac.addr),
+			    mep->cc_ccm_tx_info.dmac.addr) ||
+		    nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_SEQ_NO_UPDATE,
+				mep->cc_ccm_tx_info.seq_no_update) ||
+		    nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_PERIOD,
+				mep->cc_ccm_tx_info.period) ||
+		    nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV,
+				mep->cc_ccm_tx_info.if_tlv) ||
+		    nla_put_u8(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV_VALUE,
+			       mep->cc_ccm_tx_info.if_tlv_value) ||
+		    nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV,
+				mep->cc_ccm_tx_info.port_tlv) ||
+		    nla_put_u8(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV_VALUE,
+			       mep->cc_ccm_tx_info.port_tlv_value))
+			goto nla_put_failure;
+
+		nla_nest_end(skb, tb);
+
+		/* One nest per peer MEP added to this MEP */
+		hlist_for_each_entry_rcu(peer_mep, &mep->peer_mep_list, head) {
+			tb = nla_nest_start(skb,
+					    IFLA_BRIDGE_CFM_CC_PEER_MEP_INFO);
+			if (!tb)
+				goto nla_info_failure;
+
+			if (nla_put_u32(skb,
+					IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE,
+					mep->instance) ||
+			    nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_PEER_MEPID,
+					peer_mep->mepid))
+				goto nla_put_failure;
+
+			nla_nest_end(skb, tb);
+		}
+	}
+
+	return 0;
+
+nla_put_failure:
+	/* Drop the partially written nest */
+	nla_nest_cancel(skb, tb);
+
+nla_info_failure:
+	return -EMSGSIZE;
+}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 431ee2b06dc1..69bfe165ff7f 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -16,6 +16,7 @@
 
 #include "br_private.h"
 #include "br_private_stp.h"
+#include "br_private_cfm.h"
 #include "br_private_tunnel.h"
 
 static int __get_num_vlan_infos(struct net_bridge_vlan_group *vg,
@@ -426,7 +427,8 @@ static int br_fill_ifinfo(struct sk_buff *skb,
 
 	if (filter_mask & (RTEXT_FILTER_BRVLAN |
 			   RTEXT_FILTER_BRVLAN_COMPRESSED |
-			   RTEXT_FILTER_MRP)) {
+			   RTEXT_FILTER_MRP |
+			   RTEXT_FILTER_CFM_CONFIG)) {
 		af = nla_nest_start_noflag(skb, IFLA_AF_SPEC);
 		if (!af)
 			goto nla_put_failure;
@@ -475,6 +477,28 @@ static int br_fill_ifinfo(struct sk_buff *skb,
 			goto nla_put_failure;
 	}
 
+	if (filter_mask & RTEXT_FILTER_CFM_CONFIG) {
+		struct nlattr *cfm_nest = NULL;
+		int err;
+
+		if (!br_cfm_created(br) || port)
+			goto done;
+
+		cfm_nest = nla_nest_start(skb, IFLA_BRIDGE_CFM);
+		if (!cfm_nest)
+			goto nla_put_failure;
+
+		if (filter_mask & RTEXT_FILTER_CFM_CONFIG) {
+			rcu_read_lock();
+			err = br_cfm_config_fill_info(skb, br);
+			rcu_read_unlock();
+			if (err)
+				goto nla_put_failure;
+		}
+
+		nla_nest_end(skb, cfm_nest);
+	}
+
 done:
 	if (af)
 		nla_nest_end(skb, af);
@@ -538,7 +562,8 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
 
 	if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN) &&
 	    !(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) &&
-	    !(filter_mask & RTEXT_FILTER_MRP))
+	    !(filter_mask & RTEXT_FILTER_MRP) &&
+	    !(filter_mask & RTEXT_FILTER_CFM_CONFIG))
 		return 0;
 
 	return br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, nlflags,
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 6a5db0553f19..f571bdeb5d83 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -1465,6 +1465,7 @@ int br_cfm_parse(struct net_bridge *br, struct net_bridge_port *p,
 		 struct nlattr *attr, int cmd, struct netlink_ext_ack *extack);
 bool br_cfm_created(struct net_bridge *br);
 void br_cfm_port_del(struct net_bridge *br, struct net_bridge_port *p);
+int br_cfm_config_fill_info(struct sk_buff *skb, struct net_bridge *br);
 #else
 static inline int br_cfm_parse(struct net_bridge *br, struct net_bridge_port *p,
 			       struct nlattr *attr, int cmd,
@@ -1482,6 +1483,11 @@ static inline void br_cfm_port_del(struct net_bridge *br,
 				   struct net_bridge_port *p)
 {
 }
+
+/* Stub used when CONFIG_BRIDGE_CFM is not enabled: there is no CFM
+ * configuration to dump, so -EOPNOTSUPP is returned.
+ */
+static inline int br_cfm_config_fill_info(struct sk_buff *skb, struct net_bridge *br)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 
 /* br_netlink.c */
-- 
cgit v1.2.3


From e77824d81dff5e6f9244c201537a47f418eb36cb Mon Sep 17 00:00:00 2001
From: Henrik Bjoernlund <henrik.bjoernlund@microchip.com>
Date: Tue, 27 Oct 2020 10:02:50 +0000
Subject: bridge: cfm: Netlink GET status Interface.

This is the implementation of CFM netlink status
get information interface.

Add new nested netlink attributes. These attributes are used by the
user space to get status information.

GETLINK:
    Request filter RTEXT_FILTER_CFM_STATUS:
    Indicating that CFM status information must be delivered.

    IFLA_BRIDGE_CFM:
        Points to the CFM information.

    IFLA_BRIDGE_CFM_MEP_STATUS_INFO:
        This indicates that the MEP instance status is following.
    IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO:
        This indicates that the peer MEP status is following.

CFM nested attribute has the following attributes in next level.

GETLINK RTEXT_FILTER_CFM_STATUS:
    IFLA_BRIDGE_CFM_MEP_STATUS_INSTANCE:
        The MEP instance number of the delivered status.
        The type is u32.
    IFLA_BRIDGE_CFM_MEP_STATUS_OPCODE_UNEXP_SEEN:
        The MEP instance received CFM PDU with unexpected Opcode.
        The type is u32 (bool).
    IFLA_BRIDGE_CFM_MEP_STATUS_VERSION_UNEXP_SEEN:
        The MEP instance received CFM PDU with unexpected version.
        The type is u32 (bool).
    IFLA_BRIDGE_CFM_MEP_STATUS_RX_LEVEL_LOW_SEEN:
        The MEP instance received CCM PDU with MD level lower than
        configured level. This frame is discarded.
        The type is u32 (bool).

    IFLA_BRIDGE_CFM_CC_PEER_STATUS_INSTANCE:
        The MEP instance number of the delivered status.
        The type is u32.
    IFLA_BRIDGE_CFM_CC_PEER_STATUS_PEER_MEPID:
        The added Peer MEP ID of the delivered status.
        The type is u32.
    IFLA_BRIDGE_CFM_CC_PEER_STATUS_CCM_DEFECT:
        The CCM defect status.
        The type is u32 (bool).
        True means no CCM frame has been received for 3.25 intervals
        (see IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL).
    IFLA_BRIDGE_CFM_CC_PEER_STATUS_RDI:
        The last received CCM PDU RDI.
        The type is u32 (bool).
    IFLA_BRIDGE_CFM_CC_PEER_STATUS_PORT_TLV_VALUE:
        The last received CCM PDU Port Status TLV value field.
        The type is u8.
    IFLA_BRIDGE_CFM_CC_PEER_STATUS_IF_TLV_VALUE:
        The last received CCM PDU Interface Status TLV value field.
        The type is u8.
    IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEEN:
        A CCM frame has been received from Peer MEP.
        The type is u32 (bool).
        This is cleared after GETLINK IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO.
    IFLA_BRIDGE_CFM_CC_PEER_STATUS_TLV_SEEN:
        A CCM frame with TLV has been received from Peer MEP.
        The type is u32 (bool).
        This is cleared after GETLINK IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO.
    IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEQ_UNEXP_SEEN:
        A CCM frame with unexpected sequence number has been received
        from Peer MEP.
        The type is u32 (bool).
        When a sequence number is not one higher than previously received
        then it is unexpected.
        This is cleared after GETLINK IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO.

Signed-off-by: Henrik Bjoernlund  <henrik.bjoernlund@microchip.com>
Reviewed-by: Horatiu Vultur  <horatiu.vultur@microchip.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/if_bridge.h |  29 ++++++++++++
 include/uapi/linux/rtnetlink.h |   1 +
 net/bridge/br_cfm_netlink.c    | 105 +++++++++++++++++++++++++++++++++++++++++
 net/bridge/br_netlink.c        |  16 +++++--
 net/bridge/br_private.h        |   6 +++
 5 files changed, 154 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index b8b4491922d9..d975e1223884 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -345,6 +345,8 @@ enum {
 	IFLA_BRIDGE_CFM_CC_RDI_INFO,
 	IFLA_BRIDGE_CFM_CC_CCM_TX_INFO,
 	IFLA_BRIDGE_CFM_CC_PEER_MEP_INFO,
+	IFLA_BRIDGE_CFM_MEP_STATUS_INFO,
+	IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO,
 	__IFLA_BRIDGE_CFM_MAX,
 };
 
@@ -424,6 +426,33 @@ enum {
 
 #define IFLA_BRIDGE_CFM_CC_CCM_TX_MAX (__IFLA_BRIDGE_CFM_CC_CCM_TX_MAX - 1)
 
+/* Attributes nested in IFLA_BRIDGE_CFM_MEP_STATUS_INFO, delivered on GETLINK
+ * with RTEXT_FILTER_CFM_STATUS. The three *_SEEN indications are reset by
+ * the kernel when a status dump reports them.
+ */
+enum {
+	IFLA_BRIDGE_CFM_MEP_STATUS_UNSPEC,
+	/* u32: MEP instance number the status belongs to */
+	IFLA_BRIDGE_CFM_MEP_STATUS_INSTANCE,
+	/* u32 (bool): a CFM PDU with an unexpected opcode was received */
+	IFLA_BRIDGE_CFM_MEP_STATUS_OPCODE_UNEXP_SEEN,
+	/* u32 (bool): a CFM PDU with an unexpected version was received */
+	IFLA_BRIDGE_CFM_MEP_STATUS_VERSION_UNEXP_SEEN,
+	/* u32 (bool): a CCM PDU below the configured MD level was received */
+	IFLA_BRIDGE_CFM_MEP_STATUS_RX_LEVEL_LOW_SEEN,
+	__IFLA_BRIDGE_CFM_MEP_STATUS_MAX,
+};
+
+/* Highest valid attribute id of the enum above */
+#define IFLA_BRIDGE_CFM_MEP_STATUS_MAX (__IFLA_BRIDGE_CFM_MEP_STATUS_MAX - 1)
+
+/* Attributes nested in IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO, delivered on
+ * GETLINK with RTEXT_FILTER_CFM_STATUS; one nest is sent per peer MEP. The
+ * SEEN, TLV_SEEN and SEQ_UNEXP_SEEN indications are reset by the kernel when
+ * a status dump reports them.
+ */
+enum {
+	IFLA_BRIDGE_CFM_CC_PEER_STATUS_UNSPEC,
+	/* u32: MEP instance number the peer belongs to */
+	IFLA_BRIDGE_CFM_CC_PEER_STATUS_INSTANCE,
+	/* u32: MEP ID of the peer */
+	IFLA_BRIDGE_CFM_CC_PEER_STATUS_PEER_MEPID,
+	/* u32 (bool): CCM defect, no CCM frame received for 3.25 intervals */
+	IFLA_BRIDGE_CFM_CC_PEER_STATUS_CCM_DEFECT,
+	/* u32 (bool): RDI of the last received CCM PDU */
+	IFLA_BRIDGE_CFM_CC_PEER_STATUS_RDI,
+	/* u8: Port Status TLV value of the last received CCM PDU */
+	IFLA_BRIDGE_CFM_CC_PEER_STATUS_PORT_TLV_VALUE,
+	/* u8: Interface Status TLV value of the last received CCM PDU */
+	IFLA_BRIDGE_CFM_CC_PEER_STATUS_IF_TLV_VALUE,
+	/* u32 (bool): a CCM frame was received from the peer */
+	IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEEN,
+	/* u32 (bool): a CCM frame carrying a TLV was received from the peer */
+	IFLA_BRIDGE_CFM_CC_PEER_STATUS_TLV_SEEN,
+	/* u32 (bool): a CCM frame with an unexpected sequence number was
+	 * received from the peer
+	 */
+	IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEQ_UNEXP_SEEN,
+	__IFLA_BRIDGE_CFM_CC_PEER_STATUS_MAX,
+};
+
+/* Highest valid attribute id of the enum above */
+#define IFLA_BRIDGE_CFM_CC_PEER_STATUS_MAX (__IFLA_BRIDGE_CFM_CC_PEER_STATUS_MAX - 1)
+
 struct bridge_stp_xstats {
 	__u64 transition_blk;
 	__u64 transition_fwd;
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index ffc9ca1f2bdb..fdd408f6a5d2 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -780,6 +780,7 @@ enum {
 #define	RTEXT_FILTER_SKIP_STATS	(1 << 3)
 #define RTEXT_FILTER_MRP	(1 << 4)
 #define RTEXT_FILTER_CFM_CONFIG	(1 << 5)
+#define RTEXT_FILTER_CFM_STATUS	(1 << 6)
 
 /* End of information exported to user level */
 
diff --git a/net/bridge/br_cfm_netlink.c b/net/bridge/br_cfm_netlink.c
index dee1e0dea39e..c8b5ff0825a3 100644
--- a/net/bridge/br_cfm_netlink.c
+++ b/net/bridge/br_cfm_netlink.c
@@ -612,3 +612,108 @@ nla_put_failure:
 nla_info_failure:
 	return -EMSGSIZE;
 }
+
+/* Dump the status of every MEP of @br, and of its peer MEPs, into @skb.
+ *
+ * One IFLA_BRIDGE_CFM_MEP_STATUS_INFO nest is emitted per MEP on
+ * br->mep_list, each followed by one IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO
+ * nest per entry of that MEP's peer_mep_list.
+ *
+ * The "seen" indications are clear-on-read: as soon as the attributes of one
+ * entry have all been written, that entry's indications are reset.
+ *
+ * NOTE(review): indications already reset for earlier entries stay reset
+ * when a later entry fails with -EMSGSIZE - confirm that the caller never
+ * discards such a partially filled message and dumps again.
+ *
+ * Returns 0 on success or -EMSGSIZE when @skb has no room left; in that
+ * case the nest that was being filled is cancelled.
+ */
+int br_cfm_status_fill_info(struct sk_buff *skb, struct net_bridge *br)
+{
+	struct br_cfm_peer_mep *peer_mep;
+	struct br_cfm_mep *mep;
+	struct nlattr *tb;
+
+	hlist_for_each_entry_rcu(mep, &br->mep_list, head) {
+		tb = nla_nest_start(skb, IFLA_BRIDGE_CFM_MEP_STATUS_INFO);
+		if (!tb)
+			goto nla_info_failure;
+
+		if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_STATUS_INSTANCE,
+				mep->instance) ||
+		    nla_put_u32(skb,
+				IFLA_BRIDGE_CFM_MEP_STATUS_OPCODE_UNEXP_SEEN,
+				mep->status.opcode_unexp_seen) ||
+		    nla_put_u32(skb,
+				IFLA_BRIDGE_CFM_MEP_STATUS_VERSION_UNEXP_SEEN,
+				mep->status.version_unexp_seen) ||
+		    nla_put_u32(skb,
+				IFLA_BRIDGE_CFM_MEP_STATUS_RX_LEVEL_LOW_SEEN,
+				mep->status.rx_level_low_seen))
+			goto nla_put_failure;
+
+		/* Reported: reset the MEP 'seen' indications */
+		mep->status.opcode_unexp_seen = false;
+		mep->status.version_unexp_seen = false;
+		mep->status.rx_level_low_seen = false;
+
+		nla_nest_end(skb, tb);
+
+		hlist_for_each_entry_rcu(peer_mep, &mep->peer_mep_list, head) {
+			tb = nla_nest_start(skb,
+					    IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO);
+			if (!tb)
+				goto nla_info_failure;
+
+			if (nla_put_u32(skb,
+					IFLA_BRIDGE_CFM_CC_PEER_STATUS_INSTANCE,
+					mep->instance) ||
+			    nla_put_u32(skb,
+					IFLA_BRIDGE_CFM_CC_PEER_STATUS_PEER_MEPID,
+					peer_mep->mepid) ||
+			    nla_put_u32(skb,
+					IFLA_BRIDGE_CFM_CC_PEER_STATUS_CCM_DEFECT,
+					peer_mep->cc_status.ccm_defect) ||
+			    nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_PEER_STATUS_RDI,
+					peer_mep->cc_status.rdi) ||
+			    nla_put_u8(skb,
+				       IFLA_BRIDGE_CFM_CC_PEER_STATUS_PORT_TLV_VALUE,
+				       peer_mep->cc_status.port_tlv_value) ||
+			    nla_put_u8(skb,
+				       IFLA_BRIDGE_CFM_CC_PEER_STATUS_IF_TLV_VALUE,
+				       peer_mep->cc_status.if_tlv_value) ||
+			    nla_put_u32(skb,
+					IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEEN,
+					peer_mep->cc_status.seen) ||
+			    nla_put_u32(skb,
+					IFLA_BRIDGE_CFM_CC_PEER_STATUS_TLV_SEEN,
+					peer_mep->cc_status.tlv_seen) ||
+			    nla_put_u32(skb,
+					IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEQ_UNEXP_SEEN,
+					peer_mep->cc_status.seq_unexp_seen))
+				goto nla_put_failure;
+
+			/* Reported: reset the peer 'seen' indications */
+			peer_mep->cc_status.seen = false;
+			peer_mep->cc_status.tlv_seen = false;
+			peer_mep->cc_status.seq_unexp_seen = false;
+
+			nla_nest_end(skb, tb);
+		}
+	}
+
+	return 0;
+
+nla_put_failure:
+	/* Drop the partially written nest */
+	nla_nest_cancel(skb, tb);
+
+nla_info_failure:
+	return -EMSGSIZE;
+}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 69bfe165ff7f..68c2ed87e26b 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -428,7 +428,8 @@ static int br_fill_ifinfo(struct sk_buff *skb,
 	if (filter_mask & (RTEXT_FILTER_BRVLAN |
 			   RTEXT_FILTER_BRVLAN_COMPRESSED |
 			   RTEXT_FILTER_MRP |
-			   RTEXT_FILTER_CFM_CONFIG)) {
+			   RTEXT_FILTER_CFM_CONFIG |
+			   RTEXT_FILTER_CFM_STATUS)) {
 		af = nla_nest_start_noflag(skb, IFLA_AF_SPEC);
 		if (!af)
 			goto nla_put_failure;
@@ -477,7 +478,7 @@ static int br_fill_ifinfo(struct sk_buff *skb,
 			goto nla_put_failure;
 	}
 
-	if (filter_mask & RTEXT_FILTER_CFM_CONFIG) {
+	if (filter_mask & (RTEXT_FILTER_CFM_CONFIG | RTEXT_FILTER_CFM_STATUS)) {
 		struct nlattr *cfm_nest = NULL;
 		int err;
 
@@ -496,6 +497,14 @@ static int br_fill_ifinfo(struct sk_buff *skb,
 				goto nla_put_failure;
 		}
 
+		if (filter_mask & RTEXT_FILTER_CFM_STATUS) {
+			rcu_read_lock();
+			err = br_cfm_status_fill_info(skb, br);
+			rcu_read_unlock();
+			if (err)
+				goto nla_put_failure;
+		}
+
 		nla_nest_end(skb, cfm_nest);
 	}
 
@@ -563,7 +572,8 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
 	if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN) &&
 	    !(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) &&
 	    !(filter_mask & RTEXT_FILTER_MRP) &&
-	    !(filter_mask & RTEXT_FILTER_CFM_CONFIG))
+	    !(filter_mask & RTEXT_FILTER_CFM_CONFIG) &&
+	    !(filter_mask & RTEXT_FILTER_CFM_STATUS))
 		return 0;
 
 	return br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, nlflags,
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index f571bdeb5d83..228635b350a2 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -1466,6 +1466,7 @@ int br_cfm_parse(struct net_bridge *br, struct net_bridge_port *p,
 bool br_cfm_created(struct net_bridge *br);
 void br_cfm_port_del(struct net_bridge *br, struct net_bridge_port *p);
 int br_cfm_config_fill_info(struct sk_buff *skb, struct net_bridge *br);
+int br_cfm_status_fill_info(struct sk_buff *skb, struct net_bridge *br);
 #else
 static inline int br_cfm_parse(struct net_bridge *br, struct net_bridge_port *p,
 			       struct nlattr *attr, int cmd,
@@ -1488,6 +1489,11 @@ static inline int br_cfm_config_fill_info(struct sk_buff *skb, struct net_bridge
 {
 	return -EOPNOTSUPP;
 }
+
+static inline int br_cfm_status_fill_info(struct sk_buff *skb, struct net_bridge *br)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 
 /* br_netlink.c */
-- 
cgit v1.2.3


From 8ba16d5993749c3f31fd2b49e16f0dc1e1770b9c Mon Sep 17 00:00:00 2001
From: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Date: Sat, 30 May 2020 12:58:17 +0200
Subject: drm/fourcc: Add AMD DRM modifiers.

This adds modifiers for GFX9+ AMD GPUs.

As the modifiers need a lot of parameters I split things out in
getters and setters.
  - Advantage: simplifies the code a lot
  - Disadvantage: Makes it harder to check that you're setting all
                  the required fields.

The tiling modes seem to change every generation, but the structure
of what each tiling mode is good for stays really similar. As such
the core of the modifier is
 - the tiling mode
 - a version. Not explicitly a GPU generation, but splitting out
   a new set of tiling equations.

Sometimes one or two tiling modes stay the same and for those we
specify a canonical version.

Then we have a bunch of parameters on how the compression works.
Different HW units have different requirements for these and we
actually have some conflicts here.

e.g. the render backends need a specific alignment but the display
unit only works with unaligned compression surfaces. To work around
that we have a DCC_RETILE option where both an aligned and unaligned
compression surface are allocated and a writer has to sync the
aligned surface to the unaligned surface on handoff.

Finally there are some GPU parameters that participate in the tiling
equations. These are constant for each GPU on the rendering/texturing
side. The display unit is very flexible however and supports all
of them :|

Some estimates:
 - Single GPU, render+texture: ~10 modifiers
 - All possible configs in a gen, display: ~1000 modifiers
 - Configs of actually existing GPUs in a gen: ~100 modifiers

For formats with a single plane everything gets put in a separate
DRM plane. However, this doesn't fit for some YUV formats, so if
the format has >1 plane, we let the driver pack the surfaces into
1 DRM plane per format plane.

This way we avoid X11 rendering onto the frontbuffer with DCC, but
still fit into 4 DRM planes.

Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/drm/drm_fourcc.h | 115 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 115 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
index 82f327801267..df56e71a7380 100644
--- a/include/uapi/drm/drm_fourcc.h
+++ b/include/uapi/drm/drm_fourcc.h
@@ -1056,6 +1056,121 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier)
  */
 #define AMLOGIC_FBC_OPTION_MEM_SAVING		(1ULL << 0)
 
+/*
+ * AMD modifiers
+ *
+ * Memory layout:
+ *
+ * without DCC:
+ *   - main surface
+ *
+ * with DCC & without DCC_RETILE:
+ *   - main surface in plane 0
+ *   - DCC surface in plane 1 (RB-aligned, pipe-aligned if DCC_PIPE_ALIGN is set)
+ *
+ * with DCC & DCC_RETILE:
+ *   - main surface in plane 0
+ *   - displayable DCC surface in plane 1 (not RB-aligned & not pipe-aligned)
+ *   - pipe-aligned DCC surface in plane 2 (RB-aligned & pipe-aligned)
+ *
+ * For multi-plane formats the above surfaces get merged into one plane for
+ * each format plane, based on the required alignment only.
+ */
+#define AMD_FMT_MOD fourcc_mod_code(AMD, 0)
+
+#define IS_AMD_FMT_MOD(val) (((val) >> 56) == DRM_FORMAT_MOD_VENDOR_AMD)
+
+/* Reserve 0 for GFX8 and older */
+#define AMD_FMT_MOD_TILE_VER_GFX9 1
+#define AMD_FMT_MOD_TILE_VER_GFX10 2
+#define AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS 3
+
+/*
+ * 64K_S is the same for GFX9/GFX10/GFX10_RBPLUS and hence has GFX9 as canonical
+ * version.
+ */
+#define AMD_FMT_MOD_TILE_GFX9_64K_S 9
+
+/*
+ * 64K_D for non-32 bpp is the same for GFX9/GFX10/GFX10_RBPLUS and hence has
+ * GFX9 as canonical version.
+ */
+#define AMD_FMT_MOD_TILE_GFX9_64K_D 10
+#define AMD_FMT_MOD_TILE_GFX9_64K_S_X 25
+#define AMD_FMT_MOD_TILE_GFX9_64K_D_X 26
+#define AMD_FMT_MOD_TILE_GFX9_64K_R_X 27
+
+#define AMD_FMT_MOD_DCC_BLOCK_64B 0
+#define AMD_FMT_MOD_DCC_BLOCK_128B 1
+#define AMD_FMT_MOD_DCC_BLOCK_256B 2
+
+#define AMD_FMT_MOD_TILE_VERSION_SHIFT 0
+#define AMD_FMT_MOD_TILE_VERSION_MASK 0xFF
+#define AMD_FMT_MOD_TILE_SHIFT 8
+#define AMD_FMT_MOD_TILE_MASK 0x1F
+
+/* Whether DCC compression is enabled. */
+#define AMD_FMT_MOD_DCC_SHIFT 13
+#define AMD_FMT_MOD_DCC_MASK 0x1
+
+/*
+ * Whether to include two DCC surfaces, one which is rb & pipe aligned, and
+ * one which is not-aligned.
+ */
+#define AMD_FMT_MOD_DCC_RETILE_SHIFT 14
+#define AMD_FMT_MOD_DCC_RETILE_MASK 0x1
+
+/* Only set if DCC_RETILE = false */
+#define AMD_FMT_MOD_DCC_PIPE_ALIGN_SHIFT 15
+#define AMD_FMT_MOD_DCC_PIPE_ALIGN_MASK 0x1
+
+#define AMD_FMT_MOD_DCC_INDEPENDENT_64B_SHIFT 16
+#define AMD_FMT_MOD_DCC_INDEPENDENT_64B_MASK 0x1
+#define AMD_FMT_MOD_DCC_INDEPENDENT_128B_SHIFT 17
+#define AMD_FMT_MOD_DCC_INDEPENDENT_128B_MASK 0x1
+#define AMD_FMT_MOD_DCC_MAX_COMPRESSED_BLOCK_SHIFT 18
+#define AMD_FMT_MOD_DCC_MAX_COMPRESSED_BLOCK_MASK 0x1
+
+/*
+ * DCC supports embedding some clear colors directly in the DCC surface.
+ * However, on older GPUs the rendering HW ignores the embedded clear color
+ * and prefers the driver provided color. This necessitates doing a fastclear
+ * eliminate operation before a process transfers control.
+ *
+ * If this bit is set that means the fastclear eliminate is not needed for these
+ * embeddable colors.
+ */
+#define AMD_FMT_MOD_DCC_CONSTANT_ENCODE_SHIFT 19
+#define AMD_FMT_MOD_DCC_CONSTANT_ENCODE_MASK 0x1
+
+/*
+ * The below fields are for accounting for per GPU differences. These are only
+ * relevant for GFX9 and later and if the tile field is *_X/_T.
+ *
+ * PIPE_XOR_BITS = always needed
+ * BANK_XOR_BITS = only for TILE_VER_GFX9
+ * PACKERS = only for TILE_VER_GFX10_RBPLUS
+ * RB = only for TILE_VER_GFX9 & DCC
+ * PIPE = only for TILE_VER_GFX9 & DCC & (DCC_RETILE | DCC_PIPE_ALIGN)
+ */
+#define AMD_FMT_MOD_PIPE_XOR_BITS_SHIFT 20
+#define AMD_FMT_MOD_PIPE_XOR_BITS_MASK 0x7
+#define AMD_FMT_MOD_BANK_XOR_BITS_SHIFT 23
+#define AMD_FMT_MOD_BANK_XOR_BITS_MASK 0x7
+#define AMD_FMT_MOD_PACKERS_SHIFT 26 /* bits 26-28, after BANK_XOR_BITS (23-25) */
+#define AMD_FMT_MOD_PACKERS_MASK 0x7
+#define AMD_FMT_MOD_RB_SHIFT 29
+#define AMD_FMT_MOD_RB_MASK 0x7
+#define AMD_FMT_MOD_PIPE_SHIFT 32
+#define AMD_FMT_MOD_PIPE_MASK 0x7
+
+#define AMD_FMT_MOD_SET(field, value) \
+	((uint64_t)(value) << AMD_FMT_MOD_##field##_SHIFT)
+#define AMD_FMT_MOD_GET(field, value) \
+	(((value) >> AMD_FMT_MOD_##field##_SHIFT) & AMD_FMT_MOD_##field##_MASK)
+#define AMD_FMT_MOD_CLEAR(field) \
+	(~((uint64_t)AMD_FMT_MOD_##field##_MASK << AMD_FMT_MOD_##field##_SHIFT))
+
 #if defined(__cplusplus)
 }
 #endif
-- 
cgit v1.2.3


From 8dba29603b5c8bfca2bf90aeb83d05a236df967b Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Thu, 29 Oct 2020 15:05:02 +0800
Subject: sctp: add SCTP_REMOTE_UDP_ENCAPS_PORT sockopt

This patch is to implement:

  rfc6951#section-6.1: Get or Set the Remote UDP Encapsulation Port Number

with the param of the struct:

  struct sctp_udpencaps {
    sctp_assoc_t sue_assoc_id;
    struct sockaddr_storage sue_address;
    uint16_t sue_port;
  };

The encap_port of a sock, assoc or transport can be changed by users,
which also means that different transports of the same asoc are allowed
to have different encap_port values.

v1->v2:
  - no change.
v2->v3:
  - fix the endian warning when setting values between encap_port and
    sue_port.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/sctp.h |   7 +++
 net/sctp/socket.c         | 114 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 121 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index 28ad40d9acba..cb78e7a739da 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -140,6 +140,7 @@ typedef __s32 sctp_assoc_t;
 #define SCTP_ECN_SUPPORTED	130
 #define SCTP_EXPOSE_POTENTIALLY_FAILED_STATE	131
 #define SCTP_EXPOSE_PF_STATE	SCTP_EXPOSE_POTENTIALLY_FAILED_STATE
+#define SCTP_REMOTE_UDP_ENCAPS_PORT	132
 
 /* PR-SCTP policies */
 #define SCTP_PR_SCTP_NONE	0x0000
@@ -1197,6 +1198,12 @@ struct sctp_event {
 	uint8_t se_on;
 };
 
+struct sctp_udpencaps {
+	sctp_assoc_t sue_assoc_id;
+	struct sockaddr_storage sue_address;
+	uint16_t sue_port;
+};
+
 /* SCTP Stream schedulers */
 enum sctp_sched_type {
 	SCTP_SS_FCFS,
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 09b94cd7ca37..2a9ee9b3e46c 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4417,6 +4417,55 @@ out:
 	return retval;
 }
 
+static int sctp_setsockopt_encap_port(struct sock *sk,
+				      struct sctp_udpencaps *encap,
+				      unsigned int optlen)
+{
+	struct sctp_association *asoc;
+	struct sctp_transport *t;
+	__be16 encap_port;
+
+	if (optlen != sizeof(*encap))
+		return -EINVAL;
+
+	/* If an address other than INADDR_ANY is specified, and
+	 * no transport is found, then the request is invalid.
+	 */
+	encap_port = (__force __be16)encap->sue_port;
+	if (!sctp_is_any(sk, (union sctp_addr *)&encap->sue_address)) {
+		t = sctp_addr_id2transport(sk, &encap->sue_address,
+					   encap->sue_assoc_id);
+		if (!t)
+			return -EINVAL;
+
+		t->encap_port = encap_port;
+		return 0;
+	}
+
+	/* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the
+	 * socket is a one to many style socket, and an association
+	 * was not found, then the id was invalid.
+	 */
+	asoc = sctp_id2assoc(sk, encap->sue_assoc_id);
+	if (!asoc && encap->sue_assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP))
+		return -EINVAL;
+
+	/* If changes are for association, also apply encap_port to
+	 * each transport.
+	 */
+	if (asoc) {
+		list_for_each_entry(t, &asoc->peer.transport_addr_list,
+				    transports)
+			t->encap_port = encap_port;
+
+		return 0;
+	}
+
+	sctp_sk(sk)->encap_port = encap_port;
+	return 0;
+}
+
 /* API 6.2 setsockopt(), getsockopt()
  *
  * Applications use setsockopt() and getsockopt() to set or retrieve
@@ -4636,6 +4685,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
 	case SCTP_EXPOSE_POTENTIALLY_FAILED_STATE:
 		retval = sctp_setsockopt_pf_expose(sk, kopt, optlen);
 		break;
+	case SCTP_REMOTE_UDP_ENCAPS_PORT:
+		retval = sctp_setsockopt_encap_port(sk, kopt, optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
@@ -7791,6 +7843,65 @@ out:
 	return retval;
 }
 
+static int sctp_getsockopt_encap_port(struct sock *sk, int len,
+				      char __user *optval, int __user *optlen)
+{
+	struct sctp_association *asoc;
+	struct sctp_udpencaps encap;
+	struct sctp_transport *t;
+	__be16 encap_port;
+
+	if (len < sizeof(encap))
+		return -EINVAL;
+
+	len = sizeof(encap);
+	if (copy_from_user(&encap, optval, len))
+		return -EFAULT;
+
+	/* If an address other than INADDR_ANY is specified, and
+	 * no transport is found, then the request is invalid.
+	 */
+	if (!sctp_is_any(sk, (union sctp_addr *)&encap.sue_address)) {
+		t = sctp_addr_id2transport(sk, &encap.sue_address,
+					   encap.sue_assoc_id);
+		if (!t) {
+			pr_debug("%s: failed no transport\n", __func__);
+			return -EINVAL;
+		}
+
+		encap_port = t->encap_port;
+		goto out;
+	}
+
+	/* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the
+	 * socket is a one to many style socket, and an association
+	 * was not found, then the id was invalid.
+	 */
+	asoc = sctp_id2assoc(sk, encap.sue_assoc_id);
+	if (!asoc && encap.sue_assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP)) {
+		pr_debug("%s: failed no association\n", __func__);
+		return -EINVAL;
+	}
+
+	if (asoc) {
+		encap_port = asoc->encap_port;
+		goto out;
+	}
+
+	encap_port = sctp_sk(sk)->encap_port;
+
+out:
+	encap.sue_port = (__force uint16_t)encap_port;
+	if (copy_to_user(optval, &encap, len))
+		return -EFAULT;
+
+	if (put_user(len, optlen))
+		return -EFAULT;
+
+	return 0;
+}
+
 static int sctp_getsockopt(struct sock *sk, int level, int optname,
 			   char __user *optval, int __user *optlen)
 {
@@ -8011,6 +8122,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
 	case SCTP_EXPOSE_POTENTIALLY_FAILED_STATE:
 		retval = sctp_getsockopt_pf_expose(sk, len, optval, optlen);
 		break;
+	case SCTP_REMOTE_UDP_ENCAPS_PORT:
+		retval = sctp_getsockopt_encap_port(sk, len, optval, optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
-- 
cgit v1.2.3


From 955062b03fa62b802a1ee34fbb04e39f7a70ae73 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Thu, 29 Oct 2020 01:38:31 +0200
Subject: net: bridge: mcast: add support for raw L2 multicast groups

Extend the bridge multicast control and data path to configure routes
for L2 (non-IP) multicast groups.

The uapi struct br_mdb_entry union u is extended with another variant,
mac_addr, which does not change the structure size, and which is valid
when the proto field is zero.

To be compatible with the forwarding code that is already in place,
which acts as an IGMP/MLD snooping bridge with querier capabilities, we
need to declare that, for L2 MDB entries (for which there exists no such
thing as IGMP/MLD snooping/querying), there is always a querier.
Otherwise, traffic for these entries would be flooded to all bridge ports
and not just to those that are members of the L2 multicast group.

Needless to say, only permanent L2 multicast groups can be installed on
a bridge port.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://lore.kernel.org/r/20201028233831.610076-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/if_bridge.h      |  1 +
 include/uapi/linux/if_bridge.h |  1 +
 net/bridge/br_device.c         |  2 +-
 net/bridge/br_input.c          |  2 +-
 net/bridge/br_mdb.c            | 24 ++++++++++++++++++++++--
 net/bridge/br_multicast.c      | 13 +++++++++----
 net/bridge/br_private.h        | 10 ++++++++--
 7 files changed, 43 insertions(+), 10 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 556caed00258..b979005ea39c 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -25,6 +25,7 @@ struct br_ip {
 #if IS_ENABLED(CONFIG_IPV6)
 		struct in6_addr ip6;
 #endif
+		unsigned char	mac_addr[ETH_ALEN];
 	} dst;
 	__be16		proto;
 	__u16           vid;
diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index d975e1223884..13d59c51ef5b 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -651,6 +651,7 @@ struct br_mdb_entry {
 		union {
 			__be32	ip4;
 			struct in6_addr ip6;
+			unsigned char mac_addr[ETH_ALEN];
 		} u;
 		__be16		proto;
 	} addr;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 9b5d62744acc..2400a66fe76e 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -93,7 +93,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 
 		mdst = br_mdb_get(br, skb, vid);
 		if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
-		    br_multicast_querier_exists(br, eth_hdr(skb)))
+		    br_multicast_querier_exists(br, eth_hdr(skb), mdst))
 			br_multicast_flood(mdst, skb, false, true);
 		else
 			br_flood(br, skb, BR_PKT_MULTICAST, false, true);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index bece03bf83c4..21808985f268 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -134,7 +134,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
 	case BR_PKT_MULTICAST:
 		mdst = br_mdb_get(br, skb, vid);
 		if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
-		    br_multicast_querier_exists(br, eth_hdr(skb))) {
+		    br_multicast_querier_exists(br, eth_hdr(skb), mdst)) {
 			if ((mdst && mdst->host_joined) ||
 			    br_multicast_is_router(br)) {
 				local_rcv = true;
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index e15bab19a012..3c8863418d0b 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -87,6 +87,8 @@ static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip,
 			ip->src.ip6 = nla_get_in6_addr(mdb_attrs[MDBE_ATTR_SOURCE]);
 		break;
 #endif
+	default:
+		ether_addr_copy(ip->dst.mac_addr, entry->addr.u.mac_addr);
 	}
 
 }
@@ -174,9 +176,11 @@ static int __mdb_fill_info(struct sk_buff *skb,
 	if (mp->addr.proto == htons(ETH_P_IP))
 		e.addr.u.ip4 = mp->addr.dst.ip4;
 #if IS_ENABLED(CONFIG_IPV6)
-	if (mp->addr.proto == htons(ETH_P_IPV6))
+	else if (mp->addr.proto == htons(ETH_P_IPV6))
 		e.addr.u.ip6 = mp->addr.dst.ip6;
 #endif
+	else
+		ether_addr_copy(e.addr.u.mac_addr, mp->addr.dst.mac_addr);
 	e.addr.proto = mp->addr.proto;
 	nest_ent = nla_nest_start_noflag(skb,
 					 MDBA_MDB_ENTRY_INFO);
@@ -210,6 +214,8 @@ static int __mdb_fill_info(struct sk_buff *skb,
 		}
 		break;
 #endif
+	default:
+		ether_addr_copy(e.addr.u.mac_addr, mp->addr.dst.mac_addr);
 	}
 	if (p) {
 		if (nla_put_u8(skb, MDBA_MDB_EATTR_RTPROT, p->rt_protocol))
@@ -562,9 +568,12 @@ void br_mdb_notify(struct net_device *dev,
 		if (mp->addr.proto == htons(ETH_P_IP))
 			ip_eth_mc_map(mp->addr.dst.ip4, mdb.addr);
 #if IS_ENABLED(CONFIG_IPV6)
-		else
+		else if (mp->addr.proto == htons(ETH_P_IPV6))
 			ipv6_eth_mc_map(&mp->addr.dst.ip6, mdb.addr);
 #endif
+		else
+			ether_addr_copy(mdb.addr, mp->addr.dst.mac_addr);
+
 		mdb.obj.orig_dev = pg->key.port->dev;
 		switch (type) {
 		case RTM_NEWMDB:
@@ -693,6 +702,12 @@ static bool is_valid_mdb_entry(struct br_mdb_entry *entry,
 			return false;
 		}
 #endif
+	} else if (entry->addr.proto == 0) {
+		/* L2 mdb */
+		if (!is_multicast_ether_addr(entry->addr.u.mac_addr)) {
+			NL_SET_ERR_MSG_MOD(extack, "L2 entry group is not multicast");
+			return false;
+		}
 	} else {
 		NL_SET_ERR_MSG_MOD(extack, "Unknown entry protocol");
 		return false;
@@ -849,6 +864,11 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
 		}
 	}
 
+	if (br_group_is_l2(&group) && entry->state != MDB_PERMANENT) {
+		NL_SET_ERR_MSG_MOD(extack, "Only permanent L2 entries allowed");
+		return -EINVAL;
+	}
+
 	mp = br_mdb_ip_get(br, &group);
 	if (!mp) {
 		mp = br_multicast_new_group(br, &group);
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index eae898c3cff7..484820c223a3 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -179,7 +179,8 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
 		break;
 #endif
 	default:
-		return NULL;
+		ip.proto = 0;
+		ether_addr_copy(ip.dst.mac_addr, eth_hdr(skb)->h_dest);
 	}
 
 	return br_mdb_ip_get_rcu(br, &ip);
@@ -1203,6 +1204,10 @@ void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify)
 		if (notify)
 			br_mdb_notify(mp->br->dev, mp, NULL, RTM_NEWMDB);
 	}
+
+	if (br_group_is_l2(&mp->addr))
+		return;
+
 	mod_timer(&mp->timer, jiffies + mp->br->multicast_membership_interval);
 }
 
@@ -1254,8 +1259,8 @@ __br_multicast_add_group(struct net_bridge *br,
 			break;
 	}
 
-	p = br_multicast_new_port_group(port, group, *pp, 0, src, filter_mode,
-					RTPROT_KERNEL);
+	p = br_multicast_new_port_group(port, group, *pp, 0, src,
+					filter_mode, RTPROT_KERNEL);
 	if (unlikely(!p)) {
 		p = ERR_PTR(-ENOMEM);
 		goto out;
@@ -3690,7 +3695,7 @@ bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto)
 	memset(&eth, 0, sizeof(eth));
 	eth.h_proto = htons(proto);
 
-	ret = br_multicast_querier_exists(br, &eth);
+	ret = br_multicast_querier_exists(br, &eth, NULL);
 
 unlock:
 	rcu_read_unlock();
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 905d406a2fc7..4c691c371884 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -854,6 +854,11 @@ void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg,
 void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp,
 				       struct net_bridge_port_group *sg);
 
+static inline bool br_group_is_l2(const struct br_ip *group)
+{
+	return group->proto == 0;
+}
+
 #define mlock_dereference(X, br) \
 	rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
 
@@ -885,7 +890,8 @@ __br_multicast_querier_exists(struct net_bridge *br,
 }
 
 static inline bool br_multicast_querier_exists(struct net_bridge *br,
-					       struct ethhdr *eth)
+					       struct ethhdr *eth,
+					       const struct net_bridge_mdb_entry *mdb)
 {
 	switch (eth->h_proto) {
 	case (htons(ETH_P_IP)):
@@ -897,7 +903,7 @@ static inline bool br_multicast_querier_exists(struct net_bridge *br,
 			&br->ip6_other_query, true);
 #endif
 	default:
-		return false;
+		return !!mdb && br_group_is_l2(&mdb->addr);
 	}
 }
 
-- 
cgit v1.2.3


From ccf0a4b7fc688561428290265e4effde41446668 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@netfilter.org>
Date: Thu, 29 Oct 2020 16:39:48 +0100
Subject: netfilter: ipset: Add bucketsize parameter to all hash types

The parameter defines the upper limit on the number of elements in any hash
bucket when adding new entries from userspace - if the limit would be exceeded,
ipset doubles the hash size and rehashes. This means the set may consume more
memory, but matching against the set is evaluated faster.

Signed-off-by: Jozsef Kadlecsik <kadlec@netfilter.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/ipset/ip_set.h       |  5 ++++
 include/uapi/linux/netfilter/ipset/ip_set.h  |  4 ++-
 net/netfilter/ipset/ip_set_core.c            |  2 ++
 net/netfilter/ipset/ip_set_hash_gen.h        | 38 +++++++++++++++++-----------
 net/netfilter/ipset/ip_set_hash_ip.c         |  6 +++--
 net/netfilter/ipset/ip_set_hash_ipmac.c      |  5 ++--
 net/netfilter/ipset/ip_set_hash_ipmark.c     |  6 +++--
 net/netfilter/ipset/ip_set_hash_ipport.c     |  6 +++--
 net/netfilter/ipset/ip_set_hash_ipportip.c   |  6 +++--
 net/netfilter/ipset/ip_set_hash_ipportnet.c  |  6 +++--
 net/netfilter/ipset/ip_set_hash_mac.c        |  5 ++--
 net/netfilter/ipset/ip_set_hash_net.c        |  6 +++--
 net/netfilter/ipset/ip_set_hash_netiface.c   |  6 +++--
 net/netfilter/ipset/ip_set_hash_netnet.c     |  6 +++--
 net/netfilter/ipset/ip_set_hash_netport.c    |  6 +++--
 net/netfilter/ipset/ip_set_hash_netportnet.c |  6 +++--
 16 files changed, 79 insertions(+), 40 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index ab192720e2d6..46d9a0c26c67 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -198,6 +198,9 @@ struct ip_set_region {
 	u32 elements;		/* Number of elements vs timeout */
 };
 
+/* The max revision number supported by any set type + 1 */
+#define IPSET_REVISION_MAX	9
+
 /* The core set type structure */
 struct ip_set_type {
 	struct list_head list;
@@ -215,6 +218,8 @@ struct ip_set_type {
 	u8 family;
 	/* Type revisions */
 	u8 revision_min, revision_max;
+	/* Revision-specific supported (create) flags */
+	u8 create_flags[IPSET_REVISION_MAX+1];
 	/* Set features to control swapping */
 	u16 features;
 
diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h
index 11a72a938eb1..398f7b909b7d 100644
--- a/include/uapi/linux/netfilter/ipset/ip_set.h
+++ b/include/uapi/linux/netfilter/ipset/ip_set.h
@@ -96,7 +96,7 @@ enum {
 	IPSET_ATTR_HASHSIZE,
 	IPSET_ATTR_MAXELEM,
 	IPSET_ATTR_NETMASK,
-	IPSET_ATTR_PROBES,
+	IPSET_ATTR_BUCKETSIZE,	/* was unused IPSET_ATTR_PROBES */
 	IPSET_ATTR_RESIZE,
 	IPSET_ATTR_SIZE,
 	/* Kernel-only */
@@ -214,6 +214,8 @@ enum ipset_cadt_flags {
 enum ipset_create_flags {
 	IPSET_CREATE_FLAG_BIT_FORCEADD = 0,
 	IPSET_CREATE_FLAG_FORCEADD = (1 << IPSET_CREATE_FLAG_BIT_FORCEADD),
+	IPSET_CREATE_FLAG_BIT_BUCKETSIZE = 1,
+	IPSET_CREATE_FLAG_BUCKETSIZE = (1 << IPSET_CREATE_FLAG_BIT_BUCKETSIZE),
 	IPSET_CREATE_FLAG_BIT_MAX = 7,
 };
 
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index e3c00dacec5c..e76bfca2d3ef 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1109,6 +1109,8 @@ static int ip_set_create(struct net *net, struct sock *ctnl,
 		ret = -IPSET_ERR_PROTOCOL;
 		goto put_out;
 	}
+	/* Set create flags depending on the type revision */
+	set->flags |= set->type->create_flags[revision];
 
 	ret = set->type->create(net, set, tb, flags);
 	if (ret != 0)
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 521e970be402..4e3544442b26 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -37,18 +37,18 @@
  */
 
 /* Number of elements to store in an initial array block */
-#define AHASH_INIT_SIZE			4
+#define AHASH_INIT_SIZE			2
 /* Max number of elements to store in an array block */
-#define AHASH_MAX_SIZE			(3 * AHASH_INIT_SIZE)
+#define AHASH_MAX_SIZE			(6 * AHASH_INIT_SIZE)
 /* Max muber of elements in the array block when tuned */
 #define AHASH_MAX_TUNED			64
 
+#define AHASH_MAX(h)			((h)->bucketsize)
+
 /* Max number of elements can be tuned */
 #ifdef IP_SET_HASH_WITH_MULTI
-#define AHASH_MAX(h)			((h)->ahash_max)
-
 static u8
-tune_ahash_max(u8 curr, u32 multi)
+tune_bucketsize(u8 curr, u32 multi)
 {
 	u32 n;
 
@@ -61,12 +61,10 @@ tune_ahash_max(u8 curr, u32 multi)
 	 */
 	return n > curr && n <= AHASH_MAX_TUNED ? n : curr;
 }
-
-#define TUNE_AHASH_MAX(h, multi)	\
-	((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi))
+#define TUNE_BUCKETSIZE(h, multi)	\
+	((h)->bucketsize = tune_bucketsize((h)->bucketsize, multi))
 #else
-#define AHASH_MAX(h)			AHASH_MAX_SIZE
-#define TUNE_AHASH_MAX(h, multi)
+#define TUNE_BUCKETSIZE(h, multi)
 #endif
 
 /* A hash bucket */
@@ -321,9 +319,7 @@ struct htype {
 #ifdef IP_SET_HASH_WITH_MARKMASK
 	u32 markmask;		/* markmask value for mark mask to store */
 #endif
-#ifdef IP_SET_HASH_WITH_MULTI
-	u8 ahash_max;		/* max elements in an array block */
-#endif
+	u8 bucketsize;		/* max elements in an array block */
 #ifdef IP_SET_HASH_WITH_NETMASK
 	u8 netmask;		/* netmask value for subnets to store */
 #endif
@@ -950,7 +946,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 		goto set_full;
 	/* Create a new slot */
 	if (n->pos >= n->size) {
-		TUNE_AHASH_MAX(h, multi);
+		TUNE_BUCKETSIZE(h, multi);
 		if (n->size >= AHASH_MAX(h)) {
 			/* Trigger rehashing */
 			mtype_data_next(&h->next, d);
@@ -1305,6 +1301,9 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
 	if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask))
 		goto nla_put_failure;
 #endif
+	if (set->flags & IPSET_CREATE_FLAG_BUCKETSIZE &&
+	    nla_put_u8(skb, IPSET_ATTR_BUCKETSIZE, h->bucketsize))
+		goto nla_put_failure;
 	if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
 	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
 	    nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(elements)))
@@ -1548,7 +1547,16 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
 	h->markmask = markmask;
 #endif
 	get_random_bytes(&h->initval, sizeof(h->initval));
-
+	h->bucketsize = AHASH_MAX_SIZE;
+	if (tb[IPSET_ATTR_BUCKETSIZE]) {
+		h->bucketsize = nla_get_u8(tb[IPSET_ATTR_BUCKETSIZE]);
+		if (h->bucketsize < AHASH_INIT_SIZE)
+			h->bucketsize = AHASH_INIT_SIZE;
+		else if (h->bucketsize > AHASH_MAX_SIZE)
+			h->bucketsize = AHASH_MAX_SIZE;
+		else if (h->bucketsize % 2)
+			h->bucketsize += 1;
+	}
 	t->htable_bits = hbits;
 	t->maxelem = h->maxelem / ahash_numof_locks(hbits);
 	RCU_INIT_POINTER(h->table, t);
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index 5d6d68eaf6a9..0495d515c498 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -23,7 +23,8 @@
 /*				1	   Counters support */
 /*				2	   Comments support */
 /*				3	   Forceadd support */
-#define IPSET_TYPE_REV_MAX	4	/* skbinfo support  */
+/*				4	   skbinfo support */
+#define IPSET_TYPE_REV_MAX	5	/* bucketsize support  */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
@@ -277,11 +278,12 @@ static struct ip_set_type hash_ip_type __read_mostly = {
 	.family		= NFPROTO_UNSPEC,
 	.revision_min	= IPSET_TYPE_REV_MIN,
 	.revision_max	= IPSET_TYPE_REV_MAX,
+	.create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
 	.create		= hash_ip_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
 		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
-		[IPSET_ATTR_PROBES]	= { .type = NLA_U8 },
+		[IPSET_ATTR_BUCKETSIZE]	= { .type = NLA_U8 },
 		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
 		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
 		[IPSET_ATTR_NETMASK]	= { .type = NLA_U8  },
diff --git a/net/netfilter/ipset/ip_set_hash_ipmac.c b/net/netfilter/ipset/ip_set_hash_ipmac.c
index eceb7bc4a93a..2655501f9fe3 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmac.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmac.c
@@ -23,7 +23,7 @@
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
 #define IPSET_TYPE_REV_MIN	0
-#define IPSET_TYPE_REV_MAX	0
+#define IPSET_TYPE_REV_MAX	1	/* bucketsize support  */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Tomasz Chilinski <tomasz.chilinski@chilan.com>");
@@ -268,11 +268,12 @@ static struct ip_set_type hash_ipmac_type __read_mostly = {
 	.family		= NFPROTO_UNSPEC,
 	.revision_min	= IPSET_TYPE_REV_MIN,
 	.revision_max	= IPSET_TYPE_REV_MAX,
+	.create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
 	.create		= hash_ipmac_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
 		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
-		[IPSET_ATTR_PROBES]	= { .type = NLA_U8 },
+		[IPSET_ATTR_BUCKETSIZE]	= { .type = NLA_U8 },
 		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
 		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
 		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index aba1df617d6e..5bbed85d0e47 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -21,7 +21,8 @@
 
 #define IPSET_TYPE_REV_MIN	0
 /*				1	   Forceadd support */
-#define IPSET_TYPE_REV_MAX	2	/* skbinfo support  */
+/*				2	   skbinfo support */
+#define IPSET_TYPE_REV_MAX	3	/* bucketsize support  */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Vytas Dauksa <vytas.dauksa@smoothwall.net>");
@@ -274,12 +275,13 @@ static struct ip_set_type hash_ipmark_type __read_mostly = {
 	.family		= NFPROTO_UNSPEC,
 	.revision_min	= IPSET_TYPE_REV_MIN,
 	.revision_max	= IPSET_TYPE_REV_MAX,
+	.create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
 	.create		= hash_ipmark_create,
 	.create_policy	= {
 		[IPSET_ATTR_MARKMASK]	= { .type = NLA_U32 },
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
 		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
-		[IPSET_ATTR_PROBES]	= { .type = NLA_U8 },
+		[IPSET_ATTR_BUCKETSIZE]	= { .type = NLA_U8 },
 		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
 		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
 		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 1ff228717e29..c1ac2e89e2d3 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -25,7 +25,8 @@
 /*				2    Counters support added */
 /*				3    Comments support added */
 /*				4    Forceadd support added */
-#define IPSET_TYPE_REV_MAX	5 /* skbinfo support added */
+/*				5    skbinfo support added */
+#define IPSET_TYPE_REV_MAX	6 /* bucketsize support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
@@ -341,11 +342,12 @@ static struct ip_set_type hash_ipport_type __read_mostly = {
 	.family		= NFPROTO_UNSPEC,
 	.revision_min	= IPSET_TYPE_REV_MIN,
 	.revision_max	= IPSET_TYPE_REV_MAX,
+	.create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
 	.create		= hash_ipport_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
 		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
-		[IPSET_ATTR_PROBES]	= { .type = NLA_U8 },
+		[IPSET_ATTR_BUCKETSIZE]	= { .type = NLA_U8 },
 		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
 		[IPSET_ATTR_PROTO]	= { .type = NLA_U8 },
 		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index fa88afd812fa..d3f4a672986e 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -25,7 +25,8 @@
 /*				2    Counters support added */
 /*				3    Comments support added */
 /*				4    Forceadd support added */
-#define IPSET_TYPE_REV_MAX	5 /* skbinfo support added */
+/*				5    skbinfo support added */
+#define IPSET_TYPE_REV_MAX	6 /* bucketsize support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
@@ -356,11 +357,12 @@ static struct ip_set_type hash_ipportip_type __read_mostly = {
 	.family		= NFPROTO_UNSPEC,
 	.revision_min	= IPSET_TYPE_REV_MIN,
 	.revision_max	= IPSET_TYPE_REV_MAX,
+	.create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
 	.create		= hash_ipportip_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
 		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
-		[IPSET_ATTR_PROBES]	= { .type = NLA_U8 },
+		[IPSET_ATTR_BUCKETSIZE]	= { .type = NLA_U8 },
 		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
 		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
 		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index eef6ecfcb409..8f7fe360736a 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -27,7 +27,8 @@
 /*				4    Counters support added */
 /*				5    Comments support added */
 /*				6    Forceadd support added */
-#define IPSET_TYPE_REV_MAX	7 /* skbinfo support added */
+/*				7    skbinfo support added */
+#define IPSET_TYPE_REV_MAX	8 /* bucketsize support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
@@ -513,11 +514,12 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = {
 	.family		= NFPROTO_UNSPEC,
 	.revision_min	= IPSET_TYPE_REV_MIN,
 	.revision_max	= IPSET_TYPE_REV_MAX,
+	.create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
 	.create		= hash_ipportnet_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
 		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
-		[IPSET_ATTR_PROBES]	= { .type = NLA_U8 },
+		[IPSET_ATTR_BUCKETSIZE]	= { .type = NLA_U8 },
 		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
 		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
 		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c
index 0b61593165ef..00dd7e20df3c 100644
--- a/net/netfilter/ipset/ip_set_hash_mac.c
+++ b/net/netfilter/ipset/ip_set_hash_mac.c
@@ -16,7 +16,7 @@
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
 #define IPSET_TYPE_REV_MIN	0
-#define IPSET_TYPE_REV_MAX	0
+#define IPSET_TYPE_REV_MAX	1	/* bucketsize support */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
@@ -125,11 +125,12 @@ static struct ip_set_type hash_mac_type __read_mostly = {
 	.family		= NFPROTO_UNSPEC,
 	.revision_min	= IPSET_TYPE_REV_MIN,
 	.revision_max	= IPSET_TYPE_REV_MAX,
+	.create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
 	.create		= hash_mac_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
 		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
-		[IPSET_ATTR_PROBES]	= { .type = NLA_U8 },
+		[IPSET_ATTR_BUCKETSIZE]	= { .type = NLA_U8 },
 		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
 		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
 		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 136cf0781d3a..d366e816b6ed 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -24,7 +24,8 @@
 /*				3    Counters support added */
 /*				4    Comments support added */
 /*				5    Forceadd support added */
-#define IPSET_TYPE_REV_MAX	6 /* skbinfo mapping support added */
+/*				6    skbinfo support added */
+#define IPSET_TYPE_REV_MAX	7 /* bucketsize support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
@@ -354,11 +355,12 @@ static struct ip_set_type hash_net_type __read_mostly = {
 	.family		= NFPROTO_UNSPEC,
 	.revision_min	= IPSET_TYPE_REV_MIN,
 	.revision_max	= IPSET_TYPE_REV_MAX,
+	.create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
 	.create		= hash_net_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
 		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
-		[IPSET_ATTR_PROBES]	= { .type = NLA_U8 },
+		[IPSET_ATTR_BUCKETSIZE]	= { .type = NLA_U8 },
 		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
 		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
 		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index be5e95a0d876..38b1d77584d4 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -26,7 +26,8 @@
 /*				4    Comments support added */
 /*				5    Forceadd support added */
 /*				6    skbinfo support added */
-#define IPSET_TYPE_REV_MAX	7 /* interface wildcard support added */
+/*				7    interface wildcard support added */
+#define IPSET_TYPE_REV_MAX	8 /* bucketsize support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
@@ -470,11 +471,12 @@ static struct ip_set_type hash_netiface_type __read_mostly = {
 	.family		= NFPROTO_UNSPEC,
 	.revision_min	= IPSET_TYPE_REV_MIN,
 	.revision_max	= IPSET_TYPE_REV_MAX,
+	.create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
 	.create		= hash_netiface_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
 		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
-		[IPSET_ATTR_PROBES]	= { .type = NLA_U8 },
+		[IPSET_ATTR_BUCKETSIZE]	= { .type = NLA_U8 },
 		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
 		[IPSET_ATTR_PROTO]	= { .type = NLA_U8 },
 		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index da4ef910b12d..0cc7970f36e9 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -22,7 +22,8 @@
 
 #define IPSET_TYPE_REV_MIN	0
 /*				1	   Forceadd support added */
-#define IPSET_TYPE_REV_MAX	2	/* skbinfo support added */
+/*				2	   skbinfo support added */
+#define IPSET_TYPE_REV_MAX	3	/* bucketsize support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
@@ -459,11 +460,12 @@ static struct ip_set_type hash_netnet_type __read_mostly = {
 	.family		= NFPROTO_UNSPEC,
 	.revision_min	= IPSET_TYPE_REV_MIN,
 	.revision_max	= IPSET_TYPE_REV_MAX,
+	.create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
 	.create		= hash_netnet_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
 		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
-		[IPSET_ATTR_PROBES]	= { .type = NLA_U8 },
+		[IPSET_ATTR_BUCKETSIZE]	= { .type = NLA_U8 },
 		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
 		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
 		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 34448df80fb9..b356d7d85e34 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -26,7 +26,8 @@
 /*				4    Counters support added */
 /*				5    Comments support added */
 /*				6    Forceadd support added */
-#define IPSET_TYPE_REV_MAX	7 /* skbinfo support added */
+/*				7    skbinfo support added */
+#define IPSET_TYPE_REV_MAX	8 /* bucketsize support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
@@ -460,11 +461,12 @@ static struct ip_set_type hash_netport_type __read_mostly = {
 	.family		= NFPROTO_UNSPEC,
 	.revision_min	= IPSET_TYPE_REV_MIN,
 	.revision_max	= IPSET_TYPE_REV_MAX,
+	.create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
 	.create		= hash_netport_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
 		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
-		[IPSET_ATTR_PROBES]	= { .type = NLA_U8 },
+		[IPSET_ATTR_BUCKETSIZE]	= { .type = NLA_U8 },
 		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
 		[IPSET_ATTR_PROTO]	= { .type = NLA_U8 },
 		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index 934c1712cba8..eeb39688f26f 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -23,7 +23,8 @@
 #define IPSET_TYPE_REV_MIN	0
 /*				0    Comments support added */
 /*				1    Forceadd support added */
-#define IPSET_TYPE_REV_MAX	2 /* skbinfo support added */
+/*				2    skbinfo support added */
+#define IPSET_TYPE_REV_MAX	3 /* bucketsize support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
@@ -558,11 +559,12 @@ static struct ip_set_type hash_netportnet_type __read_mostly = {
 	.family		= NFPROTO_UNSPEC,
 	.revision_min	= IPSET_TYPE_REV_MIN,
 	.revision_max	= IPSET_TYPE_REV_MAX,
+	.create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
 	.create		= hash_netportnet_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
 		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
-		[IPSET_ATTR_PROBES]	= { .type = NLA_U8 },
+		[IPSET_ATTR_BUCKETSIZE]	= { .type = NLA_U8 },
 		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
 		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
 		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
-- 
cgit v1.2.3


From 3976ca101990ca11ddf51f38bec7b86c19d0ca6f Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@netfilter.org>
Date: Thu, 29 Oct 2020 16:39:49 +0100
Subject: netfilter: ipset: Expose the initval hash parameter to userspace

It makes it possible to reproduce exactly the same set after a save/restore.

Signed-off-by: Jozsef Kadlecsik <kadlec@netfilter.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/ipset/ip_set.h  |  2 +-
 net/netfilter/ipset/ip_set_hash_gen.h        | 13 +++++++++----
 net/netfilter/ipset/ip_set_hash_ip.c         |  3 ++-
 net/netfilter/ipset/ip_set_hash_ipmac.c      |  3 ++-
 net/netfilter/ipset/ip_set_hash_ipmark.c     |  3 ++-
 net/netfilter/ipset/ip_set_hash_ipport.c     |  3 ++-
 net/netfilter/ipset/ip_set_hash_ipportip.c   |  3 ++-
 net/netfilter/ipset/ip_set_hash_ipportnet.c  |  3 ++-
 net/netfilter/ipset/ip_set_hash_mac.c        |  3 ++-
 net/netfilter/ipset/ip_set_hash_net.c        |  3 ++-
 net/netfilter/ipset/ip_set_hash_netiface.c   |  3 ++-
 net/netfilter/ipset/ip_set_hash_netnet.c     |  3 ++-
 net/netfilter/ipset/ip_set_hash_netport.c    |  3 ++-
 net/netfilter/ipset/ip_set_hash_netportnet.c |  3 ++-
 14 files changed, 34 insertions(+), 17 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h
index 398f7b909b7d..6397d75899bc 100644
--- a/include/uapi/linux/netfilter/ipset/ip_set.h
+++ b/include/uapi/linux/netfilter/ipset/ip_set.h
@@ -92,7 +92,7 @@ enum {
 	/* Reserve empty slots */
 	IPSET_ATTR_CADT_MAX = 16,
 	/* Create-only specific attributes */
-	IPSET_ATTR_GC,
+	IPSET_ATTR_INITVAL,	/* was unused IPSET_ATTR_GC */
 	IPSET_ATTR_HASHSIZE,
 	IPSET_ATTR_MAXELEM,
 	IPSET_ATTR_NETMASK,
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 4e3544442b26..5f1208ad049e 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -1301,9 +1301,11 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
 	if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask))
 		goto nla_put_failure;
 #endif
-	if (set->flags & IPSET_CREATE_FLAG_BUCKETSIZE &&
-	    nla_put_u8(skb, IPSET_ATTR_BUCKETSIZE, h->bucketsize))
-		goto nla_put_failure;
+	if (set->flags & IPSET_CREATE_FLAG_BUCKETSIZE) {
+		if (nla_put_u8(skb, IPSET_ATTR_BUCKETSIZE, h->bucketsize) ||
+		    nla_put_net32(skb, IPSET_ATTR_INITVAL, htonl(h->initval)))
+			goto nla_put_failure;
+	}
 	if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
 	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
 	    nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(elements)))
@@ -1546,7 +1548,10 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
 #ifdef IP_SET_HASH_WITH_MARKMASK
 	h->markmask = markmask;
 #endif
-	get_random_bytes(&h->initval, sizeof(h->initval));
+	if (tb[IPSET_ATTR_INITVAL])
+		h->initval = ntohl(nla_get_be32(tb[IPSET_ATTR_INITVAL]));
+	else
+		get_random_bytes(&h->initval, sizeof(h->initval));
 	h->bucketsize = AHASH_MAX_SIZE;
 	if (tb[IPSET_ATTR_BUCKETSIZE]) {
 		h->bucketsize = nla_get_u8(tb[IPSET_ATTR_BUCKETSIZE]);
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index 0495d515c498..d1bef23fd4f5 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -24,7 +24,7 @@
 /*				2	   Comments support */
 /*				3	   Forceadd support */
 /*				4	   skbinfo support */
-#define IPSET_TYPE_REV_MAX	5	/* bucketsize support  */
+#define IPSET_TYPE_REV_MAX	5	/* bucketsize, initval support  */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
@@ -283,6 +283,7 @@ static struct ip_set_type hash_ip_type __read_mostly = {
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
 		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
+		[IPSET_ATTR_INITVAL]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BUCKETSIZE]	= { .type = NLA_U8 },
 		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
 		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_ipmac.c b/net/netfilter/ipset/ip_set_hash_ipmac.c
index 2655501f9fe3..467c59a83c0a 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmac.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmac.c
@@ -23,7 +23,7 @@
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
 #define IPSET_TYPE_REV_MIN	0
-#define IPSET_TYPE_REV_MAX	1	/* bucketsize support  */
+#define IPSET_TYPE_REV_MAX	1	/* bucketsize, initval support  */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Tomasz Chilinski <tomasz.chilinski@chilan.com>");
@@ -273,6 +273,7 @@ static struct ip_set_type hash_ipmac_type __read_mostly = {
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
 		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
+		[IPSET_ATTR_INITVAL]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BUCKETSIZE]	= { .type = NLA_U8 },
 		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
 		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index 5bbed85d0e47..18346d18aa16 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -22,7 +22,7 @@
 #define IPSET_TYPE_REV_MIN	0
 /*				1	   Forceadd support */
 /*				2	   skbinfo support */
-#define IPSET_TYPE_REV_MAX	3	/* bucketsize support  */
+#define IPSET_TYPE_REV_MAX	3	/* bucketsize, initval support  */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Vytas Dauksa <vytas.dauksa@smoothwall.net>");
@@ -281,6 +281,7 @@ static struct ip_set_type hash_ipmark_type __read_mostly = {
 		[IPSET_ATTR_MARKMASK]	= { .type = NLA_U32 },
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
 		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
+		[IPSET_ATTR_INITVAL]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BUCKETSIZE]	= { .type = NLA_U8 },
 		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
 		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index c1ac2e89e2d3..e1ca11196515 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -26,7 +26,7 @@
 /*				3    Comments support added */
 /*				4    Forceadd support added */
 /*				5    skbinfo support added */
-#define IPSET_TYPE_REV_MAX	6 /* bucketsize support added */
+#define IPSET_TYPE_REV_MAX	6 /* bucketsize, initval support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
@@ -347,6 +347,7 @@ static struct ip_set_type hash_ipport_type __read_mostly = {
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
 		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
+		[IPSET_ATTR_INITVAL]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BUCKETSIZE]	= { .type = NLA_U8 },
 		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
 		[IPSET_ATTR_PROTO]	= { .type = NLA_U8 },
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index d3f4a672986e..ab179e064597 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -26,7 +26,7 @@
 /*				3    Comments support added */
 /*				4    Forceadd support added */
 /*				5    skbinfo support added */
-#define IPSET_TYPE_REV_MAX	6 /* bucketsize support added */
+#define IPSET_TYPE_REV_MAX	6 /* bucketsize, initval support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
@@ -362,6 +362,7 @@ static struct ip_set_type hash_ipportip_type __read_mostly = {
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
 		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
+		[IPSET_ATTR_INITVAL]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BUCKETSIZE]	= { .type = NLA_U8 },
 		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
 		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 8f7fe360736a..8f075b44cf64 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -28,7 +28,7 @@
 /*				5    Comments support added */
 /*				6    Forceadd support added */
 /*				7    skbinfo support added */
-#define IPSET_TYPE_REV_MAX	8 /* bucketsize support added */
+#define IPSET_TYPE_REV_MAX	8 /* bucketsize, initval support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
@@ -519,6 +519,7 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = {
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
 		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
+		[IPSET_ATTR_INITVAL]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BUCKETSIZE]	= { .type = NLA_U8 },
 		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
 		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c
index 00dd7e20df3c..718814730acf 100644
--- a/net/netfilter/ipset/ip_set_hash_mac.c
+++ b/net/netfilter/ipset/ip_set_hash_mac.c
@@ -16,7 +16,7 @@
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
 #define IPSET_TYPE_REV_MIN	0
-#define IPSET_TYPE_REV_MAX	1	/* bucketsize support */
+#define IPSET_TYPE_REV_MAX	1	/* bucketsize, initval support */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
@@ -130,6 +130,7 @@ static struct ip_set_type hash_mac_type __read_mostly = {
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
 		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
+		[IPSET_ATTR_INITVAL]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BUCKETSIZE]	= { .type = NLA_U8 },
 		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
 		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index d366e816b6ed..c1a11f041ac6 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -25,7 +25,7 @@
 /*				4    Comments support added */
 /*				5    Forceadd support added */
 /*				6    skbinfo support added */
-#define IPSET_TYPE_REV_MAX	7 /* bucketsize support added */
+#define IPSET_TYPE_REV_MAX	7 /* bucketsize, initval support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
@@ -360,6 +360,7 @@ static struct ip_set_type hash_net_type __read_mostly = {
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
 		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
+		[IPSET_ATTR_INITVAL]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BUCKETSIZE]	= { .type = NLA_U8 },
 		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
 		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 38b1d77584d4..3d74169b794c 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -27,7 +27,7 @@
 /*				5    Forceadd support added */
 /*				6    skbinfo support added */
 /*				7    interface wildcard support added */
-#define IPSET_TYPE_REV_MAX	8 /* bucketsize support added */
+#define IPSET_TYPE_REV_MAX	8 /* bucketsize, initval support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
@@ -476,6 +476,7 @@ static struct ip_set_type hash_netiface_type __read_mostly = {
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
 		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
+		[IPSET_ATTR_INITVAL]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BUCKETSIZE]	= { .type = NLA_U8 },
 		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
 		[IPSET_ATTR_PROTO]	= { .type = NLA_U8 },
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index 0cc7970f36e9..6532f0505e66 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -23,7 +23,7 @@
 #define IPSET_TYPE_REV_MIN	0
 /*				1	   Forceadd support added */
 /*				2	   skbinfo support added */
-#define IPSET_TYPE_REV_MAX	3	/* bucketsize support added */
+#define IPSET_TYPE_REV_MAX	3	/* bucketsize, initval support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
@@ -465,6 +465,7 @@ static struct ip_set_type hash_netnet_type __read_mostly = {
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
 		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
+		[IPSET_ATTR_INITVAL]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BUCKETSIZE]	= { .type = NLA_U8 },
 		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
 		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index b356d7d85e34..ec1564a1cb5a 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -27,7 +27,7 @@
 /*				5    Comments support added */
 /*				6    Forceadd support added */
 /*				7    skbinfo support added */
-#define IPSET_TYPE_REV_MAX	8 /* bucketsize support added */
+#define IPSET_TYPE_REV_MAX	8 /* bucketsize, initval support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
@@ -466,6 +466,7 @@ static struct ip_set_type hash_netport_type __read_mostly = {
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
 		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
+		[IPSET_ATTR_INITVAL]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BUCKETSIZE]	= { .type = NLA_U8 },
 		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
 		[IPSET_ATTR_PROTO]	= { .type = NLA_U8 },
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index eeb39688f26f..0e91d1e82f1c 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -24,7 +24,7 @@
 /*				0    Comments support added */
 /*				1    Forceadd support added */
 /*				2    skbinfo support added */
-#define IPSET_TYPE_REV_MAX	3 /* bucketsize support added */
+#define IPSET_TYPE_REV_MAX	3 /* bucketsize, initval support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
@@ -564,6 +564,7 @@ static struct ip_set_type hash_netportnet_type __read_mostly = {
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
 		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
+		[IPSET_ATTR_INITVAL]	= { .type = NLA_U32 },
 		[IPSET_ATTR_BUCKETSIZE]	= { .type = NLA_U8 },
 		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
 		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
-- 
cgit v1.2.3


From b59e286be280fa3c2e94a0716ddcee6ba02bc8ba Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Tue, 27 Oct 2020 20:33:12 +0800
Subject: ICMPv6: Add ICMPv6 Parameter Problem, code 3 definition

Based on RFC7112, Section 6:

   IANA has added the following "Type 4 - Parameter Problem" message to
   the "Internet Control Message Protocol version 6 (ICMPv6) Parameters"
   registry:

      CODE     NAME/DESCRIPTION
       3       IPv6 First Fragment has incomplete IPv6 Header Chain

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/icmpv6.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/icmpv6.h b/include/uapi/linux/icmpv6.h
index c1661febc2dc..0564fd7ccde4 100644
--- a/include/uapi/linux/icmpv6.h
+++ b/include/uapi/linux/icmpv6.h
@@ -138,6 +138,7 @@ struct icmp6hdr {
 #define ICMPV6_HDR_FIELD		0
 #define ICMPV6_UNK_NEXTHDR		1
 #define ICMPV6_UNK_OPTION		2
+#define ICMPV6_HDR_INCOMP		3
 
 /*
  *	constants for (set|get)sockopt
-- 
cgit v1.2.3


From fa4320cefb8537a70cc28c55d311a1f569697cd3 Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Mon, 2 Nov 2020 14:21:31 +0800
Subject: f2fs: move ioctl interface definitions to separated file

Like other filesystems do, we introduce a new file f2fs.h in
include/uapi/linux/ and move the f2fs-specific ioctl interface definitions
to that file. After that, in order to use those definitions, userspace
developers only need to include the new header file rather than
copy & paste the definitions from fs/f2fs/f2fs.h.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 MAINTAINERS                 |  1 +
 fs/f2fs/f2fs.h              | 79 ----------------------------------------
 fs/f2fs/file.c              |  1 +
 include/trace/events/f2fs.h |  1 +
 include/uapi/linux/f2fs.h   | 87 +++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 90 insertions(+), 79 deletions(-)
 create mode 100644 include/uapi/linux/f2fs.h

(limited to 'include/uapi')

diff --git a/MAINTAINERS b/MAINTAINERS
index b516bb34a8d5..13d8fbd74d72 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6681,6 +6681,7 @@ F:	Documentation/filesystems/f2fs.rst
 F:	fs/f2fs/
 F:	include/linux/f2fs_fs.h
 F:	include/trace/events/f2fs.h
+F:	include/uapi/linux/f2fs.h
 
 F71805F HARDWARE MONITORING DRIVER
 M:	Jean Delvare <jdelvare@suse.com>
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index cb700d797296..99bcf4b44a9c 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -402,85 +402,6 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
 	return size <= MAX_SIT_JENTRIES(journal);
 }
 
-/*
- * f2fs-specific ioctl commands
- */
-#define F2FS_IOCTL_MAGIC		0xf5
-#define F2FS_IOC_START_ATOMIC_WRITE	_IO(F2FS_IOCTL_MAGIC, 1)
-#define F2FS_IOC_COMMIT_ATOMIC_WRITE	_IO(F2FS_IOCTL_MAGIC, 2)
-#define F2FS_IOC_START_VOLATILE_WRITE	_IO(F2FS_IOCTL_MAGIC, 3)
-#define F2FS_IOC_RELEASE_VOLATILE_WRITE	_IO(F2FS_IOCTL_MAGIC, 4)
-#define F2FS_IOC_ABORT_VOLATILE_WRITE	_IO(F2FS_IOCTL_MAGIC, 5)
-#define F2FS_IOC_GARBAGE_COLLECT	_IOW(F2FS_IOCTL_MAGIC, 6, __u32)
-#define F2FS_IOC_WRITE_CHECKPOINT	_IO(F2FS_IOCTL_MAGIC, 7)
-#define F2FS_IOC_DEFRAGMENT		_IOWR(F2FS_IOCTL_MAGIC, 8,	\
-						struct f2fs_defragment)
-#define F2FS_IOC_MOVE_RANGE		_IOWR(F2FS_IOCTL_MAGIC, 9,	\
-						struct f2fs_move_range)
-#define F2FS_IOC_FLUSH_DEVICE		_IOW(F2FS_IOCTL_MAGIC, 10,	\
-						struct f2fs_flush_device)
-#define F2FS_IOC_GARBAGE_COLLECT_RANGE	_IOW(F2FS_IOCTL_MAGIC, 11,	\
-						struct f2fs_gc_range)
-#define F2FS_IOC_GET_FEATURES		_IOR(F2FS_IOCTL_MAGIC, 12, __u32)
-#define F2FS_IOC_SET_PIN_FILE		_IOW(F2FS_IOCTL_MAGIC, 13, __u32)
-#define F2FS_IOC_GET_PIN_FILE		_IOR(F2FS_IOCTL_MAGIC, 14, __u32)
-#define F2FS_IOC_PRECACHE_EXTENTS	_IO(F2FS_IOCTL_MAGIC, 15)
-#define F2FS_IOC_RESIZE_FS		_IOW(F2FS_IOCTL_MAGIC, 16, __u64)
-#define F2FS_IOC_GET_COMPRESS_BLOCKS	_IOR(F2FS_IOCTL_MAGIC, 17, __u64)
-#define F2FS_IOC_RELEASE_COMPRESS_BLOCKS				\
-					_IOR(F2FS_IOCTL_MAGIC, 18, __u64)
-#define F2FS_IOC_RESERVE_COMPRESS_BLOCKS				\
-					_IOR(F2FS_IOCTL_MAGIC, 19, __u64)
-#define F2FS_IOC_SEC_TRIM_FILE		_IOW(F2FS_IOCTL_MAGIC, 20,	\
-						struct f2fs_sectrim_range)
-
-/*
- * should be same as XFS_IOC_GOINGDOWN.
- * Flags for going down operation used by FS_IOC_GOINGDOWN
- */
-#define F2FS_IOC_SHUTDOWN	_IOR('X', 125, __u32)	/* Shutdown */
-#define F2FS_GOING_DOWN_FULLSYNC	0x0	/* going down with full sync */
-#define F2FS_GOING_DOWN_METASYNC	0x1	/* going down with metadata */
-#define F2FS_GOING_DOWN_NOSYNC		0x2	/* going down */
-#define F2FS_GOING_DOWN_METAFLUSH	0x3	/* going down with meta flush */
-#define F2FS_GOING_DOWN_NEED_FSCK	0x4	/* going down to trigger fsck */
-
-/*
- * Flags used by F2FS_IOC_SEC_TRIM_FILE
- */
-#define F2FS_TRIM_FILE_DISCARD		0x1	/* send discard command */
-#define F2FS_TRIM_FILE_ZEROOUT		0x2	/* zero out */
-#define F2FS_TRIM_FILE_MASK		0x3
-
-struct f2fs_gc_range {
-	u32 sync;
-	u64 start;
-	u64 len;
-};
-
-struct f2fs_defragment {
-	u64 start;
-	u64 len;
-};
-
-struct f2fs_move_range {
-	u32 dst_fd;		/* destination fd */
-	u64 pos_in;		/* start position in src_fd */
-	u64 pos_out;		/* start position in dst_fd */
-	u64 len;		/* size to move */
-};
-
-struct f2fs_flush_device {
-	u32 dev_num;		/* device number to flush */
-	u32 segments;		/* # of segments to flush */
-};
-
-struct f2fs_sectrim_range {
-	u64 start;
-	u64 len;
-	u64 flags;
-};
-
 /* for inline stuff */
 #define DEF_INLINE_RESERVED_SIZE	1
 static inline int get_extra_isize(struct inode *inode);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index fe39e591e5b4..89c451f09344 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -31,6 +31,7 @@
 #include "gc.h"
 #include "trace.h"
 #include <trace/events/f2fs.h>
+#include <uapi/linux/f2fs.h>
 
 static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
 {
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index f8f1e85ff130..56b113e3cd6a 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -6,6 +6,7 @@
 #define _TRACE_F2FS_H
 
 #include <linux/tracepoint.h>
+#include <uapi/linux/f2fs.h>
 
 #define show_dev(dev)		MAJOR(dev), MINOR(dev)
 #define show_dev_ino(entry)	show_dev(entry->dev), (unsigned long)entry->ino
diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h
new file mode 100644
index 000000000000..28bcfe8d2c27
--- /dev/null
+++ b/include/uapi/linux/f2fs.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+
+#ifndef _UAPI_LINUX_F2FS_H
+#define _UAPI_LINUX_F2FS_H
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+/*
+ * f2fs-specific ioctl commands
+ */
+#define F2FS_IOCTL_MAGIC		0xf5
+#define F2FS_IOC_START_ATOMIC_WRITE	_IO(F2FS_IOCTL_MAGIC, 1)
+#define F2FS_IOC_COMMIT_ATOMIC_WRITE	_IO(F2FS_IOCTL_MAGIC, 2)
+#define F2FS_IOC_START_VOLATILE_WRITE	_IO(F2FS_IOCTL_MAGIC, 3)
+#define F2FS_IOC_RELEASE_VOLATILE_WRITE	_IO(F2FS_IOCTL_MAGIC, 4)
+#define F2FS_IOC_ABORT_VOLATILE_WRITE	_IO(F2FS_IOCTL_MAGIC, 5)
+#define F2FS_IOC_GARBAGE_COLLECT	_IOW(F2FS_IOCTL_MAGIC, 6, __u32)
+#define F2FS_IOC_WRITE_CHECKPOINT	_IO(F2FS_IOCTL_MAGIC, 7)
+#define F2FS_IOC_DEFRAGMENT		_IOWR(F2FS_IOCTL_MAGIC, 8,	\
+						struct f2fs_defragment)
+#define F2FS_IOC_MOVE_RANGE		_IOWR(F2FS_IOCTL_MAGIC, 9,	\
+						struct f2fs_move_range)
+#define F2FS_IOC_FLUSH_DEVICE		_IOW(F2FS_IOCTL_MAGIC, 10,	\
+						struct f2fs_flush_device)
+#define F2FS_IOC_GARBAGE_COLLECT_RANGE	_IOW(F2FS_IOCTL_MAGIC, 11,	\
+						struct f2fs_gc_range)
+#define F2FS_IOC_GET_FEATURES		_IOR(F2FS_IOCTL_MAGIC, 12, __u32)
+#define F2FS_IOC_SET_PIN_FILE		_IOW(F2FS_IOCTL_MAGIC, 13, __u32)
+#define F2FS_IOC_GET_PIN_FILE		_IOR(F2FS_IOCTL_MAGIC, 14, __u32)
+#define F2FS_IOC_PRECACHE_EXTENTS	_IO(F2FS_IOCTL_MAGIC, 15)
+#define F2FS_IOC_RESIZE_FS		_IOW(F2FS_IOCTL_MAGIC, 16, __u64)
+#define F2FS_IOC_GET_COMPRESS_BLOCKS	_IOR(F2FS_IOCTL_MAGIC, 17, __u64)
+#define F2FS_IOC_RELEASE_COMPRESS_BLOCKS				\
+					_IOR(F2FS_IOCTL_MAGIC, 18, __u64)
+#define F2FS_IOC_RESERVE_COMPRESS_BLOCKS				\
+					_IOR(F2FS_IOCTL_MAGIC, 19, __u64)
+#define F2FS_IOC_SEC_TRIM_FILE		_IOW(F2FS_IOCTL_MAGIC, 20,	\
+						struct f2fs_sectrim_range)
+
+/*
+ * should be same as XFS_IOC_GOINGDOWN.
+ * Flags for going down operation used by FS_IOC_GOINGDOWN
+ */
+#define F2FS_IOC_SHUTDOWN	_IOR('X', 125, __u32)	/* Shutdown */
+#define F2FS_GOING_DOWN_FULLSYNC	0x0	/* going down with full sync */
+#define F2FS_GOING_DOWN_METASYNC	0x1	/* going down with metadata */
+#define F2FS_GOING_DOWN_NOSYNC		0x2	/* going down */
+#define F2FS_GOING_DOWN_METAFLUSH	0x3	/* going down with meta flush */
+#define F2FS_GOING_DOWN_NEED_FSCK	0x4	/* going down to trigger fsck */
+
+/*
+ * Flags used by F2FS_IOC_SEC_TRIM_FILE
+ */
+#define F2FS_TRIM_FILE_DISCARD		0x1	/* send discard command */
+#define F2FS_TRIM_FILE_ZEROOUT		0x2	/* zero out */
+#define F2FS_TRIM_FILE_MASK		0x3
+
+struct f2fs_gc_range {
+	__u32 sync;
+	__u64 start;
+	__u64 len;
+};
+
+struct f2fs_defragment {
+	__u64 start;
+	__u64 len;
+};
+
+struct f2fs_move_range {
+	__u32 dst_fd;		/* destination fd */
+	__u64 pos_in;		/* start position in src_fd */
+	__u64 pos_out;		/* start position in dst_fd */
+	__u64 len;		/* size to move */
+};
+
+struct f2fs_flush_device {
+	__u32 dev_num;		/* device number to flush */
+	__u32 segments;		/* # of segments to flush */
+};
+
+struct f2fs_sectrim_range {
+	__u64 start;
+	__u64 len;
+	__u64 flags;
+};
+
+#endif /* _UAPI_LINUX_F2FS_H */
-- 
cgit v1.2.3


From 9e2a5f8cfb4d9371783e21e27bba4338401f1260 Mon Sep 17 00:00:00 2001
From: Daeho Jeong <daehojeong@google.com>
Date: Fri, 30 Oct 2020 13:10:34 +0900
Subject: f2fs: add F2FS_IOC_GET_COMPRESS_OPTION ioctl

Add a new F2FS_IOC_GET_COMPRESS_OPTION ioctl to get the compression
option of a file.

struct f2fs_comp_option {
    u8 algorithm;         => compression algorithm
                          => 0:lzo, 1:lz4, 2:zstd, 3:lzorle
    u8 log_cluster_size;  => log scale cluster size
                          => 2 ~ 8
};

struct f2fs_comp_option option;

ioctl(fd, F2FS_IOC_GET_COMPRESS_OPTION, &option);

Signed-off-by: Daeho Jeong <daehojeong@google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/file.c            | 30 ++++++++++++++++++++++++++++++
 include/uapi/linux/f2fs.h |  7 +++++++
 2 files changed, 37 insertions(+)

(limited to 'include/uapi')

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 89c451f09344..c747f5dd595c 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -3945,6 +3945,33 @@ err:
 	return ret;
 }
 
+static int f2fs_ioc_get_compress_option(struct file *filp, unsigned long arg)
+{
+	struct inode *inode = file_inode(filp);
+	struct f2fs_comp_option option;
+
+	if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
+		return -EOPNOTSUPP;
+
+	inode_lock_shared(inode);
+
+	if (!f2fs_compressed_file(inode)) {
+		inode_unlock_shared(inode);
+		return -ENODATA;
+	}
+
+	option.algorithm = F2FS_I(inode)->i_compress_algorithm;
+	option.log_cluster_size = F2FS_I(inode)->i_log_cluster_size;
+
+	inode_unlock_shared(inode);
+
+	if (copy_to_user((struct f2fs_comp_option __user *)arg, &option,
+				sizeof(option)))
+		return -EFAULT;
+
+	return 0;
+}
+
 long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
@@ -4033,6 +4060,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		return f2fs_reserve_compress_blocks(filp, arg);
 	case F2FS_IOC_SEC_TRIM_FILE:
 		return f2fs_sec_trim_file(filp, arg);
+	case F2FS_IOC_GET_COMPRESS_OPTION:
+		return f2fs_ioc_get_compress_option(filp, arg);
 	default:
 		return -ENOTTY;
 	}
@@ -4203,6 +4232,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case F2FS_IOC_RELEASE_COMPRESS_BLOCKS:
 	case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
 	case F2FS_IOC_SEC_TRIM_FILE:
+	case F2FS_IOC_GET_COMPRESS_OPTION:
 		break;
 	default:
 		return -ENOIOCTLCMD;
diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h
index 28bcfe8d2c27..872e61d78f29 100644
--- a/include/uapi/linux/f2fs.h
+++ b/include/uapi/linux/f2fs.h
@@ -36,6 +36,8 @@
 					_IOR(F2FS_IOCTL_MAGIC, 19, __u64)
 #define F2FS_IOC_SEC_TRIM_FILE		_IOW(F2FS_IOCTL_MAGIC, 20,	\
 						struct f2fs_sectrim_range)
+#define F2FS_IOC_GET_COMPRESS_OPTION	_IOR(F2FS_IOCTL_MAGIC, 21,	\
+						struct f2fs_comp_option)
 
 /*
  * should be same as XFS_IOC_GOINGDOWN.
@@ -84,4 +86,9 @@ struct f2fs_sectrim_range {
 	__u64 flags;
 };
 
+struct f2fs_comp_option {
+	__u8 algorithm;
+	__u8 log_cluster_size;
+};
+
 #endif /* _UAPI_LINUX_F2FS_H */
-- 
cgit v1.2.3


From 1dc2da5cd51f648de6d1df87e2bc6ea13f72f19c Mon Sep 17 00:00:00 2001
From: "David E. Box" <david.e.box@linux.intel.com>
Date: Wed, 28 Oct 2020 18:44:45 -0700
Subject: PCI: Add defines for Designated Vendor-Specific Extended Capability

Add PCIe Designated Vendor-Specific Extended Capability (DVSEC) and defines
for the header offsets. Defined in PCIe r5.0, sec 7.9.6.

Signed-off-by: David E. Box <david.e.box@linux.intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/uapi/linux/pci_regs.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index a95d55f9f257..8f8bd2318c6c 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -723,6 +723,7 @@
 #define PCI_EXT_CAP_ID_DPC	0x1D	/* Downstream Port Containment */
 #define PCI_EXT_CAP_ID_L1SS	0x1E	/* L1 PM Substates */
 #define PCI_EXT_CAP_ID_PTM	0x1F	/* Precision Time Measurement */
+#define PCI_EXT_CAP_ID_DVSEC	0x23	/* Designated Vendor-Specific */
 #define PCI_EXT_CAP_ID_DLF	0x25	/* Data Link Feature */
 #define PCI_EXT_CAP_ID_PL_16GT	0x26	/* Physical Layer 16.0 GT/s */
 #define PCI_EXT_CAP_ID_MAX	PCI_EXT_CAP_ID_PL_16GT
@@ -1066,6 +1067,10 @@
 #define  PCI_L1SS_CTL1_LTR_L12_TH_SCALE	0xe0000000  /* LTR_L1.2_THRESHOLD_Scale */
 #define PCI_L1SS_CTL2		0x0c	/* Control 2 Register */
 
+/* Designated Vendor-Specific (DVSEC, PCI_EXT_CAP_ID_DVSEC) */
+#define PCI_DVSEC_HEADER1		0x4 /* Designated Vendor-Specific Header1 */
+#define PCI_DVSEC_HEADER2		0x8 /* Designated Vendor-Specific Header2 */
+
 /* Data Link Feature */
 #define PCI_DLF_CAP		0x04	/* Capabilities Register */
 #define  PCI_DLF_EXCHANGE_ENABLE	0x80000000  /* Data Link Feature Exchange Enable */
-- 
cgit v1.2.3


From ee49df4505347daa68d87e318503d2037154ee6a Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Thu, 29 Oct 2020 12:32:09 +0100
Subject: vt: keyboard, sort key types by their number

KT_LETTER was numerically missorted. So sort all KT_* entries.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Link: https://lore.kernel.org/r/20201029113222.32640-4-jslaby@suse.cz
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/keyboard.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/keyboard.h b/include/uapi/linux/keyboard.h
index 4846716e7c5c..36d230cedf12 100644
--- a/include/uapi/linux/keyboard.h
+++ b/include/uapi/linux/keyboard.h
@@ -27,7 +27,6 @@
 #define MAX_NR_FUNC	256	/* max nr of strings assigned to keys */
 
 #define KT_LATIN	0	/* we depend on this being zero */
-#define KT_LETTER	11	/* symbol that can be acted upon by CapsLock */
 #define KT_FN		1
 #define KT_SPEC		2
 #define KT_PAD		3
@@ -38,6 +37,7 @@
 #define KT_META		8
 #define KT_ASCII	9
 #define KT_LOCK		10
+#define KT_LETTER	11	/* symbol that can be acted upon by CapsLock */
 #define KT_SLOCK	12
 #define KT_DEAD2	13
 #define KT_BRL		14
-- 
cgit v1.2.3


From f3f0e410c6a848c8f5b2715167eaa31c407cfb70 Mon Sep 17 00:00:00 2001
From: Simon Ser <contact@emersion.fr>
Date: Wed, 4 Nov 2020 17:01:40 +0000
Subject: drm: document that blobs are ref'counted
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

User-space doesn't need to keep track of blobs that might be in use by
the kernel. User-space can just destroy blobs as soon as it doesn't need
them anymore.

Signed-off-by: Simon Ser <contact@emersion.fr>
Signed-off-by: Daniel Stone <daniel@fooishbar.org>
Reviewed-by: Jonas Ådahl <jadahl@gmail.com>
Reviewed-by: Pekka Paalanen <pekka.paalanen@collabora.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/wgav99DTGfubfVPiurrydQEiyufYpxlJQZ0wJMWYBQ@cp7-web-042.plabs.ch
---
 include/uapi/drm/drm_mode.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 863eda048265..5ad10ab2a577 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -924,6 +924,12 @@ struct drm_mode_create_blob {
  * struct drm_mode_destroy_blob - Destroy user blob
  * @blob_id: blob_id to destroy
  * Destroy a user-created blob property.
+ *
+ * User-space can release blobs as soon as they do not need to refer to them by
+ * their blob object ID.  For instance, if you are using a MODE_ID blob in an
+ * atomic commit and you will not make another commit re-using the same ID, you
+ * can destroy the blob as soon as the commit has been issued, without waiting
+ * for it to complete.
  */
 struct drm_mode_destroy_blob {
 	__u32 blob_id;
-- 
cgit v1.2.3


From 94f44f28836de320a318730f4952fde8601f4b58 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vlad@buslov.dev>
Date: Mon, 2 Nov 2020 22:12:43 +0200
Subject: net: sched: implement action-specific terse dump

Allow user to request action terse dump with new flag value
TCA_FLAG_TERSE_DUMP. Only output essential action info in terse dump (kind,
stats, index and cookie, if set by the user when creating the action). This
is different from filter terse dump where index is excluded (filter can be
identified by its own handle).

Move tcf_action_dump_terse() function to the beginning of source file in
order to call it from tcf_dump_walker().

Signed-off-by: Vlad Buslov <vlad@buslov.dev>
Suggested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Link: https://lore.kernel.org/r/20201102201243.287486-1-vlad@buslov.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/rtnetlink.h |  4 +++
 net/sched/act_api.c            | 69 ++++++++++++++++++++++--------------------
 2 files changed, 41 insertions(+), 32 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index fdd408f6a5d2..d1325ffb0060 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -770,8 +770,12 @@ enum {
  * actions in a dump. All dump responses will contain the number of actions
  * being dumped stored in for user app's consumption in TCA_ROOT_COUNT
  *
+ * TCA_FLAG_TERSE_DUMP user->kernel to request terse (brief) dump that only
+ * includes essential action info (kind, index, etc.)
+ *
  */
 #define TCA_FLAG_LARGE_DUMP_ON		(1 << 0)
+#define TCA_FLAG_TERSE_DUMP		(1 << 1)
 
 /* New extended info filters for IFLA_EXT_MASK */
 #define RTEXT_FILTER_VF		(1 << 0)
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index f66417d5d2c3..1341c59c2f40 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -215,6 +215,36 @@ static size_t tcf_action_fill_size(const struct tc_action *act)
 	return sz;
 }
 
+static int
+tcf_action_dump_terse(struct sk_buff *skb, struct tc_action *a, bool from_act)
+{
+	unsigned char *b = skb_tail_pointer(skb);
+	struct tc_cookie *cookie;
+
+	if (nla_put_string(skb, TCA_KIND, a->ops->kind))
+		goto nla_put_failure;
+	if (tcf_action_copy_stats(skb, a, 0))
+		goto nla_put_failure;
+	if (from_act && nla_put_u32(skb, TCA_ACT_INDEX, a->tcfa_index))
+		goto nla_put_failure;
+
+	rcu_read_lock();
+	cookie = rcu_dereference(a->act_cookie);
+	if (cookie) {
+		if (nla_put(skb, TCA_ACT_COOKIE, cookie->len, cookie->data)) {
+			rcu_read_unlock();
+			goto nla_put_failure;
+		}
+	}
+	rcu_read_unlock();
+
+	return 0;
+
+nla_put_failure:
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
 static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
 			   struct netlink_callback *cb)
 {
@@ -248,7 +278,9 @@ static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
 			index--;
 			goto nla_put_failure;
 		}
-		err = tcf_action_dump_1(skb, p, 0, 0);
+		err = (act_flags & TCA_FLAG_TERSE_DUMP) ?
+			tcf_action_dump_terse(skb, p, true) :
+			tcf_action_dump_1(skb, p, 0, 0);
 		if (err < 0) {
 			index--;
 			nlmsg_trim(skb, nest);
@@ -752,34 +784,6 @@ tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 	return a->ops->dump(skb, a, bind, ref);
 }
 
-static int
-tcf_action_dump_terse(struct sk_buff *skb, struct tc_action *a)
-{
-	unsigned char *b = skb_tail_pointer(skb);
-	struct tc_cookie *cookie;
-
-	if (nla_put_string(skb, TCA_KIND, a->ops->kind))
-		goto nla_put_failure;
-	if (tcf_action_copy_stats(skb, a, 0))
-		goto nla_put_failure;
-
-	rcu_read_lock();
-	cookie = rcu_dereference(a->act_cookie);
-	if (cookie) {
-		if (nla_put(skb, TCA_ACT_COOKIE, cookie->len, cookie->data)) {
-			rcu_read_unlock();
-			goto nla_put_failure;
-		}
-	}
-	rcu_read_unlock();
-
-	return 0;
-
-nla_put_failure:
-	nlmsg_trim(skb, b);
-	return -1;
-}
-
 int
 tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 {
@@ -787,7 +791,7 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 	unsigned char *b = skb_tail_pointer(skb);
 	struct nlattr *nest;
 
-	if (tcf_action_dump_terse(skb, a))
+	if (tcf_action_dump_terse(skb, a, false))
 		goto nla_put_failure;
 
 	if (a->hw_stats != TCA_ACT_HW_STATS_ANY &&
@@ -832,7 +836,7 @@ int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[],
 		nest = nla_nest_start_noflag(skb, i + 1);
 		if (nest == NULL)
 			goto nla_put_failure;
-		err = terse ? tcf_action_dump_terse(skb, a) :
+		err = terse ? tcf_action_dump_terse(skb, a, false) :
 			tcf_action_dump_1(skb, a, bind, ref);
 		if (err < 0)
 			goto errout;
@@ -1469,7 +1473,8 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
 }
 
 static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = {
-	[TCA_ROOT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_FLAG_LARGE_DUMP_ON),
+	[TCA_ROOT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_FLAG_LARGE_DUMP_ON |
+						 TCA_FLAG_TERSE_DUMP),
 	[TCA_ROOT_TIME_DELTA]      = { .type = NLA_U32 },
 };
 
-- 
cgit v1.2.3


From 92eb6c3060ebe3adf381fd9899451c5b047bb14d Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 26 Oct 2020 13:07:15 -0700
Subject: crypto: af_alg - avoid undefined behavior accessing salg_name

Commit 3f69cc60768b ("crypto: af_alg - Allow arbitrarily long algorithm
names") made the kernel start accepting arbitrarily long algorithm names
in sockaddr_alg.  However, the actual length of the salg_name field
stayed at the original 64 bytes.

This is broken because the kernel can access indices >= 64 in salg_name,
which is undefined behavior -- even though the memory that is accessed
is still located within the sockaddr structure.  It would only be
defined behavior if the array were properly marked as arbitrary-length
(either by making it a flexible array, which is the recommended way
these days, or by making it an array of length 0 or 1).

We can't simply change salg_name into a flexible array, since that would
break source compatibility with userspace programs that embed
sockaddr_alg into another struct, or (more commonly) declare a
sockaddr_alg like 'struct sockaddr_alg sa = { .salg_name = "foo" };'.

One solution would be to change salg_name into a flexible array only
when '#ifdef __KERNEL__'.  However, that would keep userspace without an
easy way to actually use the longer algorithm names.

Instead, add a new structure 'sockaddr_alg_new' that has the flexible
array field, and expose it to both userspace and the kernel.
Make the kernel use it correctly in alg_bind().

This addresses the syzbot report
"UBSAN: array-index-out-of-bounds in alg_bind"
(https://syzkaller.appspot.com/bug?extid=92ead4eb8e26a26d465e).

Reported-by: syzbot+92ead4eb8e26a26d465e@syzkaller.appspotmail.com
Fixes: 3f69cc60768b ("crypto: af_alg - Allow arbitrarily long algorithm names")
Cc: <stable@vger.kernel.org> # v4.12+
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/af_alg.c             | 10 +++++++---
 include/uapi/linux/if_alg.h | 16 ++++++++++++++++
 2 files changed, 23 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index d11db80d24cd..9acb9d2c4bcf 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -147,7 +147,7 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	const u32 allowed = CRYPTO_ALG_KERN_DRIVER_ONLY;
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
-	struct sockaddr_alg *sa = (void *)uaddr;
+	struct sockaddr_alg_new *sa = (void *)uaddr;
 	const struct af_alg_type *type;
 	void *private;
 	int err;
@@ -155,7 +155,11 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	if (sock->state == SS_CONNECTED)
 		return -EINVAL;
 
-	if (addr_len < sizeof(*sa))
+	BUILD_BUG_ON(offsetof(struct sockaddr_alg_new, salg_name) !=
+		     offsetof(struct sockaddr_alg, salg_name));
+	BUILD_BUG_ON(offsetof(struct sockaddr_alg, salg_name) != sizeof(*sa));
+
+	if (addr_len < sizeof(*sa) + 1)
 		return -EINVAL;
 
 	/* If caller uses non-allowed flag, return error. */
@@ -163,7 +167,7 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		return -EINVAL;
 
 	sa->salg_type[sizeof(sa->salg_type) - 1] = 0;
-	sa->salg_name[sizeof(sa->salg_name) + addr_len - sizeof(*sa) - 1] = 0;
+	sa->salg_name[addr_len - sizeof(*sa) - 1] = 0;
 
 	type = alg_get_type(sa->salg_type);
 	if (PTR_ERR(type) == -ENOENT) {
diff --git a/include/uapi/linux/if_alg.h b/include/uapi/linux/if_alg.h
index 60b7c2efd921..dc52a11ba6d1 100644
--- a/include/uapi/linux/if_alg.h
+++ b/include/uapi/linux/if_alg.h
@@ -24,6 +24,22 @@ struct sockaddr_alg {
 	__u8	salg_name[64];
 };
 
+/*
+ * Linux v4.12 and later removed the 64-byte limit on salg_name[]; it's now an
+ * arbitrary-length field.  We had to keep the original struct above for source
+ * compatibility with existing userspace programs, though.  Use the new struct
+ * below if support for very long algorithm names is needed.  To do this,
+ * allocate 'sizeof(struct sockaddr_alg_new) + strlen(algname) + 1' bytes, and
+ * copy algname (including the null terminator) into salg_name.
+ */
+struct sockaddr_alg_new {
+	__u16	salg_family;
+	__u8	salg_type[14];
+	__u32	salg_feat;
+	__u32	salg_mask;
+	__u8	salg_name[];
+};
+
 struct af_alg_iv {
 	__u32	ivlen;
 	__u8	iv[0];
-- 
cgit v1.2.3


From 9f0ffa418483938d25a15f6ad3891389f333bc59 Mon Sep 17 00:00:00 2001
From: Rohan Dutta <drohan@codeaurora.org>
Date: Tue, 27 Oct 2020 12:09:10 +0200
Subject: cfg80211: Add support to configure SAE PWE value to drivers

Add support to configure SAE PWE preference from userspace to drivers in
both AP and STA modes. This is needed for cases where the driver takes
care of Authentication frame processing (SME in the driver) so that
correct enforcement of the acceptable PWE derivation mechanism can be
performed.

The userspace applications can pass the sae_pwe value using the
NL80211_ATTR_SAE_PWE attribute in the NL80211_CMD_CONNECT and
NL80211_CMD_START_AP commands to the driver. This allows selection
between the hunting-and-pecking loop and hash-to-element options for PWE
derivation. For backwards compatibility, this new attribute is optional
and if not included, the driver is notified of the value being
unspecified.

Signed-off-by: Rohan Dutta <drohan@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20201027100910.22283-1-jouni@codeaurora.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  9 +++++++++
 include/uapi/linux/nl80211.h | 26 ++++++++++++++++++++++++++
 net/wireless/nl80211.c       |  9 +++++++++
 3 files changed, 44 insertions(+)

(limited to 'include/uapi')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 661edfc8722e..0ba8d1fa6eb9 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1008,6 +1008,14 @@ struct survey_info {
  * @sae_pwd: password for SAE authentication (for devices supporting SAE
  *	offload)
  * @sae_pwd_len: length of SAE password (for devices supporting SAE offload)
+ * @sae_pwe: The mechanisms allowed for SAE PWE derivation
+ *	NL80211_SAE_PWE_UNSPECIFIED: Not-specified, used to indicate userspace
+ *		did not specify any preference. The driver should follow its
+ *		internal policy in such a scenario.
+ *	NL80211_SAE_PWE_HUNT_AND_PECK: Allow hunting-and-pecking loop only
+ *	NL80211_SAE_PWE_HASH_TO_ELEMENT: Allow hash-to-element only
+ *	NL80211_SAE_PWE_BOTH: Allow either hunting-and-pecking loop
+ *		or hash-to-element
  */
 struct cfg80211_crypto_settings {
 	u32 wpa_versions;
@@ -1026,6 +1034,7 @@ struct cfg80211_crypto_settings {
 	const u8 *psk;
 	const u8 *sae_pwd;
 	u8 sae_pwd_len;
+	enum nl80211_sae_pwe_mechanism sae_pwe;
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 47700a2b9af9..2d733effcdaf 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2527,6 +2527,11 @@ enum nl80211_commands {
  *	override mask. Used with NL80211_ATTR_S1G_CAPABILITY in
  *	NL80211_CMD_ASSOCIATE or NL80211_CMD_CONNECT.
  *
+ * @NL80211_ATTR_SAE_PWE: Indicates the mechanism(s) allowed for SAE PWE
+ *	derivation in WPA3-Personal networks which are using SAE authentication.
+ *	This is a u8 attribute that encapsulates one of the values from
+ *	&enum nl80211_sae_pwe_mechanism.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -3016,6 +3021,8 @@ enum nl80211_attrs {
 	NL80211_ATTR_S1G_CAPABILITY,
 	NL80211_ATTR_S1G_CAPABILITY_MASK,
 
+	NL80211_ATTR_SAE_PWE,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -7124,4 +7131,23 @@ enum nl80211_unsol_bcast_probe_resp_attributes {
 	NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_MAX =
 		__NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_LAST - 1
 };
+
+/**
+ * enum nl80211_sae_pwe_mechanism - The mechanism(s) allowed for SAE PWE
+ *	derivation. Applicable only when WPA3-Personal SAE authentication is
+ *	used.
+ *
+ * @NL80211_SAE_PWE_UNSPECIFIED: not specified, used internally to indicate that
+ *	attribute is not present from userspace.
+ * @NL80211_SAE_PWE_HUNT_AND_PECK: hunting-and-pecking loop only
+ * @NL80211_SAE_PWE_HASH_TO_ELEMENT: hash-to-element only
+ * @NL80211_SAE_PWE_BOTH: both hunting-and-pecking loop and hash-to-element
+ *	can be used.
+ */
+enum nl80211_sae_pwe_mechanism {
+	NL80211_SAE_PWE_UNSPECIFIED,
+	NL80211_SAE_PWE_HUNT_AND_PECK,
+	NL80211_SAE_PWE_HASH_TO_ELEMENT,
+	NL80211_SAE_PWE_BOTH,
+};
 #endif /* __LINUX_NL80211_H */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 554796a6c6fe..0928ecbe5bd6 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -715,6 +715,9 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 		NLA_POLICY_EXACT_LEN(IEEE80211_S1G_CAPABILITY_LEN),
 	[NL80211_ATTR_S1G_CAPABILITY_MASK] =
 		NLA_POLICY_EXACT_LEN(IEEE80211_S1G_CAPABILITY_LEN),
+	[NL80211_ATTR_SAE_PWE] =
+		NLA_POLICY_RANGE(NLA_U8, NL80211_SAE_PWE_HUNT_AND_PECK,
+				 NL80211_SAE_PWE_BOTH),
 };
 
 /* policy for the key attributes */
@@ -9731,6 +9734,12 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
 			nla_len(info->attrs[NL80211_ATTR_SAE_PASSWORD]);
 	}
 
+	if (info->attrs[NL80211_ATTR_SAE_PWE])
+		settings->sae_pwe =
+			nla_get_u8(info->attrs[NL80211_ATTR_SAE_PWE]);
+	else
+		settings->sae_pwe = NL80211_SAE_PWE_UNSPECIFIED;
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From c4a30446a92a222d2f368254dcc4ab2fda0ba924 Mon Sep 17 00:00:00 2001
From: Rajkumar Manoharan <rmanohar@codeaurora.org>
Date: Fri, 16 Oct 2020 13:15:27 -0700
Subject: cfg80211: add support to configure HE MCS for beacon rate

This allows an option to configure a single HE MCS beacon tx rate.

Signed-off-by: Rajkumar Manoharan <rmanohar@codeaurora.org>
Link: https://lore.kernel.org/r/1602879327-29488-2-git-send-email-rmanohar@codeaurora.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h |  9 +++++++--
 net/wireless/nl80211.c       | 25 +++++++++++++++++++++++--
 2 files changed, 30 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 2d733effcdaf..e1e5b3d4dd81 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1750,8 +1750,9 @@ enum nl80211_commands {
  *	specify just a single bitrate, which is to be used for the beacon.
  *	The driver must also specify support for this with the extended
  *	features NL80211_EXT_FEATURE_BEACON_RATE_LEGACY,
- *	NL80211_EXT_FEATURE_BEACON_RATE_HT and
- *	NL80211_EXT_FEATURE_BEACON_RATE_VHT.
+ *	NL80211_EXT_FEATURE_BEACON_RATE_HT,
+ *	NL80211_EXT_FEATURE_BEACON_RATE_VHT and
+ *	NL80211_EXT_FEATURE_BEACON_RATE_HE.
  *
  * @NL80211_ATTR_FRAME_MATCH: A binary attribute which typically must contain
  *	at least one byte, currently used with @NL80211_CMD_REGISTER_FRAME.
@@ -5903,6 +5904,9 @@ enum nl80211_feature_flags {
  * @NL80211_EXT_FEATURE_UNSOL_BCAST_PROBE_RESP: Driver/device supports
  *	unsolicited broadcast probe response transmission
  *
+ * @NL80211_EXT_FEATURE_BEACON_RATE_HE: Driver supports beacon rate
+ *	configuration (AP/mesh) with HE rates.
+ *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
  */
@@ -5963,6 +5967,7 @@ enum nl80211_ext_feature_index {
 	NL80211_EXT_FEATURE_SAE_OFFLOAD_AP,
 	NL80211_EXT_FEATURE_FILS_DISCOVERY,
 	NL80211_EXT_FEATURE_UNSOL_BCAST_PROBE_RESP,
+	NL80211_EXT_FEATURE_BEACON_RATE_HE,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 3c73eb35b1e5..aad37e7c7f91 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4683,6 +4683,7 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info,
 					mask->control[band].ht_mcs))
 				return -EINVAL;
 		}
+
 		if (tb[NL80211_TXRATE_VHT]) {
 			if (!vht_set_mcs_mask(
 					sband,
@@ -4690,6 +4691,7 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info,
 					mask->control[band].vht_mcs))
 				return -EINVAL;
 		}
+
 		if (tb[NL80211_TXRATE_GI]) {
 			mask->control[band].gi =
 				nla_get_u8(tb[NL80211_TXRATE_GI]);
@@ -4701,6 +4703,7 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info,
 				     nla_data(tb[NL80211_TXRATE_HE]),
 				     mask->control[band].he_mcs))
 			return -EINVAL;
+
 		if (tb[NL80211_TXRATE_HE_GI])
 			mask->control[band].he_gi =
 				nla_get_u8(tb[NL80211_TXRATE_HE_GI]);
@@ -4742,7 +4745,7 @@ static int validate_beacon_tx_rate(struct cfg80211_registered_device *rdev,
 				   enum nl80211_band band,
 				   struct cfg80211_bitrate_mask *beacon_rate)
 {
-	u32 count_ht, count_vht, i;
+	u32 count_ht, count_vht, count_he, i;
 	u32 rate = beacon_rate->control[band].legacy;
 
 	/* Allow only one rate */
@@ -4775,7 +4778,21 @@ static int validate_beacon_tx_rate(struct cfg80211_registered_device *rdev,
 			return -EINVAL;
 	}
 
-	if ((count_ht && count_vht) || (!rate && !count_ht && !count_vht))
+	count_he = 0;
+	for (i = 0; i < NL80211_HE_NSS_MAX; i++) {
+		if (hweight16(beacon_rate->control[band].he_mcs[i]) > 1) {
+			return -EINVAL;
+		} else if (beacon_rate->control[band].he_mcs[i]) {
+			count_he++;
+			if (count_he > 1)
+				return -EINVAL;
+		}
+		if (count_he && rate)
+			return -EINVAL;
+	}
+
+	if ((count_ht && count_vht && count_he) ||
+	    (!rate && !count_ht && !count_vht && !count_he))
 		return -EINVAL;
 
 	if (rate &&
@@ -4790,6 +4807,10 @@ static int validate_beacon_tx_rate(struct cfg80211_registered_device *rdev,
 	    !wiphy_ext_feature_isset(&rdev->wiphy,
 				     NL80211_EXT_FEATURE_BEACON_RATE_VHT))
 		return -EINVAL;
+	if (count_he &&
+	    !wiphy_ext_feature_isset(&rdev->wiphy,
+				     NL80211_EXT_FEATURE_BEACON_RATE_HE))
+		return -EINVAL;
 
 	return 0;
 }
-- 
cgit v1.2.3


From 88b8138b240b43d5215bf7cb422692cd8db51f6f Mon Sep 17 00:00:00 2001
From: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Date: Fri, 6 Nov 2020 14:03:31 +0100
Subject: tty: serial: remove pnx8xxx uart driver

Commit 625326ea9c84 ("MIPS: Remove PNX833x alias NXP_STB22x") removed
support for PNX833x, so it's time to remove its serial driver, too.

Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Link: https://lore.kernel.org/r/20201106130332.103476-1-tsbogend@alpha.franken.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/Kconfig        |  16 -
 drivers/tty/serial/Makefile       |   1 -
 drivers/tty/serial/pnx8xxx_uart.c | 858 --------------------------------------
 include/linux/serial_pnx8xxx.h    |  67 ---
 include/uapi/linux/serial_core.h  |   2 -
 5 files changed, 944 deletions(-)
 delete mode 100644 drivers/tty/serial/pnx8xxx_uart.c
 delete mode 100644 include/linux/serial_pnx8xxx.h

(limited to 'include/uapi')

diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 1044fc387691..b146c93146ee 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -703,22 +703,6 @@ config SERIAL_SH_SCI_DMA
 	depends on SERIAL_SH_SCI && DMA_ENGINE
 	default ARCH_RENESAS
 
-config SERIAL_PNX8XXX
-	bool "Enable PNX8XXX SoCs' UART Support"
-	depends on SOC_PNX833X
-	select SERIAL_CORE
-	help
-	  If you have a MIPS-based Philips SoC such as PNX8330 and you want
-	  to use serial ports, say Y.  Otherwise, say N.
-
-config SERIAL_PNX8XXX_CONSOLE
-	bool "Enable PNX8XX0 serial console"
-	depends on SERIAL_PNX8XXX
-	select SERIAL_CORE_CONSOLE
-	help
-	  If you have a MIPS-based Philips SoC such as PNX8330 and you want
-	  to use serial console, say Y. Otherwise, say N.
-
 config SERIAL_HS_LPC32XX
 	tristate "LPC32XX high speed serial port support"
 	depends on ARCH_LPC32XX || COMPILE_TEST
diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile
index caf167f0c10a..af44b231123c 100644
--- a/drivers/tty/serial/Makefile
+++ b/drivers/tty/serial/Makefile
@@ -27,7 +27,6 @@ obj-$(CONFIG_SERIAL_AMBA_PL010) += amba-pl010.o
 obj-$(CONFIG_SERIAL_AMBA_PL011) += amba-pl011.o
 obj-$(CONFIG_SERIAL_CLPS711X) += clps711x.o
 obj-$(CONFIG_SERIAL_PXA_NON8250) += pxa.o
-obj-$(CONFIG_SERIAL_PNX8XXX) += pnx8xxx_uart.o
 obj-$(CONFIG_SERIAL_SA1100) += sa1100.o
 obj-$(CONFIG_SERIAL_BCM63XX) += bcm63xx_uart.o
 obj-$(CONFIG_SERIAL_SAMSUNG) += samsung_tty.o
diff --git a/drivers/tty/serial/pnx8xxx_uart.c b/drivers/tty/serial/pnx8xxx_uart.c
deleted file mode 100644
index 972d94e8d32b..000000000000
--- a/drivers/tty/serial/pnx8xxx_uart.c
+++ /dev/null
@@ -1,858 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * UART driver for PNX8XXX SoCs
- *
- * Author: Per Hallsmark per.hallsmark@mvista.com
- * Ported to 2.6 kernel by EmbeddedAlley
- * Reworked by Vitaly Wool <vitalywool@gmail.com>
- *
- * Based on drivers/char/serial.c, by Linus Torvalds, Theodore Ts'o.
- * Copyright (C) 2000 Deep Blue Solutions Ltd.
- */
-
-#include <linux/module.h>
-#include <linux/ioport.h>
-#include <linux/init.h>
-#include <linux/console.h>
-#include <linux/sysrq.h>
-#include <linux/device.h>
-#include <linux/platform_device.h>
-#include <linux/tty.h>
-#include <linux/tty_flip.h>
-#include <linux/serial_core.h>
-#include <linux/serial.h>
-#include <linux/serial_pnx8xxx.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-
-/* We'll be using StrongARM sa1100 serial port major/minor */
-#define SERIAL_PNX8XXX_MAJOR	204
-#define MINOR_START		5
-
-#define NR_PORTS		2
-
-#define PNX8XXX_ISR_PASS_LIMIT	256
-
-/*
- * Convert from ignore_status_mask or read_status_mask to FIFO
- * and interrupt status bits
- */
-#define SM_TO_FIFO(x)	((x) >> 10)
-#define SM_TO_ISTAT(x)	((x) & 0x000001ff)
-#define FIFO_TO_SM(x)	((x) << 10)
-#define ISTAT_TO_SM(x)	((x) & 0x000001ff)
-
-/*
- * This is the size of our serial port register set.
- */
-#define UART_PORT_SIZE	0x1000
-
-/*
- * This determines how often we check the modem status signals
- * for any change.  They generally aren't connected to an IRQ
- * so we have to poll them.  We also check immediately before
- * filling the TX fifo incase CTS has been dropped.
- */
-#define MCTRL_TIMEOUT	(250*HZ/1000)
-
-extern struct pnx8xxx_port pnx8xxx_ports[];
-
-static inline int serial_in(struct pnx8xxx_port *sport, int offset)
-{
-	return (__raw_readl(sport->port.membase + offset));
-}
-
-static inline void serial_out(struct pnx8xxx_port *sport, int offset, int value)
-{
-	__raw_writel(value, sport->port.membase + offset);
-}
-
-/*
- * Handle any change of modem status signal since we were last called.
- */
-static void pnx8xxx_mctrl_check(struct pnx8xxx_port *sport)
-{
-	unsigned int status, changed;
-
-	status = sport->port.ops->get_mctrl(&sport->port);
-	changed = status ^ sport->old_status;
-
-	if (changed == 0)
-		return;
-
-	sport->old_status = status;
-
-	if (changed & TIOCM_RI)
-		sport->port.icount.rng++;
-	if (changed & TIOCM_DSR)
-		sport->port.icount.dsr++;
-	if (changed & TIOCM_CAR)
-		uart_handle_dcd_change(&sport->port, status & TIOCM_CAR);
-	if (changed & TIOCM_CTS)
-		uart_handle_cts_change(&sport->port, status & TIOCM_CTS);
-
-	wake_up_interruptible(&sport->port.state->port.delta_msr_wait);
-}
-
-/*
- * This is our per-port timeout handler, for checking the
- * modem status signals.
- */
-static void pnx8xxx_timeout(struct timer_list *t)
-{
-	struct pnx8xxx_port *sport = from_timer(sport, t, timer);
-	unsigned long flags;
-
-	if (sport->port.state) {
-		spin_lock_irqsave(&sport->port.lock, flags);
-		pnx8xxx_mctrl_check(sport);
-		spin_unlock_irqrestore(&sport->port.lock, flags);
-
-		mod_timer(&sport->timer, jiffies + MCTRL_TIMEOUT);
-	}
-}
-
-/*
- * interrupts disabled on entry
- */
-static void pnx8xxx_stop_tx(struct uart_port *port)
-{
-	struct pnx8xxx_port *sport =
-		container_of(port, struct pnx8xxx_port, port);
-	u32 ien;
-
-	/* Disable TX intr */
-	ien = serial_in(sport, PNX8XXX_IEN);
-	serial_out(sport, PNX8XXX_IEN, ien & ~PNX8XXX_UART_INT_ALLTX);
-
-	/* Clear all pending TX intr */
-	serial_out(sport, PNX8XXX_ICLR, PNX8XXX_UART_INT_ALLTX);
-}
-
-/*
- * interrupts may not be disabled on entry
- */
-static void pnx8xxx_start_tx(struct uart_port *port)
-{
-	struct pnx8xxx_port *sport =
-		container_of(port, struct pnx8xxx_port, port);
-	u32 ien;
-
-	/* Clear all pending TX intr */
-	serial_out(sport, PNX8XXX_ICLR, PNX8XXX_UART_INT_ALLTX);
-
-	/* Enable TX intr */
-	ien = serial_in(sport, PNX8XXX_IEN);
-	serial_out(sport, PNX8XXX_IEN, ien | PNX8XXX_UART_INT_ALLTX);
-}
-
-/*
- * Interrupts enabled
- */
-static void pnx8xxx_stop_rx(struct uart_port *port)
-{
-	struct pnx8xxx_port *sport =
-		container_of(port, struct pnx8xxx_port, port);
-	u32 ien;
-
-	/* Disable RX intr */
-	ien = serial_in(sport, PNX8XXX_IEN);
-	serial_out(sport, PNX8XXX_IEN, ien & ~PNX8XXX_UART_INT_ALLRX);
-
-	/* Clear all pending RX intr */
-	serial_out(sport, PNX8XXX_ICLR, PNX8XXX_UART_INT_ALLRX);
-}
-
-/*
- * Set the modem control timer to fire immediately.
- */
-static void pnx8xxx_enable_ms(struct uart_port *port)
-{
-	struct pnx8xxx_port *sport =
-		container_of(port, struct pnx8xxx_port, port);
-
-	mod_timer(&sport->timer, jiffies);
-}
-
-static void pnx8xxx_rx_chars(struct pnx8xxx_port *sport)
-{
-	unsigned int status, ch, flg;
-
-	status = FIFO_TO_SM(serial_in(sport, PNX8XXX_FIFO)) |
-		 ISTAT_TO_SM(serial_in(sport, PNX8XXX_ISTAT));
-	while (status & FIFO_TO_SM(PNX8XXX_UART_FIFO_RXFIFO)) {
-		ch = serial_in(sport, PNX8XXX_FIFO) & 0xff;
-
-		sport->port.icount.rx++;
-
-		flg = TTY_NORMAL;
-
-		/*
-		 * note that the error handling code is
-		 * out of the main execution path
-		 */
-		if (status & (FIFO_TO_SM(PNX8XXX_UART_FIFO_RXFE |
-					PNX8XXX_UART_FIFO_RXPAR |
-					PNX8XXX_UART_FIFO_RXBRK) |
-			      ISTAT_TO_SM(PNX8XXX_UART_INT_RXOVRN))) {
-			if (status & FIFO_TO_SM(PNX8XXX_UART_FIFO_RXBRK)) {
-				status &= ~(FIFO_TO_SM(PNX8XXX_UART_FIFO_RXFE) |
-					FIFO_TO_SM(PNX8XXX_UART_FIFO_RXPAR));
-				sport->port.icount.brk++;
-				if (uart_handle_break(&sport->port))
-					goto ignore_char;
-			} else if (status & FIFO_TO_SM(PNX8XXX_UART_FIFO_RXPAR))
-				sport->port.icount.parity++;
-			else if (status & FIFO_TO_SM(PNX8XXX_UART_FIFO_RXFE))
-				sport->port.icount.frame++;
-			if (status & ISTAT_TO_SM(PNX8XXX_UART_INT_RXOVRN))
-				sport->port.icount.overrun++;
-
-			status &= sport->port.read_status_mask;
-
-			if (status & FIFO_TO_SM(PNX8XXX_UART_FIFO_RXPAR))
-				flg = TTY_PARITY;
-			else if (status & FIFO_TO_SM(PNX8XXX_UART_FIFO_RXFE))
-				flg = TTY_FRAME;
-
-			sport->port.sysrq = 0;
-		}
-
-		if (uart_handle_sysrq_char(&sport->port, ch))
-			goto ignore_char;
-
-		uart_insert_char(&sport->port, status,
-				ISTAT_TO_SM(PNX8XXX_UART_INT_RXOVRN), ch, flg);
-
-	ignore_char:
-		serial_out(sport, PNX8XXX_LCR, serial_in(sport, PNX8XXX_LCR) |
-				PNX8XXX_UART_LCR_RX_NEXT);
-		status = FIFO_TO_SM(serial_in(sport, PNX8XXX_FIFO)) |
-			 ISTAT_TO_SM(serial_in(sport, PNX8XXX_ISTAT));
-	}
-
-	spin_unlock(&sport->port.lock);
-	tty_flip_buffer_push(&sport->port.state->port);
-	spin_lock(&sport->port.lock);
-}
-
-static void pnx8xxx_tx_chars(struct pnx8xxx_port *sport)
-{
-	struct circ_buf *xmit = &sport->port.state->xmit;
-
-	if (sport->port.x_char) {
-		serial_out(sport, PNX8XXX_FIFO, sport->port.x_char);
-		sport->port.icount.tx++;
-		sport->port.x_char = 0;
-		return;
-	}
-
-	/*
-	 * Check the modem control lines before
-	 * transmitting anything.
-	 */
-	pnx8xxx_mctrl_check(sport);
-
-	if (uart_circ_empty(xmit) || uart_tx_stopped(&sport->port)) {
-		pnx8xxx_stop_tx(&sport->port);
-		return;
-	}
-
-	/*
-	 * TX while bytes available
-	 */
-	while (((serial_in(sport, PNX8XXX_FIFO) &
-					PNX8XXX_UART_FIFO_TXFIFO) >> 16) < 16) {
-		serial_out(sport, PNX8XXX_FIFO, xmit->buf[xmit->tail]);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		sport->port.icount.tx++;
-		if (uart_circ_empty(xmit))
-			break;
-	}
-
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(&sport->port);
-
-	if (uart_circ_empty(xmit))
-		pnx8xxx_stop_tx(&sport->port);
-}
-
-static irqreturn_t pnx8xxx_int(int irq, void *dev_id)
-{
-	struct pnx8xxx_port *sport = dev_id;
-	unsigned int status;
-
-	spin_lock(&sport->port.lock);
-	/* Get the interrupts */
-	status  = serial_in(sport, PNX8XXX_ISTAT) & serial_in(sport, PNX8XXX_IEN);
-
-	/* Byte or break signal received */
-	if (status & (PNX8XXX_UART_INT_RX | PNX8XXX_UART_INT_BREAK))
-		pnx8xxx_rx_chars(sport);
-
-	/* TX holding register empty - transmit a byte */
-	if (status & PNX8XXX_UART_INT_TX)
-		pnx8xxx_tx_chars(sport);
-
-	/* Clear the ISTAT register */
-	serial_out(sport, PNX8XXX_ICLR, status);
-
-	spin_unlock(&sport->port.lock);
-	return IRQ_HANDLED;
-}
-
-/*
- * Return TIOCSER_TEMT when transmitter is not busy.
- */
-static unsigned int pnx8xxx_tx_empty(struct uart_port *port)
-{
-	struct pnx8xxx_port *sport =
-		container_of(port, struct pnx8xxx_port, port);
-
-	return serial_in(sport, PNX8XXX_FIFO) & PNX8XXX_UART_FIFO_TXFIFO_STA ? 0 : TIOCSER_TEMT;
-}
-
-static unsigned int pnx8xxx_get_mctrl(struct uart_port *port)
-{
-	struct pnx8xxx_port *sport =
-		container_of(port, struct pnx8xxx_port, port);
-	unsigned int mctrl = TIOCM_DSR;
-	unsigned int msr;
-
-	/* REVISIT */
-
-	msr = serial_in(sport, PNX8XXX_MCR);
-
-	mctrl |= msr & PNX8XXX_UART_MCR_CTS ? TIOCM_CTS : 0;
-	mctrl |= msr & PNX8XXX_UART_MCR_DCD ? TIOCM_CAR : 0;
-
-	return mctrl;
-}
-
-static void pnx8xxx_set_mctrl(struct uart_port *port, unsigned int mctrl)
-{
-#if	0	/* FIXME */
-	struct pnx8xxx_port *sport = (struct pnx8xxx_port *)port;
-	unsigned int msr;
-#endif
-}
-
-/*
- * Interrupts always disabled.
- */
-static void pnx8xxx_break_ctl(struct uart_port *port, int break_state)
-{
-	struct pnx8xxx_port *sport =
-		container_of(port, struct pnx8xxx_port, port);
-	unsigned long flags;
-	unsigned int lcr;
-
-	spin_lock_irqsave(&sport->port.lock, flags);
-	lcr = serial_in(sport, PNX8XXX_LCR);
-	if (break_state == -1)
-		lcr |= PNX8XXX_UART_LCR_TXBREAK;
-	else
-		lcr &= ~PNX8XXX_UART_LCR_TXBREAK;
-	serial_out(sport, PNX8XXX_LCR, lcr);
-	spin_unlock_irqrestore(&sport->port.lock, flags);
-}
-
-static int pnx8xxx_startup(struct uart_port *port)
-{
-	struct pnx8xxx_port *sport =
-		container_of(port, struct pnx8xxx_port, port);
-	int retval;
-
-	/*
-	 * Allocate the IRQ
-	 */
-	retval = request_irq(sport->port.irq, pnx8xxx_int, 0,
-			     "pnx8xxx-uart", sport);
-	if (retval)
-		return retval;
-
-	/*
-	 * Finally, clear and enable interrupts
-	 */
-
-	serial_out(sport, PNX8XXX_ICLR, PNX8XXX_UART_INT_ALLRX |
-			     PNX8XXX_UART_INT_ALLTX);
-
-	serial_out(sport, PNX8XXX_IEN, serial_in(sport, PNX8XXX_IEN) |
-			    PNX8XXX_UART_INT_ALLRX |
-			    PNX8XXX_UART_INT_ALLTX);
-
-	/*
-	 * Enable modem status interrupts
-	 */
-	spin_lock_irq(&sport->port.lock);
-	pnx8xxx_enable_ms(&sport->port);
-	spin_unlock_irq(&sport->port.lock);
-
-	return 0;
-}
-
-static void pnx8xxx_shutdown(struct uart_port *port)
-{
-	struct pnx8xxx_port *sport =
-		container_of(port, struct pnx8xxx_port, port);
-	int lcr;
-
-	/*
-	 * Stop our timer.
-	 */
-	del_timer_sync(&sport->timer);
-
-	/*
-	 * Disable all interrupts
-	 */
-	serial_out(sport, PNX8XXX_IEN, 0);
-
-	/*
-	 * Reset the Tx and Rx FIFOS, disable the break condition
-	 */
-	lcr = serial_in(sport, PNX8XXX_LCR);
-	lcr &= ~PNX8XXX_UART_LCR_TXBREAK;
-	lcr |= PNX8XXX_UART_LCR_TX_RST | PNX8XXX_UART_LCR_RX_RST;
-	serial_out(sport, PNX8XXX_LCR, lcr);
-
-	/*
-	 * Clear all interrupts
-	 */
-	serial_out(sport, PNX8XXX_ICLR, PNX8XXX_UART_INT_ALLRX |
-			     PNX8XXX_UART_INT_ALLTX);
-
-	/*
-	 * Free the interrupt
-	 */
-	free_irq(sport->port.irq, sport);
-}
-
-static void
-pnx8xxx_set_termios(struct uart_port *port, struct ktermios *termios,
-		   struct ktermios *old)
-{
-	struct pnx8xxx_port *sport =
-		container_of(port, struct pnx8xxx_port, port);
-	unsigned long flags;
-	unsigned int lcr_fcr, old_ien, baud, quot;
-	unsigned int old_csize = old ? old->c_cflag & CSIZE : CS8;
-
-	/*
-	 * We only support CS7 and CS8.
-	 */
-	while ((termios->c_cflag & CSIZE) != CS7 &&
-	       (termios->c_cflag & CSIZE) != CS8) {
-		termios->c_cflag &= ~CSIZE;
-		termios->c_cflag |= old_csize;
-		old_csize = CS8;
-	}
-
-	if ((termios->c_cflag & CSIZE) == CS8)
-		lcr_fcr = PNX8XXX_UART_LCR_8BIT;
-	else
-		lcr_fcr = 0;
-
-	if (termios->c_cflag & CSTOPB)
-		lcr_fcr |= PNX8XXX_UART_LCR_2STOPB;
-	if (termios->c_cflag & PARENB) {
-		lcr_fcr |= PNX8XXX_UART_LCR_PAREN;
-		if (!(termios->c_cflag & PARODD))
-			lcr_fcr |= PNX8XXX_UART_LCR_PAREVN;
-	}
-
-	/*
-	 * Ask the core to calculate the divisor for us.
-	 */
-	baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk/16);
-	quot = uart_get_divisor(port, baud);
-
-	spin_lock_irqsave(&sport->port.lock, flags);
-
-	sport->port.read_status_mask = ISTAT_TO_SM(PNX8XXX_UART_INT_RXOVRN) |
-				ISTAT_TO_SM(PNX8XXX_UART_INT_EMPTY) |
-				ISTAT_TO_SM(PNX8XXX_UART_INT_RX);
-	if (termios->c_iflag & INPCK)
-		sport->port.read_status_mask |=
-			FIFO_TO_SM(PNX8XXX_UART_FIFO_RXFE) |
-			FIFO_TO_SM(PNX8XXX_UART_FIFO_RXPAR);
-	if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK))
-		sport->port.read_status_mask |=
-			ISTAT_TO_SM(PNX8XXX_UART_INT_BREAK);
-
-	/*
-	 * Characters to ignore
-	 */
-	sport->port.ignore_status_mask = 0;
-	if (termios->c_iflag & IGNPAR)
-		sport->port.ignore_status_mask |=
-			FIFO_TO_SM(PNX8XXX_UART_FIFO_RXFE) |
-			FIFO_TO_SM(PNX8XXX_UART_FIFO_RXPAR);
-	if (termios->c_iflag & IGNBRK) {
-		sport->port.ignore_status_mask |=
-			ISTAT_TO_SM(PNX8XXX_UART_INT_BREAK);
-		/*
-		 * If we're ignoring parity and break indicators,
-		 * ignore overruns too (for real raw support).
-		 */
-		if (termios->c_iflag & IGNPAR)
-			sport->port.ignore_status_mask |=
-				ISTAT_TO_SM(PNX8XXX_UART_INT_RXOVRN);
-	}
-
-	/*
-	 * ignore all characters if CREAD is not set
-	 */
-	if ((termios->c_cflag & CREAD) == 0)
-		sport->port.ignore_status_mask |=
-			ISTAT_TO_SM(PNX8XXX_UART_INT_RX);
-
-	del_timer_sync(&sport->timer);
-
-	/*
-	 * Update the per-port timeout.
-	 */
-	uart_update_timeout(port, termios->c_cflag, baud);
-
-	/*
-	 * disable interrupts and drain transmitter
-	 */
-	old_ien = serial_in(sport, PNX8XXX_IEN);
-	serial_out(sport, PNX8XXX_IEN, old_ien & ~(PNX8XXX_UART_INT_ALLTX |
-					PNX8XXX_UART_INT_ALLRX));
-
-	while (serial_in(sport, PNX8XXX_FIFO) & PNX8XXX_UART_FIFO_TXFIFO_STA)
-		barrier();
-
-	/* then, disable everything */
-	serial_out(sport, PNX8XXX_IEN, 0);
-
-	/* Reset the Rx and Tx FIFOs too */
-	lcr_fcr |= PNX8XXX_UART_LCR_TX_RST;
-	lcr_fcr |= PNX8XXX_UART_LCR_RX_RST;
-
-	/* set the parity, stop bits and data size */
-	serial_out(sport, PNX8XXX_LCR, lcr_fcr);
-
-	/* set the baud rate */
-	quot -= 1;
-	serial_out(sport, PNX8XXX_BAUD, quot);
-
-	serial_out(sport, PNX8XXX_ICLR, -1);
-
-	serial_out(sport, PNX8XXX_IEN, old_ien);
-
-	if (UART_ENABLE_MS(&sport->port, termios->c_cflag))
-		pnx8xxx_enable_ms(&sport->port);
-
-	spin_unlock_irqrestore(&sport->port.lock, flags);
-}
-
-static const char *pnx8xxx_type(struct uart_port *port)
-{
-	struct pnx8xxx_port *sport =
-		container_of(port, struct pnx8xxx_port, port);
-
-	return sport->port.type == PORT_PNX8XXX ? "PNX8XXX" : NULL;
-}
-
-/*
- * Release the memory region(s) being used by 'port'.
- */
-static void pnx8xxx_release_port(struct uart_port *port)
-{
-	struct pnx8xxx_port *sport =
-		container_of(port, struct pnx8xxx_port, port);
-
-	release_mem_region(sport->port.mapbase, UART_PORT_SIZE);
-}
-
-/*
- * Request the memory region(s) being used by 'port'.
- */
-static int pnx8xxx_request_port(struct uart_port *port)
-{
-	struct pnx8xxx_port *sport =
-		container_of(port, struct pnx8xxx_port, port);
-	return request_mem_region(sport->port.mapbase, UART_PORT_SIZE,
-			"pnx8xxx-uart") != NULL ? 0 : -EBUSY;
-}
-
-/*
- * Configure/autoconfigure the port.
- */
-static void pnx8xxx_config_port(struct uart_port *port, int flags)
-{
-	struct pnx8xxx_port *sport =
-		container_of(port, struct pnx8xxx_port, port);
-
-	if (flags & UART_CONFIG_TYPE &&
-	    pnx8xxx_request_port(&sport->port) == 0)
-		sport->port.type = PORT_PNX8XXX;
-}
-
-/*
- * Verify the new serial_struct (for TIOCSSERIAL).
- * The only change we allow are to the flags and type, and
- * even then only between PORT_PNX8XXX and PORT_UNKNOWN
- */
-static int
-pnx8xxx_verify_port(struct uart_port *port, struct serial_struct *ser)
-{
-	struct pnx8xxx_port *sport =
-		container_of(port, struct pnx8xxx_port,	port);
-	int ret = 0;
-
-	if (ser->type != PORT_UNKNOWN && ser->type != PORT_PNX8XXX)
-		ret = -EINVAL;
-	if (sport->port.irq != ser->irq)
-		ret = -EINVAL;
-	if (ser->io_type != SERIAL_IO_MEM)
-		ret = -EINVAL;
-	if (sport->port.uartclk / 16 != ser->baud_base)
-		ret = -EINVAL;
-	if ((void *)sport->port.mapbase != ser->iomem_base)
-		ret = -EINVAL;
-	if (sport->port.iobase != ser->port)
-		ret = -EINVAL;
-	if (ser->hub6 != 0)
-		ret = -EINVAL;
-	return ret;
-}
-
-static const struct uart_ops pnx8xxx_pops = {
-	.tx_empty	= pnx8xxx_tx_empty,
-	.set_mctrl	= pnx8xxx_set_mctrl,
-	.get_mctrl	= pnx8xxx_get_mctrl,
-	.stop_tx	= pnx8xxx_stop_tx,
-	.start_tx	= pnx8xxx_start_tx,
-	.stop_rx	= pnx8xxx_stop_rx,
-	.enable_ms	= pnx8xxx_enable_ms,
-	.break_ctl	= pnx8xxx_break_ctl,
-	.startup	= pnx8xxx_startup,
-	.shutdown	= pnx8xxx_shutdown,
-	.set_termios	= pnx8xxx_set_termios,
-	.type		= pnx8xxx_type,
-	.release_port	= pnx8xxx_release_port,
-	.request_port	= pnx8xxx_request_port,
-	.config_port	= pnx8xxx_config_port,
-	.verify_port	= pnx8xxx_verify_port,
-};
-
-
-/*
- * Setup the PNX8XXX serial ports.
- *
- * Note also that we support "console=ttySx" where "x" is either 0 or 1.
- */
-static void __init pnx8xxx_init_ports(void)
-{
-	static int first = 1;
-	int i;
-
-	if (!first)
-		return;
-	first = 0;
-
-	for (i = 0; i < NR_PORTS; i++) {
-		timer_setup(&pnx8xxx_ports[i].timer, pnx8xxx_timeout, 0);
-		pnx8xxx_ports[i].port.ops = &pnx8xxx_pops;
-	}
-}
-
-#ifdef CONFIG_SERIAL_PNX8XXX_CONSOLE
-
-static void pnx8xxx_console_putchar(struct uart_port *port, int ch)
-{
-	struct pnx8xxx_port *sport =
-		container_of(port, struct pnx8xxx_port, port);
-	int status;
-
-	do {
-		/* Wait for UART_TX register to empty */
-		status = serial_in(sport, PNX8XXX_FIFO);
-	} while (status & PNX8XXX_UART_FIFO_TXFIFO);
-	serial_out(sport, PNX8XXX_FIFO, ch);
-}
-
-/*
- * Interrupts are disabled on entering
- */static void
-pnx8xxx_console_write(struct console *co, const char *s, unsigned int count)
-{
-	struct pnx8xxx_port *sport = &pnx8xxx_ports[co->index];
-	unsigned int old_ien, status;
-
-	/*
-	 *	First, save IEN and then disable interrupts
-	 */
-	old_ien = serial_in(sport, PNX8XXX_IEN);
-	serial_out(sport, PNX8XXX_IEN, old_ien & ~(PNX8XXX_UART_INT_ALLTX |
-					PNX8XXX_UART_INT_ALLRX));
-
-	uart_console_write(&sport->port, s, count, pnx8xxx_console_putchar);
-
-	/*
-	 *	Finally, wait for transmitter to become empty
-	 *	and restore IEN
-	 */
-	do {
-		/* Wait for UART_TX register to empty */
-		status = serial_in(sport, PNX8XXX_FIFO);
-	} while (status & PNX8XXX_UART_FIFO_TXFIFO);
-
-	/* Clear TX and EMPTY interrupt */
-	serial_out(sport, PNX8XXX_ICLR, PNX8XXX_UART_INT_TX |
-			     PNX8XXX_UART_INT_EMPTY);
-
-	serial_out(sport, PNX8XXX_IEN, old_ien);
-}
-
-static int __init
-pnx8xxx_console_setup(struct console *co, char *options)
-{
-	struct pnx8xxx_port *sport;
-	int baud = 38400;
-	int bits = 8;
-	int parity = 'n';
-	int flow = 'n';
-
-	/*
-	 * Check whether an invalid uart number has been specified, and
-	 * if so, search for the first available port that does have
-	 * console support.
-	 */
-	if (co->index == -1 || co->index >= NR_PORTS)
-		co->index = 0;
-	sport = &pnx8xxx_ports[co->index];
-
-	if (options)
-		uart_parse_options(options, &baud, &parity, &bits, &flow);
-
-	return uart_set_options(&sport->port, co, baud, parity, bits, flow);
-}
-
-static struct uart_driver pnx8xxx_reg;
-static struct console pnx8xxx_console = {
-	.name		= "ttyS",
-	.write		= pnx8xxx_console_write,
-	.device		= uart_console_device,
-	.setup		= pnx8xxx_console_setup,
-	.flags		= CON_PRINTBUFFER,
-	.index		= -1,
-	.data		= &pnx8xxx_reg,
-};
-
-static int __init pnx8xxx_rs_console_init(void)
-{
-	pnx8xxx_init_ports();
-	register_console(&pnx8xxx_console);
-	return 0;
-}
-console_initcall(pnx8xxx_rs_console_init);
-
-#define PNX8XXX_CONSOLE	&pnx8xxx_console
-#else
-#define PNX8XXX_CONSOLE	NULL
-#endif
-
-static struct uart_driver pnx8xxx_reg = {
-	.owner			= THIS_MODULE,
-	.driver_name		= "ttyS",
-	.dev_name		= "ttyS",
-	.major			= SERIAL_PNX8XXX_MAJOR,
-	.minor			= MINOR_START,
-	.nr			= NR_PORTS,
-	.cons			= PNX8XXX_CONSOLE,
-};
-
-static int pnx8xxx_serial_suspend(struct platform_device *pdev, pm_message_t state)
-{
-	struct pnx8xxx_port *sport = platform_get_drvdata(pdev);
-
-	return uart_suspend_port(&pnx8xxx_reg, &sport->port);
-}
-
-static int pnx8xxx_serial_resume(struct platform_device *pdev)
-{
-	struct pnx8xxx_port *sport = platform_get_drvdata(pdev);
-
-	return uart_resume_port(&pnx8xxx_reg, &sport->port);
-}
-
-static int pnx8xxx_serial_probe(struct platform_device *pdev)
-{
-	struct resource *res = pdev->resource;
-	int i;
-
-	for (i = 0; i < pdev->num_resources; i++, res++) {
-		if (!(res->flags & IORESOURCE_MEM))
-			continue;
-
-		for (i = 0; i < NR_PORTS; i++) {
-			if (pnx8xxx_ports[i].port.mapbase != res->start)
-				continue;
-
-			pnx8xxx_ports[i].port.has_sysrq = IS_ENABLED(CONFIG_SERIAL_PNX8XXX_CONSOLE);
-			pnx8xxx_ports[i].port.dev = &pdev->dev;
-			uart_add_one_port(&pnx8xxx_reg, &pnx8xxx_ports[i].port);
-			platform_set_drvdata(pdev, &pnx8xxx_ports[i]);
-			break;
-		}
-	}
-
-	return 0;
-}
-
-static int pnx8xxx_serial_remove(struct platform_device *pdev)
-{
-	struct pnx8xxx_port *sport = platform_get_drvdata(pdev);
-
-	if (sport)
-		uart_remove_one_port(&pnx8xxx_reg, &sport->port);
-
-	return 0;
-}
-
-static struct platform_driver pnx8xxx_serial_driver = {
-	.driver		= {
-		.name	= "pnx8xxx-uart",
-	},
-	.probe		= pnx8xxx_serial_probe,
-	.remove		= pnx8xxx_serial_remove,
-	.suspend	= pnx8xxx_serial_suspend,
-	.resume		= pnx8xxx_serial_resume,
-};
-
-static int __init pnx8xxx_serial_init(void)
-{
-	int ret;
-
-	printk(KERN_INFO "Serial: PNX8XXX driver\n");
-
-	pnx8xxx_init_ports();
-
-	ret = uart_register_driver(&pnx8xxx_reg);
-	if (ret == 0) {
-		ret = platform_driver_register(&pnx8xxx_serial_driver);
-		if (ret)
-			uart_unregister_driver(&pnx8xxx_reg);
-	}
-	return ret;
-}
-
-static void __exit pnx8xxx_serial_exit(void)
-{
-	platform_driver_unregister(&pnx8xxx_serial_driver);
-	uart_unregister_driver(&pnx8xxx_reg);
-}
-
-module_init(pnx8xxx_serial_init);
-module_exit(pnx8xxx_serial_exit);
-
-MODULE_AUTHOR("Embedded Alley Solutions, Inc.");
-MODULE_DESCRIPTION("PNX8XXX SoCs serial port driver");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_CHARDEV_MAJOR(SERIAL_PNX8XXX_MAJOR);
-MODULE_ALIAS("platform:pnx8xxx-uart");
diff --git a/include/linux/serial_pnx8xxx.h b/include/linux/serial_pnx8xxx.h
deleted file mode 100644
index 619d748dcd44..000000000000
--- a/include/linux/serial_pnx8xxx.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Embedded Alley Solutions, source@embeddedalley.com.
- */
-
-#ifndef _LINUX_SERIAL_PNX8XXX_H
-#define _LINUX_SERIAL_PNX8XXX_H
-
-#include <linux/serial_core.h>
-
-#define PNX8XXX_NR_PORTS	2
-
-struct pnx8xxx_port {
-	struct uart_port	port;
-	struct timer_list	timer;
-	unsigned int		old_status;
-};
-
-/* register offsets */
-#define PNX8XXX_LCR		0
-#define PNX8XXX_MCR		0x004
-#define PNX8XXX_BAUD		0x008
-#define PNX8XXX_CFG		0x00c
-#define PNX8XXX_FIFO		0x028
-#define PNX8XXX_ISTAT		0xfe0
-#define PNX8XXX_IEN		0xfe4
-#define PNX8XXX_ICLR		0xfe8
-#define PNX8XXX_ISET		0xfec
-#define PNX8XXX_PD		0xff4
-#define PNX8XXX_MID		0xffc
-
-#define PNX8XXX_UART_LCR_TXBREAK	(1<<30)
-#define PNX8XXX_UART_LCR_PAREVN		0x10000000
-#define PNX8XXX_UART_LCR_PAREN		0x08000000
-#define PNX8XXX_UART_LCR_2STOPB		0x04000000
-#define PNX8XXX_UART_LCR_8BIT		0x01000000
-#define PNX8XXX_UART_LCR_TX_RST		0x00040000
-#define PNX8XXX_UART_LCR_RX_RST		0x00020000
-#define PNX8XXX_UART_LCR_RX_NEXT	0x00010000
-
-#define PNX8XXX_UART_MCR_SCR		0xFF000000
-#define PNX8XXX_UART_MCR_DCD		0x00800000
-#define PNX8XXX_UART_MCR_CTS		0x00100000
-#define PNX8XXX_UART_MCR_LOOP		0x00000010
-#define PNX8XXX_UART_MCR_RTS		0x00000002
-#define PNX8XXX_UART_MCR_DTR		0x00000001
-
-#define PNX8XXX_UART_INT_TX		0x00000080
-#define PNX8XXX_UART_INT_EMPTY		0x00000040
-#define PNX8XXX_UART_INT_RCVTO		0x00000020
-#define PNX8XXX_UART_INT_RX		0x00000010
-#define PNX8XXX_UART_INT_RXOVRN		0x00000008
-#define PNX8XXX_UART_INT_FRERR		0x00000004
-#define PNX8XXX_UART_INT_BREAK		0x00000002
-#define PNX8XXX_UART_INT_PARITY		0x00000001
-#define PNX8XXX_UART_INT_ALLRX		0x0000003F
-#define PNX8XXX_UART_INT_ALLTX		0x000000C0
-
-#define PNX8XXX_UART_FIFO_TXFIFO	0x001F0000
-#define PNX8XXX_UART_FIFO_TXFIFO_STA	(0x1f<<16)
-#define PNX8XXX_UART_FIFO_RXBRK		0x00008000
-#define PNX8XXX_UART_FIFO_RXFE		0x00004000
-#define PNX8XXX_UART_FIFO_RXPAR		0x00002000
-#define PNX8XXX_UART_FIFO_RXFIFO	0x00001F00
-#define PNX8XXX_UART_FIFO_RBRTHR	0x000000FF
-
-#endif
diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h
index 851b982f8c4b..62c22045fe65 100644
--- a/include/uapi/linux/serial_core.h
+++ b/include/uapi/linux/serial_core.h
@@ -134,8 +134,6 @@
 /*Digi jsm */
 #define PORT_JSM        69
 
-#define PORT_PNX8XXX	70
-
 /* SUN4V Hypervisor Console */
 #define PORT_SUNHV	72
 
-- 
cgit v1.2.3


From 4cf1bc1f10452065a29d576fc5693fc4fab5b919 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Fri, 6 Nov 2020 10:37:40 +0000
Subject: bpf: Implement task local storage

Similar to bpf_local_storage for sockets and inodes, add local storage
for task_struct.

The life-cycle of the storage is tied to the life-cycle of the
task_struct, i.e. the storage is destroyed along with the owning task
by a callback to bpf_task_storage_free from the task_free LSM
hook.

The BPF LSM allocates an __rcu pointer to the bpf_local_storage in
the security blob; security blobs are now stackable and can co-exist
with other LSMs.

The userspace map operations can be done by using a pid fd as a key
passed to the lookup, update and delete operations.

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201106103747.2780972-3-kpsingh@chromium.org
---
 include/linux/bpf_lsm.h        |  23 +++
 include/linux/bpf_types.h      |   1 +
 include/uapi/linux/bpf.h       |  39 +++++
 kernel/bpf/Makefile            |   1 +
 kernel/bpf/bpf_lsm.c           |   4 +
 kernel/bpf/bpf_task_storage.c  | 315 +++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c           |   3 +-
 kernel/bpf/verifier.c          |  10 ++
 security/bpf/hooks.c           |   2 +
 tools/include/uapi/linux/bpf.h |  39 +++++
 10 files changed, 436 insertions(+), 1 deletion(-)
 create mode 100644 kernel/bpf/bpf_task_storage.c

(limited to 'include/uapi')

diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
index aaacb6aafc87..73226181b744 100644
--- a/include/linux/bpf_lsm.h
+++ b/include/linux/bpf_lsm.h
@@ -7,6 +7,7 @@
 #ifndef _LINUX_BPF_LSM_H
 #define _LINUX_BPF_LSM_H
 
+#include <linux/sched.h>
 #include <linux/bpf.h>
 #include <linux/lsm_hooks.h>
 
@@ -35,9 +36,21 @@ static inline struct bpf_storage_blob *bpf_inode(
 	return inode->i_security + bpf_lsm_blob_sizes.lbs_inode;
 }
 
+static inline struct bpf_storage_blob *bpf_task(
+	const struct task_struct *task)
+{
+	if (unlikely(!task->security))
+		return NULL;
+
+	return task->security + bpf_lsm_blob_sizes.lbs_task;
+}
+
 extern const struct bpf_func_proto bpf_inode_storage_get_proto;
 extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
+extern const struct bpf_func_proto bpf_task_storage_get_proto;
+extern const struct bpf_func_proto bpf_task_storage_delete_proto;
 void bpf_inode_storage_free(struct inode *inode);
+void bpf_task_storage_free(struct task_struct *task);
 
 #else /* !CONFIG_BPF_LSM */
 
@@ -53,10 +66,20 @@ static inline struct bpf_storage_blob *bpf_inode(
 	return NULL;
 }
 
+static inline struct bpf_storage_blob *bpf_task(
+	const struct task_struct *task)
+{
+	return NULL;
+}
+
 static inline void bpf_inode_storage_free(struct inode *inode)
 {
 }
 
+static inline void bpf_task_storage_free(struct task_struct *task)
+{
+}
+
 #endif /* CONFIG_BPF_LSM */
 
 #endif /* _LINUX_BPF_LSM_H */
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 2e6f568377f1..99f7fd657d87 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -109,6 +109,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
 #endif
 #ifdef CONFIG_BPF_LSM
 BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
 #if defined(CONFIG_XDP_SOCKETS)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e6ceac3f7d62..f4037b2161a6 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -157,6 +157,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_STRUCT_OPS,
 	BPF_MAP_TYPE_RINGBUF,
 	BPF_MAP_TYPE_INODE_STORAGE,
+	BPF_MAP_TYPE_TASK_STORAGE,
 };
 
 /* Note that tracing related programs such as
@@ -3742,6 +3743,42 @@ union bpf_attr {
  * 	Return
  * 		The helper returns **TC_ACT_REDIRECT** on success or
  * 		**TC_ACT_SHOT** on error.
+ *
+ * void *bpf_task_storage_get(struct bpf_map *map, struct task_struct *task, void *value, u64 flags)
+ *	Description
+ *		Get a bpf_local_storage from the *task*.
+ *
+ *		Logically, it could be thought of as getting the value from
+ *		a *map* with *task* as the **key**.  From this
+ *		perspective,  the usage is not much different from
+ *		**bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this
+ *		helper enforces the key must be a task_struct and the map must also
+ *		be a **BPF_MAP_TYPE_TASK_STORAGE**.
+ *
+ *		Underneath, the value is stored locally at *task* instead of
+ *		the *map*.  The *map* is used as the bpf-local-storage
+ *		"type". The bpf-local-storage "type" (i.e. the *map*) is
+ *		searched against all bpf_local_storage residing at *task*.
+ *
+ *		An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be
+ *		used such that a new bpf_local_storage will be
+ *		created if one does not exist.  *value* can be used
+ *		together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify
+ *		the initial value of a bpf_local_storage.  If *value* is
+ *		**NULL**, the new bpf_local_storage will be zero initialized.
+ *	Return
+ *		A bpf_local_storage pointer is returned on success.
+ *
+ *		**NULL** if not found or there was an error in adding
+ *		a new bpf_local_storage.
+ *
+ * long bpf_task_storage_delete(struct bpf_map *map, struct task_struct *task)
+ *	Description
+ *		Delete a bpf_local_storage from a *task*.
+ *	Return
+ *		0 on success.
+ *
+ *		**-ENOENT** if the bpf_local_storage cannot be found.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3900,6 +3937,8 @@ union bpf_attr {
 	FN(bpf_per_cpu_ptr),            \
 	FN(bpf_this_cpu_ptr),		\
 	FN(redirect_peer),		\
+	FN(task_storage_get),		\
+	FN(task_storage_delete),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index bdc8cd1b6767..f0b93ced5a7f 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_i
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
 obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
 obj-${CONFIG_BPF_LSM}	  += bpf_inode_storage.o
+obj-${CONFIG_BPF_LSM}	  += bpf_task_storage.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o
 obj-$(CONFIG_BPF_JIT) += trampoline.o
 obj-$(CONFIG_BPF_SYSCALL) += btf.o
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index cd8a617f2109..e92c51bebb47 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -63,6 +63,10 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_spin_lock_proto;
 	case BPF_FUNC_spin_unlock:
 		return &bpf_spin_unlock_proto;
+	case BPF_FUNC_task_storage_get:
+		return &bpf_task_storage_get_proto;
+	case BPF_FUNC_task_storage_delete:
+		return &bpf_task_storage_delete_proto;
 	default:
 		return tracing_prog_func_proto(func_id, prog);
 	}
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
new file mode 100644
index 000000000000..39a45fba4fb0
--- /dev/null
+++ b/kernel/bpf/bpf_task_storage.c
@@ -0,0 +1,315 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2020 Facebook
+ * Copyright 2020 Google LLC.
+ */
+
+#include <linux/pid.h>
+#include <linux/sched.h>
+#include <linux/rculist.h>
+#include <linux/list.h>
+#include <linux/hash.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/bpf.h>
+#include <linux/bpf_local_storage.h>
+#include <linux/filter.h>
+#include <uapi/linux/btf.h>
+#include <linux/bpf_lsm.h>
+#include <linux/btf_ids.h>
+#include <linux/fdtable.h>
+
+DEFINE_BPF_STORAGE_CACHE(task_cache);
+
+static struct bpf_local_storage __rcu **task_storage_ptr(void *owner)
+{
+	struct task_struct *task = owner;
+	struct bpf_storage_blob *bsb;
+
+	bsb = bpf_task(task);
+	if (!bsb)
+		return NULL;
+	return &bsb->storage;
+}
+
+static struct bpf_local_storage_data *
+task_storage_lookup(struct task_struct *task, struct bpf_map *map,
+		    bool cacheit_lockit)
+{
+	struct bpf_local_storage *task_storage;
+	struct bpf_local_storage_map *smap;
+	struct bpf_storage_blob *bsb;
+
+	bsb = bpf_task(task);
+	if (!bsb)
+		return NULL;
+
+	task_storage = rcu_dereference(bsb->storage);
+	if (!task_storage)
+		return NULL;
+
+	smap = (struct bpf_local_storage_map *)map;
+	return bpf_local_storage_lookup(task_storage, smap, cacheit_lockit);
+}
+
+void bpf_task_storage_free(struct task_struct *task)
+{
+	struct bpf_local_storage_elem *selem;
+	struct bpf_local_storage *local_storage;
+	bool free_task_storage = false;
+	struct bpf_storage_blob *bsb;
+	struct hlist_node *n;
+
+	bsb = bpf_task(task);
+	if (!bsb)
+		return;
+
+	rcu_read_lock();
+
+	local_storage = rcu_dereference(bsb->storage);
+	if (!local_storage) {
+		rcu_read_unlock();
+		return;
+	}
+
+	/* Neither the bpf_prog nor the bpf-map's syscall
+	 * could be modifying the local_storage->list now.
+	 * Thus, no elem can be added-to or deleted-from the
+	 * local_storage->list by the bpf_prog or by the bpf-map's syscall.
+	 *
+	 * It is racing with bpf_local_storage_map_free() alone
+	 * when unlinking elem from the local_storage->list and
+	 * the map's bucket->list.
+	 */
+	raw_spin_lock_bh(&local_storage->lock);
+	hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
+		/* Always unlink from map before unlinking from
+		 * local_storage.
+		 */
+		bpf_selem_unlink_map(selem);
+		free_task_storage = bpf_selem_unlink_storage_nolock(
+			local_storage, selem, false);
+	}
+	raw_spin_unlock_bh(&local_storage->lock);
+	rcu_read_unlock();
+
+	/* free_task_storage should always be true as long as
+	 * local_storage->list was non-empty.
+	 */
+	if (free_task_storage)
+		kfree_rcu(local_storage, rcu);
+}
+
+static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_local_storage_data *sdata;
+	struct task_struct *task;
+	unsigned int f_flags;
+	struct pid *pid;
+	int fd, err;
+
+	fd = *(int *)key;
+	pid = pidfd_get_pid(fd, &f_flags);
+	if (IS_ERR(pid))
+		return ERR_CAST(pid);
+
+	/* We should be in an RCU read side critical section, it should be safe
+	 * to call pid_task.
+	 */
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	task = pid_task(pid, PIDTYPE_PID);
+	if (!task) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	sdata = task_storage_lookup(task, map, true);
+	put_pid(pid);
+	return sdata ? sdata->data : NULL;
+out:
+	put_pid(pid);
+	return ERR_PTR(err);
+}
+
+static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
+					    void *value, u64 map_flags)
+{
+	struct bpf_local_storage_data *sdata;
+	struct task_struct *task;
+	unsigned int f_flags;
+	struct pid *pid;
+	int fd, err;
+
+	fd = *(int *)key;
+	pid = pidfd_get_pid(fd, &f_flags);
+	if (IS_ERR(pid))
+		return PTR_ERR(pid);
+
+	/* We should be in an RCU read side critical section, it should be safe
+	 * to call pid_task.
+	 */
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	task = pid_task(pid, PIDTYPE_PID);
+	if (!task) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	sdata = bpf_local_storage_update(
+		task, (struct bpf_local_storage_map *)map, value, map_flags);
+
+	err = PTR_ERR_OR_ZERO(sdata);
+out:
+	put_pid(pid);
+	return err;
+}
+
+static int task_storage_delete(struct task_struct *task, struct bpf_map *map)
+{
+	struct bpf_local_storage_data *sdata;
+
+	sdata = task_storage_lookup(task, map, false);
+	if (!sdata)
+		return -ENOENT;
+
+	bpf_selem_unlink(SELEM(sdata));
+
+	return 0;
+}
+
+static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
+{
+	struct task_struct *task;
+	unsigned int f_flags;
+	struct pid *pid;
+	int fd, err;
+
+	fd = *(int *)key;
+	pid = pidfd_get_pid(fd, &f_flags);
+	if (IS_ERR(pid))
+		return PTR_ERR(pid);
+
+	/* We should be in an RCU read side critical section, it should be safe
+	 * to call pid_task.
+	 */
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	task = pid_task(pid, PIDTYPE_PID);
+	if (!task) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	err = task_storage_delete(task, map);
+out:
+	put_pid(pid);
+	return err;
+}
+
+BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
+	   task, void *, value, u64, flags)
+{
+	struct bpf_local_storage_data *sdata;
+
+	if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
+		return (unsigned long)NULL;
+
+	/* explicitly check that the task_storage_ptr is not
+	 * NULL as task_storage_lookup returns NULL in this case and
+	 * bpf_local_storage_update expects the owner to have a
+	 * valid storage pointer.
+	 */
+	if (!task_storage_ptr(task))
+		return (unsigned long)NULL;
+
+	sdata = task_storage_lookup(task, map, true);
+	if (sdata)
+		return (unsigned long)sdata->data;
+
+	/* This helper must only be called from places where the lifetime of the task
+	 * is guaranteed. Either by being refcounted or by being protected
+	 * by an RCU read-side critical section.
+	 */
+	if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
+		sdata = bpf_local_storage_update(
+			task, (struct bpf_local_storage_map *)map, value,
+			BPF_NOEXIST);
+		return IS_ERR(sdata) ? (unsigned long)NULL :
+					     (unsigned long)sdata->data;
+	}
+
+	return (unsigned long)NULL;
+}
+
+BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
+	   task)
+{
+	/* This helper must only be called from places where the lifetime of the task
+	 * is guaranteed. Either by being refcounted or by being protected
+	 * by an RCU read-side critical section.
+	 */
+	return task_storage_delete(task, map);
+}
+
+static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+	return -ENOTSUPP;
+}
+
+static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr)
+{
+	struct bpf_local_storage_map *smap;
+
+	smap = bpf_local_storage_map_alloc(attr);
+	if (IS_ERR(smap))
+		return ERR_CAST(smap);
+
+	smap->cache_idx = bpf_local_storage_cache_idx_get(&task_cache);
+	return &smap->map;
+}
+
+static void task_storage_map_free(struct bpf_map *map)
+{
+	struct bpf_local_storage_map *smap;
+
+	smap = (struct bpf_local_storage_map *)map;
+	bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx);
+	bpf_local_storage_map_free(smap);
+}
+
+static int task_storage_map_btf_id;
+const struct bpf_map_ops task_storage_map_ops = {
+	.map_meta_equal = bpf_map_meta_equal,
+	.map_alloc_check = bpf_local_storage_map_alloc_check,
+	.map_alloc = task_storage_map_alloc,
+	.map_free = task_storage_map_free,
+	.map_get_next_key = notsupp_get_next_key,
+	.map_lookup_elem = bpf_pid_task_storage_lookup_elem,
+	.map_update_elem = bpf_pid_task_storage_update_elem,
+	.map_delete_elem = bpf_pid_task_storage_delete_elem,
+	.map_check_btf = bpf_local_storage_map_check_btf,
+	.map_btf_name = "bpf_local_storage_map",
+	.map_btf_id = &task_storage_map_btf_id,
+	.map_owner_storage_ptr = task_storage_ptr,
+};
+
+BTF_ID_LIST_SINGLE(bpf_task_storage_btf_ids, struct, task_struct)
+
+const struct bpf_func_proto bpf_task_storage_get_proto = {
+	.func = bpf_task_storage_get,
+	.gpl_only = false,
+	.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
+	.arg1_type = ARG_CONST_MAP_PTR,
+	.arg2_type = ARG_PTR_TO_BTF_ID,
+	.arg2_btf_id = &bpf_task_storage_btf_ids[0],
+	.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
+	.arg4_type = ARG_ANYTHING,
+};
+
+const struct bpf_func_proto bpf_task_storage_delete_proto = {
+	.func = bpf_task_storage_delete,
+	.gpl_only = false,
+	.ret_type = RET_INTEGER,
+	.arg1_type = ARG_CONST_MAP_PTR,
+	.arg2_type = ARG_PTR_TO_BTF_ID,
+	.arg2_btf_id = &bpf_task_storage_btf_ids[0],
+};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 8f50c9c19f1b..f3fe9f53f93c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -773,7 +773,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 		    map->map_type != BPF_MAP_TYPE_ARRAY &&
 		    map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
 		    map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
-		    map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
+		    map->map_type != BPF_MAP_TYPE_INODE_STORAGE &&
+		    map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
 			return -ENOTSUPP;
 		if (map->spin_lock_off + sizeof(struct bpf_spin_lock) >
 		    map->value_size) {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f863aa84d0a2..00960f6a83ec 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4469,6 +4469,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		    func_id != BPF_FUNC_inode_storage_delete)
 			goto error;
 		break;
+	case BPF_MAP_TYPE_TASK_STORAGE:
+		if (func_id != BPF_FUNC_task_storage_get &&
+		    func_id != BPF_FUNC_task_storage_delete)
+			goto error;
+		break;
 	default:
 		break;
 	}
@@ -4547,6 +4552,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
 			goto error;
 		break;
+	case BPF_FUNC_task_storage_get:
+	case BPF_FUNC_task_storage_delete:
+		if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
+			goto error;
+		break;
 	default:
 		break;
 	}
diff --git a/security/bpf/hooks.c b/security/bpf/hooks.c
index 788667d582ae..e5971fa74fd7 100644
--- a/security/bpf/hooks.c
+++ b/security/bpf/hooks.c
@@ -12,6 +12,7 @@ static struct security_hook_list bpf_lsm_hooks[] __lsm_ro_after_init = {
 	#include <linux/lsm_hook_defs.h>
 	#undef LSM_HOOK
 	LSM_HOOK_INIT(inode_free_security, bpf_inode_storage_free),
+	LSM_HOOK_INIT(task_free, bpf_task_storage_free),
 };
 
 static int __init bpf_lsm_init(void)
@@ -23,6 +24,7 @@ static int __init bpf_lsm_init(void)
 
 struct lsm_blob_sizes bpf_lsm_blob_sizes __lsm_ro_after_init = {
 	.lbs_inode = sizeof(struct bpf_storage_blob),
+	.lbs_task = sizeof(struct bpf_storage_blob),
 };
 
 DEFINE_LSM(bpf) = {
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e6ceac3f7d62..f4037b2161a6 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -157,6 +157,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_STRUCT_OPS,
 	BPF_MAP_TYPE_RINGBUF,
 	BPF_MAP_TYPE_INODE_STORAGE,
+	BPF_MAP_TYPE_TASK_STORAGE,
 };
 
 /* Note that tracing related programs such as
@@ -3742,6 +3743,42 @@ union bpf_attr {
  * 	Return
  * 		The helper returns **TC_ACT_REDIRECT** on success or
  * 		**TC_ACT_SHOT** on error.
+ *
+ * void *bpf_task_storage_get(struct bpf_map *map, struct task_struct *task, void *value, u64 flags)
+ *	Description
+ *		Get a bpf_local_storage from the *task*.
+ *
+ *		Logically, it could be thought of as getting the value from
+ *		a *map* with *task* as the **key**.  From this
+ *		perspective,  the usage is not much different from
+ *		**bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this
+ *		helper enforces the key must be an task_struct and the map must also
+ *		be a **BPF_MAP_TYPE_TASK_STORAGE**.
+ *
+ *		Underneath, the value is stored locally at *task* instead of
+ *		the *map*.  The *map* is used as the bpf-local-storage
+ *		"type". The bpf-local-storage "type" (i.e. the *map*) is
+ *		searched against all bpf_local_storage residing at *task*.
+ *
+ *		An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be
+ *		used such that a new bpf_local_storage will be
+ *		created if one does not exist.  *value* can be used
+ *		together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify
+ *		the initial value of a bpf_local_storage.  If *value* is
+ *		**NULL**, the new bpf_local_storage will be zero initialized.
+ *	Return
+ *		A bpf_local_storage pointer is returned on success.
+ *
+ *		**NULL** if not found or there was an error in adding
+ *		a new bpf_local_storage.
+ *
+ * long bpf_task_storage_delete(struct bpf_map *map, struct task_struct *task)
+ *	Description
+ *		Delete a bpf_local_storage from a *task*.
+ *	Return
+ *		0 on success.
+ *
+ *		**-ENOENT** if the bpf_local_storage cannot be found.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3900,6 +3937,8 @@ union bpf_attr {
 	FN(bpf_per_cpu_ptr),            \
 	FN(bpf_this_cpu_ptr),		\
 	FN(redirect_peer),		\
+	FN(task_storage_get),		\
+	FN(task_storage_delete),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 3ca1032ab7ab010eccb107aa515598788f7d93bb Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Fri, 6 Nov 2020 10:37:43 +0000
Subject: bpf: Implement get_current_task_btf and RET_PTR_TO_BTF_ID

The currently available bpf_get_current_task returns an unsigned integer
which can be used along with BPF_CORE_READ to read data from
the task_struct but still cannot be used as an input argument to a
helper that accepts an ARG_PTR_TO_BTF_ID of type task_struct.

In order to implement this helper a new return type, RET_PTR_TO_BTF_ID,
is added. This is similar to RET_PTR_TO_BTF_ID_OR_NULL but does not
require checking the nullness of returned pointer.

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201106103747.2780972-6-kpsingh@chromium.org
---
 include/linux/bpf.h            |  1 +
 include/uapi/linux/bpf.h       |  9 +++++++++
 kernel/bpf/verifier.c          |  7 +++++--
 kernel/trace/bpf_trace.c       | 16 ++++++++++++++++
 tools/include/uapi/linux/bpf.h |  9 +++++++++
 5 files changed, 40 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 2fffd30e13ac..73d5381a5d5c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -310,6 +310,7 @@ enum bpf_return_type {
 	RET_PTR_TO_BTF_ID_OR_NULL,	/* returns a pointer to a btf_id or NULL */
 	RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */
 	RET_PTR_TO_MEM_OR_BTF_ID,	/* returns a pointer to a valid memory or a btf_id */
+	RET_PTR_TO_BTF_ID,		/* returns a pointer to a btf_id */
 };
 
 /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f4037b2161a6..9879d6793e90 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3779,6 +3779,14 @@ union bpf_attr {
  *		0 on success.
  *
  *		**-ENOENT** if the bpf_local_storage cannot be found.
+ *
+ * struct task_struct *bpf_get_current_task_btf(void)
+ *	Description
+ *		Return a BTF pointer to the "current" task.
+ *		This pointer can also be used in helpers that accept an
+ *		*ARG_PTR_TO_BTF_ID* of type *task_struct*.
+ *	Return
+ *		Pointer to the current task.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3939,6 +3947,7 @@ union bpf_attr {
 	FN(redirect_peer),		\
 	FN(task_storage_get),		\
 	FN(task_storage_delete),	\
+	FN(get_current_task_btf),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 00960f6a83ec..10da26e55130 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5186,11 +5186,14 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 				PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL;
 			regs[BPF_REG_0].btf_id = meta.ret_btf_id;
 		}
-	} else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) {
+	} else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL ||
+		   fn->ret_type == RET_PTR_TO_BTF_ID) {
 		int ret_btf_id;
 
 		mark_reg_known_zero(env, regs, BPF_REG_0);
-		regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL;
+		regs[BPF_REG_0].type = fn->ret_type == RET_PTR_TO_BTF_ID ?
+						     PTR_TO_BTF_ID :
+						     PTR_TO_BTF_ID_OR_NULL;
 		ret_btf_id = *fn->ret_btf_id;
 		if (ret_btf_id == 0) {
 			verbose(env, "invalid return type %d of func %s#%d\n",
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 4517c8b66518..e4515b0f62a8 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1022,6 +1022,20 @@ const struct bpf_func_proto bpf_get_current_task_proto = {
 	.ret_type	= RET_INTEGER,
 };
 
+BPF_CALL_0(bpf_get_current_task_btf)
+{
+	return (unsigned long) current;
+}
+
+BTF_ID_LIST_SINGLE(bpf_get_current_btf_ids, struct, task_struct)
+
+static const struct bpf_func_proto bpf_get_current_task_btf_proto = {
+	.func		= bpf_get_current_task_btf,
+	.gpl_only	= true,
+	.ret_type	= RET_PTR_TO_BTF_ID,
+	.ret_btf_id	= &bpf_get_current_btf_ids[0],
+};
+
 BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
@@ -1265,6 +1279,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_get_current_pid_tgid_proto;
 	case BPF_FUNC_get_current_task:
 		return &bpf_get_current_task_proto;
+	case BPF_FUNC_get_current_task_btf:
+		return &bpf_get_current_task_btf_proto;
 	case BPF_FUNC_get_current_uid_gid:
 		return &bpf_get_current_uid_gid_proto;
 	case BPF_FUNC_get_current_comm:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f4037b2161a6..9879d6793e90 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3779,6 +3779,14 @@ union bpf_attr {
  *		0 on success.
  *
  *		**-ENOENT** if the bpf_local_storage cannot be found.
+ *
+ * struct task_struct *bpf_get_current_task_btf(void)
+ *	Description
+ *		Return a BTF pointer to the "current" task.
+ *		This pointer can also be used in helpers that accept an
+ *		*ARG_PTR_TO_BTF_ID* of type *task_struct*.
+ *	Return
+ *		Pointer to the current task.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3939,6 +3947,7 @@ union bpf_attr {
 	FN(redirect_peer),		\
 	FN(task_storage_get),		\
 	FN(task_storage_delete),	\
+	FN(get_current_task_btf),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 968a83f8cf6fd5a107289c57ee3197a52c72f02c Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 4 Nov 2020 15:30:27 +0200
Subject: rtnetlink: Add RTNH_F_TRAP flag

The flag indicates to user space that the nexthop is not programmed to
forward packets in hardware, but rather to trap them to the CPU. This is
needed, for example, when the MAC of the nexthop neighbour is not
resolved and packets should reach the CPU to trigger neighbour
resolution.

The flag will be used in subsequent patches by netdevsim to test nexthop
objects programming to device drivers and in the future by mlxsw as
well.

Changes since RFC:
* Reword commit message

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/rtnetlink.h | 6 ++++--
 net/ipv4/fib_semantics.c       | 2 ++
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index d1325ffb0060..2ffbef5da6c1 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -396,11 +396,13 @@ struct rtnexthop {
 #define RTNH_F_DEAD		1	/* Nexthop is dead (used by multipath)	*/
 #define RTNH_F_PERVASIVE	2	/* Do recursive gateway lookup	*/
 #define RTNH_F_ONLINK		4	/* Gateway is forced on link	*/
-#define RTNH_F_OFFLOAD		8	/* offloaded route */
+#define RTNH_F_OFFLOAD		8	/* Nexthop is offloaded */
 #define RTNH_F_LINKDOWN		16	/* carrier-down on nexthop */
 #define RTNH_F_UNRESOLVED	32	/* The entry is unresolved (ipmr) */
+#define RTNH_F_TRAP		64	/* Nexthop is trapping packets */
 
-#define RTNH_COMPARE_MASK	(RTNH_F_DEAD | RTNH_F_LINKDOWN | RTNH_F_OFFLOAD)
+#define RTNH_COMPARE_MASK	(RTNH_F_DEAD | RTNH_F_LINKDOWN | \
+				 RTNH_F_OFFLOAD | RTNH_F_TRAP)
 
 /* Macros to handle hexthops */
 
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 1f75dc686b6b..f70b9a0c4957 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1644,6 +1644,8 @@ int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nhc,
 	*flags |= (nhc->nhc_flags & RTNH_F_ONLINK);
 	if (nhc->nhc_flags & RTNH_F_OFFLOAD)
 		*flags |= RTNH_F_OFFLOAD;
+	if (nhc->nhc_flags & RTNH_F_TRAP)
+		*flags |= RTNH_F_TRAP;
 
 	if (!skip_oif && nhc->nhc_dev &&
 	    nla_put_u32(skb, RTA_OIF, nhc->nhc_dev->ifindex))
-- 
cgit v1.2.3


From a3ce2b109a59ee9670706ae8126dcc04cfe261cd Mon Sep 17 00:00:00 2001
From: Menglong Dong <dong.menglong@zte.com.cn>
Date: Thu, 5 Nov 2020 20:49:14 -0500
Subject: net: udp: introduce UDP_MIB_MEMERRORS for udp_mem

When udp_memory_allocated is at the limit, __udp_enqueue_schedule_skb
will return a -ENOBUFS, and skb will be dropped in __udp_queue_rcv_skb
without any counter being incremented. It's hard to find out what
happened once this happens.

So we introduce a UDP_MIB_MEMERRORS to do this job. Well, this change
looks friendly to the existing users, such as netstat:

$ netstat -u -s
Udp:
    0 packets received
    639 packets to unknown port received.
    158689 packet receive errors
    180022 packets sent
    RcvbufErrors: 20930
    MemErrors: 137759
UdpLite:
IpExt:
    InOctets: 257426235
    OutOctets: 257460598
    InNoECTPkts: 181177

v2:
- Fix some alignment problems

Signed-off-by: Menglong Dong <dong.menglong@zte.com.cn>
Link: https://lore.kernel.org/r/1604627354-43207-1-git-send-email-dong.menglong@zte.com.cn
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/snmp.h | 1 +
 net/ipv4/proc.c           | 1 +
 net/ipv4/udp.c            | 3 +++
 net/ipv6/proc.c           | 2 ++
 net/ipv6/udp.c            | 3 +++
 5 files changed, 10 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index f84e7bcad6de..26fc60ce9298 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -159,6 +159,7 @@ enum
 	UDP_MIB_SNDBUFERRORS,			/* SndbufErrors */
 	UDP_MIB_CSUMERRORS,			/* InCsumErrors */
 	UDP_MIB_IGNOREDMULTI,			/* IgnoredMulti */
+	UDP_MIB_MEMERRORS,			/* MemErrors */
 	__UDP_MIB_MAX
 };
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 8d5e1695b9aa..63cd370ea29d 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -167,6 +167,7 @@ static const struct snmp_mib snmp4_udp_list[] = {
 	SNMP_MIB_ITEM("SndbufErrors", UDP_MIB_SNDBUFERRORS),
 	SNMP_MIB_ITEM("InCsumErrors", UDP_MIB_CSUMERRORS),
 	SNMP_MIB_ITEM("IgnoredMulti", UDP_MIB_IGNOREDMULTI),
+	SNMP_MIB_ITEM("MemErrors", UDP_MIB_MEMERRORS),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ca04a8a35e52..1e2e73accd11 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2038,6 +2038,9 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 		if (rc == -ENOMEM)
 			UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS,
 					is_udplite);
+		else
+			UDP_INC_STATS(sock_net(sk), UDP_MIB_MEMERRORS,
+				      is_udplite);
 		UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
 		kfree_skb(skb);
 		trace_udp_fail_queue_rcv_skb(rc, sk);
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index bbff3e02e302..d6306aa46bb1 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -126,6 +126,7 @@ static const struct snmp_mib snmp6_udp6_list[] = {
 	SNMP_MIB_ITEM("Udp6SndbufErrors", UDP_MIB_SNDBUFERRORS),
 	SNMP_MIB_ITEM("Udp6InCsumErrors", UDP_MIB_CSUMERRORS),
 	SNMP_MIB_ITEM("Udp6IgnoredMulti", UDP_MIB_IGNOREDMULTI),
+	SNMP_MIB_ITEM("Udp6MemErrors", UDP_MIB_MEMERRORS),
 	SNMP_MIB_SENTINEL
 };
 
@@ -137,6 +138,7 @@ static const struct snmp_mib snmp6_udplite6_list[] = {
 	SNMP_MIB_ITEM("UdpLite6RcvbufErrors", UDP_MIB_RCVBUFERRORS),
 	SNMP_MIB_ITEM("UdpLite6SndbufErrors", UDP_MIB_SNDBUFERRORS),
 	SNMP_MIB_ITEM("UdpLite6InCsumErrors", UDP_MIB_CSUMERRORS),
+	SNMP_MIB_ITEM("UdpLite6MemErrors", UDP_MIB_MEMERRORS),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index cde9b8874d4b..559611bef0e6 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -637,6 +637,9 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 		if (rc == -ENOMEM)
 			UDP6_INC_STATS(sock_net(sk),
 					 UDP_MIB_RCVBUFERRORS, is_udplite);
+		else
+			UDP6_INC_STATS(sock_net(sk),
+				       UDP_MIB_MEMERRORS, is_udplite);
 		UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
 		kfree_skb(skb);
 		return -1;
-- 
cgit v1.2.3


From 9c661b0b85444e426d3f23250305eeb16f6ffe88 Mon Sep 17 00:00:00 2001
From: Tanner Love <tannerlove@google.com>
Date: Fri, 6 Nov 2020 13:07:40 -0500
Subject: net/packet: make packet_fanout.arr size configurable up to 64K

One use case of PACKET_FANOUT is lockless reception with one socket
per CPU. 256 is a practical limit on increasingly many machines.

Increase PACKET_FANOUT_MAX to 64K. Expand setsockopt PACKET_FANOUT to
take an extra argument max_num_members. Also explicitly define a
fanout_args struct, instead of implicitly casting to an integer. This
documents the API and simplifies the control flow.

If max_num_members is not specified or is set to 0, then 256 is used,
same as before.

Signed-off-by: Tanner Love <tannerlove@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/if_packet.h | 12 ++++++++++++
 net/packet/af_packet.c         | 37 +++++++++++++++++++++++++------------
 net/packet/internal.h          |  5 +++--
 3 files changed, 40 insertions(+), 14 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h
index 3d884d68eb30..c07caf7b40db 100644
--- a/include/uapi/linux/if_packet.h
+++ b/include/uapi/linux/if_packet.h
@@ -2,6 +2,7 @@
 #ifndef __LINUX_IF_PACKET_H
 #define __LINUX_IF_PACKET_H
 
+#include <asm/byteorder.h>
 #include <linux/types.h>
 
 struct sockaddr_pkt {
@@ -296,6 +297,17 @@ struct packet_mreq {
 	unsigned char	mr_address[8];
 };
 
+struct fanout_args {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	__u16		id;
+	__u16		type_flags;
+#else
+	__u16		type_flags;
+	__u16		id;
+#endif
+	__u32		max_num_members;
+};
+
 #define PACKET_MR_MULTICAST	0
 #define PACKET_MR_PROMISC	1
 #define PACKET_MR_ALLMULTI	2
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index cefbd50c1090..62ebfaa7adcb 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1636,13 +1636,15 @@ static bool fanout_find_new_id(struct sock *sk, u16 *new_id)
 	return false;
 }
 
-static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
+static int fanout_add(struct sock *sk, struct fanout_args *args)
 {
 	struct packet_rollover *rollover = NULL;
 	struct packet_sock *po = pkt_sk(sk);
+	u16 type_flags = args->type_flags;
 	struct packet_fanout *f, *match;
 	u8 type = type_flags & 0xff;
 	u8 flags = type_flags >> 8;
+	u16 id = args->id;
 	int err;
 
 	switch (type) {
@@ -1700,11 +1702,21 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 		}
 	}
 	err = -EINVAL;
-	if (match && match->flags != flags)
-		goto out;
-	if (!match) {
+	if (match) {
+		if (match->flags != flags)
+			goto out;
+		if (args->max_num_members &&
+		    args->max_num_members != match->max_num_members)
+			goto out;
+	} else {
+		if (args->max_num_members > PACKET_FANOUT_MAX)
+			goto out;
+		if (!args->max_num_members)
+			/* legacy PACKET_FANOUT_MAX */
+			args->max_num_members = 256;
 		err = -ENOMEM;
-		match = kzalloc(sizeof(*match), GFP_KERNEL);
+		match = kvzalloc(struct_size(match, arr, args->max_num_members),
+				 GFP_KERNEL);
 		if (!match)
 			goto out;
 		write_pnet(&match->net, sock_net(sk));
@@ -1720,6 +1732,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 		match->prot_hook.func = packet_rcv_fanout;
 		match->prot_hook.af_packet_priv = match;
 		match->prot_hook.id_match = match_fanout_group;
+		match->max_num_members = args->max_num_members;
 		list_add(&match->list, &fanout_list);
 	}
 	err = -EINVAL;
@@ -1730,7 +1743,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 	    match->prot_hook.type == po->prot_hook.type &&
 	    match->prot_hook.dev == po->prot_hook.dev) {
 		err = -ENOSPC;
-		if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
+		if (refcount_read(&match->sk_ref) < match->max_num_members) {
 			__dev_remove_pack(&po->prot_hook);
 			po->fanout = match;
 			po->rollover = rollover;
@@ -1744,7 +1757,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 
 	if (err && !refcount_read(&match->sk_ref)) {
 		list_del(&match->list);
-		kfree(match);
+		kvfree(match);
 	}
 
 out:
@@ -3075,7 +3088,7 @@ static int packet_release(struct socket *sock)
 	kfree(po->rollover);
 	if (f) {
 		fanout_release_data(f);
-		kfree(f);
+		kvfree(f);
 	}
 	/*
 	 *	Now the socket is dead. No more input will appear.
@@ -3866,14 +3879,14 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
 	}
 	case PACKET_FANOUT:
 	{
-		int val;
+		struct fanout_args args = { 0 };
 
-		if (optlen != sizeof(val))
+		if (optlen != sizeof(int) && optlen != sizeof(args))
 			return -EINVAL;
-		if (copy_from_sockptr(&val, optval, sizeof(val)))
+		if (copy_from_sockptr(&args, optval, optlen))
 			return -EFAULT;
 
-		return fanout_add(sk, val & 0xffff, val >> 16);
+		return fanout_add(sk, &args);
 	}
 	case PACKET_FANOUT_DATA:
 	{
diff --git a/net/packet/internal.h b/net/packet/internal.h
index fd41ecb7f605..baafc3f3fa25 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -77,11 +77,12 @@ struct packet_ring_buffer {
 };
 
 extern struct mutex fanout_mutex;
-#define PACKET_FANOUT_MAX	256
+#define PACKET_FANOUT_MAX	(1 << 16)
 
 struct packet_fanout {
 	possible_net_t		net;
 	unsigned int		num_members;
+	u32			max_num_members;
 	u16			id;
 	u8			type;
 	u8			flags;
@@ -90,10 +91,10 @@ struct packet_fanout {
 		struct bpf_prog __rcu	*bpf_prog;
 	};
 	struct list_head	list;
-	struct sock		*arr[PACKET_FANOUT_MAX];
 	spinlock_t		lock;
 	refcount_t		sk_ref;
 	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
+	struct sock		*arr[];
 };
 
 struct packet_rollover {
-- 
cgit v1.2.3


From 5329722057d41aebc31e391907a501feaa42f7d9 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 9 Nov 2020 17:19:29 -0800
Subject: bpf: Assign ID to vmlinux BTF and return extra info for BTF in
 GET_OBJ_INFO

Allocate ID for vmlinux BTF. This makes it visible when iterating over all BTF
objects in the system. To allow distinguishing vmlinux BTF (and later kernel
module BTF) from user-provided BTFs, expose an extra kernel_btf flag, as well as
the BTF name ("vmlinux" for vmlinux BTF; it will be equal to the module's name
for module BTF).  We might want to later allow specifying BTF name for user-provided BTFs
as well, if that makes sense. But currently this is reserved only for
in-kernel BTFs.

Exposing IDs for in-kernel BTFs will allow extending BPF APIs that require an
in-kernel BTF type with the ability to specify BTF types from kernel modules, not
just vmlinux BTF. This will be implemented in a follow-up patch set for
fentry/fexit/fmod_ret/lsm/etc.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201110011932.3201430-3-andrii@kernel.org
---
 include/uapi/linux/bpf.h       |  3 +++
 kernel/bpf/btf.c               | 43 +++++++++++++++++++++++++++++++++++++++---
 tools/include/uapi/linux/bpf.h |  3 +++
 3 files changed, 46 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 9879d6793e90..162999b12790 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4466,6 +4466,9 @@ struct bpf_btf_info {
 	__aligned_u64 btf;
 	__u32 btf_size;
 	__u32 id;
+	__aligned_u64 name;
+	__u32 name_len;
+	__u32 kernel_btf;
 } __attribute__((aligned(8)));
 
 struct bpf_link_info {
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 727c1c27053f..856585db7aa7 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -214,6 +214,8 @@ struct btf {
 	struct btf *base_btf;
 	u32 start_id; /* first type ID in this BTF (0 for base BTF) */
 	u32 start_str_off; /* first string offset (0 for base BTF) */
+	char name[MODULE_NAME_LEN];
+	bool kernel_btf;
 };
 
 enum verifier_phase {
@@ -4429,6 +4431,8 @@ struct btf *btf_parse_vmlinux(void)
 
 	btf->data = __start_BTF;
 	btf->data_size = __stop_BTF - __start_BTF;
+	btf->kernel_btf = true;
+	snprintf(btf->name, sizeof(btf->name), "vmlinux");
 
 	err = btf_parse_hdr(env);
 	if (err)
@@ -4454,8 +4458,13 @@ struct btf *btf_parse_vmlinux(void)
 
 	bpf_struct_ops_init(btf, log);
 
-	btf_verifier_env_free(env);
 	refcount_set(&btf->refcnt, 1);
+
+	err = btf_alloc_id(btf);
+	if (err)
+		goto errout;
+
+	btf_verifier_env_free(env);
 	return btf;
 
 errout:
@@ -5553,7 +5562,9 @@ int btf_get_info_by_fd(const struct btf *btf,
 	struct bpf_btf_info info;
 	u32 info_copy, btf_copy;
 	void __user *ubtf;
-	u32 uinfo_len;
+	char __user *uname;
+	u32 uinfo_len, uname_len, name_len;
+	int ret = 0;
 
 	uinfo = u64_to_user_ptr(attr->info.info);
 	uinfo_len = attr->info.info_len;
@@ -5570,11 +5581,37 @@ int btf_get_info_by_fd(const struct btf *btf,
 		return -EFAULT;
 	info.btf_size = btf->data_size;
 
+	info.kernel_btf = btf->kernel_btf;
+
+	uname = u64_to_user_ptr(info.name);
+	uname_len = info.name_len;
+	if (!uname ^ !uname_len)
+		return -EINVAL;
+
+	name_len = strlen(btf->name);
+	info.name_len = name_len;
+
+	if (uname) {
+		if (uname_len >= name_len + 1) {
+			if (copy_to_user(uname, btf->name, name_len + 1))
+				return -EFAULT;
+		} else {
+			char zero = '\0';
+
+			if (copy_to_user(uname, btf->name, uname_len - 1))
+				return -EFAULT;
+			if (put_user(zero, uname + uname_len - 1))
+				return -EFAULT;
+			/* let user-space know about too short buffer */
+			ret = -ENOSPC;
+		}
+	}
+
 	if (copy_to_user(uinfo, &info, info_copy) ||
 	    put_user(info_copy, &uattr->info.info_len))
 		return -EFAULT;
 
-	return 0;
+	return ret;
 }
 
 int btf_get_fd_by_id(u32 id)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 9879d6793e90..162999b12790 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4466,6 +4466,9 @@ struct bpf_btf_info {
 	__aligned_u64 btf;
 	__u32 btf_size;
 	__u32 id;
+	__aligned_u64 name;
+	__u32 name_len;
+	__u32 kernel_btf;
 } __attribute__((aligned(8)));
 
 struct bpf_link_info {
-- 
cgit v1.2.3


From e7e0517c1004991908bc7f20b4c9a7b678277358 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 9 Nov 2020 10:57:47 +0100
Subject: cfg80211: remove WDS code

Remove all the code that was there to configure WDS interfaces,
now that there's no way to reach it anymore.

Link: https://lore.kernel.org/r/20201109105103.8f5b98e4068d.I5f5129041649ef2862b69683574bb3344743727b@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  5 -----
 include/uapi/linux/nl80211.h |  3 ++-
 net/wireless/chan.c          |  6 +++---
 net/wireless/core.c          |  8 +------
 net/wireless/nl80211.c       | 36 ++-----------------------------
 net/wireless/rdev-ops.h      | 10 ---------
 net/wireless/trace.h         |  5 -----
 net/wireless/util.c          |  5 ++---
 net/wireless/wext-compat.c   | 51 --------------------------------------------
 9 files changed, 10 insertions(+), 119 deletions(-)

(limited to 'include/uapi')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 0ba8d1fa6eb9..4ff804a8bc1d 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3745,8 +3745,6 @@ struct mgmt_frame_regs {
  * @get_tx_power: store the current TX power into the dbm variable;
  *	return 0 if successful
  *
- * @set_wds_peer: set the WDS peer for a WDS interface
- *
  * @rfkill_poll: polls the hw rfkill line, use cfg80211 reporting
  *	functions to adjust rfkill hw state
  *
@@ -4067,9 +4065,6 @@ struct cfg80211_ops {
 	int	(*get_tx_power)(struct wiphy *wiphy, struct wireless_dev *wdev,
 				int *dbm);
 
-	int	(*set_wds_peer)(struct wiphy *wiphy, struct net_device *dev,
-				const u8 *addr);
-
 	void	(*rfkill_poll)(struct wiphy *wiphy);
 
 #ifdef CONFIG_NL80211_TESTMODE
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index e1e5b3d4dd81..3e0d4a038ab6 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -757,7 +757,8 @@
  *	of any other interfaces, and other interfaces will again take
  *	precedence when they are used.
  *
- * @NL80211_CMD_SET_WDS_PEER: Set the MAC address of the peer on a WDS interface.
+ * @NL80211_CMD_SET_WDS_PEER: Set the MAC address of the peer on a WDS interface
+ *	(no longer supported).
  *
  * @NL80211_CMD_SET_MULTICAST_TO_UNICAST: Configure if this AP should perform
  *	multicast to unicast conversion. When enabled, all multicast packets
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 22d1779ab2b1..e4030f1fbc60 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -530,10 +530,10 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy,
 	case NL80211_IFTYPE_P2P_CLIENT:
 	case NL80211_IFTYPE_MONITOR:
 	case NL80211_IFTYPE_AP_VLAN:
-	case NL80211_IFTYPE_WDS:
 	case NL80211_IFTYPE_P2P_DEVICE:
 	case NL80211_IFTYPE_NAN:
 		break;
+	case NL80211_IFTYPE_WDS:
 	case NL80211_IFTYPE_UNSPECIFIED:
 	case NUM_NL80211_IFTYPES:
 		WARN_ON(1);
@@ -677,12 +677,12 @@ bool cfg80211_beaconing_iface_active(struct wireless_dev *wdev)
 	case NL80211_IFTYPE_P2P_CLIENT:
 	case NL80211_IFTYPE_MONITOR:
 	case NL80211_IFTYPE_AP_VLAN:
-	case NL80211_IFTYPE_WDS:
 	case NL80211_IFTYPE_P2P_DEVICE:
 	/* Can NAN type be considered as beaconing interface? */
 	case NL80211_IFTYPE_NAN:
 		break;
 	case NL80211_IFTYPE_UNSPECIFIED:
+	case NL80211_IFTYPE_WDS:
 	case NUM_NL80211_IFTYPES:
 		WARN_ON(1);
 	}
@@ -1324,12 +1324,12 @@ cfg80211_get_chan_state(struct wireless_dev *wdev,
 		break;
 	case NL80211_IFTYPE_MONITOR:
 	case NL80211_IFTYPE_AP_VLAN:
-	case NL80211_IFTYPE_WDS:
 	case NL80211_IFTYPE_P2P_DEVICE:
 	case NL80211_IFTYPE_NAN:
 		/* these interface types don't really have a channel */
 		return;
 	case NL80211_IFTYPE_UNSPECIFIED:
+	case NL80211_IFTYPE_WDS:
 	case NUM_NL80211_IFTYPES:
 		WARN_ON(1);
 	}
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 9f23923e8d29..f6b744e91ff4 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -631,10 +631,8 @@ static int wiphy_verify_combinations(struct wiphy *wiphy)
 				return -EINVAL;
 		}
 
-#ifndef CONFIG_WIRELESS_WDS
 		if (WARN_ON(all_iftypes & BIT(NL80211_IFTYPE_WDS)))
 			return -EINVAL;
-#endif
 
 		/* You can't even choose that many! */
 		if (WARN_ON(cnt < c->max_interfaces))
@@ -675,10 +673,8 @@ int wiphy_register(struct wiphy *wiphy)
 		     !(wiphy->nan_supported_bands & BIT(NL80211_BAND_2GHZ)))))
 		return -EINVAL;
 
-#ifndef CONFIG_WIRELESS_WDS
 	if (WARN_ON(wiphy->interface_modes & BIT(NL80211_IFTYPE_WDS)))
 		return -EINVAL;
-#endif
 
 	if (WARN_ON(wiphy->pmsr_capa && !wiphy->pmsr_capa->ftm.supported))
 		return -EINVAL;
@@ -1202,9 +1198,6 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev,
 	case NL80211_IFTYPE_OCB:
 		__cfg80211_leave_ocb(rdev, dev);
 		break;
-	case NL80211_IFTYPE_WDS:
-		/* must be handled by mac80211/driver, has no APIs */
-		break;
 	case NL80211_IFTYPE_P2P_DEVICE:
 	case NL80211_IFTYPE_NAN:
 		/* cannot happen, has no netdev */
@@ -1214,6 +1207,7 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev,
 		/* nothing to do */
 		break;
 	case NL80211_IFTYPE_UNSPECIFIED:
+	case NL80211_IFTYPE_WDS:
 	case NUM_NL80211_IFTYPES:
 		/* invalid */
 		break;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index aad37e7c7f91..b76bdc8417c4 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1885,7 +1885,6 @@ static int nl80211_add_commands_unsplit(struct cfg80211_registered_device *rdev,
 		if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL))
 			goto nla_put_failure;
 	}
-	CMD(set_wds_peer, SET_WDS_PEER);
 	if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) {
 		CMD(tdls_mgmt, TDLS_MGMT);
 		CMD(tdls_oper, TDLS_OPER);
@@ -2863,8 +2862,8 @@ static int parse_txq_params(struct nlattr *tb[],
 static bool nl80211_can_set_dev_channel(struct wireless_dev *wdev)
 {
 	/*
-	 * You can only set the channel explicitly for WDS interfaces,
-	 * all others have their channel managed via their respective
+	 * You can only set the channel explicitly for some interfaces,
+	 * most have their channel managed via their respective
 	 * "establish a connection" command (connect, join, ...)
 	 *
 	 * For AP/GO and mesh mode, the channel can be set with the
@@ -3069,29 +3068,6 @@ static int nl80211_set_channel(struct sk_buff *skb, struct genl_info *info)
 	return __nl80211_set_channel(rdev, netdev, info);
 }
 
-static int nl80211_set_wds_peer(struct sk_buff *skb, struct genl_info *info)
-{
-	struct cfg80211_registered_device *rdev = info->user_ptr[0];
-	struct net_device *dev = info->user_ptr[1];
-	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	const u8 *bssid;
-
-	if (!info->attrs[NL80211_ATTR_MAC])
-		return -EINVAL;
-
-	if (netif_running(dev))
-		return -EBUSY;
-
-	if (!rdev->ops->set_wds_peer)
-		return -EOPNOTSUPP;
-
-	if (wdev->iftype != NL80211_IFTYPE_WDS)
-		return -EOPNOTSUPP;
-
-	bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
-	return rdev_set_wds_peer(rdev, dev, bssid);
-}
-
 static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev;
@@ -15173,14 +15149,6 @@ static const struct genl_small_ops nl80211_small_ops[] = {
 		.internal_flags = NL80211_FLAG_NEED_NETDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
-	{
-		.cmd = NL80211_CMD_SET_WDS_PEER,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.doit = nl80211_set_wds_peer,
-		.flags = GENL_UNS_ADMIN_PERM,
-		.internal_flags = NL80211_FLAG_NEED_NETDEV |
-				  NL80211_FLAG_NEED_RTNL,
-	},
 	{
 		.cmd = NL80211_CMD_JOIN_MESH,
 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 950d57494168..5e2f349c92a8 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -582,16 +582,6 @@ static inline int rdev_get_tx_power(struct cfg80211_registered_device *rdev,
 	return ret;
 }
 
-static inline int rdev_set_wds_peer(struct cfg80211_registered_device *rdev,
-				    struct net_device *dev, const u8 *addr)
-{
-	int ret;
-	trace_rdev_set_wds_peer(&rdev->wiphy, dev, addr);
-	ret = rdev->ops->set_wds_peer(&rdev->wiphy, dev, addr);
-	trace_rdev_return_int(&rdev->wiphy, ret);
-	return ret;
-}
-
 static inline int
 rdev_set_multicast_to_unicast(struct cfg80211_registered_device *rdev,
 			      struct net_device *dev,
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 6e218a0acd4e..817c6fef13be 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -838,11 +838,6 @@ DEFINE_EVENT(wiphy_netdev_mac_evt, rdev_del_mpath,
 	TP_ARGS(wiphy, netdev, mac)
 );
 
-DEFINE_EVENT(wiphy_netdev_mac_evt, rdev_set_wds_peer,
-	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *mac),
-	TP_ARGS(wiphy, netdev, mac)
-);
-
 TRACE_EVENT(rdev_dump_station,
 	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int _idx,
 		 u8 *mac),
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 5b6c80ae564a..5af88037f1fb 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -550,8 +550,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
 			return -1;
 		break;
 	case cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS):
-		if (unlikely(iftype != NL80211_IFTYPE_WDS &&
-			     iftype != NL80211_IFTYPE_MESH_POINT &&
+		if (unlikely(iftype != NL80211_IFTYPE_MESH_POINT &&
 			     iftype != NL80211_IFTYPE_AP_VLAN &&
 			     iftype != NL80211_IFTYPE_STATION))
 			return -1;
@@ -1051,7 +1050,6 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
 		case NL80211_IFTYPE_P2P_GO:
 		case NL80211_IFTYPE_AP:
 		case NL80211_IFTYPE_AP_VLAN:
-		case NL80211_IFTYPE_WDS:
 		case NL80211_IFTYPE_MESH_POINT:
 			/* bridging OK */
 			break;
@@ -1063,6 +1061,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
 			/* not happening */
 			break;
 		case NL80211_IFTYPE_P2P_DEVICE:
+		case NL80211_IFTYPE_WDS:
 		case NL80211_IFTYPE_NAN:
 			WARN_ON(1);
 			break;
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 78f2927ead7f..b84a345b2653 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -49,9 +49,6 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info,
 	case IW_MODE_ADHOC:
 		type = NL80211_IFTYPE_ADHOC;
 		break;
-	case IW_MODE_REPEAT:
-		type = NL80211_IFTYPE_WDS;
-		break;
 	case IW_MODE_MONITOR:
 		type = NL80211_IFTYPE_MONITOR;
 		break;
@@ -1150,50 +1147,6 @@ static int cfg80211_wext_giwpower(struct net_device *dev,
 	return 0;
 }
 
-static int cfg80211_wds_wext_siwap(struct net_device *dev,
-				   struct iw_request_info *info,
-				   struct sockaddr *addr, char *extra)
-{
-	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
-	int err;
-
-	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_WDS))
-		return -EINVAL;
-
-	if (addr->sa_family != ARPHRD_ETHER)
-		return -EINVAL;
-
-	if (netif_running(dev))
-		return -EBUSY;
-
-	if (!rdev->ops->set_wds_peer)
-		return -EOPNOTSUPP;
-
-	err = rdev_set_wds_peer(rdev, dev, (u8 *)&addr->sa_data);
-	if (err)
-		return err;
-
-	memcpy(&wdev->wext.bssid, (u8 *) &addr->sa_data, ETH_ALEN);
-
-	return 0;
-}
-
-static int cfg80211_wds_wext_giwap(struct net_device *dev,
-				   struct iw_request_info *info,
-				   struct sockaddr *addr, char *extra)
-{
-	struct wireless_dev *wdev = dev->ieee80211_ptr;
-
-	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_WDS))
-		return -EINVAL;
-
-	addr->sa_family = ARPHRD_ETHER;
-	memcpy(&addr->sa_data, wdev->wext.bssid, ETH_ALEN);
-
-	return 0;
-}
-
 static int cfg80211_wext_siwrate(struct net_device *dev,
 				 struct iw_request_info *info,
 				 struct iw_param *rate, char *extra)
@@ -1371,8 +1324,6 @@ static int cfg80211_wext_siwap(struct net_device *dev,
 		return cfg80211_ibss_wext_siwap(dev, info, ap_addr, extra);
 	case NL80211_IFTYPE_STATION:
 		return cfg80211_mgd_wext_siwap(dev, info, ap_addr, extra);
-	case NL80211_IFTYPE_WDS:
-		return cfg80211_wds_wext_siwap(dev, info, ap_addr, extra);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -1389,8 +1340,6 @@ static int cfg80211_wext_giwap(struct net_device *dev,
 		return cfg80211_ibss_wext_giwap(dev, info, ap_addr, extra);
 	case NL80211_IFTYPE_STATION:
 		return cfg80211_mgd_wext_giwap(dev, info, ap_addr, extra);
-	case NL80211_IFTYPE_WDS:
-		return cfg80211_wds_wext_giwap(dev, info, ap_addr, extra);
 	default:
 		return -EOPNOTSUPP;
 	}
-- 
cgit v1.2.3


From 63f9909ff602082597849f684655e93336c50b11 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Fri, 9 Oct 2020 14:15:07 -0400
Subject: fuse: introduce the notion of FUSE_HANDLE_KILLPRIV_V2

We already have the FUSE_HANDLE_KILLPRIV flag, which says that the file server
will remove suid/sgid/caps on truncate/chown/write. But that's a little
different from what Linux VFS implements.

To be consistent with Linux VFS behavior, what we want is:

- caps are always cleared on chown/write/truncate
- suid is always cleared on chown, while for truncate/write it is cleared
  only if caller does not have CAP_FSETID.
- sgid is always cleared on chown, while for truncate/write it is cleared
  only if caller does not have CAP_FSETID as well as file has group execute
  permission.

The previous flag did not provide the above semantics, so implement a V2 of
the protocol with the above constraints.

The server does not know whether the caller has CAP_FSETID or not. So for the
case of write()/truncate(), the client will send information in a special flag
to indicate whether to kill privileges or not. These changes are in subsequent
patches.

FUSE_HANDLE_KILLPRIV_V2 relies on WRITE being sent to the server to clear
suid/sgid/security.capability. But with ->writeback_cache, WRITEs are
cached in the guest. So it is not recommended to use FUSE_HANDLE_KILLPRIV_V2
and writeback_cache together, though it might be good enough
for a lot of use cases.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/fuse/fuse_i.h          |  8 ++++++++
 fs/fuse/inode.c           |  5 ++++-
 include/uapi/linux/fuse.h | 11 ++++++++++-
 3 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 8301c5056022..d414c787e362 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -635,6 +635,14 @@ struct fuse_conn {
 	/* show legacy mount options */
 	unsigned int legacy_opts_show:1;
 
+	/*
+	 * fs kills suid/sgid/cap on write/chown/trunc. suid is killed on
+	 * write/trunc only if caller did not have CAP_FSETID.  sgid is killed
+	 * on write/truncate only if caller did not have CAP_FSETID as well as
+	 * file has group execute permission.
+	 */
+	unsigned handle_killpriv_v2:1;
+
 	/*
 	 * The following bitfields are only for optimization purposes
 	 * and hence races in setting them will not cause malfunction
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index e7e9005b9b66..5a6102cd6473 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1038,6 +1038,8 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
 			    !fuse_dax_check_alignment(fc, arg->map_alignment)) {
 				ok = false;
 			}
+			if (arg->flags & FUSE_HANDLE_KILLPRIV_V2)
+				fc->handle_killpriv_v2 = 1;
 		} else {
 			ra_pages = fc->max_read / PAGE_SIZE;
 			fc->no_lock = 1;
@@ -1080,7 +1082,8 @@ void fuse_send_init(struct fuse_mount *fm)
 		FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
 		FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
 		FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
-		FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA;
+		FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA |
+		FUSE_HANDLE_KILLPRIV_V2;
 #ifdef CONFIG_FUSE_DAX
 	if (fm->fc->dax)
 		ia->in.flags |= FUSE_MAP_ALIGNMENT;
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 7233502ea991..29bd2e007947 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -175,6 +175,9 @@
  *
  *  7.32
  *  - add flags to fuse_attr, add FUSE_ATTR_SUBMOUNT, add FUSE_SUBMOUNTS
+ *
+ *  7.33
+ *  - add FUSE_HANDLE_KILLPRIV_V2
  */
 
 #ifndef _LINUX_FUSE_H
@@ -210,7 +213,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 32
+#define FUSE_KERNEL_MINOR_VERSION 33
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -320,6 +323,11 @@ struct fuse_file_lock {
  *		       foffset and moffset fields in struct
  *		       fuse_setupmapping_out and fuse_removemapping_one.
  * FUSE_SUBMOUNTS: kernel supports auto-mounting directory submounts
+ * FUSE_HANDLE_KILLPRIV_V2: fs kills suid/sgid/cap on write/chown/trunc.
+ *			Upon write/truncate suid/sgid is only killed if caller
+ *			does not have CAP_FSETID. Additionally upon
+ *			write/truncate sgid is killed only if file has group
+ *			execute permission. (Same as Linux VFS behavior).
  */
 #define FUSE_ASYNC_READ		(1 << 0)
 #define FUSE_POSIX_LOCKS	(1 << 1)
@@ -349,6 +357,7 @@ struct fuse_file_lock {
 #define FUSE_EXPLICIT_INVAL_DATA (1 << 25)
 #define FUSE_MAP_ALIGNMENT	(1 << 26)
 #define FUSE_SUBMOUNTS		(1 << 27)
+#define FUSE_HANDLE_KILLPRIV_V2	(1 << 28)
 
 /**
  * CUSE INIT request/reply flags
-- 
cgit v1.2.3


From 10c52c84e3f4872689a64ac7666b34d67e630691 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 11 Nov 2020 17:22:32 +0100
Subject: fuse: rename FUSE_WRITE_KILL_PRIV to FUSE_WRITE_KILL_SUIDGID

Kernel has:
ATTR_KILL_PRIV -> clear "security.capability"
ATTR_KILL_SUID -> clear S_ISUID
ATTR_KILL_SGID -> clear S_ISGID if executable

Fuse has:
FUSE_WRITE_KILL_PRIV -> clear S_ISUID and S_ISGID if executable

So FUSE_WRITE_KILL_PRIV implies the complement of ATTR_KILL_PRIV, which is
somewhat confusing.  Also PRIV implies all privileges, including
"security.capability".

Change the name to FUSE_WRITE_KILL_SUIDGID and make FUSE_WRITE_KILL_PRIV an
alias to preserve API compatibility.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/fuse/file.c            | 2 +-
 include/uapi/linux/fuse.h | 9 ++++++---
 2 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 41b1e14f3820..603af847d596 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1451,7 +1451,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
 
 		if (write) {
 			if (!capable(CAP_FSETID))
-				ia->write.in.write_flags |= FUSE_WRITE_KILL_PRIV;
+				ia->write.in.write_flags |= FUSE_WRITE_KILL_SUIDGID;
 
 			nres = fuse_send_write(ia, pos, nbytes, owner);
 		} else {
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 29bd2e007947..2623c75b94a5 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -177,7 +177,7 @@
  *  - add flags to fuse_attr, add FUSE_ATTR_SUBMOUNT, add FUSE_SUBMOUNTS
  *
  *  7.33
- *  - add FUSE_HANDLE_KILLPRIV_V2
+ *  - add FUSE_HANDLE_KILLPRIV_V2, FUSE_WRITE_KILL_SUIDGID
  */
 
 #ifndef _LINUX_FUSE_H
@@ -387,11 +387,14 @@ struct fuse_file_lock {
  *
  * FUSE_WRITE_CACHE: delayed write from page cache, file handle is guessed
  * FUSE_WRITE_LOCKOWNER: lock_owner field is valid
- * FUSE_WRITE_KILL_PRIV: kill suid and sgid bits
+ * FUSE_WRITE_KILL_SUIDGID: kill suid and sgid bits
  */
 #define FUSE_WRITE_CACHE	(1 << 0)
 #define FUSE_WRITE_LOCKOWNER	(1 << 1)
-#define FUSE_WRITE_KILL_PRIV	(1 << 2)
+#define FUSE_WRITE_KILL_SUIDGID (1 << 2)
+
+/* Obsolete alias; this flag implies killing suid/sgid only. */
+#define FUSE_WRITE_KILL_PRIV	FUSE_WRITE_KILL_SUIDGID
 
 /**
  * Read flags
-- 
cgit v1.2.3


From 3179216135ec09825d7c7875580951a6e69dc5df Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Fri, 9 Oct 2020 14:15:09 -0400
Subject: fuse: setattr should set FATTR_KILL_SUIDGID

If fc->handle_killpriv_v2 is enabled, we expect the file server to clear
suid/sgid/security.capability upon chown/truncate/write as appropriate.

Upon truncate (ATTR_SIZE), suid/sgid are cleared only if caller does not
have CAP_FSETID.  File server does not know whether caller has CAP_FSETID
or not.  Hence set FATTR_KILL_SUIDGID upon truncate to let file server know
that caller does not have CAP_FSETID and it should kill suid/sgid as
appropriate.

On chown (ATTR_UID/ATTR_GID) suid/sgid need to be cleared irrespective of
capabilities of calling process, so set FATTR_KILL_SUIDGID unconditionally
in that case.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/fuse/dir.c             | 10 ++++++++++
 include/uapi/linux/fuse.h |  3 ++-
 2 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index c3e22a3dd323..28b07ae5e55f 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1648,10 +1648,20 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
 		inarg.valid |= FATTR_FH;
 		inarg.fh = ff->fh;
 	}
+
+	/* Kill suid/sgid for non-directory chown unconditionally */
+	if (fc->handle_killpriv_v2 && !S_ISDIR(inode->i_mode) &&
+	    attr->ia_valid & (ATTR_UID | ATTR_GID))
+		inarg.valid |= FATTR_KILL_SUIDGID;
+
 	if (attr->ia_valid & ATTR_SIZE) {
 		/* For mandatory locking in truncate */
 		inarg.valid |= FATTR_LOCKOWNER;
 		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
+
+		/* Kill suid/sgid for truncate only if no CAP_FSETID */
+		if (fc->handle_killpriv_v2 && !capable(CAP_FSETID))
+			inarg.valid |= FATTR_KILL_SUIDGID;
 	}
 	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
 	err = fuse_simple_request(fm, &args);
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 2623c75b94a5..9eb96e0564be 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -177,7 +177,7 @@
  *  - add flags to fuse_attr, add FUSE_ATTR_SUBMOUNT, add FUSE_SUBMOUNTS
  *
  *  7.33
- *  - add FUSE_HANDLE_KILLPRIV_V2, FUSE_WRITE_KILL_SUIDGID
+ *  - add FUSE_HANDLE_KILLPRIV_V2, FUSE_WRITE_KILL_SUIDGID, FATTR_KILL_SUIDGID
  */
 
 #ifndef _LINUX_FUSE_H
@@ -274,6 +274,7 @@ struct fuse_file_lock {
 #define FATTR_MTIME_NOW	(1 << 8)
 #define FATTR_LOCKOWNER	(1 << 9)
 #define FATTR_CTIME	(1 << 10)
+#define FATTR_KILL_SUIDGID	(1 << 11)
 
 /**
  * Flags returned by the OPEN request
-- 
cgit v1.2.3


From 643a666a89c358ef588d2b3ef9f2dc1efc421e61 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Fri, 9 Oct 2020 14:15:11 -0400
Subject: fuse: add a flag FUSE_OPEN_KILL_SUIDGID for open() request

With FUSE_HANDLE_KILLPRIV_V2 support, server will need to kill suid/sgid/
security.capability on open(O_TRUNC), if server supports
FUSE_ATOMIC_O_TRUNC.

But server needs to kill suid/sgid only if caller does not have CAP_FSETID.
Given server does not have this information, client needs to send this info
to server.

So add a flag FUSE_OPEN_KILL_SUIDGID to fuse_open_in request which tells
server to kill suid/sgid (only if group execute is set).

This flag is added to the FUSE_OPEN request, as well as the FUSE_CREATE
request if the create was non-exclusive, since that might result in an
existing file being opened/truncated.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/fuse/dir.c             |  6 ++++++
 fs/fuse/file.c            |  6 ++++++
 include/uapi/linux/fuse.h | 11 +++++++++--
 3 files changed, 21 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 778367d125f9..5d43af1169b7 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -541,6 +541,12 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
 	inarg.flags = flags;
 	inarg.mode = mode;
 	inarg.umask = current_umask();
+
+	if (fm->fc->handle_killpriv_v2 && (flags & O_TRUNC) &&
+	    !(flags & O_EXCL) && !capable(CAP_FSETID)) {
+		inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
+	}
+
 	args.opcode = FUSE_CREATE;
 	args.nodeid = get_node_id(dir);
 	args.in_numargs = 2;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index aa0a44f7028f..349885353036 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -42,6 +42,12 @@ static int fuse_send_open(struct fuse_mount *fm, u64 nodeid, struct file *file,
 	inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
 	if (!fm->fc->atomic_o_trunc)
 		inarg.flags &= ~O_TRUNC;
+
+	if (fm->fc->handle_killpriv_v2 &&
+	    (inarg.flags & O_TRUNC) && !capable(CAP_FSETID)) {
+		inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
+	}
+
 	args.opcode = opcode;
 	args.nodeid = nodeid;
 	args.in_numargs = 1;
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 9eb96e0564be..98ca64d1beb6 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -178,6 +178,7 @@
  *
  *  7.33
  *  - add FUSE_HANDLE_KILLPRIV_V2, FUSE_WRITE_KILL_SUIDGID, FATTR_KILL_SUIDGID
+ *  - add FUSE_OPEN_KILL_SUIDGID
  */
 
 #ifndef _LINUX_FUSE_H
@@ -444,6 +445,12 @@ struct fuse_file_lock {
  */
 #define FUSE_ATTR_SUBMOUNT      (1 << 0)
 
+/**
+ * Open flags
+ * FUSE_OPEN_KILL_SUIDGID: Kill suid and sgid if executable
+ */
+#define FUSE_OPEN_KILL_SUIDGID	(1 << 0)
+
 enum fuse_opcode {
 	FUSE_LOOKUP		= 1,
 	FUSE_FORGET		= 2,  /* no reply */
@@ -605,14 +612,14 @@ struct fuse_setattr_in {
 
 struct fuse_open_in {
 	uint32_t	flags;
-	uint32_t	unused;
+	uint32_t	open_flags;	/* FUSE_OPEN_... */
 };
 
 struct fuse_create_in {
 	uint32_t	flags;
 	uint32_t	mode;
 	uint32_t	umask;
-	uint32_t	padding;
+	uint32_t	open_flags;	/* FUSE_OPEN_... */
 };
 
 struct fuse_open_out {
-- 
cgit v1.2.3


From 8d0dd23c6c78d140ed2132f523592ddb4cea839f Mon Sep 17 00:00:00 2001
From: Tal Zussman <tz2294@columbia.edu>
Date: Thu, 12 Nov 2020 16:56:57 -0500
Subject: syscalls: Fix file comments for syscalls implemented in kernel/sys.c

The relevant syscalls were previously moved from kernel/timer.c to kernel/sys.c,
but the comments weren't updated to reflect this change.

Fixing these comments messes up the alphabetical ordering of syscalls by
filename. This could be fixed by merging the two groups of kernel/sys.c syscalls,
but that would require reordering the syscalls and renumbering them to maintain
the numerical order in unistd.h.

Signed-off-by: Tal Zussman <tz2294@columbia.edu>
Link: https://lore.kernel.org/r/20201112215657.GA4539@charmander
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/linux/syscalls.h          | 2 +-
 include/uapi/asm-generic/unistd.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 37bea07c12f2..629870fbb2c9 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -744,7 +744,7 @@ asmlinkage long sys_settimeofday(struct __kernel_old_timeval __user *tv,
 asmlinkage long sys_adjtimex(struct __kernel_timex __user *txc_p);
 asmlinkage long sys_adjtimex_time32(struct old_timex32 __user *txc_p);
 
-/* kernel/timer.c */
+/* kernel/sys.c */
 asmlinkage long sys_getpid(void);
 asmlinkage long sys_getppid(void);
 asmlinkage long sys_getuid(void);
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 2056318988f7..fc48c64700eb 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -517,7 +517,7 @@ __SC_COMP(__NR_settimeofday, sys_settimeofday, compat_sys_settimeofday)
 __SC_3264(__NR_adjtimex, sys_adjtimex_time32, sys_adjtimex)
 #endif
 
-/* kernel/timer.c */
+/* kernel/sys.c */
 #define __NR_getpid 172
 __SYSCALL(__NR_getpid, sys_getpid)
 #define __NR_getppid 173
-- 
cgit v1.2.3


From f026c123001bcc15b78311495cec79a8b73c3cf2 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Thu, 12 Nov 2020 10:30:57 -0600
Subject: ASoC: topology: use inclusive language for bclk and fsync

Mirror suggested changes in alsa-lib.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Link: https://lore.kernel.org/r/20201112163100.5081-2-pierre-louis.bossart@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dai.h   | 32 ++++++++++++++++++++------------
 include/uapi/sound/asoc.h | 22 ++++++++++++++--------
 sound/soc/soc-topology.c  | 24 ++++++++++++------------
 sound/soc/sof/topology.c  |  6 +++---
 4 files changed, 49 insertions(+), 35 deletions(-)

(limited to 'include/uapi')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index 7a85a6f83ca8..4bf759f025d2 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -72,21 +72,29 @@ struct snd_compr_stream;
 #define SND_SOC_DAIFMT_IB_IF		(4 << 8) /* invert BCLK + FRM */
 
 /*
- * DAI hardware clock masters.
+ * DAI hardware clock providers/consumers
  *
  * This is wrt the codec, the inverse is true for the interface
- * i.e. if the codec is clk and FRM master then the interface is
- * clk and frame secondary.
+ * i.e. if the codec is clk and FRM provider then the interface is
+ * clk and frame consumer.
  */
-#define SND_SOC_DAIFMT_CBM_CFM		(1 << 12) /* codec clk & FRM master */
-#define SND_SOC_DAIFMT_CBS_CFM		(2 << 12) /* codec clk secondary & FRM master */
-#define SND_SOC_DAIFMT_CBM_CFS		(3 << 12) /* codec clk master & frame secondary */
-#define SND_SOC_DAIFMT_CBS_CFS		(4 << 12) /* codec clk & FRM secondary */
-
-#define SND_SOC_DAIFMT_FORMAT_MASK	0x000f
-#define SND_SOC_DAIFMT_CLOCK_MASK	0x00f0
-#define SND_SOC_DAIFMT_INV_MASK		0x0f00
-#define SND_SOC_DAIFMT_MASTER_MASK	0xf000
+#define SND_SOC_DAIFMT_CBP_CFP		(1 << 12) /* codec clk provider & frame provider */
+#define SND_SOC_DAIFMT_CBC_CFP		(2 << 12) /* codec clk consumer & frame provider */
+#define SND_SOC_DAIFMT_CBP_CFC		(3 << 12) /* codec clk provider & frame consumer */
+#define SND_SOC_DAIFMT_CBC_CFC		(4 << 12) /* codec clk consumer & frame consumer */
+
+/* previous definitions kept for backwards-compatibility, do not use in new contributions */
+#define SND_SOC_DAIFMT_CBM_CFM		SND_SOC_DAIFMT_CBP_CFP
+#define SND_SOC_DAIFMT_CBS_CFM		SND_SOC_DAIFMT_CBC_CFP
+#define SND_SOC_DAIFMT_CBM_CFS		SND_SOC_DAIFMT_CBP_CFC
+#define SND_SOC_DAIFMT_CBS_CFS		SND_SOC_DAIFMT_CBC_CFC
+
+#define SND_SOC_DAIFMT_FORMAT_MASK		0x000f
+#define SND_SOC_DAIFMT_CLOCK_MASK		0x00f0
+#define SND_SOC_DAIFMT_INV_MASK			0x0f00
+#define SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK	0xf000
+
+#define SND_SOC_DAIFMT_MASTER_MASK	SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK
 
 /*
  * Master Clock Directions
diff --git a/include/uapi/sound/asoc.h b/include/uapi/sound/asoc.h
index a74ca232f1fc..da61398b1f8f 100644
--- a/include/uapi/sound/asoc.h
+++ b/include/uapi/sound/asoc.h
@@ -170,16 +170,22 @@
 #define SND_SOC_TPLG_LNK_FLGBIT_VOICE_WAKEUP            (1 << 3)
 
 /* DAI topology BCLK parameter
- * For the backwards capability, by default codec is bclk master
+ * For backwards compatibility, by default codec is bclk provider
  */
-#define SND_SOC_TPLG_BCLK_CM         0 /* codec is bclk master */
-#define SND_SOC_TPLG_BCLK_CS         1 /* codec is bclk slave */
+#define SND_SOC_TPLG_BCLK_CP         0 /* codec is bclk provider */
+#define SND_SOC_TPLG_BCLK_CC         1 /* codec is bclk consumer */
+/* keep previous definitions for compatibility */
+#define SND_SOC_TPLG_BCLK_CM         SND_SOC_TPLG_BCLK_CP
+#define SND_SOC_TPLG_BCLK_CS         SND_SOC_TPLG_BCLK_CC
 
 /* DAI topology FSYNC parameter
- * For the backwards capability, by default codec is fsync master
+ * For backwards compatibility, by default codec is fsync provider
  */
-#define SND_SOC_TPLG_FSYNC_CM         0 /* codec is fsync master */
-#define SND_SOC_TPLG_FSYNC_CS         1 /* codec is fsync slave */
+#define SND_SOC_TPLG_FSYNC_CP         0 /* codec is fsync provider */
+#define SND_SOC_TPLG_FSYNC_CC         1 /* codec is fsync consumer */
+/* keep previous definitions for compatibility */
+#define SND_SOC_TPLG_FSYNC_CM         SND_SOC_TPLG_FSYNC_CP
+#define SND_SOC_TPLG_FSYNC_CS         SND_SOC_TPLG_FSYNC_CC
 
 /*
  * Block Header.
@@ -336,8 +342,8 @@ struct snd_soc_tplg_hw_config {
 	__u8 clock_gated;	/* SND_SOC_TPLG_DAI_CLK_GATE_ value */
 	__u8 invert_bclk;	/* 1 for inverted BCLK, 0 for normal */
 	__u8 invert_fsync;	/* 1 for inverted frame clock, 0 for normal */
-	__u8 bclk_master;	/* SND_SOC_TPLG_BCLK_ value */
-	__u8 fsync_master;	/* SND_SOC_TPLG_FSYNC_ value */
+	__u8 bclk_provider;	/* SND_SOC_TPLG_BCLK_ value */
+	__u8 fsync_provider;	/* SND_SOC_TPLG_FSYNC_ value */
 	__u8 mclk_direction;    /* SND_SOC_TPLG_MCLK_ value */
 	__le16 reserved;	/* for 32bit alignment */
 	__le32 mclk_rate;	/* MCLK or SYSCLK freqency in Hz */
diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c
index 07c60187e9ea..eb2633dd6454 100644
--- a/sound/soc/soc-topology.c
+++ b/sound/soc/soc-topology.c
@@ -2017,7 +2017,7 @@ static void set_link_hw_format(struct snd_soc_dai_link *link,
 			struct snd_soc_tplg_link_config *cfg)
 {
 	struct snd_soc_tplg_hw_config *hw_config;
-	unsigned char bclk_master, fsync_master;
+	unsigned char bclk_provider, fsync_provider;
 	unsigned char invert_bclk, invert_fsync;
 	int i;
 
@@ -2057,18 +2057,18 @@ static void set_link_hw_format(struct snd_soc_dai_link *link,
 			link->dai_fmt |= SND_SOC_DAIFMT_IB_IF;
 
 		/* clock masters */
-		bclk_master = (hw_config->bclk_master ==
-			       SND_SOC_TPLG_BCLK_CM);
-		fsync_master = (hw_config->fsync_master ==
-				SND_SOC_TPLG_FSYNC_CM);
-		if (bclk_master && fsync_master)
-			link->dai_fmt |= SND_SOC_DAIFMT_CBM_CFM;
-		else if (!bclk_master && fsync_master)
-			link->dai_fmt |= SND_SOC_DAIFMT_CBS_CFM;
-		else if (bclk_master && !fsync_master)
-			link->dai_fmt |= SND_SOC_DAIFMT_CBM_CFS;
+		bclk_provider = (hw_config->bclk_provider ==
+			       SND_SOC_TPLG_BCLK_CP);
+		fsync_provider = (hw_config->fsync_provider ==
+				SND_SOC_TPLG_FSYNC_CP);
+		if (bclk_provider && fsync_provider)
+			link->dai_fmt |= SND_SOC_DAIFMT_CBP_CFP;
+		else if (!bclk_provider && fsync_provider)
+			link->dai_fmt |= SND_SOC_DAIFMT_CBC_CFP;
+		else if (bclk_provider && !fsync_provider)
+			link->dai_fmt |= SND_SOC_DAIFMT_CBP_CFC;
 		else
-			link->dai_fmt |= SND_SOC_DAIFMT_CBS_CFS;
+			link->dai_fmt |= SND_SOC_DAIFMT_CBC_CFC;
 	}
 }
 
diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c
index 44fddeda6043..d708c640e7b5 100644
--- a/sound/soc/sof/topology.c
+++ b/sound/soc/sof/topology.c
@@ -2777,15 +2777,15 @@ static void sof_dai_set_format(struct snd_soc_tplg_hw_config *hw_config,
 			       struct sof_ipc_dai_config *config)
 {
 	/* clock directions wrt codec */
-	if (hw_config->bclk_master == SND_SOC_TPLG_BCLK_CM) {
+	if (hw_config->bclk_provider == SND_SOC_TPLG_BCLK_CM) {
 		/* codec is bclk master */
-		if (hw_config->fsync_master == SND_SOC_TPLG_FSYNC_CM)
+		if (hw_config->fsync_provider == SND_SOC_TPLG_FSYNC_CM)
 			config->format |= SOF_DAI_FMT_CBM_CFM;
 		else
 			config->format |= SOF_DAI_FMT_CBM_CFS;
 	} else {
 		/* codec is bclk slave */
-		if (hw_config->fsync_master == SND_SOC_TPLG_FSYNC_CM)
+		if (hw_config->fsync_provider == SND_SOC_TPLG_FSYNC_CM)
 			config->format |= SOF_DAI_FMT_CBS_CFM;
 		else
 			config->format |= SOF_DAI_FMT_CBS_CFS;
-- 
cgit v1.2.3


From b7397bad74db7bd380b8eee9f1d97bbfe42bdd23 Mon Sep 17 00:00:00 2001
From: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Date: Fri, 13 Nov 2020 18:55:33 +0100
Subject: drm/fourcc: Fix modifier field mask for AMD modifiers.

The DCC_MAX_COMPRESSED_BLOCK has to contain one of
AMD_FMT_MOD_DCC_BLOCK_* and with 3 values this doesn't
fit in 1 bit.

Fix this cleanly while it is only in drm-next.

Fixes: 8ba16d599374 ("drm/fourcc: Add AMD DRM modifiers.")
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/drm/drm_fourcc.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
index ca48ed0e6bc1..ad772e7dd48f 100644
--- a/include/uapi/drm/drm_fourcc.h
+++ b/include/uapi/drm/drm_fourcc.h
@@ -1168,7 +1168,7 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier)
 #define AMD_FMT_MOD_DCC_INDEPENDENT_128B_SHIFT 17
 #define AMD_FMT_MOD_DCC_INDEPENDENT_128B_MASK 0x1
 #define AMD_FMT_MOD_DCC_MAX_COMPRESSED_BLOCK_SHIFT 18
-#define AMD_FMT_MOD_DCC_MAX_COMPRESSED_BLOCK_MASK 0x1
+#define AMD_FMT_MOD_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3
 
 /*
  * DCC supports embedding some clear colors directly in the DCC surface.
@@ -1179,7 +1179,7 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier)
  * If this bit is set that means the fastclear eliminate is not needed for these
  * embeddable colors.
  */
-#define AMD_FMT_MOD_DCC_CONSTANT_ENCODE_SHIFT 19
+#define AMD_FMT_MOD_DCC_CONSTANT_ENCODE_SHIFT 20
 #define AMD_FMT_MOD_DCC_CONSTANT_ENCODE_MASK 0x1
 
 /*
@@ -1192,15 +1192,15 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier)
  * RB = only for TILE_VER_GFX9 & DCC
  * PIPE = only for TILE_VER_GFX9 & DCC & (DCC_RETILE | DCC_PIPE_ALIGN)
  */
-#define AMD_FMT_MOD_PIPE_XOR_BITS_SHIFT 20
+#define AMD_FMT_MOD_PIPE_XOR_BITS_SHIFT 21
 #define AMD_FMT_MOD_PIPE_XOR_BITS_MASK 0x7
-#define AMD_FMT_MOD_BANK_XOR_BITS_SHIFT 23
+#define AMD_FMT_MOD_BANK_XOR_BITS_SHIFT 24
 #define AMD_FMT_MOD_BANK_XOR_BITS_MASK 0x7
-#define AMD_FMT_MOD_PACKERS_SHIFT 26 /* aliases with BANK_XOR_BITS */
+#define AMD_FMT_MOD_PACKERS_SHIFT 27 /* aliases with BANK_XOR_BITS */
 #define AMD_FMT_MOD_PACKERS_MASK 0x7
-#define AMD_FMT_MOD_RB_SHIFT 29
+#define AMD_FMT_MOD_RB_SHIFT 30
 #define AMD_FMT_MOD_RB_MASK 0x7
-#define AMD_FMT_MOD_PIPE_SHIFT 32
+#define AMD_FMT_MOD_PIPE_SHIFT 33
 #define AMD_FMT_MOD_PIPE_MASK 0x7
 
 #define AMD_FMT_MOD_SET(field, value) \
-- 
cgit v1.2.3


From 544645f2ec1af910284ebde00da2a6cfab7cc8c1 Mon Sep 17 00:00:00 2001
From: Simon Ser <contact@emersion.fr>
Date: Fri, 13 Nov 2020 17:21:35 +0000
Subject: drm/fourcc: add table describing AMD modifiers bit layout

The table describes how each bit in the u64 value is used. Explicitly
state which values a field can take if we have defines for them. Also
add a note when a field isn't always populated.

Forcing people to update the table when changing the bit layout should
make it more obvious when there's a mistake, I hope.

If we get to the point where the bit layout gets more complicated, it
might be worth it to split the table into multiple tables (e.g. one for
GFX8, one for GFX9+, and so on).

Signed-off-by: Simon Ser <contact@emersion.fr>
Cc: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Cc: Alex Deucher <alexdeucher@gmail.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/drm/drm_fourcc.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
index ad772e7dd48f..bf03bce1e854 100644
--- a/include/uapi/drm/drm_fourcc.h
+++ b/include/uapi/drm/drm_fourcc.h
@@ -1114,6 +1114,25 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier)
  *
  * For multi-plane formats the above surfaces get merged into one plane for
  * each format plane, based on the required alignment only.
+ *
+ * Bits  Parameter                Notes
+ * ----- ------------------------ ---------------------------------------------
+ *
+ *   7:0 TILE_VERSION             Values are AMD_FMT_MOD_TILE_VER_*
+ *  12:8 TILE                     Values are AMD_FMT_MOD_TILE_<version>_*
+ *    13 DCC
+ *    14 DCC_RETILE
+ *    15 DCC_PIPE_ALIGN
+ *    16 DCC_INDEPENDENT_64B
+ *    17 DCC_INDEPENDENT_128B
+ * 19:18 DCC_MAX_COMPRESSED_BLOCK Values are AMD_FMT_MOD_DCC_BLOCK_*
+ *    20 DCC_CONSTANT_ENCODE
+ * 23:21 PIPE_XOR_BITS            Only for some chips
+ * 26:24 BANK_XOR_BITS            Only for some chips
+ * 29:27 PACKERS                  Only for some chips
+ * 32:30 RB                       Only for some chips
+ * 35:33 PIPE                     Only for some chips
+ * 55:36 -                        Reserved for future use, must be zero
  */
 #define AMD_FMT_MOD fourcc_mod_code(AMD, 0)
 
-- 
cgit v1.2.3


From c21d54f0307ff42a346294899107b570b98c47b5 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 29 Sep 2020 17:09:43 +0200
Subject: KVM: x86: hyper-v: allow KVM_GET_SUPPORTED_HV_CPUID as a system ioctl

KVM_GET_SUPPORTED_HV_CPUID is a vCPU ioctl but its output is now
independent from vCPU and in some cases VMMs may want to use it as a system
ioctl instead. In particular, QEMU does CPU feature expansion before any
vCPU gets created so KVM_GET_SUPPORTED_HV_CPUID can't be used.

Convert KVM_GET_SUPPORTED_HV_CPUID to 'dual' system/vCPU ioctl with the
same meaning.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20200929150944.1235688-2-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virt/kvm/api.rst | 16 ++++++++++-----
 arch/x86/kvm/hyperv.c          |  6 +++---
 arch/x86/kvm/hyperv.h          |  4 ++--
 arch/x86/kvm/vmx/evmcs.c       |  3 +--
 arch/x86/kvm/x86.c             | 45 +++++++++++++++++++++++++-----------------
 include/uapi/linux/kvm.h       |  3 ++-
 6 files changed, 46 insertions(+), 31 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index e00a66d72372..81d54fe76a2d 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -4455,9 +4455,9 @@ that KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is present.
 4.118 KVM_GET_SUPPORTED_HV_CPUID
 --------------------------------
 
-:Capability: KVM_CAP_HYPERV_CPUID
+:Capability: KVM_CAP_HYPERV_CPUID (vcpu), KVM_CAP_SYS_HYPERV_CPUID (system)
 :Architectures: x86
-:Type: vcpu ioctl
+:Type: system ioctl, vcpu ioctl
 :Parameters: struct kvm_cpuid2 (in/out)
 :Returns: 0 on success, -1 on error
 
@@ -4502,9 +4502,6 @@ Currently, the following list of CPUID leaves are returned:
  - HYPERV_CPUID_SYNDBG_INTERFACE
  - HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES
 
-HYPERV_CPUID_NESTED_FEATURES leaf is only exposed when Enlightened VMCS was
-enabled on the corresponding vCPU (KVM_CAP_HYPERV_ENLIGHTENED_VMCS).
-
 Userspace invokes KVM_GET_SUPPORTED_HV_CPUID by passing a kvm_cpuid2 structure
 with the 'nent' field indicating the number of entries in the variable-size
 array 'entries'.  If the number of entries is too low to describe all Hyper-V
@@ -4515,6 +4512,15 @@ number of valid entries in the 'entries' array, which is then filled.
 'index' and 'flags' fields in 'struct kvm_cpuid_entry2' are currently reserved,
 userspace should not expect to get any particular value there.
 
+Note, vcpu version of KVM_GET_SUPPORTED_HV_CPUID is currently deprecated. Unlike
+system ioctl which exposes all supported feature bits unconditionally, vcpu
+version has the following quirks:
+- HYPERV_CPUID_NESTED_FEATURES leaf and HV_X64_ENLIGHTENED_VMCS_RECOMMENDED
+  feature bit are only exposed when Enlightened VMCS was previously enabled
+  on the corresponding vCPU (KVM_CAP_HYPERV_ENLIGHTENED_VMCS).
+- HV_STIMER_DIRECT_MODE_AVAILABLE bit is only exposed with in-kernel LAPIC.
+  (presumes KVM_CREATE_IRQCHIP has already been called).
+
 4.119 KVM_ARM_VCPU_FINALIZE
 ---------------------------
 
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 5c7c4060b45c..922c69dcca4d 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1951,8 +1951,8 @@ int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args)
 	return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd);
 }
 
-int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
-				struct kvm_cpuid_entry2 __user *entries)
+int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
+		     struct kvm_cpuid_entry2 __user *entries)
 {
 	uint16_t evmcs_ver = 0;
 	struct kvm_cpuid_entry2 cpuid_entries[] = {
@@ -2037,7 +2037,7 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
 			 * Direct Synthetic timers only make sense with in-kernel
 			 * LAPIC
 			 */
-			if (lapic_in_kernel(vcpu))
+			if (!vcpu || lapic_in_kernel(vcpu))
 				ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE;
 
 			break;
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index e68c6c2e9649..6d7def2b0aad 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -126,7 +126,7 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
 void kvm_hv_init_vm(struct kvm *kvm);
 void kvm_hv_destroy_vm(struct kvm *kvm);
 int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args);
-int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
-				struct kvm_cpuid_entry2 __user *entries);
+int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
+		     struct kvm_cpuid_entry2 __user *entries);
 
 #endif
diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c
index f3199bb02f22..41f24661af04 100644
--- a/arch/x86/kvm/vmx/evmcs.c
+++ b/arch/x86/kvm/vmx/evmcs.c
@@ -326,7 +326,6 @@ bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa)
 
 uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
 {
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	/*
 	 * vmcs_version represents the range of supported Enlightened VMCS
 	 * versions: lower 8 bits is the minimal version, higher 8 bits is the
@@ -334,7 +333,7 @@ uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
 	 * KVM_EVMCS_VERSION.
 	 */
 	if (kvm_cpu_cap_get(X86_FEATURE_VMX) &&
-	    vmx->nested.enlightened_vmcs_enabled)
+	    (!vcpu || to_vmx(vcpu)->nested.enlightened_vmcs_enabled))
 		return (KVM_EVMCS_VERSION << 8) | 1;
 
 	return 0;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2db86702cac4..773cb52cb775 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3677,6 +3677,27 @@ static inline bool kvm_can_mwait_in_guest(void)
 		boot_cpu_has(X86_FEATURE_ARAT);
 }
 
+static int kvm_ioctl_get_supported_hv_cpuid(struct kvm_vcpu *vcpu,
+					    struct kvm_cpuid2 __user *cpuid_arg)
+{
+	struct kvm_cpuid2 cpuid;
+	int r;
+
+	r = -EFAULT;
+	if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
+		return r;
+
+	r = kvm_get_hv_cpuid(vcpu, &cpuid, cpuid_arg->entries);
+	if (r)
+		return r;
+
+	r = -EFAULT;
+	if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
+		return r;
+
+	return 0;
+}
+
 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r = 0;
@@ -3713,6 +3734,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_HYPERV_TLBFLUSH:
 	case KVM_CAP_HYPERV_SEND_IPI:
 	case KVM_CAP_HYPERV_CPUID:
+	case KVM_CAP_SYS_HYPERV_CPUID:
 	case KVM_CAP_PCI_SEGMENT:
 	case KVM_CAP_DEBUGREGS:
 	case KVM_CAP_X86_ROBUST_SINGLESTEP:
@@ -3898,6 +3920,9 @@ long kvm_arch_dev_ioctl(struct file *filp,
 	case KVM_GET_MSRS:
 		r = msr_io(NULL, argp, do_get_msr_feature, 1);
 		break;
+	case KVM_GET_SUPPORTED_HV_CPUID:
+		r = kvm_ioctl_get_supported_hv_cpuid(NULL, argp);
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -4974,25 +4999,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
 		break;
 	}
-	case KVM_GET_SUPPORTED_HV_CPUID: {
-		struct kvm_cpuid2 __user *cpuid_arg = argp;
-		struct kvm_cpuid2 cpuid;
-
-		r = -EFAULT;
-		if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
-			goto out;
-
-		r = kvm_vcpu_ioctl_get_hv_cpuid(vcpu, &cpuid,
-						cpuid_arg->entries);
-		if (r)
-			goto out;
-
-		r = -EFAULT;
-		if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
-			goto out;
-		r = 0;
+	case KVM_GET_SUPPORTED_HV_CPUID:
+		r = kvm_ioctl_get_supported_hv_cpuid(vcpu, argp);
 		break;
-	}
 	default:
 		r = -EINVAL;
 	}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index ca41220b40b8..204afbe1240e 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1053,6 +1053,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_X86_USER_SPACE_MSR 188
 #define KVM_CAP_X86_MSR_FILTER 189
 #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190
+#define KVM_CAP_SYS_HYPERV_CPUID 191
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1511,7 +1512,7 @@ struct kvm_enc_region {
 /* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT_2 */
 #define KVM_CLEAR_DIRTY_LOG          _IOWR(KVMIO, 0xc0, struct kvm_clear_dirty_log)
 
-/* Available with KVM_CAP_HYPERV_CPUID */
+/* Available with KVM_CAP_HYPERV_CPUID (vcpu) / KVM_CAP_SYS_HYPERV_CPUID (system) */
 #define KVM_GET_SUPPORTED_HV_CPUID _IOWR(KVMIO, 0xc1, struct kvm_cpuid2)
 
 /* Available with KVM_CAP_ARM_SVE */
-- 
cgit v1.2.3


From fb04a1eddb1a65b6588a021bdc132270d5ae48bb Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Wed, 30 Sep 2020 21:22:22 -0400
Subject: KVM: X86: Implement ring-based dirty memory tracking

This patch is heavily based on previous work from Lei Cao
<lei.cao@stratus.com> and Paolo Bonzini <pbonzini@redhat.com>. [1]

KVM currently uses large bitmaps to track dirty memory.  These bitmaps
are copied to userspace when userspace queries KVM for its dirty page
information.  The use of bitmaps is mostly sufficient for live
migration, as large parts of memory are dirtied from one log-dirty
pass to another.  However, in a checkpointing system, the number of
dirty pages is small and in fact it is often bounded---the VM is
paused when it has dirtied a pre-defined number of pages. Traversing a
large, sparsely populated bitmap to find set bits is time-consuming,
as is copying the bitmap to user-space.

A similar issue will be there for live migration when the guest memory
is huge while the page dirty procedure is trivial.  In that case for
each dirty sync we need to pull the whole dirty bitmap to userspace
and analyse every bit even if it's mostly zeros.

The preferred data structure for above scenarios is a dense list of
guest frame numbers (GFN).  This patch series stores the dirty list in
kernel memory that can be memory mapped into userspace to allow speedy
harvesting.

This patch enables dirty ring for X86 only.  However it should be
easily extended to other archs as well.

[1] https://patchwork.kernel.org/patch/10471409/

Signed-off-by: Lei Cao <lei.cao@stratus.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <20201001012222.5767-1-peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virt/kvm/api.rst  |  93 +++++++++++++++++++
 arch/x86/include/asm/kvm_host.h |   3 +
 arch/x86/include/uapi/asm/kvm.h |   1 +
 arch/x86/kvm/Makefile           |   3 +-
 arch/x86/kvm/mmu/mmu.c          |   8 ++
 arch/x86/kvm/mmu/tdp_mmu.c      |   2 +-
 arch/x86/kvm/vmx/vmx.c          |   7 ++
 arch/x86/kvm/x86.c              |   9 ++
 include/linux/kvm_dirty_ring.h  | 103 +++++++++++++++++++++
 include/linux/kvm_host.h        |  13 +++
 include/trace/events/kvm.h      |  63 +++++++++++++
 include/uapi/linux/kvm.h        |  53 +++++++++++
 virt/kvm/dirty_ring.c           | 194 ++++++++++++++++++++++++++++++++++++++++
 virt/kvm/kvm_main.c             | 113 ++++++++++++++++++++++-
 14 files changed, 662 insertions(+), 3 deletions(-)
 create mode 100644 include/linux/kvm_dirty_ring.h
 create mode 100644 virt/kvm/dirty_ring.c

(limited to 'include/uapi')

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 81d54fe76a2d..e264ebc35e27 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -262,6 +262,18 @@ The KVM_RUN ioctl (cf.) communicates with userspace via a shared
 memory region.  This ioctl returns the size of that region.  See the
 KVM_RUN documentation for details.
 
+Besides the size of the KVM_RUN communication region, other areas of
+the VCPU file descriptor can be mmap-ed, including:
+
+- if KVM_CAP_COALESCED_MMIO is available, a page at
+  KVM_COALESCED_MMIO_PAGE_OFFSET * PAGE_SIZE; for historical reasons,
+  this page is included in the result of KVM_GET_VCPU_MMAP_SIZE.
+  KVM_CAP_COALESCED_MMIO is not documented yet.
+
+- if KVM_CAP_DIRTY_LOG_RING is available, a number of pages at
+  KVM_DIRTY_LOG_PAGE_OFFSET * PAGE_SIZE.  For more information on
+  KVM_CAP_DIRTY_LOG_RING, see section 8.29.
+
 
 4.6 KVM_SET_MEMORY_REGION
 -------------------------
@@ -6396,3 +6408,84 @@ When enabled, KVM will disable paravirtual features provided to the
 guest according to the bits in the KVM_CPUID_FEATURES CPUID leaf
 (0x40000001). Otherwise, a guest may use the paravirtual features
 regardless of what has actually been exposed through the CPUID leaf.
+
+
+8.29 KVM_CAP_DIRTY_LOG_RING
+---------------------------
+
+:Architectures: x86
+:Parameters: args[0] - size of the dirty log ring
+
+KVM is capable of tracking dirty memory using ring buffers that are
+mmaped into userspace; there is one dirty ring per vcpu.
+
+The dirty ring is available to userspace as an array of
+``struct kvm_dirty_gfn``.  Each dirty entry is defined as::
+
+  struct kvm_dirty_gfn {
+          __u32 flags;
+          __u32 slot; /* as_id | slot_id */
+          __u64 offset;
+  };
+
+The following values are defined for the flags field to define the
+current state of the entry::
+
+  #define KVM_DIRTY_GFN_F_DIRTY           BIT(0)
+  #define KVM_DIRTY_GFN_F_RESET           BIT(1)
+  #define KVM_DIRTY_GFN_F_MASK            0x3
+
+Userspace should call KVM_ENABLE_CAP ioctl right after KVM_CREATE_VM
+ioctl to enable this capability for the new guest and set the size of
+the rings.  Enabling the capability is only allowed before creating any
+vCPU, and the size of the ring must be a power of two.  The larger the
+ring buffer, the less likely the ring is full and the VM is forced to
+exit to userspace. The optimal size depends on the workload, but it is
+recommended that it be at least 64 KiB (4096 entries).
+
+Just like for dirty page bitmaps, the buffer tracks writes to
+all user memory regions for which the KVM_MEM_LOG_DIRTY_PAGES flag was
+set in KVM_SET_USER_MEMORY_REGION.  Once a memory region is registered
+with the flag set, userspace can start harvesting dirty pages from the
+ring buffer.
+
+An entry in the ring buffer can be unused (flag bits ``00``),
+dirty (flag bits ``01``) or harvested (flag bits ``1X``).  The
+state machine for the entry is as follows::
+
+          dirtied         harvested        reset
+     00 -----------> 01 -------------> 1X -------+
+      ^                                          |
+      |                                          |
+      +------------------------------------------+
+
+To harvest the dirty pages, userspace accesses the mmaped ring buffer
+to read the dirty GFNs.  If the flags have the DIRTY bit set (at this stage
+the RESET bit must be cleared), then it means this GFN is a dirty GFN.
+The userspace should harvest this GFN and mark the flags from state
+``01b`` to ``1Xb`` (bit 0 will be ignored by KVM, but bit 1 must be set
+to show that this GFN is harvested and waiting for a reset), and move
+on to the next GFN.  The userspace should continue to do this until the
+flags of a GFN have the DIRTY bit cleared, meaning that it has harvested
+all the dirty GFNs that were available.
+
+It's not necessary for userspace to harvest all the dirty GFNs at once.
+However it must collect the dirty GFNs in sequence, i.e., the userspace
+program cannot skip one dirty GFN to collect the one next to it.
+
+After processing one or more entries in the ring buffer, userspace
+calls the VM ioctl KVM_RESET_DIRTY_RINGS to notify the kernel about
+it, so that the kernel will reprotect those collected GFNs.
+Therefore, the ioctl must be called *before* reading the content of
+the dirty pages.
+
+The dirty ring can get full.  When it happens, the KVM_RUN of the
+vcpu will return with exit reason KVM_EXIT_DIRTY_RING_FULL.
+
+The dirty ring interface has a major difference compared to the
+KVM_GET_DIRTY_LOG interface in that, when reading the dirty ring from
+userspace, it's still possible that the kernel has not yet flushed the
+processor's dirty page buffers into the kernel buffer (with dirty bitmaps, the
+flushing is done by the KVM_GET_DIRTY_LOG ioctl).  To achieve that, one
+needs to kick the vcpu out of KVM_RUN using a signal.  The resulting
+vmexit ensures that all dirty GFNs are flushed to the dirty rings.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 69e94aa716e9..f002cdb13a0b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1232,6 +1232,7 @@ struct kvm_x86_ops {
 	void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
 					   struct kvm_memory_slot *slot,
 					   gfn_t offset, unsigned long mask);
+	int (*cpu_dirty_log_size)(void);
 
 	/* pmu operations of sub-arch */
 	const struct kvm_pmu_ops *pmu_ops;
@@ -1744,4 +1745,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
 #define GET_SMSTATE(type, buf, offset)		\
 	(*(type *)((buf) + (offset) - 0x7e00))
 
+int kvm_cpu_dirty_log_size(void);
+
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 89e5f3d1bba8..8e76d3701db3 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -12,6 +12,7 @@
 
 #define KVM_PIO_PAGE_OFFSET 1
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2
+#define KVM_DIRTY_LOG_PAGE_OFFSET 64
 
 #define DE_VECTOR 0
 #define DB_VECTOR 1
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index b804444e16d4..4bd14ab01323 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -10,7 +10,8 @@ endif
 KVM := ../../../virt/kvm
 
 kvm-y			+= $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
-				$(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
+				$(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o \
+				$(KVM)/dirty_ring.o
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
 
 kvm-y			+= x86.o emulate.o i8259.o irq.o lapic.o \
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 5bb1939b65d8..12e5cfe0995e 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1289,6 +1289,14 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 		kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
 }
 
+int kvm_cpu_dirty_log_size(void)
+{
+	if (kvm_x86_ops.cpu_dirty_log_size)
+		return kvm_x86_ops.cpu_dirty_log_size();
+
+	return 0;
+}
+
 bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
 				    struct kvm_memory_slot *slot, u64 gfn)
 {
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index ff28a5c6abd6..cffa51c6049e 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -185,7 +185,7 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
 	if ((!is_writable_pte(old_spte) || pfn_changed) &&
 	    is_writable_pte(new_spte)) {
 		slot = __gfn_to_memslot(__kvm_memslots(kvm, as_id), gfn);
-		mark_page_dirty_in_slot(slot, gfn);
+		mark_page_dirty_in_slot(kvm, slot, gfn);
 	}
 }
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 46b32aa43811..2b6d538454a6 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7583,6 +7583,11 @@ static bool vmx_check_apicv_inhibit_reasons(ulong bit)
 	return supported & BIT(bit);
 }
 
+static int vmx_cpu_dirty_log_size(void)
+{
+	return enable_pml ? PML_ENTITY_NUM : 0;
+}
+
 static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.hardware_unsetup = hardware_unsetup,
 
@@ -7712,6 +7717,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.migrate_timers = vmx_migrate_timers,
 
 	.msr_filter_changed = vmx_msr_filter_changed,
+	.cpu_dirty_log_size = vmx_cpu_dirty_log_size,
 };
 
 static __init int hardware_setup(void)
@@ -7829,6 +7835,7 @@ static __init int hardware_setup(void)
 		vmx_x86_ops.slot_disable_log_dirty = NULL;
 		vmx_x86_ops.flush_log_dirty = NULL;
 		vmx_x86_ops.enable_log_dirty_pt_masked = NULL;
+		vmx_x86_ops.cpu_dirty_log_size = NULL;
 	}
 
 	if (!cpu_has_vmx_preemption_timer())
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b4ac726526f8..6c704a597b7c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8754,6 +8754,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 	bool req_immediate_exit = false;
 
+	/* Forbid vmenter if vcpu dirty ring is soft-full */
+	if (unlikely(vcpu->kvm->dirty_ring_size &&
+		     kvm_dirty_ring_soft_full(&vcpu->dirty_ring))) {
+		vcpu->run->exit_reason = KVM_EXIT_DIRTY_RING_FULL;
+		trace_kvm_dirty_ring_exit(vcpu);
+		r = 0;
+		goto out;
+	}
+
 	if (kvm_request_pending(vcpu)) {
 		if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
 			if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) {
diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h
new file mode 100644
index 000000000000..120e5e90fa1d
--- /dev/null
+++ b/include/linux/kvm_dirty_ring.h
@@ -0,0 +1,103 @@
+#ifndef KVM_DIRTY_RING_H
+#define KVM_DIRTY_RING_H
+
+#include <linux/kvm.h>
+
+/**
+ * kvm_dirty_ring: KVM internal dirty ring structure
+ *
+ * @dirty_index: free running counter that points to the next slot in
+ *               dirty_ring->dirty_gfns, where a new dirty page should go
+ * @reset_index: free running counter that points to the next dirty page
+ *               in dirty_ring->dirty_gfns for which dirty trap needs to
+ *               be reenabled
+ * @size:        size of the compact list, dirty_ring->dirty_gfns
+ * @soft_limit:  when the number of dirty pages in the list reaches this
+ *               limit, vcpu that owns this ring should exit to userspace
+ *               to allow userspace to harvest all the dirty pages
+ * @dirty_gfns:  the array to keep the dirty gfns
+ * @index:       index of this dirty ring
+ */
+struct kvm_dirty_ring {
+	u32 dirty_index;
+	u32 reset_index;
+	u32 size;
+	u32 soft_limit;
+	struct kvm_dirty_gfn *dirty_gfns;
+	int index;
+};
+
+#if (KVM_DIRTY_LOG_PAGE_OFFSET == 0)
+/*
+ * If the arch does not define KVM_DIRTY_LOG_PAGE_OFFSET (it defaults to
+ * 0), kvm_dirty_ring.o should not be built either; provide no-op stubs.
+ */
+static inline u32 kvm_dirty_ring_get_rsvd_entries(void)
+{
+	return 0;
+}
+
+static inline int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring,
+				       int index, u32 size)
+{
+	return 0;
+}
+
+static inline struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm)
+{
+	return NULL;
+}
+
+static inline int kvm_dirty_ring_reset(struct kvm *kvm,
+				       struct kvm_dirty_ring *ring)
+{
+	return 0;
+}
+
+static inline void kvm_dirty_ring_push(struct kvm_dirty_ring *ring,
+				       u32 slot, u64 offset)
+{
+}
+
+static inline struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring,
+						   u32 offset)
+{
+	return NULL;
+}
+
+static inline void kvm_dirty_ring_free(struct kvm_dirty_ring *ring)
+{
+}
+
+static inline bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring)
+{
+	return true;
+}
+
+#else /* KVM_DIRTY_LOG_PAGE_OFFSET == 0 */
+
+u32 kvm_dirty_ring_get_rsvd_entries(void);
+int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size);
+struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm);
+
+/*
+ * called with kvm->slots_lock held, returns the number of
+ * processed pages.
+ */
+int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring);
+
+/*
+ * Push one dirty GFN (slot, offset) onto the ring.  There is no return
+ * value: the ring is expected never to be full when this is called.
+ */
+void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset);
+
+/* for use in vm_operations_struct */
+struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset);
+
+void kvm_dirty_ring_free(struct kvm_dirty_ring *ring);
+bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring);
+
+#endif /* KVM_DIRTY_LOG_PAGE_OFFSET == 0 */
+
+#endif	/* KVM_DIRTY_RING_H */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ca7c1459a8e3..864b156391c8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -34,6 +34,7 @@
 #include <linux/kvm_types.h>
 
 #include <asm/kvm_host.h>
+#include <linux/kvm_dirty_ring.h>
 
 #ifndef KVM_MAX_VCPU_ID
 #define KVM_MAX_VCPU_ID KVM_MAX_VCPUS
@@ -319,6 +320,7 @@ struct kvm_vcpu {
 	bool preempted;
 	bool ready;
 	struct kvm_vcpu_arch arch;
+	struct kvm_dirty_ring dirty_ring;
 };
 
 static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
@@ -505,6 +507,7 @@ struct kvm {
 	struct srcu_struct irq_srcu;
 	pid_t userspace_pid;
 	unsigned int max_halt_poll_ns;
+	u32 dirty_ring_size;
 };
 
 #define kvm_err(fmt, ...) \
@@ -1477,4 +1480,14 @@ static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu)
 }
 #endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */
 
+/*
+ * This defines how many reserved entries we want to keep before we
+ * kick the vcpu to the userspace to avoid dirty ring full.  This
+ * value can be tuned to higher if e.g. PML is enabled on the host.
+ */
+#define  KVM_DIRTY_RING_RSVD_ENTRIES  64
+
+/* Max number of entries allowed for each kvm dirty ring */
+#define  KVM_DIRTY_RING_MAX_ENTRIES  65536
+
 #endif
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 26cfb0fa8e7e..49d7d0fe29f6 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -399,6 +399,69 @@ TRACE_EVENT(kvm_halt_poll_ns,
 #define trace_kvm_halt_poll_ns_shrink(vcpu_id, new, old) \
 	trace_kvm_halt_poll_ns(false, vcpu_id, new, old)
 
+TRACE_EVENT(kvm_dirty_ring_push,
+	TP_PROTO(struct kvm_dirty_ring *ring, u32 slot, u64 offset),
+	TP_ARGS(ring, slot, offset),
+
+	TP_STRUCT__entry(
+		__field(int, index)
+		__field(u32, dirty_index)
+		__field(u32, reset_index)
+		__field(u32, slot)
+		__field(u64, offset)
+	),
+
+	TP_fast_assign(
+		__entry->index          = ring->index;
+		__entry->dirty_index    = ring->dirty_index;
+		__entry->reset_index    = ring->reset_index;
+		__entry->slot           = slot;
+		__entry->offset         = offset;
+	),
+
+	TP_printk("ring %d: dirty 0x%x reset 0x%x "
+		  "slot %u offset 0x%llx (used %u)",
+		  __entry->index, __entry->dirty_index,
+		  __entry->reset_index,  __entry->slot, __entry->offset,
+		  __entry->dirty_index - __entry->reset_index)
+);
+
+TRACE_EVENT(kvm_dirty_ring_reset,
+	TP_PROTO(struct kvm_dirty_ring *ring),
+	TP_ARGS(ring),
+
+	TP_STRUCT__entry(
+		__field(int, index)
+		__field(u32, dirty_index)
+		__field(u32, reset_index)
+	),
+
+	TP_fast_assign(
+		__entry->index          = ring->index;
+		__entry->dirty_index    = ring->dirty_index;
+		__entry->reset_index    = ring->reset_index;
+	),
+
+	TP_printk("ring %d: dirty 0x%x reset 0x%x (used %u)",
+		  __entry->index, __entry->dirty_index, __entry->reset_index,
+		  __entry->dirty_index - __entry->reset_index)
+);
+
+TRACE_EVENT(kvm_dirty_ring_exit,
+	TP_PROTO(struct kvm_vcpu *vcpu),
+	TP_ARGS(vcpu),
+
+	TP_STRUCT__entry(
+	    __field(int, vcpu_id)
+	),
+
+	TP_fast_assign(
+	    __entry->vcpu_id = vcpu->vcpu_id;
+	),
+
+	TP_printk("vcpu %d", __entry->vcpu_id)
+);
+
 #endif /* _TRACE_KVM_MAIN_H */
 
 /* This part must be outside protection */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 204afbe1240e..886802b8ffba 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -250,6 +250,7 @@ struct kvm_hyperv_exit {
 #define KVM_EXIT_ARM_NISV         28
 #define KVM_EXIT_X86_RDMSR        29
 #define KVM_EXIT_X86_WRMSR        30
+#define KVM_EXIT_DIRTY_RING_FULL  31
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -1054,6 +1055,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_X86_MSR_FILTER 189
 #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190
 #define KVM_CAP_SYS_HYPERV_CPUID 191
+#define KVM_CAP_DIRTY_LOG_RING 192
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1558,6 +1560,9 @@ struct kvm_pv_cmd {
 /* Available with KVM_CAP_X86_MSR_FILTER */
 #define KVM_X86_SET_MSR_FILTER	_IOW(KVMIO,  0xc6, struct kvm_msr_filter)
 
+/* Available with KVM_CAP_DIRTY_LOG_RING */
+#define KVM_RESET_DIRTY_RINGS		_IO(KVMIO, 0xc7)
+
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {
 	/* Guest initialization commands */
@@ -1711,4 +1716,52 @@ struct kvm_hyperv_eventfd {
 #define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE    (1 << 0)
 #define KVM_DIRTY_LOG_INITIALLY_SET            (1 << 1)
 
+/*
+ * Arch needs to define the macro after implementing the dirty ring
+ * feature.  KVM_DIRTY_LOG_PAGE_OFFSET should be defined as the
+ * starting page offset of the dirty ring structures.
+ */
+#ifndef KVM_DIRTY_LOG_PAGE_OFFSET
+#define KVM_DIRTY_LOG_PAGE_OFFSET 0
+#endif
+
+/*
+ * KVM dirty GFN flags, defined as:
+ *
+ * |---------------+---------------+--------------|
+ * | bit 1 (reset) | bit 0 (dirty) | Status       |
+ * |---------------+---------------+--------------|
+ * |             0 |             0 | Invalid GFN  |
+ * |             0 |             1 | Dirty GFN    |
+ * |             1 |             X | GFN to reset |
+ * |---------------+---------------+--------------|
+ *
+ * Lifecycle of a dirty GFN goes like:
+ *
+ *      dirtied         harvested        reset
+ * 00 -----------> 01 -------------> 1X -------+
+ *  ^                                          |
+ *  |                                          |
+ *  +------------------------------------------+
+ *
+ * The userspace program is only responsible for the 01->1X state
+ * conversion after harvesting an entry.  Also, it must not skip any
+ * dirty bits, so that dirty bits are always harvested in sequence.
+ */
+#define KVM_DIRTY_GFN_F_DIRTY           BIT(0)
+#define KVM_DIRTY_GFN_F_RESET           BIT(1)
+#define KVM_DIRTY_GFN_F_MASK            0x3
+
+/*
+ * KVM dirty rings should be mapped at KVM_DIRTY_LOG_PAGE_OFFSET of
+ * per-vcpu mmaped regions as an array of struct kvm_dirty_gfn.  The
+ * size of the gfn buffer is decided by the first argument when
+ * enabling KVM_CAP_DIRTY_LOG_RING.
+ */
+struct kvm_dirty_gfn {
+	__u32 flags;
+	__u32 slot;
+	__u64 offset;
+};
+
 #endif /* __LINUX_KVM_H */
diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c
new file mode 100644
index 000000000000..9d01299563ee
--- /dev/null
+++ b/virt/kvm/dirty_ring.c
@@ -0,0 +1,194 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * KVM dirty ring implementation
+ *
+ * Copyright 2019 Red Hat, Inc.
+ */
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/vmalloc.h>
+#include <linux/kvm_dirty_ring.h>
+#include <trace/events/kvm.h>
+
+int __weak kvm_cpu_dirty_log_size(void)
+{
+	return 0;
+}
+
+u32 kvm_dirty_ring_get_rsvd_entries(void)
+{
+	return KVM_DIRTY_RING_RSVD_ENTRIES + kvm_cpu_dirty_log_size();
+}
+
+static u32 kvm_dirty_ring_used(struct kvm_dirty_ring *ring)
+{
+	return READ_ONCE(ring->dirty_index) - READ_ONCE(ring->reset_index);
+}
+
+bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring)
+{
+	return kvm_dirty_ring_used(ring) >= ring->soft_limit;
+}
+
+static bool kvm_dirty_ring_full(struct kvm_dirty_ring *ring)
+{
+	return kvm_dirty_ring_used(ring) >= ring->size;
+}
+
+struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm)
+{
+	struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
+
+	WARN_ON_ONCE(vcpu->kvm != kvm);
+
+	return &vcpu->dirty_ring;
+}
+
+static void kvm_reset_dirty_gfn(struct kvm *kvm, u32 slot, u64 offset, u64 mask)
+{
+	struct kvm_memory_slot *memslot;
+	int as_id, id;
+
+	as_id = slot >> 16;
+	id = (u16)slot;
+
+	if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
+		return;
+
+	memslot = id_to_memslot(__kvm_memslots(kvm, as_id), id);
+
+	if (!memslot || (offset + __fls(mask)) >= memslot->npages)
+		return;
+
+	spin_lock(&kvm->mmu_lock);
+	kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset, mask);
+	spin_unlock(&kvm->mmu_lock);
+}
+
+int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size)
+{
+	ring->dirty_gfns = vmalloc(size);
+	if (!ring->dirty_gfns)
+		return -ENOMEM;
+	memset(ring->dirty_gfns, 0, size);
+
+	ring->size = size / sizeof(struct kvm_dirty_gfn);
+	ring->soft_limit = ring->size - kvm_dirty_ring_get_rsvd_entries();
+	ring->dirty_index = 0;
+	ring->reset_index = 0;
+	ring->index = index;
+
+	return 0;
+}
+
+static inline void kvm_dirty_gfn_set_invalid(struct kvm_dirty_gfn *gfn)
+{
+	gfn->flags = 0;
+}
+
+static inline void kvm_dirty_gfn_set_dirtied(struct kvm_dirty_gfn *gfn)
+{
+	gfn->flags = KVM_DIRTY_GFN_F_DIRTY;
+}
+
+static inline bool kvm_dirty_gfn_invalid(struct kvm_dirty_gfn *gfn)
+{
+	return gfn->flags == 0;
+}
+
+static inline bool kvm_dirty_gfn_harvested(struct kvm_dirty_gfn *gfn)
+{
+	return gfn->flags & KVM_DIRTY_GFN_F_RESET;
+}
+
+int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring)
+{
+	u32 cur_slot, next_slot;
+	u64 cur_offset, next_offset;
+	unsigned long mask;
+	int count = 0;
+	struct kvm_dirty_gfn *entry;
+	bool first_round = true;
+
+	/* This is only needed to make compilers happy */
+	cur_slot = cur_offset = mask = 0;
+
+	while (true) {
+		entry = &ring->dirty_gfns[ring->reset_index & (ring->size - 1)];
+
+		if (!kvm_dirty_gfn_harvested(entry))
+			break;
+
+		next_slot = READ_ONCE(entry->slot);
+		next_offset = READ_ONCE(entry->offset);
+
+		/* Update the flags to reflect that this GFN is reset */
+		kvm_dirty_gfn_set_invalid(entry);
+
+		ring->reset_index++;
+		count++;
+		/*
+		 * Try to coalesce the reset operations when the guest is
+		 * scanning pages in the same slot.
+		 */
+		if (!first_round && next_slot == cur_slot) {
+			s64 delta = next_offset - cur_offset;
+
+			if (delta >= 0 && delta < BITS_PER_LONG) {
+				mask |= 1ull << delta;
+				continue;
+			}
+
+			/* Backwards visit, careful about overflows!  */
+			if (delta > -BITS_PER_LONG && delta < 0 &&
+			    (mask << -delta >> -delta) == mask) {
+				cur_offset = next_offset;
+				mask = (mask << -delta) | 1;
+				continue;
+			}
+		}
+		kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask);
+		cur_slot = next_slot;
+		cur_offset = next_offset;
+		mask = 1;
+		first_round = false;
+	}
+
+	kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask);
+
+	trace_kvm_dirty_ring_reset(ring);
+
+	return count;
+}
+
+void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset)
+{
+	struct kvm_dirty_gfn *entry;
+
+	/* It should never get full */
+	WARN_ON_ONCE(kvm_dirty_ring_full(ring));
+
+	entry = &ring->dirty_gfns[ring->dirty_index & (ring->size - 1)];
+
+	entry->slot = slot;
+	entry->offset = offset;
+	/*
+	 * Make sure the data is filled in before we publish this to
+	 * the userspace program.  There's no paired kernel-side reader.
+	 */
+	smp_wmb();
+	kvm_dirty_gfn_set_dirtied(entry);
+	ring->dirty_index++;
+	trace_kvm_dirty_ring_push(ring, slot, offset);
+}
+
+struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset)
+{
+	return vmalloc_to_page((void *)ring->dirty_gfns + offset * PAGE_SIZE);
+}
+
+void kvm_dirty_ring_free(struct kvm_dirty_ring *ring)
+{
+	vfree(ring->dirty_gfns);
+	ring->dirty_gfns = NULL;
+}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 68598fdba226..78ef414512bf 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -63,6 +63,8 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/kvm.h>
 
+#include <linux/kvm_dirty_ring.h>
+
 /* Worst case buffer size needed for holding an integer. */
 #define ITOA_MAX_LEN 12
 
@@ -415,6 +417,7 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 
 void kvm_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
+	kvm_dirty_ring_free(&vcpu->dirty_ring);
 	kvm_arch_vcpu_destroy(vcpu);
 
 	/*
@@ -2644,8 +2647,13 @@ void mark_page_dirty_in_slot(struct kvm *kvm,
 {
 	if (memslot && memslot->dirty_bitmap) {
 		unsigned long rel_gfn = gfn - memslot->base_gfn;
+		u32 slot = (memslot->as_id << 16) | memslot->id;
 
-		set_bit_le(rel_gfn, memslot->dirty_bitmap);
+		if (kvm->dirty_ring_size)
+			kvm_dirty_ring_push(kvm_dirty_ring_get(kvm),
+					    slot, rel_gfn);
+		else
+			set_bit_le(rel_gfn, memslot->dirty_bitmap);
 	}
 }
 EXPORT_SYMBOL_GPL(mark_page_dirty_in_slot);
@@ -3005,6 +3013,17 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
 
+static bool kvm_page_in_dirty_ring(struct kvm *kvm, unsigned long pgoff)
+{
+#if KVM_DIRTY_LOG_PAGE_OFFSET > 0
+	return (pgoff >= KVM_DIRTY_LOG_PAGE_OFFSET) &&
+	    (pgoff < KVM_DIRTY_LOG_PAGE_OFFSET +
+	     kvm->dirty_ring_size / PAGE_SIZE);
+#else
+	return false;
+#endif
+}
+
 static vm_fault_t kvm_vcpu_fault(struct vm_fault *vmf)
 {
 	struct kvm_vcpu *vcpu = vmf->vma->vm_file->private_data;
@@ -3020,6 +3039,10 @@ static vm_fault_t kvm_vcpu_fault(struct vm_fault *vmf)
 	else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET)
 		page = virt_to_page(vcpu->kvm->coalesced_mmio_ring);
 #endif
+	else if (kvm_page_in_dirty_ring(vcpu->kvm, vmf->pgoff))
+		page = kvm_dirty_ring_get_page(
+		    &vcpu->dirty_ring,
+		    vmf->pgoff - KVM_DIRTY_LOG_PAGE_OFFSET);
 	else
 		return kvm_arch_vcpu_fault(vcpu, vmf);
 	get_page(page);
@@ -3033,6 +3056,14 @@ static const struct vm_operations_struct kvm_vcpu_vm_ops = {
 
 static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
 {
+	struct kvm_vcpu *vcpu = file->private_data;
+	unsigned long pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+
+	if ((kvm_page_in_dirty_ring(vcpu->kvm, vma->vm_pgoff) ||
+	     kvm_page_in_dirty_ring(vcpu->kvm, vma->vm_pgoff + pages - 1)) &&
+	    ((vma->vm_flags & VM_EXEC) || !(vma->vm_flags & VM_SHARED)))
+		return -EINVAL;
+
 	vma->vm_ops = &kvm_vcpu_vm_ops;
 	return 0;
 }
@@ -3126,6 +3157,13 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 	if (r)
 		goto vcpu_free_run_page;
 
+	if (kvm->dirty_ring_size) {
+		r = kvm_dirty_ring_alloc(&vcpu->dirty_ring,
+					 id, kvm->dirty_ring_size);
+		if (r)
+			goto arch_vcpu_destroy;
+	}
+
 	mutex_lock(&kvm->lock);
 	if (kvm_get_vcpu_by_id(kvm, id)) {
 		r = -EEXIST;
@@ -3159,6 +3197,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 
 unlock_vcpu_destroy:
 	mutex_unlock(&kvm->lock);
+	kvm_dirty_ring_free(&vcpu->dirty_ring);
+arch_vcpu_destroy:
 	kvm_arch_vcpu_destroy(vcpu);
 vcpu_free_run_page:
 	free_page((unsigned long)vcpu->run);
@@ -3631,12 +3671,78 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 #endif
 	case KVM_CAP_NR_MEMSLOTS:
 		return KVM_USER_MEM_SLOTS;
+	case KVM_CAP_DIRTY_LOG_RING:
+#if KVM_DIRTY_LOG_PAGE_OFFSET > 0
+		return KVM_DIRTY_RING_MAX_ENTRIES * sizeof(struct kvm_dirty_gfn);
+#else
+		return 0;
+#endif
 	default:
 		break;
 	}
 	return kvm_vm_ioctl_check_extension(kvm, arg);
 }
 
+static int kvm_vm_ioctl_enable_dirty_log_ring(struct kvm *kvm, u32 size)
+{
+	int r;
+
+	if (!KVM_DIRTY_LOG_PAGE_OFFSET)
+		return -EINVAL;
+
+	/* the size should be power of 2 */
+	if (!size || (size & (size - 1)))
+		return -EINVAL;
+
+	/* Should be bigger to keep the reserved entries, or a page */
+	if (size < kvm_dirty_ring_get_rsvd_entries() *
+	    sizeof(struct kvm_dirty_gfn) || size < PAGE_SIZE)
+		return -EINVAL;
+
+	if (size > KVM_DIRTY_RING_MAX_ENTRIES *
+	    sizeof(struct kvm_dirty_gfn))
+		return -E2BIG;
+
+	/* We only allow it to be set once */
+	if (kvm->dirty_ring_size)
+		return -EINVAL;
+
+	mutex_lock(&kvm->lock);
+
+	if (kvm->created_vcpus) {
+		/* Changing this value is not allowed once a vcpu is created */
+		r = -EINVAL;
+	} else {
+		kvm->dirty_ring_size = size;
+		r = 0;
+	}
+
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+
+static int kvm_vm_ioctl_reset_dirty_pages(struct kvm *kvm)
+{
+	int i;
+	struct kvm_vcpu *vcpu;
+	int cleared = 0;
+
+	if (!kvm->dirty_ring_size)
+		return -EINVAL;
+
+	mutex_lock(&kvm->slots_lock);
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		cleared += kvm_dirty_ring_reset(vcpu->kvm, &vcpu->dirty_ring);
+
+	mutex_unlock(&kvm->slots_lock);
+
+	if (cleared)
+		kvm_flush_remote_tlbs(kvm);
+
+	return cleared;
+}
+
 int __attribute__((weak)) kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 						  struct kvm_enable_cap *cap)
 {
@@ -3667,6 +3773,8 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
 		kvm->max_halt_poll_ns = cap->args[0];
 		return 0;
 	}
+	case KVM_CAP_DIRTY_LOG_RING:
+		return kvm_vm_ioctl_enable_dirty_log_ring(kvm, cap->args[0]);
 	default:
 		return kvm_vm_ioctl_enable_cap(kvm, cap);
 	}
@@ -3851,6 +3959,9 @@ static long kvm_vm_ioctl(struct file *filp,
 	case KVM_CHECK_EXTENSION:
 		r = kvm_vm_ioctl_check_extension_generic(kvm, arg);
 		break;
+	case KVM_RESET_DIRTY_RINGS:
+		r = kvm_vm_ioctl_reset_dirty_pages(kvm);
+		break;
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 	}
-- 
cgit v1.2.3


From 3f65c6f67e8813448d7e3cfd3470b0f8c15bfaea Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 30 Oct 2020 17:55:26 +0100
Subject: media: v4l2: allocate v4l2_clip objects early

The v4l2_format based ioctls can have an indirect pointer to an array
of v4l2_clip structures for overlay mode, depending on the 'type' member.
There are only five drivers that use the overlay mode and copy the
data through the __user pointer.

Change the five drivers to use memcpy() instead, and copy the data
in common code using the check_array_args() helpers. This allows
for a subsequent patch that uses the same mechanism for compat
ioctl handlers.

Note that there is another pointer for a 'bitmap' that is only
used in the 'vivid' driver and nowhere else. There is no easy
way to use the same trick without adding complexity to the
common code, so this remains a __user pointer.

[hverkuil: fix: CHECK: spaces preferred around that '*' (ctx:VxV)]
[hverkuil: fix: CHECK: Alignment should match open parenthesis]

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/common/saa7146/saa7146_video.c     |  6 ++----
 drivers/media/pci/bt8xx/bttv-driver.c            |  8 ++------
 drivers/media/pci/saa7134/saa7134-video.c        | 19 +++++++------------
 drivers/media/test-drivers/vivid/vivid-vid-cap.c | 18 +++++++-----------
 drivers/media/test-drivers/vivid/vivid-vid-out.c | 18 +++++++-----------
 drivers/media/v4l2-core/v4l2-ioctl.c             | 23 ++++++++++++++++++++++-
 include/uapi/linux/videodev2.h                   |  2 +-
 7 files changed, 48 insertions(+), 46 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/media/common/saa7146/saa7146_video.c b/drivers/media/common/saa7146/saa7146_video.c
index ccd15b4d4920..7b8795eca589 100644
--- a/drivers/media/common/saa7146/saa7146_video.c
+++ b/drivers/media/common/saa7146/saa7146_video.c
@@ -771,10 +771,8 @@ static int vidioc_s_fmt_vid_overlay(struct file *file, void *__fh, struct v4l2_f
 	vv->ov.nclips = f->fmt.win.clipcount;
 	if (vv->ov.nclips > 16)
 		vv->ov.nclips = 16;
-	if (copy_from_user(vv->ov.clips, f->fmt.win.clips,
-				sizeof(struct v4l2_clip) * vv->ov.nclips)) {
-		return -EFAULT;
-	}
+	memcpy(vv->ov.clips, f->fmt.win.clips,
+	       sizeof(struct v4l2_clip) * vv->ov.nclips);
 
 	/* vv->ov.fh is used to indicate that we have valid overlay information, too */
 	vv->ov.fh = fh;
diff --git a/drivers/media/pci/bt8xx/bttv-driver.c b/drivers/media/pci/bt8xx/bttv-driver.c
index 8824dd0fb331..ef2ead36b70e 100644
--- a/drivers/media/pci/bt8xx/bttv-driver.c
+++ b/drivers/media/pci/bt8xx/bttv-driver.c
@@ -2143,12 +2143,8 @@ static int setup_window_lock(struct bttv_fh *fh, struct bttv *btv,
 	clips = kmalloc(size,GFP_KERNEL);
 	if (NULL == clips)
 		return -ENOMEM;
-	if (n > 0) {
-		if (copy_from_user(clips,win->clips,sizeof(struct v4l2_clip)*n)) {
-			kfree(clips);
-			return -EFAULT;
-		}
-	}
+	if (n > 0)
+		memcpy(clips, win->clips, sizeof(struct v4l2_clip) * n);
 
 	/* clip against screen */
 	if (NULL != btv->fbuf.base)
diff --git a/drivers/media/pci/saa7134/saa7134-video.c b/drivers/media/pci/saa7134/saa7134-video.c
index 9a6a6b68f8e3..94c1c10d0fea 100644
--- a/drivers/media/pci/saa7134/saa7134-video.c
+++ b/drivers/media/pci/saa7134/saa7134-video.c
@@ -1265,9 +1265,7 @@ static int saa7134_g_fmt_vid_overlay(struct file *file, void *priv,
 				struct v4l2_format *f)
 {
 	struct saa7134_dev *dev = video_drvdata(file);
-	struct v4l2_clip __user *clips = f->fmt.win.clips;
 	u32 clipcount = f->fmt.win.clipcount;
-	int err = 0;
 	int i;
 
 	if (saa7134_no_overlay > 0) {
@@ -1275,20 +1273,18 @@ static int saa7134_g_fmt_vid_overlay(struct file *file, void *priv,
 		return -EINVAL;
 	}
 	f->fmt.win = dev->win;
-	f->fmt.win.clips = clips;
-	if (clips == NULL)
+	if (!f->fmt.win.clips)
 		clipcount = 0;
 	if (dev->nclips < clipcount)
 		clipcount = dev->nclips;
 	f->fmt.win.clipcount = clipcount;
 
-	for (i = 0; !err && i < clipcount; i++) {
-		if (copy_to_user(&f->fmt.win.clips[i].c, &dev->clips[i].c,
-					sizeof(struct v4l2_rect)))
-			err = -EFAULT;
+	for (i = 0; i < clipcount; i++) {
+		memcpy(&f->fmt.win.clips[i].c, &dev->clips[i].c,
+		       sizeof(struct v4l2_rect));
 	}
 
-	return err;
+	return 0;
 }
 
 static int saa7134_try_fmt_vid_cap(struct file *file, void *priv,
@@ -1396,9 +1392,8 @@ static int saa7134_s_fmt_vid_overlay(struct file *file, void *priv,
 	dev->win    = f->fmt.win;
 	dev->nclips = f->fmt.win.clipcount;
 
-	if (copy_from_user(dev->clips, f->fmt.win.clips,
-			   sizeof(struct v4l2_clip) * dev->nclips))
-		return -EFAULT;
+	memcpy(dev->clips, f->fmt.win.clips,
+	       sizeof(struct v4l2_clip) * dev->nclips);
 
 	if (priv == dev->overlay_owner) {
 		spin_lock_irqsave(&dev->slock, flags);
diff --git a/drivers/media/test-drivers/vivid/vivid-vid-cap.c b/drivers/media/test-drivers/vivid/vivid-vid-cap.c
index eadf28ab1e39..b9caa4b26209 100644
--- a/drivers/media/test-drivers/vivid/vivid-vid-cap.c
+++ b/drivers/media/test-drivers/vivid/vivid-vid-cap.c
@@ -1107,11 +1107,9 @@ int vidioc_g_fmt_vid_overlay(struct file *file, void *priv,
 		    ((compose->width + 7) / 8) * compose->height))
 			return -EFAULT;
 	}
-	if (clipcount && win->clips) {
-		if (copy_to_user(win->clips, dev->clips_cap,
-				 clipcount * sizeof(dev->clips_cap[0])))
-			return -EFAULT;
-	}
+	if (clipcount && win->clips)
+		memcpy(win->clips, dev->clips_cap,
+		       clipcount * sizeof(dev->clips_cap[0]));
 	return 0;
 }
 
@@ -1141,9 +1139,8 @@ int vidioc_try_fmt_vid_overlay(struct file *file, void *priv,
 	if (win->clipcount > MAX_CLIPS)
 		win->clipcount = MAX_CLIPS;
 	if (win->clipcount) {
-		if (copy_from_user(dev->try_clips_cap, win->clips,
-				   win->clipcount * sizeof(dev->clips_cap[0])))
-			return -EFAULT;
+		memcpy(dev->try_clips_cap, win->clips,
+		       win->clipcount * sizeof(dev->clips_cap[0]));
 		for (i = 0; i < win->clipcount; i++) {
 			struct v4l2_rect *r = &dev->try_clips_cap[i].c;
 
@@ -1166,9 +1163,8 @@ int vidioc_try_fmt_vid_overlay(struct file *file, void *priv,
 					return -EINVAL;
 			}
 		}
-		if (copy_to_user(win->clips, dev->try_clips_cap,
-				 win->clipcount * sizeof(dev->clips_cap[0])))
-			return -EFAULT;
+		memcpy(win->clips, dev->try_clips_cap,
+		       win->clipcount * sizeof(dev->clips_cap[0]));
 	}
 	return 0;
 }
diff --git a/drivers/media/test-drivers/vivid/vivid-vid-out.c b/drivers/media/test-drivers/vivid/vivid-vid-out.c
index ee3446e3217c..ac1e981e8342 100644
--- a/drivers/media/test-drivers/vivid/vivid-vid-out.c
+++ b/drivers/media/test-drivers/vivid/vivid-vid-out.c
@@ -857,11 +857,9 @@ int vidioc_g_fmt_vid_out_overlay(struct file *file, void *priv,
 		    ((dev->compose_out.width + 7) / 8) * dev->compose_out.height))
 			return -EFAULT;
 	}
-	if (clipcount && win->clips) {
-		if (copy_to_user(win->clips, dev->clips_out,
-				 clipcount * sizeof(dev->clips_out[0])))
-			return -EFAULT;
-	}
+	if (clipcount && win->clips)
+		memcpy(win->clips, dev->clips_out,
+		       clipcount * sizeof(dev->clips_out[0]));
 	return 0;
 }
 
@@ -891,9 +889,8 @@ int vidioc_try_fmt_vid_out_overlay(struct file *file, void *priv,
 	if (win->clipcount > MAX_CLIPS)
 		win->clipcount = MAX_CLIPS;
 	if (win->clipcount) {
-		if (copy_from_user(dev->try_clips_out, win->clips,
-				   win->clipcount * sizeof(dev->clips_out[0])))
-			return -EFAULT;
+		memcpy(dev->try_clips_out, win->clips,
+		       win->clipcount * sizeof(dev->clips_out[0]));
 		for (i = 0; i < win->clipcount; i++) {
 			struct v4l2_rect *r = &dev->try_clips_out[i].c;
 
@@ -916,9 +913,8 @@ int vidioc_try_fmt_vid_out_overlay(struct file *file, void *priv,
 					return -EINVAL;
 			}
 		}
-		if (copy_to_user(win->clips, dev->try_clips_out,
-				 win->clipcount * sizeof(dev->clips_out[0])))
-			return -EFAULT;
+		memcpy(win->clips, dev->try_clips_out,
+		       win->clipcount * sizeof(dev->clips_out[0]));
 	}
 	return 0;
 }
diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index b8be61a09776..f0f6906a879d 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -1582,7 +1582,7 @@ static int v4l_g_fmt(const struct v4l2_ioctl_ops *ops,
 	switch (p->type) {
 	case V4L2_BUF_TYPE_VIDEO_OVERLAY:
 	case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY: {
-		struct v4l2_clip __user *clips = p->fmt.win.clips;
+		struct v4l2_clip *clips = p->fmt.win.clips;
 		u32 clipcount = p->fmt.win.clipcount;
 		void __user *bitmap = p->fmt.win.bitmap;
 
@@ -3084,6 +3084,27 @@ static int check_array_args(unsigned int cmd, void *parg, size_t *array_size,
 		}
 		break;
 	}
+	case VIDIOC_G_FMT:
+	case VIDIOC_S_FMT:
+	case VIDIOC_TRY_FMT: {
+		struct v4l2_format *fmt = parg;
+
+		if (fmt->type != V4L2_BUF_TYPE_VIDEO_OVERLAY &&
+		    fmt->type != V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY)
+			break;
+		if (fmt->fmt.win.clipcount > 2048)
+			return -EINVAL;
+		if (!fmt->fmt.win.clipcount)
+			break;
+
+		*user_ptr = (void __user *)fmt->fmt.win.clips;
+		*kernel_ptr = (void **)&fmt->fmt.win.clips;
+		*array_size = sizeof(struct v4l2_clip)
+				* fmt->fmt.win.clipcount;
+
+		ret = 1;
+		break;
+	}
 	}
 
 	return ret;
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 534eaa4d39bc..b10f102bbf6f 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -1185,7 +1185,7 @@ struct v4l2_window {
 	struct v4l2_rect        w;
 	__u32			field;	 /* enum v4l2_field */
 	__u32			chromakey;
-	struct v4l2_clip	__user *clips;
+	struct v4l2_clip	*clips;
 	__u32			clipcount;
 	void			__user *bitmap;
 	__u8                    global_alpha;
-- 
cgit v1.2.3


From e7531d5625ed1ec50299059949d5782d9459b8e5 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Thu, 1 Oct 2020 12:13:42 +0200
Subject: media: Fix V4L2_COLORSPACE_470_SYSTEM_BG description

The description of the V4L2_COLORSPACE_470_SYSTEM_BG stated that it was
superseded by SMPTE 170M. That is incorrect. The probable root cause of
this is that the HDMI standard does not support this colorspace and,
unless otherwise signaled, will fall back to SMPTE 170M for SDTV.

However, EBU Tech. 3321 states that sources should signal Rec. 709 as the
colorimetry when using HDMI since the differences between Rec. 709 and
Tech. 3213 are negligible.

Update the text accordingly.

Also drop a spurious " at the end of the Tech 3213 title in the
bibliography.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 Documentation/userspace-api/media/v4l/biblio.rst             | 12 +++++++++++-
 .../userspace-api/media/v4l/colorspaces-details.rst          |  5 +++--
 include/uapi/linux/videodev2.h                               |  4 +---
 3 files changed, 15 insertions(+), 6 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/userspace-api/media/v4l/biblio.rst b/Documentation/userspace-api/media/v4l/biblio.rst
index 7869b6f6ff72..64d241daf63c 100644
--- a/Documentation/userspace-api/media/v4l/biblio.rst
+++ b/Documentation/userspace-api/media/v4l/biblio.rst
@@ -270,7 +270,17 @@ EBU Tech 3213
 =============
 
 
-:title:     E.B.U. Standard for Chromaticity Tolerances for Studio Monitors"
+:title:     E.B.U. Standard for Chromaticity Tolerances for Studio Monitors
+
+:author:    European Broadcast Union (http://www.ebu.ch)
+
+.. _tech3321:
+
+EBU Tech 3321
+=============
+
+
+:title:     E.B.U. guidelines for Consumer Flat Panel Displays (FPDs)
 
 :author:    European Broadcast Union (http://www.ebu.ch)
 
diff --git a/Documentation/userspace-api/media/v4l/colorspaces-details.rst b/Documentation/userspace-api/media/v4l/colorspaces-details.rst
index 014e7c9fc655..126f66482a0d 100644
--- a/Documentation/userspace-api/media/v4l/colorspaces-details.rst
+++ b/Documentation/userspace-api/media/v4l/colorspaces-details.rst
@@ -674,8 +674,9 @@ Colorspace EBU Tech. 3213 (V4L2_COLORSPACE_470_SYSTEM_BG)
 =========================================================
 
 The :ref:`tech3213` standard defines the colorspace used by PAL/SECAM
-in 1975. In practice this colorspace is obsolete and SMPTE 170M should
-be used instead. The default transfer function is
+in 1975. Note that this colorspace is not supported by the HDMI interface.
+Instead :ref:`tech3321` recommends that Rec. 709 is used for HDMI.
+The default transfer function is
 ``V4L2_XFER_FUNC_709``. The default Y'CbCr encoding is
 ``V4L2_YCBCR_ENC_601``. The default Y'CbCr quantization is limited
 range. The chromaticities of the primary colors and the white reference
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index b10f102bbf6f..927075fa9099 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -221,9 +221,7 @@ enum v4l2_colorspace {
 	V4L2_COLORSPACE_470_SYSTEM_M  = 5,
 
 	/*
-	 * EBU Tech 3213 PAL/SECAM colorspace. This only makes sense when
-	 * dealing with really old PAL/SECAM recordings. Superseded by
-	 * SMPTE 170M.
+	 * EBU Tech 3213 PAL/SECAM colorspace.
 	 */
 	V4L2_COLORSPACE_470_SYSTEM_BG = 6,
 
-- 
cgit v1.2.3


From 6ad253cc3436269fc6bcff03d704c672f368da0a Mon Sep 17 00:00:00 2001
From: Dafna Hirschfeld <dafna.hirschfeld@collabora.com>
Date: Fri, 30 Oct 2020 14:46:08 +0100
Subject: media: uapi: add MEDIA_BUS_FMT_METADATA_FIXED media bus format.

MEDIA_BUS_FMT_METADATA_FIXED should be used when
the same driver handles both sides of the link and
the bus format is a fixed metadata format that is
not configurable from userspace.
The width and height will be set to 0 for this format.

Signed-off-by: Dafna Hirschfeld <dafna.hirschfeld@collabora.com>
Acked-by: Helen Koike <helen.koike@collabora.com>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../userspace-api/media/v4l/subdev-formats.rst     | 27 ++++++++++++++++++++++
 include/uapi/linux/media-bus-format.h              |  8 +++++++
 2 files changed, 35 insertions(+)

(limited to 'include/uapi')

diff --git a/Documentation/userspace-api/media/v4l/subdev-formats.rst b/Documentation/userspace-api/media/v4l/subdev-formats.rst
index c9b7bb3ca089..7f16cbe46e5c 100644
--- a/Documentation/userspace-api/media/v4l/subdev-formats.rst
+++ b/Documentation/userspace-api/media/v4l/subdev-formats.rst
@@ -7899,3 +7899,30 @@ formats.
       - 0x5001
       - Interleaved raw UYVY and JPEG image format with embedded meta-data
 	used by Samsung S3C73MX camera sensors.
+
+.. _v4l2-mbus-metadata-fmts:
+
+Metadata Formats
+^^^^^^^^^^^^^^^^
+
+This section lists all metadata formats.
+
+The following table lists the existing metadata formats.
+
+.. tabularcolumns:: |p{8.0cm}|p{1.4cm}|p{7.7cm}|
+
+.. flat-table:: Metadata formats
+    :header-rows:  1
+    :stub-columns: 0
+
+    * - Identifier
+      - Code
+      - Comments
+    * .. _MEDIA-BUS-FMT-METADATA-FIXED:
+
+      - MEDIA_BUS_FMT_METADATA_FIXED
+      - 0x7001
+      - This format should be used when the same driver handles
+	both sides of the link and the bus format is a fixed
+	metadata format that is not configurable from userspace.
+	Width and height will be set to 0 for this format.
diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h
index 84fa53ffb13f..2ce3d891d344 100644
--- a/include/uapi/linux/media-bus-format.h
+++ b/include/uapi/linux/media-bus-format.h
@@ -156,4 +156,12 @@
 /* HSV - next is	0x6002 */
 #define MEDIA_BUS_FMT_AHSV8888_1X32		0x6001
 
+/*
+ * This format should be used when the same driver handles
+ * both sides of the link and the bus format is a fixed
+ * metadata format that is not configurable from userspace.
+ * Width and height will be set to 0 for this format.
+ */
+#define MEDIA_BUS_FMT_METADATA_FIXED		0x7001
+
 #endif /* __LINUX_MEDIA_BUS_FORMAT_H */
-- 
cgit v1.2.3


From b2d3bef1aa7858b2ae5e0d01adb214121ba00b9f Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Wed, 4 Nov 2020 18:43:11 +0100
Subject: media: coda: Add a V4L2 user control for error macroblocks count

To avoid potentially overflowing the kernel logs in the case
of corrupted streams, this commit replaces an error message with
a per-stream counter to be read through a driver-specific
control.

Applications can read the per-stream accumulated
error macroblocks count.

The old error message is replaced by a rate-limited debug message.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/coda/coda-bit.c    | 10 +++++++---
 drivers/media/platform/coda/coda-common.c | 18 ++++++++++++++++++
 drivers/media/platform/coda/coda.h        | 10 ++++++++++
 include/uapi/linux/v4l2-controls.h        |  6 ++++++
 4 files changed, 41 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/media/platform/coda/coda-bit.c b/drivers/media/platform/coda/coda-bit.c
index 919b36d753ec..2f42808c43a4 100644
--- a/drivers/media/platform/coda/coda-bit.c
+++ b/drivers/media/platform/coda/coda-bit.c
@@ -13,6 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/log2.h>
 #include <linux/platform_device.h>
+#include <linux/ratelimit.h>
 #include <linux/reset.h>
 #include <linux/slab.h>
 #include <linux/videodev2.h>
@@ -2369,9 +2370,12 @@ static void coda_finish_decode(struct coda_ctx *ctx)
 	}
 
 	err_mb = coda_read(dev, CODA_RET_DEC_PIC_ERR_MB);
-	if (err_mb > 0)
-		v4l2_err(&dev->v4l2_dev,
-			 "errors in %d macroblocks\n", err_mb);
+	if (err_mb > 0) {
+		if (__ratelimit(&dev->mb_err_rs))
+			coda_dbg(1, ctx, "errors in %d macroblocks\n", err_mb);
+		v4l2_ctrl_s_ctrl(ctx->mb_err_cnt_ctrl,
+				 v4l2_ctrl_g_ctrl(ctx->mb_err_cnt_ctrl) + err_mb);
+	}
 
 	if (dev->devtype->product == CODA_HX4 ||
 	    dev->devtype->product == CODA_7541) {
diff --git a/drivers/media/platform/coda/coda-common.c b/drivers/media/platform/coda/coda-common.c
index f9e66247e92a..d30eafea701d 100644
--- a/drivers/media/platform/coda/coda-common.c
+++ b/drivers/media/platform/coda/coda-common.c
@@ -26,6 +26,7 @@
 #include <linux/videodev2.h>
 #include <linux/of.h>
 #include <linux/platform_data/media/coda.h>
+#include <linux/ratelimit.h>
 #include <linux/reset.h>
 
 #include <media/v4l2-ctrls.h>
@@ -2062,6 +2063,7 @@ static int coda_start_streaming(struct vb2_queue *q, unsigned int count)
 	if (q_data_dst->fourcc == V4L2_PIX_FMT_JPEG)
 		ctx->params.gop_size = 1;
 	ctx->gopcounter = ctx->params.gop_size - 1;
+	v4l2_ctrl_s_ctrl(ctx->mb_err_cnt_ctrl, 0);
 
 	ret = ctx->ops->start_streaming(ctx);
 	if (ctx->inst_type == CODA_INST_DECODER) {
@@ -2462,6 +2464,15 @@ static void coda_decode_ctrls(struct coda_ctx *ctx)
 		ctx->mpeg4_level_ctrl->flags |= V4L2_CTRL_FLAG_READ_ONLY;
 }
 
+static const struct v4l2_ctrl_config coda_mb_err_cnt_ctrl_config = {
+	.id	= V4L2_CID_CODA_MB_ERR_CNT,
+	.name	= "Macroblocks Error Count",
+	.type	= V4L2_CTRL_TYPE_INTEGER,
+	.min	= 0,
+	.max	= 0x7fffffff,
+	.step	= 1,
+};
+
 static int coda_ctrls_setup(struct coda_ctx *ctx)
 {
 	v4l2_ctrl_handler_init(&ctx->ctrls, 2);
@@ -2484,6 +2495,12 @@ static int coda_ctrls_setup(struct coda_ctx *ctx)
 				  1, 1, 1, 1);
 		if (ctx->cvd->src_formats[0] == V4L2_PIX_FMT_H264)
 			coda_decode_ctrls(ctx);
+
+		ctx->mb_err_cnt_ctrl = v4l2_ctrl_new_custom(&ctx->ctrls,
+						&coda_mb_err_cnt_ctrl_config,
+						NULL);
+		if (ctx->mb_err_cnt_ctrl)
+			ctx->mb_err_cnt_ctrl->flags |= V4L2_CTRL_FLAG_READ_ONLY;
 	}
 
 	if (ctx->ctrls.error) {
@@ -3202,6 +3219,7 @@ static int coda_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
+	ratelimit_default_init(&dev->mb_err_rs);
 	mutex_init(&dev->dev_mutex);
 	mutex_init(&dev->coda_mutex);
 	ida_init(&dev->ida);
diff --git a/drivers/media/platform/coda/coda.h b/drivers/media/platform/coda/coda.h
index e53f7a65d532..dcf35641c603 100644
--- a/drivers/media/platform/coda/coda.h
+++ b/drivers/media/platform/coda/coda.h
@@ -17,6 +17,7 @@
 #include <linux/mutex.h>
 #include <linux/kfifo.h>
 #include <linux/videodev2.h>
+#include <linux/ratelimit.h>
 
 #include <media/v4l2-ctrls.h>
 #include <media/v4l2-device.h>
@@ -28,6 +29,13 @@
 #define CODA_MAX_FRAMEBUFFERS	19
 #define FMO_SLICE_SAVE_BUF_SIZE	(32)
 
+/*
+ * This control allows applications to read the per-stream
+ * (i.e. per-context) Macroblocks Error Count. This value
+ * is CODA specific.
+ */
+#define V4L2_CID_CODA_MB_ERR_CNT (V4L2_CID_USER_CODA_BASE + 0)
+
 enum {
 	V4L2_M2M_SRC = 0,
 	V4L2_M2M_DST = 1,
@@ -92,6 +100,7 @@ struct coda_dev {
 	struct v4l2_m2m_dev	*m2m_dev;
 	struct ida		ida;
 	struct dentry		*debugfs_root;
+	struct ratelimit_state	mb_err_rs;
 };
 
 struct coda_codec {
@@ -242,6 +251,7 @@ struct coda_ctx {
 	struct v4l2_ctrl		*mpeg2_level_ctrl;
 	struct v4l2_ctrl		*mpeg4_profile_ctrl;
 	struct v4l2_ctrl		*mpeg4_level_ctrl;
+	struct v4l2_ctrl		*mb_err_cnt_ctrl;
 	struct v4l2_fh			fh;
 	int				gopcounter;
 	int				runcounter;
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index a184c4939438..7035f4fb182c 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -198,6 +198,12 @@ enum v4l2_colorfx {
  */
 #define V4L2_CID_USER_ATMEL_ISC_BASE		(V4L2_CID_USER_BASE + 0x10c0)
 
+/*
+ * The base for the CODA driver controls.
+ * We reserve 16 controls for this driver.
+ */
+#define V4L2_CID_USER_CODA_BASE			(V4L2_CID_USER_BASE + 0x10e0)
+
 /* MPEG-class control IDs */
 /* The MPEG controls are applicable to all codec controls
  * and the 'MPEG' part of the define is historical */
-- 
cgit v1.2.3


From 7a089ec7d77fe7d50f6bb7b178fa25eec9fd822b Mon Sep 17 00:00:00 2001
From: Peilin Ye <yepeilin.cs@gmail.com>
Date: Thu, 12 Nov 2020 07:04:03 -0500
Subject: console: Delete unused con_font_copy() callback implementations

Recently in commit 3c4e0dff2095 ("vt: Disable KD_FONT_OP_COPY") we
disabled the KD_FONT_OP_COPY ioctl() option. Delete all the
con_font_copy() callbacks, since we no longer use them.

Mark KD_FONT_OP_COPY as "obsolete" in include/uapi/linux/kd.h, just like
what we have done for PPPIOCDETACH in commit af8d3c7c001a ("ppp: remove
the PPPIOCDETACH ioctl").

Signed-off-by: Peilin Ye <yepeilin.cs@gmail.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/c8d28007edf50de4387e1532eb3eb736db716f73.1605169912.git.yepeilin.cs@gmail.com
---
 drivers/usb/misc/sisusbvga/sisusb_con.c |  6 ------
 drivers/video/console/dummycon.c        |  6 ------
 drivers/video/fbdev/core/fbcon.c        | 11 -----------
 include/linux/console.h                 |  1 -
 include/uapi/linux/kd.h                 |  2 +-
 5 files changed, 1 insertion(+), 25 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/usb/misc/sisusbvga/sisusb_con.c b/drivers/usb/misc/sisusbvga/sisusb_con.c
index c63e545fb105..fd9954381fbf 100644
--- a/drivers/usb/misc/sisusbvga/sisusb_con.c
+++ b/drivers/usb/misc/sisusbvga/sisusb_con.c
@@ -1358,11 +1358,6 @@ static int sisusbdummycon_font_default(struct vc_data *vc,
 	return 0;
 }
 
-static int sisusbdummycon_font_copy(struct vc_data *vc, int con)
-{
-	return 0;
-}
-
 static const struct consw sisusb_dummy_con = {
 	.owner =		THIS_MODULE,
 	.con_startup =		sisusbdummycon_startup,
@@ -1377,7 +1372,6 @@ static const struct consw sisusb_dummy_con = {
 	.con_blank =		sisusbdummycon_blank,
 	.con_font_set =		sisusbdummycon_font_set,
 	.con_font_default =	sisusbdummycon_font_default,
-	.con_font_copy =	sisusbdummycon_font_copy,
 };
 
 int
diff --git a/drivers/video/console/dummycon.c b/drivers/video/console/dummycon.c
index 2a0d0bda7faa..ab3df752fb57 100644
--- a/drivers/video/console/dummycon.c
+++ b/drivers/video/console/dummycon.c
@@ -136,11 +136,6 @@ static int dummycon_font_default(struct vc_data *vc,
 	return 0;
 }
 
-static int dummycon_font_copy(struct vc_data *vc, int con)
-{
-	return 0;
-}
-
 /*
  *  The console `switch' structure for the dummy console
  *
@@ -161,6 +156,5 @@ const struct consw dummy_con = {
 	.con_blank =	dummycon_blank,
 	.con_font_set =	dummycon_font_set,
 	.con_font_default =	dummycon_font_default,
-	.con_font_copy =	dummycon_font_copy,
 };
 EXPORT_SYMBOL_GPL(dummy_con);
diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index cef437817b0d..26d1b0916692 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -2451,16 +2451,6 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h,
 	return 0;
 }
 
-static int fbcon_copy_font(struct vc_data *vc, int con)
-{
-	struct fbcon_display *od = &fb_display[con];
-	struct console_font *f = &vc->vc_font;
-
-	if (od->fontdata == f->data)
-		return 0;	/* already the same font... */
-	return fbcon_do_set_font(vc, f->width, f->height, od->fontdata, od->userfont);
-}
-
 /*
  *  User asked to set font; we are guaranteed that
  *	a) width and height are in range 1..32
@@ -3111,7 +3101,6 @@ static const struct consw fb_con = {
 	.con_font_set 		= fbcon_set_font,
 	.con_font_get 		= fbcon_get_font,
 	.con_font_default	= fbcon_set_def_font,
-	.con_font_copy 		= fbcon_copy_font,
 	.con_set_palette 	= fbcon_set_palette,
 	.con_invert_region 	= fbcon_invert_region,
 	.con_screen_pos 	= fbcon_screen_pos,
diff --git a/include/linux/console.h b/include/linux/console.h
index 4b1e26c4cb42..20874db50bc8 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -62,7 +62,6 @@ struct consw {
 	int	(*con_font_get)(struct vc_data *vc, struct console_font *font);
 	int	(*con_font_default)(struct vc_data *vc,
 			struct console_font *font, char *name);
-	int	(*con_font_copy)(struct vc_data *vc, int con);
 	int     (*con_resize)(struct vc_data *vc, unsigned int width,
 			unsigned int height, unsigned int user);
 	void	(*con_set_palette)(struct vc_data *vc,
diff --git a/include/uapi/linux/kd.h b/include/uapi/linux/kd.h
index 4616b31f84da..ee929ece4112 100644
--- a/include/uapi/linux/kd.h
+++ b/include/uapi/linux/kd.h
@@ -173,7 +173,7 @@ struct console_font {
 #define KD_FONT_OP_SET		0	/* Set font */
 #define KD_FONT_OP_GET		1	/* Get font */
 #define KD_FONT_OP_SET_DEFAULT	2	/* Set font to default, data points to name / NULL */
-#define KD_FONT_OP_COPY		3	/* Copy from another console */
+#define KD_FONT_OP_COPY		3	/* Obsolete, do not use */
 
 #define KD_FONT_FLAG_DONT_RECALC 	1	/* Don't recalculate hw charcell size [compat] */
 
-- 
cgit v1.2.3


From 09bec07bbef508d5e264efdc48e88f0256607453 Mon Sep 17 00:00:00 2001
From: Simon Ser <contact@emersion.fr>
Date: Sun, 15 Nov 2020 09:39:38 +0000
Subject: drm/fourcc: fix AMD modifiers PACKERS field doc

This field doesn't alias with BANK_XOR_BITS: PACKERS is bits 27:29 while
BANK_XOR_BITS is bits 24:26.

Fixes: 8ba16d599374 ("drm/fourcc: Add AMD DRM modifiers.")
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Signed-off-by: Simon Ser <contact@emersion.fr>
Cc: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Cc: Alex Deucher <alexdeucher@gmail.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/drm/drm_fourcc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
index bf03bce1e854..723c8e23ca87 100644
--- a/include/uapi/drm/drm_fourcc.h
+++ b/include/uapi/drm/drm_fourcc.h
@@ -1215,7 +1215,7 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier)
 #define AMD_FMT_MOD_PIPE_XOR_BITS_MASK 0x7
 #define AMD_FMT_MOD_BANK_XOR_BITS_SHIFT 24
 #define AMD_FMT_MOD_BANK_XOR_BITS_MASK 0x7
-#define AMD_FMT_MOD_PACKERS_SHIFT 27 /* aliases with BANK_XOR_BITS */
+#define AMD_FMT_MOD_PACKERS_SHIFT 27
 #define AMD_FMT_MOD_PACKERS_MASK 0x7
 #define AMD_FMT_MOD_RB_SHIFT 30
 #define AMD_FMT_MOD_RB_MASK 0x7
-- 
cgit v1.2.3


From 3ceb6543e9cf6ed87cc1fbc6f23ca2db903564cd Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 23 Oct 2020 17:51:31 -0700
Subject: fscrypt: remove kernel-internal constants from UAPI header

There isn't really any valid reason to use __FSCRYPT_MODE_MAX or
FSCRYPT_POLICY_FLAGS_VALID in a userspace program.  These constants are
only meant to be used by the kernel internally, and they are defined in
the UAPI header next to the mode numbers and flags only so that kernel
developers don't forget to update them when adding new modes or flags.

In https://lkml.kernel.org/r/20201005074133.1958633-2-satyat@google.com
there was an example of someone wanting to use __FSCRYPT_MODE_MAX in a
user program, and it was wrong because the program would have broken if
__FSCRYPT_MODE_MAX were ever increased.  So having this definition
available is harmful.  FSCRYPT_POLICY_FLAGS_VALID has the same problem.

So, remove these definitions from the UAPI header.  Replace
FSCRYPT_POLICY_FLAGS_VALID with just listing the valid flags explicitly
in the one kernel function that needs it.  Move __FSCRYPT_MODE_MAX to
fscrypt_private.h, remove the double underscores (which were only
present to discourage use by userspace), and add a BUILD_BUG_ON() and
comments to (hopefully) ensure it is kept in sync.

Keep the old name FS_POLICY_FLAGS_VALID, since it's been around for
longer and there's a greater chance that removing it would break source
compatibility with some program.  Indeed, mtd-utils is using it in
an #ifdef, and removing it would introduce compiler warnings (about
FS_POLICY_FLAGS_PAD_* being redefined) into the mtd-utils build.
However, reduce its value to 0x07 so that it only includes the flags
with old names (the ones present before Linux 5.4), and try to make it
clear that it's now "frozen" and no new flags should be added to it.

Fixes: 2336d0deb2d4 ("fscrypt: use FSCRYPT_ prefix for uapi constants")
Cc: <stable@vger.kernel.org> # v5.4+
Link: https://lore.kernel.org/r/20201024005132.495952-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/crypto/fscrypt_private.h  | 9 ++++++---
 fs/crypto/keyring.c          | 2 +-
 fs/crypto/keysetup.c         | 4 +++-
 fs/crypto/policy.c           | 5 ++++-
 include/uapi/linux/fscrypt.h | 5 ++---
 5 files changed, 16 insertions(+), 9 deletions(-)

(limited to 'include/uapi')

diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index 4f5806a3b73d..322ecae9a758 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -25,6 +25,9 @@
 #define FSCRYPT_CONTEXT_V1	1
 #define FSCRYPT_CONTEXT_V2	2
 
+/* Keep this in sync with include/uapi/linux/fscrypt.h */
+#define FSCRYPT_MODE_MAX	FSCRYPT_MODE_ADIANTUM
+
 struct fscrypt_context_v1 {
 	u8 version; /* FSCRYPT_CONTEXT_V1 */
 	u8 contents_encryption_mode;
@@ -491,9 +494,9 @@ struct fscrypt_master_key {
 	 * Per-mode encryption keys for the various types of encryption policies
 	 * that use them.  Allocated and derived on-demand.
 	 */
-	struct fscrypt_prepared_key mk_direct_keys[__FSCRYPT_MODE_MAX + 1];
-	struct fscrypt_prepared_key mk_iv_ino_lblk_64_keys[__FSCRYPT_MODE_MAX + 1];
-	struct fscrypt_prepared_key mk_iv_ino_lblk_32_keys[__FSCRYPT_MODE_MAX + 1];
+	struct fscrypt_prepared_key mk_direct_keys[FSCRYPT_MODE_MAX + 1];
+	struct fscrypt_prepared_key mk_iv_ino_lblk_64_keys[FSCRYPT_MODE_MAX + 1];
+	struct fscrypt_prepared_key mk_iv_ino_lblk_32_keys[FSCRYPT_MODE_MAX + 1];
 
 	/* Hash key for inode numbers.  Initialized only when needed. */
 	siphash_key_t		mk_ino_hash_key;
diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c
index 53cc552a7b8f..d7ec52cb3d9a 100644
--- a/fs/crypto/keyring.c
+++ b/fs/crypto/keyring.c
@@ -44,7 +44,7 @@ static void free_master_key(struct fscrypt_master_key *mk)
 
 	wipe_master_key_secret(&mk->mk_secret);
 
-	for (i = 0; i <= __FSCRYPT_MODE_MAX; i++) {
+	for (i = 0; i <= FSCRYPT_MODE_MAX; i++) {
 		fscrypt_destroy_prepared_key(&mk->mk_direct_keys[i]);
 		fscrypt_destroy_prepared_key(&mk->mk_iv_ino_lblk_64_keys[i]);
 		fscrypt_destroy_prepared_key(&mk->mk_iv_ino_lblk_32_keys[i]);
diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c
index d595abb8ef90..31fb08d94f87 100644
--- a/fs/crypto/keysetup.c
+++ b/fs/crypto/keysetup.c
@@ -56,6 +56,8 @@ static struct fscrypt_mode *
 select_encryption_mode(const union fscrypt_policy *policy,
 		       const struct inode *inode)
 {
+	BUILD_BUG_ON(ARRAY_SIZE(fscrypt_modes) != FSCRYPT_MODE_MAX + 1);
+
 	if (S_ISREG(inode->i_mode))
 		return &fscrypt_modes[fscrypt_policy_contents_mode(policy)];
 
@@ -168,7 +170,7 @@ static int setup_per_mode_enc_key(struct fscrypt_info *ci,
 	unsigned int hkdf_infolen = 0;
 	int err;
 
-	if (WARN_ON(mode_num > __FSCRYPT_MODE_MAX))
+	if (WARN_ON(mode_num > FSCRYPT_MODE_MAX))
 		return -EINVAL;
 
 	prep_key = &keys[mode_num];
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index 4441d9944b9e..faa0f21daa68 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -175,7 +175,10 @@ static bool fscrypt_supported_v2_policy(const struct fscrypt_policy_v2 *policy,
 		return false;
 	}
 
-	if (policy->flags & ~FSCRYPT_POLICY_FLAGS_VALID) {
+	if (policy->flags & ~(FSCRYPT_POLICY_FLAGS_PAD_MASK |
+			      FSCRYPT_POLICY_FLAG_DIRECT_KEY |
+			      FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64 |
+			      FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32)) {
 		fscrypt_warn(inode, "Unsupported encryption flags (0x%02x)",
 			     policy->flags);
 		return false;
diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h
index e5de60336938..9f4428be3e36 100644
--- a/include/uapi/linux/fscrypt.h
+++ b/include/uapi/linux/fscrypt.h
@@ -20,7 +20,6 @@
 #define FSCRYPT_POLICY_FLAG_DIRECT_KEY		0x04
 #define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64	0x08
 #define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32	0x10
-#define FSCRYPT_POLICY_FLAGS_VALID		0x1F
 
 /* Encryption algorithms */
 #define FSCRYPT_MODE_AES_256_XTS		1
@@ -28,7 +27,7 @@
 #define FSCRYPT_MODE_AES_128_CBC		5
 #define FSCRYPT_MODE_AES_128_CTS		6
 #define FSCRYPT_MODE_ADIANTUM			9
-#define __FSCRYPT_MODE_MAX			9
+/* If adding a mode number > 9, update FSCRYPT_MODE_MAX in fscrypt_private.h */
 
 /*
  * Legacy policy version; ad-hoc KDF and no key verification.
@@ -177,7 +176,7 @@ struct fscrypt_get_key_status_arg {
 #define FS_POLICY_FLAGS_PAD_32		FSCRYPT_POLICY_FLAGS_PAD_32
 #define FS_POLICY_FLAGS_PAD_MASK	FSCRYPT_POLICY_FLAGS_PAD_MASK
 #define FS_POLICY_FLAG_DIRECT_KEY	FSCRYPT_POLICY_FLAG_DIRECT_KEY
-#define FS_POLICY_FLAGS_VALID		FSCRYPT_POLICY_FLAGS_VALID
+#define FS_POLICY_FLAGS_VALID		0x07	/* contains old flags only */
 #define FS_ENCRYPTION_MODE_INVALID	0	/* never used */
 #define FS_ENCRYPTION_MODE_AES_256_XTS	FSCRYPT_MODE_AES_256_XTS
 #define FS_ENCRYPTION_MODE_AES_256_GCM	2	/* never used */
-- 
cgit v1.2.3


From 72e637fec558c7842d6f1508f480fd77c20afe3a Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 30 Oct 2020 12:52:30 +0100
Subject: media: rc: validate that "rc_proto" is reasonable

Smatch complains that "rc_proto" comes from the user and it can result
in shift wrapping in ir_raw_encode_scancode()

    drivers/media/rc/rc-ir-raw.c:526 ir_raw_encode_scancode()
    error: undefined (user controlled) shift '1 << protocol'

This is true, but I reviewed the surrounding code and it appears
harmless. Anyway, let's verify that "rc_proto" is valid as a kernel
hardening measure.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/rc/lirc_dev.c | 3 ++-
 include/uapi/linux/lirc.h   | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c
index 220363b9a868..116daf90c858 100644
--- a/drivers/media/rc/lirc_dev.c
+++ b/drivers/media/rc/lirc_dev.c
@@ -263,7 +263,8 @@ static ssize_t lirc_transmit(struct file *file, const char __user *buf,
 			goto out_unlock;
 		}
 
-		if (scan.flags || scan.keycode || scan.timestamp) {
+		if (scan.flags || scan.keycode || scan.timestamp ||
+		    scan.rc_proto > RC_PROTO_MAX) {
 			ret = -EINVAL;
 			goto out_unlock;
 		}
diff --git a/include/uapi/linux/lirc.h b/include/uapi/linux/lirc.h
index f99d9dcae667..c1eb960adde3 100644
--- a/include/uapi/linux/lirc.h
+++ b/include/uapi/linux/lirc.h
@@ -226,6 +226,7 @@ enum rc_proto {
 	RC_PROTO_RCMM24		= 25,
 	RC_PROTO_RCMM32		= 26,
 	RC_PROTO_XBOX_DVD	= 27,
+	RC_PROTO_MAX		= RC_PROTO_XBOX_DVD,
 };
 
 #endif
-- 
cgit v1.2.3


From df22026aebd863745efd753371f46f6ab28a2617 Mon Sep 17 00:00:00 2001
From: Shunqian Zheng <zhengsq@rock-chips.com>
Date: Fri, 6 Nov 2020 13:19:36 +0100
Subject: media: videodev2.h, v4l2-ioctl: add rkisp1 meta buffer format

Add the Rockchip ISP1 specific processing parameter format
V4L2_META_FMT_RK_ISP1_PARAMS and metadata format
V4L2_META_FMT_RK_ISP1_STAT_3A for 3A.

Signed-off-by: Shunqian Zheng <zhengsq@rock-chips.com>
Signed-off-by: Jacob Chen <jacob2.chen@rock-chips.com>
Signed-off-by: Helen Koike <helen.koike@collabora.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/v4l2-core/v4l2-ioctl.c              | 2 ++
 drivers/staging/media/rkisp1/uapi/rkisp1-config.h | 4 ----
 include/uapi/linux/videodev2.h                    | 4 ++++
 3 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index f0f6906a879d..3198abdd538c 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -1403,6 +1403,8 @@ static void v4l_fill_fmtdesc(struct v4l2_fmtdesc *fmt)
 	case V4L2_META_FMT_UVC:		descr = "UVC Payload Header Metadata"; break;
 	case V4L2_META_FMT_D4XX:	descr = "Intel D4xx UVC Metadata"; break;
 	case V4L2_META_FMT_VIVID:       descr = "Vivid Metadata"; break;
+	case V4L2_META_FMT_RK_ISP1_PARAMS:	descr = "Rockchip ISP1 3A Parameters"; break;
+	case V4L2_META_FMT_RK_ISP1_STAT_3A:	descr = "Rockchip ISP1 3A Statistics"; break;
 
 	default:
 		/* Compressed formats */
diff --git a/drivers/staging/media/rkisp1/uapi/rkisp1-config.h b/drivers/staging/media/rkisp1/uapi/rkisp1-config.h
index 8d906cc7da8f..6e449e784260 100644
--- a/drivers/staging/media/rkisp1/uapi/rkisp1-config.h
+++ b/drivers/staging/media/rkisp1/uapi/rkisp1-config.h
@@ -9,10 +9,6 @@
 
 #include <linux/types.h>
 
-/* Vendor specific - used for RK_ISP1 camera sub-system */
-#define V4L2_META_FMT_RK_ISP1_PARAMS   v4l2_fourcc('R', 'K', '1', 'P') /* Rockchip ISP1 params */
-#define V4L2_META_FMT_RK_ISP1_STAT_3A  v4l2_fourcc('R', 'K', '1', 'S') /* Rockchip ISP1 3A statistics */
-
 /* Defect Pixel Cluster Detection */
 #define RKISP1_CIF_ISP_MODULE_DPCC		(1U << 0)
 /* Black Level Subtraction */
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 927075fa9099..761ac9da3ffd 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -768,6 +768,10 @@ struct v4l2_pix_format {
 #define V4L2_META_FMT_D4XX        v4l2_fourcc('D', '4', 'X', 'X') /* D4XX Payload Header metadata */
 #define V4L2_META_FMT_VIVID	  v4l2_fourcc('V', 'I', 'V', 'D') /* Vivid Metadata */
 
+/* Vendor specific - used for RK_ISP1 camera sub-system */
+#define V4L2_META_FMT_RK_ISP1_PARAMS	v4l2_fourcc('R', 'K', '1', 'P') /* Rockchip ISP1 3A Parameters */
+#define V4L2_META_FMT_RK_ISP1_STAT_3A	v4l2_fourcc('R', 'K', '1', 'S') /* Rockchip ISP1 3A Statistics */
+
 /* priv field value to indicates that subsequent fields are valid. */
 #define V4L2_PIX_FMT_PRIV_MAGIC		0xfeedcafe
 
-- 
cgit v1.2.3


From e6938cc1cb7763a363f62b78147f1f2fb972f49c Mon Sep 17 00:00:00 2001
From: Helen Koike <helen.koike@collabora.com>
Date: Fri, 6 Nov 2020 13:19:37 +0100
Subject: media: rockchip: rkisp1: destage Rockchip ISP1 driver

All the items in the TODO list were addressed, uapi was reviewed,
documentation written, checkpatch errors fixed, several bugs fixed.

There is no big reason to keep this driver in staging, so move it out.

Dt-bindings Verified with:
make ARCH=arm64 dt_binding_check DT_SCHEMA_FILES=Documentation/devicetree/bindings/media/rockchip-isp1.yaml

Fields of MAINTAINERS file sorted according to output of
./scripts/parse-maintainers.pl --input=MAINTAINERS --output=MAINTAINERS
--order

[dt-bindings: media: rkisp1: move rockchip-isp1 bindings out of staging]
[dt-bindings: media: rkisp1: move rockchip-isp1 bindings out of staging]
[hverkuil: fix various checkpatch alignment warnings]

Signed-off-by: Helen Koike <helen.koike@collabora.com>
Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Tomasz Figa <tfiga@chromium.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../devicetree/bindings/media/rockchip-isp1.yaml   |  215 +++
 .../userspace-api/media/v4l/pixfmt-meta-rkisp1.rst |    2 +-
 MAINTAINERS                                        |    5 +-
 drivers/media/platform/Kconfig                     |   18 +
 drivers/media/platform/Makefile                    |    1 +
 drivers/media/platform/rockchip/rkisp1/Makefile    |   10 +
 .../platform/rockchip/rkisp1/rkisp1-capture.c      | 1431 ++++++++++++++++++
 .../media/platform/rockchip/rkisp1/rkisp1-common.c |   37 +
 .../media/platform/rockchip/rkisp1/rkisp1-common.h |  485 ++++++
 .../media/platform/rockchip/rkisp1/rkisp1-dev.c    |  581 ++++++++
 .../media/platform/rockchip/rkisp1/rkisp1-isp.c    | 1160 +++++++++++++++
 .../media/platform/rockchip/rkisp1/rkisp1-params.c | 1572 ++++++++++++++++++++
 .../media/platform/rockchip/rkisp1/rkisp1-regs.h   | 1262 ++++++++++++++++
 .../platform/rockchip/rkisp1/rkisp1-resizer.c      |  846 +++++++++++
 .../media/platform/rockchip/rkisp1/rkisp1-stats.c  |  415 ++++++
 drivers/staging/media/Kconfig                      |    2 -
 drivers/staging/media/Makefile                     |    1 -
 .../devicetree/bindings/media/rockchip-isp1.yaml   |  215 ---
 drivers/staging/media/rkisp1/Kconfig               |   19 -
 drivers/staging/media/rkisp1/Makefile              |   10 -
 drivers/staging/media/rkisp1/TODO                  |    8 -
 drivers/staging/media/rkisp1/rkisp1-capture.c      | 1431 ------------------
 drivers/staging/media/rkisp1/rkisp1-common.c       |   37 -
 drivers/staging/media/rkisp1/rkisp1-common.h       |  485 ------
 drivers/staging/media/rkisp1/rkisp1-dev.c          |  580 --------
 drivers/staging/media/rkisp1/rkisp1-isp.c          | 1161 ---------------
 drivers/staging/media/rkisp1/rkisp1-params.c       | 1572 --------------------
 drivers/staging/media/rkisp1/rkisp1-regs.h         | 1262 ----------------
 drivers/staging/media/rkisp1/rkisp1-resizer.c      |  846 -----------
 drivers/staging/media/rkisp1/rkisp1-stats.c        |  415 ------
 drivers/staging/media/rkisp1/uapi/rkisp1-config.h  |  884 -----------
 include/uapi/linux/rkisp1-config.h                 |  884 +++++++++++
 32 files changed, 8922 insertions(+), 8930 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/media/rockchip-isp1.yaml
 create mode 100644 drivers/media/platform/rockchip/rkisp1/Makefile
 create mode 100644 drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c
 create mode 100644 drivers/media/platform/rockchip/rkisp1/rkisp1-common.c
 create mode 100644 drivers/media/platform/rockchip/rkisp1/rkisp1-common.h
 create mode 100644 drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c
 create mode 100644 drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c
 create mode 100644 drivers/media/platform/rockchip/rkisp1/rkisp1-params.c
 create mode 100644 drivers/media/platform/rockchip/rkisp1/rkisp1-regs.h
 create mode 100644 drivers/media/platform/rockchip/rkisp1/rkisp1-resizer.c
 create mode 100644 drivers/media/platform/rockchip/rkisp1/rkisp1-stats.c
 delete mode 100644 drivers/staging/media/rkisp1/Documentation/devicetree/bindings/media/rockchip-isp1.yaml
 delete mode 100644 drivers/staging/media/rkisp1/Kconfig
 delete mode 100644 drivers/staging/media/rkisp1/Makefile
 delete mode 100644 drivers/staging/media/rkisp1/TODO
 delete mode 100644 drivers/staging/media/rkisp1/rkisp1-capture.c
 delete mode 100644 drivers/staging/media/rkisp1/rkisp1-common.c
 delete mode 100644 drivers/staging/media/rkisp1/rkisp1-common.h
 delete mode 100644 drivers/staging/media/rkisp1/rkisp1-dev.c
 delete mode 100644 drivers/staging/media/rkisp1/rkisp1-isp.c
 delete mode 100644 drivers/staging/media/rkisp1/rkisp1-params.c
 delete mode 100644 drivers/staging/media/rkisp1/rkisp1-regs.h
 delete mode 100644 drivers/staging/media/rkisp1/rkisp1-resizer.c
 delete mode 100644 drivers/staging/media/rkisp1/rkisp1-stats.c
 delete mode 100644 drivers/staging/media/rkisp1/uapi/rkisp1-config.h
 create mode 100644 include/uapi/linux/rkisp1-config.h

(limited to 'include/uapi')

diff --git a/Documentation/devicetree/bindings/media/rockchip-isp1.yaml b/Documentation/devicetree/bindings/media/rockchip-isp1.yaml
new file mode 100644
index 000000000000..2004c054ed1a
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/rockchip-isp1.yaml
@@ -0,0 +1,215 @@
+# SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/media/rockchip-isp1.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip SoC Image Signal Processing unit v1
+
+maintainers:
+  - Helen Koike <helen.koike@collabora.com>
+
+description: |
+  Rockchip ISP1 is the Camera interface for the Rockchip series of SoCs
+  which contains image processing, scaling, and compression functions.
+
+properties:
+  compatible:
+    const: rockchip,rk3399-cif-isp
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    minItems: 3
+    items:
+      # isp0 and isp1
+      - description: ISP clock
+      - description: ISP AXI clock
+      - description: ISP AHB clock
+      # only for isp1
+      - description: ISP Pixel clock
+
+  clock-names:
+    minItems: 3
+    items:
+      # isp0 and isp1
+      - const: isp
+      - const: aclk
+      - const: hclk
+      # only for isp1
+      - const: pclk_isp
+
+  iommus:
+    maxItems: 1
+
+  phys:
+    maxItems: 1
+    description: phandle for the PHY port
+
+  phy-names:
+    const: dphy
+
+  power-domains:
+    maxItems: 1
+
+  # See ./video-interfaces.txt for details
+  ports:
+    type: object
+    additionalProperties: false
+
+    properties:
+      "#address-cells":
+        const: 1
+
+      "#size-cells":
+        const: 0
+
+      port@0:
+        type: object
+        description: connection point for sensors at MIPI-DPHY RX0
+        additionalProperties: false
+
+        properties:
+          "#address-cells":
+            const: 1
+
+          "#size-cells":
+            const: 0
+
+          reg:
+            const: 0
+
+        patternProperties:
+          endpoint:
+            type: object
+            additionalProperties: false
+
+            properties:
+              reg:
+                maxItems: 1
+
+              data-lanes:
+                minItems: 1
+                maxItems: 4
+
+              remote-endpoint: true
+
+        required:
+          - reg
+          - "#address-cells"
+          - "#size-cells"
+
+    required:
+      - "#address-cells"
+      - "#size-cells"
+      - port@0
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - iommus
+  - phys
+  - phy-names
+  - power-domains
+  - ports
+
+if:
+  properties:
+    compatible:
+      contains:
+        const: rockchip,rk3399-cif-isp
+then:
+  properties:
+    clocks:
+      minItems: 3
+      maxItems: 4
+    clock-names:
+      minItems: 3
+      maxItems: 4
+
+additionalProperties: false
+
+examples:
+  - |
+
+    #include <dt-bindings/clock/rk3399-cru.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/power/rk3399-power.h>
+
+    parent0: parent {
+        #address-cells = <2>;
+        #size-cells = <2>;
+
+        isp0: isp0@ff910000 {
+            compatible = "rockchip,rk3399-cif-isp";
+            reg = <0x0 0xff910000 0x0 0x4000>;
+            interrupts = <GIC_SPI 43 IRQ_TYPE_LEVEL_HIGH 0>;
+            clocks = <&cru SCLK_ISP0>,
+                     <&cru ACLK_ISP0_WRAPPER>,
+                     <&cru HCLK_ISP0_WRAPPER>;
+            clock-names = "isp", "aclk", "hclk";
+            iommus = <&isp0_mmu>;
+            phys = <&dphy>;
+            phy-names = "dphy";
+            power-domains = <&power RK3399_PD_ISP0>;
+
+            ports {
+                #address-cells = <1>;
+                #size-cells = <0>;
+
+                port@0 {
+                    reg = <0>;
+                    #address-cells = <1>;
+                    #size-cells = <0>;
+
+                    mipi_in_wcam: endpoint@0 {
+                        reg = <0>;
+                        remote-endpoint = <&wcam_out>;
+                        data-lanes = <1 2>;
+                    };
+
+                    mipi_in_ucam: endpoint@1 {
+                        reg = <1>;
+                        remote-endpoint = <&ucam_out>;
+                        data-lanes = <1>;
+                    };
+                };
+            };
+        };
+
+        i2c7: i2c {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            wcam: camera@36 {
+                compatible = "ovti,ov5695";
+                reg = <0x36>;
+
+                port {
+                    wcam_out: endpoint {
+                        remote-endpoint = <&mipi_in_wcam>;
+                        data-lanes = <1 2>;
+                    };
+                };
+            };
+
+            ucam: camera@3c {
+                compatible = "ovti,ov2685";
+                reg = <0x3c>;
+
+                port {
+                    ucam_out: endpoint {
+                        remote-endpoint = <&mipi_in_ucam>;
+                        data-lanes = <1>;
+                    };
+                };
+            };
+        };
+    };
diff --git a/Documentation/userspace-api/media/v4l/pixfmt-meta-rkisp1.rst b/Documentation/userspace-api/media/v4l/pixfmt-meta-rkisp1.rst
index 7e43837ed260..f3671472d410 100644
--- a/Documentation/userspace-api/media/v4l/pixfmt-meta-rkisp1.rst
+++ b/Documentation/userspace-api/media/v4l/pixfmt-meta-rkisp1.rst
@@ -46,4 +46,4 @@ important tuning tools using software control loop.
 rkisp1 uAPI data types
 ======================
 
-.. kernel-doc:: drivers/staging/media/rkisp1/uapi/rkisp1-config.h
+.. kernel-doc:: include/uapi/linux/rkisp1-config.h
diff --git a/MAINTAINERS b/MAINTAINERS
index 811db1d3ca33..352b8eaa21f7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15034,10 +15034,13 @@ ROCKCHIP ISP V1 DRIVER
 M:	Helen Koike <helen.koike@collabora.com>
 M:	Dafna Hirschfeld <dafna.hirschfeld@collabora.com>
 L:	linux-media@vger.kernel.org
+L:	linux-rockchip@lists.infradead.org
 S:	Maintained
 F:	Documentation/admin-guide/media/rkisp1.rst
+F:	Documentation/devicetree/bindings/media/rockchip-isp1.yaml
 F:	Documentation/userspace-api/media/v4l/pixfmt-meta-rkisp1.rst
-F:	drivers/staging/media/rkisp1/
+F:	drivers/media/platform/rockchip/rkisp1
+F:	include/uapi/linux/rkisp1-config.h
 
 ROCKCHIP RASTER 2D GRAPHIC ACCELERATION UNIT DRIVER
 M:	Jacob Chen <jacob-chen@iotwrt.com>
diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig
index a3cb104956d5..b161f2ba238f 100644
--- a/drivers/media/platform/Kconfig
+++ b/drivers/media/platform/Kconfig
@@ -147,6 +147,24 @@ config VIDEO_RENESAS_CEU
 	help
 	  This is a v4l2 driver for the Renesas CEU Interface
 
+config VIDEO_ROCKCHIP_ISP1
+	tristate "Rockchip Image Signal Processing v1 Unit driver"
+	depends on VIDEO_V4L2 && OF
+	depends on ARCH_ROCKCHIP || COMPILE_TEST
+	select MEDIA_CONTROLLER
+	select VIDEO_V4L2_SUBDEV_API
+	select VIDEOBUF2_DMA_CONTIG
+	select VIDEOBUF2_VMALLOC
+	select V4L2_FWNODE
+	select GENERIC_PHY_MIPI_DPHY
+	default n
+	help
+	  Enable this to support the Image Signal Processing (ISP) module
+	  present in RK3399 SoCs.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called rockchip-isp1.
+
 source "drivers/media/platform/exynos4-is/Kconfig"
 source "drivers/media/platform/am437x/Kconfig"
 source "drivers/media/platform/xilinx/Kconfig"
diff --git a/drivers/media/platform/Makefile b/drivers/media/platform/Makefile
index 62b6cdc8c730..b342714228db 100644
--- a/drivers/media/platform/Makefile
+++ b/drivers/media/platform/Makefile
@@ -52,6 +52,7 @@ obj-$(CONFIG_VIDEO_RENESAS_FDP1)	+= rcar_fdp1.o
 obj-$(CONFIG_VIDEO_RENESAS_JPU)		+= rcar_jpu.o
 obj-$(CONFIG_VIDEO_RENESAS_VSP1)	+= vsp1/
 
+obj-$(CONFIG_VIDEO_ROCKCHIP_ISP1)	+= rockchip/rkisp1/
 obj-$(CONFIG_VIDEO_ROCKCHIP_RGA)	+= rockchip/rga/
 
 obj-y	+= omap/
diff --git a/drivers/media/platform/rockchip/rkisp1/Makefile b/drivers/media/platform/rockchip/rkisp1/Makefile
new file mode 100644
index 000000000000..ab32a77db8f7
--- /dev/null
+++ b/drivers/media/platform/rockchip/rkisp1/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_VIDEO_ROCKCHIP_ISP1) += rockchip-isp1.o
+rockchip-isp1-objs += 	rkisp1-capture.o \
+			rkisp1-common.o \
+			rkisp1-dev.o \
+			rkisp1-isp.o \
+			rkisp1-resizer.o \
+			rkisp1-stats.o \
+			rkisp1-params.o
diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c
new file mode 100644
index 000000000000..b81235afd053
--- /dev/null
+++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c
@@ -0,0 +1,1431 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Rockchip ISP1 Driver - V4l capture device
+ *
+ * Copyright (C) 2019 Collabora, Ltd.
+ *
+ * Based on Rockchip ISP1 driver by Rockchip Electronics Co., Ltd.
+ * Copyright (C) 2017 Rockchip Electronics Co., Ltd.
+ */
+
+#include <linux/delay.h>
+#include <linux/pm_runtime.h>
+#include <media/v4l2-common.h>
+#include <media/v4l2-event.h>
+#include <media/v4l2-fh.h>
+#include <media/v4l2-ioctl.h>
+#include <media/v4l2-mc.h>
+#include <media/v4l2-subdev.h>
+#include <media/videobuf2-dma-contig.h>
+
+#include "rkisp1-common.h"
+
+/*
+ * NOTE: There are two capture video devices in rkisp1, selfpath and mainpath.
+ *
+ * differences between selfpath and mainpath
+ * available mp sink input: isp
+ * available sp sink input : isp, dma(TODO)
+ * available mp sink pad fmts: yuv422, raw
+ * available sp sink pad fmts: yuv422, yuv420......
+ * available mp source fmts: yuv, raw, jpeg(TODO)
+ * available sp source fmts: yuv, rgb
+ */
+
+#define RKISP1_SP_DEV_NAME	RKISP1_DRIVER_NAME "_selfpath"
+#define RKISP1_MP_DEV_NAME	RKISP1_DRIVER_NAME "_mainpath"
+
+#define RKISP1_MIN_BUFFERS_NEEDED 3
+
+enum rkisp1_plane {
+	RKISP1_PLANE_Y	= 0,	/* index of the Y component */
+	RKISP1_PLANE_CB	= 1,	/* index of the Cb component */
+	RKISP1_PLANE_CR	= 2	/* index of the Cr component */
+};
+
+/*
+ * struct rkisp1_capture_fmt_cfg - capture settings for one pixel format
+ * @fourcc: pixel format
+ * @uv_swap: if cb cr swapped, for yuv
+ * @write_format: defines how YCbCr self picture data is written to memory
+ * @output_format: defines sp output format
+ * @mbus: the mbus code on the src resizer pad that matches the pixel format
+ */
+struct rkisp1_capture_fmt_cfg {
+	u32 fourcc;
+	u8 uv_swap;
+	u32 write_format;
+	u32 output_format;
+	u32 mbus;
+};
+
+struct rkisp1_capture_ops {
+	void (*config)(struct rkisp1_capture *cap);	/* write MI size/format regs */
+	void (*stop)(struct rkisp1_capture *cap);	/* write frame bit to MI_ICR, disable */
+	void (*enable)(struct rkisp1_capture *cap);	/* set the path enable bit in MI_CTRL */
+	void (*disable)(struct rkisp1_capture *cap);	/* clear the path enable bit(s) */
+	void (*set_data_path)(struct rkisp1_capture *cap);	/* update VI_DPCL */
+	bool (*is_stopped)(struct rkisp1_capture *cap);	/* test MI_CTRL_SHD enable bits */
+};
+
+struct rkisp1_capture_config {
+	const struct rkisp1_capture_fmt_cfg *fmts;	/* supported pixel formats */
+	int fmt_size;	/* number of entries in fmts */
+	struct {	/* per-path MI registers, used as rkisp1_write() targets */
+		u32 y_size_init;
+		u32 cb_size_init;
+		u32 cr_size_init;
+		u32 y_base_ad_init;
+		u32 cb_base_ad_init;
+		u32 cr_base_ad_init;
+		u32 y_offs_cnt_init;
+		u32 cb_offs_cnt_init;
+		u32 cr_offs_cnt_init;
+	} mi;
+};
+
+/*
+ * The supported pixel formats for mainpath. NOTE, pixel formats with identical 'mbus'
+ * are grouped together. This is assumed and used by the function rkisp1_cap_enum_mbus_codes
+ */
+static const struct rkisp1_capture_fmt_cfg rkisp1_mp_fmts[] = {
+	/* yuv422 */
+	{
+		.fourcc = V4L2_PIX_FMT_YUYV,
+		.uv_swap = 0,
+		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUVINT,
+		.mbus = MEDIA_BUS_FMT_YUYV8_2X8,
+	}, {
+		.fourcc = V4L2_PIX_FMT_YUV422P,
+		.uv_swap = 0,
+		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_PLA_OR_RAW8,
+		.mbus = MEDIA_BUS_FMT_YUYV8_2X8,
+	}, {
+		.fourcc = V4L2_PIX_FMT_NV16,
+		.uv_swap = 0,
+		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_SPLA,
+		.mbus = MEDIA_BUS_FMT_YUYV8_2X8,
+	}, {
+		.fourcc = V4L2_PIX_FMT_NV61,
+		.uv_swap = 1,
+		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_SPLA,
+		.mbus = MEDIA_BUS_FMT_YUYV8_2X8,
+	}, {
+		.fourcc = V4L2_PIX_FMT_YVU422M,
+		.uv_swap = 1,
+		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_PLA_OR_RAW8,
+		.mbus = MEDIA_BUS_FMT_YUYV8_2X8,
+	},
+	/* yuv400 */
+	{
+		.fourcc = V4L2_PIX_FMT_GREY,
+		.uv_swap = 0,
+		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_PLA_OR_RAW8,
+		.mbus = MEDIA_BUS_FMT_YUYV8_2X8,
+	},
+	/* yuv420 */
+	{
+		.fourcc = V4L2_PIX_FMT_NV21,
+		.uv_swap = 1,
+		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_SPLA,
+		.mbus = MEDIA_BUS_FMT_YUYV8_1_5X8,
+	}, {
+		.fourcc = V4L2_PIX_FMT_NV12,
+		.uv_swap = 0,
+		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_SPLA,
+		.mbus = MEDIA_BUS_FMT_YUYV8_1_5X8,
+	}, {
+		.fourcc = V4L2_PIX_FMT_NV21M,
+		.uv_swap = 1,
+		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_SPLA,
+		.mbus = MEDIA_BUS_FMT_YUYV8_1_5X8,
+	}, {
+		.fourcc = V4L2_PIX_FMT_NV12M,
+		.uv_swap = 0,
+		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_SPLA,
+		.mbus = MEDIA_BUS_FMT_YUYV8_1_5X8,
+	}, {
+		.fourcc = V4L2_PIX_FMT_YUV420,
+		.uv_swap = 0,
+		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_PLA_OR_RAW8,
+		.mbus = MEDIA_BUS_FMT_YUYV8_1_5X8,
+	}, {
+		.fourcc = V4L2_PIX_FMT_YVU420,
+		.uv_swap = 1,
+		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_PLA_OR_RAW8,
+		.mbus = MEDIA_BUS_FMT_YUYV8_1_5X8,
+	},
+	/* raw */
+	{
+		.fourcc = V4L2_PIX_FMT_SRGGB8,
+		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_PLA_OR_RAW8,
+		.mbus = MEDIA_BUS_FMT_SRGGB8_1X8,
+	}, {
+		.fourcc = V4L2_PIX_FMT_SGRBG8,
+		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_PLA_OR_RAW8,
+		.mbus = MEDIA_BUS_FMT_SGRBG8_1X8,
+	}, {
+		.fourcc = V4L2_PIX_FMT_SGBRG8,
+		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_PLA_OR_RAW8,
+		.mbus = MEDIA_BUS_FMT_SGBRG8_1X8,
+	}, {
+		.fourcc = V4L2_PIX_FMT_SBGGR8,
+		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_PLA_OR_RAW8,
+		.mbus = MEDIA_BUS_FMT_SBGGR8_1X8,
+	}, {
+		.fourcc = V4L2_PIX_FMT_SRGGB10,
+		.write_format = RKISP1_MI_CTRL_MP_WRITE_RAW12,
+		.mbus = MEDIA_BUS_FMT_SRGGB10_1X10,
+	}, {
+		.fourcc = V4L2_PIX_FMT_SGRBG10,
+		.write_format = RKISP1_MI_CTRL_MP_WRITE_RAW12,
+		.mbus = MEDIA_BUS_FMT_SGRBG10_1X10,
+	}, {
+		.fourcc = V4L2_PIX_FMT_SGBRG10,
+		.write_format = RKISP1_MI_CTRL_MP_WRITE_RAW12,
+		.mbus = MEDIA_BUS_FMT_SGBRG10_1X10,
+	}, {
+		.fourcc = V4L2_PIX_FMT_SBGGR10,
+		.write_format = RKISP1_MI_CTRL_MP_WRITE_RAW12,
+		.mbus = MEDIA_BUS_FMT_SBGGR10_1X10,
+	}, {
+		.fourcc = V4L2_PIX_FMT_SRGGB12,
+		.write_format = RKISP1_MI_CTRL_MP_WRITE_RAW12,
+		.mbus = MEDIA_BUS_FMT_SRGGB12_1X12,
+	}, {
+		.fourcc = V4L2_PIX_FMT_SGRBG12,
+		.write_format = RKISP1_MI_CTRL_MP_WRITE_RAW12,
+		.mbus = MEDIA_BUS_FMT_SGRBG12_1X12,
+	}, {
+		.fourcc = V4L2_PIX_FMT_SGBRG12,
+		.write_format = RKISP1_MI_CTRL_MP_WRITE_RAW12,
+		.mbus = MEDIA_BUS_FMT_SGBRG12_1X12,
+	}, {
+		.fourcc = V4L2_PIX_FMT_SBGGR12,
+		.write_format = RKISP1_MI_CTRL_MP_WRITE_RAW12,
+		.mbus = MEDIA_BUS_FMT_SBGGR12_1X12,
+	},
+};
+
+/*
+ * The supported pixel formats for selfpath. NOTE, pixel formats with identical 'mbus'
+ * are grouped together. This is assumed and used by the function rkisp1_cap_enum_mbus_codes
+ */
+static const struct rkisp1_capture_fmt_cfg rkisp1_sp_fmts[] = {
+	/* yuv422 */
+	{
+		.fourcc = V4L2_PIX_FMT_YUYV,
+		.uv_swap = 0,
+		.write_format = RKISP1_MI_CTRL_SP_WRITE_INT,
+		.output_format = RKISP1_MI_CTRL_SP_OUTPUT_YUV422,
+		.mbus = MEDIA_BUS_FMT_YUYV8_2X8,
+	}, {
+		.fourcc = V4L2_PIX_FMT_YUV422P,
+		.uv_swap = 0,
+		.write_format = RKISP1_MI_CTRL_SP_WRITE_PLA,
+		.output_format = RKISP1_MI_CTRL_SP_OUTPUT_YUV422,
+		.mbus = MEDIA_BUS_FMT_YUYV8_2X8,
+	}, {
+		.fourcc = V4L2_PIX_FMT_NV16,
+		.uv_swap = 0,
+		.write_format = RKISP1_MI_CTRL_SP_WRITE_SPLA,
+		.output_format = RKISP1_MI_CTRL_SP_OUTPUT_YUV422,
+		.mbus = MEDIA_BUS_FMT_YUYV8_2X8,
+	}, {
+		.fourcc = V4L2_PIX_FMT_NV61,
+		.uv_swap = 1,
+		.write_format = RKISP1_MI_CTRL_SP_WRITE_SPLA,
+		.output_format = RKISP1_MI_CTRL_SP_OUTPUT_YUV422,
+		.mbus = MEDIA_BUS_FMT_YUYV8_2X8,
+	}, {
+		.fourcc = V4L2_PIX_FMT_YVU422M,
+		.uv_swap = 1,
+		.write_format = RKISP1_MI_CTRL_SP_WRITE_PLA,
+		.output_format = RKISP1_MI_CTRL_SP_OUTPUT_YUV422,
+		.mbus = MEDIA_BUS_FMT_YUYV8_2X8,
+	},
+	/* yuv400 */
+	{
+		.fourcc = V4L2_PIX_FMT_GREY,
+		.uv_swap = 0,
+		.write_format = RKISP1_MI_CTRL_SP_WRITE_PLA,
+		.output_format = RKISP1_MI_CTRL_SP_OUTPUT_YUV400,
+		.mbus = MEDIA_BUS_FMT_YUYV8_2X8,
+	},
+	/* rgb */
+	{
+		.fourcc = V4L2_PIX_FMT_XBGR32,
+		.write_format = RKISP1_MI_CTRL_SP_WRITE_PLA,
+		.output_format = RKISP1_MI_CTRL_SP_OUTPUT_RGB888,
+		.mbus = MEDIA_BUS_FMT_YUYV8_2X8,
+	}, {
+		.fourcc = V4L2_PIX_FMT_RGB565,
+		.write_format = RKISP1_MI_CTRL_SP_WRITE_PLA,
+		.output_format = RKISP1_MI_CTRL_SP_OUTPUT_RGB565,
+		.mbus = MEDIA_BUS_FMT_YUYV8_2X8,
+	},
+	/* yuv420 */
+	{
+		.fourcc = V4L2_PIX_FMT_NV21,
+		.uv_swap = 1,
+		.write_format = RKISP1_MI_CTRL_SP_WRITE_SPLA,
+		.output_format = RKISP1_MI_CTRL_SP_OUTPUT_YUV420,
+		.mbus = MEDIA_BUS_FMT_YUYV8_1_5X8,
+	}, {
+		.fourcc = V4L2_PIX_FMT_NV12,
+		.uv_swap = 0,
+		.write_format = RKISP1_MI_CTRL_SP_WRITE_SPLA,
+		.output_format = RKISP1_MI_CTRL_SP_OUTPUT_YUV420,
+		.mbus = MEDIA_BUS_FMT_YUYV8_1_5X8,
+	}, {
+		.fourcc = V4L2_PIX_FMT_NV21M,
+		.uv_swap = 1,
+		.write_format = RKISP1_MI_CTRL_SP_WRITE_SPLA,
+		.output_format = RKISP1_MI_CTRL_SP_OUTPUT_YUV420,
+		.mbus = MEDIA_BUS_FMT_YUYV8_1_5X8,
+	}, {
+		.fourcc = V4L2_PIX_FMT_NV12M,
+		.uv_swap = 0,
+		.write_format = RKISP1_MI_CTRL_SP_WRITE_SPLA,
+		.output_format = RKISP1_MI_CTRL_SP_OUTPUT_YUV420,
+		.mbus = MEDIA_BUS_FMT_YUYV8_1_5X8,
+	}, {
+		.fourcc = V4L2_PIX_FMT_YUV420,
+		.uv_swap = 0,
+		.write_format = RKISP1_MI_CTRL_SP_WRITE_PLA,
+		.output_format = RKISP1_MI_CTRL_SP_OUTPUT_YUV420,
+		.mbus = MEDIA_BUS_FMT_YUYV8_1_5X8,
+	}, {
+		.fourcc = V4L2_PIX_FMT_YVU420,
+		.uv_swap = 1,
+		.write_format = RKISP1_MI_CTRL_SP_WRITE_PLA,
+		.output_format = RKISP1_MI_CTRL_SP_OUTPUT_YUV420,
+		.mbus = MEDIA_BUS_FMT_YUYV8_1_5X8,
+	},
+};
+
+static const struct rkisp1_capture_config rkisp1_capture_config_mp = {
+	.fmts = rkisp1_mp_fmts,
+	.fmt_size = ARRAY_SIZE(rkisp1_mp_fmts),
+	.mi = {
+		.y_size_init =		RKISP1_CIF_MI_MP_Y_SIZE_INIT,
+		.cb_size_init =		RKISP1_CIF_MI_MP_CB_SIZE_INIT,
+		.cr_size_init =		RKISP1_CIF_MI_MP_CR_SIZE_INIT,
+		.y_base_ad_init =	RKISP1_CIF_MI_MP_Y_BASE_AD_INIT,
+		.cb_base_ad_init =	RKISP1_CIF_MI_MP_CB_BASE_AD_INIT,
+		.cr_base_ad_init =	RKISP1_CIF_MI_MP_CR_BASE_AD_INIT,
+		.y_offs_cnt_init =	RKISP1_CIF_MI_MP_Y_OFFS_CNT_INIT,
+		.cb_offs_cnt_init =	RKISP1_CIF_MI_MP_CB_OFFS_CNT_INIT,
+		.cr_offs_cnt_init =	RKISP1_CIF_MI_MP_CR_OFFS_CNT_INIT,
+	},
+};
+
+static const struct rkisp1_capture_config rkisp1_capture_config_sp = {
+	.fmts = rkisp1_sp_fmts,
+	.fmt_size = ARRAY_SIZE(rkisp1_sp_fmts),
+	.mi = {
+		.y_size_init =		RKISP1_CIF_MI_SP_Y_SIZE_INIT,
+		.cb_size_init =		RKISP1_CIF_MI_SP_CB_SIZE_INIT,
+		.cr_size_init =		RKISP1_CIF_MI_SP_CR_SIZE_INIT,
+		.y_base_ad_init =	RKISP1_CIF_MI_SP_Y_BASE_AD_INIT,
+		.cb_base_ad_init =	RKISP1_CIF_MI_SP_CB_BASE_AD_INIT,
+		.cr_base_ad_init =	RKISP1_CIF_MI_SP_CR_BASE_AD_INIT,
+		.y_offs_cnt_init =	RKISP1_CIF_MI_SP_Y_OFFS_CNT_INIT,
+		.cb_offs_cnt_init =	RKISP1_CIF_MI_SP_CB_OFFS_CNT_INIT,
+		.cr_offs_cnt_init =	RKISP1_CIF_MI_SP_CR_OFFS_CNT_INIT,
+	},
+};
+
+static inline struct rkisp1_vdev_node *
+rkisp1_vdev_to_node(struct video_device *vdev)
+{
+	return container_of(vdev, struct rkisp1_vdev_node, vdev);	/* outer struct of vdev */
+}
+
+int rkisp1_cap_enum_mbus_codes(struct rkisp1_capture *cap,
+			       struct v4l2_subdev_mbus_code_enum *code)
+{
+	const struct rkisp1_capture_fmt_cfg *fmts = cap->config->fmts;
+	/*
+	 * Enumerate the distinct mbus codes of fmts[]: curr_mbus starts at 0,
+	 * a non existing mbus code, to ensure it is different from fmts[0].mbus
+	 */
+	u32 curr_mbus = 0;
+	int i, n = 0;
+
+	for (i = 0; i < cap->config->fmt_size; i++) {
+		if (fmts[i].mbus == curr_mbus)
+			continue;	/* same code as the previous entry */
+
+		curr_mbus = fmts[i].mbus;
+		if (n++ == code->index) {
+			code->code = curr_mbus;
+			return 0;
+		}
+	}
+	return -EINVAL;	/* code->index is beyond the last distinct code */
+}
+
+/* ----------------------------------------------------------------------------
+ * Stream operations for self-picture path (sp) and main-picture path (mp)
+ */
+
+static void rkisp1_mi_config_ctrl(struct rkisp1_capture *cap)
+{
+	u32 mi_ctrl = rkisp1_read(cap->rkisp1, RKISP1_CIF_MI_CTRL);
+
+	mi_ctrl &= ~GENMASK(17, 16);	/* clear bits 17:16 before OR-ing in LUM_64 */
+	mi_ctrl |= RKISP1_CIF_MI_CTRL_BURST_LEN_LUM_64;
+
+	mi_ctrl &= ~GENMASK(19, 18);	/* clear bits 19:18 before OR-ing in CHROM_64 */
+	mi_ctrl |= RKISP1_CIF_MI_CTRL_BURST_LEN_CHROM_64;
+
+	mi_ctrl |= RKISP1_CIF_MI_CTRL_INIT_BASE_EN |
+		   RKISP1_CIF_MI_CTRL_INIT_OFFSET_EN;
+
+	rkisp1_write(cap->rkisp1, mi_ctrl, RKISP1_CIF_MI_CTRL);
+}
+
+static u32 rkisp1_pixfmt_comp_size(const struct v4l2_pix_format_mplane *pixm,
+				   unsigned int component)
+{
+	/*
+	 * If packed format, then plane_fmt[0].sizeimage is the sum of all
+	 * components, so we need to calculate just the size of Y component.
+	 * See rkisp1_fill_pixfmt().
+	 */
+	if (!component && pixm->num_planes == 1)
+		return pixm->plane_fmt[0].bytesperline * pixm->height;
+	return pixm->plane_fmt[component].sizeimage;	/* otherwise the plane's own size */
+}
+
+static void rkisp1_irq_frame_end_enable(struct rkisp1_capture *cap)
+{
+	u32 mi_imsc = rkisp1_read(cap->rkisp1, RKISP1_CIF_MI_IMSC);
+
+	mi_imsc |= RKISP1_CIF_MI_FRAME(cap);	/* add this path's frame bit to MI_IMSC */
+	rkisp1_write(cap->rkisp1, mi_imsc, RKISP1_CIF_MI_IMSC);
+}
+
+static void rkisp1_mp_config(struct rkisp1_capture *cap)
+{
+	const struct v4l2_pix_format_mplane *pixm = &cap->pix.fmt;
+	struct rkisp1_device *rkisp1 = cap->rkisp1;
+	u32 reg;
+
+	rkisp1_write(rkisp1, rkisp1_pixfmt_comp_size(pixm, RKISP1_PLANE_Y),
+		     cap->config->mi.y_size_init);
+	rkisp1_write(rkisp1, rkisp1_pixfmt_comp_size(pixm, RKISP1_PLANE_CB),
+		     cap->config->mi.cb_size_init);
+	rkisp1_write(rkisp1, rkisp1_pixfmt_comp_size(pixm, RKISP1_PLANE_CR),
+		     cap->config->mi.cr_size_init);
+
+	rkisp1_irq_frame_end_enable(cap);
+
+	/* set uv swapping for semiplanar formats */
+	if (cap->pix.info->comp_planes == 2) {
+		reg = rkisp1_read(rkisp1, RKISP1_CIF_MI_XTD_FORMAT_CTRL);
+		if (cap->pix.cfg->uv_swap)
+			reg |= RKISP1_CIF_MI_XTD_FMT_CTRL_MP_CB_CR_SWAP;
+		else
+			reg &= ~RKISP1_CIF_MI_XTD_FMT_CTRL_MP_CB_CR_SWAP;
+		rkisp1_write(rkisp1, reg, RKISP1_CIF_MI_XTD_FORMAT_CTRL);
+	}
+
+	rkisp1_mi_config_ctrl(cap);	/* burst lengths, init base/offset enables */
+
+	reg = rkisp1_read(rkisp1, RKISP1_CIF_MI_CTRL);
+	reg &= ~RKISP1_MI_CTRL_MP_FMT_MASK;	/* replace the mp write format */
+	reg |= cap->pix.cfg->write_format;
+	rkisp1_write(rkisp1, reg, RKISP1_CIF_MI_CTRL);
+
+	reg = rkisp1_read(rkisp1, RKISP1_CIF_MI_CTRL);
+	reg |= RKISP1_CIF_MI_MP_AUTOUPDATE_ENABLE;
+	rkisp1_write(rkisp1, reg, RKISP1_CIF_MI_CTRL);
+}
+
+static void rkisp1_sp_config(struct rkisp1_capture *cap)
+{
+	const struct v4l2_pix_format_mplane *pixm = &cap->pix.fmt;
+	struct rkisp1_device *rkisp1 = cap->rkisp1;
+	u32 mi_ctrl, reg;
+
+	rkisp1_write(rkisp1, rkisp1_pixfmt_comp_size(pixm, RKISP1_PLANE_Y),
+		     cap->config->mi.y_size_init);
+	rkisp1_write(rkisp1, rkisp1_pixfmt_comp_size(pixm, RKISP1_PLANE_CB),
+		     cap->config->mi.cb_size_init);
+	rkisp1_write(rkisp1, rkisp1_pixfmt_comp_size(pixm, RKISP1_PLANE_CR),
+		     cap->config->mi.cr_size_init);
+
+	rkisp1_write(rkisp1, pixm->width, RKISP1_CIF_MI_SP_Y_PIC_WIDTH);
+	rkisp1_write(rkisp1, pixm->height, RKISP1_CIF_MI_SP_Y_PIC_HEIGHT);
+	rkisp1_write(rkisp1, cap->sp_y_stride, RKISP1_CIF_MI_SP_Y_LLENGTH);
+
+	rkisp1_irq_frame_end_enable(cap);
+
+	/* set uv swapping for semiplanar formats */
+	if (cap->pix.info->comp_planes == 2) {
+		reg = rkisp1_read(rkisp1, RKISP1_CIF_MI_XTD_FORMAT_CTRL);
+		if (cap->pix.cfg->uv_swap)
+			reg |= RKISP1_CIF_MI_XTD_FMT_CTRL_SP_CB_CR_SWAP;
+		else
+			reg &= ~RKISP1_CIF_MI_XTD_FMT_CTRL_SP_CB_CR_SWAP;
+		rkisp1_write(rkisp1, reg, RKISP1_CIF_MI_XTD_FORMAT_CTRL);
+	}
+
+	rkisp1_mi_config_ctrl(cap);	/* burst lengths, init base/offset enables */
+
+	mi_ctrl = rkisp1_read(rkisp1, RKISP1_CIF_MI_CTRL);
+	mi_ctrl &= ~RKISP1_MI_CTRL_SP_FMT_MASK;	/* replace the sp format fields */
+	mi_ctrl |= cap->pix.cfg->write_format |
+		   RKISP1_MI_CTRL_SP_INPUT_YUV422 |
+		   cap->pix.cfg->output_format |
+		   RKISP1_CIF_MI_SP_AUTOUPDATE_ENABLE;
+	rkisp1_write(rkisp1, mi_ctrl, RKISP1_CIF_MI_CTRL);
+}
+
+static void rkisp1_mp_disable(struct rkisp1_capture *cap)
+{
+	u32 mi_ctrl = rkisp1_read(cap->rkisp1, RKISP1_CIF_MI_CTRL);
+
+	mi_ctrl &= ~(RKISP1_CIF_MI_CTRL_MP_ENABLE |
+		     RKISP1_CIF_MI_CTRL_RAW_ENABLE);	/* rkisp1_mp_enable() sets one of them */
+	rkisp1_write(cap->rkisp1, mi_ctrl, RKISP1_CIF_MI_CTRL);
+}
+
+static void rkisp1_sp_disable(struct rkisp1_capture *cap)
+{
+	u32 mi_ctrl = rkisp1_read(cap->rkisp1, RKISP1_CIF_MI_CTRL);
+
+	mi_ctrl &= ~RKISP1_CIF_MI_CTRL_SP_ENABLE;	/* clear only the sp enable bit */
+	rkisp1_write(cap->rkisp1, mi_ctrl, RKISP1_CIF_MI_CTRL);
+}
+
+static void rkisp1_mp_enable(struct rkisp1_capture *cap)
+{
+	u32 mi_ctrl;
+
+	rkisp1_mp_disable(cap);	/* start with both mp enable bits cleared */
+
+	mi_ctrl = rkisp1_read(cap->rkisp1, RKISP1_CIF_MI_CTRL);
+	if (v4l2_is_format_bayer(cap->pix.info))
+		mi_ctrl |= RKISP1_CIF_MI_CTRL_RAW_ENABLE;
+	/* every non-bayer format (YUV) uses the MP enable bit */
+	else
+		mi_ctrl |= RKISP1_CIF_MI_CTRL_MP_ENABLE;
+
+	rkisp1_write(cap->rkisp1, mi_ctrl, RKISP1_CIF_MI_CTRL);
+}
+
+static void rkisp1_sp_enable(struct rkisp1_capture *cap)
+{
+	u32 mi_ctrl = rkisp1_read(cap->rkisp1, RKISP1_CIF_MI_CTRL);
+
+	mi_ctrl |= RKISP1_CIF_MI_CTRL_SP_ENABLE;	/* set only the sp enable bit */
+	rkisp1_write(cap->rkisp1, mi_ctrl, RKISP1_CIF_MI_CTRL);
+}
+
+static void rkisp1_mp_sp_stop(struct rkisp1_capture *cap)
+{
+	if (!cap->is_streaming)
+		return;	/* nothing to stop */
+	rkisp1_write(cap->rkisp1,
+		     RKISP1_CIF_MI_FRAME(cap), RKISP1_CIF_MI_ICR);	/* this path's frame bit */
+	cap->ops->disable(cap);
+}
+
+static bool rkisp1_mp_is_stopped(struct rkisp1_capture *cap)
+{
+	u32 en = RKISP1_CIF_MI_CTRL_SHD_MP_IN_ENABLED |
+		 RKISP1_CIF_MI_CTRL_SHD_RAW_OUT_ENABLED;
+
+	return !(rkisp1_read(cap->rkisp1, RKISP1_CIF_MI_CTRL_SHD) & en);	/* neither bit set */
+}
+
+static bool rkisp1_sp_is_stopped(struct rkisp1_capture *cap)
+{
+	return !(rkisp1_read(cap->rkisp1, RKISP1_CIF_MI_CTRL_SHD) &
+		 RKISP1_CIF_MI_CTRL_SHD_SP_IN_ENABLED);	/* true once the shadow bit is clear */
+}
+
+static void rkisp1_mp_set_data_path(struct rkisp1_capture *cap)
+{
+	u32 dpcl = rkisp1_read(cap->rkisp1, RKISP1_CIF_VI_DPCL);
+
+	dpcl = dpcl | RKISP1_CIF_VI_DPCL_CHAN_MODE_MP |
+	       RKISP1_CIF_VI_DPCL_MP_MUX_MRSZ_MI;	/* bits are only added, none cleared */
+	rkisp1_write(cap->rkisp1, dpcl, RKISP1_CIF_VI_DPCL);
+}
+
+static void rkisp1_sp_set_data_path(struct rkisp1_capture *cap)
+{
+	u32 dpcl = rkisp1_read(cap->rkisp1, RKISP1_CIF_VI_DPCL);
+
+	dpcl |= RKISP1_CIF_VI_DPCL_CHAN_MODE_SP;	/* bit is only added, none cleared */
+	rkisp1_write(cap->rkisp1, dpcl, RKISP1_CIF_VI_DPCL);
+}
+
+static struct rkisp1_capture_ops rkisp1_capture_ops_mp = {
+	.config = rkisp1_mp_config,
+	.enable = rkisp1_mp_enable,
+	.disable = rkisp1_mp_disable,
+	.stop = rkisp1_mp_sp_stop,	/* shared with rkisp1_capture_ops_sp */
+	.set_data_path = rkisp1_mp_set_data_path,
+	.is_stopped = rkisp1_mp_is_stopped,
+};
+
+static struct rkisp1_capture_ops rkisp1_capture_ops_sp = {
+	.config = rkisp1_sp_config,
+	.enable = rkisp1_sp_enable,
+	.disable = rkisp1_sp_disable,
+	.stop = rkisp1_mp_sp_stop,	/* shared with rkisp1_capture_ops_mp */
+	.set_data_path = rkisp1_sp_set_data_path,
+	.is_stopped = rkisp1_sp_is_stopped,
+};
+
+/* ----------------------------------------------------------------------------
+ * Frame buffer operations
+ */
+
+static int rkisp1_dummy_buf_create(struct rkisp1_capture *cap)
+{
+	const struct v4l2_pix_format_mplane *pixm = &cap->pix.fmt;
+	struct rkisp1_dummy_buffer *dummy_buf = &cap->buf.dummy;
+
+	dummy_buf->size = max3(rkisp1_pixfmt_comp_size(pixm, RKISP1_PLANE_Y),
+			       rkisp1_pixfmt_comp_size(pixm, RKISP1_PLANE_CB),
+			       rkisp1_pixfmt_comp_size(pixm, RKISP1_PLANE_CR));
+
+	/* The driver never accesses vaddr, so no kernel mapping is required */
+	dummy_buf->vaddr = dma_alloc_attrs(cap->rkisp1->dev,
+					   dummy_buf->size,
+					   &dummy_buf->dma_addr,
+					   GFP_KERNEL,
+					   DMA_ATTR_NO_KERNEL_MAPPING);
+	if (!dummy_buf->vaddr)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void rkisp1_dummy_buf_destroy(struct rkisp1_capture *cap)
+{
+	dma_free_attrs(cap->rkisp1->dev,	/* same attrs as rkisp1_dummy_buf_create() */
+		       cap->buf.dummy.size, cap->buf.dummy.vaddr,
+		       cap->buf.dummy.dma_addr, DMA_ATTR_NO_KERNEL_MAPPING);
+}
+
+static void rkisp1_set_next_buf(struct rkisp1_capture *cap)
+{
+	cap->buf.curr = cap->buf.next;	/* the previously programmed buffer becomes current */
+	cap->buf.next = NULL;
+
+	if (!list_empty(&cap->buf.queue)) {
+		u32 *buff_addr;
+
+		cap->buf.next = list_first_entry(&cap->buf.queue, struct rkisp1_buffer, queue);
+		list_del(&cap->buf.next->queue);
+
+		buff_addr = cap->buf.next->buff_addr;
+
+		rkisp1_write(cap->rkisp1,
+			     buff_addr[RKISP1_PLANE_Y],
+			     cap->config->mi.y_base_ad_init);
+		rkisp1_write(cap->rkisp1,
+			     buff_addr[RKISP1_PLANE_CB],
+			     cap->config->mi.cb_base_ad_init);
+		rkisp1_write(cap->rkisp1,
+			     buff_addr[RKISP1_PLANE_CR],
+			     cap->config->mi.cr_base_ad_init);
+	} else {
+		/*
+		 * Use the dummy space allocated by dma_alloc_attrs to
+		 * throw data if there is no available buffer.
+		 */
+		rkisp1_write(cap->rkisp1,
+			     cap->buf.dummy.dma_addr,
+			     cap->config->mi.y_base_ad_init);
+		rkisp1_write(cap->rkisp1,
+			     cap->buf.dummy.dma_addr,
+			     cap->config->mi.cb_base_ad_init);
+		rkisp1_write(cap->rkisp1,
+			     cap->buf.dummy.dma_addr,
+			     cap->config->mi.cr_base_ad_init);
+	}
+
+	/* Set all plane offsets to 0 */
+	rkisp1_write(cap->rkisp1, 0, cap->config->mi.y_offs_cnt_init);
+	rkisp1_write(cap->rkisp1, 0, cap->config->mi.cb_offs_cnt_init);
+	rkisp1_write(cap->rkisp1, 0, cap->config->mi.cr_offs_cnt_init);
+}
+
+/*
+ * This function is called when a frame end comes. The next frame
+ * is already being processed, so we should set up the buffer for
+ * the next-next frame, otherwise it will overflow.
+ */
+static void rkisp1_handle_buffer(struct rkisp1_capture *cap)
+{
+	struct rkisp1_isp *isp = &cap->rkisp1->isp;
+	struct rkisp1_buffer *curr_buf;
+
+	spin_lock(&cap->buf.lock);
+	curr_buf = cap->buf.curr;
+
+	if (curr_buf) {
+		curr_buf->vb.sequence = isp->frame_sequence;
+		curr_buf->vb.vb2_buf.timestamp = ktime_get_boottime_ns();
+		curr_buf->vb.field = V4L2_FIELD_NONE;
+		vb2_buffer_done(&curr_buf->vb.vb2_buf, VB2_BUF_STATE_DONE);
+	} else {
+		cap->rkisp1->debug.frame_drop[cap->id]++;	/* no current buffer for this frame */
+	}
+
+	rkisp1_set_next_buf(cap);
+	spin_unlock(&cap->buf.lock);
+}
+
+void rkisp1_capture_isr(struct rkisp1_device *rkisp1)
+{
+	unsigned int i;
+	u32 status;
+
+	status = rkisp1_read(rkisp1, RKISP1_CIF_MI_MIS);
+	rkisp1_write(rkisp1, status, RKISP1_CIF_MI_ICR);	/* write back the bits just read */
+
+	for (i = 0; i < ARRAY_SIZE(rkisp1->capture_devs); ++i) {
+		struct rkisp1_capture *cap = &rkisp1->capture_devs[i];
+
+		if (!(status & RKISP1_CIF_MI_FRAME(cap)))
+			continue;	/* no frame event for this path */
+		if (!cap->is_stopping) {
+			rkisp1_handle_buffer(cap);
+			continue;
+		}
+		/*
+		 * Make sure the stream is actually stopped, which can
+		 * be read from the shadow register, before waking up
+		 * the waiting thread, which would immediately free all
+		 * frame buffers. stop() takes effect at the next
+		 * frame end, which syncs the configuration to the
+		 * shadow registers.
+		 */
+		if (!cap->ops->is_stopped(cap)) {
+			cap->ops->stop(cap);
+			continue;
+		}
+		cap->is_stopping = false;
+		cap->is_streaming = false;
+		wake_up(&cap->done);
+	}
+}
+
+/* ----------------------------------------------------------------------------
+ * Vb2 operations
+ */
+
+static int rkisp1_vb2_queue_setup(struct vb2_queue *queue,
+				  unsigned int *num_buffers,
+				  unsigned int *num_planes,
+				  unsigned int sizes[],
+				  struct device *alloc_devs[])
+{
+	struct rkisp1_capture *cap = queue->drv_priv;
+	const struct v4l2_pix_format_mplane *pixm = &cap->pix.fmt;
+	unsigned int i;
+
+	if (*num_planes) {	/* caller-provided layout: validate against the format */
+		if (*num_planes != pixm->num_planes)
+			return -EINVAL;
+
+		for (i = 0; i < pixm->num_planes; i++)
+			if (sizes[i] < pixm->plane_fmt[i].sizeimage)
+				return -EINVAL;
+	} else {	/* no layout given: report the current format's planes */
+		*num_planes = pixm->num_planes;
+		for (i = 0; i < pixm->num_planes; i++)
+			sizes[i] = pixm->plane_fmt[i].sizeimage;
+	}
+
+	return 0;
+}
+
+static void rkisp1_vb2_buf_queue(struct vb2_buffer *vb)
+{
+	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
+	struct rkisp1_buffer *ispbuf =
+		container_of(vbuf, struct rkisp1_buffer, vb);
+	struct rkisp1_capture *cap = vb->vb2_queue->drv_priv;
+	const struct v4l2_pix_format_mplane *pixm = &cap->pix.fmt;
+	unsigned int i;
+
+	memset(ispbuf->buff_addr, 0, sizeof(ispbuf->buff_addr));
+	for (i = 0; i < pixm->num_planes; i++)
+		ispbuf->buff_addr[i] = vb2_dma_contig_plane_dma_addr(vb, i);
+
+	/* Single plane: derive the cb and cr addresses from the y address */
+	if (pixm->num_planes == 1) {
+		ispbuf->buff_addr[RKISP1_PLANE_CB] =
+			ispbuf->buff_addr[RKISP1_PLANE_Y] +
+			rkisp1_pixfmt_comp_size(pixm, RKISP1_PLANE_Y);
+		ispbuf->buff_addr[RKISP1_PLANE_CR] =
+			ispbuf->buff_addr[RKISP1_PLANE_CB] +
+			rkisp1_pixfmt_comp_size(pixm, RKISP1_PLANE_CB);
+	}
+
+	/*
+	 * uv swap can be supported for planar formats by switching
+	 * the address of cb and cr
+	 */
+	if (cap->pix.info->comp_planes == 3 && cap->pix.cfg->uv_swap)
+		swap(ispbuf->buff_addr[RKISP1_PLANE_CR],
+		     ispbuf->buff_addr[RKISP1_PLANE_CB]);
+
+	spin_lock_irq(&cap->buf.lock);
+	list_add_tail(&ispbuf->queue, &cap->buf.queue);	/* consumed by rkisp1_set_next_buf() */
+	spin_unlock_irq(&cap->buf.lock);
+}
+
+static int rkisp1_vb2_buf_prepare(struct vb2_buffer *vb)
+{
+	struct rkisp1_capture *cap = vb->vb2_queue->drv_priv;
+	unsigned int i;
+
+	/* Each plane must hold a full image; its payload is set to that size */
+	for (i = 0; i < cap->pix.fmt.num_planes; i++) {
+		unsigned long size = cap->pix.fmt.plane_fmt[i].sizeimage;
+
+		if (vb2_plane_size(vb, i) < size) {
+			dev_err(cap->rkisp1->dev, "User buffer too small (%lu < %lu)\n",
+				vb2_plane_size(vb, i), size);
+			return -EINVAL;
+		}
+		vb2_set_plane_payload(vb, i, size);
+	}
+
+	return 0;
+}
+
+static void rkisp1_return_all_buffers(struct rkisp1_capture *cap,
+				      enum vb2_buffer_state state)
+{
+	struct list_head *pending = &cap->buf.queue;
+	struct rkisp1_buffer *entry;
+
+	spin_lock_irq(&cap->buf.lock);
+	/* Give back the current and the next frame buffers first */
+	if (cap->buf.curr)
+		vb2_buffer_done(&cap->buf.curr->vb.vb2_buf, state);
+	cap->buf.curr = NULL;
+	if (cap->buf.next)
+		vb2_buffer_done(&cap->buf.next->vb.vb2_buf, state);
+	cap->buf.next = NULL;
+	/* Then drain the list of queued buffers, oldest first */
+	while (!list_empty(pending)) {
+		entry = list_first_entry(pending, struct rkisp1_buffer, queue);
+		list_del(&entry->queue);
+		vb2_buffer_done(&entry->vb.vb2_buf, state);
+	}
+	spin_unlock_irq(&cap->buf.lock);
+}
+
+/*
+ * Most registers inside the Rockchip ISP1 have a shadow register, since
+ * they must not be changed while a frame is being processed.
+ * Usually, each sub-module updates its shadow register after
+ * processing the last pixel of a frame.
+ */
+static void rkisp1_cap_stream_enable(struct rkisp1_capture *cap)
+{
+	struct rkisp1_device *rkisp1 = cap->rkisp1;
+	struct rkisp1_capture *other = &rkisp1->capture_devs[cap->id ^ 1];
+
+	cap->ops->set_data_path(cap);
+	cap->ops->config(cap);
+
+	/* Setup a buffer for the next frame */
+	spin_lock_irq(&cap->buf.lock);
+	rkisp1_set_next_buf(cap);
+	cap->ops->enable(cap);
+	/* It is safe to configure both the ACTIVE and SHADOW registers
+	 * for the first stream. When the second stream is starting, do
+	 * NOT force an update because it would also update the first one.
+	 *
+	 * In the latter case one more buffer is dropped (two in total),
+	 * since there is no buffer in the shadow register when the second
+	 * FE is received. This is also required because the second FE
+	 * may be corrupt, especially when running at 120fps.
+	 */
+	if (!other->is_streaming) {
+		/* force cfg update (rkisp1_write() takes the value first) */
+		rkisp1_write(rkisp1,
+			     RKISP1_CIF_MI_INIT_SOFT_UPD, RKISP1_CIF_MI_INIT);
+		rkisp1_set_next_buf(cap);
+	}
+	spin_unlock_irq(&cap->buf.lock);
+	cap->is_streaming = true;
+}
+
+static void rkisp1_cap_stream_disable(struct rkisp1_capture *cap)
+{
+	const unsigned long timeout = msecs_to_jiffies(1000);
+
+	/* The irq handler is expected to stop the stream once this is set */
+	cap->is_stopping = true;
+
+	if (wait_event_timeout(cap->done, !cap->is_streaming, timeout))
+		return;
+
+	/* It did not stop within the timeout, so stop the stream by force */
+	cap->rkisp1->debug.stop_timeout[cap->id]++;
+	cap->ops->stop(cap);
+	cap->is_stopping = false;
+	cap->is_streaming = false;
+}
+
+/*
+ * rkisp1_pipeline_stream_disable - disable nodes in the pipeline
+ *
+ * Disable the DMA engine, then call s_stream(false) on the sub-devices in
+ * the reverse order from rkisp1_pipeline_stream_enable(). Must be called
+ * with stream_lock held, before media_pipeline_stop().
+ */
+static void rkisp1_pipeline_stream_disable(struct rkisp1_capture *cap)
+	__must_hold(&cap->rkisp1->stream_lock)
+{
+	struct rkisp1_device *rkisp1 = cap->rkisp1;
+
+	rkisp1_cap_stream_disable(cap);
+
+	/*
+	 * If the other capture is streaming, isp and sensor nodes shouldn't
+	 * be disabled, skip them.
+	 */
+	if (rkisp1->pipe.streaming_count < 2) {
+		v4l2_subdev_call(rkisp1->active_sensor->sd, video, s_stream,
+				 false);
+		v4l2_subdev_call(&rkisp1->isp.sd, video, s_stream, false);
+	}
+
+	v4l2_subdev_call(&rkisp1->resizer_devs[cap->id].sd, video, s_stream,
+			 false);
+}
+
+/*
+ * rkisp1_pipeline_stream_enable - enable nodes in the pipeline
+ *
+ * Enable the DMA engine, then s_stream(true) the resizer, isp and sensor.
+ * Must be called with stream_lock held, after media_pipeline_start().
+ */
+static int rkisp1_pipeline_stream_enable(struct rkisp1_capture *cap)
+	__must_hold(&cap->rkisp1->stream_lock)
+{
+	struct rkisp1_device *rkisp1 = cap->rkisp1;
+	int ret;
+
+	rkisp1_cap_stream_enable(cap);
+
+	ret = v4l2_subdev_call(&rkisp1->resizer_devs[cap->id].sd, video,
+			       s_stream, true);
+	if (ret)
+		goto err_disable_cap;
+
+	/*
+	 * If the other capture is streaming, isp and sensor nodes are already
+	 * enabled, skip them.
+	 */
+	if (rkisp1->pipe.streaming_count > 1)
+		return 0;
+
+	ret = v4l2_subdev_call(&rkisp1->isp.sd, video, s_stream, true);
+	if (ret)
+		goto err_disable_rsz;
+
+	ret = v4l2_subdev_call(rkisp1->active_sensor->sd, video, s_stream,
+			       true);
+	if (ret)
+		goto err_disable_isp;
+
+	return 0;
+	/* Error unwinding: undo the steps above in the reverse order */
+err_disable_isp:
+	v4l2_subdev_call(&rkisp1->isp.sd, video, s_stream, false);
+err_disable_rsz:
+	v4l2_subdev_call(&rkisp1->resizer_devs[cap->id].sd, video, s_stream,
+			 false);
+err_disable_cap:
+	rkisp1_cap_stream_disable(cap);
+
+	return ret;
+}
+
+static void rkisp1_vb2_stop_streaming(struct vb2_queue *queue)
+{
+	struct rkisp1_capture *cap = queue->drv_priv;
+	struct rkisp1_vdev_node *node = &cap->vnode;
+	struct rkisp1_device *rkisp1 = cap->rkisp1;
+	int ret;
+
+	mutex_lock(&cap->rkisp1->stream_lock);
+
+	rkisp1_pipeline_stream_disable(cap);
+	/* Buffers still held by the driver are returned in the ERROR state */
+	rkisp1_return_all_buffers(cap, VB2_BUF_STATE_ERROR);
+	/* Drop the power references taken by rkisp1_vb2_start_streaming() */
+	v4l2_pipeline_pm_put(&node->vdev.entity);
+	ret = pm_runtime_put(rkisp1->dev);
+	if (ret < 0)
+		dev_err(rkisp1->dev, "power down failed error:%d\n", ret);
+
+	rkisp1_dummy_buf_destroy(cap);
+
+	media_pipeline_stop(&node->vdev.entity);
+
+	mutex_unlock(&cap->rkisp1->stream_lock);
+}
+
+static int
+rkisp1_vb2_start_streaming(struct vb2_queue *queue, unsigned int count)
+{
+	struct rkisp1_capture *cap = queue->drv_priv;
+	struct media_entity *entity = &cap->vnode.vdev.entity;
+	int ret;
+
+	mutex_lock(&cap->rkisp1->stream_lock);
+
+	ret = media_pipeline_start(entity, &cap->rkisp1->pipe);
+	if (ret) {
+		dev_err(cap->rkisp1->dev, "start pipeline failed %d\n", ret);
+		goto err_ret_buffers;
+	}
+	/* The dummy buffer receives frames when no vb2 buffer is available */
+	ret = rkisp1_dummy_buf_create(cap);
+	if (ret)
+		goto err_pipeline_stop;
+	/* On failure the usage count is dropped again with put_noidle() */
+	ret = pm_runtime_get_sync(cap->rkisp1->dev);
+	if (ret < 0) {
+		pm_runtime_put_noidle(cap->rkisp1->dev);
+		dev_err(cap->rkisp1->dev, "power up failed %d\n", ret);
+		goto err_destroy_dummy;
+	}
+	ret = v4l2_pipeline_pm_get(entity);
+	if (ret) {
+		dev_err(cap->rkisp1->dev, "open cif pipeline failed %d\n", ret);
+		goto err_pipe_pm_put;
+	}
+
+	ret = rkisp1_pipeline_stream_enable(cap);
+	if (ret)
+		goto err_v4l2_pm_put;
+
+	mutex_unlock(&cap->rkisp1->stream_lock);
+
+	return 0;
+	/* Error unwinding; queued buffers are handed back in QUEUED state */
+err_v4l2_pm_put:
+	v4l2_pipeline_pm_put(entity);
+err_pipe_pm_put:
+	pm_runtime_put(cap->rkisp1->dev);
+err_destroy_dummy:
+	rkisp1_dummy_buf_destroy(cap);
+err_pipeline_stop:
+	media_pipeline_stop(entity);
+err_ret_buffers:
+	rkisp1_return_all_buffers(cap, VB2_BUF_STATE_QUEUED);
+	mutex_unlock(&cap->rkisp1->stream_lock);
+
+	return ret;
+}
+
+static const struct vb2_ops rkisp1_vb2_ops = { /* never written, so const */
+	.queue_setup = rkisp1_vb2_queue_setup,
+	.buf_queue = rkisp1_vb2_buf_queue,
+	.buf_prepare = rkisp1_vb2_buf_prepare,
+	.wait_prepare = vb2_ops_wait_prepare,
+	.wait_finish = vb2_ops_wait_finish,
+	.stop_streaming = rkisp1_vb2_stop_streaming,
+	.start_streaming = rkisp1_vb2_start_streaming,
+};
+
+/* ----------------------------------------------------------------------------
+ * IOCTLs operations
+ */
+
+static const struct v4l2_format_info *
+rkisp1_fill_pixfmt(struct v4l2_pix_format_mplane *pixm,
+		   enum rkisp1_stream_id id)
+{
+	struct v4l2_plane_pix_format *plane_y = &pixm->plane_fmt[0];
+	const struct v4l2_format_info *info;
+	unsigned int i;
+	u32 stride;
+
+	memset(pixm->plane_fmt, 0, sizeof(pixm->plane_fmt));
+	info = v4l2_format_info(pixm->pixelformat);
+	pixm->num_planes = info->mem_planes;
+	stride = info->bpp[0] * pixm->width;
+	/* Self path supports custom stride but Main path doesn't.
+	 * NOTE(review): bytesperline was zeroed by the memset above - confirm.
+	 */
+	if (id == RKISP1_MAINPATH || plane_y->bytesperline < stride)
+		plane_y->bytesperline = stride;
+	plane_y->sizeimage = plane_y->bytesperline * pixm->height;
+	/* normalize stride to pixels per line */
+	stride = DIV_ROUND_UP(plane_y->bytesperline, info->bpp[0]);
+
+	for (i = 1; i < info->comp_planes; i++) {
+		struct v4l2_plane_pix_format *plane = &pixm->plane_fmt[i];
+
+		/* bytesperline for other components derive from Y component */
+		plane->bytesperline = DIV_ROUND_UP(stride, info->hdiv) *
+				      info->bpp[i];
+		plane->sizeimage = plane->bytesperline *
+				   DIV_ROUND_UP(pixm->height, info->vdiv);
+	}
+
+	/*
+	 * If pixfmt is packed, then plane_fmt[0] should contain the total size
+	 * considering all components. plane_fmt[i] for i > 0 should be ignored
+	 * by userspace as mem_planes == 1, but we are keeping information there
+	 * for convenience.
+	 */
+	if (info->mem_planes == 1)
+		for (i = 1; i < info->comp_planes; i++)
+			plane_y->sizeimage += pixm->plane_fmt[i].sizeimage;
+	return info;
+}
+
+static const struct rkisp1_capture_fmt_cfg *
+rkisp1_find_fmt_cfg(const struct rkisp1_capture *cap, const u32 pixelfmt)
+{
+	const struct rkisp1_capture_fmt_cfg *fmt = cap->config->fmts;
+	const struct rkisp1_capture_fmt_cfg *end = fmt + cap->config->fmt_size;
+
+	for (; fmt < end; fmt++)	/* NULL when no entry matches */
+		if (fmt->fourcc == pixelfmt)
+			return fmt;
+	return NULL;
+}
+
+static void rkisp1_try_fmt(const struct rkisp1_capture *cap,
+			   struct v4l2_pix_format_mplane *pixm,
+			   const struct rkisp1_capture_fmt_cfg **fmt_cfg,
+			   const struct v4l2_format_info **fmt_info)
+{
+	const bool is_mp = cap->id == RKISP1_MAINPATH;
+	const u32 max_width = is_mp ? RKISP1_RSZ_MP_SRC_MAX_WIDTH :
+				      RKISP1_RSZ_SP_SRC_MAX_WIDTH;
+	const u32 max_height = is_mp ? RKISP1_RSZ_MP_SRC_MAX_HEIGHT :
+				       RKISP1_RSZ_SP_SRC_MAX_HEIGHT;
+	const struct rkisp1_capture_fmt_cfg *cfg;
+	const struct v4l2_format_info *info;
+
+	/* Unsupported pixel formats fall back to the first table entry */
+	cfg = rkisp1_find_fmt_cfg(cap, pixm->pixelformat);
+	if (!cfg)
+		cfg = cap->config->fmts;
+	pixm->pixelformat = cfg->fourcc;
+
+	/* Clamp the size to the RKISP1_RSZ_* source limits of this path */
+	pixm->width = clamp_t(u32, pixm->width,
+			      RKISP1_RSZ_SRC_MIN_WIDTH, max_width);
+	pixm->height = clamp_t(u32, pixm->height,
+			       RKISP1_RSZ_SRC_MIN_HEIGHT, max_height);
+	pixm->field = V4L2_FIELD_NONE;
+	pixm->colorspace = V4L2_COLORSPACE_DEFAULT;
+	pixm->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
+	pixm->quantization = V4L2_QUANTIZATION_DEFAULT;
+
+	info = rkisp1_fill_pixfmt(pixm, cap->id);
+	if (fmt_cfg)
+		*fmt_cfg = cfg;
+	if (fmt_info)
+		*fmt_info = info;
+}
+
+static void rkisp1_set_fmt(struct rkisp1_capture *cap,
+			   struct v4l2_pix_format_mplane *pixm)
+{
+	/* Adjust @pixm to a supported format and make it the active one */
+	rkisp1_try_fmt(cap, pixm, &cap->pix.cfg, &cap->pix.info);
+	cap->pix.fmt = *pixm;
+	if (cap->id != RKISP1_SELFPATH)
+		return;
+	/* SP supports custom stride in number of pixels of the Y plane */
+	cap->sp_y_stride = pixm->plane_fmt[0].bytesperline / cap->pix.info->bpp[0];
+}
+
+static int rkisp1_try_fmt_vid_cap_mplane(struct file *file, void *fh,
+					 struct v4l2_format *f)
+{
+	struct v4l2_pix_format_mplane *pixm = &f->fmt.pix_mp;
+
+	/* Only adjusts @f; the capture's stored format is left untouched */
+	rkisp1_try_fmt(video_drvdata(file), pixm, NULL, NULL);
+	return 0;
+}
+
+static int rkisp1_enum_fmt_vid_cap_mplane(struct file *file, void *priv,
+					  struct v4l2_fmtdesc *f)
+{
+	struct rkisp1_capture *cap = video_drvdata(file);
+	const struct rkisp1_capture_config *config = cap->config;
+	unsigned int idx, matches = 0;
+
+	/* Without an mbus code filter, f->index addresses the table directly */
+	if (!f->mbus_code) {
+		if (f->index >= config->fmt_size)
+			return -EINVAL;
+
+		f->pixelformat = config->fmts[f->index].fourcc;
+		return 0;
+	}
+
+	/* Otherwise f->index counts only the entries whose mbus matches */
+	for (idx = 0; idx < config->fmt_size; idx++) {
+		if (config->fmts[idx].mbus != f->mbus_code)
+			continue;
+		if (matches++ != f->index)
+			continue;
+		f->pixelformat = config->fmts[idx].fourcc;
+		return 0;
+	}
+	return -EINVAL;
+}
+
+static int rkisp1_s_fmt_vid_cap_mplane(struct file *file,
+				       void *priv, struct v4l2_format *f)
+{
+	struct rkisp1_capture *cap = video_drvdata(file);
+	struct rkisp1_vdev_node *node;
+
+	/* The format cannot be changed while the vb2 queue is busy */
+	node = rkisp1_vdev_to_node(&cap->vnode.vdev);
+	if (vb2_is_busy(&node->buf_queue))
+		return -EBUSY;
+
+	rkisp1_set_fmt(cap, &f->fmt.pix_mp);
+	return 0;
+}
+
+static int rkisp1_g_fmt_vid_cap_mplane(struct file *file, void *fh,
+				       struct v4l2_format *f)
+{
+	const struct rkisp1_capture *cap = video_drvdata(file);
+
+	/* Report the format last stored by rkisp1_set_fmt() */
+	f->fmt.pix_mp = cap->pix.fmt;
+	return 0;
+}
+
+static int
+rkisp1_querycap(struct file *file, void *priv, struct v4l2_capability *cap)
+{
+	struct rkisp1_capture *cap_dev = video_drvdata(file);
+	const char *drv_name = cap_dev->rkisp1->dev->driver->name;
+
+	/* The driver name is reported both as driver and as card name */
+	strscpy(cap->driver, drv_name, sizeof(cap->driver));
+	strscpy(cap->card, drv_name, sizeof(cap->card));
+	strscpy(cap->bus_info, RKISP1_BUS_INFO, sizeof(cap->bus_info));
+	return 0;
+}
+
+static const struct v4l2_ioctl_ops rkisp1_v4l2_ioctl_ops = {
+	.vidioc_reqbufs = vb2_ioctl_reqbufs,
+	.vidioc_querybuf = vb2_ioctl_querybuf,
+	.vidioc_create_bufs = vb2_ioctl_create_bufs,
+	.vidioc_qbuf = vb2_ioctl_qbuf,
+	.vidioc_expbuf = vb2_ioctl_expbuf,
+	.vidioc_dqbuf = vb2_ioctl_dqbuf,
+	.vidioc_prepare_buf = vb2_ioctl_prepare_buf,
+	.vidioc_streamon = vb2_ioctl_streamon,
+	.vidioc_streamoff = vb2_ioctl_streamoff,
+	.vidioc_try_fmt_vid_cap_mplane = rkisp1_try_fmt_vid_cap_mplane,
+	.vidioc_s_fmt_vid_cap_mplane = rkisp1_s_fmt_vid_cap_mplane,
+	.vidioc_g_fmt_vid_cap_mplane = rkisp1_g_fmt_vid_cap_mplane,
+	.vidioc_enum_fmt_vid_cap = rkisp1_enum_fmt_vid_cap_mplane,
+	.vidioc_querycap = rkisp1_querycap,
+	.vidioc_subscribe_event = v4l2_ctrl_subscribe_event,
+	.vidioc_unsubscribe_event = v4l2_event_unsubscribe,
+};
+
+static int rkisp1_capture_link_validate(struct media_link *link)
+{
+	struct video_device *vdev = media_entity_to_video_device(link->sink->entity);
+	struct v4l2_subdev *sd = media_entity_to_v4l2_subdev(link->source->entity);
+	struct rkisp1_capture *cap = video_get_drvdata(vdev);
+	const struct rkisp1_capture_fmt_cfg *fmt =
+		rkisp1_find_fmt_cfg(cap, cap->pix.fmt.pixelformat);
+	/* Designated init zeroes every field that is not set explicitly */
+	struct v4l2_subdev_format sd_fmt = {
+		.which = V4L2_SUBDEV_FORMAT_ACTIVE,
+		.pad = link->source->index,
+	};
+	int ret;
+
+	ret = v4l2_subdev_call(sd, pad, get_fmt, NULL, &sd_fmt);
+	if (ret)
+		return ret;
+	/* The source pad's active format must match the capture format */
+	if (sd_fmt.format.height != cap->pix.fmt.height ||
+	    sd_fmt.format.width != cap->pix.fmt.width ||
+	    sd_fmt.format.code != fmt->mbus)
+		return -EPIPE;
+
+	return 0;
+}
+
+/* ----------------------------------------------------------------------------
+ * core functions
+ */
+
+static const struct media_entity_operations rkisp1_media_ops = {
+	.link_validate = rkisp1_capture_link_validate,
+};
+
+static const struct v4l2_file_operations rkisp1_fops = {
+	.open = v4l2_fh_open,
+	.release = vb2_fop_release,
+	.unlocked_ioctl = video_ioctl2,
+	.poll = vb2_fop_poll,
+	.mmap = vb2_fop_mmap,
+};
+
+static void rkisp1_unregister_capture(struct rkisp1_capture *cap)
+{
+	media_entity_cleanup(&cap->vnode.vdev.entity); /* entity of the vdev */
+	vb2_video_unregister_device(&cap->vnode.vdev);
+}
+
+void rkisp1_capture_devs_unregister(struct rkisp1_device *rkisp1)
+{
+	unsigned int i;
+
+	/* Index 0 is the main path and index 1 the self path */
+	for (i = 0; i < ARRAY_SIZE(rkisp1->capture_devs); i++)
+		rkisp1_unregister_capture(&rkisp1->capture_devs[i]);
+}
+
+static int rkisp1_register_capture(struct rkisp1_capture *cap)
+{
+	const char * const dev_names[] = {RKISP1_MP_DEV_NAME,
+					  RKISP1_SP_DEV_NAME};
+	struct v4l2_device *v4l2_dev = &cap->rkisp1->v4l2_dev;
+	struct video_device *vdev = &cap->vnode.vdev;
+	struct rkisp1_vdev_node *node;
+	struct vb2_queue *q;
+	int ret;
+
+	strscpy(vdev->name, dev_names[cap->id], sizeof(vdev->name));
+	node = rkisp1_vdev_to_node(vdev);
+	mutex_init(&node->vlock);
+	/* Video device setup; vlock is shared by the vdev and the vb2 queue */
+	vdev->ioctl_ops = &rkisp1_v4l2_ioctl_ops;
+	vdev->release = video_device_release_empty;
+	vdev->fops = &rkisp1_fops;
+	vdev->minor = -1;
+	vdev->v4l2_dev = v4l2_dev;
+	vdev->lock = &node->vlock;
+	vdev->device_caps = V4L2_CAP_VIDEO_CAPTURE_MPLANE |
+			    V4L2_CAP_STREAMING | V4L2_CAP_IO_MC;
+	vdev->entity.ops = &rkisp1_media_ops;
+	video_set_drvdata(vdev, cap);
+	vdev->vfl_dir = VFL_DIR_RX;
+	node->pad.flags = MEDIA_PAD_FL_SINK;
+	/* vb2 queue: MMAP and DMABUF buffers, using the dma-contig mem ops */
+	q = &node->buf_queue;
+	q->type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
+	q->io_modes = VB2_MMAP | VB2_DMABUF;
+	q->drv_priv = cap;
+	q->ops = &rkisp1_vb2_ops;
+	q->mem_ops = &vb2_dma_contig_memops;
+	q->buf_struct_size = sizeof(struct rkisp1_buffer);
+	q->min_buffers_needed = RKISP1_MIN_BUFFERS_NEEDED;
+	q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
+	q->lock = &node->vlock;
+	q->dev = cap->rkisp1->dev;
+	ret = vb2_queue_init(q);
+	if (ret) {
+		dev_err(cap->rkisp1->dev,
+			"vb2 queue init failed (err=%d)\n", ret);
+		return ret;
+	}
+
+	vdev->queue = q;
+
+	/* Init the pad first so the entity is complete once it is registered */
+	ret = media_entity_pads_init(&vdev->entity, 1, &node->pad);
+	if (ret)
+		return ret;
+
+	ret = video_register_device(vdev, VFL_TYPE_VIDEO, -1);
+	if (ret) {
+		dev_err(cap->rkisp1->dev,
+			"failed to register %s, ret=%d\n", vdev->name, ret);
+		media_entity_cleanup(&vdev->entity);
+		return ret;
+	}
+	v4l2_info(v4l2_dev, "registered %s as /dev/video%d\n", vdev->name,
+		  vdev->num);
+
+	return 0;
+}
+
+static void
+rkisp1_capture_init(struct rkisp1_device *rkisp1, enum rkisp1_stream_id id)
+{
+	struct rkisp1_capture *cap = &rkisp1->capture_devs[id];
+	const bool is_sp = id == RKISP1_SELFPATH;
+	struct v4l2_pix_format_mplane def_fmt;
+
+	/* Start from a zeroed capture, then fill in its identity */
+	memset(cap, 0, sizeof(*cap));
+	cap->rkisp1 = rkisp1;
+	cap->id = id;
+	cap->is_streaming = false;
+
+	spin_lock_init(&cap->buf.lock);
+	INIT_LIST_HEAD(&cap->buf.queue);
+	init_waitqueue_head(&cap->done);
+
+	/* Each path has its own callbacks and its own configuration */
+	cap->ops = is_sp ? &rkisp1_capture_ops_sp : &rkisp1_capture_ops_mp;
+	cap->config = is_sp ? &rkisp1_capture_config_sp :
+			      &rkisp1_capture_config_mp;
+
+	/* Default format: YUYV at the default size, via rkisp1_set_fmt() */
+	memset(&def_fmt, 0, sizeof(def_fmt));
+	def_fmt.pixelformat = V4L2_PIX_FMT_YUYV;
+	def_fmt.width = RKISP1_DEFAULT_WIDTH;
+	def_fmt.height = RKISP1_DEFAULT_HEIGHT;
+	rkisp1_set_fmt(cap, &def_fmt);
+}
+
+int rkisp1_capture_devs_register(struct rkisp1_device *rkisp1)
+{
+	struct rkisp1_capture *cap;
+	unsigned int i, j;
+	int ret;
+
+	/* Initialize and register each capture device in array order */
+	for (i = 0; i < ARRAY_SIZE(rkisp1->capture_devs); i++) {
+		/* rkisp1_capture_init() also sets cap->rkisp1 */
+		rkisp1_capture_init(rkisp1, i);
+		cap = &rkisp1->capture_devs[i];
+		ret = rkisp1_register_capture(cap);
+		if (ret)
+			goto err_unreg_capture_devs;
+	}
+
+	return 0;
+
+err_unreg_capture_devs:
+	/* Only the captures before index i were registered; undo those */
+	for (j = 0; j < i; j++)
+		rkisp1_unregister_capture(&rkisp1->capture_devs[j]);
+
+	return ret;
+}
diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-common.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-common.c
new file mode 100644
index 000000000000..cf889666e166
--- /dev/null
+++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-common.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Rockchip ISP1 Driver - Common definitions
+ *
+ * Copyright (C) 2019 Collabora, Ltd.
+ */
+
+#include <media/v4l2-rect.h>
+
+#include "rkisp1-common.h"
+
+static const struct v4l2_rect rkisp1_sd_min_crop = {
+	.left = 0,
+	.top = 0,
+	.width = RKISP1_ISP_MIN_WIDTH,	/* minimum size handed to */
+	.height = RKISP1_ISP_MIN_HEIGHT,	/* v4l2_rect_set_min_size() */
+};
+
+void rkisp1_sd_adjust_crop_rect(struct v4l2_rect *crop,
+				const struct v4l2_rect *bounds)
+{
+	v4l2_rect_set_min_size(crop, &rkisp1_sd_min_crop); /* enforce min size */
+	v4l2_rect_map_inside(crop, bounds); /* then fit it inside @bounds */
+}
+
+void rkisp1_sd_adjust_crop(struct v4l2_rect *crop,
+			   const struct v4l2_mbus_framefmt *bounds)
+{
+	const struct v4l2_rect frame = {
+		.top = 0,	/* the bounds start at the frame origin */
+		.left = 0,
+		.height = bounds->height,
+		.width = bounds->width,
+	};
+
+	rkisp1_sd_adjust_crop_rect(crop, &frame);
+}
diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h b/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h
new file mode 100644
index 000000000000..3a134e97161c
--- /dev/null
+++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h
@@ -0,0 +1,485 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR MIT) */
+/*
+ * Rockchip ISP1 Driver - Common definitions
+ *
+ * Copyright (C) 2019 Collabora, Ltd.
+ *
+ * Based on Rockchip ISP1 driver by Rockchip Electronics Co., Ltd.
+ * Copyright (C) 2017 Rockchip Electronics Co., Ltd.
+ */
+
+#ifndef _RKISP1_COMMON_H
+#define _RKISP1_COMMON_H
+
+#include <linux/clk.h>
+#include <linux/mutex.h>
+#include <linux/rkisp1-config.h>
+#include <media/media-device.h>
+#include <media/media-entity.h>
+#include <media/v4l2-ctrls.h>
+#include <media/v4l2-device.h>
+#include <media/videobuf2-v4l2.h>
+
+#include "rkisp1-regs.h"
+
+/*
+ * flags on the 'direction' field in struct 'rkisp1_isp_mbus_info' that indicate
+ * on which pad the media bus format is supported
+ */
+#define RKISP1_ISP_SD_SRC BIT(0)
+#define RKISP1_ISP_SD_SINK BIT(1)
+
+/* min and max values for the widths and heights of the entities */
+#define RKISP1_ISP_MAX_WIDTH		4032
+#define RKISP1_ISP_MAX_HEIGHT		3024
+#define RKISP1_ISP_MIN_WIDTH		32
+#define RKISP1_ISP_MIN_HEIGHT		32
+
+#define RKISP1_RSZ_MP_SRC_MAX_WIDTH		4416
+#define RKISP1_RSZ_MP_SRC_MAX_HEIGHT		3312
+#define RKISP1_RSZ_SP_SRC_MAX_WIDTH		1920
+#define RKISP1_RSZ_SP_SRC_MAX_HEIGHT		1920
+#define RKISP1_RSZ_SRC_MIN_WIDTH		32
+#define RKISP1_RSZ_SRC_MIN_HEIGHT		16
+
+/* the default width and height of all the entities */
+#define RKISP1_DEFAULT_WIDTH		800
+#define RKISP1_DEFAULT_HEIGHT		600
+
+#define RKISP1_DRIVER_NAME	"rkisp1"
+#define RKISP1_BUS_INFO		"platform:" RKISP1_DRIVER_NAME
+
+/* maximum number of clocks */
+#define RKISP1_MAX_BUS_CLK	8
+
+/* a bitmask of the ready stats */
+#define RKISP1_STATS_MEAS_MASK		(RKISP1_CIF_ISP_AWB_DONE |	\
+					 RKISP1_CIF_ISP_AFM_FIN |	\
+					 RKISP1_CIF_ISP_EXP_END |	\
+					 RKISP1_CIF_ISP_HIST_MEASURE_RDY)
+
+/* enum for the resizer pads */
+enum rkisp1_rsz_pad {
+	RKISP1_RSZ_PAD_SINK,
+	RKISP1_RSZ_PAD_SRC,
+	RKISP1_RSZ_PAD_MAX
+};
+
+/* enum for the capture id */
+enum rkisp1_stream_id {
+	RKISP1_MAINPATH,
+	RKISP1_SELFPATH,
+};
+
+/* bayer patterns */
+enum rkisp1_fmt_raw_pat_type {
+	RKISP1_RAW_RGGB = 0,
+	RKISP1_RAW_GRBG,
+	RKISP1_RAW_GBRG,
+	RKISP1_RAW_BGGR,
+};
+
+/* enum for the isp pads */
+enum rkisp1_isp_pad {
+	RKISP1_ISP_PAD_SINK_VIDEO,
+	RKISP1_ISP_PAD_SINK_PARAMS,
+	RKISP1_ISP_PAD_SOURCE_VIDEO,
+	RKISP1_ISP_PAD_SOURCE_STATS,
+	RKISP1_ISP_PAD_MAX
+};
+
+/*
+ * struct rkisp1_sensor_async - A container for the v4l2_async_subdev to add to the notifier
+ *				of the v4l2-async API
+ *
+ * @asd:		async_subdev variable for the sensor
+ * @lanes:		number of lanes
+ * @mbus_type:		type of bus (currently only CSI2 is supported)
+ * @mbus_flags:		media bus (V4L2_MBUS_*) flags
+ * @sd:			a pointer to v4l2_subdev struct of the sensor
+ * @pixel_rate_ctrl:	pixel rate of the sensor, used to initialize the phy
+ * @dphy:		a pointer to the phy
+ */
+struct rkisp1_sensor_async {
+	struct v4l2_async_subdev asd;
+	unsigned int lanes;
+	enum v4l2_mbus_type mbus_type;
+	unsigned int mbus_flags;
+	struct v4l2_subdev *sd;
+	struct v4l2_ctrl *pixel_rate_ctrl;
+	struct phy *dphy;
+};
+
+/*
+ * struct rkisp1_isp - ISP subdev entity
+ *
+ * @sd:				v4l2_subdev variable
+ * @pads:			media pads
+ * @pad_cfg:			pads configurations
+ * @sink_fmt:			input format
+ * @src_fmt:			output format
+ * @ops_lock:			ops serialization
+ * @is_dphy_errctrl_disabled:	if dphy errctrl is disabled (avoid endless
+ *				interrupt)
+ * @frame_sequence:		used to synchronize frame_id between video devices.
+ */
+struct rkisp1_isp {
+	struct v4l2_subdev sd;
+	struct media_pad pads[RKISP1_ISP_PAD_MAX];
+	struct v4l2_subdev_pad_config pad_cfg[RKISP1_ISP_PAD_MAX];
+	const struct rkisp1_isp_mbus_info *sink_fmt;
+	const struct rkisp1_isp_mbus_info *src_fmt;
+	struct mutex ops_lock; /* serialize the subdevice ops */
+	bool is_dphy_errctrl_disabled;
+	__u32 frame_sequence;
+};
+
+/*
+ * struct rkisp1_vdev_node - Container for the video nodes: params, stats, mainpath, selfpath
+ *
+ * @buf_queue:	queue of buffers
+ * @vlock:	lock of the video node
+ * @vdev:	video node
+ * @pad:	media pad
+ */
+struct rkisp1_vdev_node {
+	struct vb2_queue buf_queue;
+	struct mutex vlock; /* ioctl serialization mutex */
+	struct video_device vdev;
+	struct media_pad pad;
+};
+
+/*
+ * struct rkisp1_buffer - A container for the vb2 buffers used by the video devices:
+ *			  params, stats, mainpath, selfpath
+ *
+ * @vb:		vb2 buffer
+ * @queue:	entry of the buffer in the queue
+ * @buff_addr:	dma addresses of each plane, used only by the capture devices: selfpath, mainpath
+ * @vaddr:	virtual address for buffers used by params and stats devices
+ */
+struct rkisp1_buffer {
+	struct vb2_v4l2_buffer vb;
+	struct list_head queue;
+	union {
+		u32 buff_addr[VIDEO_MAX_PLANES];
+		void *vaddr;
+	};
+};
+
+/*
+ * struct rkisp1_dummy_buffer - A buffer to write the next frame to in case
+ *				there are no vb2 buffers available.
+ *
+ * @vaddr:	return value of call to dma_alloc_attrs.
+ * @dma_addr:	dma address of the buffer.
+ * @size:	size of the buffer.
+ */
+struct rkisp1_dummy_buffer {
+	void *vaddr;
+	dma_addr_t dma_addr;
+	u32 size;
+};
+
+struct rkisp1_device;
+
+/*
+ * struct rkisp1_capture - ISP capture video device
+ *
+ * @vnode:	  video node
+ * @rkisp1:	  pointer to rkisp1_device
+ * @id:		  id of the capture, one of RKISP1_SELFPATH, RKISP1_MAINPATH
+ * @ops:	  list of callbacks to configure the capture device.
+ * @config:	  a pointer to the list of registers to configure the capture format.
+ * @is_streaming: device is streaming
+ * @is_stopping:  stop_streaming callback was called and the device is in the process of
+ *		  stopping the streaming.
+ * @done:	  when stop_streaming callback is called, the device waits for the next irq
+ *		  handler to stop the streaming by waiting on the 'done' wait queue.
+ *		  If the irq handler is not called, the stream is stopped by the callback
+ *		  after timeout.
+ * @sp_y_stride:  the selfpath allows to configure a y stride that is longer than the image width.
+ * @buf.lock:	  lock to protect buf.queue, buf.curr and buf.next
+ * @buf.queue:	  queued buffer list
+ * @buf.dummy:	  dummy space to store dropped data
+ *
+ * rkisp1 uses shadow registers, so it needs two buffers at a time
+ * @buf.curr:	  the buffer used for current frame
+ * @buf.next:	  the buffer used for next frame
+ * @pix.cfg:	  pixel configuration
+ * @pix.info:	  a pointer to the v4l2_format_info of the pixel format
+ * @pix.fmt:	  buffer format
+ */
+struct rkisp1_capture {
+	struct rkisp1_vdev_node vnode;
+	struct rkisp1_device *rkisp1;
+	enum rkisp1_stream_id id;
+	struct rkisp1_capture_ops *ops;
+	const struct rkisp1_capture_config *config;
+	bool is_streaming;
+	bool is_stopping;
+	wait_queue_head_t done;
+	unsigned int sp_y_stride;
+	struct {
+		/* protects queue, curr and next */
+		spinlock_t lock;
+		struct list_head queue;
+		struct rkisp1_dummy_buffer dummy;
+		struct rkisp1_buffer *curr;
+		struct rkisp1_buffer *next;
+	} buf;
+	struct {
+		const struct rkisp1_capture_fmt_cfg *cfg;
+		const struct v4l2_format_info *info;
+		struct v4l2_pix_format_mplane fmt;
+	} pix;
+};
+
+/*
+ * struct rkisp1_stats - ISP Statistics device
+ *
+ * @vnode:	  video node
+ * @rkisp1:	  pointer to the rkisp1 device
+ * @lock:	  locks the buffer list 'stat'
+ * @stat:	  queue of rkisp1_buffer
+ * @vdev_fmt:	  v4l2_format of the metadata format
+ */
+struct rkisp1_stats {
+	struct rkisp1_vdev_node vnode;
+	struct rkisp1_device *rkisp1;
+
+	spinlock_t lock; /* locks the buffers list 'stat' */
+	struct list_head stat;
+	struct v4l2_format vdev_fmt;
+};
+
+/*
+ * struct rkisp1_params - ISP input parameters device
+ *
+ * @vnode:		video node
+ * @rkisp1:		pointer to the rkisp1 device
+ * @config_lock:	locks the buffer list 'params'
+ * @params:		queue of rkisp1_buffer
+ * @vdev_fmt:		v4l2_format of the metadata format
+ * @quantization:	the quantization configured on the isp's src pad
+ * @raw_type:		the bayer pattern on the isp video sink pad
+ */
+struct rkisp1_params {
+	struct rkisp1_vdev_node vnode;
+	struct rkisp1_device *rkisp1;
+
+	spinlock_t config_lock; /* locks the buffers list 'params' */
+	struct list_head params;
+	struct v4l2_format vdev_fmt;
+
+	enum v4l2_quantization quantization;
+	enum rkisp1_fmt_raw_pat_type raw_type;
+};
+
+/*
+ * struct rkisp1_resizer - Resizer subdev
+ *
+ * @sd:	       v4l2_subdev variable
+ * @id:	       id of the resizer, one of RKISP1_SELFPATH, RKISP1_MAINPATH
+ * @rkisp1:    pointer to the rkisp1 device
+ * @pads:      media pads
+ * @pad_cfg:   configurations for the pads
+ * @config:    the set of registers to configure the resizer
+ * @pixel_enc: pixel encoding of the resizer
+ * @ops_lock:  a lock for the subdev ops
+ */
+struct rkisp1_resizer {
+	struct v4l2_subdev sd;
+	enum rkisp1_stream_id id;
+	struct rkisp1_device *rkisp1;
+	struct media_pad pads[RKISP1_RSZ_PAD_MAX];
+	struct v4l2_subdev_pad_config pad_cfg[RKISP1_RSZ_PAD_MAX];
+	const struct rkisp1_rsz_config *config;
+	enum v4l2_pixel_encoding pixel_enc;
+	struct mutex ops_lock; /* serialize the subdevice ops */
+};
+
+/*
+ * struct rkisp1_debug - Values to be exposed on debugfs.
+ *			 The parameters are counters of the number of times the
+ *			 event occurred since the driver was loaded.
+ *
+ * @data_loss:			  loss of data occurred within a line, processing failure
+ * @outform_size_error:		  size error is generated in outmux submodule
+ * @img_stabilization_size_error: size error is generated in image stabilization submodule
+ * @inform_size_error:		  size error is generated in inform submodule
+ * @mipi_error:			  mipi error occurred
+ * @stats_error:		  writing to the 'Interrupt clear register' did not clear
+ *				  it in the register 'Masked interrupt status'
+ * @stop_timeout:		  upon stream stop, the capture waits 1 second for the isr to stop
+ *				  the stream. This param is incremented in case of timeout.
+ * @frame_drop:			  a frame was ready but the buffer queue was empty so the frame
+ *				  was not sent to userspace
+ */
+struct rkisp1_debug {
+	struct dentry *debugfs_dir;
+	unsigned long data_loss;
+	unsigned long outform_size_error;
+	unsigned long img_stabilization_size_error;
+	unsigned long inform_size_error;
+	unsigned long irq_delay;
+	unsigned long mipi_error;
+	unsigned long stats_error;
+	unsigned long stop_timeout[2];
+	unsigned long frame_drop[2];
+};
+
+/*
+ * struct rkisp1_device - ISP platform device
+ *
+ * @base_addr:	   base register address
+ * @irq:	   the irq number
+ * @dev:	   a pointer to the struct device
+ * @clk_size:	   number of clocks
+ * @clks:	   array of clocks
+ * @v4l2_dev:	   v4l2_device variable
+ * @media_dev:	   media_device variable
+ * @notifier:	   a notifier to register on the v4l2-async API to be notified on the sensor
+ * @active_sensor: sensor in-use, set when streaming on
+ * @isp:	   ISP sub-device
+ * @resizer_devs:  resizer sub-devices
+ * @capture_devs:  capture devices
+ * @stats:	   ISP statistics metadata capture device
+ * @params:	   ISP parameters metadata output device
+ * @pipe:	   media pipeline
+ * @stream_lock:   serializes {start/stop}_streaming callbacks between the capture devices.
+ * @debug:	   debug params to be exposed on debugfs
+ */
+struct rkisp1_device {
+	void __iomem *base_addr;
+	int irq;
+	struct device *dev;
+	unsigned int clk_size;
+	struct clk_bulk_data clks[RKISP1_MAX_BUS_CLK];
+	struct v4l2_device v4l2_dev;
+	struct media_device media_dev;
+	struct v4l2_async_notifier notifier;
+	struct rkisp1_sensor_async *active_sensor;
+	struct rkisp1_isp isp;
+	struct rkisp1_resizer resizer_devs[2];
+	struct rkisp1_capture capture_devs[2];
+	struct rkisp1_stats stats;
+	struct rkisp1_params params;
+	struct media_pipeline pipe;
+	struct mutex stream_lock; /* serialize {start/stop}_streaming cb between capture devices */
+	struct rkisp1_debug debug;
+};
+
+/*
+ * struct rkisp1_isp_mbus_info - ISP media bus info, Translates media bus code to hardware
+ *				 format values
+ *
+ * @mbus_code: media bus code
+ * @pixel_enc: pixel encoding
+ * @mipi_dt:   mipi data type
+ * @yuv_seq:   the order of the Y, Cb, Cr values
+ * @bus_width: bus width
+ * @bayer_pat: bayer pattern
+ * @direction: a bitmask of the flags indicating on which pad the format is supported on
+ */
+struct rkisp1_isp_mbus_info {
+	u32 mbus_code;
+	enum v4l2_pixel_encoding pixel_enc;
+	u32 mipi_dt;
+	u32 yuv_seq;
+	u8 bus_width;
+	enum rkisp1_fmt_raw_pat_type bayer_pat;
+	unsigned int direction; /* RKISP1_ISP_SD_SINK and/or RKISP1_ISP_SD_SRC */
+};
+
+static inline void
+rkisp1_write(struct rkisp1_device *rkisp1, u32 val, unsigned int addr)
+{
+	writel(val, rkisp1->base_addr + addr); /* addr is an offset from base_addr */
+}
+
+static inline u32 rkisp1_read(struct rkisp1_device *rkisp1, unsigned int addr)
+{
+	return readl(rkisp1->base_addr + addr); /* addr is an offset from base_addr */
+}
+
+/*
+ * rkisp1_cap_enum_mbus_codes - A helper function that returns the i'th supported mbus code
+ *				of the capture entity. This is used to enumerate the supported
+ *				mbus codes on the source pad of the resizer.
+ *
+ * @cap:  the capture entity
+ * @code: the mbus code, the function reads the code->index and fills the code->code
+ */
+int rkisp1_cap_enum_mbus_codes(struct rkisp1_capture *cap,
+			       struct v4l2_subdev_mbus_code_enum *code);
+
+/*
+ * rkisp1_sd_adjust_crop_rect - adjust a rectangle to fit into another rectangle.
+ *
+ * @crop:   rectangle to adjust.
+ * @bounds: rectangle used as bounds.
+ */
+void rkisp1_sd_adjust_crop_rect(struct v4l2_rect *crop,
+				const struct v4l2_rect *bounds);
+
+/*
+ * rkisp1_sd_adjust_crop - adjust a rectangle to fit into media bus format
+ *
+ * @crop:   rectangle to adjust.
+ * @bounds: media bus format used as bounds.
+ */
+void rkisp1_sd_adjust_crop(struct v4l2_rect *crop,
+			   const struct v4l2_mbus_framefmt *bounds);
+
+/*
+ * rkisp1_isp_mbus_info_get - get the isp info of the media bus code
+ *
+ * @mbus_code: the media bus code
+ */
+const struct rkisp1_isp_mbus_info *rkisp1_isp_mbus_info_get(u32 mbus_code);
+
+/* rkisp1_params_configure - configure the params when stream starts.
+ *			     This function is called by the isp entity when the stream starts.
+ *			     The function applies the initial configuration of the parameters.
+ *
+ * @params:	  pointer to rkisp1_params.
+ * @bayer_pat:	  the bayer pattern on the isp video sink pad
+ * @quantization: the quantization configured on the isp's src pad
+ */
+void rkisp1_params_configure(struct rkisp1_params *params,
+			     enum rkisp1_fmt_raw_pat_type bayer_pat,
+			     enum v4l2_quantization quantization);
+
+/* rkisp1_params_disable - disable all parameters.
+ *			   This function is called by the isp entity upon stream start
+ *			   when capturing bayer format.
+ *
+ * @params: pointer to rkisp1_params.
+ */
+void rkisp1_params_disable(struct rkisp1_params *params);
+
+/* irq handlers */
+void rkisp1_isp_isr(struct rkisp1_device *rkisp1);
+void rkisp1_mipi_isr(struct rkisp1_device *rkisp1);
+void rkisp1_capture_isr(struct rkisp1_device *rkisp1);
+void rkisp1_stats_isr(struct rkisp1_stats *stats, u32 isp_ris);
+void rkisp1_params_isr(struct rkisp1_device *rkisp1);
+
+/* register/unregister functions of the entities */
+int rkisp1_capture_devs_register(struct rkisp1_device *rkisp1);
+void rkisp1_capture_devs_unregister(struct rkisp1_device *rkisp1);
+
+int rkisp1_isp_register(struct rkisp1_device *rkisp1);
+void rkisp1_isp_unregister(struct rkisp1_device *rkisp1);
+
+int rkisp1_resizer_devs_register(struct rkisp1_device *rkisp1);
+void rkisp1_resizer_devs_unregister(struct rkisp1_device *rkisp1);
+
+int rkisp1_stats_register(struct rkisp1_device *rkisp1);
+void rkisp1_stats_unregister(struct rkisp1_device *rkisp1);
+
+int rkisp1_params_register(struct rkisp1_device *rkisp1);
+void rkisp1_params_unregister(struct rkisp1_device *rkisp1);
+
+#endif /* _RKISP1_COMMON_H */
diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c
new file mode 100644
index 000000000000..9af137e4967f
--- /dev/null
+++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c
@@ -0,0 +1,581 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Rockchip ISP1 Driver - Base driver
+ *
+ * Copyright (C) 2019 Collabora, Ltd.
+ *
+ * Based on Rockchip ISP1 driver by Rockchip Electronics Co., Ltd.
+ * Copyright (C) 2017 Rockchip Electronics Co., Ltd.
+ */
+
+#include <linux/clk.h>
+#include <linux/debugfs.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_graph.h>
+#include <linux/of_platform.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/phy/phy.h>
+#include <linux/phy/phy-mipi-dphy.h>
+#include <media/v4l2-fwnode.h>
+
+#include "rkisp1-common.h"
+
+/*
+ * ISP Details
+ * -----------
+ *
+ * The ISP comprises:
+ *	MIPI serial camera interface
+ *	Image Signal Processing
+ *	Many Image Enhancement Blocks
+ *	Crop
+ *	Resizer
+ *	RGB display ready image
+ *	Image Rotation
+ *
+ * ISP Block Diagram
+ * -----------------
+ *                                                             rkisp1-resizer.c          rkisp1-capture.c
+ *                                                          |====================|  |=======================|
+ *                                rkisp1-isp.c                              Main Picture Path
+ *                        |==========================|      |===============================================|
+ *                        +-----------+  +--+--+--+--+      +--------+  +--------+              +-----------+
+ *                        |           |  |  |  |  |  |      |        |  |        |              |           |
+ * +--------+    |\       |           |  |  |  |  |  |   -->|  Crop  |->|  RSZ   |------------->|           |
+ * |  MIPI  |--->|  \     |           |  |  |  |  |  |   |  |        |  |        |              |           |
+ * +--------+    |   |    |           |  |IE|IE|IE|IE|   |  +--------+  +--------+              |  Memory   |
+ *               |MUX|--->|    ISP    |->|0 |1 |2 |3 |---+                                      | Interface |
+ * +--------+    |   |    |           |  |  |  |  |  |   |  +--------+  +--------+  +--------+  |           |
+ * |Parallel|--->|  /     |           |  |  |  |  |  |   |  |        |  |        |  |        |  |           |
+ * +--------+    |/       |           |  |  |  |  |  |   -->|  Crop  |->|  RSZ   |->|  RGB   |->|           |
+ *                        |           |  |  |  |  |  |      |        |  |        |  | Rotate |  |           |
+ *                        +-----------+  +--+--+--+--+      +--------+  +--------+  +--------+  +-----------+
+ *                                               ^
+ * +--------+                                    |          |===============================================|
+ * |  DMA   |------------------------------------+                          Self Picture Path
+ * +--------+
+ *
+ *         rkisp1-stats.c        rkisp1-params.c
+ *       |===============|      |===============|
+ *       +---------------+      +---------------+
+ *       |               |      |               |
+ *       |      ISP      |      |      ISP      |
+ *       |               |      |               |
+ *       +---------------+      +---------------+
+ *
+ *
+ * Media Topology
+ * --------------
+ *      +----------+     +----------+
+ *      | Sensor 2 |     | Sensor X |
+ *      ------------ ... ------------
+ *      |    0     |     |    0     |
+ *      +----------+     +----------+      +-----------+
+ *                  \      |               |  params   |
+ *                   \     |               | (output)  |
+ *    +----------+    \    |               +-----------+
+ *    | Sensor 1 |     v   v                     |
+ *    ------------      +------+------+          |
+ *    |    0     |----->|  0   |  1   |<---------+
+ *    +----------+      |------+------|
+ *                      |     ISP     |
+ *                      |------+------|
+ *        +-------------|  2   |  3   |----------+
+ *        |             +------+------+          |
+ *        |                |                     |
+ *        v                v                     v
+ *  +-----------+    +-----------+         +-----------+
+ *  |     0     |    |     0     |         |   stats   |
+ *  -------------    -------------         | (capture) |
+ *  |  Resizer  |    |  Resizer  |         +-----------+
+ *  ------------|    ------------|
+ *  |     1     |    |     1     |
+ *  +-----------+    +-----------+
+ *        |                |
+ *        v                v
+ *  +-----------+    +-----------+
+ *  | selfpath  |    | mainpath  |
+ *  | (capture) |    | (capture) |
+ *  +-----------+    +-----------+
+ */
+
+struct rkisp1_match_data {
+	const char * const *clks; /* clock names, copied into rkisp1->clks[].id at probe */
+	unsigned int size; /* number of entries in clks */
+};
+
+/* ----------------------------------------------------------------------------
+ * Sensor DT bindings
+ */
+
+static int rkisp1_create_links(struct rkisp1_device *rkisp1)
+{
+	struct media_entity *source, *sink;
+	unsigned int flags, source_pad;
+	struct v4l2_subdev *sd;
+	unsigned int i;
+	int ret;
+
+	/* sensor links: only the first link created is enabled by default */
+	flags = MEDIA_LNK_FL_ENABLED;
+	list_for_each_entry(sd, &rkisp1->v4l2_dev.subdevs, list) {
+		if (sd == &rkisp1->isp.sd ||
+		    sd == &rkisp1->resizer_devs[RKISP1_MAINPATH].sd ||
+		    sd == &rkisp1->resizer_devs[RKISP1_SELFPATH].sd)
+			continue; /* skip the driver's own ISP and resizer subdevs */
+
+		ret = media_entity_get_fwnode_pad(&sd->entity, sd->fwnode,
+						  MEDIA_PAD_FL_SOURCE);
+		if (ret < 0) {
+			dev_err(rkisp1->dev, "failed to find src pad for %s\n",
+				sd->name);
+			return ret;
+		}
+		source_pad = ret; /* a non-negative return value is the pad index */
+
+		ret = media_create_pad_link(&sd->entity, source_pad,
+					    &rkisp1->isp.sd.entity,
+					    RKISP1_ISP_PAD_SINK_VIDEO,
+					    flags);
+		if (ret)
+			return ret;
+
+		flags = 0; /* every following sensor link is created disabled */
+	}
+
+	flags = MEDIA_LNK_FL_ENABLED | MEDIA_LNK_FL_IMMUTABLE;
+
+	/* create ISP->RSZ->CAP links; only the RSZ->CAP leg is immutable */
+	for (i = 0; i < 2; i++) {
+		source = &rkisp1->isp.sd.entity;
+		sink = &rkisp1->resizer_devs[i].sd.entity;
+		ret = media_create_pad_link(source, RKISP1_ISP_PAD_SOURCE_VIDEO,
+					    sink, RKISP1_RSZ_PAD_SINK,
+					    MEDIA_LNK_FL_ENABLED);
+		if (ret)
+			return ret;
+
+		source = sink; /* the resizer now acts as the link source */
+		sink = &rkisp1->capture_devs[i].vnode.vdev.entity;
+		ret = media_create_pad_link(source, RKISP1_RSZ_PAD_SRC,
+					    sink, 0, flags);
+		if (ret)
+			return ret;
+	}
+
+	/* params link (enabled and immutable) */
+	source = &rkisp1->params.vnode.vdev.entity;
+	sink = &rkisp1->isp.sd.entity;
+	ret = media_create_pad_link(source, 0, sink,
+				    RKISP1_ISP_PAD_SINK_PARAMS, flags);
+	if (ret)
+		return ret;
+
+	/* 3A stats link (enabled and immutable) */
+	source = &rkisp1->isp.sd.entity;
+	sink = &rkisp1->stats.vnode.vdev.entity;
+	return media_create_pad_link(source, RKISP1_ISP_PAD_SOURCE_STATS,
+				     sink, 0, flags);
+}
+
+static int rkisp1_subdev_notifier_bound(struct v4l2_async_notifier *notifier,
+					struct v4l2_subdev *sd,
+					struct v4l2_async_subdev *asd)
+{
+	struct rkisp1_device *rkisp1 =
+		container_of(notifier, struct rkisp1_device, notifier);
+	struct rkisp1_sensor_async *s_asd =
+		container_of(asd, struct rkisp1_sensor_async, asd);
+
+	s_asd->pixel_rate_ctrl = v4l2_ctrl_find(sd->ctrl_handler,
+						V4L2_CID_PIXEL_RATE);
+	s_asd->sd = sd;
+	/* a missing D-PHY fails the bind; -EPROBE_DEFER is passed on silently */
+	s_asd->dphy = devm_phy_get(rkisp1->dev, "dphy");
+	if (IS_ERR(s_asd->dphy)) {
+		if (PTR_ERR(s_asd->dphy) != -EPROBE_DEFER)
+			dev_err(rkisp1->dev, "Couldn't get the MIPI D-PHY\n");
+		return PTR_ERR(s_asd->dphy);
+	}
+
+	/* report a D-PHY init failure instead of binding an unusable sensor */
+	return phy_init(s_asd->dphy);
+}
+
+static void rkisp1_subdev_notifier_unbind(struct v4l2_async_notifier *notifier,
+					  struct v4l2_subdev *sd,
+					  struct v4l2_async_subdev *asd)
+{
+	struct rkisp1_sensor_async *s_asd =
+		container_of(asd, struct rkisp1_sensor_async, asd);
+
+	phy_exit(s_asd->dphy); /* counterpart of the phy_init() done when the sensor was bound */
+}
+
+static int rkisp1_subdev_notifier_complete(struct v4l2_async_notifier *notifier)
+{
+	struct rkisp1_device *rkisp1 =
+		container_of(notifier, struct rkisp1_device, notifier);
+	int ret;
+
+	ret = rkisp1_create_links(rkisp1); /* sensor, resizer, capture, params and stats links */
+	if (ret)
+		return ret;
+
+	ret = v4l2_device_register_subdev_nodes(&rkisp1->v4l2_dev); /* only once all links exist */
+	if (ret)
+		return ret;
+
+	dev_dbg(rkisp1->dev, "Async subdev notifier completed\n");
+
+	return 0;
+}
+
+static const struct v4l2_async_notifier_operations rkisp1_subdev_notifier_ops = {
+	.bound = rkisp1_subdev_notifier_bound, /* gets and initializes the D-PHY */
+	.unbind = rkisp1_subdev_notifier_unbind, /* calls phy_exit() on the D-PHY */
+	.complete = rkisp1_subdev_notifier_complete, /* creates links and subdev nodes */
+};
+
+static int rkisp1_subdev_notifier(struct rkisp1_device *rkisp1)
+{
+	struct v4l2_async_notifier *ntf = &rkisp1->notifier;
+	unsigned int next_id = 0; /* lowest endpoint id still to be looked up */
+	int ret;
+
+	v4l2_async_notifier_init(ntf);
+
+	while (1) { /* one iteration per endpoint; left via break or the error return */
+		struct v4l2_fwnode_endpoint vep = {
+			.bus_type = V4L2_MBUS_CSI2_DPHY
+		};
+		struct rkisp1_sensor_async *rk_asd = NULL; /* NULL keeps kfree() in err_parse safe */
+		struct fwnode_handle *ep;
+
+		ep = fwnode_graph_get_endpoint_by_id(dev_fwnode(rkisp1->dev),
+						     0, next_id,
+						     FWNODE_GRAPH_ENDPOINT_NEXT);
+		if (!ep)
+			break; /* no further endpoint */
+
+		ret = v4l2_fwnode_endpoint_parse(ep, &vep);
+		if (ret)
+			goto err_parse;
+
+		rk_asd = kzalloc(sizeof(*rk_asd), GFP_KERNEL);
+		if (!rk_asd) {
+			ret = -ENOMEM;
+			goto err_parse;
+		}
+
+		rk_asd->mbus_type = vep.bus_type;
+		rk_asd->mbus_flags = vep.bus.mipi_csi2.flags;
+		rk_asd->lanes = vep.bus.mipi_csi2.num_data_lanes;
+
+		ret = v4l2_async_notifier_add_fwnode_remote_subdev(ntf, ep,
+								   &rk_asd->asd);
+		if (ret)
+			goto err_parse;
+
+		dev_dbg(rkisp1->dev, "registered ep id %d with %d lanes\n",
+			vep.base.id, rk_asd->lanes);
+
+		next_id = vep.base.id + 1; /* continue the search after this endpoint */
+
+		fwnode_handle_put(ep);
+
+		continue;
+err_parse: /* reached only by goto; releases what this iteration acquired */
+		fwnode_handle_put(ep);
+		kfree(rk_asd);
+		v4l2_async_notifier_cleanup(ntf);
+		return ret;
+	}
+
+	if (next_id == 0) /* the loop never got past its first lookup */
+		dev_dbg(rkisp1->dev, "no remote subdevice found\n");
+	ntf->ops = &rkisp1_subdev_notifier_ops;
+	ret = v4l2_async_notifier_register(&rkisp1->v4l2_dev, ntf);
+	if (ret) {
+		v4l2_async_notifier_cleanup(ntf);
+		return ret;
+	}
+	return 0;
+}
+
+/* ----------------------------------------------------------------------------
+ * Power
+ */
+
+static int __maybe_unused rkisp1_runtime_suspend(struct device *dev)
+{
+	struct rkisp1_device *rkisp1 = dev_get_drvdata(dev);
+
+	clk_bulk_disable_unprepare(rkisp1->clk_size, rkisp1->clks); /* clocks off before the pins sleep */
+	return pinctrl_pm_select_sleep_state(dev);
+}
+
+static int __maybe_unused rkisp1_runtime_resume(struct device *dev)
+{
+	struct rkisp1_device *rkisp1 = dev_get_drvdata(dev);
+	int ret;
+
+	ret = pinctrl_pm_select_default_state(dev); /* reverse of suspend: pins first, then clocks */
+	if (ret)
+		return ret;
+	ret = clk_bulk_prepare_enable(rkisp1->clk_size, rkisp1->clks);
+	if (ret)
+		return ret; /* NOTE(review): the pins are left in the default state on this path */
+
+	return 0;
+}
+
+static const struct dev_pm_ops rkisp1_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, /* system sleep goes through runtime PM */
+				pm_runtime_force_resume)
+	SET_RUNTIME_PM_OPS(rkisp1_runtime_suspend, rkisp1_runtime_resume, NULL)
+};
+
+/* ----------------------------------------------------------------------------
+ * Core
+ */
+
+static int rkisp1_entities_register(struct rkisp1_device *rkisp1)
+{
+	int ret;
+
+	ret = rkisp1_isp_register(rkisp1); /* first step: nothing to unwind on failure */
+	if (ret)
+		return ret;
+
+	ret = rkisp1_resizer_devs_register(rkisp1);
+	if (ret)
+		goto err_unreg_isp_subdev;
+
+	ret = rkisp1_capture_devs_register(rkisp1);
+	if (ret)
+		goto err_unreg_resizer_devs;
+
+	ret = rkisp1_stats_register(rkisp1);
+	if (ret)
+		goto err_unreg_capture_devs;
+
+	ret = rkisp1_params_register(rkisp1);
+	if (ret)
+		goto err_unreg_stats;
+
+	ret = rkisp1_subdev_notifier(rkisp1); /* last: parses the endpoints and registers the notifier */
+	if (ret) {
+		dev_err(rkisp1->dev,
+			"Failed to register subdev notifier(%d)\n", ret);
+		goto err_unreg_params;
+	}
+
+	return 0;
+err_unreg_params: /* the labels unwind in reverse order of registration */
+	rkisp1_params_unregister(rkisp1);
+err_unreg_stats:
+	rkisp1_stats_unregister(rkisp1);
+err_unreg_capture_devs:
+	rkisp1_capture_devs_unregister(rkisp1);
+err_unreg_resizer_devs:
+	rkisp1_resizer_devs_unregister(rkisp1);
+err_unreg_isp_subdev:
+	rkisp1_isp_unregister(rkisp1);
+	return ret;
+}
+
+static irqreturn_t rkisp1_isr(int irq, void *ctx)
+{
+	struct device *dev = ctx; /* the struct device handed to devm_request_irq() in probe */
+	struct rkisp1_device *rkisp1 = dev_get_drvdata(dev);
+
+	/*
+	 * Call rkisp1_capture_isr() first to handle the frame that
+	 * potentially completed using the current frame_sequence number before
+	 * it is potentially incremented by rkisp1_isp_isr() in the vertical
+	 * sync.
+	 */
+	rkisp1_capture_isr(rkisp1);
+	rkisp1_isp_isr(rkisp1);
+	rkisp1_mipi_isr(rkisp1);
+
+	return IRQ_HANDLED; /* NOTE(review): unconditional although the line is requested IRQF_SHARED */
+}
+
+static const char * const rk3399_isp_clks[] = { /* ids passed to devm_clk_bulk_get() in probe */
+	"isp",
+	"aclk",
+	"hclk",
+};
+
+static const struct rkisp1_match_data rk3399_isp_clk_data = {
+	.clks = rk3399_isp_clks,
+	.size = ARRAY_SIZE(rk3399_isp_clks),
+};
+
+static const struct of_device_id rkisp1_of_match[] = {
+	{
+		.compatible = "rockchip,rk3399-cif-isp",
+		.data = &rk3399_isp_clk_data, /* fetched with of_device_get_match_data() in probe */
+	},
+	{}, /* empty terminating entry */
+};
+MODULE_DEVICE_TABLE(of, rkisp1_of_match);
+
+static void rkisp1_debug_init(struct rkisp1_device *rkisp1)
+{
+	struct rkisp1_debug *debug = &rkisp1->debug;
+
+	debug->debugfs_dir = debugfs_create_dir(RKISP1_DRIVER_NAME, NULL);
+	if (IS_ERR(debug->debugfs_dir)) { /* failure is an ERR_PTR, never NULL */
+		dev_dbg(rkisp1->dev, "failed to create debugfs directory\n");
+		return; /* debugfs is optional: the driver keeps working without it */
+	}
+	debugfs_create_ulong("data_loss", 0444, debug->debugfs_dir,
+			     &debug->data_loss);
+	debugfs_create_ulong("outform_size_err", 0444,  debug->debugfs_dir,
+			     &debug->outform_size_error);
+	debugfs_create_ulong("img_stabilization_size_error", 0444,
+			     debug->debugfs_dir,
+			     &debug->img_stabilization_size_error);
+	debugfs_create_ulong("inform_size_error", 0444,  debug->debugfs_dir,
+			     &debug->inform_size_error);
+	debugfs_create_ulong("irq_delay", 0444,  debug->debugfs_dir,
+			     &debug->irq_delay);
+	debugfs_create_ulong("mipi_error", 0444, debug->debugfs_dir,
+			     &debug->mipi_error);
+	debugfs_create_ulong("stats_error", 0444, debug->debugfs_dir,
+			     &debug->stats_error);
+	debugfs_create_ulong("mp_stop_timeout", 0444, debug->debugfs_dir,
+			     &debug->stop_timeout[RKISP1_MAINPATH]);
+	debugfs_create_ulong("sp_stop_timeout", 0444, debug->debugfs_dir,
+			     &debug->stop_timeout[RKISP1_SELFPATH]);
+	debugfs_create_ulong("mp_frame_drop", 0444, debug->debugfs_dir,
+			     &debug->frame_drop[RKISP1_MAINPATH]);
+	debugfs_create_ulong("sp_frame_drop", 0444, debug->debugfs_dir,
+			     &debug->frame_drop[RKISP1_SELFPATH]);
+}
+
+static int rkisp1_probe(struct platform_device *pdev)
+{
+	const struct rkisp1_match_data *clk_data;
+	struct device *dev = &pdev->dev;
+	struct rkisp1_device *rkisp1;
+	struct v4l2_device *v4l2_dev;
+	unsigned int i;
+	int ret, irq;
+
+	clk_data = of_device_get_match_data(&pdev->dev);
+	if (!clk_data)
+		return -ENODEV;
+
+	rkisp1 = devm_kzalloc(dev, sizeof(*rkisp1), GFP_KERNEL);
+	if (!rkisp1)
+		return -ENOMEM;
+
+	dev_set_drvdata(dev, rkisp1); /* read back by the ISR and the runtime PM callbacks */
+	rkisp1->dev = dev;
+
+	mutex_init(&rkisp1->stream_lock);
+
+	rkisp1->base_addr = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(rkisp1->base_addr))
+		return PTR_ERR(rkisp1->base_addr);
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	ret = devm_request_irq(dev, irq, rkisp1_isr, IRQF_SHARED,
+			       dev_driver_string(dev), dev);
+	if (ret) {
+		dev_err(dev, "request irq failed: %d\n", ret);
+		return ret;
+	}
+
+	rkisp1->irq = irq;
+
+	for (i = 0; i < clk_data->size; i++)
+		rkisp1->clks[i].id = clk_data->clks[i];
+	ret = devm_clk_bulk_get(dev, clk_data->size, rkisp1->clks);
+	if (ret)
+		return ret;
+	rkisp1->clk_size = clk_data->size;
+
+	pm_runtime_enable(&pdev->dev); /* every failure below must disable it again */
+
+	strscpy(rkisp1->media_dev.model, RKISP1_DRIVER_NAME,
+		sizeof(rkisp1->media_dev.model));
+	rkisp1->media_dev.dev = &pdev->dev;
+	strscpy(rkisp1->media_dev.bus_info, RKISP1_BUS_INFO,
+		sizeof(rkisp1->media_dev.bus_info));
+	media_device_init(&rkisp1->media_dev);
+
+	v4l2_dev = &rkisp1->v4l2_dev;
+	v4l2_dev->mdev = &rkisp1->media_dev;
+	strscpy(v4l2_dev->name, RKISP1_DRIVER_NAME, sizeof(v4l2_dev->name));
+
+	ret = v4l2_device_register(rkisp1->dev, &rkisp1->v4l2_dev);
+	if (ret)
+		goto err_pm_runtime_disable;
+
+	ret = media_device_register(&rkisp1->media_dev);
+	if (ret) {
+		dev_err(dev, "Failed to register media device: %d\n", ret);
+		goto err_unreg_v4l2_dev;
+	}
+
+	ret = rkisp1_entities_register(rkisp1);
+	if (ret)
+		goto err_unreg_media_dev;
+
+	rkisp1_debug_init(rkisp1); /* returns void: debugfs problems never fail the probe */
+
+	return 0;
+err_unreg_media_dev:
+	media_device_unregister(&rkisp1->media_dev);
+err_unreg_v4l2_dev:
+	v4l2_device_unregister(&rkisp1->v4l2_dev);
+err_pm_runtime_disable:
+	pm_runtime_disable(&pdev->dev);
+	return ret;
+}
+
+static int rkisp1_remove(struct platform_device *pdev)
+{
+	struct rkisp1_device *rkisp1 = platform_get_drvdata(pdev);
+
+	v4l2_async_notifier_unregister(&rkisp1->notifier);
+	v4l2_async_notifier_cleanup(&rkisp1->notifier);
+
+	rkisp1_params_unregister(rkisp1); /* same order as the rkisp1_entities_register() unwind */
+	rkisp1_stats_unregister(rkisp1);
+	rkisp1_capture_devs_unregister(rkisp1);
+	rkisp1_resizer_devs_unregister(rkisp1);
+	rkisp1_isp_unregister(rkisp1);
+
+	media_device_unregister(&rkisp1->media_dev);
+	v4l2_device_unregister(&rkisp1->v4l2_dev);
+
+	pm_runtime_disable(&pdev->dev);
+
+	debugfs_remove_recursive(rkisp1->debug.debugfs_dir); /* drops the directory and its counter files */
+	return 0;
+}
+
+static struct platform_driver rkisp1_drv = {
+	.driver = {
+		.name = RKISP1_DRIVER_NAME,
+		.of_match_table = of_match_ptr(rkisp1_of_match),
+		.pm = &rkisp1_pm_ops, /* runtime PM and system sleep callbacks */
+	},
+	.probe = rkisp1_probe,
+	.remove = rkisp1_remove,
+};
+
+module_platform_driver(rkisp1_drv);
+MODULE_DESCRIPTION("Rockchip ISP1 platform driver");
+MODULE_LICENSE("Dual MIT/GPL");
diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c
new file mode 100644
index 000000000000..889982d8ca41
--- /dev/null
+++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c
@@ -0,0 +1,1160 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Rockchip ISP1 Driver - ISP Subdevice
+ *
+ * Copyright (C) 2019 Collabora, Ltd.
+ *
+ * Based on Rockchip ISP1 driver by Rockchip Electronics Co., Ltd.
+ * Copyright (C) 2017 Rockchip Electronics Co., Ltd.
+ */
+
+#include <linux/iopoll.h>
+#include <linux/phy/phy.h>
+#include <linux/phy/phy-mipi-dphy.h>
+#include <linux/pm_runtime.h>
+#include <linux/videodev2.h>
+#include <linux/vmalloc.h>
+#include <media/v4l2-event.h>
+
+#include "rkisp1-common.h"
+
+#define RKISP1_DEF_SINK_PAD_FMT MEDIA_BUS_FMT_SRGGB10_1X10
+#define RKISP1_DEF_SRC_PAD_FMT MEDIA_BUS_FMT_YUYV8_2X8
+
+#define RKISP1_ISP_DEV_NAME	RKISP1_DRIVER_NAME "_isp"
+
+/*
+ * NOTE: MIPI controller and input MUX are also configured in this file.
+ * This is because ISP Subdev describes not only ISP submodule (input size,
+ * format, output size, format), but also a virtual route device.
+ */
+
+/*
+ * There are many variables named with format/frame in below code,
+ * please see here for their meaning.
+ * Cropping in the sink pad defines the image region from the sensor.
+ * Cropping in the source pad defines the region for the Image Stabilizer (IS)
+ *
+ * Cropping regions of ISP
+ *
+ * +---------------------------------------------------------+
+ * | Sensor image                                            |
+ * | +---------------------------------------------------+   |
+ * | | CIF_ISP_ACQ (for black level)                     |   |
+ * | | sink pad format                                   |   |
+ * | | +--------------------------------------------+    |   |
+ * | | |    CIF_ISP_OUT                             |    |   |
+ * | | |    sink pad crop                           |    |   |
+ * | | |    +---------------------------------+     |    |   |
+ * | | |    |   CIF_ISP_IS                    |     |    |   |
+ * | | |    |   source pad crop and format    |     |    |   |
+ * | | |    +---------------------------------+     |    |   |
+ * | | +--------------------------------------------+    |   |
+ * | +---------------------------------------------------+   |
+ * +---------------------------------------------------------+
+ */
+
+static const struct rkisp1_isp_mbus_info rkisp1_isp_formats[] = {
+	{
+		.mbus_code	= MEDIA_BUS_FMT_YUYV8_2X8,
+		.pixel_enc	= V4L2_PIXEL_ENC_YUV,
+		.direction	= RKISP1_ISP_SD_SRC, /* the only entry that is source-pad only */
+	}, {
+		.mbus_code	= MEDIA_BUS_FMT_SRGGB10_1X10,
+		.pixel_enc	= V4L2_PIXEL_ENC_BAYER,
+		.mipi_dt	= RKISP1_CIF_CSI2_DT_RAW10,
+		.bayer_pat	= RKISP1_RAW_RGGB,
+		.bus_width	= 10,
+		.direction	= RKISP1_ISP_SD_SINK | RKISP1_ISP_SD_SRC, /* all bayer entries: both pads */
+	}, {
+		.mbus_code	= MEDIA_BUS_FMT_SBGGR10_1X10,
+		.pixel_enc	= V4L2_PIXEL_ENC_BAYER,
+		.mipi_dt	= RKISP1_CIF_CSI2_DT_RAW10,
+		.bayer_pat	= RKISP1_RAW_BGGR,
+		.bus_width	= 10,
+		.direction	= RKISP1_ISP_SD_SINK | RKISP1_ISP_SD_SRC,
+	}, {
+		.mbus_code	= MEDIA_BUS_FMT_SGBRG10_1X10,
+		.pixel_enc	= V4L2_PIXEL_ENC_BAYER,
+		.mipi_dt	= RKISP1_CIF_CSI2_DT_RAW10,
+		.bayer_pat	= RKISP1_RAW_GBRG,
+		.bus_width	= 10,
+		.direction	= RKISP1_ISP_SD_SINK | RKISP1_ISP_SD_SRC,
+	}, {
+		.mbus_code	= MEDIA_BUS_FMT_SGRBG10_1X10,
+		.pixel_enc	= V4L2_PIXEL_ENC_BAYER,
+		.mipi_dt	= RKISP1_CIF_CSI2_DT_RAW10,
+		.bayer_pat	= RKISP1_RAW_GRBG,
+		.bus_width	= 10,
+		.direction	= RKISP1_ISP_SD_SINK | RKISP1_ISP_SD_SRC,
+	}, {
+		.mbus_code	= MEDIA_BUS_FMT_SRGGB12_1X12,
+		.pixel_enc	= V4L2_PIXEL_ENC_BAYER,
+		.mipi_dt	= RKISP1_CIF_CSI2_DT_RAW12,
+		.bayer_pat	= RKISP1_RAW_RGGB,
+		.bus_width	= 12,
+		.direction	= RKISP1_ISP_SD_SINK | RKISP1_ISP_SD_SRC,
+	}, {
+		.mbus_code	= MEDIA_BUS_FMT_SBGGR12_1X12,
+		.pixel_enc	= V4L2_PIXEL_ENC_BAYER,
+		.mipi_dt	= RKISP1_CIF_CSI2_DT_RAW12,
+		.bayer_pat	= RKISP1_RAW_BGGR,
+		.bus_width	= 12,
+		.direction	= RKISP1_ISP_SD_SINK | RKISP1_ISP_SD_SRC,
+	}, {
+		.mbus_code	= MEDIA_BUS_FMT_SGBRG12_1X12,
+		.pixel_enc	= V4L2_PIXEL_ENC_BAYER,
+		.mipi_dt	= RKISP1_CIF_CSI2_DT_RAW12,
+		.bayer_pat	= RKISP1_RAW_GBRG,
+		.bus_width	= 12,
+		.direction	= RKISP1_ISP_SD_SINK | RKISP1_ISP_SD_SRC,
+	}, {
+		.mbus_code	= MEDIA_BUS_FMT_SGRBG12_1X12,
+		.pixel_enc	= V4L2_PIXEL_ENC_BAYER,
+		.mipi_dt	= RKISP1_CIF_CSI2_DT_RAW12,
+		.bayer_pat	= RKISP1_RAW_GRBG,
+		.bus_width	= 12,
+		.direction	= RKISP1_ISP_SD_SINK | RKISP1_ISP_SD_SRC,
+	}, {
+		.mbus_code	= MEDIA_BUS_FMT_SRGGB8_1X8,
+		.pixel_enc	= V4L2_PIXEL_ENC_BAYER,
+		.mipi_dt	= RKISP1_CIF_CSI2_DT_RAW8,
+		.bayer_pat	= RKISP1_RAW_RGGB,
+		.bus_width	= 8,
+		.direction	= RKISP1_ISP_SD_SINK | RKISP1_ISP_SD_SRC,
+	}, {
+		.mbus_code	= MEDIA_BUS_FMT_SBGGR8_1X8,
+		.pixel_enc	= V4L2_PIXEL_ENC_BAYER,
+		.mipi_dt	= RKISP1_CIF_CSI2_DT_RAW8,
+		.bayer_pat	= RKISP1_RAW_BGGR,
+		.bus_width	= 8,
+		.direction	= RKISP1_ISP_SD_SINK | RKISP1_ISP_SD_SRC,
+	}, {
+		.mbus_code	= MEDIA_BUS_FMT_SGBRG8_1X8,
+		.pixel_enc	= V4L2_PIXEL_ENC_BAYER,
+		.mipi_dt	= RKISP1_CIF_CSI2_DT_RAW8,
+		.bayer_pat	= RKISP1_RAW_GBRG,
+		.bus_width	= 8,
+		.direction	= RKISP1_ISP_SD_SINK | RKISP1_ISP_SD_SRC,
+	}, {
+		.mbus_code	= MEDIA_BUS_FMT_SGRBG8_1X8,
+		.pixel_enc	= V4L2_PIXEL_ENC_BAYER,
+		.mipi_dt	= RKISP1_CIF_CSI2_DT_RAW8,
+		.bayer_pat	= RKISP1_RAW_GRBG,
+		.bus_width	= 8,
+		.direction	= RKISP1_ISP_SD_SINK | RKISP1_ISP_SD_SRC,
+	}, {
+		.mbus_code	= MEDIA_BUS_FMT_YUYV8_1X16,
+		.pixel_enc	= V4L2_PIXEL_ENC_YUV,
+		.mipi_dt	= RKISP1_CIF_CSI2_DT_YUV422_8b,
+		.yuv_seq	= RKISP1_CIF_ISP_ACQ_PROP_YCBYCR,
+		.bus_width	= 16,
+		.direction	= RKISP1_ISP_SD_SINK, /* the 1X16 YUV entries are sink-pad only */
+	}, {
+		.mbus_code	= MEDIA_BUS_FMT_YVYU8_1X16,
+		.pixel_enc	= V4L2_PIXEL_ENC_YUV,
+		.mipi_dt	= RKISP1_CIF_CSI2_DT_YUV422_8b,
+		.yuv_seq	= RKISP1_CIF_ISP_ACQ_PROP_YCRYCB,
+		.bus_width	= 16,
+		.direction	= RKISP1_ISP_SD_SINK,
+	}, {
+		.mbus_code	= MEDIA_BUS_FMT_UYVY8_1X16,
+		.pixel_enc	= V4L2_PIXEL_ENC_YUV,
+		.mipi_dt	= RKISP1_CIF_CSI2_DT_YUV422_8b,
+		.yuv_seq	= RKISP1_CIF_ISP_ACQ_PROP_CBYCRY,
+		.bus_width	= 16,
+		.direction	= RKISP1_ISP_SD_SINK,
+	}, {
+		.mbus_code	= MEDIA_BUS_FMT_VYUY8_1X16,
+		.pixel_enc	= V4L2_PIXEL_ENC_YUV,
+		.mipi_dt	= RKISP1_CIF_CSI2_DT_YUV422_8b,
+		.yuv_seq	= RKISP1_CIF_ISP_ACQ_PROP_CRYCBY,
+		.bus_width	= 16,
+		.direction	= RKISP1_ISP_SD_SINK,
+	},
+};
+
+/* ----------------------------------------------------------------------------
+ * Helpers
+ */
+
+const struct rkisp1_isp_mbus_info *rkisp1_isp_mbus_info_get(u32 mbus_code)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(rkisp1_isp_formats); i++) { /* linear search of the table */
+		const struct rkisp1_isp_mbus_info *fmt = &rkisp1_isp_formats[i];
+
+		if (fmt->mbus_code == mbus_code)
+			return fmt; /* first (and only) entry with this code */
+	}
+
+	return NULL; /* mbus_code is not listed in rkisp1_isp_formats */
+}
+
+static struct v4l2_subdev *rkisp1_get_remote_sensor(struct v4l2_subdev *sd)
+{
+	struct media_pad *local, *remote;
+	struct media_entity *sensor_me;
+
+	local = &sd->entity.pads[RKISP1_ISP_PAD_SINK_VIDEO]; /* sd is expected to be the ISP subdev */
+	remote = media_entity_remote_pad(local);
+	if (!remote)
+		return NULL; /* no remote pad found for the video sink pad */
+
+	sensor_me = remote->entity;
+	return media_entity_to_v4l2_subdev(sensor_me); /* NOTE(review): assumes the entity is a subdev */
+}
+
+static struct v4l2_mbus_framefmt *
+rkisp1_isp_get_pad_fmt(struct rkisp1_isp *isp,
+		       struct v4l2_subdev_pad_config *cfg,
+		       unsigned int pad, u32 which)
+{
+	if (which == V4L2_SUBDEV_FORMAT_TRY)
+		return v4l2_subdev_get_try_format(&isp->sd, cfg, pad);
+	else /* the active format is kept in isp->pad_cfg and read with the same helper */
+		return v4l2_subdev_get_try_format(&isp->sd, isp->pad_cfg, pad);
+}
+
+static struct v4l2_rect *
+rkisp1_isp_get_pad_crop(struct rkisp1_isp *isp,
+			struct v4l2_subdev_pad_config *cfg,
+			unsigned int pad, u32 which)
+{
+	if (which == V4L2_SUBDEV_FORMAT_TRY)
+		return v4l2_subdev_get_try_crop(&isp->sd, cfg, pad);
+	else /* the active crop is kept in isp->pad_cfg and read with the same helper */
+		return v4l2_subdev_get_try_crop(&isp->sd, isp->pad_cfg, pad);
+}
+
+/* ----------------------------------------------------------------------------
+ * Camera Interface registers configurations
+ */
+
+/*
+ * Program the Image Stabilization (IS) block with the active source pad crop.
+ * This should only be called when configuring CIF
+ * or at the frame end interrupt.
+ */
+static void rkisp1_config_ism(struct rkisp1_device *rkisp1)
+{
+	struct v4l2_rect *src_crop =
+		rkisp1_isp_get_pad_crop(&rkisp1->isp, NULL,
+					RKISP1_ISP_PAD_SOURCE_VIDEO,
+					V4L2_SUBDEV_FORMAT_ACTIVE);
+	u32 val;
+
+	rkisp1_write(rkisp1, 0, RKISP1_CIF_ISP_IS_RECENTER); /* recenter, max dx/dy, displace: all 0 */
+	rkisp1_write(rkisp1, 0, RKISP1_CIF_ISP_IS_MAX_DX);
+	rkisp1_write(rkisp1, 0, RKISP1_CIF_ISP_IS_MAX_DY);
+	rkisp1_write(rkisp1, 0, RKISP1_CIF_ISP_IS_DISPLACE);
+	rkisp1_write(rkisp1, src_crop->left, RKISP1_CIF_ISP_IS_H_OFFS); /* IS window = source crop */
+	rkisp1_write(rkisp1, src_crop->top, RKISP1_CIF_ISP_IS_V_OFFS);
+	rkisp1_write(rkisp1, src_crop->width, RKISP1_CIF_ISP_IS_H_SIZE);
+	rkisp1_write(rkisp1, src_crop->height, RKISP1_CIF_ISP_IS_V_SIZE);
+
+	/* IS(Image Stabilization) is always on, working as output crop */
+	rkisp1_write(rkisp1, 1, RKISP1_CIF_ISP_IS_CTRL);
+	val = rkisp1_read(rkisp1, RKISP1_CIF_ISP_CTRL); /* read-modify-write keeps the other bits */
+	val |= RKISP1_CIF_ISP_CTRL_ISP_CFG_UPD; /* presumably requests a config update; confirm */
+	rkisp1_write(rkisp1, val, RKISP1_CIF_ISP_CTRL);
+}
+
+/*
+ * Configure the ISP blocks with the input format and size.
+ */
+static int rkisp1_config_isp(struct rkisp1_device *rkisp1)
+{
+	u32 isp_ctrl = 0, irq_mask = 0, acq_mult = 0, signal = 0;
+	const struct rkisp1_isp_mbus_info *src_fmt, *sink_fmt;
+	struct rkisp1_sensor_async *sensor;
+	struct v4l2_mbus_framefmt *sink_frm;
+	struct v4l2_rect *sink_crop;
+
+	sensor = rkisp1->active_sensor;
+	sink_fmt = rkisp1->isp.sink_fmt;
+	src_fmt = rkisp1->isp.src_fmt;
+	sink_frm = rkisp1_isp_get_pad_fmt(&rkisp1->isp, NULL,
+					  RKISP1_ISP_PAD_SINK_VIDEO,
+					  V4L2_SUBDEV_FORMAT_ACTIVE);
+	sink_crop = rkisp1_isp_get_pad_crop(&rkisp1->isp, NULL,
+					    RKISP1_ISP_PAD_SINK_VIDEO,
+					    V4L2_SUBDEV_FORMAT_ACTIVE);
+
+	if (sink_fmt->pixel_enc == V4L2_PIXEL_ENC_BAYER) {
+		acq_mult = 1;
+		if (src_fmt->pixel_enc == V4L2_PIXEL_ENC_BAYER) {
+			if (sensor->mbus_type == V4L2_MBUS_BT656)
+				isp_ctrl = RKISP1_CIF_ISP_CTRL_ISP_MODE_RAW_PICT_ITU656;
+			else
+				isp_ctrl = RKISP1_CIF_ISP_CTRL_ISP_MODE_RAW_PICT;
+		} else {
+			rkisp1_write(rkisp1, RKISP1_CIF_ISP_DEMOSAIC_TH(0xc),
+				     RKISP1_CIF_ISP_DEMOSAIC);
+
+			if (sensor->mbus_type == V4L2_MBUS_BT656)
+				isp_ctrl = RKISP1_CIF_ISP_CTRL_ISP_MODE_BAYER_ITU656;
+			else
+				isp_ctrl = RKISP1_CIF_ISP_CTRL_ISP_MODE_BAYER_ITU601;
+		}
+	} else if (sink_fmt->pixel_enc == V4L2_PIXEL_ENC_YUV) {
+		acq_mult = 2;
+		if (sensor->mbus_type == V4L2_MBUS_CSI2_DPHY) {
+			isp_ctrl = RKISP1_CIF_ISP_CTRL_ISP_MODE_ITU601;
+		} else {
+			if (sensor->mbus_type == V4L2_MBUS_BT656)
+				isp_ctrl = RKISP1_CIF_ISP_CTRL_ISP_MODE_ITU656;
+			else
+				isp_ctrl = RKISP1_CIF_ISP_CTRL_ISP_MODE_ITU601;
+		}
+
+		irq_mask |= RKISP1_CIF_ISP_DATA_LOSS;
+	}
+
+	/* Set up input acquisition properties */
+	if (sensor->mbus_type == V4L2_MBUS_BT656 ||
+	    sensor->mbus_type == V4L2_MBUS_PARALLEL) {
+		if (sensor->mbus_flags & V4L2_MBUS_PCLK_SAMPLE_RISING)
+			signal = RKISP1_CIF_ISP_ACQ_PROP_POS_EDGE;
+	}
+
+	if (sensor->mbus_type == V4L2_MBUS_PARALLEL) {
+		if (sensor->mbus_flags & V4L2_MBUS_VSYNC_ACTIVE_LOW)
+			signal |= RKISP1_CIF_ISP_ACQ_PROP_VSYNC_LOW;
+
+		if (sensor->mbus_flags & V4L2_MBUS_HSYNC_ACTIVE_LOW)
+			signal |= RKISP1_CIF_ISP_ACQ_PROP_HSYNC_LOW;
+	}
+
+	rkisp1_write(rkisp1, isp_ctrl, RKISP1_CIF_ISP_CTRL);
+	rkisp1_write(rkisp1, signal | sink_fmt->yuv_seq |
+		     RKISP1_CIF_ISP_ACQ_PROP_BAYER_PAT(sink_fmt->bayer_pat) |
+		     RKISP1_CIF_ISP_ACQ_PROP_FIELD_SEL_ALL,
+		     RKISP1_CIF_ISP_ACQ_PROP);
+	rkisp1_write(rkisp1, 0, RKISP1_CIF_ISP_ACQ_NR_FRAMES);
+
+	/* Acquisition Size */
+	rkisp1_write(rkisp1, 0, RKISP1_CIF_ISP_ACQ_H_OFFS);
+	rkisp1_write(rkisp1, 0, RKISP1_CIF_ISP_ACQ_V_OFFS);
+	rkisp1_write(rkisp1,
+		     acq_mult * sink_frm->width, RKISP1_CIF_ISP_ACQ_H_SIZE);
+	rkisp1_write(rkisp1, sink_frm->height, RKISP1_CIF_ISP_ACQ_V_SIZE);
+
+	/* ISP Out Area */
+	rkisp1_write(rkisp1, sink_crop->left, RKISP1_CIF_ISP_OUT_H_OFFS);
+	rkisp1_write(rkisp1, sink_crop->top, RKISP1_CIF_ISP_OUT_V_OFFS);
+	rkisp1_write(rkisp1, sink_crop->width, RKISP1_CIF_ISP_OUT_H_SIZE);
+	rkisp1_write(rkisp1, sink_crop->height, RKISP1_CIF_ISP_OUT_V_SIZE);
+
+	irq_mask |= RKISP1_CIF_ISP_FRAME | RKISP1_CIF_ISP_V_START |
+		    RKISP1_CIF_ISP_PIC_SIZE_ERROR;
+	rkisp1_write(rkisp1, irq_mask, RKISP1_CIF_ISP_IMSC);
+
+	if (src_fmt->pixel_enc == V4L2_PIXEL_ENC_BAYER) {
+		rkisp1_params_disable(&rkisp1->params);
+	} else {
+		struct v4l2_mbus_framefmt *src_frm;
+
+		src_frm = rkisp1_isp_get_pad_fmt(&rkisp1->isp, NULL,
+						 RKISP1_ISP_PAD_SINK_VIDEO,
+						 V4L2_SUBDEV_FORMAT_ACTIVE);
+		rkisp1_params_configure(&rkisp1->params, sink_fmt->bayer_pat,
+					src_frm->quantization);
+	}
+
+	return 0;
+}
+
+static int rkisp1_config_dvp(struct rkisp1_device *rkisp1)
+{
+	/* Pick the ACQ_PROP input-selection bits matching the sink bus width */
+	u32 acq_prop, in_sel;
+
+	switch (rkisp1->isp.sink_fmt->bus_width) {
+	case 8:
+		in_sel = RKISP1_CIF_ISP_ACQ_PROP_IN_SEL_8B_ZERO;
+		break;
+	case 10:
+		in_sel = RKISP1_CIF_ISP_ACQ_PROP_IN_SEL_10B_ZERO;
+		break;
+	case 12:
+		in_sel = RKISP1_CIF_ISP_ACQ_PROP_IN_SEL_12B;
+		break;
+	default:
+		dev_err(rkisp1->dev, "Invalid bus width\n");
+		return -EINVAL;
+	}
+
+	acq_prop = rkisp1_read(rkisp1, RKISP1_CIF_ISP_ACQ_PROP);
+	rkisp1_write(rkisp1, acq_prop | in_sel, RKISP1_CIF_ISP_ACQ_PROP);
+
+	return 0;
+}
+
+static int rkisp1_config_mipi(struct rkisp1_device *rkisp1)
+{
+	const struct rkisp1_isp_mbus_info *sink_fmt = rkisp1->isp.sink_fmt;
+	unsigned int lanes = rkisp1->active_sensor->lanes;
+	u32 mipi_ctrl;
+
+	if (lanes < 1 || lanes > 4)
+		return -EINVAL;
+
+	mipi_ctrl = RKISP1_CIF_MIPI_CTRL_NUM_LANES(lanes - 1) |
+		    RKISP1_CIF_MIPI_CTRL_SHUTDOWNLANES(0xf) |
+		    RKISP1_CIF_MIPI_CTRL_ERR_SOT_SYNC_HS_SKIP |
+		    RKISP1_CIF_MIPI_CTRL_CLOCKLANE_ENA;
+
+	rkisp1_write(rkisp1, mipi_ctrl, RKISP1_CIF_MIPI_CTRL);
+
+	/* Configure Data Type and Virtual Channel */
+	rkisp1_write(rkisp1,
+		     RKISP1_CIF_MIPI_DATA_SEL_DT(sink_fmt->mipi_dt) |
+		     RKISP1_CIF_MIPI_DATA_SEL_VC(0),
+		     RKISP1_CIF_MIPI_IMG_DATA_SEL);
+
+	/* Clear MIPI interrupts */
+	rkisp1_write(rkisp1, ~0, RKISP1_CIF_MIPI_ICR);
+	/*
+	 * NOTE(review): this said ERR_DPHY is masked here (bus may be dead on
+	 * ISP switch), yet the mask below enables it -- confirm the intent.
+	 */
+	rkisp1_write(rkisp1,
+		     RKISP1_CIF_MIPI_FRAME_END | RKISP1_CIF_MIPI_ERR_CSI |
+		     RKISP1_CIF_MIPI_ERR_DPHY |
+		     RKISP1_CIF_MIPI_SYNC_FIFO_OVFLW(0x03) |
+		     RKISP1_CIF_MIPI_ADD_DATA_OVFLW,
+		     RKISP1_CIF_MIPI_IMSC);
+
+	dev_dbg(rkisp1->dev, "\n  MIPI_CTRL 0x%08x\n"
+		"  MIPI_IMG_DATA_SEL 0x%08x\n"
+		"  MIPI_STATUS 0x%08x\n"
+		"  MIPI_IMSC 0x%08x\n",
+		rkisp1_read(rkisp1, RKISP1_CIF_MIPI_CTRL),
+		rkisp1_read(rkisp1, RKISP1_CIF_MIPI_IMG_DATA_SEL),
+		rkisp1_read(rkisp1, RKISP1_CIF_MIPI_STATUS),
+		rkisp1_read(rkisp1, RKISP1_CIF_MIPI_IMSC));
+
+	return 0;
+}
+
+/* Configure the sensor interface and select it in the VI_DPCL mux */
+static int rkisp1_config_path(struct rkisp1_device *rkisp1)
+{
+	struct rkisp1_sensor_async *sensor = rkisp1->active_sensor;
+	u32 dpcl = rkisp1_read(rkisp1, RKISP1_CIF_VI_DPCL);
+	int ret = 0;
+
+	if (sensor->mbus_type == V4L2_MBUS_BT656 ||
+	    sensor->mbus_type == V4L2_MBUS_PARALLEL) {
+		ret = rkisp1_config_dvp(rkisp1);
+		dpcl |= RKISP1_CIF_VI_DPCL_IF_SEL_PARALLEL;
+	} else if (sensor->mbus_type == V4L2_MBUS_CSI2_DPHY) {
+		ret = rkisp1_config_mipi(rkisp1);
+		dpcl |= RKISP1_CIF_VI_DPCL_IF_SEL_MIPI;
+	}
+
+	rkisp1_write(rkisp1, dpcl, RKISP1_CIF_VI_DPCL);
+
+	return ret;
+}
+
+/* Hardware configuration entry: ISP core, then input path, then ISM */
+static int rkisp1_config_cif(struct rkisp1_device *rkisp1)
+{
+	u32 cif_id;
+	int ret;
+
+	cif_id = rkisp1_read(rkisp1, RKISP1_CIF_VI_ID);
+	dev_dbg(rkisp1->dev, "CIF_ID 0x%08x\n", cif_id);
+
+	ret = rkisp1_config_isp(rkisp1);
+	if (ret)
+		return ret;
+	ret = rkisp1_config_path(rkisp1);
+	if (ret)
+		return ret;
+	rkisp1_config_ism(rkisp1);
+
+	return 0;
+}
+
+static void rkisp1_isp_stop(struct rkisp1_device *rkisp1)
+{
+	u32 val;
+
+	/*
+	 * Stop order: mask interrupts, disable MIPI output, disable the ISP,
+	 * then wait for the ISP-off status before pulsing the soft resets.
+	 */
+	/* Mask, then clear, the MIPI, ISP and MI interrupts */
+	rkisp1_write(rkisp1, 0, RKISP1_CIF_MIPI_IMSC);
+	rkisp1_write(rkisp1, ~0, RKISP1_CIF_MIPI_ICR);
+
+	rkisp1_write(rkisp1, 0, RKISP1_CIF_ISP_IMSC);
+	rkisp1_write(rkisp1, ~0, RKISP1_CIF_ISP_ICR);
+
+	rkisp1_write(rkisp1, 0, RKISP1_CIF_MI_IMSC);
+	rkisp1_write(rkisp1, ~0, RKISP1_CIF_MI_ICR);
+	val = rkisp1_read(rkisp1, RKISP1_CIF_MIPI_CTRL);
+	rkisp1_write(rkisp1, val & (~RKISP1_CIF_MIPI_CTRL_OUTPUT_ENA),
+		     RKISP1_CIF_MIPI_CTRL);
+	/* Stop ISP: drop the enable bits, then set CFG_UPD */
+	val = rkisp1_read(rkisp1, RKISP1_CIF_ISP_CTRL);
+	val &= ~(RKISP1_CIF_ISP_CTRL_ISP_INFORM_ENABLE |
+		 RKISP1_CIF_ISP_CTRL_ISP_ENABLE);
+	rkisp1_write(rkisp1, val, RKISP1_CIF_ISP_CTRL);
+
+	val = rkisp1_read(rkisp1,	RKISP1_CIF_ISP_CTRL);
+	rkisp1_write(rkisp1, val | RKISP1_CIF_ISP_CTRL_ISP_CFG_UPD,
+		     RKISP1_CIF_ISP_CTRL);
+	/* NOTE(review): the poll result is ignored; reset runs even on timeout */
+	readx_poll_timeout(readl, rkisp1->base_addr + RKISP1_CIF_ISP_RIS,
+			   val, val & RKISP1_CIF_ISP_OFF, 20, 100);
+	rkisp1_write(rkisp1,
+		     RKISP1_CIF_IRCL_MIPI_SW_RST | RKISP1_CIF_IRCL_ISP_SW_RST,
+		     RKISP1_CIF_IRCL);
+	rkisp1_write(rkisp1, 0x0, RKISP1_CIF_IRCL);
+}
+
+static void rkisp1_config_clk(struct rkisp1_device *rkisp1)
+{
+	/* Write the full set of ICCL clock bits in a single register access */
+	rkisp1_write(rkisp1,
+		     RKISP1_CIF_ICCL_ISP_CLK | RKISP1_CIF_ICCL_CP_CLK |
+		     RKISP1_CIF_ICCL_MRSZ_CLK | RKISP1_CIF_ICCL_SRSZ_CLK |
+		     RKISP1_CIF_ICCL_JPEG_CLK | RKISP1_CIF_ICCL_MI_CLK |
+		     RKISP1_CIF_ICCL_IE_CLK | RKISP1_CIF_ICCL_MIPI_CLK |
+		     RKISP1_CIF_ICCL_DCROP_CLK, RKISP1_CIF_ICCL);
+}
+
+static void rkisp1_isp_start(struct rkisp1_device *rkisp1)
+{
+	struct rkisp1_sensor_async *sensor = rkisp1->active_sensor;
+	u32 val;
+
+	rkisp1_config_clk(rkisp1);
+
+	/* Enable MIPI output; done only for CSI-2 D-PHY sensors */
+	if (sensor->mbus_type == V4L2_MBUS_CSI2_DPHY) {
+		val = rkisp1_read(rkisp1, RKISP1_CIF_MIPI_CTRL);
+		rkisp1_write(rkisp1, val | RKISP1_CIF_MIPI_CTRL_OUTPUT_ENA,
+			     RKISP1_CIF_MIPI_CTRL);
+	}
+	/* Set the ISP enable, INFORM enable and CFG_UPD bits in one write */
+	val = rkisp1_read(rkisp1, RKISP1_CIF_ISP_CTRL);
+	val |= RKISP1_CIF_ISP_CTRL_ISP_CFG_UPD |
+	       RKISP1_CIF_ISP_CTRL_ISP_ENABLE |
+	       RKISP1_CIF_ISP_CTRL_ISP_INFORM_ENABLE;
+	rkisp1_write(rkisp1, val, RKISP1_CIF_ISP_CTRL);
+
+	/*
+	 * The CIF spec asks for a sufficient delay between enabling
+	 * the MIPI interface and starting the sensor output.
+	 */
+	usleep_range(1000, 1200);
+}
+
+/* ----------------------------------------------------------------------------
+ * Subdev pad operations
+ */
+
+static int rkisp1_isp_enum_mbus_code(struct v4l2_subdev *sd,
+				     struct v4l2_subdev_pad_config *cfg,
+				     struct v4l2_subdev_mbus_code_enum *code)
+{
+	unsigned int n, pad_dir;
+	int matched = 0;
+
+	switch (code->pad) {
+	case RKISP1_ISP_PAD_SINK_VIDEO:
+		pad_dir = RKISP1_ISP_SD_SINK;
+		break;
+	case RKISP1_ISP_PAD_SOURCE_VIDEO:
+		pad_dir = RKISP1_ISP_SD_SRC;
+		break;
+	default:
+		if (code->index > 0)
+			return -EINVAL;
+		code->code = MEDIA_BUS_FMT_METADATA_FIXED;
+		return 0;
+	}
+
+	if (code->index >= ARRAY_SIZE(rkisp1_isp_formats))
+		return -EINVAL;
+
+	/* Return the index-th table entry valid for this pad's direction */
+	for (n = 0; n < ARRAY_SIZE(rkisp1_isp_formats); n++) {
+		const struct rkisp1_isp_mbus_info *info = &rkisp1_isp_formats[n];
+
+		if (!(info->direction & pad_dir) || code->index != matched++)
+			continue;
+		code->code = info->mbus_code;
+		if (pad_dir == RKISP1_ISP_SD_SRC &&
+		    info->pixel_enc == V4L2_PIXEL_ENC_YUV)
+			code->flags = V4L2_SUBDEV_MBUS_CODE_CSC_QUANTIZATION;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static int rkisp1_isp_init_config(struct v4l2_subdev *sd,
+				  struct v4l2_subdev_pad_config *cfg)
+{
+	struct v4l2_mbus_framefmt *in_fmt, *out_fmt;
+	struct v4l2_rect *in_crop, *out_crop;
+
+	/* Seed the pad config: video sink gets the default size and code */
+	in_fmt = v4l2_subdev_get_try_format(sd, cfg,
+					    RKISP1_ISP_PAD_SINK_VIDEO);
+	in_fmt->code = RKISP1_DEF_SINK_PAD_FMT;
+	in_fmt->field = V4L2_FIELD_NONE;
+	in_fmt->width = RKISP1_DEFAULT_WIDTH;
+	in_fmt->height = RKISP1_DEFAULT_HEIGHT;
+	in_crop = v4l2_subdev_get_try_crop(sd, cfg,
+					   RKISP1_ISP_PAD_SINK_VIDEO);
+	in_crop->left = 0;
+	in_crop->top = 0;
+	in_crop->width = RKISP1_DEFAULT_WIDTH;
+	in_crop->height = RKISP1_DEFAULT_HEIGHT;
+	/* Video source pad: copy of the sink, with the default source code */
+	out_fmt = v4l2_subdev_get_try_format(sd, cfg,
+					     RKISP1_ISP_PAD_SOURCE_VIDEO);
+	*out_fmt = *in_fmt;
+	out_fmt->code = RKISP1_DEF_SRC_PAD_FMT;
+	out_crop = v4l2_subdev_get_try_crop(sd, cfg,
+					    RKISP1_ISP_PAD_SOURCE_VIDEO);
+	*out_crop = *in_crop;
+
+	/* Params and stats pads: zero-sized fixed metadata format */
+	in_fmt = v4l2_subdev_get_try_format(sd, cfg,
+					    RKISP1_ISP_PAD_SINK_PARAMS);
+	out_fmt = v4l2_subdev_get_try_format(sd, cfg,
+					     RKISP1_ISP_PAD_SOURCE_STATS);
+	in_fmt->code = MEDIA_BUS_FMT_METADATA_FIXED;
+	in_fmt->field = V4L2_FIELD_NONE;
+	in_fmt->width = 0;
+	in_fmt->height = 0;
+	*out_fmt = *in_fmt;
+
+	return 0;
+}
+
+static void rkisp1_isp_set_src_fmt(struct rkisp1_isp *isp,
+				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_mbus_framefmt *format,
+				   unsigned int which)
+{
+	const struct rkisp1_isp_mbus_info *info;
+	struct v4l2_mbus_framefmt *fmt;
+	const struct v4l2_rect *crop;
+
+	fmt = rkisp1_isp_get_pad_fmt(isp, cfg,
+				     RKISP1_ISP_PAD_SOURCE_VIDEO, which);
+	crop = rkisp1_isp_get_pad_crop(isp, cfg,
+				       RKISP1_ISP_PAD_SOURCE_VIDEO, which);
+
+	fmt->code = format->code;
+	info = rkisp1_isp_mbus_info_get(fmt->code);
+	if (!info || !(info->direction & RKISP1_ISP_SD_SRC)) {
+		fmt->code = RKISP1_DEF_SRC_PAD_FMT;
+		info = rkisp1_isp_mbus_info_get(fmt->code);
+	}
+	if (which == V4L2_SUBDEV_FORMAT_ACTIVE)
+		isp->src_fmt = info;
+	fmt->width = crop->width;
+	fmt->height = crop->height;
+
+	/*
+	 * Non-YUV output is always full range. YUV output is limited range
+	 * unless userspace sets the CSC flag and asks for full range.
+	 * NB: @format may alias the stored format, so read it before writing.
+	 */
+	if (info->pixel_enc != V4L2_PIXEL_ENC_YUV)
+		fmt->quantization = V4L2_QUANTIZATION_FULL_RANGE;
+	else if (format->flags & V4L2_MBUS_FRAMEFMT_SET_CSC &&
+		 format->quantization == V4L2_QUANTIZATION_FULL_RANGE)
+		fmt->quantization = V4L2_QUANTIZATION_FULL_RANGE;
+	else
+		fmt->quantization = V4L2_QUANTIZATION_LIM_RANGE;
+
+	*format = *fmt;
+}
+
+static void rkisp1_isp_set_src_crop(struct rkisp1_isp *isp,
+				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_rect *r, unsigned int which)
+{
+	const unsigned int pad = RKISP1_ISP_PAD_SOURCE_VIDEO;
+	struct v4l2_rect *crop = rkisp1_isp_get_pad_crop(isp, cfg, pad, which);
+	const struct v4l2_rect *bounds;
+
+	bounds = rkisp1_isp_get_pad_crop(isp, cfg, RKISP1_ISP_PAD_SINK_VIDEO,
+					 which);
+
+	/*
+	 * Round left/width up to even values, then adjust against sink crop.
+	 */
+	crop->left = ALIGN(r->left, 2);
+	crop->width = ALIGN(r->width, 2);
+	crop->top = r->top;
+	crop->height = r->height;
+	rkisp1_sd_adjust_crop_rect(crop, bounds);
+
+	/* Report the rectangle actually stored back to the caller */
+	*r = *crop;
+
+	/* Propagate to the source pad format */
+	rkisp1_isp_set_src_fmt(isp, cfg,
+			       rkisp1_isp_get_pad_fmt(isp, cfg, pad, which),
+			       which);
+}
+
+static void rkisp1_isp_set_sink_crop(struct rkisp1_isp *isp,
+				     struct v4l2_subdev_pad_config *cfg,
+				     struct v4l2_rect *r, unsigned int which)
+{
+	const unsigned int pad = RKISP1_ISP_PAD_SINK_VIDEO;
+	struct v4l2_rect *crop = rkisp1_isp_get_pad_crop(isp, cfg, pad, which);
+	struct v4l2_mbus_framefmt *fmt;
+
+	fmt = rkisp1_isp_get_pad_fmt(isp, cfg, pad, which);
+
+	/* Round left/width up to even, then adjust against the sink format */
+	crop->left = ALIGN(r->left, 2);
+	crop->width = ALIGN(r->width, 2);
+	crop->top = r->top;
+	crop->height = r->height;
+	rkisp1_sd_adjust_crop(crop, fmt);
+
+	*r = *crop;
+
+	/* Propagate to the source crop by re-applying the stored rectangle */
+	rkisp1_isp_set_src_crop(isp, cfg,
+				rkisp1_isp_get_pad_crop(isp, cfg,
+					RKISP1_ISP_PAD_SOURCE_VIDEO, which),
+				which);
+}
+
+static void rkisp1_isp_set_sink_fmt(struct rkisp1_isp *isp,
+				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_mbus_framefmt *format,
+				    unsigned int which)
+{
+	const unsigned int pad = RKISP1_ISP_PAD_SINK_VIDEO;
+	struct v4l2_mbus_framefmt *fmt;
+	const struct rkisp1_isp_mbus_info *info;
+
+	fmt = rkisp1_isp_get_pad_fmt(isp, cfg, pad, which);
+
+	/* Fall back to the default code if the request is not a sink format */
+	fmt->code = format->code;
+	info = rkisp1_isp_mbus_info_get(fmt->code);
+	if (!info || !(info->direction & RKISP1_ISP_SD_SINK)) {
+		fmt->code = RKISP1_DEF_SINK_PAD_FMT;
+		info = rkisp1_isp_mbus_info_get(fmt->code);
+	}
+	if (which == V4L2_SUBDEV_FORMAT_ACTIVE)
+		isp->sink_fmt = info;
+
+	/* Clamp the requested size to the ISP min/max limits */
+	fmt->width = clamp_t(u32, format->width, RKISP1_ISP_MIN_WIDTH,
+			     RKISP1_ISP_MAX_WIDTH);
+	fmt->height = clamp_t(u32, format->height, RKISP1_ISP_MIN_HEIGHT,
+			      RKISP1_ISP_MAX_HEIGHT);
+
+	*format = *fmt;
+
+	/* Propagate to the sink crop by re-applying the stored rectangle */
+	rkisp1_isp_set_sink_crop(isp, cfg,
+				 rkisp1_isp_get_pad_crop(isp, cfg, pad, which),
+				 which);
+}
+
+static int rkisp1_isp_get_fmt(struct v4l2_subdev *sd,
+			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_format *fmt)
+{
+	struct rkisp1_isp *isp = container_of(sd, struct rkisp1_isp, sd);
+
+	mutex_lock(&isp->ops_lock); /* stored format is read under ops_lock */
+	fmt->format = *rkisp1_isp_get_pad_fmt(isp, cfg, fmt->pad, fmt->which);
+	mutex_unlock(&isp->ops_lock);
+	return 0;
+}
+
+static int rkisp1_isp_set_fmt(struct v4l2_subdev *sd,
+			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_format *fmt)
+{
+	struct rkisp1_isp *isp = container_of(sd, struct rkisp1_isp, sd);
+
+	mutex_lock(&isp->ops_lock);
+	if (fmt->pad == RKISP1_ISP_PAD_SINK_VIDEO)
+		rkisp1_isp_set_sink_fmt(isp, cfg, &fmt->format, fmt->which);
+	else if (fmt->pad == RKISP1_ISP_PAD_SOURCE_VIDEO)
+		rkisp1_isp_set_src_fmt(isp, cfg, &fmt->format, fmt->which);
+	else /* other pads: nothing is changed, the stored format is returned */
+		fmt->format = *rkisp1_isp_get_pad_fmt(isp, cfg, fmt->pad,
+						      fmt->which);
+
+	mutex_unlock(&isp->ops_lock);
+	return 0;
+}
+
+static int rkisp1_isp_get_selection(struct v4l2_subdev *sd,
+				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_selection *sel)
+{
+	struct rkisp1_isp *isp = container_of(sd, struct rkisp1_isp, sd);
+	const struct v4l2_mbus_framefmt *sink_fmt;
+	int err = 0;
+
+	/*
+	 * Selections exist only on the two video pads.
+	 */
+	if (sel->pad != RKISP1_ISP_PAD_SINK_VIDEO &&
+	    sel->pad != RKISP1_ISP_PAD_SOURCE_VIDEO)
+		return -EINVAL;
+
+	mutex_lock(&isp->ops_lock);
+	if (sel->target == V4L2_SEL_TGT_CROP) {
+		sel->r = *rkisp1_isp_get_pad_crop(isp, cfg, sel->pad,
+						  sel->which);
+	} else if (sel->target != V4L2_SEL_TGT_CROP_BOUNDS) {
+		err = -EINVAL;
+	} else if (sel->pad == RKISP1_ISP_PAD_SOURCE_VIDEO) {
+		/* Bounds of the source crop are the current sink crop */
+		sel->r = *rkisp1_isp_get_pad_crop(isp, cfg,
+						  RKISP1_ISP_PAD_SINK_VIDEO,
+						  sel->which);
+	} else {
+		/* Bounds of the sink crop are the full sink format */
+		sink_fmt = rkisp1_isp_get_pad_fmt(isp, cfg, sel->pad,
+						  sel->which);
+		sel->r.left = 0;
+		sel->r.top = 0;
+		sel->r.width = sink_fmt->width;
+		sel->r.height = sink_fmt->height;
+	}
+	mutex_unlock(&isp->ops_lock);
+
+	return err;
+}
+
+static int rkisp1_isp_set_selection(struct v4l2_subdev *sd,
+				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_selection *sel)
+{
+	struct rkisp1_device *rkisp1 =
+		container_of(sd->v4l2_dev, struct rkisp1_device, v4l2_dev);
+	struct rkisp1_isp *isp = container_of(sd, struct rkisp1_isp, sd);
+	int ret = 0;
+
+	if (sel->target != V4L2_SEL_TGT_CROP)
+		return -EINVAL;
+
+	dev_dbg(rkisp1->dev, "%s: pad: %d sel(%d,%d)/%dx%d\n", __func__,
+		sel->pad, sel->r.left, sel->r.top, sel->r.width, sel->r.height);
+	mutex_lock(&isp->ops_lock);
+	if (sel->pad == RKISP1_ISP_PAD_SINK_VIDEO)
+		rkisp1_isp_set_sink_crop(isp, cfg, &sel->r, sel->which);
+	else if (sel->pad == RKISP1_ISP_PAD_SOURCE_VIDEO)
+		rkisp1_isp_set_src_crop(isp, cfg, &sel->r, sel->which);
+	else /* crop can only be set on the two video pads */
+		ret = -EINVAL;
+
+	mutex_unlock(&isp->ops_lock);
+	return ret;
+}
+
+static int rkisp1_subdev_link_validate(struct media_link *link)
+{
+	/* Links into the params sink pad are accepted without validation */
+	if (link->sink->index != RKISP1_ISP_PAD_SINK_PARAMS)
+		return v4l2_subdev_link_validate(link);
+	return 0;
+}
+
+static const struct v4l2_subdev_pad_ops rkisp1_isp_pad_ops = {
+	.init_cfg = rkisp1_isp_init_config,
+	.enum_mbus_code = rkisp1_isp_enum_mbus_code,
+	.get_fmt = rkisp1_isp_get_fmt,
+	.set_fmt = rkisp1_isp_set_fmt,
+	.get_selection = rkisp1_isp_get_selection,
+	.set_selection = rkisp1_isp_set_selection,
+	.link_validate = v4l2_subdev_link_validate_default,
+};
+
+/* ----------------------------------------------------------------------------
+ * Stream operations
+ */
+
+static int rkisp1_mipi_csi2_start(struct rkisp1_isp *isp,
+				  struct rkisp1_sensor_async *sensor)
+{
+	struct rkisp1_device *rkisp1 =
+		container_of(isp->sd.v4l2_dev, struct rkisp1_device, v4l2_dev);
+	union phy_configure_opts opts;
+	struct phy_configure_opts_mipi_dphy *cfg = &opts.mipi_dphy;
+	s64 pixel_clock;
+
+	if (!sensor->pixel_rate_ctrl) {
+		dev_warn(rkisp1->dev, "No pixel rate control in sensor subdev\n");
+		return -EPIPE;
+	}
+
+	pixel_clock = v4l2_ctrl_g_ctrl_int64(sensor->pixel_rate_ctrl);
+	if (!pixel_clock) {
+		dev_err(rkisp1->dev, "Invalid pixel rate value\n");
+		return -EINVAL;
+	}
+	/* NOTE(review): return values of the phy_*() calls below are unchecked */
+	phy_mipi_dphy_get_default_config(pixel_clock, isp->sink_fmt->bus_width,
+					 sensor->lanes, cfg);
+	phy_set_mode(sensor->dphy, PHY_MODE_MIPI_DPHY);
+	phy_configure(sensor->dphy, &opts);
+	phy_power_on(sensor->dphy);
+
+	return 0;
+}
+
+static void rkisp1_mipi_csi2_stop(struct rkisp1_sensor_async *sensor)
+{
+	phy_power_off(sensor->dphy); /* undoes phy_power_on() done at start */
+}
+
+static int rkisp1_isp_s_stream(struct v4l2_subdev *sd, int enable)
+{
+	struct rkisp1_device *rkisp1 =
+		container_of(sd->v4l2_dev, struct rkisp1_device, v4l2_dev);
+	struct rkisp1_isp *isp = &rkisp1->isp;
+	struct v4l2_subdev *sensor_sd;
+	int ret = 0;
+
+	if (!enable) {
+		rkisp1_isp_stop(rkisp1);
+		rkisp1_mipi_csi2_stop(rkisp1->active_sensor);
+		return 0;
+	}
+
+	sensor_sd = rkisp1_get_remote_sensor(sd);
+	if (!sensor_sd) {
+		dev_warn(rkisp1->dev, "No link between isp and sensor\n");
+		return -ENODEV;
+	}
+
+	rkisp1->active_sensor = container_of(sensor_sd->asd,
+					     struct rkisp1_sensor_async, asd);
+	/* Streaming is refused for anything but a CSI-2 D-PHY sensor */
+	if (rkisp1->active_sensor->mbus_type != V4L2_MBUS_CSI2_DPHY)
+		return -EINVAL;
+	/* Start at -1 so the first V_START interrupt numbers the frame 0 */
+	rkisp1->isp.frame_sequence = -1;
+	mutex_lock(&isp->ops_lock);
+	ret = rkisp1_config_cif(rkisp1);
+	if (ret)
+		goto mutex_unlock;
+
+	ret = rkisp1_mipi_csi2_start(&rkisp1->isp, rkisp1->active_sensor);
+	if (ret)
+		goto mutex_unlock;
+
+	rkisp1_isp_start(rkisp1);
+
+mutex_unlock:
+	mutex_unlock(&isp->ops_lock);
+	return ret;
+}
+
+static int rkisp1_isp_subs_evt(struct v4l2_subdev *sd, struct v4l2_fh *fh,
+			       struct v4l2_event_subscription *sub)
+{
+	/*
+	 * FRAME_SYNC is the only event offered here; it doesn't require
+	 * an id, so any id other than zero is rejected.
+	 */
+	if (sub->type != V4L2_EVENT_FRAME_SYNC || sub->id != 0)
+		return -EINVAL;
+
+	return v4l2_event_subscribe(fh, sub, 0, NULL);
+}
+
+static const struct media_entity_operations rkisp1_isp_media_ops = {
+	.link_validate = rkisp1_subdev_link_validate,
+};
+
+static const struct v4l2_subdev_video_ops rkisp1_isp_video_ops = {
+	.s_stream = rkisp1_isp_s_stream,
+};
+
+static const struct v4l2_subdev_core_ops rkisp1_isp_core_ops = {
+	.unsubscribe_event = v4l2_event_subdev_unsubscribe,
+	.subscribe_event = rkisp1_isp_subs_evt,
+};
+
+static const struct v4l2_subdev_ops rkisp1_isp_ops = {
+	.pad = &rkisp1_isp_pad_ops,
+	.video = &rkisp1_isp_video_ops,
+	.core = &rkisp1_isp_core_ops,
+};
+
+int rkisp1_isp_register(struct rkisp1_device *rkisp1)
+{
+	struct rkisp1_isp *isp = &rkisp1->isp;
+	struct media_pad *pads = isp->pads;
+	struct v4l2_subdev *sd = &isp->sd;
+	int ret;
+
+	v4l2_subdev_init(sd, &rkisp1_isp_ops);
+	sd->flags |= V4L2_SUBDEV_FL_HAS_DEVNODE | V4L2_SUBDEV_FL_HAS_EVENTS;
+	sd->entity.ops = &rkisp1_isp_media_ops;
+	sd->entity.function = MEDIA_ENT_F_PROC_VIDEO_PIXEL_FORMATTER;
+	sd->owner = THIS_MODULE;
+	strscpy(sd->name, RKISP1_ISP_DEV_NAME, sizeof(sd->name));
+	/* Two sinks and two sources; only the video sink is MUST_CONNECT */
+	pads[RKISP1_ISP_PAD_SINK_VIDEO].flags = MEDIA_PAD_FL_SINK |
+						MEDIA_PAD_FL_MUST_CONNECT;
+	pads[RKISP1_ISP_PAD_SINK_PARAMS].flags = MEDIA_PAD_FL_SINK;
+	pads[RKISP1_ISP_PAD_SOURCE_VIDEO].flags = MEDIA_PAD_FL_SOURCE;
+	pads[RKISP1_ISP_PAD_SOURCE_STATS].flags = MEDIA_PAD_FL_SOURCE;
+	/* Start from the default sink and source format descriptors */
+	isp->sink_fmt = rkisp1_isp_mbus_info_get(RKISP1_DEF_SINK_PAD_FMT);
+	isp->src_fmt = rkisp1_isp_mbus_info_get(RKISP1_DEF_SRC_PAD_FMT);
+
+	mutex_init(&isp->ops_lock);
+	ret = media_entity_pads_init(&sd->entity, RKISP1_ISP_PAD_MAX, pads);
+	if (ret)
+		return ret;
+
+	ret = v4l2_device_register_subdev(&rkisp1->v4l2_dev, sd);
+	if (ret) {
+		dev_err(rkisp1->dev, "Failed to register isp subdev\n");
+		goto err_cleanup_media_entity;
+	}
+
+	rkisp1_isp_init_config(sd, rkisp1->isp.pad_cfg);
+	return 0;
+
+err_cleanup_media_entity:
+	media_entity_cleanup(&sd->entity);
+
+	return ret;
+}
+
+void rkisp1_isp_unregister(struct rkisp1_device *rkisp1)
+{
+	/* Undo rkisp1_isp_register(): drop the subdev registration first */
+	v4l2_device_unregister_subdev(&rkisp1->isp.sd);
+	/* ... then clean up the media entity */
+	media_entity_cleanup(&rkisp1->isp.sd.entity);
+}
+
+/* ----------------------------------------------------------------------------
+ * Interrupt handlers
+ */
+
+void rkisp1_mipi_isr(struct rkisp1_device *rkisp1)
+{
+	u32 val, status;
+
+	status = rkisp1_read(rkisp1, RKISP1_CIF_MIPI_MIS);
+	if (!status)
+		return;
+
+	rkisp1_write(rkisp1, status, RKISP1_CIF_MIPI_ICR);
+
+	/*
+	 * Mask the D-PHY errctrl interrupt: the errctrl signal stays
+	 * asserted until the next line-state change, which may take
+	 * long enough to keep the CPU stuck in this interrupt.
+	 * It is unmasked again below after a clean frame end.
+	 */
+	if (status & RKISP1_CIF_MIPI_ERR_CTRL(0x0f)) {
+		val = rkisp1_read(rkisp1, RKISP1_CIF_MIPI_IMSC);
+		rkisp1_write(rkisp1, val & ~RKISP1_CIF_MIPI_ERR_CTRL(0x0f),
+			     RKISP1_CIF_MIPI_IMSC);
+		rkisp1->isp.is_dphy_errctrl_disabled = true;
+	}
+
+	/*
+	 * A status of exactly FRAME_END means the frame ended with no
+	 * error bit pending; any other status is counted as a MIPI error.
+	 */
+	if (status == RKISP1_CIF_MIPI_FRAME_END) {
+		/*
+		 * Re-enable the D-PHY errctrl interrupt now that MIPI has
+		 * received a whole frame without any error.
+		 */
+		if (rkisp1->isp.is_dphy_errctrl_disabled) {
+			val = rkisp1_read(rkisp1, RKISP1_CIF_MIPI_IMSC);
+			val |= RKISP1_CIF_MIPI_ERR_CTRL(0x0f);
+			rkisp1_write(rkisp1, val, RKISP1_CIF_MIPI_IMSC);
+			rkisp1->isp.is_dphy_errctrl_disabled = false;
+		}
+	} else {
+		rkisp1->debug.mipi_error++;
+	}
+}
+
+static void rkisp1_isp_queue_event_sof(struct rkisp1_isp *isp)
+{
+	struct v4l2_event event = {
+		.type = V4L2_EVENT_FRAME_SYNC, /* queued on each V_START irq */
+	};
+	event.u.frame_sync.frame_sequence = isp->frame_sequence;
+
+	v4l2_event_queue(isp->sd.devnode, &event);
+}
+
+void rkisp1_isp_isr(struct rkisp1_device *rkisp1)
+{
+	u32 status, isp_err;
+
+	status = rkisp1_read(rkisp1, RKISP1_CIF_ISP_MIS);
+	if (!status)
+		return;
+
+	rkisp1_write(rkisp1, status, RKISP1_CIF_ISP_ICR);
+
+	/* Vertical sync: a new frame starts, bump the sequence and signal it */
+	if (status & RKISP1_CIF_ISP_V_START) {
+		rkisp1->isp.frame_sequence++;
+		rkisp1_isp_queue_event_sof(&rkisp1->isp);
+		if (status & RKISP1_CIF_ISP_FRAME) {
+			WARN_ONCE(1, "irq delay is too long, buffers might not be in sync\n");
+			rkisp1->debug.irq_delay++;
+		}
+	}
+	if (status & RKISP1_CIF_ISP_PIC_SIZE_ERROR) {
+		/* Count each size-error source, then clear the error bits */
+		isp_err = rkisp1_read(rkisp1, RKISP1_CIF_ISP_ERR);
+		if (isp_err & RKISP1_CIF_ISP_ERR_INFORM_SIZE)
+			rkisp1->debug.inform_size_error++;
+		if (isp_err & RKISP1_CIF_ISP_ERR_IS_SIZE)
+			rkisp1->debug.img_stabilization_size_error++;
+		if (isp_err & RKISP1_CIF_ISP_ERR_OUTFORM_SIZE)
+			rkisp1->debug.outform_size_error++;
+		rkisp1_write(rkisp1, isp_err, RKISP1_CIF_ISP_ERR_CLR);
+	} else if (status & RKISP1_CIF_ISP_DATA_LOSS) {
+		/* data_loss debug counter; not bumped if a size error is set */
+		rkisp1->debug.data_loss++;
+	}
+
+	if (status & RKISP1_CIF_ISP_FRAME) {
+		u32 isp_ris;
+
+		/* New frame from the sensor received */
+		isp_ris = rkisp1_read(rkisp1, RKISP1_CIF_ISP_RIS);
+		if (isp_ris & RKISP1_STATS_MEAS_MASK)
+			rkisp1_stats_isr(&rkisp1->stats, isp_ris);
+		/*
+		 * Then update changed configs. Some of them involve
+		 * a lot of register writes. Do those only once per frame.
+		 * Do the updates in the order of the processing flow.
+		 */
+		rkisp1_params_isr(rkisp1);
+	}
+}
diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c
new file mode 100644
index 000000000000..03f9a81df440
--- /dev/null
+++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c
@@ -0,0 +1,1572 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Rockchip ISP1 Driver - Params subdevice
+ *
+ * Copyright (C) 2017 Rockchip Electronics Co., Ltd.
+ */
+
+#include <media/v4l2-common.h>
+#include <media/v4l2-event.h>
+#include <media/v4l2-ioctl.h>
+#include <media/videobuf2-core.h>
+#include <media/videobuf2-vmalloc.h>	/* for ISP params */
+
+#include "rkisp1-common.h"
+
+#define RKISP1_PARAMS_DEV_NAME	RKISP1_DRIVER_NAME "_params"
+
+#define RKISP1_ISP_PARAMS_REQ_BUFS_MIN	2
+#define RKISP1_ISP_PARAMS_REQ_BUFS_MAX	8
+
+/*
+ * DPCC per-set registers: the registers of methods set n (counted from 0)
+ * are 0x14 bytes further on than those of the previous set.
+ */
+#define RKISP1_ISP_DPCC_LINE_THRESH(n)	(RKISP1_CIF_ISP_DPCC_LINE_THRESH_1 + 0x14 * (n))
+#define RKISP1_ISP_DPCC_LINE_MAD_FAC(n)	(RKISP1_CIF_ISP_DPCC_LINE_MAD_FAC_1 + 0x14 * (n))
+#define RKISP1_ISP_DPCC_PG_FAC(n)	(RKISP1_CIF_ISP_DPCC_PG_FAC_1 + 0x14 * (n))
+#define RKISP1_ISP_DPCC_RND_THRESH(n)	(RKISP1_CIF_ISP_DPCC_RND_THRESH_1 + 0x14 * (n))
+#define RKISP1_ISP_DPCC_RG_FAC(n)	(RKISP1_CIF_ISP_DPCC_RG_FAC_1 + 0x14 * (n))
+
+/* CC coefficient register n; consecutive registers are 4 bytes apart. */
+#define RKISP1_ISP_CC_COEFF(n)		(RKISP1_CIF_ISP_CC_COEFF_0 + (n) * 4)
+
+/* Read-modify-write helper: OR @bit_mask into register @reg. */
+static inline void
+rkisp1_param_set_bits(struct rkisp1_params *params, u32 reg, u32 bit_mask)
+{
+	u32 cur = rkisp1_read(params->rkisp1, reg);
+
+	rkisp1_write(params->rkisp1, cur | bit_mask, reg);
+}
+
+/* Read-modify-write helper: clear the bits of @bit_mask in register @reg. */
+static inline void
+rkisp1_param_clear_bits(struct rkisp1_params *params, u32 reg, u32 bit_mask)
+{
+	u32 cur = rkisp1_read(params->rkisp1, reg);
+
+	rkisp1_write(params->rkisp1, cur & ~bit_mask, reg);
+}
+
+/* ISP DPCC: program the mode, the method sets and the limits from @arg. */
+static void rkisp1_dpcc_config(struct rkisp1_params *params,
+			       const struct rkisp1_cif_isp_dpcc_config *arg)
+{
+	unsigned int set;
+	u32 dpcc_mode;
+
+	/*
+	 * Every mode bit comes from @arg except the enable bit, which keeps
+	 * the value currently held by the register.
+	 */
+	dpcc_mode = rkisp1_read(params->rkisp1, RKISP1_CIF_ISP_DPCC_MODE) &
+		    RKISP1_CIF_ISP_DPCC_ENA;
+	dpcc_mode |= arg->mode & ~RKISP1_CIF_ISP_DPCC_ENA;
+	rkisp1_write(params->rkisp1, dpcc_mode, RKISP1_CIF_ISP_DPCC_MODE);
+	rkisp1_write(params->rkisp1, arg->output_mode, RKISP1_CIF_ISP_DPCC_OUTPUT_MODE);
+	rkisp1_write(params->rkisp1, arg->set_use, RKISP1_CIF_ISP_DPCC_SET_USE);
+
+	/* Method selection of the three sets. */
+	rkisp1_write(params->rkisp1, arg->methods[0].method, RKISP1_CIF_ISP_DPCC_METHODS_SET_1);
+	rkisp1_write(params->rkisp1, arg->methods[1].method, RKISP1_CIF_ISP_DPCC_METHODS_SET_2);
+	rkisp1_write(params->rkisp1, arg->methods[2].method, RKISP1_CIF_ISP_DPCC_METHODS_SET_3);
+
+	/* Thresholds and factors of each set. */
+	for (set = 0; set < RKISP1_CIF_ISP_DPCC_METHODS_MAX; set++) {
+		rkisp1_write(params->rkisp1, arg->methods[set].line_thresh,
+			     RKISP1_ISP_DPCC_LINE_THRESH(set));
+		rkisp1_write(params->rkisp1, arg->methods[set].line_mad_fac,
+			     RKISP1_ISP_DPCC_LINE_MAD_FAC(set));
+		rkisp1_write(params->rkisp1, arg->methods[set].pg_fac,
+			     RKISP1_ISP_DPCC_PG_FAC(set));
+		rkisp1_write(params->rkisp1, arg->methods[set].rnd_thresh,
+			     RKISP1_ISP_DPCC_RND_THRESH(set));
+		rkisp1_write(params->rkisp1, arg->methods[set].rg_fac,
+			     RKISP1_ISP_DPCC_RG_FAC(set));
+	}
+
+	/* These two are written once, not per set. */
+	rkisp1_write(params->rkisp1, arg->rnd_offs, RKISP1_CIF_ISP_DPCC_RND_OFFS);
+	rkisp1_write(params->rkisp1, arg->ro_limits, RKISP1_CIF_ISP_DPCC_RO_LIMITS);
+}
+
+/*
+ * ISP black level subtraction (BLS).
+ *
+ * Without @arg->enable_auto the fixed per-channel values are written to the
+ * A-D registers in the order given by params->raw_type. With it, the windows
+ * selected in @arg->en_windows and the sample count are programmed and
+ * measured mode is selected. The enable bit of RKISP1_CIF_ISP_BLS_CTRL keeps
+ * its current value either way; all other control bits are rebuilt.
+ */
+static void rkisp1_bls_config(struct rkisp1_params *params,
+			      const struct rkisp1_cif_isp_bls_config *arg)
+{
+	u32 bls_ctrl;
+
+	/* avoid to override the old enable value */
+	bls_ctrl = rkisp1_read(params->rkisp1, RKISP1_CIF_ISP_BLS_CTRL) &
+		   RKISP1_CIF_ISP_BLS_ENA;
+
+	if (arg->enable_auto) {
+		/* NOTE(review): the *_size fields are written to the *_STOP registers as is. */
+		/* Measurement window 2 */
+		if (arg->en_windows & BIT(1)) {
+			rkisp1_write(params->rkisp1, arg->bls_window2.h_offs,
+				     RKISP1_CIF_ISP_BLS_H2_START);
+			rkisp1_write(params->rkisp1, arg->bls_window2.h_size,
+				     RKISP1_CIF_ISP_BLS_H2_STOP);
+			rkisp1_write(params->rkisp1, arg->bls_window2.v_offs,
+				     RKISP1_CIF_ISP_BLS_V2_START);
+			rkisp1_write(params->rkisp1, arg->bls_window2.v_size,
+				     RKISP1_CIF_ISP_BLS_V2_STOP);
+			bls_ctrl |= RKISP1_CIF_ISP_BLS_WINDOW_2;
+		}
+
+		/* Measurement window 1 */
+		if (arg->en_windows & BIT(0)) {
+			rkisp1_write(params->rkisp1, arg->bls_window1.h_offs,
+				     RKISP1_CIF_ISP_BLS_H1_START);
+			rkisp1_write(params->rkisp1, arg->bls_window1.h_size,
+				     RKISP1_CIF_ISP_BLS_H1_STOP);
+			rkisp1_write(params->rkisp1, arg->bls_window1.v_offs,
+				     RKISP1_CIF_ISP_BLS_V1_START);
+			rkisp1_write(params->rkisp1, arg->bls_window1.v_size,
+				     RKISP1_CIF_ISP_BLS_V1_STOP);
+			bls_ctrl |= RKISP1_CIF_ISP_BLS_WINDOW_1;
+		}
+
+		rkisp1_write(params->rkisp1, arg->bls_samples,
+			     RKISP1_CIF_ISP_BLS_SAMPLES);
+
+		bls_ctrl |= RKISP1_CIF_ISP_BLS_MODE_MEASURED;
+	} else {
+		const struct rkisp1_cif_isp_bls_fixed_val *fixed = &arg->fixed_val;
+
+		/*
+		 * Fixed subtraction values: which of the A-D registers takes
+		 * R, Gr, Gb and B depends on the Bayer order. Any other raw
+		 * type leaves the fixed registers untouched.
+		 */
+		switch (params->raw_type) {
+		case RKISP1_RAW_BGGR:
+			rkisp1_write(params->rkisp1, fixed->r, RKISP1_CIF_ISP_BLS_D_FIXED);
+			rkisp1_write(params->rkisp1, fixed->gr, RKISP1_CIF_ISP_BLS_C_FIXED);
+			rkisp1_write(params->rkisp1, fixed->gb, RKISP1_CIF_ISP_BLS_B_FIXED);
+			rkisp1_write(params->rkisp1, fixed->b, RKISP1_CIF_ISP_BLS_A_FIXED);
+			break;
+		case RKISP1_RAW_GBRG:
+			rkisp1_write(params->rkisp1, fixed->r, RKISP1_CIF_ISP_BLS_C_FIXED);
+			rkisp1_write(params->rkisp1, fixed->gr, RKISP1_CIF_ISP_BLS_D_FIXED);
+			rkisp1_write(params->rkisp1, fixed->gb, RKISP1_CIF_ISP_BLS_A_FIXED);
+			rkisp1_write(params->rkisp1, fixed->b, RKISP1_CIF_ISP_BLS_B_FIXED);
+			break;
+		case RKISP1_RAW_GRBG:
+			rkisp1_write(params->rkisp1, fixed->r, RKISP1_CIF_ISP_BLS_B_FIXED);
+			rkisp1_write(params->rkisp1, fixed->gr, RKISP1_CIF_ISP_BLS_A_FIXED);
+			rkisp1_write(params->rkisp1, fixed->gb, RKISP1_CIF_ISP_BLS_D_FIXED);
+			rkisp1_write(params->rkisp1, fixed->b, RKISP1_CIF_ISP_BLS_C_FIXED);
+			break;
+		case RKISP1_RAW_RGGB:
+			rkisp1_write(params->rkisp1, fixed->r, RKISP1_CIF_ISP_BLS_A_FIXED);
+			rkisp1_write(params->rkisp1, fixed->gr, RKISP1_CIF_ISP_BLS_B_FIXED);
+			rkisp1_write(params->rkisp1, fixed->gb, RKISP1_CIF_ISP_BLS_C_FIXED);
+			rkisp1_write(params->rkisp1, fixed->b, RKISP1_CIF_ISP_BLS_D_FIXED);
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* Commit the rebuilt control word. */
+	rkisp1_write(params->rkisp1, bls_ctrl, RKISP1_CIF_ISP_BLS_CTRL);
+}
+
+/*
+ * ISP LSC correction tables.
+ *
+ * Write the R/Gr/Gb/B tables to the table bank that RKISP1_CIF_ISP_LSC_STATUS
+ * does not flag as active, two samples per data register write, then select
+ * that bank.
+ */
+static void
+rkisp1_lsc_correct_matrix_config(struct rkisp1_params *params,
+				 const struct rkisp1_cif_isp_lsc_config *pconfig)
+{
+	unsigned int status, table_addr, table_sel, row, col, word;
+
+	status = rkisp1_read(params->rkisp1, RKISP1_CIF_ISP_LSC_STATUS);
+
+	/* RKISP1_CIF_ISP_LSC_TABLE_ADDRESS_153 = ( 17 * 18 ) >> 1 */
+	if (status & RKISP1_CIF_ISP_LSC_ACTIVE_TABLE) {
+		table_addr = RKISP1_CIF_ISP_LSC_TABLE_ADDRESS_0;
+		table_sel = RKISP1_CIF_ISP_LSC_TABLE_0;
+	} else {
+		table_addr = RKISP1_CIF_ISP_LSC_TABLE_ADDRESS_153;
+		table_sel = RKISP1_CIF_ISP_LSC_TABLE_1;
+	}
+
+	/* All four colour tables start at the same table address. */
+	rkisp1_write(params->rkisp1, table_addr, RKISP1_CIF_ISP_LSC_R_TABLE_ADDR);
+	rkisp1_write(params->rkisp1, table_addr, RKISP1_CIF_ISP_LSC_GR_TABLE_ADDR);
+	rkisp1_write(params->rkisp1, table_addr, RKISP1_CIF_ISP_LSC_GB_TABLE_ADDR);
+	rkisp1_write(params->rkisp1, table_addr, RKISP1_CIF_ISP_LSC_B_TABLE_ADDR);
+
+	/* program data tables (table size is 9 * 17 = 153) */
+	for (row = 0; row < RKISP1_CIF_ISP_LSC_SAMPLES_MAX; row++) {
+		/*
+		 * 17 sectors with 2 values in one DWORD = 9
+		 * DWORDs (2nd value of last DWORD unused)
+		 */
+		for (col = 0; col < RKISP1_CIF_ISP_LSC_SAMPLES_MAX - 1; col += 2) {
+			word = RKISP1_CIF_ISP_LSC_TABLE_DATA(pconfig->r_data_tbl[row][col],
+							     pconfig->r_data_tbl[row][col + 1]);
+			rkisp1_write(params->rkisp1, word, RKISP1_CIF_ISP_LSC_R_TABLE_DATA);
+
+			word = RKISP1_CIF_ISP_LSC_TABLE_DATA(pconfig->gr_data_tbl[row][col],
+							     pconfig->gr_data_tbl[row][col + 1]);
+			rkisp1_write(params->rkisp1, word, RKISP1_CIF_ISP_LSC_GR_TABLE_DATA);
+
+			word = RKISP1_CIF_ISP_LSC_TABLE_DATA(pconfig->gb_data_tbl[row][col],
+							     pconfig->gb_data_tbl[row][col + 1]);
+			rkisp1_write(params->rkisp1, word, RKISP1_CIF_ISP_LSC_GB_TABLE_DATA);
+
+			word = RKISP1_CIF_ISP_LSC_TABLE_DATA(pconfig->b_data_tbl[row][col],
+							     pconfig->b_data_tbl[row][col + 1]);
+			rkisp1_write(params->rkisp1, word, RKISP1_CIF_ISP_LSC_B_TABLE_DATA);
+		}
+
+		/* The sample left over after the pairs is written paired with 0. */
+		word = RKISP1_CIF_ISP_LSC_TABLE_DATA(pconfig->r_data_tbl[row][col], 0);
+		rkisp1_write(params->rkisp1, word, RKISP1_CIF_ISP_LSC_R_TABLE_DATA);
+
+		word = RKISP1_CIF_ISP_LSC_TABLE_DATA(pconfig->gr_data_tbl[row][col], 0);
+		rkisp1_write(params->rkisp1, word, RKISP1_CIF_ISP_LSC_GR_TABLE_DATA);
+
+		word = RKISP1_CIF_ISP_LSC_TABLE_DATA(pconfig->gb_data_tbl[row][col], 0);
+		rkisp1_write(params->rkisp1, word, RKISP1_CIF_ISP_LSC_GB_TABLE_DATA);
+
+		word = RKISP1_CIF_ISP_LSC_TABLE_DATA(pconfig->b_data_tbl[row][col], 0);
+		rkisp1_write(params->rkisp1, word, RKISP1_CIF_ISP_LSC_B_TABLE_DATA);
+	}
+
+	/* Select the bank that was just written. */
+	rkisp1_write(params->rkisp1, table_sel, RKISP1_CIF_ISP_LSC_TABLE_SEL);
+}
+
+/*
+ * Program the LSC tables and sector size/gradient registers. LSC is switched
+ * off while they are written and put back to its previous state afterwards.
+ */
+static void rkisp1_lsc_config(struct rkisp1_params *params,
+			      const struct rkisp1_cif_isp_lsc_config *arg)
+{
+	unsigned int pair, word;
+	u32 saved_ctrl;
+
+	/* Remember whether LSC was on, then turn it off for the update. */
+	saved_ctrl = rkisp1_read(params->rkisp1, RKISP1_CIF_ISP_LSC_CTRL);
+	rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_LSC_CTRL,
+				RKISP1_CIF_ISP_LSC_CTRL_ENA);
+	rkisp1_lsc_correct_matrix_config(params, arg);
+
+	/* Each register takes two consecutive table entries. */
+	for (pair = 0; pair < RKISP1_CIF_ISP_LSC_SECTORS_TBL_SIZE / 2; pair++) {
+		unsigned int lo = pair * 2, hi = lo + 1, reg_off = pair * 4;
+
+		/* program x size tables */
+		word = RKISP1_CIF_ISP_LSC_SECT_SIZE(arg->x_size_tbl[lo],
+						    arg->x_size_tbl[hi]);
+		rkisp1_write(params->rkisp1, word, RKISP1_CIF_ISP_LSC_XSIZE_01 + reg_off);
+
+		/* program x grad tables */
+		word = RKISP1_CIF_ISP_LSC_SECT_SIZE(arg->x_grad_tbl[lo],
+						    arg->x_grad_tbl[hi]);
+		rkisp1_write(params->rkisp1, word, RKISP1_CIF_ISP_LSC_XGRAD_01 + reg_off);
+
+		/* program y size tables */
+		word = RKISP1_CIF_ISP_LSC_SECT_SIZE(arg->y_size_tbl[lo],
+						    arg->y_size_tbl[hi]);
+		rkisp1_write(params->rkisp1, word, RKISP1_CIF_ISP_LSC_YSIZE_01 + reg_off);
+
+		/* program y grad tables */
+		word = RKISP1_CIF_ISP_LSC_SECT_SIZE(arg->y_grad_tbl[lo],
+						    arg->y_grad_tbl[hi]);
+		rkisp1_write(params->rkisp1, word, RKISP1_CIF_ISP_LSC_YGRAD_01 + reg_off);
+	}
+
+	/* restore the lsc ctrl status */
+	if (saved_ctrl & RKISP1_CIF_ISP_LSC_CTRL_ENA)
+		rkisp1_param_set_bits(params, RKISP1_CIF_ISP_LSC_CTRL,
+				      RKISP1_CIF_ISP_LSC_CTRL_ENA);
+	else
+		rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_LSC_CTRL,
+					RKISP1_CIF_ISP_LSC_CTRL_ENA);
+}
+
+/*
+ * ISP filter: program thresholds, factors, luminance weight and filter mode.
+ * RKISP1_CIF_ISP_FILT_MODE is written exactly once, carrying the current
+ * enable bit over, so that reconfiguring the filter does not switch it off.
+ */
+static void rkisp1_flt_config(struct rkisp1_params *params,
+			      const struct rkisp1_cif_isp_flt_config *arg)
+{
+	u32 filt_mode;
+
+	rkisp1_write(params->rkisp1,
+		     arg->thresh_bl0, RKISP1_CIF_ISP_FILT_THRESH_BL0);
+	rkisp1_write(params->rkisp1,
+		     arg->thresh_bl1, RKISP1_CIF_ISP_FILT_THRESH_BL1);
+	rkisp1_write(params->rkisp1,
+		     arg->thresh_sh0, RKISP1_CIF_ISP_FILT_THRESH_SH0);
+	rkisp1_write(params->rkisp1,
+		     arg->thresh_sh1, RKISP1_CIF_ISP_FILT_THRESH_SH1);
+	rkisp1_write(params->rkisp1, arg->fac_bl0, RKISP1_CIF_ISP_FILT_FAC_BL0);
+	rkisp1_write(params->rkisp1, arg->fac_bl1, RKISP1_CIF_ISP_FILT_FAC_BL1);
+	rkisp1_write(params->rkisp1, arg->fac_mid, RKISP1_CIF_ISP_FILT_FAC_MID);
+	rkisp1_write(params->rkisp1, arg->fac_sh0, RKISP1_CIF_ISP_FILT_FAC_SH0);
+	rkisp1_write(params->rkisp1, arg->fac_sh1, RKISP1_CIF_ISP_FILT_FAC_SH1);
+	rkisp1_write(params->rkisp1,
+		     arg->lum_weight, RKISP1_CIF_ISP_FILT_LUM_WEIGHT);
+
+	/*
+	 * Read the enable bit before touching the mode register: writing the
+	 * new mode bits first would clear it and the old state would be lost.
+	 */
+	filt_mode = rkisp1_read(params->rkisp1, RKISP1_CIF_ISP_FILT_MODE);
+	filt_mode &= RKISP1_CIF_ISP_FLT_ENA;
+	if (arg->mode)
+		filt_mode |= RKISP1_CIF_ISP_FLT_MODE_DNR;
+	filt_mode |= RKISP1_CIF_ISP_FLT_CHROMA_V_MODE(arg->chr_v_mode) |
+		     RKISP1_CIF_ISP_FLT_CHROMA_H_MODE(arg->chr_h_mode) |
+		     RKISP1_CIF_ISP_FLT_GREEN_STAGE1(arg->grn_stage1);
+	rkisp1_write(params->rkisp1, filt_mode, RKISP1_CIF_ISP_FILT_MODE);
+}
+
+/* ISP demosaic: set the threshold from @arg, leaving the bypass bit as is. */
+static int rkisp1_bdm_config(struct rkisp1_params *params,
+			     const struct rkisp1_cif_isp_bdm_config *arg)
+{
+	u32 demosaic = rkisp1_read(params->rkisp1, RKISP1_CIF_ISP_DEMOSAIC);
+
+	/* Bypass comes from the register, everything else from @arg. */
+	demosaic = (demosaic & RKISP1_CIF_ISP_DEMOSAIC_BYPASS) |
+		   (arg->demosaic_th & ~RKISP1_CIF_ISP_DEMOSAIC_BYPASS);
+	rkisp1_write(params->rkisp1, demosaic, RKISP1_CIF_ISP_DEMOSAIC);
+
+	/* Nothing here can fail. */
+	return 0;
+}
+
+/* ISP SDG: program the de-gamma dx registers and the R/G/B curve points. */
+static void rkisp1_sdg_config(struct rkisp1_params *params,
+			      const struct rkisp1_cif_isp_sdg_config *arg)
+{
+	unsigned int pt;
+
+	/* gamma_dx0/gamma_dx1 go to the DX_LO/DX_HI registers. */
+	rkisp1_write(params->rkisp1, arg->xa_pnts.gamma_dx0, RKISP1_CIF_ISP_GAMMA_DX_LO);
+	rkisp1_write(params->rkisp1, arg->xa_pnts.gamma_dx1, RKISP1_CIF_ISP_GAMMA_DX_HI);
+
+	/* Curve registers are 4 bytes apart; R, G and B are written per point. */
+	for (pt = 0; pt < RKISP1_CIF_ISP_DEGAMMA_CURVE_SIZE; pt++) {
+		rkisp1_write(params->rkisp1, arg->curve_r.gamma_y[pt],
+			     RKISP1_CIF_ISP_GAMMA_R_Y0 + pt * 4);
+		rkisp1_write(params->rkisp1, arg->curve_g.gamma_y[pt],
+			     RKISP1_CIF_ISP_GAMMA_G_Y0 + pt * 4);
+		rkisp1_write(params->rkisp1, arg->curve_b.gamma_y[pt],
+			     RKISP1_CIF_ISP_GAMMA_B_Y0 + pt * 4);
+	}
+}
+
+/* ISP gamma out: switch it off, then load the mode and the curve samples. */
+static void rkisp1_goc_config(struct rkisp1_params *params,
+			      const struct rkisp1_cif_isp_goc_config *arg)
+{
+	unsigned int sample;
+
+	rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_CTRL,
+				RKISP1_CIF_ISP_CTRL_ISP_GAMMA_OUT_ENA);
+	rkisp1_write(params->rkisp1, arg->mode, RKISP1_CIF_ISP_GAMMA_OUT_MODE);
+	/* Sample registers are 4 bytes apart. */
+	for (sample = 0; sample < RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES; sample++)
+		rkisp1_write(params->rkisp1, arg->gamma_y[sample],
+			     RKISP1_CIF_ISP_GAMMA_OUT_Y_0 + sample * 4);
+}
+
+/* ISP cross talk: load the 3x3 matrix (row-major) and the three offsets. */
+static void rkisp1_ctk_config(struct rkisp1_params *params,
+			      const struct rkisp1_cif_isp_ctk_config *arg)
+{
+	unsigned int row, col;
+
+	for (row = 0; row < 3; row++)
+		for (col = 0; col < 3; col++)
+			rkisp1_write(params->rkisp1, arg->coeff[row][col],
+				     RKISP1_CIF_ISP_CT_COEFF_0 + 4 * (3 * row + col));
+	for (row = 0; row < 3; row++)
+		rkisp1_write(params->rkisp1, arg->ct_offset[row],
+			     RKISP1_CIF_ISP_CT_OFFSET_R + row * 4);
+}
+
+/* Nothing to do on enable; on disable put the default values back. */
+static void rkisp1_ctk_enable(struct rkisp1_params *params, bool en)
+{
+	if (!en) {
+		/* Diagonal coefficients 0x80, all others and the offsets 0. */
+		rkisp1_write(params->rkisp1, 0x80, RKISP1_CIF_ISP_CT_COEFF_0);
+		rkisp1_write(params->rkisp1, 0, RKISP1_CIF_ISP_CT_COEFF_1);
+		rkisp1_write(params->rkisp1, 0, RKISP1_CIF_ISP_CT_COEFF_2);
+		rkisp1_write(params->rkisp1, 0, RKISP1_CIF_ISP_CT_COEFF_3);
+		rkisp1_write(params->rkisp1, 0x80, RKISP1_CIF_ISP_CT_COEFF_4);
+		rkisp1_write(params->rkisp1, 0, RKISP1_CIF_ISP_CT_COEFF_5);
+		rkisp1_write(params->rkisp1, 0, RKISP1_CIF_ISP_CT_COEFF_6);
+		rkisp1_write(params->rkisp1, 0, RKISP1_CIF_ISP_CT_COEFF_7);
+		rkisp1_write(params->rkisp1, 0x80, RKISP1_CIF_ISP_CT_COEFF_8);
+
+		rkisp1_write(params->rkisp1, 0, RKISP1_CIF_ISP_CT_OFFSET_R);
+		rkisp1_write(params->rkisp1, 0, RKISP1_CIF_ISP_CT_OFFSET_G);
+		rkisp1_write(params->rkisp1, 0, RKISP1_CIF_ISP_CT_OFFSET_B);
+	}
+}
+
+/*
+ * ISP AWB measurement: reference/threshold values (YCbCr mode only), the
+ * Y-max compare bit, the measurement window and the frame count.
+ */
+static void rkisp1_awb_meas_config(struct rkisp1_params *params,
+				   const struct rkisp1_cif_isp_awb_meas_config *arg)
+{
+	u32 awb_prop;
+
+	if (arg->awb_mode == RKISP1_CIF_ISP_AWB_MODE_YCBCR) {
+		/* Reference Cb and Cr */
+		rkisp1_write(params->rkisp1,
+			     RKISP1_CIF_ISP_AWB_REF_CR_SET(arg->awb_ref_cr) |
+			     arg->awb_ref_cb, RKISP1_CIF_ISP_AWB_REF);
+		/* Yc Threshold */
+		rkisp1_write(params->rkisp1,
+			     RKISP1_CIF_ISP_AWB_MAX_Y_SET(arg->max_y) |
+			     RKISP1_CIF_ISP_AWB_MIN_Y_SET(arg->min_y) |
+			     RKISP1_CIF_ISP_AWB_MAX_CS_SET(arg->max_csum) |
+			     arg->min_c, RKISP1_CIF_ISP_AWB_THRESH);
+	}
+
+	/* Mirror enable_ymax_cmp into the Y-max compare bit. */
+	awb_prop = rkisp1_read(params->rkisp1, RKISP1_CIF_ISP_AWB_PROP);
+	awb_prop &= ~RKISP1_CIF_ISP_AWB_YMAX_CMP_EN;
+	if (arg->enable_ymax_cmp)
+		awb_prop |= RKISP1_CIF_ISP_AWB_YMAX_CMP_EN;
+	rkisp1_write(params->rkisp1, awb_prop, RKISP1_CIF_ISP_AWB_PROP);
+
+	/* window offset */
+	rkisp1_write(params->rkisp1, arg->awb_wnd.v_offs, RKISP1_CIF_ISP_AWB_WND_V_OFFS);
+	rkisp1_write(params->rkisp1, arg->awb_wnd.h_offs, RKISP1_CIF_ISP_AWB_WND_H_OFFS);
+
+	/* AWB window size */
+	rkisp1_write(params->rkisp1, arg->awb_wnd.v_size, RKISP1_CIF_ISP_AWB_WND_V_SIZE);
+	rkisp1_write(params->rkisp1, arg->awb_wnd.h_size, RKISP1_CIF_ISP_AWB_WND_H_SIZE);
+
+	/* Number of frames */
+	rkisp1_write(params->rkisp1, arg->frames, RKISP1_CIF_ISP_AWB_FRAMES);
+}
+
+/* Turn AWB measurement on (in @arg's mode) or off, with the ISP_CTRL AWB bit. */
+static void
+rkisp1_awb_meas_enable(struct rkisp1_params *params,
+		       const struct rkisp1_cif_isp_awb_meas_config *arg,
+		       bool en)
+{
+	u32 awb_prop = rkisp1_read(params->rkisp1, RKISP1_CIF_ISP_AWB_PROP);
+
+	/* switch off */
+	awb_prop &= RKISP1_CIF_ISP_AWB_MODE_MASK_NONE;
+
+	/* Anything other than RGB selects YCbCr measurement. */
+	if (en && arg->awb_mode == RKISP1_CIF_ISP_AWB_MODE_RGB)
+		awb_prop |= RKISP1_CIF_ISP_AWB_MODE_RGB_EN;
+	else if (en)
+		awb_prop |= RKISP1_CIF_ISP_AWB_MODE_YCBCR_EN;
+
+	/* The mode is written first, the block enable bit second. */
+	rkisp1_write(params->rkisp1, awb_prop, RKISP1_CIF_ISP_AWB_PROP);
+
+	/* Measurements require AWB block be active. */
+	if (en)
+		rkisp1_param_set_bits(params, RKISP1_CIF_ISP_CTRL,
+				      RKISP1_CIF_ISP_CTRL_ISP_AWB_ENA);
+	else
+		rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_CTRL,
+					RKISP1_CIF_ISP_CTRL_ISP_AWB_ENA);
+}
+
+/* AWB gains: the green (Gr, Gb) pair and the red/blue pair, one register each. */
+static void
+rkisp1_awb_gain_config(struct rkisp1_params *params,
+		       const struct rkisp1_cif_isp_awb_gain_config *arg)
+{
+	/* The first gain of each pair goes through GAIN_R_SET(), the second is OR-ed in. */
+	u32 green = RKISP1_CIF_ISP_AWB_GAIN_R_SET(arg->gain_green_r) | arg->gain_green_b;
+	u32 red_blue = RKISP1_CIF_ISP_AWB_GAIN_R_SET(arg->gain_red) | arg->gain_blue;
+
+	rkisp1_write(params->rkisp1, green, RKISP1_CIF_ISP_AWB_GAIN_G);
+	rkisp1_write(params->rkisp1, red_blue, RKISP1_CIF_ISP_AWB_GAIN_RB);
+}
+
+/* ISP AEC: exposure measurement control word, window origin and block size. */
+static void rkisp1_aec_config(struct rkisp1_params *params,
+			      const struct rkisp1_cif_isp_aec_config *arg)
+{
+	unsigned int blk_w, blk_h;
+	u32 ctrl;
+
+	/* Only the enable bit survives; autostop and mode come from @arg. */
+	ctrl = rkisp1_read(params->rkisp1, RKISP1_CIF_ISP_EXP_CTRL) &
+	       RKISP1_CIF_ISP_EXP_ENA;
+	if (arg->autostop)
+		ctrl |= RKISP1_CIF_ISP_EXP_CTRL_AUTOSTOP;
+	if (arg->mode == RKISP1_CIF_ISP_EXP_MEASURING_MODE_1)
+		ctrl |= RKISP1_CIF_ISP_EXP_CTRL_MEASMODE_1;
+	rkisp1_write(params->rkisp1, ctrl, RKISP1_CIF_ISP_EXP_CTRL);
+
+	rkisp1_write(params->rkisp1, arg->meas_window.h_offs, RKISP1_CIF_ISP_EXP_H_OFFSET);
+	rkisp1_write(params->rkisp1, arg->meas_window.v_offs, RKISP1_CIF_ISP_EXP_V_OFFSET);
+
+	/*
+	 * The size registers take the window size divided by the grid
+	 * dimension, minus one. NOTE(review): a window smaller than the grid
+	 * makes the result wrap - confirm the sizes are validated by callers.
+	 */
+	blk_w = arg->meas_window.h_size / RKISP1_CIF_ISP_EXP_COLUMN_NUM - 1;
+	blk_h = arg->meas_window.v_size / RKISP1_CIF_ISP_EXP_ROW_NUM - 1;
+
+	rkisp1_write(params->rkisp1, RKISP1_CIF_ISP_EXP_H_SIZE_SET(blk_w),
+		     RKISP1_CIF_ISP_EXP_H_SIZE);
+	rkisp1_write(params->rkisp1, RKISP1_CIF_ISP_EXP_V_SIZE_SET(blk_h),
+		     RKISP1_CIF_ISP_EXP_V_SIZE);
+}
+
+/*
+ * Colour processing: contrast, hue, saturation and brightness. The full range
+ * bits are set only for full-range quantization with no image effect selected.
+ */
+static void rkisp1_cproc_config(struct rkisp1_params *params,
+				const struct rkisp1_cif_isp_cproc_config *arg)
+{
+	/* @arg must be the cproc_config member of a rkisp1_cif_isp_isp_other_cfg. */
+	struct rkisp1_cif_isp_isp_other_cfg *others =
+		container_of(arg, struct rkisp1_cif_isp_isp_other_cfg, cproc_config);
+	u32 effect = others->ie_config.effect;
+	const u32 range_bits = RKISP1_CIF_C_PROC_YOUT_FULL |
+			       RKISP1_CIF_C_PROC_YIN_FULL |
+			       RKISP1_CIF_C_PROC_COUT_FULL;
+	bool full_range;
+
+	rkisp1_write(params->rkisp1, arg->contrast, RKISP1_CIF_C_PROC_CONTRAST);
+	rkisp1_write(params->rkisp1, arg->hue, RKISP1_CIF_C_PROC_HUE);
+	rkisp1_write(params->rkisp1, arg->sat, RKISP1_CIF_C_PROC_SATURATION);
+	rkisp1_write(params->rkisp1, arg->brightness,
+		     RKISP1_CIF_C_PROC_BRIGHTNESS);
+
+	full_range = params->quantization == V4L2_QUANTIZATION_FULL_RANGE &&
+		     effect == V4L2_COLORFX_NONE;
+	if (full_range)
+		rkisp1_param_set_bits(params, RKISP1_CIF_C_PROC_CTRL, range_bits);
+	else
+		rkisp1_param_clear_bits(params, RKISP1_CIF_C_PROC_CTRL, range_bits);
+}
+
+/* ISP histogram: predivider, measurement window, block size and weights. */
+static void rkisp1_hst_config(struct rkisp1_params *params,
+			      const struct rkisp1_cif_isp_hst_config *arg)
+{
+	static const u32 weight_regs[] = {
+		RKISP1_CIF_ISP_HIST_WEIGHT_00TO30,
+		RKISP1_CIF_ISP_HIST_WEIGHT_40TO21,
+		RKISP1_CIF_ISP_HIST_WEIGHT_31TO12,
+		RKISP1_CIF_ISP_HIST_WEIGHT_22TO03,
+		RKISP1_CIF_ISP_HIST_WEIGHT_13TO43,
+		RKISP1_CIF_ISP_HIST_WEIGHT_04TO34,
+		RKISP1_CIF_ISP_HIST_WEIGHT_44,
+	};
+	unsigned int blk_w, blk_h, reg;
+	const u8 *w = arg->hist_weight;
+	u32 prop;
+
+	/* Keep the mode bits, replace everything else with the predivider. */
+	prop = rkisp1_read(params->rkisp1, RKISP1_CIF_ISP_HIST_PROP) &
+	       RKISP1_CIF_ISP_HIST_PROP_MODE_MASK;
+	prop |= RKISP1_CIF_ISP_HIST_PREDIV_SET(arg->histogram_predivider);
+	rkisp1_write(params->rkisp1, prop, RKISP1_CIF_ISP_HIST_PROP);
+	rkisp1_write(params->rkisp1, arg->meas_window.h_offs, RKISP1_CIF_ISP_HIST_H_OFFS);
+	rkisp1_write(params->rkisp1, arg->meas_window.v_offs, RKISP1_CIF_ISP_HIST_V_OFFS);
+
+	/*
+	 * Block size is the window size divided by the grid dimension, minus
+	 * one. NOTE(review): a window smaller than the grid makes this wrap.
+	 */
+	blk_w = arg->meas_window.h_size / RKISP1_CIF_ISP_HIST_COLUMN_NUM - 1;
+	blk_h = arg->meas_window.v_size / RKISP1_CIF_ISP_HIST_ROW_NUM - 1;
+	rkisp1_write(params->rkisp1, blk_w, RKISP1_CIF_ISP_HIST_H_SIZE);
+	rkisp1_write(params->rkisp1, blk_h, RKISP1_CIF_ISP_HIST_V_SIZE);
+
+	/*
+	 * Four weights go into each register, so the loop consumes
+	 * 4 * ARRAY_SIZE(weight_regs) entries of @arg->hist_weight.
+	 */
+	for (reg = 0; reg < ARRAY_SIZE(weight_regs); reg++) {
+		rkisp1_write(params->rkisp1,
+			     RKISP1_CIF_ISP_HIST_WEIGHT_SET(w[4 * reg], w[4 * reg + 1],
+							    w[4 * reg + 2], w[4 * reg + 3]),
+			     weight_regs[reg]);
+	}
+}
+
+/* Switch the histogram on in @arg->mode, or off by clearing the mode field. */
+static void
+rkisp1_hst_enable(struct rkisp1_params *params,
+		  const struct rkisp1_cif_isp_hst_config *arg, bool en)
+{
+	u32 hist_prop = rkisp1_read(params->rkisp1, RKISP1_CIF_ISP_HIST_PROP);
+
+	/*
+	 * Replace the mode field instead of OR-ing into it, otherwise bits of
+	 * the previously selected mode would stay set.
+	 */
+	hist_prop &= ~RKISP1_CIF_ISP_HIST_PROP_MODE_MASK;
+	if (en)
+		hist_prop |= arg->mode;
+
+	rkisp1_write(params->rkisp1, hist_prop, RKISP1_CIF_ISP_HIST_PROP);
+}
+
+/* ISP AFM: program the windows, threshold and shift with AFM switched off. */
+static void rkisp1_afm_config(struct rkisp1_params *params,
+			      const struct rkisp1_cif_isp_afc_config *arg)
+{
+	size_t win_count = min_t(size_t, ARRAY_SIZE(arg->afm_win),
+				 arg->num_afm_win);
+	u32 saved_ctrl = rkisp1_read(params->rkisp1, RKISP1_CIF_ISP_AFM_CTRL);
+	unsigned int win;
+
+	/* Switch off to configure. */
+	rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_AFM_CTRL,
+				RKISP1_CIF_ISP_AFM_ENA);
+
+	/* Each window has an LT and an RB register; windows are 8 bytes apart. */
+	for (win = 0; win < win_count; win++) {
+		u32 lt = RKISP1_CIF_ISP_AFM_WINDOW_X(arg->afm_win[win].h_offs) |
+			 RKISP1_CIF_ISP_AFM_WINDOW_Y(arg->afm_win[win].v_offs);
+		u32 rb = RKISP1_CIF_ISP_AFM_WINDOW_X(arg->afm_win[win].h_size +
+						     arg->afm_win[win].h_offs) |
+			 RKISP1_CIF_ISP_AFM_WINDOW_Y(arg->afm_win[win].v_size +
+						     arg->afm_win[win].v_offs);
+
+		rkisp1_write(params->rkisp1, lt, RKISP1_CIF_ISP_AFM_LT_A + win * 8);
+		rkisp1_write(params->rkisp1, rb, RKISP1_CIF_ISP_AFM_RB_A + win * 8);
+	}
+	rkisp1_write(params->rkisp1, arg->thres, RKISP1_CIF_ISP_AFM_THRES);
+	rkisp1_write(params->rkisp1, arg->var_shift, RKISP1_CIF_ISP_AFM_VAR_SHIFT);
+	/* restore afm status */
+	rkisp1_write(params->rkisp1, saved_ctrl, RKISP1_CIF_ISP_AFM_CTRL);
+}
+
+/*
+ * Image effect: select the effect mode for @arg->effect and load the
+ * registers that mode uses. Unknown effects leave the mode field cleared.
+ */
+static void rkisp1_ie_config(struct rkisp1_params *params,
+			     const struct rkisp1_cif_isp_ie_config *arg)
+{
+	u32 ctrl;
+
+	/* Rebuild the mode field; the other control bits are kept. */
+	ctrl = rkisp1_read(params->rkisp1, RKISP1_CIF_IMG_EFF_CTRL) &
+	       ~RKISP1_CIF_IMG_EFF_CTRL_MODE_MASK;
+
+	if (params->quantization == V4L2_QUANTIZATION_FULL_RANGE)
+		ctrl |= RKISP1_CIF_IMG_EFF_CTRL_YCBCR_FULL;
+
+	switch (arg->effect) {
+	case V4L2_COLORFX_SEPIA:
+		ctrl |= RKISP1_CIF_IMG_EFF_CTRL_MODE_SEPIA;
+		break;
+	case V4L2_COLORFX_SET_CBCR:
+		/* Sepia mode with the tint taken from @arg. */
+		rkisp1_write(params->rkisp1, arg->eff_tint, RKISP1_CIF_IMG_EFF_TINT);
+		ctrl |= RKISP1_CIF_IMG_EFF_CTRL_MODE_SEPIA;
+		break;
+	case V4L2_COLORFX_AQUA:
+		/*
+		 * Color selection is similar to water color(AQUA):
+		 * grayscale + selected color w threshold
+		 */
+		ctrl |= RKISP1_CIF_IMG_EFF_CTRL_MODE_COLOR_SEL;
+		rkisp1_write(params->rkisp1, arg->color_sel, RKISP1_CIF_IMG_EFF_COLOR_SEL);
+		break;
+	case V4L2_COLORFX_EMBOSS:
+		/* Emboss loads matrix registers 1 to 3. */
+		ctrl |= RKISP1_CIF_IMG_EFF_CTRL_MODE_EMBOSS;
+		rkisp1_write(params->rkisp1, arg->eff_mat_1, RKISP1_CIF_IMG_EFF_MAT_1);
+		rkisp1_write(params->rkisp1, arg->eff_mat_2, RKISP1_CIF_IMG_EFF_MAT_2);
+		rkisp1_write(params->rkisp1, arg->eff_mat_3, RKISP1_CIF_IMG_EFF_MAT_3);
+		break;
+	case V4L2_COLORFX_SKETCH:
+		/* Sketch loads matrix registers 3 to 5. */
+		ctrl |= RKISP1_CIF_IMG_EFF_CTRL_MODE_SKETCH;
+		rkisp1_write(params->rkisp1, arg->eff_mat_3, RKISP1_CIF_IMG_EFF_MAT_3);
+		rkisp1_write(params->rkisp1, arg->eff_mat_4, RKISP1_CIF_IMG_EFF_MAT_4);
+		rkisp1_write(params->rkisp1, arg->eff_mat_5, RKISP1_CIF_IMG_EFF_MAT_5);
+		break;
+	case V4L2_COLORFX_BW:
+		ctrl |= RKISP1_CIF_IMG_EFF_CTRL_MODE_BLACKWHITE;
+		break;
+	case V4L2_COLORFX_NEGATIVE:
+		ctrl |= RKISP1_CIF_IMG_EFF_CTRL_MODE_NEGATIVE;
+		break;
+	default:
+		break;
+	}
+
+	rkisp1_write(params->rkisp1, ctrl, RKISP1_CIF_IMG_EFF_CTRL);
+}
+
+/* Switch the image effect clock and the effect enable bit on or off together. */
+static void rkisp1_ie_enable(struct rkisp1_params *params, bool en)
+{
+	if (!en) {
+		rkisp1_param_clear_bits(params, RKISP1_CIF_IMG_EFF_CTRL,
+					RKISP1_CIF_IMG_EFF_CTRL_ENABLE);
+		rkisp1_param_clear_bits(params, RKISP1_CIF_ICCL, RKISP1_CIF_ICCL_IE_CLK);
+		return;
+	}
+
+	/* Clock first, then enable (this overwrites the whole control register). */
+	rkisp1_param_set_bits(params, RKISP1_CIF_ICCL, RKISP1_CIF_ICCL_IE_CLK);
+	rkisp1_write(params->rkisp1, RKISP1_CIF_IMG_EFF_CTRL_ENABLE, RKISP1_CIF_IMG_EFF_CTRL);
+	rkisp1_param_set_bits(params, RKISP1_CIF_IMG_EFF_CTRL,
+			      RKISP1_CIF_IMG_EFF_CTRL_CFG_UPD);
+}
+
+/*
+ * ISP CSM: load the full- or limited-range coefficient table and set or
+ * clear the matching full-range bits of RKISP1_CIF_ISP_CTRL.
+ */
+static void rkisp1_csm_config(struct rkisp1_params *params, bool full_range)
+{
+	static const u16 full_range_coeff[] = {
+		0x0026, 0x004b, 0x000f,
+		0x01ea, 0x01d6, 0x0040,
+		0x0040, 0x01ca, 0x01f6
+	};
+
+	static const u16 limited_range_coeff[] = {
+		0x0021, 0x0040, 0x000d,
+		0x01ed, 0x01db, 0x0038,
+		0x0038, 0x01d1, 0x01f7,
+	};
+	/* Both tables hold the same number of coefficients. */
+	const u16 *coeff = full_range ? full_range_coeff : limited_range_coeff;
+	const u32 full_bits = RKISP1_CIF_ISP_CTRL_ISP_CSM_Y_FULL_ENA |
+			      RKISP1_CIF_ISP_CTRL_ISP_CSM_C_FULL_ENA;
+	unsigned int i;
+
+	/* RKISP1_ISP_CC_COEFF(i) is RKISP1_CIF_ISP_CC_COEFF_0 + i * 4. */
+	for (i = 0; i < ARRAY_SIZE(full_range_coeff); i++)
+		rkisp1_write(params->rkisp1, coeff[i], RKISP1_ISP_CC_COEFF(i));
+
+	if (full_range)
+		rkisp1_param_set_bits(params, RKISP1_CIF_ISP_CTRL, full_bits);
+	else
+		rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_CTRL, full_bits);
+}
+
+/*
+ * ISP De-noise Pre-Filter (DPF): mode bits, NF gains, NLL coefficients and
+ * the spatial weights of the green and red/blue filters.
+ */
+static void rkisp1_dpf_config(struct rkisp1_params *params,
+			      const struct rkisp1_cif_isp_dpf_config *arg)
+{
+	unsigned int mode_bits, weights, i;
+
+	/* The gain usage selects which gain bits go into the mode. */
+	switch (arg->gain.mode) {
+	case RKISP1_CIF_ISP_DPF_GAIN_USAGE_NF_GAINS:
+		mode_bits = RKISP1_CIF_ISP_DPF_MODE_USE_NF_GAIN |
+			    RKISP1_CIF_ISP_DPF_MODE_AWB_GAIN_COMP;
+		break;
+	case RKISP1_CIF_ISP_DPF_GAIN_USAGE_LSC_GAINS:
+		mode_bits = RKISP1_CIF_ISP_DPF_MODE_LSC_GAIN_COMP;
+		break;
+	case RKISP1_CIF_ISP_DPF_GAIN_USAGE_NF_LSC_GAINS:
+		mode_bits = RKISP1_CIF_ISP_DPF_MODE_USE_NF_GAIN |
+			    RKISP1_CIF_ISP_DPF_MODE_AWB_GAIN_COMP |
+			    RKISP1_CIF_ISP_DPF_MODE_LSC_GAIN_COMP;
+		break;
+	case RKISP1_CIF_ISP_DPF_GAIN_USAGE_AWB_GAINS:
+		mode_bits = RKISP1_CIF_ISP_DPF_MODE_AWB_GAIN_COMP;
+		break;
+	case RKISP1_CIF_ISP_DPF_GAIN_USAGE_AWB_LSC_GAINS:
+		mode_bits = RKISP1_CIF_ISP_DPF_MODE_LSC_GAIN_COMP |
+			    RKISP1_CIF_ISP_DPF_MODE_AWB_GAIN_COMP;
+		break;
+	case RKISP1_CIF_ISP_DPF_GAIN_USAGE_DISABLED:
+	default:
+		mode_bits = 0;
+		break;
+	}
+
+	if (arg->nll.scale_mode == RKISP1_CIF_ISP_NLL_SCALE_LOGARITHMIC)
+		mode_bits |= RKISP1_CIF_ISP_DPF_MODE_NLL_SEGMENTATION;
+	if (arg->rb_flt.fltsize == RKISP1_CIF_ISP_DPF_RB_FILTERSIZE_9x9)
+		mode_bits |= RKISP1_CIF_ISP_DPF_MODE_RB_FLTSIZE_9x9;
+	if (!arg->rb_flt.r_enable)
+		mode_bits |= RKISP1_CIF_ISP_DPF_MODE_R_FLT_DIS;
+	if (!arg->rb_flt.b_enable)
+		mode_bits |= RKISP1_CIF_ISP_DPF_MODE_B_FLT_DIS;
+	if (!arg->g_flt.gb_enable)
+		mode_bits |= RKISP1_CIF_ISP_DPF_MODE_GB_FLT_DIS;
+	if (!arg->g_flt.gr_enable)
+		mode_bits |= RKISP1_CIF_ISP_DPF_MODE_GR_FLT_DIS;
+
+	/*
+	 * NOTE(review): set_bits only ORs these flags in, so mode bits left
+	 * set by an earlier configuration are not cleared here.
+	 */
+	rkisp1_param_set_bits(params, RKISP1_CIF_ISP_DPF_MODE, mode_bits);
+	rkisp1_write(params->rkisp1, arg->gain.nf_b_gain, RKISP1_CIF_ISP_DPF_NF_GAIN_B);
+	rkisp1_write(params->rkisp1, arg->gain.nf_r_gain, RKISP1_CIF_ISP_DPF_NF_GAIN_R);
+	rkisp1_write(params->rkisp1, arg->gain.nf_gb_gain, RKISP1_CIF_ISP_DPF_NF_GAIN_GB);
+	rkisp1_write(params->rkisp1, arg->gain.nf_gr_gain, RKISP1_CIF_ISP_DPF_NF_GAIN_GR);
+
+	for (i = 0; i < RKISP1_CIF_ISP_DPF_MAX_NLF_COEFFS; i++)
+		rkisp1_write(params->rkisp1, arg->nll.coeff[i],
+			     RKISP1_CIF_ISP_DPF_NULL_COEFF_0 + i * 4);
+
+	/* Green weights: entries 0-3 share one register, entries 4-5 the next. */
+	weights = arg->g_flt.spatial_coeff[0] |
+		  (arg->g_flt.spatial_coeff[1] << 8) |
+		  (arg->g_flt.spatial_coeff[2] << 16) |
+		  (arg->g_flt.spatial_coeff[3] << 24);
+	rkisp1_write(params->rkisp1, weights, RKISP1_CIF_ISP_DPF_S_WEIGHT_G_1_4);
+
+	weights = arg->g_flt.spatial_coeff[4] |
+		  (arg->g_flt.spatial_coeff[5] << 8);
+	rkisp1_write(params->rkisp1, weights, RKISP1_CIF_ISP_DPF_S_WEIGHT_G_5_6);
+
+	/* Same packing for the red/blue weights. */
+	weights = arg->rb_flt.spatial_coeff[0] |
+		  (arg->rb_flt.spatial_coeff[1] << 8) |
+		  (arg->rb_flt.spatial_coeff[2] << 16) |
+		  (arg->rb_flt.spatial_coeff[3] << 24);
+	rkisp1_write(params->rkisp1, weights, RKISP1_CIF_ISP_DPF_S_WEIGHT_RB_1_4);
+
+	weights = arg->rb_flt.spatial_coeff[4] |
+		  (arg->rb_flt.spatial_coeff[5] << 8);
+	rkisp1_write(params->rkisp1, weights, RKISP1_CIF_ISP_DPF_S_WEIGHT_RB_5_6);
+}
+
+/* DPF strength: write the B, G and R values from @arg, in that order. */
+static void rkisp1_dpf_strength_config(struct rkisp1_params *params,
+				       const struct rkisp1_cif_isp_dpf_strength_config *arg)
+{
+	rkisp1_write(params->rkisp1, arg->b, RKISP1_CIF_ISP_DPF_STRENGTH_B);
+	rkisp1_write(params->rkisp1, arg->g, RKISP1_CIF_ISP_DPF_STRENGTH_G);
+	rkisp1_write(params->rkisp1, arg->r, RKISP1_CIF_ISP_DPF_STRENGTH_R);
+}
+
+static void
+rkisp1_isp_isr_other_config(struct rkisp1_params *params,
+			    const struct rkisp1_params_cfg *new_params)
+{
+	unsigned int module_en_update, module_cfg_update, module_ens;
+
+	module_en_update = new_params->module_en_update;
+	module_cfg_update = new_params->module_cfg_update;
+	module_ens = new_params->module_ens;
+	/* per module: write new config if flagged, then update its enable bit */
+	if ((module_en_update & RKISP1_CIF_ISP_MODULE_DPCC) ||
+	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_DPCC)) {
+		/* update dpcc config */
+		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_DPCC)
+			rkisp1_dpcc_config(params,
+					   &new_params->others.dpcc_config);
+
+		if (module_en_update & RKISP1_CIF_ISP_MODULE_DPCC) {
+			if (module_ens & RKISP1_CIF_ISP_MODULE_DPCC)
+				rkisp1_param_set_bits(params,
+						      RKISP1_CIF_ISP_DPCC_MODE,
+						      RKISP1_CIF_ISP_DPCC_ENA);
+			else
+				rkisp1_param_clear_bits(params,
+							RKISP1_CIF_ISP_DPCC_MODE,
+							RKISP1_CIF_ISP_DPCC_ENA);
+		}
+	}
+
+	if ((module_en_update & RKISP1_CIF_ISP_MODULE_BLS) ||
+	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_BLS)) {
+		/* update bls config */
+		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_BLS)
+			rkisp1_bls_config(params,
+					  &new_params->others.bls_config);
+
+		if (module_en_update & RKISP1_CIF_ISP_MODULE_BLS) {
+			if (module_ens & RKISP1_CIF_ISP_MODULE_BLS)
+				rkisp1_param_set_bits(params,
+						      RKISP1_CIF_ISP_BLS_CTRL,
+						      RKISP1_CIF_ISP_BLS_ENA);
+			else
+				rkisp1_param_clear_bits(params,
+							RKISP1_CIF_ISP_BLS_CTRL,
+							RKISP1_CIF_ISP_BLS_ENA);
+		}
+	}
+
+	if ((module_en_update & RKISP1_CIF_ISP_MODULE_SDG) ||
+	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_SDG)) {
+		/* update sdg config */
+		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_SDG)
+			rkisp1_sdg_config(params,
+					  &new_params->others.sdg_config);
+
+		if (module_en_update & RKISP1_CIF_ISP_MODULE_SDG) {
+			if (module_ens & RKISP1_CIF_ISP_MODULE_SDG)
+				rkisp1_param_set_bits(params,
+						      RKISP1_CIF_ISP_CTRL,
+						      RKISP1_CIF_ISP_CTRL_ISP_GAMMA_IN_ENA);
+			else
+				rkisp1_param_clear_bits(params,
+							RKISP1_CIF_ISP_CTRL,
+							RKISP1_CIF_ISP_CTRL_ISP_GAMMA_IN_ENA);
+		}
+	}
+
+	if ((module_en_update & RKISP1_CIF_ISP_MODULE_LSC) ||
+	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_LSC)) {
+		/* update lsc config */
+		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_LSC)
+			rkisp1_lsc_config(params,
+					  &new_params->others.lsc_config);
+
+		if (module_en_update & RKISP1_CIF_ISP_MODULE_LSC) {
+			if (module_ens & RKISP1_CIF_ISP_MODULE_LSC)
+				rkisp1_param_set_bits(params,
+						      RKISP1_CIF_ISP_LSC_CTRL,
+						      RKISP1_CIF_ISP_LSC_CTRL_ENA);
+			else
+				rkisp1_param_clear_bits(params,
+							RKISP1_CIF_ISP_LSC_CTRL,
+							RKISP1_CIF_ISP_LSC_CTRL_ENA);
+		}
+	}
+
+	if ((module_en_update & RKISP1_CIF_ISP_MODULE_AWB_GAIN) ||
+	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_AWB_GAIN)) {
+		/* update awb gains */
+		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_AWB_GAIN)
+			rkisp1_awb_gain_config(params,
+					       &new_params->others.awb_gain_config);
+
+		if (module_en_update & RKISP1_CIF_ISP_MODULE_AWB_GAIN) {
+			if (module_ens & RKISP1_CIF_ISP_MODULE_AWB_GAIN)
+				rkisp1_param_set_bits(params,
+						      RKISP1_CIF_ISP_CTRL,
+						      RKISP1_CIF_ISP_CTRL_ISP_AWB_ENA);
+			else
+				rkisp1_param_clear_bits(params,
+							RKISP1_CIF_ISP_CTRL,
+							RKISP1_CIF_ISP_CTRL_ISP_AWB_ENA);
+		}
+	}
+
+	if ((module_en_update & RKISP1_CIF_ISP_MODULE_BDM) ||
+	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_BDM)) {
+		/* update bdm config */
+		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_BDM)
+			rkisp1_bdm_config(params,
+					  &new_params->others.bdm_config);
+
+		if (module_en_update & RKISP1_CIF_ISP_MODULE_BDM) {
+			if (module_ens & RKISP1_CIF_ISP_MODULE_BDM)
+				rkisp1_param_set_bits(params,
+						      RKISP1_CIF_ISP_DEMOSAIC,
+						      RKISP1_CIF_ISP_DEMOSAIC_BYPASS);
+			else
+				rkisp1_param_clear_bits(params,
+							RKISP1_CIF_ISP_DEMOSAIC,
+							RKISP1_CIF_ISP_DEMOSAIC_BYPASS);
+		}
+	}
+
+	if ((module_en_update & RKISP1_CIF_ISP_MODULE_FLT) ||
+	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_FLT)) {
+		/* update filter config */
+		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_FLT)
+			rkisp1_flt_config(params,
+					  &new_params->others.flt_config);
+
+		if (module_en_update & RKISP1_CIF_ISP_MODULE_FLT) {
+			if (module_ens & RKISP1_CIF_ISP_MODULE_FLT)
+				rkisp1_param_set_bits(params,
+						      RKISP1_CIF_ISP_FILT_MODE,
+						      RKISP1_CIF_ISP_FLT_ENA);
+			else
+				rkisp1_param_clear_bits(params,
+							RKISP1_CIF_ISP_FILT_MODE,
+							RKISP1_CIF_ISP_FLT_ENA);
+		}
+	}
+
+	if ((module_en_update & RKISP1_CIF_ISP_MODULE_CTK) ||
+	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_CTK)) {
+		/* update ctk config */
+		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_CTK)
+			rkisp1_ctk_config(params,
+					  &new_params->others.ctk_config);
+
+		if (module_en_update & RKISP1_CIF_ISP_MODULE_CTK)
+			rkisp1_ctk_enable(params,
+					  !!(module_ens & RKISP1_CIF_ISP_MODULE_CTK));
+	}
+
+	if ((module_en_update & RKISP1_CIF_ISP_MODULE_GOC) ||
+	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_GOC)) {
+		/* update goc config */
+		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_GOC)
+			rkisp1_goc_config(params,
+					  &new_params->others.goc_config);
+
+		if (module_en_update & RKISP1_CIF_ISP_MODULE_GOC) {
+			if (module_ens & RKISP1_CIF_ISP_MODULE_GOC)
+				rkisp1_param_set_bits(params,
+						      RKISP1_CIF_ISP_CTRL,
+						      RKISP1_CIF_ISP_CTRL_ISP_GAMMA_OUT_ENA);
+			else
+				rkisp1_param_clear_bits(params,
+							RKISP1_CIF_ISP_CTRL,
+							RKISP1_CIF_ISP_CTRL_ISP_GAMMA_OUT_ENA);
+		}
+	}
+
+	if ((module_en_update & RKISP1_CIF_ISP_MODULE_CPROC) ||
+	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_CPROC)) {
+		/* update cproc config */
+		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_CPROC) {
+			rkisp1_cproc_config(params,
+					    &new_params->others.cproc_config);
+		}
+
+		if (module_en_update & RKISP1_CIF_ISP_MODULE_CPROC) {
+			if (module_ens & RKISP1_CIF_ISP_MODULE_CPROC)
+				rkisp1_param_set_bits(params,
+						      RKISP1_CIF_C_PROC_CTRL,
+						      RKISP1_CIF_C_PROC_CTR_ENABLE);
+			else
+				rkisp1_param_clear_bits(params,
+							RKISP1_CIF_C_PROC_CTRL,
+							RKISP1_CIF_C_PROC_CTR_ENABLE);
+		}
+	}
+
+	if ((module_en_update & RKISP1_CIF_ISP_MODULE_IE) ||
+	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_IE)) {
+		/* update ie config */
+		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_IE)
+			rkisp1_ie_config(params,
+					 &new_params->others.ie_config);
+
+		if (module_en_update & RKISP1_CIF_ISP_MODULE_IE)
+			rkisp1_ie_enable(params,
+					 !!(module_ens & RKISP1_CIF_ISP_MODULE_IE));
+	}
+
+	if ((module_en_update & RKISP1_CIF_ISP_MODULE_DPF) ||
+	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_DPF)) {
+		/* update dpf config */
+		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_DPF)
+			rkisp1_dpf_config(params,
+					  &new_params->others.dpf_config);
+
+		if (module_en_update & RKISP1_CIF_ISP_MODULE_DPF) {
+			if (module_ens & RKISP1_CIF_ISP_MODULE_DPF)
+				rkisp1_param_set_bits(params,
+						      RKISP1_CIF_ISP_DPF_MODE,
+						      RKISP1_CIF_ISP_DPF_MODE_EN);
+			else
+				rkisp1_param_clear_bits(params,
+							RKISP1_CIF_ISP_DPF_MODE,
+							RKISP1_CIF_ISP_DPF_MODE_EN);
+		}
+	}
+
+	if ((module_en_update & RKISP1_CIF_ISP_MODULE_DPF_STRENGTH) ||
+	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_DPF_STRENGTH)) {
+		/* no enable bit handled here: rewrite the strengths on either flag */
+		rkisp1_dpf_strength_config(params,
+					   &new_params->others.dpf_strength_config);
+	}
+}
+
+static void rkisp1_isp_isr_meas_config(struct rkisp1_params *params,
+				       struct  rkisp1_params_cfg *new_params)
+{
+	unsigned int module_en_update, module_cfg_update, module_ens;
+
+	module_en_update = new_params->module_en_update;
+	module_cfg_update = new_params->module_cfg_update;
+	module_ens = new_params->module_ens;
+	/* per module: write new config if flagged, then update its enable state */
+	if ((module_en_update & RKISP1_CIF_ISP_MODULE_AWB) ||
+	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_AWB)) {
+		/* update awb config */
+		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_AWB)
+			rkisp1_awb_meas_config(params,
+					       &new_params->meas.awb_meas_config);
+
+		if (module_en_update & RKISP1_CIF_ISP_MODULE_AWB)
+			rkisp1_awb_meas_enable(params,
+					       &new_params->meas.awb_meas_config,
+					       !!(module_ens & RKISP1_CIF_ISP_MODULE_AWB));
+	}
+
+	if ((module_en_update & RKISP1_CIF_ISP_MODULE_AFC) ||
+	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_AFC)) {
+		/* update afc config */
+		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_AFC)
+			rkisp1_afm_config(params,
+					  &new_params->meas.afc_config);
+
+		if (module_en_update & RKISP1_CIF_ISP_MODULE_AFC) {
+			if (module_ens & RKISP1_CIF_ISP_MODULE_AFC)
+				rkisp1_param_set_bits(params,
+						      RKISP1_CIF_ISP_AFM_CTRL,
+						      RKISP1_CIF_ISP_AFM_ENA);
+			else
+				rkisp1_param_clear_bits(params,
+							RKISP1_CIF_ISP_AFM_CTRL,
+							RKISP1_CIF_ISP_AFM_ENA);
+		}
+	}
+
+	if ((module_en_update & RKISP1_CIF_ISP_MODULE_HST) ||
+	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_HST)) {
+		/* update hst config */
+		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_HST)
+			rkisp1_hst_config(params,
+					  &new_params->meas.hst_config);
+
+		if (module_en_update & RKISP1_CIF_ISP_MODULE_HST)
+			rkisp1_hst_enable(params,
+					  &new_params->meas.hst_config,
+					  !!(module_ens & RKISP1_CIF_ISP_MODULE_HST));
+	}
+
+	if ((module_en_update & RKISP1_CIF_ISP_MODULE_AEC) ||
+	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_AEC)) {
+		/* update aec config */
+		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_AEC)
+			rkisp1_aec_config(params,
+					  &new_params->meas.aec_config);
+
+		if (module_en_update & RKISP1_CIF_ISP_MODULE_AEC) {
+			if (module_ens & RKISP1_CIF_ISP_MODULE_AEC)
+				rkisp1_param_set_bits(params,
+						      RKISP1_CIF_ISP_EXP_CTRL,
+						      RKISP1_CIF_ISP_EXP_ENA);
+			else
+				rkisp1_param_clear_bits(params,
+							RKISP1_CIF_ISP_EXP_CTRL,
+							RKISP1_CIF_ISP_EXP_ENA);
+		}
+	}
+}
+
+static void rkisp1_params_apply_params_cfg(struct rkisp1_params *params,
+					   unsigned int frame_sequence)
+{
+	struct rkisp1_params_cfg *cfg;
+	struct rkisp1_buffer *buf;
+
+	/* nothing to apply until userspace has queued a parameter buffer */
+	if (list_empty(&params->params))
+		return;
+
+	buf = list_first_entry(&params->params, struct rkisp1_buffer, queue);
+	cfg = (struct rkisp1_params_cfg *)buf->vaddr;
+
+	rkisp1_isp_isr_other_config(params, cfg);
+	rkisp1_isp_isr_meas_config(params, cfg);
+
+	/* update shadow register immediately */
+	rkisp1_param_set_bits(params, RKISP1_CIF_ISP_CTRL,
+			      RKISP1_CIF_ISP_CTRL_ISP_CFG_UPD);
+
+	/* the buffer is consumed: unlink it and return it, stamped, to vb2 */
+	list_del(&buf->queue);
+	buf->vb.sequence = frame_sequence;
+	vb2_buffer_done(&buf->vb.vb2_buf, VB2_BUF_STATE_DONE);
+}
+
+void rkisp1_params_isr(struct rkisp1_device *rkisp1)
+{
+	struct rkisp1_params *params = &rkisp1->params;
+	unsigned int next_frame;
+
+	/*
+	 * Called when the ISP finishes processing a frame (RKISP1_CIF_ISP_FRAME),
+	 * so whatever is programmed here is applied on the next frame.
+	 * frame_sequence is only updated on the vertical sync signal, hence the
+	 * + 1 to tell userspace which frame these parameters are applied to.
+	 */
+	next_frame = rkisp1->isp.frame_sequence + 1;
+
+	spin_lock(&params->config_lock);
+	rkisp1_params_apply_params_cfg(params, next_frame);
+	spin_unlock(&params->config_lock);
+}
+
+static const struct rkisp1_cif_isp_awb_meas_config rkisp1_awb_params_default_config = {
+	{
+		0, 0, RKISP1_DEFAULT_WIDTH, RKISP1_DEFAULT_HEIGHT
+	},
+	RKISP1_CIF_ISP_AWB_MODE_YCBCR, 200, 30, 20, 20, 0, 128, 128
+};
+
+static const struct rkisp1_cif_isp_aec_config rkisp1_aec_params_default_config = {
+	RKISP1_CIF_ISP_EXP_MEASURING_MODE_0,
+	RKISP1_CIF_ISP_EXP_CTRL_AUTOSTOP_0,
+	{
+		RKISP1_DEFAULT_WIDTH >> 2, RKISP1_DEFAULT_HEIGHT >> 2,
+		RKISP1_DEFAULT_WIDTH >> 1, RKISP1_DEFAULT_HEIGHT >> 1
+	}
+};
+
+static const struct rkisp1_cif_isp_hst_config rkisp1_hst_params_default_config = {
+	RKISP1_CIF_ISP_HISTOGRAM_MODE_RGB_COMBINED,
+	3,
+	{
+		RKISP1_DEFAULT_WIDTH >> 2, RKISP1_DEFAULT_HEIGHT >> 2,
+		RKISP1_DEFAULT_WIDTH >> 1, RKISP1_DEFAULT_HEIGHT >> 1
+	},
+	{
+		0, /* placeholder: a runtime copy gets hist_weight memset to 0x01 */
+	}
+};
+
+static const struct rkisp1_cif_isp_afc_config rkisp1_afc_params_default_config = {
+	1,
+	{
+		{
+			300, 225, 200, 150
+		}
+	},
+	4,
+	14
+};
+
+/* Program default measurement settings, then apply an already queued buffer. */
+static void rkisp1_params_config_parameter(struct rkisp1_params *params)
+{
+	struct rkisp1_cif_isp_hst_config hst = rkisp1_hst_params_default_config;
+
+	rkisp1_awb_meas_config(params, &rkisp1_awb_params_default_config);
+	rkisp1_awb_meas_enable(params, &rkisp1_awb_params_default_config,
+			       true);
+
+	rkisp1_aec_config(params, &rkisp1_aec_params_default_config);
+	rkisp1_param_set_bits(params, RKISP1_CIF_ISP_EXP_CTRL,
+			      RKISP1_CIF_ISP_EXP_ENA);
+
+	rkisp1_afm_config(params, &rkisp1_afc_params_default_config);
+	rkisp1_param_set_bits(params, RKISP1_CIF_ISP_AFM_CTRL,
+			      RKISP1_CIF_ISP_AFM_ENA);
+
+	/* the const default leaves hist_weight at 0; fill every byte with 0x01 */
+	memset(hst.hist_weight, 0x01, sizeof(hst.hist_weight));
+	rkisp1_hst_config(params, &hst);
+	/* pass only the mode bits: adding ~MODE_MASK set every other bit too */
+	rkisp1_param_set_bits(params, RKISP1_CIF_ISP_HIST_PROP,
+			      rkisp1_hst_params_default_config.mode);
+
+	/* set the quantization range */
+	rkisp1_csm_config(params,
+			  params->quantization == V4L2_QUANTIZATION_FULL_RANGE);
+
+	spin_lock_irq(&params->config_lock);
+
+	/* apply the first buffer if there is one already */
+	rkisp1_params_apply_params_cfg(params, 0);
+
+	spin_unlock_irq(&params->config_lock);
+}
+
+void rkisp1_params_configure(struct rkisp1_params *params,
+			     enum rkisp1_fmt_raw_pat_type bayer_pat,
+			     enum v4l2_quantization quantization)
+{
+	params->raw_type = bayer_pat;
+	params->quantization = quantization;	/* picks the CSM range below */
+	rkisp1_params_config_parameter(params);
+}
+
+/* Not called when the camera is active, thus no isr protection is needed. */
+void rkisp1_params_disable(struct rkisp1_params *params)
+{
+	rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_DPCC_MODE,
+				RKISP1_CIF_ISP_DPCC_ENA);
+	rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_LSC_CTRL,
+				RKISP1_CIF_ISP_LSC_CTRL_ENA);
+	rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_BLS_CTRL,
+				RKISP1_CIF_ISP_BLS_ENA);
+	rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_CTRL,
+				RKISP1_CIF_ISP_CTRL_ISP_GAMMA_IN_ENA);
+	rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_CTRL,
+				RKISP1_CIF_ISP_CTRL_ISP_GAMMA_OUT_ENA);
+	rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_DEMOSAIC,
+				RKISP1_CIF_ISP_DEMOSAIC_BYPASS);
+	rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_FILT_MODE,
+				RKISP1_CIF_ISP_FLT_ENA);
+	rkisp1_awb_meas_enable(params, NULL, false);
+	rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_CTRL,
+				RKISP1_CIF_ISP_CTRL_ISP_AWB_ENA);
+	rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_EXP_CTRL,
+				RKISP1_CIF_ISP_EXP_ENA);
+	rkisp1_ctk_enable(params, false);
+	rkisp1_param_clear_bits(params, RKISP1_CIF_C_PROC_CTRL,
+				RKISP1_CIF_C_PROC_CTR_ENABLE);
+	rkisp1_hst_enable(params, NULL, false);
+	rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_AFM_CTRL,
+				RKISP1_CIF_ISP_AFM_ENA);
+	rkisp1_ie_enable(params, false);
+	rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_DPF_MODE,
+				RKISP1_CIF_ISP_DPF_MODE_EN);
+}
+
+static int rkisp1_params_enum_fmt_meta_out(struct file *file, void *priv,
+					   struct v4l2_fmtdesc *f)
+{
+	struct video_device *vdev = video_devdata(file);
+	struct rkisp1_params *params = video_get_drvdata(vdev);
+
+	/* exactly one format (index 0) is exposed, on the node's own queue type */
+	if (f->type != vdev->queue->type || f->index != 0)
+		return -EINVAL;
+
+	f->pixelformat = params->vdev_fmt.fmt.meta.dataformat;
+	return 0;
+}
+
+static int rkisp1_params_g_fmt_meta_out(struct file *file, void *fh,
+					struct v4l2_format *f)
+{
+	struct video_device *vdev = video_devdata(file);
+	struct rkisp1_params *params = video_get_drvdata(vdev);
+	const struct v4l2_meta_format *cur = &params->vdev_fmt.fmt.meta;
+
+	if (f->type != vdev->queue->type)
+		return -EINVAL;
+
+	/* report the fixed params format; all other meta fields read as zero */
+	memset(&f->fmt.meta, 0, sizeof(f->fmt.meta));
+	f->fmt.meta.dataformat = cur->dataformat;
+	f->fmt.meta.buffersize = cur->buffersize;
+	return 0;
+}
+
+static int rkisp1_params_querycap(struct file *file,
+				  void *priv, struct v4l2_capability *cap)
+{
+	const char *card = video_devdata(file)->name;
+
+	/* fixed driver/bus strings; the card name is this video node's name */
+	strscpy(cap->driver, RKISP1_DRIVER_NAME, sizeof(cap->driver));
+	strscpy(cap->card, card, sizeof(cap->card));
+	strscpy(cap->bus_info, RKISP1_BUS_INFO, sizeof(cap->bus_info));
+	return 0;
+}
+
+/* ISP params video device IOCTLs; the g/s/try format ops share one handler */
+static const struct v4l2_ioctl_ops rkisp1_params_ioctl = {
+	.vidioc_reqbufs = vb2_ioctl_reqbufs,
+	.vidioc_querybuf = vb2_ioctl_querybuf,
+	.vidioc_create_bufs = vb2_ioctl_create_bufs,
+	.vidioc_qbuf = vb2_ioctl_qbuf,
+	.vidioc_dqbuf = vb2_ioctl_dqbuf,
+	.vidioc_prepare_buf = vb2_ioctl_prepare_buf,
+	.vidioc_expbuf = vb2_ioctl_expbuf,
+	.vidioc_streamon = vb2_ioctl_streamon,
+	.vidioc_streamoff = vb2_ioctl_streamoff,
+	.vidioc_enum_fmt_meta_out = rkisp1_params_enum_fmt_meta_out,
+	.vidioc_g_fmt_meta_out = rkisp1_params_g_fmt_meta_out,
+	.vidioc_s_fmt_meta_out = rkisp1_params_g_fmt_meta_out,
+	.vidioc_try_fmt_meta_out = rkisp1_params_g_fmt_meta_out,
+	.vidioc_querycap = rkisp1_params_querycap,
+	.vidioc_subscribe_event = v4l2_ctrl_subscribe_event,
+	.vidioc_unsubscribe_event = v4l2_event_unsubscribe,
+};
+
+static int rkisp1_params_vb2_queue_setup(struct vb2_queue *vq,
+					 unsigned int *num_buffers,
+					 unsigned int *num_planes,
+					 unsigned int sizes[],
+					 struct device *alloc_devs[])
+{
+	u32 count = *num_buffers;
+
+	/* one plane per buffer, sized to hold one struct rkisp1_params_cfg */
+	sizes[0] = sizeof(struct rkisp1_params_cfg);
+	*num_planes = 1;
+	*num_buffers = clamp_t(u32, count, RKISP1_ISP_PARAMS_REQ_BUFS_MIN,
+			       RKISP1_ISP_PARAMS_REQ_BUFS_MAX);
+
+	return 0;
+}
+
+static void rkisp1_params_vb2_buf_queue(struct vb2_buffer *vb)
+{
+	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
+	struct rkisp1_buffer *buf = container_of(vbuf, struct rkisp1_buffer, vb);
+	struct rkisp1_params *params = vb->vb2_queue->drv_priv;
+
+	/* record the plane's vaddr before the buffer becomes visible on the list */
+	buf->vaddr = vb2_plane_vaddr(vb, 0);
+
+	spin_lock_irq(&params->config_lock);
+	list_add_tail(&buf->queue, &params->params);
+	spin_unlock_irq(&params->config_lock);
+}
+
+static int rkisp1_params_vb2_buf_prepare(struct vb2_buffer *vb)
+{
+	const size_t cfg_size = sizeof(struct rkisp1_params_cfg);
+
+	if (vb2_plane_size(vb, 0) < cfg_size)	/* cannot hold a full config */
+		return -EINVAL;
+	vb2_set_plane_payload(vb, 0, cfg_size);
+	return 0;
+}
+
+static void rkisp1_params_vb2_stop_streaming(struct vb2_queue *vq)
+{
+	struct rkisp1_params *params = vq->drv_priv;
+	struct rkisp1_buffer *buf;
+	LIST_HEAD(pending);
+
+	/*
+	 * Detach every queued buffer onto the private 'pending' list while
+	 * holding the lock, so that vb2_buffer_done() can afterwards be
+	 * called on each of them with the lock released.
+	 */
+	spin_lock_irq(&params->config_lock);
+	list_splice_init(&params->params, &pending);
+	spin_unlock_irq(&params->config_lock);
+
+	list_for_each_entry(buf, &pending, queue)
+		vb2_buffer_done(&buf->vb.vb2_buf, VB2_BUF_STATE_ERROR);
+}
+
+/* vb2 and file operation tables of the params video node; both are read-only */
+static const struct vb2_ops rkisp1_params_vb2_ops = {
+	.queue_setup = rkisp1_params_vb2_queue_setup,
+	.wait_prepare = vb2_ops_wait_prepare,
+	.wait_finish = vb2_ops_wait_finish,
+	.buf_queue = rkisp1_params_vb2_buf_queue,
+	.buf_prepare = rkisp1_params_vb2_buf_prepare,
+	.stop_streaming = rkisp1_params_vb2_stop_streaming,
+};
+
+static const struct v4l2_file_operations rkisp1_params_fops = {
+	.mmap = vb2_fop_mmap,
+	.unlocked_ioctl = video_ioctl2,
+	.poll = vb2_fop_poll,
+	.open = v4l2_fh_open,
+	.release = vb2_fop_release
+};
+
+static int rkisp1_params_init_vb2_queue(struct vb2_queue *q,
+					struct rkisp1_params *params)
+{
+	struct rkisp1_vdev_node *node =
+		container_of(q, struct rkisp1_vdev_node, buf_queue);
+
+	/* meta-output queue using the vmalloc memops and the node's vlock */
+	q->type = V4L2_BUF_TYPE_META_OUTPUT;
+	q->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF;
+	q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
+	q->buf_struct_size = sizeof(struct rkisp1_buffer);
+	q->mem_ops = &vb2_vmalloc_memops;
+	q->ops = &rkisp1_params_vb2_ops;
+	q->drv_priv = params;
+	q->lock = &node->vlock;
+
+	return vb2_queue_init(q);
+}
+
+static void rkisp1_init_params(struct rkisp1_params *params)
+{
+	struct v4l2_meta_format *meta = &params->vdev_fmt.fmt.meta;
+
+	meta->dataformat = V4L2_META_FMT_RK_ISP1_PARAMS;
+	meta->buffersize = sizeof(struct rkisp1_params_cfg);
+}
+
+/* Set up and register the params video node; returns 0 or the failing step's error. */
+int rkisp1_params_register(struct rkisp1_device *rkisp1)
+{
+	struct rkisp1_params *params = &rkisp1->params;
+	struct rkisp1_vdev_node *node = &params->vnode;
+	struct video_device *vdev = &node->vdev;
+	int ret;
+
+	params->rkisp1 = rkisp1;
+	mutex_init(&node->vlock);
+	INIT_LIST_HEAD(&params->params);
+	spin_lock_init(&params->config_lock);
+
+	strscpy(vdev->name, RKISP1_PARAMS_DEV_NAME, sizeof(vdev->name));
+
+	video_set_drvdata(vdev, params);
+	vdev->ioctl_ops = &rkisp1_params_ioctl;
+	vdev->fops = &rkisp1_params_fops;
+	vdev->release = video_device_release_empty;
+	/* the v4l2 core uses this mutex to protect all fops and v4l2 ioctls */
+	vdev->lock = &node->vlock;
+	vdev->v4l2_dev = &rkisp1->v4l2_dev;
+	vdev->queue = &node->buf_queue;
+	vdev->device_caps = V4L2_CAP_STREAMING | V4L2_CAP_META_OUTPUT;
+	vdev->vfl_dir = VFL_DIR_TX;
+	rkisp1_init_params(params);
+	/* never register a node whose buffer queue failed to initialize */
+	ret = rkisp1_params_init_vb2_queue(vdev->queue, params);
+	if (ret)
+		return ret;
+
+	node->pad.flags = MEDIA_PAD_FL_SOURCE;
+	ret = media_entity_pads_init(&vdev->entity, 1, &node->pad);
+	if (ret)
+		return ret;
+	ret = video_register_device(vdev, VFL_TYPE_VIDEO, -1);
+	if (ret) {
+		dev_err(rkisp1->dev,
+			"failed to register %s, ret=%d\n", vdev->name, ret);
+		goto err_cleanup_media_entity;
+	}
+	return 0;
+err_cleanup_media_entity:
+	media_entity_cleanup(&vdev->entity);
+	return ret;
+}
+
+void rkisp1_params_unregister(struct rkisp1_device *rkisp1)
+{
+	struct rkisp1_vdev_node *node = &rkisp1->params.vnode;
+	struct video_device *vdev = &node->vdev;
+
+	/* unregister the video node first, then clean up its media entity */
+	vb2_video_unregister_device(vdev);
+	media_entity_cleanup(&vdev->entity);
+}
diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-regs.h b/drivers/media/platform/rockchip/rkisp1/rkisp1-regs.h
new file mode 100644
index 000000000000..049f6c3a11df
--- /dev/null
+++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-regs.h
@@ -0,0 +1,1262 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR MIT) */
+/*
+ * Rockchip ISP1 Driver - Registers header
+ *
+ * Copyright (C) 2017 Rockchip Electronics Co., Ltd.
+ */
+
+#ifndef _RKISP1_REGS_H
+#define _RKISP1_REGS_H
+
+/* ISP_CTRL */
+#define RKISP1_CIF_ISP_CTRL_ISP_ENABLE			BIT(0)
+#define RKISP1_CIF_ISP_CTRL_ISP_MODE_RAW_PICT		(0 << 1)
+#define RKISP1_CIF_ISP_CTRL_ISP_MODE_ITU656		BIT(1)
+#define RKISP1_CIF_ISP_CTRL_ISP_MODE_ITU601		(2 << 1)
+#define RKISP1_CIF_ISP_CTRL_ISP_MODE_BAYER_ITU601	(3 << 1)
+#define RKISP1_CIF_ISP_CTRL_ISP_MODE_DATA_MODE		(4 << 1)
+#define RKISP1_CIF_ISP_CTRL_ISP_MODE_BAYER_ITU656	(5 << 1)
+#define RKISP1_CIF_ISP_CTRL_ISP_MODE_RAW_PICT_ITU656	(6 << 1)
+#define RKISP1_CIF_ISP_CTRL_ISP_INFORM_ENABLE		BIT(4)
+#define RKISP1_CIF_ISP_CTRL_ISP_GAMMA_IN_ENA		BIT(6)
+#define RKISP1_CIF_ISP_CTRL_ISP_AWB_ENA			BIT(7)
+#define RKISP1_CIF_ISP_CTRL_ISP_CFG_UPD_PERMANENT	BIT(8)
+#define RKISP1_CIF_ISP_CTRL_ISP_CFG_UPD			BIT(9)
+#define RKISP1_CIF_ISP_CTRL_ISP_GEN_CFG_UPD		BIT(10)
+#define RKISP1_CIF_ISP_CTRL_ISP_GAMMA_OUT_ENA		BIT(11)
+#define RKISP1_CIF_ISP_CTRL_ISP_FLASH_MODE_ENA		BIT(12)
+#define RKISP1_CIF_ISP_CTRL_ISP_CSM_Y_FULL_ENA		BIT(13)
+#define RKISP1_CIF_ISP_CTRL_ISP_CSM_C_FULL_ENA		BIT(14)
+
+/* ISP_ACQ_PROP */
+#define RKISP1_CIF_ISP_ACQ_PROP_POS_EDGE		BIT(0)
+#define RKISP1_CIF_ISP_ACQ_PROP_HSYNC_LOW		BIT(1)
+#define RKISP1_CIF_ISP_ACQ_PROP_VSYNC_LOW		BIT(2)
+#define RKISP1_CIF_ISP_ACQ_PROP_BAYER_PAT_RGGB		(0 << 3)
+#define RKISP1_CIF_ISP_ACQ_PROP_BAYER_PAT_GRBG		BIT(3)
+#define RKISP1_CIF_ISP_ACQ_PROP_BAYER_PAT_GBRG		(2 << 3)
+#define RKISP1_CIF_ISP_ACQ_PROP_BAYER_PAT_BGGR		(3 << 3)
+#define RKISP1_CIF_ISP_ACQ_PROP_BAYER_PAT(pat)		((pat) << 3)
+#define RKISP1_CIF_ISP_ACQ_PROP_YCBYCR			(0 << 7)
+#define RKISP1_CIF_ISP_ACQ_PROP_YCRYCB			BIT(7)
+#define RKISP1_CIF_ISP_ACQ_PROP_CBYCRY			(2 << 7)
+#define RKISP1_CIF_ISP_ACQ_PROP_CRYCBY			(3 << 7)
+#define RKISP1_CIF_ISP_ACQ_PROP_FIELD_SEL_ALL		(0 << 9)
+#define RKISP1_CIF_ISP_ACQ_PROP_FIELD_SEL_EVEN		BIT(9)
+#define RKISP1_CIF_ISP_ACQ_PROP_FIELD_SEL_ODD		(2 << 9)
+#define RKISP1_CIF_ISP_ACQ_PROP_IN_SEL_12B		(0 << 12)
+#define RKISP1_CIF_ISP_ACQ_PROP_IN_SEL_10B_ZERO		BIT(12)
+#define RKISP1_CIF_ISP_ACQ_PROP_IN_SEL_10B_MSB		(2 << 12)
+#define RKISP1_CIF_ISP_ACQ_PROP_IN_SEL_8B_ZERO		(3 << 12)
+#define RKISP1_CIF_ISP_ACQ_PROP_IN_SEL_8B_MSB		(4 << 12)
+
+/* VI_DPCL */
+#define RKISP1_CIF_VI_DPCL_DMA_JPEG			(0 << 0)
+#define RKISP1_CIF_VI_DPCL_MP_MUX_MRSZ_MI		BIT(0)
+#define RKISP1_CIF_VI_DPCL_MP_MUX_MRSZ_JPEG		(2 << 0)
+#define RKISP1_CIF_VI_DPCL_CHAN_MODE_MP			BIT(2)
+#define RKISP1_CIF_VI_DPCL_CHAN_MODE_SP			(2 << 2)
+#define RKISP1_CIF_VI_DPCL_CHAN_MODE_MPSP		(3 << 2)
+#define RKISP1_CIF_VI_DPCL_DMA_SW_SPMUX			(0 << 4)
+#define RKISP1_CIF_VI_DPCL_DMA_SW_SI			BIT(4)
+#define RKISP1_CIF_VI_DPCL_DMA_SW_IE			(2 << 4)
+#define RKISP1_CIF_VI_DPCL_DMA_SW_JPEG			(3 << 4)
+#define RKISP1_CIF_VI_DPCL_DMA_SW_ISP			(4 << 4)
+#define RKISP1_CIF_VI_DPCL_IF_SEL_PARALLEL		(0 << 8)
+#define RKISP1_CIF_VI_DPCL_IF_SEL_SMIA			BIT(8)
+#define RKISP1_CIF_VI_DPCL_IF_SEL_MIPI			(2 << 8)
+#define RKISP1_CIF_VI_DPCL_DMA_IE_MUX_DMA		BIT(10)
+#define RKISP1_CIF_VI_DPCL_DMA_SP_MUX_DMA		BIT(11)
+
+/* ISP_IMSC - ISP_MIS - ISP_RIS - ISP_ICR - ISP_ISR */
+#define RKISP1_CIF_ISP_OFF				BIT(0)
+#define RKISP1_CIF_ISP_FRAME				BIT(1)
+#define RKISP1_CIF_ISP_DATA_LOSS			BIT(2)
+#define RKISP1_CIF_ISP_PIC_SIZE_ERROR			BIT(3)
+#define RKISP1_CIF_ISP_AWB_DONE				BIT(4)
+#define RKISP1_CIF_ISP_FRAME_IN				BIT(5)
+#define RKISP1_CIF_ISP_V_START				BIT(6)
+#define RKISP1_CIF_ISP_H_START				BIT(7)
+#define RKISP1_CIF_ISP_FLASH_ON				BIT(8)
+#define RKISP1_CIF_ISP_FLASH_OFF			BIT(9)
+#define RKISP1_CIF_ISP_SHUTTER_ON			BIT(10)
+#define RKISP1_CIF_ISP_SHUTTER_OFF			BIT(11)
+#define RKISP1_CIF_ISP_AFM_SUM_OF			BIT(12)
+#define RKISP1_CIF_ISP_AFM_LUM_OF			BIT(13)
+#define RKISP1_CIF_ISP_AFM_FIN				BIT(14)
+#define RKISP1_CIF_ISP_HIST_MEASURE_RDY			BIT(15)
+#define RKISP1_CIF_ISP_FLASH_CAP			BIT(17)
+#define RKISP1_CIF_ISP_EXP_END				BIT(18)
+#define RKISP1_CIF_ISP_VSM_END				BIT(19)
+
+/* ISP_ERR */
+#define RKISP1_CIF_ISP_ERR_INFORM_SIZE			BIT(0)
+#define RKISP1_CIF_ISP_ERR_IS_SIZE			BIT(1)
+#define RKISP1_CIF_ISP_ERR_OUTFORM_SIZE			BIT(2)
+
+/* MI_CTRL */
+#define RKISP1_CIF_MI_CTRL_MP_ENABLE			BIT(0)
+#define RKISP1_CIF_MI_CTRL_SP_ENABLE			(2 << 0)
+#define RKISP1_CIF_MI_CTRL_JPEG_ENABLE			(4 << 0)
+#define RKISP1_CIF_MI_CTRL_RAW_ENABLE			(8 << 0)
+#define RKISP1_CIF_MI_CTRL_HFLIP			BIT(4)
+#define RKISP1_CIF_MI_CTRL_VFLIP			BIT(5)
+#define RKISP1_CIF_MI_CTRL_ROT				BIT(6)
+#define RKISP1_CIF_MI_BYTE_SWAP				BIT(7)
+#define RKISP1_CIF_MI_SP_Y_FULL_YUV2RGB			BIT(8)
+#define RKISP1_CIF_MI_SP_CBCR_FULL_YUV2RGB		BIT(9)
+#define RKISP1_CIF_MI_SP_422NONCOSITEED			BIT(10)
+#define RKISP1_CIF_MI_MP_PINGPONG_ENABEL		BIT(11)
+#define RKISP1_CIF_MI_SP_PINGPONG_ENABEL		BIT(12)
+#define RKISP1_CIF_MI_MP_AUTOUPDATE_ENABLE		BIT(13)
+#define RKISP1_CIF_MI_SP_AUTOUPDATE_ENABLE		BIT(14)
+#define RKISP1_CIF_MI_LAST_PIXEL_SIG_ENABLE		BIT(15)
+#define RKISP1_CIF_MI_CTRL_BURST_LEN_LUM_16		(0 << 16)
+#define RKISP1_CIF_MI_CTRL_BURST_LEN_LUM_32		BIT(16)
+#define RKISP1_CIF_MI_CTRL_BURST_LEN_LUM_64		(2 << 16)
+#define RKISP1_CIF_MI_CTRL_BURST_LEN_CHROM_16		(0 << 18)
+#define RKISP1_CIF_MI_CTRL_BURST_LEN_CHROM_32		BIT(18)
+#define RKISP1_CIF_MI_CTRL_BURST_LEN_CHROM_64		(2 << 18)
+#define RKISP1_CIF_MI_CTRL_INIT_BASE_EN			BIT(20)
+#define RKISP1_CIF_MI_CTRL_INIT_OFFSET_EN		BIT(21)
+#define RKISP1_MI_CTRL_MP_WRITE_YUV_PLA_OR_RAW8		(0 << 22)
+#define RKISP1_MI_CTRL_MP_WRITE_YUV_SPLA		BIT(22)
+#define RKISP1_MI_CTRL_MP_WRITE_YUVINT			(2 << 22)
+#define RKISP1_MI_CTRL_MP_WRITE_RAW12			(2 << 22)
+#define RKISP1_MI_CTRL_SP_WRITE_PLA			(0 << 24)
+#define RKISP1_MI_CTRL_SP_WRITE_SPLA			BIT(24)
+#define RKISP1_MI_CTRL_SP_WRITE_INT			(2 << 24)
+#define RKISP1_MI_CTRL_SP_INPUT_YUV400			(0 << 26)
+#define RKISP1_MI_CTRL_SP_INPUT_YUV420			BIT(26)
+#define RKISP1_MI_CTRL_SP_INPUT_YUV422			(2 << 26)
+#define RKISP1_MI_CTRL_SP_INPUT_YUV444			(3 << 26)
+#define RKISP1_MI_CTRL_SP_OUTPUT_YUV400			(0 << 28)
+#define RKISP1_MI_CTRL_SP_OUTPUT_YUV420			BIT(28)
+#define RKISP1_MI_CTRL_SP_OUTPUT_YUV422			(2 << 28)
+#define RKISP1_MI_CTRL_SP_OUTPUT_YUV444			(3 << 28)
+#define RKISP1_MI_CTRL_SP_OUTPUT_RGB565			(4 << 28)
+#define RKISP1_MI_CTRL_SP_OUTPUT_RGB666			(5 << 28)
+#define RKISP1_MI_CTRL_SP_OUTPUT_RGB888			(6 << 28)
+
+#define RKISP1_MI_CTRL_MP_FMT_MASK			GENMASK(23, 22)
+#define RKISP1_MI_CTRL_SP_FMT_MASK			GENMASK(30, 24)
+
+/* MI_INIT */
+#define RKISP1_CIF_MI_INIT_SKIP				BIT(2)
+#define RKISP1_CIF_MI_INIT_SOFT_UPD			BIT(4)
+
+/* MI_CTRL_SHD */
+#define RKISP1_CIF_MI_CTRL_SHD_MP_IN_ENABLED		BIT(0)
+#define RKISP1_CIF_MI_CTRL_SHD_SP_IN_ENABLED		BIT(1)
+#define RKISP1_CIF_MI_CTRL_SHD_JPEG_IN_ENABLED		BIT(2)
+#define RKISP1_CIF_MI_CTRL_SHD_RAW_IN_ENABLED		BIT(3)
+#define RKISP1_CIF_MI_CTRL_SHD_MP_OUT_ENABLED		BIT(16)
+#define RKISP1_CIF_MI_CTRL_SHD_SP_OUT_ENABLED		BIT(17)
+#define RKISP1_CIF_MI_CTRL_SHD_JPEG_OUT_ENABLED		BIT(18)
+#define RKISP1_CIF_MI_CTRL_SHD_RAW_OUT_ENABLED		BIT(19)
+
+/* RSZ_CTRL */
+#define RKISP1_CIF_RSZ_CTRL_SCALE_HY_ENABLE		BIT(0)
+#define RKISP1_CIF_RSZ_CTRL_SCALE_HC_ENABLE		BIT(1)
+#define RKISP1_CIF_RSZ_CTRL_SCALE_VY_ENABLE		BIT(2)
+#define RKISP1_CIF_RSZ_CTRL_SCALE_VC_ENABLE		BIT(3)
+#define RKISP1_CIF_RSZ_CTRL_SCALE_HY_UP			BIT(4)
+#define RKISP1_CIF_RSZ_CTRL_SCALE_HC_UP			BIT(5)
+#define RKISP1_CIF_RSZ_CTRL_SCALE_VY_UP			BIT(6)
+#define RKISP1_CIF_RSZ_CTRL_SCALE_VC_UP			BIT(7)
+#define RKISP1_CIF_RSZ_CTRL_CFG_UPD			BIT(8)
+#define RKISP1_CIF_RSZ_CTRL_CFG_UPD_AUTO		BIT(9)
+#define RKISP1_CIF_RSZ_SCALER_FACTOR			BIT(16)
+
+/* MI_IMSC - MI_MIS - MI_RIS - MI_ICR - MI_ISR */
+#define RKISP1_CIF_MI_FRAME(stream)			BIT((stream)->id)
+#define RKISP1_CIF_MI_MBLK_LINE				BIT(2)
+#define RKISP1_CIF_MI_FILL_MP_Y				BIT(3)
+#define RKISP1_CIF_MI_WRAP_MP_Y				BIT(4)
+#define RKISP1_CIF_MI_WRAP_MP_CB			BIT(5)
+#define RKISP1_CIF_MI_WRAP_MP_CR			BIT(6)
+#define RKISP1_CIF_MI_WRAP_SP_Y				BIT(7)
+#define RKISP1_CIF_MI_WRAP_SP_CB			BIT(8)
+#define RKISP1_CIF_MI_WRAP_SP_CR			BIT(9)
+#define RKISP1_CIF_MI_DMA_READY				BIT(11)
+
+/* MI_STATUS */
+#define RKISP1_CIF_MI_STATUS_MP_Y_FIFO_FULL		BIT(0)
+#define RKISP1_CIF_MI_STATUS_SP_Y_FIFO_FULL		BIT(4)
+
+/* MI_DMA_CTRL */
+#define RKISP1_CIF_MI_DMA_CTRL_BURST_LEN_LUM_16		(0 << 0)
+#define RKISP1_CIF_MI_DMA_CTRL_BURST_LEN_LUM_32		BIT(0)
+#define RKISP1_CIF_MI_DMA_CTRL_BURST_LEN_LUM_64		(2 << 0)
+#define RKISP1_CIF_MI_DMA_CTRL_BURST_LEN_CHROM_16	(0 << 2)
+#define RKISP1_CIF_MI_DMA_CTRL_BURST_LEN_CHROM_32	BIT(2)
+#define RKISP1_CIF_MI_DMA_CTRL_BURST_LEN_CHROM_64	(2 << 2)
+#define RKISP1_CIF_MI_DMA_CTRL_READ_FMT_PLANAR		(0 << 4)
+#define RKISP1_CIF_MI_DMA_CTRL_READ_FMT_SPLANAR		BIT(4)
+#define RKISP1_CIF_MI_DMA_CTRL_FMT_YUV400		(0 << 6)
+#define RKISP1_CIF_MI_DMA_CTRL_FMT_YUV420		BIT(6)
+#define RKISP1_CIF_MI_DMA_CTRL_READ_FMT_PACKED		(2 << 4)
+#define RKISP1_CIF_MI_DMA_CTRL_FMT_YUV422		(2 << 6)
+#define RKISP1_CIF_MI_DMA_CTRL_FMT_YUV444		(3 << 6)
+#define RKISP1_CIF_MI_DMA_CTRL_BYTE_SWAP		BIT(8)
+#define RKISP1_CIF_MI_DMA_CTRL_CONTINUOUS_ENA		BIT(9)
+#define RKISP1_CIF_MI_DMA_CTRL_RGB_BAYER_NO		(0 << 12)
+#define RKISP1_CIF_MI_DMA_CTRL_RGB_BAYER_8BIT		BIT(12)
+#define RKISP1_CIF_MI_DMA_CTRL_RGB_BAYER_16BIT		(2 << 12)
+/* MI_DMA_START */
+#define RKISP1_CIF_MI_DMA_START_ENABLE			BIT(0)
+/* MI_XTD_FORMAT_CTRL */
+#define RKISP1_CIF_MI_XTD_FMT_CTRL_MP_CB_CR_SWAP	BIT(0)
+#define RKISP1_CIF_MI_XTD_FMT_CTRL_SP_CB_CR_SWAP	BIT(1)
+#define RKISP1_CIF_MI_XTD_FMT_CTRL_DMA_CB_CR_SWAP	BIT(2)
+
+/* CCL */
+#define RKISP1_CIF_CCL_CIF_CLK_DIS			BIT(2)
+/* ICCL */
+#define RKISP1_CIF_ICCL_ISP_CLK				BIT(0)
+#define RKISP1_CIF_ICCL_CP_CLK				BIT(1)
+#define RKISP1_CIF_ICCL_RES_2				BIT(2)
+#define RKISP1_CIF_ICCL_MRSZ_CLK			BIT(3)
+#define RKISP1_CIF_ICCL_SRSZ_CLK			BIT(4)
+#define RKISP1_CIF_ICCL_JPEG_CLK			BIT(5)
+#define RKISP1_CIF_ICCL_MI_CLK				BIT(6)
+#define RKISP1_CIF_ICCL_RES_7				BIT(7)
+#define RKISP1_CIF_ICCL_IE_CLK				BIT(8)
+#define RKISP1_CIF_ICCL_SIMP_CLK			BIT(9)
+#define RKISP1_CIF_ICCL_SMIA_CLK			BIT(10)
+#define RKISP1_CIF_ICCL_MIPI_CLK			BIT(11)
+#define RKISP1_CIF_ICCL_DCROP_CLK			BIT(12)
+/* IRCL */
+#define RKISP1_CIF_IRCL_ISP_SW_RST			BIT(0)
+#define RKISP1_CIF_IRCL_CP_SW_RST			BIT(1)
+#define RKISP1_CIF_IRCL_YCS_SW_RST			BIT(2)
+#define RKISP1_CIF_IRCL_MRSZ_SW_RST			BIT(3)
+#define RKISP1_CIF_IRCL_SRSZ_SW_RST			BIT(4)
+#define RKISP1_CIF_IRCL_JPEG_SW_RST			BIT(5)
+#define RKISP1_CIF_IRCL_MI_SW_RST			BIT(6)
+#define RKISP1_CIF_IRCL_CIF_SW_RST			BIT(7)
+#define RKISP1_CIF_IRCL_IE_SW_RST			BIT(8)
+#define RKISP1_CIF_IRCL_SI_SW_RST			BIT(9)
+#define RKISP1_CIF_IRCL_MIPI_SW_RST			BIT(11)
+
+/* C_PROC_CTR */
+#define RKISP1_CIF_C_PROC_CTR_ENABLE			BIT(0)
+#define RKISP1_CIF_C_PROC_YOUT_FULL			BIT(1)
+#define RKISP1_CIF_C_PROC_YIN_FULL			BIT(2)
+#define RKISP1_CIF_C_PROC_COUT_FULL			BIT(3)
+#define RKISP1_CIF_C_PROC_CTRL_RESERVED			0xFFFFFFFE
+#define RKISP1_CIF_C_PROC_CONTRAST_RESERVED		0xFFFFFF00
+#define RKISP1_CIF_C_PROC_BRIGHTNESS_RESERVED		0xFFFFFF00
+#define RKISP1_CIF_C_PROC_HUE_RESERVED			0xFFFFFF00
+#define RKISP1_CIF_C_PROC_SATURATION_RESERVED		0xFFFFFF00
+#define RKISP1_CIF_C_PROC_MACC_RESERVED			0xE000E000
+#define RKISP1_CIF_C_PROC_TONE_RESERVED			0xF000
+/* DUAL_CROP_CTRL */
+#define RKISP1_CIF_DUAL_CROP_MP_MODE_BYPASS		(0 << 0)
+#define RKISP1_CIF_DUAL_CROP_MP_MODE_YUV		BIT(0)
+#define RKISP1_CIF_DUAL_CROP_MP_MODE_RAW		(2 << 0)
+#define RKISP1_CIF_DUAL_CROP_SP_MODE_BYPASS		(0 << 2)
+#define RKISP1_CIF_DUAL_CROP_SP_MODE_YUV		BIT(2)
+#define RKISP1_CIF_DUAL_CROP_SP_MODE_RAW		(2 << 2)
+#define RKISP1_CIF_DUAL_CROP_CFG_UPD_PERMANENT		BIT(4)
+#define RKISP1_CIF_DUAL_CROP_CFG_UPD			BIT(5)
+#define RKISP1_CIF_DUAL_CROP_GEN_CFG_UPD		BIT(6)
+
+/* IMG_EFF_CTRL */
+#define RKISP1_CIF_IMG_EFF_CTRL_ENABLE			BIT(0)
+#define RKISP1_CIF_IMG_EFF_CTRL_MODE_BLACKWHITE		(0 << 1)
+#define RKISP1_CIF_IMG_EFF_CTRL_MODE_NEGATIVE		BIT(1)
+#define RKISP1_CIF_IMG_EFF_CTRL_MODE_SEPIA		(2 << 1)
+#define RKISP1_CIF_IMG_EFF_CTRL_MODE_COLOR_SEL		(3 << 1)
+#define RKISP1_CIF_IMG_EFF_CTRL_MODE_EMBOSS		(4 << 1)
+#define RKISP1_CIF_IMG_EFF_CTRL_MODE_SKETCH		(5 << 1)
+#define RKISP1_CIF_IMG_EFF_CTRL_MODE_SHARPEN		(6 << 1)
+#define RKISP1_CIF_IMG_EFF_CTRL_CFG_UPD			BIT(4)
+#define RKISP1_CIF_IMG_EFF_CTRL_YCBCR_FULL		BIT(5)
+
+#define RKISP1_CIF_IMG_EFF_CTRL_MODE_BLACKWHITE_SHIFT	0
+#define RKISP1_CIF_IMG_EFF_CTRL_MODE_NEGATIVE_SHIFT	1
+#define RKISP1_CIF_IMG_EFF_CTRL_MODE_SEPIA_SHIFT	2
+#define RKISP1_CIF_IMG_EFF_CTRL_MODE_COLOR_SEL_SHIFT	3
+#define RKISP1_CIF_IMG_EFF_CTRL_MODE_EMBOSS_SHIFT	4
+#define RKISP1_CIF_IMG_EFF_CTRL_MODE_SKETCH_SHIFT	5
+#define RKISP1_CIF_IMG_EFF_CTRL_MODE_SHARPEN_SHIFT	6
+#define RKISP1_CIF_IMG_EFF_CTRL_MODE_MASK		0xE
+
+/* IMG_EFF_COLOR_SEL */
+#define RKISP1_CIF_IMG_EFF_COLOR_RGB			0
+#define RKISP1_CIF_IMG_EFF_COLOR_B			BIT(0)
+#define RKISP1_CIF_IMG_EFF_COLOR_G			(2 << 0)
+#define RKISP1_CIF_IMG_EFF_COLOR_GB			(3 << 0)
+#define RKISP1_CIF_IMG_EFF_COLOR_R			(4 << 0)
+#define RKISP1_CIF_IMG_EFF_COLOR_RB			(5 << 0)
+#define RKISP1_CIF_IMG_EFF_COLOR_RG			(6 << 0)
+#define RKISP1_CIF_IMG_EFF_COLOR_RGB2			(7 << 0)
+
+/* MIPI_CTRL */
+#define RKISP1_CIF_MIPI_CTRL_OUTPUT_ENA			BIT(0)
+#define RKISP1_CIF_MIPI_CTRL_SHUTDOWNLANES(a)		(((a) & 0xF) << 8)
+#define RKISP1_CIF_MIPI_CTRL_NUM_LANES(a)		(((a) & 0x3) << 12)
+#define RKISP1_CIF_MIPI_CTRL_ERR_SOT_HS_SKIP		BIT(16)
+#define RKISP1_CIF_MIPI_CTRL_ERR_SOT_SYNC_HS_SKIP	BIT(17)
+#define RKISP1_CIF_MIPI_CTRL_CLOCKLANE_ENA		BIT(18)
+
+/* MIPI_DATA_SEL */
+#define RKISP1_CIF_MIPI_DATA_SEL_VC(a)			(((a) & 0x3) << 6)
+#define RKISP1_CIF_MIPI_DATA_SEL_DT(a)			(((a) & 0x3F) << 0)
+/* MIPI DATA_TYPE */
+#define RKISP1_CIF_CSI2_DT_YUV420_8b			0x18
+#define RKISP1_CIF_CSI2_DT_YUV420_10b			0x19
+#define RKISP1_CIF_CSI2_DT_YUV422_8b			0x1E
+#define RKISP1_CIF_CSI2_DT_YUV422_10b			0x1F
+#define RKISP1_CIF_CSI2_DT_RGB565			0x22
+#define RKISP1_CIF_CSI2_DT_RGB666			0x23
+#define RKISP1_CIF_CSI2_DT_RGB888			0x24
+#define RKISP1_CIF_CSI2_DT_RAW8				0x2A
+#define RKISP1_CIF_CSI2_DT_RAW10			0x2B
+#define RKISP1_CIF_CSI2_DT_RAW12			0x2C
+
+/* MIPI_IMSC, MIPI_RIS, MIPI_MIS, MIPI_ICR, MIPI_ISR */
+#define RKISP1_CIF_MIPI_SYNC_FIFO_OVFLW(a)		(((a) & 0xF) << 0)
+#define RKISP1_CIF_MIPI_ERR_SOT(a)			(((a) & 0xF) << 4)
+#define RKISP1_CIF_MIPI_ERR_SOT_SYNC(a)			(((a) & 0xF) << 8)
+#define RKISP1_CIF_MIPI_ERR_EOT_SYNC(a)			(((a) & 0xF) << 12)
+#define RKISP1_CIF_MIPI_ERR_CTRL(a)			(((a) & 0xF) << 16)
+#define RKISP1_CIF_MIPI_ERR_PROTOCOL			BIT(20)
+#define RKISP1_CIF_MIPI_ERR_ECC1			BIT(21)
+#define RKISP1_CIF_MIPI_ERR_ECC2			BIT(22)
+#define RKISP1_CIF_MIPI_ERR_CS				BIT(23)
+#define RKISP1_CIF_MIPI_FRAME_END			BIT(24)
+#define RKISP1_CIF_MIPI_ADD_DATA_OVFLW			BIT(25)
+#define RKISP1_CIF_MIPI_ADD_DATA_WATER_MARK		BIT(26)
+
+#define RKISP1_CIF_MIPI_ERR_CSI  (RKISP1_CIF_MIPI_ERR_PROTOCOL | \
+	RKISP1_CIF_MIPI_ERR_ECC1 | \
+	RKISP1_CIF_MIPI_ERR_ECC2 | \
+	RKISP1_CIF_MIPI_ERR_CS)
+
+#define RKISP1_CIF_MIPI_ERR_DPHY  (RKISP1_CIF_MIPI_ERR_SOT(3) | \
+	RKISP1_CIF_MIPI_ERR_SOT_SYNC(3) | \
+	RKISP1_CIF_MIPI_ERR_EOT_SYNC(3) | \
+	RKISP1_CIF_MIPI_ERR_CTRL(3))
+
+/* SUPER_IMPOSE */
+#define RKISP1_CIF_SUPER_IMP_CTRL_NORMAL_MODE		BIT(0)
+#define RKISP1_CIF_SUPER_IMP_CTRL_REF_IMG_MEM		BIT(1)
+#define RKISP1_CIF_SUPER_IMP_CTRL_TRANSP_DIS		BIT(2)
+
+/* ISP HISTOGRAM CALCULATION : ISP_HIST_PROP */
+#define RKISP1_CIF_ISP_HIST_PROP_MODE_DIS		(0 << 0)
+#define RKISP1_CIF_ISP_HIST_PROP_MODE_RGB		BIT(0)
+#define RKISP1_CIF_ISP_HIST_PROP_MODE_RED		(2 << 0)
+#define RKISP1_CIF_ISP_HIST_PROP_MODE_GREEN		(3 << 0)
+#define RKISP1_CIF_ISP_HIST_PROP_MODE_BLUE		(4 << 0)
+#define RKISP1_CIF_ISP_HIST_PROP_MODE_LUM		(5 << 0)
+#define RKISP1_CIF_ISP_HIST_PROP_MODE_MASK		0x7
+#define RKISP1_CIF_ISP_HIST_PREDIV_SET(x)		(((x) & 0x7F) << 3)
+#define RKISP1_CIF_ISP_HIST_WEIGHT_SET(v0, v1, v2, v3)	\
+				     (((v0) & 0x1F) | (((v1) & 0x1F) << 8)  |\
+				     (((v2) & 0x1F) << 16) | \
+				     (((v3) & 0x1F) << 24))
+
+#define RKISP1_CIF_ISP_HIST_WINDOW_OFFSET_RESERVED	0xFFFFF000
+#define RKISP1_CIF_ISP_HIST_WINDOW_SIZE_RESERVED	0xFFFFF800
+#define RKISP1_CIF_ISP_HIST_WEIGHT_RESERVED		0xE0E0E0E0
+#define RKISP1_CIF_ISP_MAX_HIST_PREDIVIDER		0x0000007F
+#define RKISP1_CIF_ISP_HIST_ROW_NUM			5
+#define RKISP1_CIF_ISP_HIST_COLUMN_NUM			5
+
+/* AUTO FOCUS MEASUREMENT:  ISP_AFM_CTRL */
+#define RKISP1_ISP_AFM_CTRL_ENABLE			BIT(0)
+
+/* SHUTTER CONTROL */
+#define RKISP1_CIF_ISP_SH_CTRL_SH_ENA			BIT(0)
+#define RKISP1_CIF_ISP_SH_CTRL_REP_EN			BIT(1)
+#define RKISP1_CIF_ISP_SH_CTRL_SRC_SH_TRIG		BIT(2)
+#define RKISP1_CIF_ISP_SH_CTRL_EDGE_POS			BIT(3)
+#define RKISP1_CIF_ISP_SH_CTRL_POL_LOW			BIT(4)
+
+/* FLASH MODULE */
+/* ISP_FLASH_CMD */
+#define RKISP1_CIFFLASH_CMD_PRELIGHT_ON			BIT(0)
+#define RKISP1_CIFFLASH_CMD_FLASH_ON			BIT(1)
+#define RKISP1_CIFFLASH_CMD_PRE_FLASH_ON		BIT(2)
+/* ISP_FLASH_CONFIG */
+#define RKISP1_CIFFLASH_CONFIG_PRELIGHT_END		BIT(0)
+#define RKISP1_CIFFLASH_CONFIG_VSYNC_POS		BIT(1)
+#define RKISP1_CIFFLASH_CONFIG_PRELIGHT_LOW		BIT(2)
+#define RKISP1_CIFFLASH_CONFIG_SRC_FL_TRIG		BIT(3)
+#define RKISP1_CIFFLASH_CONFIG_DELAY(a)			(((a) & 0xF) << 4)
+
+/* Demosaic:  ISP_DEMOSAIC */
+#define RKISP1_CIF_ISP_DEMOSAIC_BYPASS			BIT(10)
+#define RKISP1_CIF_ISP_DEMOSAIC_TH(x)			((x) & 0xFF)
+
+/* AWB */
+/* ISP_AWB_PROP: bit 31 is built with BIT() since 1 << 31 overflows int */
+#define RKISP1_CIF_ISP_AWB_YMAX_CMP_EN			BIT(2)
+#define RKISP1_CIF_ISP_AWB_YMAX_READ(x)			(((x) >> 2) & 1)
+#define RKISP1_CIF_ISP_AWB_MODE_RGB_EN			(BIT(31) | (0x2 << 0))
+#define RKISP1_CIF_ISP_AWB_MODE_YCBCR_EN		((0 << 31) | (0x2 << 0))
+#define RKISP1_CIF_ISP_AWB_MODE_MASK_NONE		0xFFFFFFFC
+#define RKISP1_CIF_ISP_AWB_MODE_READ(x)			((x) & 3)
+/* ISP_AWB_GAIN_RB, ISP_AWB_GAIN_G. NOTE(review): B masks 0x3FFF, R and GAINS_MAX_VAL use 0x3FF - confirm */
+#define RKISP1_CIF_ISP_AWB_GAIN_R_SET(x)		(((x) & 0x3FF) << 16)
+#define RKISP1_CIF_ISP_AWB_GAIN_R_READ(x)		(((x) >> 16) & 0x3FF)
+#define RKISP1_CIF_ISP_AWB_GAIN_B_SET(x)		((x) & 0x3FFF)
+#define RKISP1_CIF_ISP_AWB_GAIN_B_READ(x)		((x) & 0x3FFF)
+/* ISP_AWB_REF */
+#define RKISP1_CIF_ISP_AWB_REF_CR_SET(x)		(((x) & 0xFF) << 8)
+#define RKISP1_CIF_ISP_AWB_REF_CR_READ(x)		(((x) >> 8) & 0xFF)
+#define RKISP1_CIF_ISP_AWB_REF_CB_READ(x)		((x) & 0xFF)
+/* ISP_AWB_THRESH: MAX_Y[31:24] MIN_Y[23:16] MAX_CS[15:8] MIN_C[7:0] */
+#define RKISP1_CIF_ISP_AWB_MAX_CS_SET(x)		(((x) & 0xFF) << 8)
+#define RKISP1_CIF_ISP_AWB_MAX_CS_READ(x)		(((x) >> 8) & 0xFF)
+#define RKISP1_CIF_ISP_AWB_MIN_C_READ(x)		((x) & 0xFF)
+#define RKISP1_CIF_ISP_AWB_MIN_Y_SET(x)			(((x) & 0xFF) << 16)
+#define RKISP1_CIF_ISP_AWB_MIN_Y_READ(x)		(((x) >> 16) & 0xFF)
+#define RKISP1_CIF_ISP_AWB_MAX_Y_SET(x)			(((x) & 0xFFU) << 24) /* unsigned: 0x80 << 24 overflows int */
+#define RKISP1_CIF_ISP_AWB_MAX_Y_READ(x)		(((x) >> 24) & 0xFF)
+/* ISP_AWB_MEAN */
+#define RKISP1_CIF_ISP_AWB_GET_MEAN_CR_R(x)		((x) & 0xFF)
+#define RKISP1_CIF_ISP_AWB_GET_MEAN_CB_B(x)		(((x) >> 8) & 0xFF)
+#define RKISP1_CIF_ISP_AWB_GET_MEAN_Y_G(x)		(((x) >> 16) & 0xFF)
+/* ISP_AWB_WHITE_CNT */
+#define RKISP1_CIF_ISP_AWB_GET_PIXEL_CNT(x)		((x) & 0x3FFFFFF)
+
+#define RKISP1_CIF_ISP_AWB_GAINS_MAX_VAL		0x000003FF
+#define RKISP1_CIF_ISP_AWB_WINDOW_OFFSET_MAX		0x00000FFF
+#define RKISP1_CIF_ISP_AWB_WINDOW_MAX_SIZE		0x00001FFF
+#define RKISP1_CIF_ISP_AWB_CBCR_MAX_REF			0x000000FF
+#define RKISP1_CIF_ISP_AWB_THRES_MAX_YC			0x000000FF
+
+/* AE */
+/* ISP_EXP_CTRL */
+#define RKISP1_CIF_ISP_EXP_ENA				BIT(0)
+#define RKISP1_CIF_ISP_EXP_CTRL_AUTOSTOP		BIT(1)
+/*
+ * '1' luminance calculation according to Y=(R+G+B) x 0.332 (85/256)
+ * '0' luminance calculation according to Y=16+0.25R+0.5G+0.1094B
+ */
+#define RKISP1_CIF_ISP_EXP_CTRL_MEASMODE_1		BIT(31)
+
+/* ISP_EXP_H_SIZE */
+#define RKISP1_CIF_ISP_EXP_H_SIZE_SET(x)		((x) & 0x7FF)
+#define RKISP1_CIF_ISP_EXP_HEIGHT_MASK			0x000007FF
+/* ISP_EXP_V_SIZE: vertical size must be a multiple of 2. */
+#define RKISP1_CIF_ISP_EXP_V_SIZE_SET(x)		((x) & 0x7FE)
+
+/* ISP_EXP_H_OFFSET */
+#define RKISP1_CIF_ISP_EXP_H_OFFSET_SET(x)		((x) & 0x1FFF)
+#define RKISP1_CIF_ISP_EXP_MAX_HOFFS			2424
+/* ISP_EXP_V_OFFSET */
+#define RKISP1_CIF_ISP_EXP_V_OFFSET_SET(x)		((x) & 0x1FFF)
+#define RKISP1_CIF_ISP_EXP_MAX_VOFFS			1806
+
+#define RKISP1_CIF_ISP_EXP_ROW_NUM			5
+#define RKISP1_CIF_ISP_EXP_COLUMN_NUM			5
+#define RKISP1_CIF_ISP_EXP_NUM_LUMA_REGS \
+	(RKISP1_CIF_ISP_EXP_ROW_NUM * RKISP1_CIF_ISP_EXP_COLUMN_NUM)
+#define RKISP1_CIF_ISP_EXP_BLOCK_MAX_HSIZE		516
+#define RKISP1_CIF_ISP_EXP_BLOCK_MIN_HSIZE		35
+#define RKISP1_CIF_ISP_EXP_BLOCK_MAX_VSIZE		390
+#define RKISP1_CIF_ISP_EXP_BLOCK_MIN_VSIZE		28
+#define RKISP1_CIF_ISP_EXP_MAX_HSIZE	\
+	(RKISP1_CIF_ISP_EXP_BLOCK_MAX_HSIZE * RKISP1_CIF_ISP_EXP_COLUMN_NUM + 1)
+#define RKISP1_CIF_ISP_EXP_MIN_HSIZE	\
+	(RKISP1_CIF_ISP_EXP_BLOCK_MIN_HSIZE * RKISP1_CIF_ISP_EXP_COLUMN_NUM + 1)
+#define RKISP1_CIF_ISP_EXP_MAX_VSIZE	\
+	(RKISP1_CIF_ISP_EXP_BLOCK_MAX_VSIZE * RKISP1_CIF_ISP_EXP_ROW_NUM + 1)
+#define RKISP1_CIF_ISP_EXP_MIN_VSIZE	\
+	(RKISP1_CIF_ISP_EXP_BLOCK_MIN_VSIZE * RKISP1_CIF_ISP_EXP_ROW_NUM + 1)
+
+/* LSC: ISP_LSC_CTRL */
+#define RKISP1_CIF_ISP_LSC_CTRL_ENA			BIT(0)
+#define RKISP1_CIF_ISP_LSC_SECT_SIZE_RESERVED		0xFC00FC00
+#define RKISP1_CIF_ISP_LSC_GRAD_RESERVED		0xF000F000
+#define RKISP1_CIF_ISP_LSC_SAMPLE_RESERVED		0xF000F000
+#define RKISP1_CIF_ISP_LSC_TABLE_DATA(v0, v1)     \
+	(((v0) & 0xFFF) | (((v1) & 0xFFF) << 12))
+#define RKISP1_CIF_ISP_LSC_SECT_SIZE(v0, v1)      \
+	(((v0) & 0xFFF) | (((v1) & 0xFFF) << 16))
+#define RKISP1_CIF_ISP_LSC_GRAD_SIZE(v0, v1)      \
+	(((v0) & 0xFFF) | (((v1) & 0xFFF) << 16))
+
+/* LSC: ISP_LSC_TABLE_SEL */
+#define RKISP1_CIF_ISP_LSC_TABLE_0			0
+#define RKISP1_CIF_ISP_LSC_TABLE_1			1
+
+/* LSC: ISP_LSC_STATUS */
+#define RKISP1_CIF_ISP_LSC_ACTIVE_TABLE			BIT(1)
+#define RKISP1_CIF_ISP_LSC_TABLE_ADDRESS_0		0
+#define RKISP1_CIF_ISP_LSC_TABLE_ADDRESS_153		153
+
+/* FLT */
+/* ISP_FILT_MODE */
+#define RKISP1_CIF_ISP_FLT_ENA				BIT(0)
+
+/*
+ * 0: green filter static mode (active filter factor = FILT_FAC_MID)
+ * 1: dynamic noise reduction/sharpen (default)
+ */
+#define RKISP1_CIF_ISP_FLT_MODE_DNR			BIT(1)
+#define RKISP1_CIF_ISP_FLT_MODE_MAX			1
+#define RKISP1_CIF_ISP_FLT_CHROMA_V_MODE(x)		(((x) & 0x3) << 4)
+#define RKISP1_CIF_ISP_FLT_CHROMA_H_MODE(x)		(((x) & 0x3) << 6)
+#define RKISP1_CIF_ISP_FLT_CHROMA_MODE_MAX		3
+#define RKISP1_CIF_ISP_FLT_GREEN_STAGE1(x)		(((x) & 0xF) << 8)
+#define RKISP1_CIF_ISP_FLT_GREEN_STAGE1_MAX		8
+#define RKISP1_CIF_ISP_FLT_THREAD_RESERVED		0xFFFFFC00
+#define RKISP1_CIF_ISP_FLT_FAC_RESERVED			0xFFFFFFC0
+#define RKISP1_CIF_ISP_FLT_LUM_WEIGHT_RESERVED		0xFFF80000
+
+#define RKISP1_CIF_ISP_CTK_COEFF_RESERVED		0xFFFFF800
+#define RKISP1_CIF_ISP_XTALK_OFFSET_RESERVED		0xFFFFF000
+
+/* GOC */
+#define RKISP1_CIF_ISP_GAMMA_OUT_MODE_EQU		BIT(0)
+#define RKISP1_CIF_ISP_GOC_MODE_MAX			1
+#define RKISP1_CIF_ISP_GOC_RESERVED			0xFFFFF800
+/* ISP_CTRL BIT 11 */
+#define RKISP1_CIF_ISP_CTRL_ISP_GAMMA_OUT_ENA_READ(x)	(((x) >> 11) & 1)
+
+/* DPCC */
+/* ISP_DPCC_MODE */
+#define RKISP1_CIF_ISP_DPCC_ENA				BIT(0)
+#define RKISP1_CIF_ISP_DPCC_MODE_MAX			0x07
+#define RKISP1_CIF_ISP_DPCC_OUTPUTMODE_MAX		0x0F
+#define RKISP1_CIF_ISP_DPCC_SETUSE_MAX			0x0F
+#define RKISP1_CIF_ISP_DPCC_METHODS_SET_RESERVED	0xFFFFE000
+#define RKISP1_CIF_ISP_DPCC_LINE_THRESH_RESERVED	0xFFFF0000
+#define RKISP1_CIF_ISP_DPCC_LINE_MAD_FAC_RESERVED	0xFFFFC0C0
+#define RKISP1_CIF_ISP_DPCC_PG_FAC_RESERVED		0xFFFFC0C0
+#define RKISP1_CIF_ISP_DPCC_RND_THRESH_RESERVED		0xFFFF0000
+#define RKISP1_CIF_ISP_DPCC_RG_FAC_RESERVED		0xFFFFC0C0
+#define RKISP1_CIF_ISP_DPCC_RO_LIMIT_RESERVED		0xFFFFF000
+#define RKISP1_CIF_ISP_DPCC_RND_OFFS_RESERVED		0xFFFFF000
+
+/* BLS */
+/* ISP_BLS_CTRL */
+#define RKISP1_CIF_ISP_BLS_ENA				BIT(0)
+#define RKISP1_CIF_ISP_BLS_MODE_MEASURED		BIT(1)
+#define RKISP1_CIF_ISP_BLS_MODE_FIXED			0
+#define RKISP1_CIF_ISP_BLS_WINDOW_1			BIT(2)
+#define RKISP1_CIF_ISP_BLS_WINDOW_2			(2 << 2)
+
+/* GAMMA-IN: reserved-bit masks; BIT() avoids shifting 1 into the int sign bit */
+#define RKISP1_CIFISP_DEGAMMA_X_RESERVED	\
+	(BIT(31) | BIT(27) | BIT(23) | BIT(19) |\
+	BIT(15) | BIT(11) | BIT(7) | BIT(3))
+#define RKISP1_CIFISP_DEGAMMA_Y_RESERVED		0xFFFFF000
+
+/* AFM */
+#define RKISP1_CIF_ISP_AFM_ENA				BIT(0)
+#define RKISP1_CIF_ISP_AFM_THRES_RESERVED		0xFFFF0000
+#define RKISP1_CIF_ISP_AFM_VAR_SHIFT_RESERVED		0xFFF8FFF8
+#define RKISP1_CIF_ISP_AFM_WINDOW_X_RESERVED		0xE000
+#define RKISP1_CIF_ISP_AFM_WINDOW_Y_RESERVED		0xF000
+#define RKISP1_CIF_ISP_AFM_WINDOW_X_MIN			0x5
+#define RKISP1_CIF_ISP_AFM_WINDOW_Y_MIN			0x2
+#define RKISP1_CIF_ISP_AFM_WINDOW_X(x)			(((x) & 0x1FFF) << 16)
+#define RKISP1_CIF_ISP_AFM_WINDOW_Y(x)			((x) & 0x1FFF)
+
+/* DPF */
+#define RKISP1_CIF_ISP_DPF_MODE_EN			BIT(0)
+#define RKISP1_CIF_ISP_DPF_MODE_B_FLT_DIS		BIT(1)
+#define RKISP1_CIF_ISP_DPF_MODE_GB_FLT_DIS		BIT(2)
+#define RKISP1_CIF_ISP_DPF_MODE_GR_FLT_DIS		BIT(3)
+#define RKISP1_CIF_ISP_DPF_MODE_R_FLT_DIS		BIT(4)
+#define RKISP1_CIF_ISP_DPF_MODE_RB_FLTSIZE_9x9		BIT(5)
+#define RKISP1_CIF_ISP_DPF_MODE_NLL_SEGMENTATION	BIT(6)
+#define RKISP1_CIF_ISP_DPF_MODE_AWB_GAIN_COMP		BIT(7)
+#define RKISP1_CIF_ISP_DPF_MODE_LSC_GAIN_COMP		BIT(8)
+#define RKISP1_CIF_ISP_DPF_MODE_USE_NF_GAIN		BIT(9)
+#define RKISP1_CIF_ISP_DPF_NF_GAIN_RESERVED		0xFFFFF000
+#define RKISP1_CIF_ISP_DPF_SPATIAL_COEFF_MAX		0x1F
+#define RKISP1_CIF_ISP_DPF_NLL_COEFF_N_MAX		0x3FF
+
+/* =================================================================== */
+/*                            CIF Registers                            */
+/* =================================================================== */
+#define RKISP1_CIF_CTRL_BASE			0x00000000
+#define RKISP1_CIF_CCL				(RKISP1_CIF_CTRL_BASE + 0x00000000)
+#define RKISP1_CIF_VI_ID			(RKISP1_CIF_CTRL_BASE + 0x00000008)
+#define RKISP1_CIF_ICCL				(RKISP1_CIF_CTRL_BASE + 0x00000010)
+#define RKISP1_CIF_IRCL				(RKISP1_CIF_CTRL_BASE + 0x00000014)
+#define RKISP1_CIF_VI_DPCL			(RKISP1_CIF_CTRL_BASE + 0x00000018)
+
+#define RKISP1_CIF_IMG_EFF_BASE			0x00000200
+#define RKISP1_CIF_IMG_EFF_CTRL			(RKISP1_CIF_IMG_EFF_BASE + 0x00000000)
+#define RKISP1_CIF_IMG_EFF_COLOR_SEL		(RKISP1_CIF_IMG_EFF_BASE + 0x00000004)
+#define RKISP1_CIF_IMG_EFF_MAT_1		(RKISP1_CIF_IMG_EFF_BASE + 0x00000008)
+#define RKISP1_CIF_IMG_EFF_MAT_2		(RKISP1_CIF_IMG_EFF_BASE + 0x0000000C)
+#define RKISP1_CIF_IMG_EFF_MAT_3		(RKISP1_CIF_IMG_EFF_BASE + 0x00000010)
+#define RKISP1_CIF_IMG_EFF_MAT_4		(RKISP1_CIF_IMG_EFF_BASE + 0x00000014)
+#define RKISP1_CIF_IMG_EFF_MAT_5		(RKISP1_CIF_IMG_EFF_BASE + 0x00000018)
+#define RKISP1_CIF_IMG_EFF_TINT			(RKISP1_CIF_IMG_EFF_BASE + 0x0000001C)
+#define RKISP1_CIF_IMG_EFF_CTRL_SHD		(RKISP1_CIF_IMG_EFF_BASE + 0x00000020)
+#define RKISP1_CIF_IMG_EFF_SHARPEN		(RKISP1_CIF_IMG_EFF_BASE + 0x00000024)
+
+#define RKISP1_CIF_SUPER_IMP_BASE		0x00000300
+#define RKISP1_CIF_SUPER_IMP_CTRL		(RKISP1_CIF_SUPER_IMP_BASE + 0x00000000)
+#define RKISP1_CIF_SUPER_IMP_OFFSET_X		(RKISP1_CIF_SUPER_IMP_BASE + 0x00000004)
+#define RKISP1_CIF_SUPER_IMP_OFFSET_Y		(RKISP1_CIF_SUPER_IMP_BASE + 0x00000008)
+#define RKISP1_CIF_SUPER_IMP_COLOR_Y		(RKISP1_CIF_SUPER_IMP_BASE + 0x0000000C)
+#define RKISP1_CIF_SUPER_IMP_COLOR_CB		(RKISP1_CIF_SUPER_IMP_BASE + 0x00000010)
+#define RKISP1_CIF_SUPER_IMP_COLOR_CR		(RKISP1_CIF_SUPER_IMP_BASE + 0x00000014)
+
+#define RKISP1_CIF_ISP_BASE			0x00000400
+#define RKISP1_CIF_ISP_CTRL			(RKISP1_CIF_ISP_BASE + 0x00000000)
+#define RKISP1_CIF_ISP_ACQ_PROP			(RKISP1_CIF_ISP_BASE + 0x00000004)
+#define RKISP1_CIF_ISP_ACQ_H_OFFS		(RKISP1_CIF_ISP_BASE + 0x00000008)
+#define RKISP1_CIF_ISP_ACQ_V_OFFS		(RKISP1_CIF_ISP_BASE + 0x0000000C)
+#define RKISP1_CIF_ISP_ACQ_H_SIZE		(RKISP1_CIF_ISP_BASE + 0x00000010)
+#define RKISP1_CIF_ISP_ACQ_V_SIZE		(RKISP1_CIF_ISP_BASE + 0x00000014)
+#define RKISP1_CIF_ISP_ACQ_NR_FRAMES		(RKISP1_CIF_ISP_BASE + 0x00000018)
+#define RKISP1_CIF_ISP_GAMMA_DX_LO		(RKISP1_CIF_ISP_BASE + 0x0000001C)
+#define RKISP1_CIF_ISP_GAMMA_DX_HI		(RKISP1_CIF_ISP_BASE + 0x00000020)
+#define RKISP1_CIF_ISP_GAMMA_R_Y0		(RKISP1_CIF_ISP_BASE + 0x00000024)
+#define RKISP1_CIF_ISP_GAMMA_R_Y1		(RKISP1_CIF_ISP_BASE + 0x00000028)
+#define RKISP1_CIF_ISP_GAMMA_R_Y2		(RKISP1_CIF_ISP_BASE + 0x0000002C)
+#define RKISP1_CIF_ISP_GAMMA_R_Y3		(RKISP1_CIF_ISP_BASE + 0x00000030)
+#define RKISP1_CIF_ISP_GAMMA_R_Y4		(RKISP1_CIF_ISP_BASE + 0x00000034)
+#define RKISP1_CIF_ISP_GAMMA_R_Y5		(RKISP1_CIF_ISP_BASE + 0x00000038)
+#define RKISP1_CIF_ISP_GAMMA_R_Y6		(RKISP1_CIF_ISP_BASE + 0x0000003C)
+#define RKISP1_CIF_ISP_GAMMA_R_Y7		(RKISP1_CIF_ISP_BASE + 0x00000040)
+#define RKISP1_CIF_ISP_GAMMA_R_Y8		(RKISP1_CIF_ISP_BASE + 0x00000044)
+#define RKISP1_CIF_ISP_GAMMA_R_Y9		(RKISP1_CIF_ISP_BASE + 0x00000048)
+#define RKISP1_CIF_ISP_GAMMA_R_Y10		(RKISP1_CIF_ISP_BASE + 0x0000004C)
+#define RKISP1_CIF_ISP_GAMMA_R_Y11		(RKISP1_CIF_ISP_BASE + 0x00000050)
+#define RKISP1_CIF_ISP_GAMMA_R_Y12		(RKISP1_CIF_ISP_BASE + 0x00000054)
+#define RKISP1_CIF_ISP_GAMMA_R_Y13		(RKISP1_CIF_ISP_BASE + 0x00000058)
+#define RKISP1_CIF_ISP_GAMMA_R_Y14		(RKISP1_CIF_ISP_BASE + 0x0000005C)
+#define RKISP1_CIF_ISP_GAMMA_R_Y15		(RKISP1_CIF_ISP_BASE + 0x00000060)
+#define RKISP1_CIF_ISP_GAMMA_R_Y16		(RKISP1_CIF_ISP_BASE + 0x00000064)
+#define RKISP1_CIF_ISP_GAMMA_G_Y0		(RKISP1_CIF_ISP_BASE + 0x00000068)
+#define RKISP1_CIF_ISP_GAMMA_G_Y1		(RKISP1_CIF_ISP_BASE + 0x0000006C)
+#define RKISP1_CIF_ISP_GAMMA_G_Y2		(RKISP1_CIF_ISP_BASE + 0x00000070)
+#define RKISP1_CIF_ISP_GAMMA_G_Y3		(RKISP1_CIF_ISP_BASE + 0x00000074)
+#define RKISP1_CIF_ISP_GAMMA_G_Y4		(RKISP1_CIF_ISP_BASE + 0x00000078)
+#define RKISP1_CIF_ISP_GAMMA_G_Y5		(RKISP1_CIF_ISP_BASE + 0x0000007C)
+#define RKISP1_CIF_ISP_GAMMA_G_Y6		(RKISP1_CIF_ISP_BASE + 0x00000080)
+#define RKISP1_CIF_ISP_GAMMA_G_Y7		(RKISP1_CIF_ISP_BASE + 0x00000084)
+#define RKISP1_CIF_ISP_GAMMA_G_Y8		(RKISP1_CIF_ISP_BASE + 0x00000088)
+#define RKISP1_CIF_ISP_GAMMA_G_Y9		(RKISP1_CIF_ISP_BASE + 0x0000008C)
+#define RKISP1_CIF_ISP_GAMMA_G_Y10		(RKISP1_CIF_ISP_BASE + 0x00000090)
+#define RKISP1_CIF_ISP_GAMMA_G_Y11		(RKISP1_CIF_ISP_BASE + 0x00000094)
+#define RKISP1_CIF_ISP_GAMMA_G_Y12		(RKISP1_CIF_ISP_BASE + 0x00000098)
+#define RKISP1_CIF_ISP_GAMMA_G_Y13		(RKISP1_CIF_ISP_BASE + 0x0000009C)
+#define RKISP1_CIF_ISP_GAMMA_G_Y14		(RKISP1_CIF_ISP_BASE + 0x000000A0)
+#define RKISP1_CIF_ISP_GAMMA_G_Y15		(RKISP1_CIF_ISP_BASE + 0x000000A4)
+#define RKISP1_CIF_ISP_GAMMA_G_Y16		(RKISP1_CIF_ISP_BASE + 0x000000A8)
+#define RKISP1_CIF_ISP_GAMMA_B_Y0		(RKISP1_CIF_ISP_BASE + 0x000000AC)
+#define RKISP1_CIF_ISP_GAMMA_B_Y1		(RKISP1_CIF_ISP_BASE + 0x000000B0)
+#define RKISP1_CIF_ISP_GAMMA_B_Y2		(RKISP1_CIF_ISP_BASE + 0x000000B4)
+#define RKISP1_CIF_ISP_GAMMA_B_Y3		(RKISP1_CIF_ISP_BASE + 0x000000B8)
+#define RKISP1_CIF_ISP_GAMMA_B_Y4		(RKISP1_CIF_ISP_BASE + 0x000000BC)
+#define RKISP1_CIF_ISP_GAMMA_B_Y5		(RKISP1_CIF_ISP_BASE + 0x000000C0)
+#define RKISP1_CIF_ISP_GAMMA_B_Y6		(RKISP1_CIF_ISP_BASE + 0x000000C4)
+#define RKISP1_CIF_ISP_GAMMA_B_Y7		(RKISP1_CIF_ISP_BASE + 0x000000C8)
+#define RKISP1_CIF_ISP_GAMMA_B_Y8		(RKISP1_CIF_ISP_BASE + 0x000000CC)
+#define RKISP1_CIF_ISP_GAMMA_B_Y9		(RKISP1_CIF_ISP_BASE + 0x000000D0)
+#define RKISP1_CIF_ISP_GAMMA_B_Y10		(RKISP1_CIF_ISP_BASE + 0x000000D4)
+#define RKISP1_CIF_ISP_GAMMA_B_Y11		(RKISP1_CIF_ISP_BASE + 0x000000D8)
+#define RKISP1_CIF_ISP_GAMMA_B_Y12		(RKISP1_CIF_ISP_BASE + 0x000000DC)
+#define RKISP1_CIF_ISP_GAMMA_B_Y13		(RKISP1_CIF_ISP_BASE + 0x000000E0)
+#define RKISP1_CIF_ISP_GAMMA_B_Y14		(RKISP1_CIF_ISP_BASE + 0x000000E4)
+#define RKISP1_CIF_ISP_GAMMA_B_Y15		(RKISP1_CIF_ISP_BASE + 0x000000E8)
+#define RKISP1_CIF_ISP_GAMMA_B_Y16		(RKISP1_CIF_ISP_BASE + 0x000000EC)
+#define RKISP1_CIF_ISP_AWB_PROP			(RKISP1_CIF_ISP_BASE + 0x00000110)
+#define RKISP1_CIF_ISP_AWB_WND_H_OFFS		(RKISP1_CIF_ISP_BASE + 0x00000114)
+#define RKISP1_CIF_ISP_AWB_WND_V_OFFS		(RKISP1_CIF_ISP_BASE + 0x00000118)
+#define RKISP1_CIF_ISP_AWB_WND_H_SIZE		(RKISP1_CIF_ISP_BASE + 0x0000011C)
+#define RKISP1_CIF_ISP_AWB_WND_V_SIZE		(RKISP1_CIF_ISP_BASE + 0x00000120)
+#define RKISP1_CIF_ISP_AWB_FRAMES		(RKISP1_CIF_ISP_BASE + 0x00000124)
+#define RKISP1_CIF_ISP_AWB_REF			(RKISP1_CIF_ISP_BASE + 0x00000128)
+#define RKISP1_CIF_ISP_AWB_THRESH		(RKISP1_CIF_ISP_BASE + 0x0000012C)
+#define RKISP1_CIF_ISP_AWB_GAIN_G		(RKISP1_CIF_ISP_BASE + 0x00000138)
+#define RKISP1_CIF_ISP_AWB_GAIN_RB		(RKISP1_CIF_ISP_BASE + 0x0000013C)
+#define RKISP1_CIF_ISP_AWB_WHITE_CNT		(RKISP1_CIF_ISP_BASE + 0x00000140)
+#define RKISP1_CIF_ISP_AWB_MEAN			(RKISP1_CIF_ISP_BASE + 0x00000144)
+#define RKISP1_CIF_ISP_CC_COEFF_0		(RKISP1_CIF_ISP_BASE + 0x00000170)
+#define RKISP1_CIF_ISP_CC_COEFF_1		(RKISP1_CIF_ISP_BASE + 0x00000174)
+#define RKISP1_CIF_ISP_CC_COEFF_2		(RKISP1_CIF_ISP_BASE + 0x00000178)
+#define RKISP1_CIF_ISP_CC_COEFF_3		(RKISP1_CIF_ISP_BASE + 0x0000017C)
+#define RKISP1_CIF_ISP_CC_COEFF_4		(RKISP1_CIF_ISP_BASE + 0x00000180)
+#define RKISP1_CIF_ISP_CC_COEFF_5		(RKISP1_CIF_ISP_BASE + 0x00000184)
+#define RKISP1_CIF_ISP_CC_COEFF_6		(RKISP1_CIF_ISP_BASE + 0x00000188)
+#define RKISP1_CIF_ISP_CC_COEFF_7		(RKISP1_CIF_ISP_BASE + 0x0000018C)
+#define RKISP1_CIF_ISP_CC_COEFF_8		(RKISP1_CIF_ISP_BASE + 0x00000190)
+#define RKISP1_CIF_ISP_OUT_H_OFFS		(RKISP1_CIF_ISP_BASE + 0x00000194)
+#define RKISP1_CIF_ISP_OUT_V_OFFS		(RKISP1_CIF_ISP_BASE + 0x00000198)
+#define RKISP1_CIF_ISP_OUT_H_SIZE		(RKISP1_CIF_ISP_BASE + 0x0000019C)
+#define RKISP1_CIF_ISP_OUT_V_SIZE		(RKISP1_CIF_ISP_BASE + 0x000001A0)
+#define RKISP1_CIF_ISP_DEMOSAIC			(RKISP1_CIF_ISP_BASE + 0x000001A4)
+#define RKISP1_CIF_ISP_FLAGS_SHD		(RKISP1_CIF_ISP_BASE + 0x000001A8)
+#define RKISP1_CIF_ISP_OUT_H_OFFS_SHD		(RKISP1_CIF_ISP_BASE + 0x000001AC)
+#define RKISP1_CIF_ISP_OUT_V_OFFS_SHD		(RKISP1_CIF_ISP_BASE + 0x000001B0)
+#define RKISP1_CIF_ISP_OUT_H_SIZE_SHD		(RKISP1_CIF_ISP_BASE + 0x000001B4)
+#define RKISP1_CIF_ISP_OUT_V_SIZE_SHD		(RKISP1_CIF_ISP_BASE + 0x000001B8)
+#define RKISP1_CIF_ISP_IMSC			(RKISP1_CIF_ISP_BASE + 0x000001BC)
+#define RKISP1_CIF_ISP_RIS			(RKISP1_CIF_ISP_BASE + 0x000001C0)
+#define RKISP1_CIF_ISP_MIS			(RKISP1_CIF_ISP_BASE + 0x000001C4)
+#define RKISP1_CIF_ISP_ICR			(RKISP1_CIF_ISP_BASE + 0x000001C8)
+#define RKISP1_CIF_ISP_ISR			(RKISP1_CIF_ISP_BASE + 0x000001CC)
+#define RKISP1_CIF_ISP_CT_COEFF_0		(RKISP1_CIF_ISP_BASE + 0x000001D0)
+#define RKISP1_CIF_ISP_CT_COEFF_1		(RKISP1_CIF_ISP_BASE + 0x000001D4)
+#define RKISP1_CIF_ISP_CT_COEFF_2		(RKISP1_CIF_ISP_BASE + 0x000001D8)
+#define RKISP1_CIF_ISP_CT_COEFF_3		(RKISP1_CIF_ISP_BASE + 0x000001DC)
+#define RKISP1_CIF_ISP_CT_COEFF_4		(RKISP1_CIF_ISP_BASE + 0x000001E0)
+#define RKISP1_CIF_ISP_CT_COEFF_5		(RKISP1_CIF_ISP_BASE + 0x000001E4)
+#define RKISP1_CIF_ISP_CT_COEFF_6		(RKISP1_CIF_ISP_BASE + 0x000001E8)
+#define RKISP1_CIF_ISP_CT_COEFF_7		(RKISP1_CIF_ISP_BASE + 0x000001EC)
+#define RKISP1_CIF_ISP_CT_COEFF_8		(RKISP1_CIF_ISP_BASE + 0x000001F0)
+#define RKISP1_CIF_ISP_GAMMA_OUT_MODE		(RKISP1_CIF_ISP_BASE + 0x000001F4)
+#define RKISP1_CIF_ISP_GAMMA_OUT_Y_0		(RKISP1_CIF_ISP_BASE + 0x000001F8)
+#define RKISP1_CIF_ISP_GAMMA_OUT_Y_1		(RKISP1_CIF_ISP_BASE + 0x000001FC)
+#define RKISP1_CIF_ISP_GAMMA_OUT_Y_2		(RKISP1_CIF_ISP_BASE + 0x00000200)
+#define RKISP1_CIF_ISP_GAMMA_OUT_Y_3		(RKISP1_CIF_ISP_BASE + 0x00000204)
+#define RKISP1_CIF_ISP_GAMMA_OUT_Y_4		(RKISP1_CIF_ISP_BASE + 0x00000208)
+#define RKISP1_CIF_ISP_GAMMA_OUT_Y_5		(RKISP1_CIF_ISP_BASE + 0x0000020C)
+#define RKISP1_CIF_ISP_GAMMA_OUT_Y_6		(RKISP1_CIF_ISP_BASE + 0x00000210)
+#define RKISP1_CIF_ISP_GAMMA_OUT_Y_7		(RKISP1_CIF_ISP_BASE + 0x00000214)
+#define RKISP1_CIF_ISP_GAMMA_OUT_Y_8		(RKISP1_CIF_ISP_BASE + 0x00000218)
+#define RKISP1_CIF_ISP_GAMMA_OUT_Y_9		(RKISP1_CIF_ISP_BASE + 0x0000021C)
+#define RKISP1_CIF_ISP_GAMMA_OUT_Y_10		(RKISP1_CIF_ISP_BASE + 0x00000220)
+#define RKISP1_CIF_ISP_GAMMA_OUT_Y_11		(RKISP1_CIF_ISP_BASE + 0x00000224)
+#define RKISP1_CIF_ISP_GAMMA_OUT_Y_12		(RKISP1_CIF_ISP_BASE + 0x00000228)
+#define RKISP1_CIF_ISP_GAMMA_OUT_Y_13		(RKISP1_CIF_ISP_BASE + 0x0000022C)
+#define RKISP1_CIF_ISP_GAMMA_OUT_Y_14		(RKISP1_CIF_ISP_BASE + 0x00000230)
+#define RKISP1_CIF_ISP_GAMMA_OUT_Y_15		(RKISP1_CIF_ISP_BASE + 0x00000234)
+#define RKISP1_CIF_ISP_GAMMA_OUT_Y_16		(RKISP1_CIF_ISP_BASE + 0x00000238)
+#define RKISP1_CIF_ISP_ERR			(RKISP1_CIF_ISP_BASE + 0x0000023C)
+#define RKISP1_CIF_ISP_ERR_CLR			(RKISP1_CIF_ISP_BASE + 0x00000240)
+#define RKISP1_CIF_ISP_FRAME_COUNT		(RKISP1_CIF_ISP_BASE + 0x00000244)
+#define RKISP1_CIF_ISP_CT_OFFSET_R		(RKISP1_CIF_ISP_BASE + 0x00000248)
+#define RKISP1_CIF_ISP_CT_OFFSET_G		(RKISP1_CIF_ISP_BASE + 0x0000024C)
+#define RKISP1_CIF_ISP_CT_OFFSET_B		(RKISP1_CIF_ISP_BASE + 0x00000250)
+
+#define RKISP1_CIF_ISP_FLASH_BASE		0x00000660
+#define RKISP1_CIF_ISP_FLASH_CMD		(RKISP1_CIF_ISP_FLASH_BASE + 0x00000000)
+#define RKISP1_CIF_ISP_FLASH_CONFIG		(RKISP1_CIF_ISP_FLASH_BASE + 0x00000004)
+#define RKISP1_CIF_ISP_FLASH_PREDIV		(RKISP1_CIF_ISP_FLASH_BASE + 0x00000008)
+#define RKISP1_CIF_ISP_FLASH_DELAY		(RKISP1_CIF_ISP_FLASH_BASE + 0x0000000C)
+#define RKISP1_CIF_ISP_FLASH_TIME		(RKISP1_CIF_ISP_FLASH_BASE + 0x00000010)
+#define RKISP1_CIF_ISP_FLASH_MAXP		(RKISP1_CIF_ISP_FLASH_BASE + 0x00000014)
+
+#define RKISP1_CIF_ISP_SH_BASE			0x00000680
+#define RKISP1_CIF_ISP_SH_CTRL			(RKISP1_CIF_ISP_SH_BASE + 0x00000000)
+#define RKISP1_CIF_ISP_SH_PREDIV		(RKISP1_CIF_ISP_SH_BASE + 0x00000004)
+#define RKISP1_CIF_ISP_SH_DELAY			(RKISP1_CIF_ISP_SH_BASE + 0x00000008)
+#define RKISP1_CIF_ISP_SH_TIME			(RKISP1_CIF_ISP_SH_BASE + 0x0000000C)
+
+#define RKISP1_CIF_C_PROC_BASE			0x00000800
+#define RKISP1_CIF_C_PROC_CTRL			(RKISP1_CIF_C_PROC_BASE + 0x00000000)
+#define RKISP1_CIF_C_PROC_CONTRAST		(RKISP1_CIF_C_PROC_BASE + 0x00000004)
+#define RKISP1_CIF_C_PROC_BRIGHTNESS		(RKISP1_CIF_C_PROC_BASE + 0x00000008)
+#define RKISP1_CIF_C_PROC_SATURATION		(RKISP1_CIF_C_PROC_BASE + 0x0000000C)
+#define RKISP1_CIF_C_PROC_HUE			(RKISP1_CIF_C_PROC_BASE + 0x00000010)
+
+#define RKISP1_CIF_DUAL_CROP_BASE		0x00000880
+#define RKISP1_CIF_DUAL_CROP_CTRL		(RKISP1_CIF_DUAL_CROP_BASE + 0x00000000)
+#define RKISP1_CIF_DUAL_CROP_M_H_OFFS		(RKISP1_CIF_DUAL_CROP_BASE + 0x00000004)
+#define RKISP1_CIF_DUAL_CROP_M_V_OFFS		(RKISP1_CIF_DUAL_CROP_BASE + 0x00000008)
+#define RKISP1_CIF_DUAL_CROP_M_H_SIZE		(RKISP1_CIF_DUAL_CROP_BASE + 0x0000000C)
+#define RKISP1_CIF_DUAL_CROP_M_V_SIZE		(RKISP1_CIF_DUAL_CROP_BASE + 0x00000010)
+#define RKISP1_CIF_DUAL_CROP_S_H_OFFS		(RKISP1_CIF_DUAL_CROP_BASE + 0x00000014)
+#define RKISP1_CIF_DUAL_CROP_S_V_OFFS		(RKISP1_CIF_DUAL_CROP_BASE + 0x00000018)
+#define RKISP1_CIF_DUAL_CROP_S_H_SIZE		(RKISP1_CIF_DUAL_CROP_BASE + 0x0000001C)
+#define RKISP1_CIF_DUAL_CROP_S_V_SIZE		(RKISP1_CIF_DUAL_CROP_BASE + 0x00000020)
+#define RKISP1_CIF_DUAL_CROP_M_H_OFFS_SHD	(RKISP1_CIF_DUAL_CROP_BASE + 0x00000024)
+#define RKISP1_CIF_DUAL_CROP_M_V_OFFS_SHD	(RKISP1_CIF_DUAL_CROP_BASE + 0x00000028)
+#define RKISP1_CIF_DUAL_CROP_M_H_SIZE_SHD	(RKISP1_CIF_DUAL_CROP_BASE + 0x0000002C)
+#define RKISP1_CIF_DUAL_CROP_M_V_SIZE_SHD	(RKISP1_CIF_DUAL_CROP_BASE + 0x00000030)
+#define RKISP1_CIF_DUAL_CROP_S_H_OFFS_SHD	(RKISP1_CIF_DUAL_CROP_BASE + 0x00000034)
+#define RKISP1_CIF_DUAL_CROP_S_V_OFFS_SHD	(RKISP1_CIF_DUAL_CROP_BASE + 0x00000038)
+#define RKISP1_CIF_DUAL_CROP_S_H_SIZE_SHD	(RKISP1_CIF_DUAL_CROP_BASE + 0x0000003C)
+#define RKISP1_CIF_DUAL_CROP_S_V_SIZE_SHD	(RKISP1_CIF_DUAL_CROP_BASE + 0x00000040)
+
+#define RKISP1_CIF_MRSZ_BASE			0x00000C00
+#define RKISP1_CIF_MRSZ_CTRL			(RKISP1_CIF_MRSZ_BASE + 0x00000000)
+#define RKISP1_CIF_MRSZ_SCALE_HY		(RKISP1_CIF_MRSZ_BASE + 0x00000004)
+#define RKISP1_CIF_MRSZ_SCALE_HCB		(RKISP1_CIF_MRSZ_BASE + 0x00000008)
+#define RKISP1_CIF_MRSZ_SCALE_HCR		(RKISP1_CIF_MRSZ_BASE + 0x0000000C)
+#define RKISP1_CIF_MRSZ_SCALE_VY		(RKISP1_CIF_MRSZ_BASE + 0x00000010)
+#define RKISP1_CIF_MRSZ_SCALE_VC		(RKISP1_CIF_MRSZ_BASE + 0x00000014)
+#define RKISP1_CIF_MRSZ_PHASE_HY		(RKISP1_CIF_MRSZ_BASE + 0x00000018)
+#define RKISP1_CIF_MRSZ_PHASE_HC		(RKISP1_CIF_MRSZ_BASE + 0x0000001C)
+#define RKISP1_CIF_MRSZ_PHASE_VY		(RKISP1_CIF_MRSZ_BASE + 0x00000020)
+#define RKISP1_CIF_MRSZ_PHASE_VC		(RKISP1_CIF_MRSZ_BASE + 0x00000024)
+#define RKISP1_CIF_MRSZ_SCALE_LUT_ADDR		(RKISP1_CIF_MRSZ_BASE + 0x00000028)
+#define RKISP1_CIF_MRSZ_SCALE_LUT		(RKISP1_CIF_MRSZ_BASE + 0x0000002C)
+#define RKISP1_CIF_MRSZ_CTRL_SHD		(RKISP1_CIF_MRSZ_BASE + 0x00000030)
+#define RKISP1_CIF_MRSZ_SCALE_HY_SHD		(RKISP1_CIF_MRSZ_BASE + 0x00000034)
+#define RKISP1_CIF_MRSZ_SCALE_HCB_SHD		(RKISP1_CIF_MRSZ_BASE + 0x00000038)
+#define RKISP1_CIF_MRSZ_SCALE_HCR_SHD		(RKISP1_CIF_MRSZ_BASE + 0x0000003C)
+#define RKISP1_CIF_MRSZ_SCALE_VY_SHD		(RKISP1_CIF_MRSZ_BASE + 0x00000040)
+#define RKISP1_CIF_MRSZ_SCALE_VC_SHD		(RKISP1_CIF_MRSZ_BASE + 0x00000044)
+#define RKISP1_CIF_MRSZ_PHASE_HY_SHD		(RKISP1_CIF_MRSZ_BASE + 0x00000048)
+#define RKISP1_CIF_MRSZ_PHASE_HC_SHD		(RKISP1_CIF_MRSZ_BASE + 0x0000004C)
+#define RKISP1_CIF_MRSZ_PHASE_VY_SHD		(RKISP1_CIF_MRSZ_BASE + 0x00000050)
+#define RKISP1_CIF_MRSZ_PHASE_VC_SHD		(RKISP1_CIF_MRSZ_BASE + 0x00000054)
+
+#define RKISP1_CIF_SRSZ_BASE			0x00001000
+#define RKISP1_CIF_SRSZ_CTRL			(RKISP1_CIF_SRSZ_BASE + 0x00000000)
+#define RKISP1_CIF_SRSZ_SCALE_HY		(RKISP1_CIF_SRSZ_BASE + 0x00000004)
+#define RKISP1_CIF_SRSZ_SCALE_HCB		(RKISP1_CIF_SRSZ_BASE + 0x00000008)
+#define RKISP1_CIF_SRSZ_SCALE_HCR		(RKISP1_CIF_SRSZ_BASE + 0x0000000C)
+#define RKISP1_CIF_SRSZ_SCALE_VY		(RKISP1_CIF_SRSZ_BASE + 0x00000010)
+#define RKISP1_CIF_SRSZ_SCALE_VC		(RKISP1_CIF_SRSZ_BASE + 0x00000014)
+#define RKISP1_CIF_SRSZ_PHASE_HY		(RKISP1_CIF_SRSZ_BASE + 0x00000018)
+#define RKISP1_CIF_SRSZ_PHASE_HC		(RKISP1_CIF_SRSZ_BASE + 0x0000001C)
+#define RKISP1_CIF_SRSZ_PHASE_VY		(RKISP1_CIF_SRSZ_BASE + 0x00000020)
+#define RKISP1_CIF_SRSZ_PHASE_VC		(RKISP1_CIF_SRSZ_BASE + 0x00000024)
+#define RKISP1_CIF_SRSZ_SCALE_LUT_ADDR		(RKISP1_CIF_SRSZ_BASE + 0x00000028)
+#define RKISP1_CIF_SRSZ_SCALE_LUT		(RKISP1_CIF_SRSZ_BASE + 0x0000002C)
+#define RKISP1_CIF_SRSZ_CTRL_SHD		(RKISP1_CIF_SRSZ_BASE + 0x00000030)
+#define RKISP1_CIF_SRSZ_SCALE_HY_SHD		(RKISP1_CIF_SRSZ_BASE + 0x00000034)
+#define RKISP1_CIF_SRSZ_SCALE_HCB_SHD		(RKISP1_CIF_SRSZ_BASE + 0x00000038)
+#define RKISP1_CIF_SRSZ_SCALE_HCR_SHD		(RKISP1_CIF_SRSZ_BASE + 0x0000003C)
+#define RKISP1_CIF_SRSZ_SCALE_VY_SHD		(RKISP1_CIF_SRSZ_BASE + 0x00000040)
+#define RKISP1_CIF_SRSZ_SCALE_VC_SHD		(RKISP1_CIF_SRSZ_BASE + 0x00000044)
+#define RKISP1_CIF_SRSZ_PHASE_HY_SHD		(RKISP1_CIF_SRSZ_BASE + 0x00000048)
+#define RKISP1_CIF_SRSZ_PHASE_HC_SHD		(RKISP1_CIF_SRSZ_BASE + 0x0000004C)
+#define RKISP1_CIF_SRSZ_PHASE_VY_SHD		(RKISP1_CIF_SRSZ_BASE + 0x00000050)
+#define RKISP1_CIF_SRSZ_PHASE_VC_SHD		(RKISP1_CIF_SRSZ_BASE + 0x00000054)
+
+#define RKISP1_CIF_MI_BASE			0x00001400
+#define RKISP1_CIF_MI_CTRL			(RKISP1_CIF_MI_BASE + 0x00000000)
+#define RKISP1_CIF_MI_INIT			(RKISP1_CIF_MI_BASE + 0x00000004)
+#define RKISP1_CIF_MI_MP_Y_BASE_AD_INIT		(RKISP1_CIF_MI_BASE + 0x00000008)
+#define RKISP1_CIF_MI_MP_Y_SIZE_INIT		(RKISP1_CIF_MI_BASE + 0x0000000C)
+#define RKISP1_CIF_MI_MP_Y_OFFS_CNT_INIT	(RKISP1_CIF_MI_BASE + 0x00000010)
+#define RKISP1_CIF_MI_MP_Y_OFFS_CNT_START	(RKISP1_CIF_MI_BASE + 0x00000014)
+#define RKISP1_CIF_MI_MP_Y_IRQ_OFFS_INIT	(RKISP1_CIF_MI_BASE + 0x00000018)
+#define RKISP1_CIF_MI_MP_CB_BASE_AD_INIT	(RKISP1_CIF_MI_BASE + 0x0000001C)
+#define RKISP1_CIF_MI_MP_CB_SIZE_INIT		(RKISP1_CIF_MI_BASE + 0x00000020)
+#define RKISP1_CIF_MI_MP_CB_OFFS_CNT_INIT	(RKISP1_CIF_MI_BASE + 0x00000024)
+#define RKISP1_CIF_MI_MP_CB_OFFS_CNT_START	(RKISP1_CIF_MI_BASE + 0x00000028)
+#define RKISP1_CIF_MI_MP_CR_BASE_AD_INIT	(RKISP1_CIF_MI_BASE + 0x0000002C)
+#define RKISP1_CIF_MI_MP_CR_SIZE_INIT		(RKISP1_CIF_MI_BASE + 0x00000030)
+#define RKISP1_CIF_MI_MP_CR_OFFS_CNT_INIT	(RKISP1_CIF_MI_BASE + 0x00000034)
+#define RKISP1_CIF_MI_MP_CR_OFFS_CNT_START	(RKISP1_CIF_MI_BASE + 0x00000038)
+#define RKISP1_CIF_MI_SP_Y_BASE_AD_INIT		(RKISP1_CIF_MI_BASE + 0x0000003C)
+#define RKISP1_CIF_MI_SP_Y_SIZE_INIT		(RKISP1_CIF_MI_BASE + 0x00000040)
+#define RKISP1_CIF_MI_SP_Y_OFFS_CNT_INIT	(RKISP1_CIF_MI_BASE + 0x00000044)
+#define RKISP1_CIF_MI_SP_Y_OFFS_CNT_START	(RKISP1_CIF_MI_BASE + 0x00000048)
+#define RKISP1_CIF_MI_SP_Y_LLENGTH		(RKISP1_CIF_MI_BASE + 0x0000004C)
+#define RKISP1_CIF_MI_SP_CB_BASE_AD_INIT	(RKISP1_CIF_MI_BASE + 0x00000050)
+#define RKISP1_CIF_MI_SP_CB_SIZE_INIT		(RKISP1_CIF_MI_BASE + 0x00000054)
+#define RKISP1_CIF_MI_SP_CB_OFFS_CNT_INIT	(RKISP1_CIF_MI_BASE + 0x00000058)
+#define RKISP1_CIF_MI_SP_CB_OFFS_CNT_START	(RKISP1_CIF_MI_BASE + 0x0000005C)
+#define RKISP1_CIF_MI_SP_CR_BASE_AD_INIT	(RKISP1_CIF_MI_BASE + 0x00000060)
+#define RKISP1_CIF_MI_SP_CR_SIZE_INIT		(RKISP1_CIF_MI_BASE + 0x00000064)
+#define RKISP1_CIF_MI_SP_CR_OFFS_CNT_INIT	(RKISP1_CIF_MI_BASE + 0x00000068)
+#define RKISP1_CIF_MI_SP_CR_OFFS_CNT_START	(RKISP1_CIF_MI_BASE + 0x0000006C)
+#define RKISP1_CIF_MI_BYTE_CNT			(RKISP1_CIF_MI_BASE + 0x00000070)
+#define RKISP1_CIF_MI_CTRL_SHD			(RKISP1_CIF_MI_BASE + 0x00000074)
+#define RKISP1_CIF_MI_MP_Y_BASE_AD_SHD		(RKISP1_CIF_MI_BASE + 0x00000078)
+#define RKISP1_CIF_MI_MP_Y_SIZE_SHD		(RKISP1_CIF_MI_BASE + 0x0000007C)
+#define RKISP1_CIF_MI_MP_Y_OFFS_CNT_SHD		(RKISP1_CIF_MI_BASE + 0x00000080)
+#define RKISP1_CIF_MI_MP_Y_IRQ_OFFS_SHD		(RKISP1_CIF_MI_BASE + 0x00000084)
+#define RKISP1_CIF_MI_MP_CB_BASE_AD_SHD		(RKISP1_CIF_MI_BASE + 0x00000088)
+#define RKISP1_CIF_MI_MP_CB_SIZE_SHD		(RKISP1_CIF_MI_BASE + 0x0000008C)
+#define RKISP1_CIF_MI_MP_CB_OFFS_CNT_SHD	(RKISP1_CIF_MI_BASE + 0x00000090)
+#define RKISP1_CIF_MI_MP_CR_BASE_AD_SHD		(RKISP1_CIF_MI_BASE + 0x00000094)
+#define RKISP1_CIF_MI_MP_CR_SIZE_SHD		(RKISP1_CIF_MI_BASE + 0x00000098)
+#define RKISP1_CIF_MI_MP_CR_OFFS_CNT_SHD	(RKISP1_CIF_MI_BASE + 0x0000009C)
+#define RKISP1_CIF_MI_SP_Y_BASE_AD_SHD		(RKISP1_CIF_MI_BASE + 0x000000A0)
+#define RKISP1_CIF_MI_SP_Y_SIZE_SHD		(RKISP1_CIF_MI_BASE + 0x000000A4)
+#define RKISP1_CIF_MI_SP_Y_OFFS_CNT_SHD		(RKISP1_CIF_MI_BASE + 0x000000A8)
+#define RKISP1_CIF_MI_SP_CB_BASE_AD_SHD		(RKISP1_CIF_MI_BASE + 0x000000B0) /* NOTE(review): no register is defined at 0xAC (previous one is 0xA8) — confirm against the datasheet */
+#define RKISP1_CIF_MI_SP_CB_SIZE_SHD		(RKISP1_CIF_MI_BASE + 0x000000B4)
+#define RKISP1_CIF_MI_SP_CB_OFFS_CNT_SHD	(RKISP1_CIF_MI_BASE + 0x000000B8)
+#define RKISP1_CIF_MI_SP_CR_BASE_AD_SHD		(RKISP1_CIF_MI_BASE + 0x000000BC)
+#define RKISP1_CIF_MI_SP_CR_SIZE_SHD		(RKISP1_CIF_MI_BASE + 0x000000C0)
+#define RKISP1_CIF_MI_SP_CR_OFFS_CNT_SHD	(RKISP1_CIF_MI_BASE + 0x000000C4)
+#define RKISP1_CIF_MI_DMA_Y_PIC_START_AD	(RKISP1_CIF_MI_BASE + 0x000000C8)
+#define RKISP1_CIF_MI_DMA_Y_PIC_WIDTH		(RKISP1_CIF_MI_BASE + 0x000000CC)
+#define RKISP1_CIF_MI_DMA_Y_LLENGTH		(RKISP1_CIF_MI_BASE + 0x000000D0)
+#define RKISP1_CIF_MI_DMA_Y_PIC_SIZE		(RKISP1_CIF_MI_BASE + 0x000000D4)
+#define RKISP1_CIF_MI_DMA_CB_PIC_START_AD	(RKISP1_CIF_MI_BASE + 0x000000D8)
+#define RKISP1_CIF_MI_DMA_CR_PIC_START_AD	(RKISP1_CIF_MI_BASE + 0x000000E8)
+#define RKISP1_CIF_MI_IMSC			(RKISP1_CIF_MI_BASE + 0x000000F8)
+#define RKISP1_CIF_MI_RIS			(RKISP1_CIF_MI_BASE + 0x000000FC)
+#define RKISP1_CIF_MI_MIS			(RKISP1_CIF_MI_BASE + 0x00000100)
+#define RKISP1_CIF_MI_ICR			(RKISP1_CIF_MI_BASE + 0x00000104)
+#define RKISP1_CIF_MI_ISR			(RKISP1_CIF_MI_BASE + 0x00000108)
+#define RKISP1_CIF_MI_STATUS			(RKISP1_CIF_MI_BASE + 0x0000010C)
+#define RKISP1_CIF_MI_STATUS_CLR		(RKISP1_CIF_MI_BASE + 0x00000110)
+#define RKISP1_CIF_MI_SP_Y_PIC_WIDTH		(RKISP1_CIF_MI_BASE + 0x00000114)
+#define RKISP1_CIF_MI_SP_Y_PIC_HEIGHT		(RKISP1_CIF_MI_BASE + 0x00000118)
+#define RKISP1_CIF_MI_SP_Y_PIC_SIZE		(RKISP1_CIF_MI_BASE + 0x0000011C)
+#define RKISP1_CIF_MI_DMA_CTRL			(RKISP1_CIF_MI_BASE + 0x00000120)
+#define RKISP1_CIF_MI_DMA_START			(RKISP1_CIF_MI_BASE + 0x00000124)
+#define RKISP1_CIF_MI_DMA_STATUS		(RKISP1_CIF_MI_BASE + 0x00000128)
+#define RKISP1_CIF_MI_PIXEL_COUNT		(RKISP1_CIF_MI_BASE + 0x0000012C)
+#define RKISP1_CIF_MI_MP_Y_BASE_AD_INIT2	(RKISP1_CIF_MI_BASE + 0x00000130)
+#define RKISP1_CIF_MI_MP_CB_BASE_AD_INIT2	(RKISP1_CIF_MI_BASE + 0x00000134)
+#define RKISP1_CIF_MI_MP_CR_BASE_AD_INIT2	(RKISP1_CIF_MI_BASE + 0x00000138)
+#define RKISP1_CIF_MI_SP_Y_BASE_AD_INIT2	(RKISP1_CIF_MI_BASE + 0x0000013C)
+#define RKISP1_CIF_MI_SP_CB_BASE_AD_INIT2	(RKISP1_CIF_MI_BASE + 0x00000140)
+#define RKISP1_CIF_MI_SP_CR_BASE_AD_INIT2	(RKISP1_CIF_MI_BASE + 0x00000144)
+#define RKISP1_CIF_MI_XTD_FORMAT_CTRL		(RKISP1_CIF_MI_BASE + 0x00000148)
+
+#define RKISP1_CIF_SMIA_BASE			0x00001A00
+#define RKISP1_CIF_SMIA_CTRL			(RKISP1_CIF_SMIA_BASE + 0x00000000)
+#define RKISP1_CIF_SMIA_STATUS			(RKISP1_CIF_SMIA_BASE + 0x00000004)
+#define RKISP1_CIF_SMIA_IMSC			(RKISP1_CIF_SMIA_BASE + 0x00000008)
+#define RKISP1_CIF_SMIA_RIS			(RKISP1_CIF_SMIA_BASE + 0x0000000C)
+#define RKISP1_CIF_SMIA_MIS			(RKISP1_CIF_SMIA_BASE + 0x00000010)
+#define RKISP1_CIF_SMIA_ICR			(RKISP1_CIF_SMIA_BASE + 0x00000014)
+#define RKISP1_CIF_SMIA_ISR			(RKISP1_CIF_SMIA_BASE + 0x00000018)
+#define RKISP1_CIF_SMIA_DATA_FORMAT_SEL		(RKISP1_CIF_SMIA_BASE + 0x0000001C)
+#define RKISP1_CIF_SMIA_SOF_EMB_DATA_LINES	(RKISP1_CIF_SMIA_BASE + 0x00000020)
+#define RKISP1_CIF_SMIA_EMB_HSTART		(RKISP1_CIF_SMIA_BASE + 0x00000024)
+#define RKISP1_CIF_SMIA_EMB_HSIZE		(RKISP1_CIF_SMIA_BASE + 0x00000028)
+#define RKISP1_CIF_SMIA_EMB_VSTART		(RKISP1_CIF_SMIA_BASE + 0x0000002c)
+#define RKISP1_CIF_SMIA_NUM_LINES		(RKISP1_CIF_SMIA_BASE + 0x00000030)
+#define RKISP1_CIF_SMIA_EMB_DATA_FIFO		(RKISP1_CIF_SMIA_BASE + 0x00000034)
+#define RKISP1_CIF_SMIA_EMB_DATA_WATERMARK	(RKISP1_CIF_SMIA_BASE + 0x00000038)
+
+#define RKISP1_CIF_MIPI_BASE			0x00001C00
+#define RKISP1_CIF_MIPI_CTRL			(RKISP1_CIF_MIPI_BASE + 0x00000000)
+#define RKISP1_CIF_MIPI_STATUS			(RKISP1_CIF_MIPI_BASE + 0x00000004)
+#define RKISP1_CIF_MIPI_IMSC			(RKISP1_CIF_MIPI_BASE + 0x00000008)
+#define RKISP1_CIF_MIPI_RIS			(RKISP1_CIF_MIPI_BASE + 0x0000000C)
+#define RKISP1_CIF_MIPI_MIS			(RKISP1_CIF_MIPI_BASE + 0x00000010)
+#define RKISP1_CIF_MIPI_ICR			(RKISP1_CIF_MIPI_BASE + 0x00000014)
+#define RKISP1_CIF_MIPI_ISR			(RKISP1_CIF_MIPI_BASE + 0x00000018)
+#define RKISP1_CIF_MIPI_CUR_DATA_ID		(RKISP1_CIF_MIPI_BASE + 0x0000001C)
+#define RKISP1_CIF_MIPI_IMG_DATA_SEL		(RKISP1_CIF_MIPI_BASE + 0x00000020)
+#define RKISP1_CIF_MIPI_ADD_DATA_SEL_1		(RKISP1_CIF_MIPI_BASE + 0x00000024)
+#define RKISP1_CIF_MIPI_ADD_DATA_SEL_2		(RKISP1_CIF_MIPI_BASE + 0x00000028)
+#define RKISP1_CIF_MIPI_ADD_DATA_SEL_3		(RKISP1_CIF_MIPI_BASE + 0x0000002C)
+#define RKISP1_CIF_MIPI_ADD_DATA_SEL_4		(RKISP1_CIF_MIPI_BASE + 0x00000030)
+#define RKISP1_CIF_MIPI_ADD_DATA_FIFO		(RKISP1_CIF_MIPI_BASE + 0x00000034)
+#define RKISP1_CIF_MIPI_FIFO_FILL_LEVEL		(RKISP1_CIF_MIPI_BASE + 0x00000038)
+#define RKISP1_CIF_MIPI_COMPRESSED_MODE		(RKISP1_CIF_MIPI_BASE + 0x0000003C)
+#define RKISP1_CIF_MIPI_FRAME			(RKISP1_CIF_MIPI_BASE + 0x00000040)
+#define RKISP1_CIF_MIPI_GEN_SHORT_DT		(RKISP1_CIF_MIPI_BASE + 0x00000044)
+#define RKISP1_CIF_MIPI_GEN_SHORT_8_9		(RKISP1_CIF_MIPI_BASE + 0x00000048)
+#define RKISP1_CIF_MIPI_GEN_SHORT_A_B		(RKISP1_CIF_MIPI_BASE + 0x0000004C)
+#define RKISP1_CIF_MIPI_GEN_SHORT_C_D		(RKISP1_CIF_MIPI_BASE + 0x00000050)
+#define RKISP1_CIF_MIPI_GEN_SHORT_E_F		(RKISP1_CIF_MIPI_BASE + 0x00000054)
+
+#define RKISP1_CIF_ISP_AFM_BASE			0x00002000
+#define RKISP1_CIF_ISP_AFM_CTRL			(RKISP1_CIF_ISP_AFM_BASE + 0x00000000)
+#define RKISP1_CIF_ISP_AFM_LT_A			(RKISP1_CIF_ISP_AFM_BASE + 0x00000004)
+#define RKISP1_CIF_ISP_AFM_RB_A			(RKISP1_CIF_ISP_AFM_BASE + 0x00000008)
+#define RKISP1_CIF_ISP_AFM_LT_B			(RKISP1_CIF_ISP_AFM_BASE + 0x0000000C)
+#define RKISP1_CIF_ISP_AFM_RB_B			(RKISP1_CIF_ISP_AFM_BASE + 0x00000010)
+#define RKISP1_CIF_ISP_AFM_LT_C			(RKISP1_CIF_ISP_AFM_BASE + 0x00000014)
+#define RKISP1_CIF_ISP_AFM_RB_C			(RKISP1_CIF_ISP_AFM_BASE + 0x00000018)
+#define RKISP1_CIF_ISP_AFM_THRES		(RKISP1_CIF_ISP_AFM_BASE + 0x0000001C)
+#define RKISP1_CIF_ISP_AFM_VAR_SHIFT		(RKISP1_CIF_ISP_AFM_BASE + 0x00000020)
+#define RKISP1_CIF_ISP_AFM_SUM_A		(RKISP1_CIF_ISP_AFM_BASE + 0x00000024)
+#define RKISP1_CIF_ISP_AFM_SUM_B		(RKISP1_CIF_ISP_AFM_BASE + 0x00000028)
+#define RKISP1_CIF_ISP_AFM_SUM_C		(RKISP1_CIF_ISP_AFM_BASE + 0x0000002C)
+#define RKISP1_CIF_ISP_AFM_LUM_A		(RKISP1_CIF_ISP_AFM_BASE + 0x00000030)
+#define RKISP1_CIF_ISP_AFM_LUM_B		(RKISP1_CIF_ISP_AFM_BASE + 0x00000034)
+#define RKISP1_CIF_ISP_AFM_LUM_C		(RKISP1_CIF_ISP_AFM_BASE + 0x00000038)
+
+#define RKISP1_CIF_ISP_LSC_BASE			0x00002200
+#define RKISP1_CIF_ISP_LSC_CTRL			(RKISP1_CIF_ISP_LSC_BASE + 0x00000000)
+#define RKISP1_CIF_ISP_LSC_R_TABLE_ADDR		(RKISP1_CIF_ISP_LSC_BASE + 0x00000004)
+#define RKISP1_CIF_ISP_LSC_GR_TABLE_ADDR	(RKISP1_CIF_ISP_LSC_BASE + 0x00000008)
+#define RKISP1_CIF_ISP_LSC_B_TABLE_ADDR		(RKISP1_CIF_ISP_LSC_BASE + 0x0000000C)
+#define RKISP1_CIF_ISP_LSC_GB_TABLE_ADDR	(RKISP1_CIF_ISP_LSC_BASE + 0x00000010)
+#define RKISP1_CIF_ISP_LSC_R_TABLE_DATA		(RKISP1_CIF_ISP_LSC_BASE + 0x00000014)
+#define RKISP1_CIF_ISP_LSC_GR_TABLE_DATA	(RKISP1_CIF_ISP_LSC_BASE + 0x00000018)
+#define RKISP1_CIF_ISP_LSC_B_TABLE_DATA		(RKISP1_CIF_ISP_LSC_BASE + 0x0000001C)
+#define RKISP1_CIF_ISP_LSC_GB_TABLE_DATA	(RKISP1_CIF_ISP_LSC_BASE + 0x00000020)
+#define RKISP1_CIF_ISP_LSC_XGRAD_01		(RKISP1_CIF_ISP_LSC_BASE + 0x00000024)
+#define RKISP1_CIF_ISP_LSC_XGRAD_23		(RKISP1_CIF_ISP_LSC_BASE + 0x00000028)
+#define RKISP1_CIF_ISP_LSC_XGRAD_45		(RKISP1_CIF_ISP_LSC_BASE + 0x0000002C)
+#define RKISP1_CIF_ISP_LSC_XGRAD_67		(RKISP1_CIF_ISP_LSC_BASE + 0x00000030)
+#define RKISP1_CIF_ISP_LSC_YGRAD_01		(RKISP1_CIF_ISP_LSC_BASE + 0x00000034)
+#define RKISP1_CIF_ISP_LSC_YGRAD_23		(RKISP1_CIF_ISP_LSC_BASE + 0x00000038)
+#define RKISP1_CIF_ISP_LSC_YGRAD_45		(RKISP1_CIF_ISP_LSC_BASE + 0x0000003C)
+#define RKISP1_CIF_ISP_LSC_YGRAD_67		(RKISP1_CIF_ISP_LSC_BASE + 0x00000040)
+#define RKISP1_CIF_ISP_LSC_XSIZE_01		(RKISP1_CIF_ISP_LSC_BASE + 0x00000044)
+#define RKISP1_CIF_ISP_LSC_XSIZE_23		(RKISP1_CIF_ISP_LSC_BASE + 0x00000048)
+#define RKISP1_CIF_ISP_LSC_XSIZE_45		(RKISP1_CIF_ISP_LSC_BASE + 0x0000004C)
+#define RKISP1_CIF_ISP_LSC_XSIZE_67		(RKISP1_CIF_ISP_LSC_BASE + 0x00000050)
+#define RKISP1_CIF_ISP_LSC_YSIZE_01		(RKISP1_CIF_ISP_LSC_BASE + 0x00000054)
+#define RKISP1_CIF_ISP_LSC_YSIZE_23		(RKISP1_CIF_ISP_LSC_BASE + 0x00000058)
+#define RKISP1_CIF_ISP_LSC_YSIZE_45		(RKISP1_CIF_ISP_LSC_BASE + 0x0000005C)
+#define RKISP1_CIF_ISP_LSC_YSIZE_67		(RKISP1_CIF_ISP_LSC_BASE + 0x00000060)
+#define RKISP1_CIF_ISP_LSC_TABLE_SEL		(RKISP1_CIF_ISP_LSC_BASE + 0x00000064)
+#define RKISP1_CIF_ISP_LSC_STATUS		(RKISP1_CIF_ISP_LSC_BASE + 0x00000068)
+
+#define RKISP1_CIF_ISP_IS_BASE			0x00002300
+#define RKISP1_CIF_ISP_IS_CTRL			(RKISP1_CIF_ISP_IS_BASE + 0x00000000)
+#define RKISP1_CIF_ISP_IS_RECENTER		(RKISP1_CIF_ISP_IS_BASE + 0x00000004)
+#define RKISP1_CIF_ISP_IS_H_OFFS		(RKISP1_CIF_ISP_IS_BASE + 0x00000008)
+#define RKISP1_CIF_ISP_IS_V_OFFS		(RKISP1_CIF_ISP_IS_BASE + 0x0000000C)
+#define RKISP1_CIF_ISP_IS_H_SIZE		(RKISP1_CIF_ISP_IS_BASE + 0x00000010)
+#define RKISP1_CIF_ISP_IS_V_SIZE		(RKISP1_CIF_ISP_IS_BASE + 0x00000014)
+#define RKISP1_CIF_ISP_IS_MAX_DX		(RKISP1_CIF_ISP_IS_BASE + 0x00000018)
+#define RKISP1_CIF_ISP_IS_MAX_DY		(RKISP1_CIF_ISP_IS_BASE + 0x0000001C)
+#define RKISP1_CIF_ISP_IS_DISPLACE		(RKISP1_CIF_ISP_IS_BASE + 0x00000020)
+#define RKISP1_CIF_ISP_IS_H_OFFS_SHD		(RKISP1_CIF_ISP_IS_BASE + 0x00000024)
+#define RKISP1_CIF_ISP_IS_V_OFFS_SHD		(RKISP1_CIF_ISP_IS_BASE + 0x00000028)
+#define RKISP1_CIF_ISP_IS_H_SIZE_SHD		(RKISP1_CIF_ISP_IS_BASE + 0x0000002C)
+#define RKISP1_CIF_ISP_IS_V_SIZE_SHD		(RKISP1_CIF_ISP_IS_BASE + 0x00000030)
+
+#define RKISP1_CIF_ISP_HIST_BASE		0x00002400
+
+#define RKISP1_CIF_ISP_HIST_PROP		(RKISP1_CIF_ISP_HIST_BASE + 0x00000000)
+#define RKISP1_CIF_ISP_HIST_H_OFFS		(RKISP1_CIF_ISP_HIST_BASE + 0x00000004)
+#define RKISP1_CIF_ISP_HIST_V_OFFS		(RKISP1_CIF_ISP_HIST_BASE + 0x00000008)
+#define RKISP1_CIF_ISP_HIST_H_SIZE		(RKISP1_CIF_ISP_HIST_BASE + 0x0000000C)
+#define RKISP1_CIF_ISP_HIST_V_SIZE		(RKISP1_CIF_ISP_HIST_BASE + 0x00000010)
+#define RKISP1_CIF_ISP_HIST_BIN_0		(RKISP1_CIF_ISP_HIST_BASE + 0x00000014)
+#define RKISP1_CIF_ISP_HIST_BIN_1		(RKISP1_CIF_ISP_HIST_BASE + 0x00000018)
+#define RKISP1_CIF_ISP_HIST_BIN_2		(RKISP1_CIF_ISP_HIST_BASE + 0x0000001C)
+#define RKISP1_CIF_ISP_HIST_BIN_3		(RKISP1_CIF_ISP_HIST_BASE + 0x00000020)
+#define RKISP1_CIF_ISP_HIST_BIN_4		(RKISP1_CIF_ISP_HIST_BASE + 0x00000024)
+#define RKISP1_CIF_ISP_HIST_BIN_5		(RKISP1_CIF_ISP_HIST_BASE + 0x00000028)
+#define RKISP1_CIF_ISP_HIST_BIN_6		(RKISP1_CIF_ISP_HIST_BASE + 0x0000002C)
+#define RKISP1_CIF_ISP_HIST_BIN_7		(RKISP1_CIF_ISP_HIST_BASE + 0x00000030)
+#define RKISP1_CIF_ISP_HIST_BIN_8		(RKISP1_CIF_ISP_HIST_BASE + 0x00000034)
+#define RKISP1_CIF_ISP_HIST_BIN_9		(RKISP1_CIF_ISP_HIST_BASE + 0x00000038)
+#define RKISP1_CIF_ISP_HIST_BIN_10		(RKISP1_CIF_ISP_HIST_BASE + 0x0000003C)
+#define RKISP1_CIF_ISP_HIST_BIN_11		(RKISP1_CIF_ISP_HIST_BASE + 0x00000040)
+#define RKISP1_CIF_ISP_HIST_BIN_12		(RKISP1_CIF_ISP_HIST_BASE + 0x00000044)
+#define RKISP1_CIF_ISP_HIST_BIN_13		(RKISP1_CIF_ISP_HIST_BASE + 0x00000048)
+#define RKISP1_CIF_ISP_HIST_BIN_14		(RKISP1_CIF_ISP_HIST_BASE + 0x0000004C)
+#define RKISP1_CIF_ISP_HIST_BIN_15		(RKISP1_CIF_ISP_HIST_BASE + 0x00000050)
+#define RKISP1_CIF_ISP_HIST_WEIGHT_00TO30	(RKISP1_CIF_ISP_HIST_BASE + 0x00000054)
+#define RKISP1_CIF_ISP_HIST_WEIGHT_40TO21	(RKISP1_CIF_ISP_HIST_BASE + 0x00000058)
+#define RKISP1_CIF_ISP_HIST_WEIGHT_31TO12	(RKISP1_CIF_ISP_HIST_BASE + 0x0000005C)
+#define RKISP1_CIF_ISP_HIST_WEIGHT_22TO03	(RKISP1_CIF_ISP_HIST_BASE + 0x00000060)
+#define RKISP1_CIF_ISP_HIST_WEIGHT_13TO43	(RKISP1_CIF_ISP_HIST_BASE + 0x00000064)
+#define RKISP1_CIF_ISP_HIST_WEIGHT_04TO34	(RKISP1_CIF_ISP_HIST_BASE + 0x00000068)
+#define RKISP1_CIF_ISP_HIST_WEIGHT_44		(RKISP1_CIF_ISP_HIST_BASE + 0x0000006C)
+
+#define RKISP1_CIF_ISP_FILT_BASE		0x00002500
+#define RKISP1_CIF_ISP_FILT_MODE		(RKISP1_CIF_ISP_FILT_BASE + 0x00000000)
+#define RKISP1_CIF_ISP_FILT_THRESH_BL0		(RKISP1_CIF_ISP_FILT_BASE + 0x00000028)
+#define RKISP1_CIF_ISP_FILT_THRESH_BL1		(RKISP1_CIF_ISP_FILT_BASE + 0x0000002c)
+#define RKISP1_CIF_ISP_FILT_THRESH_SH0		(RKISP1_CIF_ISP_FILT_BASE + 0x00000030)
+#define RKISP1_CIF_ISP_FILT_THRESH_SH1		(RKISP1_CIF_ISP_FILT_BASE + 0x00000034)
+#define RKISP1_CIF_ISP_FILT_LUM_WEIGHT		(RKISP1_CIF_ISP_FILT_BASE + 0x00000038)
+#define RKISP1_CIF_ISP_FILT_FAC_SH1		(RKISP1_CIF_ISP_FILT_BASE + 0x0000003c)
+#define RKISP1_CIF_ISP_FILT_FAC_SH0		(RKISP1_CIF_ISP_FILT_BASE + 0x00000040)
+#define RKISP1_CIF_ISP_FILT_FAC_MID		(RKISP1_CIF_ISP_FILT_BASE + 0x00000044)
+#define RKISP1_CIF_ISP_FILT_FAC_BL0		(RKISP1_CIF_ISP_FILT_BASE + 0x00000048)
+#define RKISP1_CIF_ISP_FILT_FAC_BL1		(RKISP1_CIF_ISP_FILT_BASE + 0x0000004C)
+
+#define RKISP1_CIF_ISP_CAC_BASE			0x00002580
+#define RKISP1_CIF_ISP_CAC_CTRL			(RKISP1_CIF_ISP_CAC_BASE + 0x00000000)
+#define RKISP1_CIF_ISP_CAC_COUNT_START		(RKISP1_CIF_ISP_CAC_BASE + 0x00000004)
+#define RKISP1_CIF_ISP_CAC_A			(RKISP1_CIF_ISP_CAC_BASE + 0x00000008)
+#define RKISP1_CIF_ISP_CAC_B			(RKISP1_CIF_ISP_CAC_BASE + 0x0000000C)
+#define RKISP1_CIF_ISP_CAC_C			(RKISP1_CIF_ISP_CAC_BASE + 0x00000010)
+#define RKISP1_CIF_ISP_X_NORM			(RKISP1_CIF_ISP_CAC_BASE + 0x00000014) /* offset from the CAC base although the name has no CAC_ */
+#define RKISP1_CIF_ISP_Y_NORM			(RKISP1_CIF_ISP_CAC_BASE + 0x00000018) /* offset from the CAC base although the name has no CAC_ */
+
+#define RKISP1_CIF_ISP_EXP_BASE			0x00002600
+#define RKISP1_CIF_ISP_EXP_CTRL			(RKISP1_CIF_ISP_EXP_BASE + 0x00000000)
+#define RKISP1_CIF_ISP_EXP_H_OFFSET		(RKISP1_CIF_ISP_EXP_BASE + 0x00000004)
+#define RKISP1_CIF_ISP_EXP_V_OFFSET		(RKISP1_CIF_ISP_EXP_BASE + 0x00000008)
+#define RKISP1_CIF_ISP_EXP_H_SIZE		(RKISP1_CIF_ISP_EXP_BASE + 0x0000000C)
+#define RKISP1_CIF_ISP_EXP_V_SIZE		(RKISP1_CIF_ISP_EXP_BASE + 0x00000010)
+#define RKISP1_CIF_ISP_EXP_MEAN_00		(RKISP1_CIF_ISP_EXP_BASE + 0x00000014)
+#define RKISP1_CIF_ISP_EXP_MEAN_10		(RKISP1_CIF_ISP_EXP_BASE + 0x00000018)
+#define RKISP1_CIF_ISP_EXP_MEAN_20		(RKISP1_CIF_ISP_EXP_BASE + 0x0000001c)
+#define RKISP1_CIF_ISP_EXP_MEAN_30		(RKISP1_CIF_ISP_EXP_BASE + 0x00000020)
+#define RKISP1_CIF_ISP_EXP_MEAN_40		(RKISP1_CIF_ISP_EXP_BASE + 0x00000024)
+#define RKISP1_CIF_ISP_EXP_MEAN_01		(RKISP1_CIF_ISP_EXP_BASE + 0x00000028)
+#define RKISP1_CIF_ISP_EXP_MEAN_11		(RKISP1_CIF_ISP_EXP_BASE + 0x0000002c)
+#define RKISP1_CIF_ISP_EXP_MEAN_21		(RKISP1_CIF_ISP_EXP_BASE + 0x00000030)
+#define RKISP1_CIF_ISP_EXP_MEAN_31		(RKISP1_CIF_ISP_EXP_BASE + 0x00000034)
+#define RKISP1_CIF_ISP_EXP_MEAN_41		(RKISP1_CIF_ISP_EXP_BASE + 0x00000038)
+#define RKISP1_CIF_ISP_EXP_MEAN_02		(RKISP1_CIF_ISP_EXP_BASE + 0x0000003c)
+#define RKISP1_CIF_ISP_EXP_MEAN_12		(RKISP1_CIF_ISP_EXP_BASE + 0x00000040)
+#define RKISP1_CIF_ISP_EXP_MEAN_22		(RKISP1_CIF_ISP_EXP_BASE + 0x00000044)
+#define RKISP1_CIF_ISP_EXP_MEAN_32		(RKISP1_CIF_ISP_EXP_BASE + 0x00000048)
+#define RKISP1_CIF_ISP_EXP_MEAN_42		(RKISP1_CIF_ISP_EXP_BASE + 0x0000004c)
+#define RKISP1_CIF_ISP_EXP_MEAN_03		(RKISP1_CIF_ISP_EXP_BASE + 0x00000050)
+#define RKISP1_CIF_ISP_EXP_MEAN_13		(RKISP1_CIF_ISP_EXP_BASE + 0x00000054)
+#define RKISP1_CIF_ISP_EXP_MEAN_23		(RKISP1_CIF_ISP_EXP_BASE + 0x00000058)
+#define RKISP1_CIF_ISP_EXP_MEAN_33		(RKISP1_CIF_ISP_EXP_BASE + 0x0000005c)
+#define RKISP1_CIF_ISP_EXP_MEAN_43		(RKISP1_CIF_ISP_EXP_BASE + 0x00000060)
+#define RKISP1_CIF_ISP_EXP_MEAN_04		(RKISP1_CIF_ISP_EXP_BASE + 0x00000064)
+#define RKISP1_CIF_ISP_EXP_MEAN_14		(RKISP1_CIF_ISP_EXP_BASE + 0x00000068)
+#define RKISP1_CIF_ISP_EXP_MEAN_24		(RKISP1_CIF_ISP_EXP_BASE + 0x0000006c)
+#define RKISP1_CIF_ISP_EXP_MEAN_34		(RKISP1_CIF_ISP_EXP_BASE + 0x00000070)
+#define RKISP1_CIF_ISP_EXP_MEAN_44		(RKISP1_CIF_ISP_EXP_BASE + 0x00000074)
+
+#define RKISP1_CIF_ISP_BLS_BASE			0x00002700
+#define RKISP1_CIF_ISP_BLS_CTRL			(RKISP1_CIF_ISP_BLS_BASE + 0x00000000)
+#define RKISP1_CIF_ISP_BLS_SAMPLES		(RKISP1_CIF_ISP_BLS_BASE + 0x00000004)
+#define RKISP1_CIF_ISP_BLS_H1_START		(RKISP1_CIF_ISP_BLS_BASE + 0x00000008)
+#define RKISP1_CIF_ISP_BLS_H1_STOP		(RKISP1_CIF_ISP_BLS_BASE + 0x0000000c)
+#define RKISP1_CIF_ISP_BLS_V1_START		(RKISP1_CIF_ISP_BLS_BASE + 0x00000010)
+#define RKISP1_CIF_ISP_BLS_V1_STOP		(RKISP1_CIF_ISP_BLS_BASE + 0x00000014)
+#define RKISP1_CIF_ISP_BLS_H2_START		(RKISP1_CIF_ISP_BLS_BASE + 0x00000018)
+#define RKISP1_CIF_ISP_BLS_H2_STOP		(RKISP1_CIF_ISP_BLS_BASE + 0x0000001c)
+#define RKISP1_CIF_ISP_BLS_V2_START		(RKISP1_CIF_ISP_BLS_BASE + 0x00000020)
+#define RKISP1_CIF_ISP_BLS_V2_STOP		(RKISP1_CIF_ISP_BLS_BASE + 0x00000024)
+#define RKISP1_CIF_ISP_BLS_A_FIXED		(RKISP1_CIF_ISP_BLS_BASE + 0x00000028)
+#define RKISP1_CIF_ISP_BLS_B_FIXED		(RKISP1_CIF_ISP_BLS_BASE + 0x0000002c)
+#define RKISP1_CIF_ISP_BLS_C_FIXED		(RKISP1_CIF_ISP_BLS_BASE + 0x00000030)
+#define RKISP1_CIF_ISP_BLS_D_FIXED		(RKISP1_CIF_ISP_BLS_BASE + 0x00000034)
+#define RKISP1_CIF_ISP_BLS_A_MEASURED		(RKISP1_CIF_ISP_BLS_BASE + 0x00000038)
+#define RKISP1_CIF_ISP_BLS_B_MEASURED		(RKISP1_CIF_ISP_BLS_BASE + 0x0000003c)
+#define RKISP1_CIF_ISP_BLS_C_MEASURED		(RKISP1_CIF_ISP_BLS_BASE + 0x00000040)
+#define RKISP1_CIF_ISP_BLS_D_MEASURED		(RKISP1_CIF_ISP_BLS_BASE + 0x00000044)
+
+#define RKISP1_CIF_ISP_DPF_BASE			0x00002800
+#define RKISP1_CIF_ISP_DPF_MODE			(RKISP1_CIF_ISP_DPF_BASE + 0x00000000)
+#define RKISP1_CIF_ISP_DPF_STRENGTH_R		(RKISP1_CIF_ISP_DPF_BASE + 0x00000004)
+#define RKISP1_CIF_ISP_DPF_STRENGTH_G		(RKISP1_CIF_ISP_DPF_BASE + 0x00000008)
+#define RKISP1_CIF_ISP_DPF_STRENGTH_B		(RKISP1_CIF_ISP_DPF_BASE + 0x0000000C)
+#define RKISP1_CIF_ISP_DPF_S_WEIGHT_G_1_4	(RKISP1_CIF_ISP_DPF_BASE + 0x00000010)
+#define RKISP1_CIF_ISP_DPF_S_WEIGHT_G_5_6	(RKISP1_CIF_ISP_DPF_BASE + 0x00000014)
+#define RKISP1_CIF_ISP_DPF_S_WEIGHT_RB_1_4	(RKISP1_CIF_ISP_DPF_BASE + 0x00000018)
+#define RKISP1_CIF_ISP_DPF_S_WEIGHT_RB_5_6	(RKISP1_CIF_ISP_DPF_BASE + 0x0000001C)
+#define RKISP1_CIF_ISP_DPF_NULL_COEFF_0		(RKISP1_CIF_ISP_DPF_BASE + 0x00000020)
+#define RKISP1_CIF_ISP_DPF_NULL_COEFF_1		(RKISP1_CIF_ISP_DPF_BASE + 0x00000024)
+#define RKISP1_CIF_ISP_DPF_NULL_COEFF_2		(RKISP1_CIF_ISP_DPF_BASE + 0x00000028)
+#define RKISP1_CIF_ISP_DPF_NULL_COEFF_3		(RKISP1_CIF_ISP_DPF_BASE + 0x0000002C)
+#define RKISP1_CIF_ISP_DPF_NULL_COEFF_4		(RKISP1_CIF_ISP_DPF_BASE + 0x00000030)
+#define RKISP1_CIF_ISP_DPF_NULL_COEFF_5		(RKISP1_CIF_ISP_DPF_BASE + 0x00000034)
+#define RKISP1_CIF_ISP_DPF_NULL_COEFF_6		(RKISP1_CIF_ISP_DPF_BASE + 0x00000038)
+#define RKISP1_CIF_ISP_DPF_NULL_COEFF_7		(RKISP1_CIF_ISP_DPF_BASE + 0x0000003C)
+#define RKISP1_CIF_ISP_DPF_NULL_COEFF_8		(RKISP1_CIF_ISP_DPF_BASE + 0x00000040)
+#define RKISP1_CIF_ISP_DPF_NULL_COEFF_9		(RKISP1_CIF_ISP_DPF_BASE + 0x00000044)
+#define RKISP1_CIF_ISP_DPF_NULL_COEFF_10	(RKISP1_CIF_ISP_DPF_BASE + 0x00000048)
+#define RKISP1_CIF_ISP_DPF_NULL_COEFF_11	(RKISP1_CIF_ISP_DPF_BASE + 0x0000004C)
+#define RKISP1_CIF_ISP_DPF_NULL_COEFF_12	(RKISP1_CIF_ISP_DPF_BASE + 0x00000050)
+#define RKISP1_CIF_ISP_DPF_NULL_COEFF_13	(RKISP1_CIF_ISP_DPF_BASE + 0x00000054)
+#define RKISP1_CIF_ISP_DPF_NULL_COEFF_14	(RKISP1_CIF_ISP_DPF_BASE + 0x00000058)
+#define RKISP1_CIF_ISP_DPF_NULL_COEFF_15	(RKISP1_CIF_ISP_DPF_BASE + 0x0000005C)
+#define RKISP1_CIF_ISP_DPF_NULL_COEFF_16	(RKISP1_CIF_ISP_DPF_BASE + 0x00000060)
+#define RKISP1_CIF_ISP_DPF_NF_GAIN_R		(RKISP1_CIF_ISP_DPF_BASE + 0x00000064)
+#define RKISP1_CIF_ISP_DPF_NF_GAIN_GR		(RKISP1_CIF_ISP_DPF_BASE + 0x00000068)
+#define RKISP1_CIF_ISP_DPF_NF_GAIN_GB		(RKISP1_CIF_ISP_DPF_BASE + 0x0000006C)
+#define RKISP1_CIF_ISP_DPF_NF_GAIN_B		(RKISP1_CIF_ISP_DPF_BASE + 0x00000070)
+
+#define RKISP1_CIF_ISP_DPCC_BASE		0x00002900
+#define RKISP1_CIF_ISP_DPCC_MODE		(RKISP1_CIF_ISP_DPCC_BASE + 0x00000000)
+#define RKISP1_CIF_ISP_DPCC_OUTPUT_MODE		(RKISP1_CIF_ISP_DPCC_BASE + 0x00000004)
+#define RKISP1_CIF_ISP_DPCC_SET_USE		(RKISP1_CIF_ISP_DPCC_BASE + 0x00000008)
+#define RKISP1_CIF_ISP_DPCC_METHODS_SET_1	(RKISP1_CIF_ISP_DPCC_BASE + 0x0000000C)
+#define RKISP1_CIF_ISP_DPCC_METHODS_SET_2	(RKISP1_CIF_ISP_DPCC_BASE + 0x00000010)
+#define RKISP1_CIF_ISP_DPCC_METHODS_SET_3	(RKISP1_CIF_ISP_DPCC_BASE + 0x00000014)
+#define RKISP1_CIF_ISP_DPCC_LINE_THRESH_1	(RKISP1_CIF_ISP_DPCC_BASE + 0x00000018)
+#define RKISP1_CIF_ISP_DPCC_LINE_MAD_FAC_1	(RKISP1_CIF_ISP_DPCC_BASE + 0x0000001C)
+#define RKISP1_CIF_ISP_DPCC_PG_FAC_1		(RKISP1_CIF_ISP_DPCC_BASE + 0x00000020)
+#define RKISP1_CIF_ISP_DPCC_RND_THRESH_1	(RKISP1_CIF_ISP_DPCC_BASE + 0x00000024)
+#define RKISP1_CIF_ISP_DPCC_RG_FAC_1		(RKISP1_CIF_ISP_DPCC_BASE + 0x00000028)
+#define RKISP1_CIF_ISP_DPCC_LINE_THRESH_2	(RKISP1_CIF_ISP_DPCC_BASE + 0x0000002C)
+#define RKISP1_CIF_ISP_DPCC_LINE_MAD_FAC_2	(RKISP1_CIF_ISP_DPCC_BASE + 0x00000030)
+#define RKISP1_CIF_ISP_DPCC_PG_FAC_2		(RKISP1_CIF_ISP_DPCC_BASE + 0x00000034)
+#define RKISP1_CIF_ISP_DPCC_RND_THRESH_2	(RKISP1_CIF_ISP_DPCC_BASE + 0x00000038)
+#define RKISP1_CIF_ISP_DPCC_RG_FAC_2		(RKISP1_CIF_ISP_DPCC_BASE + 0x0000003C)
+#define RKISP1_CIF_ISP_DPCC_LINE_THRESH_3	(RKISP1_CIF_ISP_DPCC_BASE + 0x00000040)
+#define RKISP1_CIF_ISP_DPCC_LINE_MAD_FAC_3	(RKISP1_CIF_ISP_DPCC_BASE + 0x00000044)
+#define RKISP1_CIF_ISP_DPCC_PG_FAC_3		(RKISP1_CIF_ISP_DPCC_BASE + 0x00000048)
+#define RKISP1_CIF_ISP_DPCC_RND_THRESH_3	(RKISP1_CIF_ISP_DPCC_BASE + 0x0000004C)
+#define RKISP1_CIF_ISP_DPCC_RG_FAC_3		(RKISP1_CIF_ISP_DPCC_BASE + 0x00000050)
+#define RKISP1_CIF_ISP_DPCC_RO_LIMITS		(RKISP1_CIF_ISP_DPCC_BASE + 0x00000054)
+#define RKISP1_CIF_ISP_DPCC_RND_OFFS		(RKISP1_CIF_ISP_DPCC_BASE + 0x00000058)
+#define RKISP1_CIF_ISP_DPCC_BPT_CTRL		(RKISP1_CIF_ISP_DPCC_BASE + 0x0000005C)
+#define RKISP1_CIF_ISP_DPCC_BPT_NUMBER		(RKISP1_CIF_ISP_DPCC_BASE + 0x00000060)
+#define RKISP1_CIF_ISP_DPCC_BPT_ADDR		(RKISP1_CIF_ISP_DPCC_BASE + 0x00000064)
+#define RKISP1_CIF_ISP_DPCC_BPT_DATA		(RKISP1_CIF_ISP_DPCC_BASE + 0x00000068)
+
+#define RKISP1_CIF_ISP_WDR_BASE			0x00002A00
+#define RKISP1_CIF_ISP_WDR_CTRL			(RKISP1_CIF_ISP_WDR_BASE + 0x00000000)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_1		(RKISP1_CIF_ISP_WDR_BASE + 0x00000004)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_2		(RKISP1_CIF_ISP_WDR_BASE + 0x00000008)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_3		(RKISP1_CIF_ISP_WDR_BASE + 0x0000000C)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_4		(RKISP1_CIF_ISP_WDR_BASE + 0x00000010)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_0	(RKISP1_CIF_ISP_WDR_BASE + 0x00000014)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_1	(RKISP1_CIF_ISP_WDR_BASE + 0x00000018)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_2	(RKISP1_CIF_ISP_WDR_BASE + 0x0000001C)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_3	(RKISP1_CIF_ISP_WDR_BASE + 0x00000020)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_4	(RKISP1_CIF_ISP_WDR_BASE + 0x00000024)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_5	(RKISP1_CIF_ISP_WDR_BASE + 0x00000028)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_6	(RKISP1_CIF_ISP_WDR_BASE + 0x0000002C)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_7	(RKISP1_CIF_ISP_WDR_BASE + 0x00000030)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_8	(RKISP1_CIF_ISP_WDR_BASE + 0x00000034)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_9	(RKISP1_CIF_ISP_WDR_BASE + 0x00000038)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_10	(RKISP1_CIF_ISP_WDR_BASE + 0x0000003C)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_11	(RKISP1_CIF_ISP_WDR_BASE + 0x00000040)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_12	(RKISP1_CIF_ISP_WDR_BASE + 0x00000044)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_13	(RKISP1_CIF_ISP_WDR_BASE + 0x00000048)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_14	(RKISP1_CIF_ISP_WDR_BASE + 0x0000004C)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_15	(RKISP1_CIF_ISP_WDR_BASE + 0x00000050)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_16	(RKISP1_CIF_ISP_WDR_BASE + 0x00000054)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_17	(RKISP1_CIF_ISP_WDR_BASE + 0x00000058)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_18	(RKISP1_CIF_ISP_WDR_BASE + 0x0000005C)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_19	(RKISP1_CIF_ISP_WDR_BASE + 0x00000060)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_20	(RKISP1_CIF_ISP_WDR_BASE + 0x00000064)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_21	(RKISP1_CIF_ISP_WDR_BASE + 0x00000068)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_22	(RKISP1_CIF_ISP_WDR_BASE + 0x0000006C)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_23	(RKISP1_CIF_ISP_WDR_BASE + 0x00000070)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_24	(RKISP1_CIF_ISP_WDR_BASE + 0x00000074)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_25	(RKISP1_CIF_ISP_WDR_BASE + 0x00000078)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_26	(RKISP1_CIF_ISP_WDR_BASE + 0x0000007C)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_27	(RKISP1_CIF_ISP_WDR_BASE + 0x00000080)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_28	(RKISP1_CIF_ISP_WDR_BASE + 0x00000084)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_29	(RKISP1_CIF_ISP_WDR_BASE + 0x00000088)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_30	(RKISP1_CIF_ISP_WDR_BASE + 0x0000008C)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_31	(RKISP1_CIF_ISP_WDR_BASE + 0x00000090)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_32	(RKISP1_CIF_ISP_WDR_BASE + 0x00000094)
+#define RKISP1_CIF_ISP_WDR_OFFSET		(RKISP1_CIF_ISP_WDR_BASE + 0x00000098)
+#define RKISP1_CIF_ISP_WDR_DELTAMIN		(RKISP1_CIF_ISP_WDR_BASE + 0x0000009C)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_1_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000A0)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_2_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000A4)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_3_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000A8)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_4_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000AC)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_0_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000B0)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_1_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000B4)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_2_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000B8)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_3_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000BC)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_4_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000C0)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_5_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000C4)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_6_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000C8)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_7_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000CC)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_8_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000D0)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_9_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000D4)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_10_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000D8)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_11_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000DC)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_12_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000E0)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_13_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000E4)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_14_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000E8)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_15_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000EC)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_16_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000F0)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_17_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000F4)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_18_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000F8)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_19_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000FC)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_20_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x00000100)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_21_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x00000104)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_22_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x00000108)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_23_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x0000010C)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_24_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x00000110)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_25_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x00000114)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_26_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x00000118)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_27_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x0000011C)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_28_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x00000120)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_29_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x00000124)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_30_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x00000128)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_31_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x0000012C)
+#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_32_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x00000130)
+
+#define RKISP1_CIF_ISP_VSM_BASE			0x00002F00
+#define RKISP1_CIF_ISP_VSM_MODE			(RKISP1_CIF_ISP_VSM_BASE + 0x00000000)
+#define RKISP1_CIF_ISP_VSM_H_OFFS		(RKISP1_CIF_ISP_VSM_BASE + 0x00000004)
+#define RKISP1_CIF_ISP_VSM_V_OFFS		(RKISP1_CIF_ISP_VSM_BASE + 0x00000008)
+#define RKISP1_CIF_ISP_VSM_H_SIZE		(RKISP1_CIF_ISP_VSM_BASE + 0x0000000C)
+#define RKISP1_CIF_ISP_VSM_V_SIZE		(RKISP1_CIF_ISP_VSM_BASE + 0x00000010)
+#define RKISP1_CIF_ISP_VSM_H_SEGMENTS		(RKISP1_CIF_ISP_VSM_BASE + 0x00000014)
+#define RKISP1_CIF_ISP_VSM_V_SEGMENTS		(RKISP1_CIF_ISP_VSM_BASE + 0x00000018)
+#define RKISP1_CIF_ISP_VSM_DELTA_H		(RKISP1_CIF_ISP_VSM_BASE + 0x0000001C)
+#define RKISP1_CIF_ISP_VSM_DELTA_V		(RKISP1_CIF_ISP_VSM_BASE + 0x00000020)
+
+#endif /* _RKISP1_REGS_H */
diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-resizer.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-resizer.c
new file mode 100644
index 000000000000..813670ed9577
--- /dev/null
+++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-resizer.c
@@ -0,0 +1,846 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Rockchip ISP1 Driver - V4l resizer device
+ *
+ * Copyright (C) 2019 Collabora, Ltd.
+ *
+ * Based on Rockchip ISP1 driver by Rockchip Electronics Co., Ltd.
+ * Copyright (C) 2017 Rockchip Electronics Co., Ltd.
+ */
+
+#include "rkisp1-common.h"
+
+/* Subdev names given to the selfpath and mainpath resizers at registration */
+#define RKISP1_RSZ_SP_DEV_NAME	RKISP1_DRIVER_NAME "_resizer_selfpath"
+#define RKISP1_RSZ_MP_DEV_NAME	RKISP1_DRIVER_NAME "_resizer_mainpath"
+
+/*
+ * Default media bus code (also the fallback for unsupported sink codes) and
+ * the pixel encoding a resizer starts out with.
+ */
+#define RKISP1_DEF_FMT MEDIA_BUS_FMT_YUYV8_2X8
+#define RKISP1_DEF_PIXEL_ENC V4L2_PIXEL_ENC_YUV
+
+/*
+ * Chroma subsampling description of a YUV media bus code: the resizer divides
+ * the luma width by hdiv and the luma height by vdiv to obtain the chroma size.
+ */
+struct rkisp1_rsz_yuv_mbus_info {
+	u32 mbus_code;
+	u32 hdiv;
+	u32 vdiv;
+};
+
+/* YUV media bus codes known to the resizer, searched by rkisp1_rsz_get_yuv_mbus_info() */
+static const struct rkisp1_rsz_yuv_mbus_info rkisp1_rsz_yuv_src_formats[] = {
+	{
+		.mbus_code	= MEDIA_BUS_FMT_YUYV8_2X8, /* YUV422 */
+		.hdiv		= 2,
+		.vdiv		= 1,
+	},
+	{
+		.mbus_code	= MEDIA_BUS_FMT_YUYV8_1_5X8, /* YUV420 */
+		.hdiv		= 2,
+		.vdiv		= 2,
+	},
+};
+
+/*
+ * Find the subsampling description of @mbus_code in
+ * rkisp1_rsz_yuv_src_formats. Returns NULL when the code is not listed.
+ */
+static const struct rkisp1_rsz_yuv_mbus_info *rkisp1_rsz_get_yuv_mbus_info(u32 mbus_code)
+{
+	const struct rkisp1_rsz_yuv_mbus_info *info = rkisp1_rsz_yuv_src_formats;
+	const struct rkisp1_rsz_yuv_mbus_info *end =
+		info + ARRAY_SIZE(rkisp1_rsz_yuv_src_formats);
+
+	for (; info < end; info++) {
+		if (info->mbus_code == mbus_code)
+			return info;
+	}
+
+	return NULL;
+}
+
+/*
+ * Selects which update bit the resizer/dual-crop helpers set after writing
+ * new settings: SYNC sets the plain CFG_UPD bit, ASYNC sets the CFG_UPD_AUTO
+ * (resizer) or GEN_CFG_UPD (dual crop) bit instead.
+ */
+enum rkisp1_shadow_regs_when {
+	RKISP1_SHADOW_REGS_SYNC,
+	RKISP1_SHADOW_REGS_ASYNC,
+};
+
+/*
+ * Per-path (mainpath/selfpath) resizer description: limits used to clamp the
+ * source pad size, plus the register addresses of the resizer and of its
+ * dual-crop unit.
+ */
+struct rkisp1_rsz_config {
+	/* constraints */
+	const int max_rsz_width;
+	const int max_rsz_height;
+	const int min_rsz_width;
+	const int min_rsz_height;
+	/* registers */
+	struct {
+		u32 ctrl;
+		u32 ctrl_shd;
+		u32 scale_hy;
+		u32 scale_hcr;
+		u32 scale_hcb;
+		u32 scale_vy;
+		u32 scale_vc;
+		u32 scale_lut;
+		u32 scale_lut_addr;
+		u32 scale_hy_shd;
+		u32 scale_hcr_shd;
+		u32 scale_hcb_shd;
+		u32 scale_vy_shd;
+		u32 scale_vc_shd;
+		u32 phase_hy;
+		u32 phase_hc;
+		u32 phase_vy;
+		u32 phase_vc;
+		u32 phase_hy_shd;
+		u32 phase_hc_shd;
+		u32 phase_vy_shd;
+		u32 phase_vc_shd;
+	} rsz;
+	/* dual-crop control register, its per-path mode bits and geometry registers */
+	struct {
+		u32 ctrl;
+		u32 yuvmode_mask;
+		u32 rawmode_mask;
+		u32 h_offset;
+		u32 v_offset;
+		u32 h_size;
+		u32 v_size;
+	} dual_crop;
+};
+
+/* Mainpath resizer: size limits plus MRSZ and dual-crop "M" register addresses */
+static const struct rkisp1_rsz_config rkisp1_rsz_config_mp = {
+	/* constraints */
+	.min_rsz_width = RKISP1_RSZ_SRC_MIN_WIDTH,
+	.min_rsz_height = RKISP1_RSZ_SRC_MIN_HEIGHT,
+	.max_rsz_width = RKISP1_RSZ_MP_SRC_MAX_WIDTH,
+	.max_rsz_height = RKISP1_RSZ_MP_SRC_MAX_HEIGHT,
+	/* registers */
+	.rsz = {
+		/* control, scale factors, LUT and phases */
+		.ctrl =			RKISP1_CIF_MRSZ_CTRL,
+		.scale_hy =		RKISP1_CIF_MRSZ_SCALE_HY,
+		.scale_hcb =		RKISP1_CIF_MRSZ_SCALE_HCB,
+		.scale_hcr =		RKISP1_CIF_MRSZ_SCALE_HCR,
+		.scale_vy =		RKISP1_CIF_MRSZ_SCALE_VY,
+		.scale_vc =		RKISP1_CIF_MRSZ_SCALE_VC,
+		.scale_lut_addr =	RKISP1_CIF_MRSZ_SCALE_LUT_ADDR,
+		.scale_lut =		RKISP1_CIF_MRSZ_SCALE_LUT,
+		.phase_hy =		RKISP1_CIF_MRSZ_PHASE_HY,
+		.phase_hc =		RKISP1_CIF_MRSZ_PHASE_HC,
+		.phase_vy =		RKISP1_CIF_MRSZ_PHASE_VY,
+		.phase_vc =		RKISP1_CIF_MRSZ_PHASE_VC,
+		/* the matching _SHD registers */
+		.ctrl_shd =		RKISP1_CIF_MRSZ_CTRL_SHD,
+		.scale_hy_shd =		RKISP1_CIF_MRSZ_SCALE_HY_SHD,
+		.scale_hcb_shd =	RKISP1_CIF_MRSZ_SCALE_HCB_SHD,
+		.scale_hcr_shd =	RKISP1_CIF_MRSZ_SCALE_HCR_SHD,
+		.scale_vy_shd =		RKISP1_CIF_MRSZ_SCALE_VY_SHD,
+		.scale_vc_shd =		RKISP1_CIF_MRSZ_SCALE_VC_SHD,
+		.phase_hy_shd =		RKISP1_CIF_MRSZ_PHASE_HY_SHD,
+		.phase_hc_shd =		RKISP1_CIF_MRSZ_PHASE_HC_SHD,
+		.phase_vy_shd =		RKISP1_CIF_MRSZ_PHASE_VY_SHD,
+		.phase_vc_shd =		RKISP1_CIF_MRSZ_PHASE_VC_SHD,
+	},
+	.dual_crop = {
+		.ctrl =			RKISP1_CIF_DUAL_CROP_CTRL,
+		.rawmode_mask =		RKISP1_CIF_DUAL_CROP_MP_MODE_RAW,
+		.yuvmode_mask =		RKISP1_CIF_DUAL_CROP_MP_MODE_YUV,
+		.h_offset =		RKISP1_CIF_DUAL_CROP_M_H_OFFS,
+		.h_size =		RKISP1_CIF_DUAL_CROP_M_H_SIZE,
+		.v_offset =		RKISP1_CIF_DUAL_CROP_M_V_OFFS,
+		.v_size =		RKISP1_CIF_DUAL_CROP_M_V_SIZE,
+	},
+};
+
+/* Selfpath resizer: size limits plus SRSZ and dual-crop "S" register addresses */
+static const struct rkisp1_rsz_config rkisp1_rsz_config_sp = {
+	/* constraints */
+	.min_rsz_width = RKISP1_RSZ_SRC_MIN_WIDTH,
+	.min_rsz_height = RKISP1_RSZ_SRC_MIN_HEIGHT,
+	.max_rsz_width = RKISP1_RSZ_SP_SRC_MAX_WIDTH,
+	.max_rsz_height = RKISP1_RSZ_SP_SRC_MAX_HEIGHT,
+	/* registers */
+	.rsz = {
+		/* control, scale factors, LUT and phases */
+		.ctrl =			RKISP1_CIF_SRSZ_CTRL,
+		.scale_hy =		RKISP1_CIF_SRSZ_SCALE_HY,
+		.scale_hcb =		RKISP1_CIF_SRSZ_SCALE_HCB,
+		.scale_hcr =		RKISP1_CIF_SRSZ_SCALE_HCR,
+		.scale_vy =		RKISP1_CIF_SRSZ_SCALE_VY,
+		.scale_vc =		RKISP1_CIF_SRSZ_SCALE_VC,
+		.scale_lut_addr =	RKISP1_CIF_SRSZ_SCALE_LUT_ADDR,
+		.scale_lut =		RKISP1_CIF_SRSZ_SCALE_LUT,
+		.phase_hy =		RKISP1_CIF_SRSZ_PHASE_HY,
+		.phase_hc =		RKISP1_CIF_SRSZ_PHASE_HC,
+		.phase_vy =		RKISP1_CIF_SRSZ_PHASE_VY,
+		.phase_vc =		RKISP1_CIF_SRSZ_PHASE_VC,
+		/* the matching _SHD registers */
+		.ctrl_shd =		RKISP1_CIF_SRSZ_CTRL_SHD,
+		.scale_hy_shd =		RKISP1_CIF_SRSZ_SCALE_HY_SHD,
+		.scale_hcb_shd =	RKISP1_CIF_SRSZ_SCALE_HCB_SHD,
+		.scale_hcr_shd =	RKISP1_CIF_SRSZ_SCALE_HCR_SHD,
+		.scale_vy_shd =		RKISP1_CIF_SRSZ_SCALE_VY_SHD,
+		.scale_vc_shd =		RKISP1_CIF_SRSZ_SCALE_VC_SHD,
+		.phase_hy_shd =		RKISP1_CIF_SRSZ_PHASE_HY_SHD,
+		.phase_hc_shd =		RKISP1_CIF_SRSZ_PHASE_HC_SHD,
+		.phase_vy_shd =		RKISP1_CIF_SRSZ_PHASE_VY_SHD,
+		.phase_vc_shd =		RKISP1_CIF_SRSZ_PHASE_VC_SHD,
+	},
+	.dual_crop = {
+		.ctrl =			RKISP1_CIF_DUAL_CROP_CTRL,
+		.rawmode_mask =		RKISP1_CIF_DUAL_CROP_SP_MODE_RAW,
+		.yuvmode_mask =		RKISP1_CIF_DUAL_CROP_SP_MODE_YUV,
+		.h_offset =		RKISP1_CIF_DUAL_CROP_S_H_OFFS,
+		.h_size =		RKISP1_CIF_DUAL_CROP_S_H_SIZE,
+		.v_offset =		RKISP1_CIF_DUAL_CROP_S_V_OFFS,
+		.v_size =		RKISP1_CIF_DUAL_CROP_S_V_SIZE,
+	},
+};
+
+/*
+ * Return the format stored for @pad: looked up in @cfg for TRY requests and in
+ * the resizer's own pad config (rsz->pad_cfg) for everything else.
+ */
+static struct v4l2_mbus_framefmt *
+rkisp1_rsz_get_pad_fmt(struct rkisp1_resizer *rsz,
+		       struct v4l2_subdev_pad_config *cfg,
+		       unsigned int pad, u32 which)
+{
+	struct v4l2_subdev_pad_config *pad_cfg;
+
+	pad_cfg = (which == V4L2_SUBDEV_FORMAT_TRY) ? cfg : rsz->pad_cfg;
+
+	return v4l2_subdev_get_try_format(&rsz->sd, pad_cfg, pad);
+}
+
+/*
+ * Return the crop rectangle stored for @pad: looked up in @cfg for TRY
+ * requests and in the resizer's own pad config (rsz->pad_cfg) otherwise.
+ */
+static struct v4l2_rect *
+rkisp1_rsz_get_pad_crop(struct rkisp1_resizer *rsz,
+			struct v4l2_subdev_pad_config *cfg,
+			unsigned int pad, u32 which)
+{
+	struct v4l2_subdev_pad_config *pad_cfg;
+
+	pad_cfg = (which == V4L2_SUBDEV_FORMAT_TRY) ? cfg : rsz->pad_cfg;
+
+	return v4l2_subdev_get_try_crop(&rsz->sd, pad_cfg, pad);
+}
+
+/* ----------------------------------------------------------------------------
+ * Dual crop hw configs
+ */
+
+/*
+ * Clear this path's YUV and RAW mode bits in the dual-crop control register
+ * and set the update bit selected by @when.
+ */
+static void rkisp1_dcrop_disable(struct rkisp1_resizer *rsz,
+				 enum rkisp1_shadow_regs_when when)
+{
+	u32 mode_bits = rsz->config->dual_crop.yuvmode_mask |
+			rsz->config->dual_crop.rawmode_mask;
+	u32 val;
+
+	val = rkisp1_read(rsz->rkisp1, rsz->config->dual_crop.ctrl);
+	val &= ~mode_bits;
+
+	switch (when) {
+	case RKISP1_SHADOW_REGS_ASYNC:
+		val |= RKISP1_CIF_DUAL_CROP_GEN_CFG_UPD;
+		break;
+	default:
+		val |= RKISP1_CIF_DUAL_CROP_CFG_UPD;
+		break;
+	}
+
+	rkisp1_write(rsz->rkisp1, val, rsz->config->dual_crop.ctrl);
+}
+
+/*
+ * Program the dual-crop unit from the active sink crop rectangle. When the
+ * rectangle covers the whole sink frame the unit is disabled instead.
+ */
+static void rkisp1_dcrop_config(struct rkisp1_resizer *rsz)
+{
+	struct rkisp1_device *rkisp1 = rsz->rkisp1;
+	struct v4l2_mbus_framefmt *sink_fmt;
+	struct v4l2_rect *sink_crop;
+	bool full_frame;
+	u32 dc_ctrl;
+
+	sink_crop = rkisp1_rsz_get_pad_crop(rsz, NULL, RKISP1_RSZ_PAD_SINK,
+					    V4L2_SUBDEV_FORMAT_ACTIVE);
+	sink_fmt = rkisp1_rsz_get_pad_fmt(rsz, NULL, RKISP1_RSZ_PAD_SINK,
+					  V4L2_SUBDEV_FORMAT_ACTIVE);
+
+	full_frame = sink_crop->width == sink_fmt->width &&
+		     sink_crop->height == sink_fmt->height &&
+		     sink_crop->left == 0 && sink_crop->top == 0;
+	if (full_frame) {
+		rkisp1_dcrop_disable(rsz, RKISP1_SHADOW_REGS_SYNC);
+		dev_dbg(rkisp1->dev, "capture %d crop disabled\n", rsz->id);
+		return;
+	}
+
+	/* Read the control register first, then write geometry, then control */
+	dc_ctrl = rkisp1_read(rkisp1, rsz->config->dual_crop.ctrl);
+
+	rkisp1_write(rkisp1, sink_crop->left, rsz->config->dual_crop.h_offset);
+	rkisp1_write(rkisp1, sink_crop->top, rsz->config->dual_crop.v_offset);
+	rkisp1_write(rkisp1, sink_crop->width, rsz->config->dual_crop.h_size);
+	rkisp1_write(rkisp1, sink_crop->height, rsz->config->dual_crop.v_size);
+
+	dc_ctrl |= rsz->config->dual_crop.yuvmode_mask;
+	dc_ctrl |= RKISP1_CIF_DUAL_CROP_CFG_UPD;
+	rkisp1_write(rkisp1, dc_ctrl, rsz->config->dual_crop.ctrl);
+
+	dev_dbg(rkisp1->dev, "stream %d crop: %dx%d -> %dx%d\n", rsz->id,
+		sink_fmt->width, sink_fmt->height,
+		sink_crop->width, sink_crop->height);
+}
+
+/* ----------------------------------------------------------------------------
+ * Resizer hw configs
+ */
+
+/*
+ * Debug helper: print each resizer register next to its _shd counterpart
+ * (formatted as "register/shadow") through dev_dbg().
+ */
+static void rkisp1_rsz_dump_regs(struct rkisp1_resizer *rsz)
+{
+	dev_dbg(rsz->rkisp1->dev,
+		"RSZ_CTRL 0x%08x/0x%08x\n"
+		"RSZ_SCALE_HY %d/%d\n"
+		"RSZ_SCALE_HCB %d/%d\n"
+		"RSZ_SCALE_HCR %d/%d\n"
+		"RSZ_SCALE_VY %d/%d\n"
+		"RSZ_SCALE_VC %d/%d\n"
+		"RSZ_PHASE_HY %d/%d\n"
+		"RSZ_PHASE_HC %d/%d\n"
+		"RSZ_PHASE_VY %d/%d\n"
+		"RSZ_PHASE_VC %d/%d\n",
+		rkisp1_read(rsz->rkisp1, rsz->config->rsz.ctrl),
+		rkisp1_read(rsz->rkisp1, rsz->config->rsz.ctrl_shd),
+		rkisp1_read(rsz->rkisp1, rsz->config->rsz.scale_hy),
+		rkisp1_read(rsz->rkisp1, rsz->config->rsz.scale_hy_shd),
+		rkisp1_read(rsz->rkisp1, rsz->config->rsz.scale_hcb),
+		rkisp1_read(rsz->rkisp1, rsz->config->rsz.scale_hcb_shd),
+		rkisp1_read(rsz->rkisp1, rsz->config->rsz.scale_hcr),
+		rkisp1_read(rsz->rkisp1, rsz->config->rsz.scale_hcr_shd),
+		rkisp1_read(rsz->rkisp1, rsz->config->rsz.scale_vy),
+		rkisp1_read(rsz->rkisp1, rsz->config->rsz.scale_vy_shd),
+		rkisp1_read(rsz->rkisp1, rsz->config->rsz.scale_vc),
+		rkisp1_read(rsz->rkisp1, rsz->config->rsz.scale_vc_shd),
+		rkisp1_read(rsz->rkisp1, rsz->config->rsz.phase_hy),
+		rkisp1_read(rsz->rkisp1, rsz->config->rsz.phase_hy_shd),
+		rkisp1_read(rsz->rkisp1, rsz->config->rsz.phase_hc),
+		rkisp1_read(rsz->rkisp1, rsz->config->rsz.phase_hc_shd),
+		rkisp1_read(rsz->rkisp1, rsz->config->rsz.phase_vy),
+		rkisp1_read(rsz->rkisp1, rsz->config->rsz.phase_vy_shd),
+		rkisp1_read(rsz->rkisp1, rsz->config->rsz.phase_vc),
+		rkisp1_read(rsz->rkisp1, rsz->config->rsz.phase_vc_shd));
+}
+
+/*
+ * Read-modify-write the resizer control register, setting CFG_UPD_AUTO for
+ * the ASYNC case and CFG_UPD otherwise.
+ */
+static void rkisp1_rsz_update_shadow(struct rkisp1_resizer *rsz,
+				     enum rkisp1_shadow_regs_when when)
+{
+	struct rkisp1_device *rkisp1 = rsz->rkisp1;
+	u32 val = rkisp1_read(rkisp1, rsz->config->rsz.ctrl);
+
+	switch (when) {
+	case RKISP1_SHADOW_REGS_ASYNC:
+		val |= RKISP1_CIF_RSZ_CTRL_CFG_UPD_AUTO;
+		break;
+	default:
+		val |= RKISP1_CIF_RSZ_CTRL_CFG_UPD;
+		break;
+	}
+
+	rkisp1_write(rkisp1, val, rsz->config->rsz.ctrl);
+}
+
+/*
+ * Compute the scale factor written to the scale registers for a
+ * @len_sink -> @len_src resize, in units of RKISP1_CIF_RSZ_SCALER_FACTOR.
+ * The smaller length always ends up in the numerator; when the sink is not
+ * smaller than the source the result is additionally incremented by one.
+ */
+static u32 rkisp1_rsz_calc_ratio(u32 len_sink, u32 len_src)
+{
+	u32 sink_span = len_sink - 1;
+	u32 src_span = len_src - 1;
+
+	if (len_sink >= len_src)
+		return (src_span * RKISP1_CIF_RSZ_SCALER_FACTOR) / sink_span + 1;
+
+	return (sink_span * RKISP1_CIF_RSZ_SCALER_FACTOR) / src_span;
+}
+
+/*
+ * Write 0 to the resizer control register. Only in the SYNC case is the
+ * update bit set afterwards; for ASYNC nothing else is written.
+ */
+static void rkisp1_rsz_disable(struct rkisp1_resizer *rsz,
+			       enum rkisp1_shadow_regs_when when)
+{
+	rkisp1_write(rsz->rkisp1, 0, rsz->config->rsz.ctrl);
+
+	if (when != RKISP1_SHADOW_REGS_SYNC)
+		return;
+
+	rkisp1_rsz_update_shadow(rsz, RKISP1_SHADOW_REGS_SYNC);
+}
+
+/*
+ * Program the resizer registers for one path.
+ *
+ * @sink_y/@sink_c: luma/chroma dimensions entering the resizer
+ * @src_y/@src_c: luma/chroma dimensions leaving the resizer
+ * @when: which update bit rkisp1_rsz_update_shadow() sets at the end
+ *
+ * Only the width and height members of the rectangles are read. A scaler
+ * stage is enabled only for a dimension that actually changes, and its _UP
+ * bit is set when the sink dimension is smaller than the source one.
+ */
+static void rkisp1_rsz_config_regs(struct rkisp1_resizer *rsz,
+				   struct v4l2_rect *sink_y,
+				   struct v4l2_rect *sink_c,
+				   struct v4l2_rect *src_y,
+				   struct v4l2_rect *src_c,
+				   enum rkisp1_shadow_regs_when when)
+{
+	struct rkisp1_device *rkisp1 = rsz->rkisp1;
+	u32 ratio, rsz_ctrl = 0;
+	unsigned int i;
+
+	/* No phase offset */
+	rkisp1_write(rkisp1, 0, rsz->config->rsz.phase_hy);
+	rkisp1_write(rkisp1, 0, rsz->config->rsz.phase_hc);
+	rkisp1_write(rkisp1, 0, rsz->config->rsz.phase_vy);
+	rkisp1_write(rkisp1, 0, rsz->config->rsz.phase_vc);
+
+	/* Linear interpolation */
+	/* each of the 64 LUT entries is selected through lut_addr and set to its own index */
+	for (i = 0; i < 64; i++) {
+		rkisp1_write(rkisp1, i, rsz->config->rsz.scale_lut_addr);
+		rkisp1_write(rkisp1, i, rsz->config->rsz.scale_lut);
+	}
+
+	/* horizontal luma */
+	if (sink_y->width != src_y->width) {
+		rsz_ctrl |= RKISP1_CIF_RSZ_CTRL_SCALE_HY_ENABLE;
+		if (sink_y->width < src_y->width)
+			rsz_ctrl |= RKISP1_CIF_RSZ_CTRL_SCALE_HY_UP;
+		ratio = rkisp1_rsz_calc_ratio(sink_y->width, src_y->width);
+		rkisp1_write(rkisp1, ratio, rsz->config->rsz.scale_hy);
+	}
+
+	/* horizontal chroma: Cb and Cr share the same ratio */
+	if (sink_c->width != src_c->width) {
+		rsz_ctrl |= RKISP1_CIF_RSZ_CTRL_SCALE_HC_ENABLE;
+		if (sink_c->width < src_c->width)
+			rsz_ctrl |= RKISP1_CIF_RSZ_CTRL_SCALE_HC_UP;
+		ratio = rkisp1_rsz_calc_ratio(sink_c->width, src_c->width);
+		rkisp1_write(rkisp1, ratio, rsz->config->rsz.scale_hcb);
+		rkisp1_write(rkisp1, ratio, rsz->config->rsz.scale_hcr);
+	}
+
+	/* vertical luma */
+	if (sink_y->height != src_y->height) {
+		rsz_ctrl |= RKISP1_CIF_RSZ_CTRL_SCALE_VY_ENABLE;
+		if (sink_y->height < src_y->height)
+			rsz_ctrl |= RKISP1_CIF_RSZ_CTRL_SCALE_VY_UP;
+		ratio = rkisp1_rsz_calc_ratio(sink_y->height, src_y->height);
+		rkisp1_write(rkisp1, ratio, rsz->config->rsz.scale_vy);
+	}
+
+	/* vertical chroma */
+	if (sink_c->height != src_c->height) {
+		rsz_ctrl |= RKISP1_CIF_RSZ_CTRL_SCALE_VC_ENABLE;
+		if (sink_c->height < src_c->height)
+			rsz_ctrl |= RKISP1_CIF_RSZ_CTRL_SCALE_VC_UP;
+		ratio = rkisp1_rsz_calc_ratio(sink_c->height, src_c->height);
+		rkisp1_write(rkisp1, ratio, rsz->config->rsz.scale_vc);
+	}
+
+	/* the enable/up bits are written last, followed by the update bit */
+	rkisp1_write(rkisp1, rsz_ctrl, rsz->config->rsz.ctrl);
+
+	rkisp1_rsz_update_shadow(rsz, when);
+}
+
+/*
+ * Derive the luma/chroma input and output sizes from the active sink crop and
+ * source format and program the resizer accordingly. The resizer is disabled
+ * for bayer input and when the chroma input and output sizes are identical.
+ */
+static void rkisp1_rsz_config(struct rkisp1_resizer *rsz,
+			      enum rkisp1_shadow_regs_when when)
+{
+	const struct rkisp1_rsz_yuv_mbus_info *sink_yuv_info, *src_yuv_info;
+	struct v4l2_mbus_framefmt *src_fmt, *sink_fmt;
+	struct v4l2_rect sink_y, sink_c, src_y, src_c;
+	struct v4l2_rect *sink_crop;
+
+	sink_crop = rkisp1_rsz_get_pad_crop(rsz, NULL, RKISP1_RSZ_PAD_SINK,
+					    V4L2_SUBDEV_FORMAT_ACTIVE);
+	src_fmt = rkisp1_rsz_get_pad_fmt(rsz, NULL, RKISP1_RSZ_PAD_SRC,
+					 V4L2_SUBDEV_FORMAT_ACTIVE);
+	sink_fmt = rkisp1_rsz_get_pad_fmt(rsz, NULL, RKISP1_RSZ_PAD_SINK,
+					  V4L2_SUBDEV_FORMAT_ACTIVE);
+
+	/* The resizer only handles YUV data: nothing to scale for bayer. */
+	if (rsz->pixel_enc == V4L2_PIXEL_ENC_BAYER) {
+		rkisp1_rsz_disable(rsz, when);
+		return;
+	}
+
+	sink_yuv_info = rkisp1_rsz_get_yuv_mbus_info(sink_fmt->code);
+	src_yuv_info = rkisp1_rsz_get_yuv_mbus_info(src_fmt->code);
+
+	/* Luma: the cropped sink frame is scaled to the source pad size. */
+	sink_y.width = sink_crop->width;
+	sink_y.height = sink_crop->height;
+	src_y.width = src_fmt->width;
+	src_y.height = src_fmt->height;
+
+	/*
+	 * Chroma: each side uses the subsampling of its own media bus code,
+	 * so the resizer also converts between subsampling schemes
+	 * (4:2:2 -> 4:2:0 for example) in addition to changing the frame size.
+	 */
+	sink_c.width = sink_y.width / sink_yuv_info->hdiv;
+	sink_c.height = sink_y.height / sink_yuv_info->vdiv;
+	src_c.width = src_y.width / src_yuv_info->hdiv;
+	src_c.height = src_y.height / src_yuv_info->vdiv;
+
+	if (sink_c.width == src_c.width && sink_c.height == src_c.height) {
+		rkisp1_rsz_disable(rsz, when);
+		return;
+	}
+
+	dev_dbg(rsz->rkisp1->dev, "stream %d rsz/scale: %dx%d -> %dx%d\n",
+		rsz->id, sink_crop->width, sink_crop->height,
+		src_fmt->width, src_fmt->height);
+	dev_dbg(rsz->rkisp1->dev, "chroma scaling %dx%d -> %dx%d\n",
+		sink_c.width, sink_c.height, src_c.width, src_c.height);
+
+	/* set values in the hw */
+	rkisp1_rsz_config_regs(rsz, &sink_y, &sink_c, &src_y, &src_c, when);
+
+	rkisp1_rsz_dump_regs(rsz);
+}
+
+/* ----------------------------------------------------------------------------
+ * Subdev pad operations
+ */
+
+/*
+ * Enumerate the media bus codes of a resizer pad:
+ *  - source pad: delegated to the capture device with the same id;
+ *  - selfpath sink pad: only MEDIA_BUS_FMT_YUYV8_2X8 (index 0);
+ *  - mainpath sink pad: delegated to the ISP subdev's video source pad.
+ */
+static int rkisp1_rsz_enum_mbus_code(struct v4l2_subdev *sd,
+				     struct v4l2_subdev_pad_config *cfg,
+				     struct v4l2_subdev_mbus_code_enum *code)
+{
+	struct rkisp1_resizer *rsz =
+		container_of(sd, struct rkisp1_resizer, sd);
+	struct v4l2_subdev_pad_config dummy_cfg;
+	const u32 rsz_pad = code->pad;
+	int ret;
+
+	/* supported mbus codes on the src are the same as in the capture */
+	if (rsz_pad == RKISP1_RSZ_PAD_SRC)
+		return rkisp1_cap_enum_mbus_codes(&rsz->rkisp1->capture_devs[rsz->id],
+						  code);
+
+	/*
+	 * The selfpath capture doesn't support bayer formats, so the selfpath
+	 * resizer only offers YUV422 on its sink pad.
+	 */
+	if (rsz->id == RKISP1_SELFPATH) {
+		if (code->index != 0)
+			return -EINVAL;
+
+		code->code = MEDIA_BUS_FMT_YUYV8_2X8;
+		return 0;
+	}
+
+	/*
+	 * The mainpath sink pad accepts whatever the ISP source pad produces:
+	 * query the ISP with its own pad number, then restore ours.
+	 */
+	code->pad = RKISP1_ISP_PAD_SOURCE_VIDEO;
+	ret = v4l2_subdev_call(&rsz->rkisp1->isp.sd, pad, enum_mbus_code,
+			       &dummy_cfg, code);
+	code->pad = rsz_pad;
+	code->flags = 0;
+
+	return ret;
+}
+
+/*
+ * Initialise a pad configuration with the driver defaults: both pads get the
+ * default size, field and media bus code, and the sink crop covers the whole
+ * default frame. Always returns 0.
+ */
+static int rkisp1_rsz_init_config(struct v4l2_subdev *sd,
+				  struct v4l2_subdev_pad_config *cfg)
+{
+	struct v4l2_mbus_framefmt *sink_fmt, *src_fmt;
+	struct v4l2_rect *sink_crop;
+
+	/* each pointer is looked up from the pad it is named after */
+	sink_fmt = v4l2_subdev_get_try_format(sd, cfg, RKISP1_RSZ_PAD_SINK);
+	sink_fmt->width = RKISP1_DEFAULT_WIDTH;
+	sink_fmt->height = RKISP1_DEFAULT_HEIGHT;
+	sink_fmt->field = V4L2_FIELD_NONE;
+	sink_fmt->code = RKISP1_DEF_FMT;
+
+	sink_crop = v4l2_subdev_get_try_crop(sd, cfg, RKISP1_RSZ_PAD_SINK);
+	sink_crop->width = RKISP1_DEFAULT_WIDTH;
+	sink_crop->height = RKISP1_DEFAULT_HEIGHT;
+	sink_crop->left = 0;
+	sink_crop->top = 0;
+
+	/* the source pad starts out identical to the sink pad */
+	src_fmt = v4l2_subdev_get_try_format(sd, cfg, RKISP1_RSZ_PAD_SRC);
+	*src_fmt = *sink_fmt;
+
+	/* NOTE: there is no crop in the source pad, only in the sink */
+
+	return 0;
+}
+
+/*
+ * Apply a format request to the source pad.
+ *
+ * The media bus code is only changed when the sink pad carries a YUV format
+ * and the requested code is one of the resizer's YUV codes. The size is
+ * clamped to the limits of this resizer. On return @format holds the format
+ * that was actually stored.
+ *
+ * The YUV decision is taken from the sink pad code rather than the current
+ * source pad code: the source pad may hold a code that exists only in the
+ * resizer's own table (MEDIA_BUS_FMT_YUYV8_1_5X8), and
+ * rkisp1_isp_mbus_info_get() can return NULL for a code it does not know.
+ */
+static void rkisp1_rsz_set_src_fmt(struct rkisp1_resizer *rsz,
+				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_mbus_framefmt *format,
+				   unsigned int which)
+{
+	const struct rkisp1_isp_mbus_info *sink_mbus_info;
+	struct v4l2_mbus_framefmt *sink_fmt, *src_fmt;
+
+	sink_fmt = rkisp1_rsz_get_pad_fmt(rsz, cfg, RKISP1_RSZ_PAD_SINK, which);
+	src_fmt = rkisp1_rsz_get_pad_fmt(rsz, cfg, RKISP1_RSZ_PAD_SRC, which);
+	sink_mbus_info = rkisp1_isp_mbus_info_get(sink_fmt->code);
+
+	/* for YUV formats, userspace can change the mbus code on the src pad if it is supported */
+	if (sink_mbus_info &&
+	    sink_mbus_info->pixel_enc == V4L2_PIXEL_ENC_YUV &&
+	    rkisp1_rsz_get_yuv_mbus_info(format->code))
+		src_fmt->code = format->code;
+
+	src_fmt->width = clamp_t(u32, format->width,
+				 rsz->config->min_rsz_width,
+				 rsz->config->max_rsz_width);
+	src_fmt->height = clamp_t(u32, format->height,
+				  rsz->config->min_rsz_height,
+				  rsz->config->max_rsz_height);
+
+	*format = *src_fmt;
+}
+
+/*
+ * Apply a crop request @r to the sink pad and write the resulting rectangle
+ * back to @r. On the mainpath with a bayer sink format no cropping is done:
+ * the rectangle is reset to the full sink frame. Otherwise left and width are
+ * aligned to 2 and the rectangle is adjusted against the sink format.
+ */
+static void rkisp1_rsz_set_sink_crop(struct rkisp1_resizer *rsz,
+				     struct v4l2_subdev_pad_config *cfg,
+				     struct v4l2_rect *r,
+				     unsigned int which)
+{
+	const struct rkisp1_isp_mbus_info *mbus_info;
+	struct v4l2_mbus_framefmt *sink_fmt;
+	struct v4l2_rect *sink_crop;
+
+	sink_fmt = rkisp1_rsz_get_pad_fmt(rsz, cfg, RKISP1_RSZ_PAD_SINK, which);
+	sink_crop = rkisp1_rsz_get_pad_crop(rsz, cfg, RKISP1_RSZ_PAD_SINK,
+					    which);
+	mbus_info = rkisp1_isp_mbus_info_get(sink_fmt->code);
+
+	if (rsz->id != RKISP1_MAINPATH ||
+	    mbus_info->pixel_enc != V4L2_PIXEL_ENC_BAYER) {
+		sink_crop->left = ALIGN(r->left, 2);
+		sink_crop->width = ALIGN(r->width, 2);
+		sink_crop->top = r->top;
+		sink_crop->height = r->height;
+		rkisp1_sd_adjust_crop(sink_crop, sink_fmt);
+	} else {
+		/* No crop for MP bayer raw data */
+		sink_crop->left = 0;
+		sink_crop->top = 0;
+		sink_crop->width = sink_fmt->width;
+		sink_crop->height = sink_fmt->height;
+	}
+
+	*r = *sink_crop;
+}
+
+/*
+ * Apply a format request to the sink pad.
+ *
+ * The selfpath always uses MEDIA_BUS_FMT_YUYV8_2X8; the mainpath takes the
+ * requested code and falls back to RKISP1_DEF_FMT when the ISP cannot output
+ * it. The resulting code is propagated to the source pad, the size is clamped
+ * to the ISP limits and the sink crop is re-evaluated against the new format.
+ * On return @format holds the format that was actually stored.
+ */
+static void rkisp1_rsz_set_sink_fmt(struct rkisp1_resizer *rsz,
+				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_mbus_framefmt *format,
+				    unsigned int which)
+{
+	const struct rkisp1_isp_mbus_info *mbus_info;
+	struct v4l2_mbus_framefmt *sink_fmt, *src_fmt;
+	struct v4l2_rect *sink_crop;
+
+	sink_fmt = rkisp1_rsz_get_pad_fmt(rsz, cfg, RKISP1_RSZ_PAD_SINK, which);
+	src_fmt = rkisp1_rsz_get_pad_fmt(rsz, cfg, RKISP1_RSZ_PAD_SRC, which);
+	sink_crop = rkisp1_rsz_get_pad_crop(rsz, cfg, RKISP1_RSZ_PAD_SINK,
+					    which);
+
+	sink_fmt->code = (rsz->id == RKISP1_SELFPATH) ?
+			 MEDIA_BUS_FMT_YUYV8_2X8 : format->code;
+
+	mbus_info = rkisp1_isp_mbus_info_get(sink_fmt->code);
+	if (!mbus_info || !(mbus_info->direction & RKISP1_ISP_SD_SRC)) {
+		/* unknown code, or one the ISP cannot source: use the default */
+		sink_fmt->code = RKISP1_DEF_FMT;
+		mbus_info = rkisp1_isp_mbus_info_get(sink_fmt->code);
+	}
+
+	/* only the active configuration changes the resizer's pixel encoding */
+	if (which == V4L2_SUBDEV_FORMAT_ACTIVE)
+		rsz->pixel_enc = mbus_info->pixel_enc;
+
+	/* Propagate to source pad */
+	src_fmt->code = sink_fmt->code;
+
+	sink_fmt->width = clamp_t(u32, format->width,
+				  RKISP1_ISP_MIN_WIDTH,
+				  RKISP1_ISP_MAX_WIDTH);
+	sink_fmt->height = clamp_t(u32, format->height,
+				   RKISP1_ISP_MIN_HEIGHT,
+				   RKISP1_ISP_MAX_HEIGHT);
+
+	*format = *sink_fmt;
+
+	/* Update sink crop */
+	rkisp1_rsz_set_sink_crop(rsz, cfg, sink_crop, which);
+}
+
+/* get_fmt pad op: copy the stored format of fmt->pad into @fmt under ops_lock. */
+static int rkisp1_rsz_get_fmt(struct v4l2_subdev *sd,
+			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_format *fmt)
+{
+	struct rkisp1_resizer *rsz =
+		container_of(sd, struct rkisp1_resizer, sd);
+	struct v4l2_mbus_framefmt *pad_fmt;
+
+	mutex_lock(&rsz->ops_lock);
+	pad_fmt = rkisp1_rsz_get_pad_fmt(rsz, cfg, fmt->pad, fmt->which);
+	fmt->format = *pad_fmt;
+	mutex_unlock(&rsz->ops_lock);
+
+	return 0;
+}
+
+/*
+ * set_fmt pad op: route the request to the sink or source handler under
+ * ops_lock. Requests are adjusted rather than rejected, so this returns 0.
+ */
+static int rkisp1_rsz_set_fmt(struct v4l2_subdev *sd,
+			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_format *fmt)
+{
+	struct rkisp1_resizer *rsz =
+		container_of(sd, struct rkisp1_resizer, sd);
+
+	mutex_lock(&rsz->ops_lock);
+
+	if (fmt->pad != RKISP1_RSZ_PAD_SINK)
+		rkisp1_rsz_set_src_fmt(rsz, cfg, &fmt->format, fmt->which);
+	else
+		rkisp1_rsz_set_sink_fmt(rsz, cfg, &fmt->format, fmt->which);
+
+	mutex_unlock(&rsz->ops_lock);
+
+	return 0;
+}
+
+/*
+ * get_selection pad op. Only the sink pad has selections: CROP_BOUNDS is the
+ * full sink frame and CROP is the stored sink crop rectangle. Returns -EINVAL
+ * for the source pad and for any other target.
+ */
+static int rkisp1_rsz_get_selection(struct v4l2_subdev *sd,
+				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_selection *sel)
+{
+	struct rkisp1_resizer *rsz =
+		container_of(sd, struct rkisp1_resizer, sd);
+	struct v4l2_mbus_framefmt *mf_sink;
+	int ret = 0;
+
+	if (sel->pad == RKISP1_RSZ_PAD_SRC)
+		return -EINVAL;
+
+	mutex_lock(&rsz->ops_lock);
+
+	if (sel->target == V4L2_SEL_TGT_CROP_BOUNDS) {
+		mf_sink = rkisp1_rsz_get_pad_fmt(rsz, cfg, RKISP1_RSZ_PAD_SINK,
+						 sel->which);
+		sel->r.height = mf_sink->height;
+		sel->r.width = mf_sink->width;
+		sel->r.left = 0;
+		sel->r.top = 0;
+	} else if (sel->target == V4L2_SEL_TGT_CROP) {
+		sel->r = *rkisp1_rsz_get_pad_crop(rsz, cfg, RKISP1_RSZ_PAD_SINK,
+						  sel->which);
+	} else {
+		ret = -EINVAL;
+	}
+
+	mutex_unlock(&rsz->ops_lock);
+
+	return ret;
+}
+
+/*
+ * set_selection pad op: only the CROP target on the sink pad is accepted
+ * (-EINVAL otherwise). The rectangle is adjusted and stored under ops_lock,
+ * and sel->r is updated with the result.
+ */
+static int rkisp1_rsz_set_selection(struct v4l2_subdev *sd,
+				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_selection *sel)
+{
+	struct rkisp1_resizer *rsz =
+		container_of(sd, struct rkisp1_resizer, sd);
+
+	if (sel->target != V4L2_SEL_TGT_CROP)
+		return -EINVAL;
+
+	if (sel->pad == RKISP1_RSZ_PAD_SRC)
+		return -EINVAL;
+
+	dev_dbg(rsz->rkisp1->dev, "%s: pad: %d sel(%d,%d)/%dx%d\n", __func__,
+		sel->pad, sel->r.left, sel->r.top, sel->r.width, sel->r.height);
+
+	mutex_lock(&rsz->ops_lock);
+	rkisp1_rsz_set_sink_crop(rsz, cfg, &sel->r, sel->which);
+	mutex_unlock(&rsz->ops_lock);
+
+	return 0;
+}
+
+/* Media entity ops: link validation is left to the generic subdev helper */
+static const struct media_entity_operations rkisp1_rsz_media_ops = {
+	.link_validate = v4l2_subdev_link_validate,
+};
+
+/* Pad ops: format/selection handling is local, link validation uses the default helper */
+static const struct v4l2_subdev_pad_ops rkisp1_rsz_pad_ops = {
+	.enum_mbus_code = rkisp1_rsz_enum_mbus_code,
+	.get_selection = rkisp1_rsz_get_selection,
+	.set_selection = rkisp1_rsz_set_selection,
+	.init_cfg = rkisp1_rsz_init_config,
+	.get_fmt = rkisp1_rsz_get_fmt,
+	.set_fmt = rkisp1_rsz_set_fmt,
+	.link_validate = v4l2_subdev_link_validate_default,
+};
+
+/* ----------------------------------------------------------------------------
+ * Stream operations
+ */
+
+/*
+ * s_stream video op.
+ *
+ * Disabling turns off the dual-crop unit and the resizer using the ASYNC
+ * variant. Enabling programs the resizer and then the dual-crop unit under
+ * ops_lock; the resizer uses the ASYNC variant when the other capture device
+ * is already streaming and the SYNC variant otherwise. Always returns 0.
+ */
+static int rkisp1_rsz_s_stream(struct v4l2_subdev *sd, int enable)
+{
+	struct rkisp1_resizer *rsz =
+		container_of(sd, struct rkisp1_resizer, sd);
+	struct rkisp1_device *rkisp1 = rsz->rkisp1;
+	/* the two paths have ids 0 and 1, so "id ^ 1" selects the other one */
+	struct rkisp1_capture *other = &rkisp1->capture_devs[rsz->id ^ 1];
+	enum rkisp1_shadow_regs_when when;
+
+	if (!enable) {
+		rkisp1_dcrop_disable(rsz, RKISP1_SHADOW_REGS_ASYNC);
+		rkisp1_rsz_disable(rsz, RKISP1_SHADOW_REGS_ASYNC);
+		return 0;
+	}
+
+	when = other->is_streaming ? RKISP1_SHADOW_REGS_ASYNC :
+				     RKISP1_SHADOW_REGS_SYNC;
+
+	mutex_lock(&rsz->ops_lock);
+	rkisp1_rsz_config(rsz, when);
+	rkisp1_dcrop_config(rsz);
+	mutex_unlock(&rsz->ops_lock);
+
+	return 0;
+}
+
+/* Video ops: the resizer only implements stream on/off */
+static const struct v4l2_subdev_video_ops rkisp1_rsz_video_ops = {
+	.s_stream = rkisp1_rsz_s_stream,
+};
+
+/* Top-level subdev ops handed to v4l2_subdev_init() */
+static const struct v4l2_subdev_ops rkisp1_rsz_ops = {
+	.video = &rkisp1_rsz_video_ops,
+	.pad = &rkisp1_rsz_pad_ops,
+};
+
+/*
+ * Undo rkisp1_rsz_register(): unregister the subdev, clean up its media
+ * entity and destroy the ops_lock mutex initialised at registration.
+ */
+static void rkisp1_rsz_unregister(struct rkisp1_resizer *rsz)
+{
+	v4l2_device_unregister_subdev(&rsz->sd);
+	media_entity_cleanup(&rsz->sd.entity);
+	mutex_destroy(&rsz->ops_lock);
+}
+
+/*
+ * Set up and register one resizer subdev.
+ *
+ * Selects the per-path register/limit table from rsz->id, initialises the
+ * subdev, its two pads and the ops_lock mutex, registers the subdev with the
+ * v4l2 device and finally loads the default pad configuration.
+ *
+ * Returns 0 on success or the error code of the failing step; on failure
+ * the mutex is destroyed again so that nothing initialised here is left
+ * behind.
+ */
+static int rkisp1_rsz_register(struct rkisp1_resizer *rsz)
+{
+	static const char * const dev_names[] = {
+		RKISP1_RSZ_MP_DEV_NAME,
+		RKISP1_RSZ_SP_DEV_NAME
+	};
+	struct media_pad *pads = rsz->pads;
+	struct v4l2_subdev *sd = &rsz->sd;
+	int ret;
+
+	if (rsz->id == RKISP1_SELFPATH)
+		rsz->config = &rkisp1_rsz_config_sp;
+	else
+		rsz->config = &rkisp1_rsz_config_mp;
+
+	v4l2_subdev_init(sd, &rkisp1_rsz_ops);
+	sd->flags |= V4L2_SUBDEV_FL_HAS_DEVNODE;
+	sd->entity.ops = &rkisp1_rsz_media_ops;
+	sd->entity.function = MEDIA_ENT_F_PROC_VIDEO_SCALER;
+	sd->owner = THIS_MODULE;
+	strscpy(sd->name, dev_names[rsz->id], sizeof(sd->name));
+
+	pads[RKISP1_RSZ_PAD_SINK].flags = MEDIA_PAD_FL_SINK |
+					  MEDIA_PAD_FL_MUST_CONNECT;
+	pads[RKISP1_RSZ_PAD_SRC].flags = MEDIA_PAD_FL_SOURCE |
+					 MEDIA_PAD_FL_MUST_CONNECT;
+
+	rsz->pixel_enc = RKISP1_DEF_PIXEL_ENC;
+
+	mutex_init(&rsz->ops_lock);
+	ret = media_entity_pads_init(&sd->entity, RKISP1_RSZ_PAD_MAX, pads);
+	if (ret)
+		goto err_destroy_lock;
+
+	ret = v4l2_device_register_subdev(&rsz->rkisp1->v4l2_dev, sd);
+	if (ret) {
+		dev_err(sd->dev, "Failed to register resizer subdev\n");
+		goto err_cleanup_media_entity;
+	}
+
+	rkisp1_rsz_init_config(sd, rsz->pad_cfg);
+	return 0;
+
+err_cleanup_media_entity:
+	media_entity_cleanup(&sd->entity);
+err_destroy_lock:
+	mutex_destroy(&rsz->ops_lock);
+
+	return ret;
+}
+
+/*
+ * Register every resizer of @rkisp1. If one registration fails, the
+ * resizers registered before it are unregistered again and the error code
+ * is returned; otherwise returns 0.
+ */
+int rkisp1_resizer_devs_register(struct rkisp1_device *rkisp1)
+{
+	unsigned int registered, i;
+	int ret = 0;
+
+	for (registered = 0; registered < ARRAY_SIZE(rkisp1->resizer_devs);
+	     registered++) {
+		struct rkisp1_resizer *rsz = &rkisp1->resizer_devs[registered];
+
+		rsz->rkisp1 = rkisp1;
+		rsz->id = registered;
+		ret = rkisp1_rsz_register(rsz);
+		if (ret)
+			break;
+	}
+
+	if (!ret)
+		return 0;
+
+	/* roll back the resizers that did register, in registration order */
+	for (i = 0; i < registered; i++)
+		rkisp1_rsz_unregister(&rkisp1->resizer_devs[i]);
+
+	return ret;
+}
+
+/* Unregister the mainpath resizer, then the selfpath resizer. */
+void rkisp1_resizer_devs_unregister(struct rkisp1_device *rkisp1)
+{
+	rkisp1_rsz_unregister(&rkisp1->resizer_devs[RKISP1_MAINPATH]);
+	rkisp1_rsz_unregister(&rkisp1->resizer_devs[RKISP1_SELFPATH]);
+}
diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-stats.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-stats.c
new file mode 100644
index 000000000000..3ddab8fa8f2d
--- /dev/null
+++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-stats.c
@@ -0,0 +1,415 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Rockchip ISP1 Driver - Stats subdevice
+ *
+ * Copyright (C) 2017 Rockchip Electronics Co., Ltd.
+ */
+
+#include <media/v4l2-common.h>
+#include <media/v4l2-event.h>
+#include <media/v4l2-ioctl.h>
+#include <media/videobuf2-core.h>
+#include <media/videobuf2-vmalloc.h>	/* for ISP statistics */
+
+#include "rkisp1-common.h"
+
+#define RKISP1_STATS_DEV_NAME	RKISP1_DRIVER_NAME "_stats"
+
+#define RKISP1_ISP_STATS_REQ_BUFS_MIN 2
+#define RKISP1_ISP_STATS_REQ_BUFS_MAX 8
+
+/* VIDIOC_ENUM_FMT handler: only index 0 is valid, the node's one format. */
+static int rkisp1_stats_enum_fmt_meta_cap(struct file *file, void *priv,
+					  struct v4l2_fmtdesc *f)
+{
+	struct video_device *vdev = video_devdata(file);
+	struct rkisp1_stats *stats = video_get_drvdata(vdev);
+
+	if (f->index != 0 || f->type != vdev->queue->type)
+		return -EINVAL;
+	f->pixelformat = stats->vdev_fmt.fmt.meta.dataformat;
+	return 0;
+}
+
+/* Report the fixed stats format; also wired up as S_FMT and TRY_FMT. */
+static int rkisp1_stats_g_fmt_meta_cap(struct file *file, void *priv,
+				       struct v4l2_format *f)
+{
+	struct video_device *vdev = video_devdata(file);
+	struct rkisp1_stats *stats = video_get_drvdata(vdev);
+	const struct v4l2_meta_format *cur = &stats->vdev_fmt.fmt.meta;
+
+	if (f->type != vdev->queue->type)
+		return -EINVAL;
+
+	memset(&f->fmt.meta, 0, sizeof(f->fmt.meta));
+	f->fmt.meta.dataformat = cur->dataformat;
+	f->fmt.meta.buffersize = cur->buffersize;
+	return 0;
+}
+
+/* VIDIOC_QUERYCAP handler: fill in the driver, card and bus_info strings. */
+static int rkisp1_stats_querycap(struct file *file,
+				 void *priv, struct v4l2_capability *cap)
+{
+	const struct video_device *vdev = video_devdata(file);
+
+	strscpy(cap->bus_info, RKISP1_BUS_INFO, sizeof(cap->bus_info));
+	strscpy(cap->card, vdev->name, sizeof(cap->card));
+	strscpy(cap->driver, RKISP1_DRIVER_NAME, sizeof(cap->driver));
+	return 0;
+}
+
+/* ISP video device IOCTLs */
+static const struct v4l2_ioctl_ops rkisp1_stats_ioctl = {
+	.vidioc_reqbufs = vb2_ioctl_reqbufs,
+	.vidioc_querybuf = vb2_ioctl_querybuf,
+	.vidioc_create_bufs = vb2_ioctl_create_bufs,
+	.vidioc_qbuf = vb2_ioctl_qbuf,
+	.vidioc_dqbuf = vb2_ioctl_dqbuf,
+	.vidioc_prepare_buf = vb2_ioctl_prepare_buf,
+	.vidioc_expbuf = vb2_ioctl_expbuf,
+	.vidioc_streamon = vb2_ioctl_streamon,
+	.vidioc_streamoff = vb2_ioctl_streamoff,
+	.vidioc_enum_fmt_meta_cap = rkisp1_stats_enum_fmt_meta_cap,
+	.vidioc_g_fmt_meta_cap = rkisp1_stats_g_fmt_meta_cap,
+	.vidioc_s_fmt_meta_cap = rkisp1_stats_g_fmt_meta_cap,
+	.vidioc_try_fmt_meta_cap = rkisp1_stats_g_fmt_meta_cap,
+	.vidioc_querycap = rkisp1_stats_querycap,
+	.vidioc_subscribe_event = v4l2_ctrl_subscribe_event,
+	.vidioc_unsubscribe_event = v4l2_event_unsubscribe,
+};
+
+static const struct v4l2_file_operations rkisp1_stats_fops = {
+	.mmap = vb2_fop_mmap,
+	.unlocked_ioctl = video_ioctl2,
+	.poll = vb2_fop_poll,
+	.open = v4l2_fh_open,
+	.release = vb2_fop_release
+};
+
+/* vb2 queue_setup: one plane sized to hold a struct rkisp1_stat_buffer. */
+static int rkisp1_stats_vb2_queue_setup(struct vb2_queue *vq,
+					unsigned int *num_buffers,
+					unsigned int *num_planes,
+					unsigned int sizes[],
+					struct device *alloc_devs[])
+{
+	sizes[0] = sizeof(struct rkisp1_stat_buffer);
+	*num_planes = 1;
+
+	*num_buffers = clamp_t(u32, *num_buffers, RKISP1_ISP_STATS_REQ_BUFS_MIN,
+			       RKISP1_ISP_STATS_REQ_BUFS_MAX);
+
+	return 0;
+}
+
+/* vb2 buf_queue: cache the plane address and append to the pending list. */
+static void rkisp1_stats_vb2_buf_queue(struct vb2_buffer *vb)
+{
+	struct rkisp1_stats *stats = vb->vb2_queue->drv_priv;
+	struct vb2_v4l2_buffer *v4l2_buf = to_vb2_v4l2_buffer(vb);
+	struct rkisp1_buffer *buf;
+
+	buf = container_of(v4l2_buf, struct rkisp1_buffer, vb);
+	buf->vaddr = vb2_plane_vaddr(vb, 0);
+
+	spin_lock_irq(&stats->lock);
+	list_add_tail(&buf->queue, &stats->stat);
+	spin_unlock_irq(&stats->lock);
+}
+
+/* vb2 buf_prepare: reject undersized planes, then set the fixed payload. */
+static int rkisp1_stats_vb2_buf_prepare(struct vb2_buffer *vb)
+{
+	if (sizeof(struct rkisp1_stat_buffer) > vb2_plane_size(vb, 0))
+		return -EINVAL;
+
+	vb2_set_plane_payload(vb, 0, sizeof(struct rkisp1_stat_buffer));
+	return 0;
+}
+
+/* Return all buffers still queued in the driver to vb2 in the error state. */
+static void rkisp1_stats_vb2_stop_streaming(struct vb2_queue *vq)
+{
+	struct rkisp1_stats *stats = vq->drv_priv;
+	struct rkisp1_buffer *buf;
+
+	spin_lock_irq(&stats->lock);
+
+	/* Drain until empty: more than REQ_BUFS_MAX buffers may be queued. */
+	while (!list_empty(&stats->stat)) {
+		buf = list_first_entry(&stats->stat,
+				       struct rkisp1_buffer, queue);
+		list_del(&buf->queue);
+		vb2_buffer_done(&buf->vb.vb2_buf, VB2_BUF_STATE_ERROR);
+	}
+	spin_unlock_irq(&stats->lock);
+}
+
+static const struct vb2_ops rkisp1_stats_vb2_ops = {
+	.queue_setup = rkisp1_stats_vb2_queue_setup,
+	.buf_queue = rkisp1_stats_vb2_buf_queue,
+	.buf_prepare = rkisp1_stats_vb2_buf_prepare,
+	.wait_prepare = vb2_ops_wait_prepare,
+	.wait_finish = vb2_ops_wait_finish,
+	.stop_streaming = rkisp1_stats_vb2_stop_streaming,
+};
+
+/* Fill in the stats vb2 queue parameters and hand it to vb2_queue_init(). */
+static int
+rkisp1_stats_init_vb2_queue(struct vb2_queue *q, struct rkisp1_stats *stats)
+{
+	struct rkisp1_vdev_node *node =
+		container_of(q, struct rkisp1_vdev_node, buf_queue);
+
+	q->lock = &node->vlock;
+	q->drv_priv = stats;
+	q->type = V4L2_BUF_TYPE_META_CAPTURE;
+	q->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF;
+	q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
+	q->buf_struct_size = sizeof(struct rkisp1_buffer);
+	q->mem_ops = &vb2_vmalloc_memops;
+	q->ops = &rkisp1_stats_vb2_ops;
+
+	return vb2_queue_init(q);
+}
+
+static void rkisp1_stats_get_awb_meas(struct rkisp1_stats *stats,
+				      struct rkisp1_stat_buffer *pbuf)
+{
+	/* Protect against concurrent access from ISR? */
+	struct rkisp1_device *rkisp1 = stats->rkisp1;
+	u32 reg_val;
+
+	pbuf->meas_type |= RKISP1_CIF_ISP_STAT_AWB;
+	reg_val = rkisp1_read(rkisp1, RKISP1_CIF_ISP_AWB_WHITE_CNT);
+	pbuf->params.awb.awb_mean[0].cnt =
+				RKISP1_CIF_ISP_AWB_GET_PIXEL_CNT(reg_val);
+	reg_val = rkisp1_read(rkisp1, RKISP1_CIF_ISP_AWB_MEAN);
+
+	pbuf->params.awb.awb_mean[0].mean_cr_or_r =
+				RKISP1_CIF_ISP_AWB_GET_MEAN_CR_R(reg_val);
+	pbuf->params.awb.awb_mean[0].mean_cb_or_b =
+				RKISP1_CIF_ISP_AWB_GET_MEAN_CB_B(reg_val);
+	pbuf->params.awb.awb_mean[0].mean_y_or_g =
+				RKISP1_CIF_ISP_AWB_GET_MEAN_Y_G(reg_val);
+}
+
+/* Copy the RKISP1_CIF_ISP_EXP_MEAN_* registers into @pbuf's exp_mean[]. */
+static void rkisp1_stats_get_aec_meas(struct rkisp1_stats *stats,
+				      struct rkisp1_stat_buffer *pbuf)
+{
+	unsigned int reg = RKISP1_CIF_ISP_EXP_MEAN_00;
+	unsigned int i;
+
+	for (i = 0; i < RKISP1_CIF_ISP_AE_MEAN_MAX; i++, reg += 4)
+		pbuf->params.ae.exp_mean[i] =
+			(u8)rkisp1_read(stats->rkisp1, reg);
+	pbuf->meas_type |= RKISP1_CIF_ISP_STAT_AUTOEXP;
+}
+
+/* Read the AFM sum/lum registers of windows A, B and C into @pbuf. */
+static void rkisp1_stats_get_afc_meas(struct rkisp1_stats *stats,
+				      struct rkisp1_stat_buffer *pbuf)
+{
+	struct rkisp1_device *rkisp1 = stats->rkisp1;
+	struct rkisp1_cif_isp_af_stat *af = &pbuf->params.af;
+
+	af->window[0].sum = rkisp1_read(rkisp1, RKISP1_CIF_ISP_AFM_SUM_A);
+	af->window[0].lum = rkisp1_read(rkisp1, RKISP1_CIF_ISP_AFM_LUM_A);
+	af->window[1].sum = rkisp1_read(rkisp1, RKISP1_CIF_ISP_AFM_SUM_B);
+	af->window[1].lum = rkisp1_read(rkisp1, RKISP1_CIF_ISP_AFM_LUM_B);
+	af->window[2].sum = rkisp1_read(rkisp1, RKISP1_CIF_ISP_AFM_SUM_C);
+	af->window[2].lum = rkisp1_read(rkisp1, RKISP1_CIF_ISP_AFM_LUM_C);
+
+	pbuf->meas_type |= RKISP1_CIF_ISP_STAT_AFM;
+}
+
+/* Read the histogram bins; the counters are wider than 8 bits, no u8 cast. */
+static void rkisp1_stats_get_hst_meas(struct rkisp1_stats *stats,
+				      struct rkisp1_stat_buffer *pbuf)
+{
+	struct rkisp1_device *rkisp1 = stats->rkisp1;
+	unsigned int i;
+
+	pbuf->meas_type |= RKISP1_CIF_ISP_STAT_HIST;
+	for (i = 0; i < RKISP1_CIF_ISP_HIST_BIN_N_MAX; i++)
+		pbuf->params.hist.hist_bins[i] =
+			rkisp1_read(rkisp1, RKISP1_CIF_ISP_HIST_BIN_0 + i * 4);
+}
+
+static void rkisp1_stats_get_bls_meas(struct rkisp1_stats *stats,
+				      struct rkisp1_stat_buffer *pbuf)
+{
+	struct rkisp1_device *rkisp1 = stats->rkisp1;
+	const struct rkisp1_isp_mbus_info *in_fmt = rkisp1->isp.sink_fmt;
+	struct rkisp1_cif_isp_bls_meas_val *bls_val;
+
+	bls_val = &pbuf->params.ae.bls_val;
+	if (in_fmt->bayer_pat == RKISP1_RAW_BGGR) {
+		bls_val->meas_b =
+			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_A_MEASURED);
+		bls_val->meas_gb =
+			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_B_MEASURED);
+		bls_val->meas_gr =
+			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_C_MEASURED);
+		bls_val->meas_r =
+			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_D_MEASURED);
+	} else if (in_fmt->bayer_pat == RKISP1_RAW_GBRG) {
+		bls_val->meas_gb =
+			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_A_MEASURED);
+		bls_val->meas_b =
+			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_B_MEASURED);
+		bls_val->meas_r =
+			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_C_MEASURED);
+		bls_val->meas_gr =
+			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_D_MEASURED);
+	} else if (in_fmt->bayer_pat == RKISP1_RAW_GRBG) {
+		bls_val->meas_gr =
+			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_A_MEASURED);
+		bls_val->meas_r =
+			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_B_MEASURED);
+		bls_val->meas_b =
+			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_C_MEASURED);
+		bls_val->meas_gb =
+			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_D_MEASURED);
+	} else if (in_fmt->bayer_pat == RKISP1_RAW_RGGB) {
+		bls_val->meas_r =
+			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_A_MEASURED);
+		bls_val->meas_gr =
+			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_B_MEASURED);
+		bls_val->meas_gb =
+			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_C_MEASURED);
+		bls_val->meas_b =
+			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_D_MEASURED);
+	}
+}
+
+/* Complete the first queued buffer with the measurements set in @isp_ris. */
+static void
+rkisp1_stats_send_measurement(struct rkisp1_stats *stats, u32 isp_ris)
+{
+	struct rkisp1_stat_buffer *cur_stat_buf;
+	struct rkisp1_buffer *cur_buf;
+	unsigned int frame_sequence = stats->rkisp1->isp.frame_sequence;
+	u64 timestamp = ktime_get_ns();
+
+	/* get one empty buffer */
+	if (list_empty(&stats->stat))
+		return;
+
+	cur_buf = list_first_entry(&stats->stat, struct rkisp1_buffer, queue);
+	list_del(&cur_buf->queue);
+
+	cur_stat_buf = (struct rkisp1_stat_buffer *)(cur_buf->vaddr);
+
+	/* The helpers only OR into meas_type, so clear stale flags first. */
+	cur_stat_buf->meas_type = 0;
+
+	if (isp_ris & RKISP1_CIF_ISP_AWB_DONE)
+		rkisp1_stats_get_awb_meas(stats, cur_stat_buf);
+
+	if (isp_ris & RKISP1_CIF_ISP_AFM_FIN)
+		rkisp1_stats_get_afc_meas(stats, cur_stat_buf);
+
+	if (isp_ris & RKISP1_CIF_ISP_EXP_END) {
+		rkisp1_stats_get_aec_meas(stats, cur_stat_buf);
+		rkisp1_stats_get_bls_meas(stats, cur_stat_buf);
+	}
+
+	if (isp_ris & RKISP1_CIF_ISP_HIST_MEASURE_RDY)
+		rkisp1_stats_get_hst_meas(stats, cur_stat_buf);
+
+	vb2_set_plane_payload(&cur_buf->vb.vb2_buf, 0,
+			      sizeof(struct rkisp1_stat_buffer));
+	cur_buf->vb.sequence = frame_sequence;
+	cur_buf->vb.vb2_buf.timestamp = timestamp;
+	vb2_buffer_done(&cur_buf->vb.vb2_buf, VB2_BUF_STATE_DONE);
+}
+
+/* Stats IRQ: write the mask to ISP_ICR, count errors, send the buffer. */
+void rkisp1_stats_isr(struct rkisp1_stats *stats, u32 isp_ris)
+{
+	struct rkisp1_device *rkisp1 = stats->rkisp1;
+	u32 mis;
+
+	spin_lock(&stats->lock);
+	rkisp1_write(rkisp1, RKISP1_STATS_MEAS_MASK, RKISP1_CIF_ISP_ICR);
+	/* Mask bits still set in ISP_MIS after the ICR write count as errors. */
+	mis = rkisp1_read(rkisp1, RKISP1_CIF_ISP_MIS);
+	if (mis & RKISP1_STATS_MEAS_MASK)
+		rkisp1->debug.stats_error++;
+
+	if (isp_ris & RKISP1_STATS_MEAS_MASK)
+		rkisp1_stats_send_measurement(stats, isp_ris);
+
+	spin_unlock(&stats->lock);
+}
+
+/* Initialize the (fixed) metadata format reported for the stats node. */
+static void rkisp1_init_stats(struct rkisp1_stats *stats)
+{
+	stats->vdev_fmt.fmt.meta.buffersize = sizeof(struct rkisp1_stat_buffer);
+	stats->vdev_fmt.fmt.meta.dataformat =
+		V4L2_META_FMT_RK_ISP1_STAT_3A;
+}
+
+int rkisp1_stats_register(struct rkisp1_device *rkisp1)
+{
+	struct rkisp1_stats *stats = &rkisp1->stats;
+	struct rkisp1_vdev_node *node = &stats->vnode;
+	struct video_device *vdev = &node->vdev;
+	int ret;
+
+	stats->rkisp1 = rkisp1;
+	mutex_init(&node->vlock);
+	INIT_LIST_HEAD(&stats->stat);
+	spin_lock_init(&stats->lock);
+	rkisp1_init_stats(stats);
+	strscpy(vdev->name, RKISP1_STATS_DEV_NAME, sizeof(vdev->name));
+	video_set_drvdata(vdev, stats);
+	vdev->ioctl_ops = &rkisp1_stats_ioctl;
+	vdev->fops = &rkisp1_stats_fops;
+	vdev->release = video_device_release_empty;
+	vdev->lock = &node->vlock;
+	vdev->v4l2_dev = &rkisp1->v4l2_dev;
+	vdev->queue = &node->buf_queue;
+	vdev->device_caps = V4L2_CAP_META_CAPTURE | V4L2_CAP_STREAMING;
+	vdev->vfl_dir = VFL_DIR_RX;
+	/* Do not register a node whose buffer queue failed to initialize. */
+	ret = rkisp1_stats_init_vb2_queue(vdev->queue, stats);
+	if (ret)
+		goto err_mutex_destroy;
+
+	node->pad.flags = MEDIA_PAD_FL_SINK;
+	ret = media_entity_pads_init(&vdev->entity, 1, &node->pad);
+	if (ret)
+		goto err_mutex_destroy;
+
+	ret = video_register_device(vdev, VFL_TYPE_VIDEO, -1);
+	if (ret) {
+		dev_err(&vdev->dev,
+			"failed to register %s, ret=%d\n", vdev->name, ret);
+		goto err_cleanup_media_entity;
+	}
+
+	return 0;
+
+err_cleanup_media_entity:
+	media_entity_cleanup(&vdev->entity);
+err_mutex_destroy:
+	mutex_destroy(&node->vlock);
+	return ret;
+}
+
+/* Undo rkisp1_stats_register(): video node, media entity, then the mutex. */
+void rkisp1_stats_unregister(struct rkisp1_device *rkisp1)
+{
+	struct rkisp1_vdev_node *node = &rkisp1->stats.vnode;
+	struct video_device *vdev = &node->vdev;
+
+	vb2_video_unregister_device(vdev);
+	media_entity_cleanup(&vdev->entity);
+	mutex_destroy(&node->vlock);
+}
diff --git a/drivers/staging/media/Kconfig b/drivers/staging/media/Kconfig
index 747c6cf1d795..e8996b1c3b35 100644
--- a/drivers/staging/media/Kconfig
+++ b/drivers/staging/media/Kconfig
@@ -44,6 +44,4 @@ source "drivers/staging/media/tegra-video/Kconfig"
 
 source "drivers/staging/media/ipu3/Kconfig"
 
-source "drivers/staging/media/rkisp1/Kconfig"
-
 endif
diff --git a/drivers/staging/media/Makefile b/drivers/staging/media/Makefile
index b59571826ba6..24b5873ff760 100644
--- a/drivers/staging/media/Makefile
+++ b/drivers/staging/media/Makefile
@@ -10,5 +10,4 @@ obj-$(CONFIG_VIDEO_TEGRA)	+= tegra-video/
 obj-$(CONFIG_TEGRA_VDE)		+= tegra-vde/
 obj-$(CONFIG_VIDEO_HANTRO)	+= hantro/
 obj-$(CONFIG_VIDEO_IPU3_IMGU)	+= ipu3/
-obj-$(CONFIG_VIDEO_ROCKCHIP_ISP1)	+= rkisp1/
 obj-$(CONFIG_VIDEO_ZORAN)	+= zoran/
diff --git a/drivers/staging/media/rkisp1/Documentation/devicetree/bindings/media/rockchip-isp1.yaml b/drivers/staging/media/rkisp1/Documentation/devicetree/bindings/media/rockchip-isp1.yaml
deleted file mode 100644
index 2004c054ed1a..000000000000
--- a/drivers/staging/media/rkisp1/Documentation/devicetree/bindings/media/rockchip-isp1.yaml
+++ /dev/null
@@ -1,215 +0,0 @@
-# SPDX-License-Identifier: (GPL-2.0+ OR MIT)
-%YAML 1.2
----
-$id: http://devicetree.org/schemas/media/rockchip-isp1.yaml#
-$schema: http://devicetree.org/meta-schemas/core.yaml#
-
-title: Rockchip SoC Image Signal Processing unit v1
-
-maintainers:
-  - Helen Koike <helen.koike@collabora.com>
-
-description: |
-  Rockchip ISP1 is the Camera interface for the Rockchip series of SoCs
-  which contains image processing, scaling, and compression functions.
-
-properties:
-  compatible:
-    const: rockchip,rk3399-cif-isp
-
-  reg:
-    maxItems: 1
-
-  interrupts:
-    maxItems: 1
-
-  clocks:
-    minItems: 3
-    items:
-      # isp0 and isp1
-      - description: ISP clock
-      - description: ISP AXI clock
-      - description: ISP AHB clock
-      # only for isp1
-      - description: ISP Pixel clock
-
-  clock-names:
-    minItems: 3
-    items:
-      # isp0 and isp1
-      - const: isp
-      - const: aclk
-      - const: hclk
-      # only for isp1
-      - const: pclk_isp
-
-  iommus:
-    maxItems: 1
-
-  phys:
-    maxItems: 1
-    description: phandle for the PHY port
-
-  phy-names:
-    const: dphy
-
-  power-domains:
-    maxItems: 1
-
-  # See ./video-interfaces.txt for details
-  ports:
-    type: object
-    additionalProperties: false
-
-    properties:
-      "#address-cells":
-        const: 1
-
-      "#size-cells":
-        const: 0
-
-      port@0:
-        type: object
-        description: connection point for sensors at MIPI-DPHY RX0
-        additionalProperties: false
-
-        properties:
-          "#address-cells":
-            const: 1
-
-          "#size-cells":
-            const: 0
-
-          reg:
-            const: 0
-
-        patternProperties:
-          endpoint:
-            type: object
-            additionalProperties: false
-
-            properties:
-              reg:
-                maxItems: 1
-
-              data-lanes:
-                minItems: 1
-                maxItems: 4
-
-              remote-endpoint: true
-
-        required:
-          - reg
-          - "#address-cells"
-          - "#size-cells"
-
-    required:
-      - "#address-cells"
-      - "#size-cells"
-      - port@0
-
-required:
-  - compatible
-  - reg
-  - interrupts
-  - clocks
-  - clock-names
-  - iommus
-  - phys
-  - phy-names
-  - power-domains
-  - ports
-
-if:
-  properties:
-    compatible:
-      contains:
-        const: rockchip,rk3399-cif-isp
-then:
-  properties:
-    clocks:
-      minItems: 3
-      maxItems: 4
-    clock-names:
-      minItems: 3
-      maxItems: 4
-
-additionalProperties: false
-
-examples:
-  - |
-
-    #include <dt-bindings/clock/rk3399-cru.h>
-    #include <dt-bindings/interrupt-controller/arm-gic.h>
-    #include <dt-bindings/power/rk3399-power.h>
-
-    parent0: parent {
-        #address-cells = <2>;
-        #size-cells = <2>;
-
-        isp0: isp0@ff910000 {
-            compatible = "rockchip,rk3399-cif-isp";
-            reg = <0x0 0xff910000 0x0 0x4000>;
-            interrupts = <GIC_SPI 43 IRQ_TYPE_LEVEL_HIGH 0>;
-            clocks = <&cru SCLK_ISP0>,
-                     <&cru ACLK_ISP0_WRAPPER>,
-                     <&cru HCLK_ISP0_WRAPPER>;
-            clock-names = "isp", "aclk", "hclk";
-            iommus = <&isp0_mmu>;
-            phys = <&dphy>;
-            phy-names = "dphy";
-            power-domains = <&power RK3399_PD_ISP0>;
-
-            ports {
-                #address-cells = <1>;
-                #size-cells = <0>;
-
-                port@0 {
-                    reg = <0>;
-                    #address-cells = <1>;
-                    #size-cells = <0>;
-
-                    mipi_in_wcam: endpoint@0 {
-                        reg = <0>;
-                        remote-endpoint = <&wcam_out>;
-                        data-lanes = <1 2>;
-                    };
-
-                    mipi_in_ucam: endpoint@1 {
-                        reg = <1>;
-                        remote-endpoint = <&ucam_out>;
-                        data-lanes = <1>;
-                    };
-                };
-            };
-        };
-
-        i2c7: i2c {
-            #address-cells = <1>;
-            #size-cells = <0>;
-
-            wcam: camera@36 {
-                compatible = "ovti,ov5695";
-                reg = <0x36>;
-
-                port {
-                    wcam_out: endpoint {
-                        remote-endpoint = <&mipi_in_wcam>;
-                        data-lanes = <1 2>;
-                    };
-                };
-            };
-
-            ucam: camera@3c {
-                compatible = "ovti,ov2685";
-                reg = <0x3c>;
-
-                  port {
-                      ucam_out: endpoint {
-                          remote-endpoint = <&mipi_in_ucam>;
-                          data-lanes = <1>;
-                      };
-                  };
-            };
-        };
-    };
diff --git a/drivers/staging/media/rkisp1/Kconfig b/drivers/staging/media/rkisp1/Kconfig
deleted file mode 100644
index 41f5def9ea44..000000000000
--- a/drivers/staging/media/rkisp1/Kconfig
+++ /dev/null
@@ -1,19 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-
-config VIDEO_ROCKCHIP_ISP1
-	tristate "Rockchip Image Signal Processing v1 Unit driver"
-	depends on VIDEO_V4L2 && OF
-	depends on ARCH_ROCKCHIP || COMPILE_TEST
-	select MEDIA_CONTROLLER
-	select VIDEO_V4L2_SUBDEV_API
-	select VIDEOBUF2_DMA_CONTIG
-	select VIDEOBUF2_VMALLOC
-	select V4L2_FWNODE
-	select GENERIC_PHY_MIPI_DPHY
-	default n
-	help
-	  Enable this to support the Image Signal Processing (ISP) module
-	  present in RK3399 SoCs.
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called rockchip-isp1.
diff --git a/drivers/staging/media/rkisp1/Makefile b/drivers/staging/media/rkisp1/Makefile
deleted file mode 100644
index ab32a77db8f7..000000000000
--- a/drivers/staging/media/rkisp1/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-obj-$(CONFIG_VIDEO_ROCKCHIP_ISP1) += rockchip-isp1.o
-rockchip-isp1-objs += 	rkisp1-capture.o \
-			rkisp1-common.o \
-			rkisp1-dev.o \
-			rkisp1-isp.o \
-			rkisp1-resizer.o \
-			rkisp1-stats.o \
-			rkisp1-params.o
diff --git a/drivers/staging/media/rkisp1/TODO b/drivers/staging/media/rkisp1/TODO
deleted file mode 100644
index ca3651fd0833..000000000000
--- a/drivers/staging/media/rkisp1/TODO
+++ /dev/null
@@ -1,8 +0,0 @@
-* Fix checkpatch errors.
-
-NOTES:
-* All v4l2-compliance test must pass.
-* Stats and params can be tested with libcamera and ChromiumOS stack.
-
-Please CC patches to Linux Media <linux-media@vger.kernel.org> and
-Helen Koike <helen.koike@collabora.com>.
diff --git a/drivers/staging/media/rkisp1/rkisp1-capture.c b/drivers/staging/media/rkisp1/rkisp1-capture.c
deleted file mode 100644
index b81235afd053..000000000000
--- a/drivers/staging/media/rkisp1/rkisp1-capture.c
+++ /dev/null
@@ -1,1431 +0,0 @@
-// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
-/*
- * Rockchip ISP1 Driver - V4l capture device
- *
- * Copyright (C) 2019 Collabora, Ltd.
- *
- * Based on Rockchip ISP1 driver by Rockchip Electronics Co., Ltd.
- * Copyright (C) 2017 Rockchip Electronics Co., Ltd.
- */
-
-#include <linux/delay.h>
-#include <linux/pm_runtime.h>
-#include <media/v4l2-common.h>
-#include <media/v4l2-event.h>
-#include <media/v4l2-fh.h>
-#include <media/v4l2-ioctl.h>
-#include <media/v4l2-mc.h>
-#include <media/v4l2-subdev.h>
-#include <media/videobuf2-dma-contig.h>
-
-#include "rkisp1-common.h"
-
-/*
- * NOTE: There are two capture video devices in rkisp1, selfpath and mainpath.
- *
- * differences between selfpath and mainpath
- * available mp sink input: isp
- * available sp sink input : isp, dma(TODO)
- * available mp sink pad fmts: yuv422, raw
- * available sp sink pad fmts: yuv422, yuv420......
- * available mp source fmts: yuv, raw, jpeg(TODO)
- * available sp source fmts: yuv, rgb
- */
-
-#define RKISP1_SP_DEV_NAME	RKISP1_DRIVER_NAME "_selfpath"
-#define RKISP1_MP_DEV_NAME	RKISP1_DRIVER_NAME "_mainpath"
-
-#define RKISP1_MIN_BUFFERS_NEEDED 3
-
-enum rkisp1_plane {
-	RKISP1_PLANE_Y	= 0,
-	RKISP1_PLANE_CB	= 1,
-	RKISP1_PLANE_CR	= 2
-};
-
-/*
- * @fourcc: pixel format
- * @fmt_type: helper filed for pixel format
- * @uv_swap: if cb cr swaped, for yuv
- * @write_format: defines how YCbCr self picture data is written to memory
- * @output_format: defines sp output format
- * @mbus: the mbus code on the src resizer pad that matches the pixel format
- */
-struct rkisp1_capture_fmt_cfg {
-	u32 fourcc;
-	u8 uv_swap;
-	u32 write_format;
-	u32 output_format;
-	u32 mbus;
-};
-
-struct rkisp1_capture_ops {
-	void (*config)(struct rkisp1_capture *cap);
-	void (*stop)(struct rkisp1_capture *cap);
-	void (*enable)(struct rkisp1_capture *cap);
-	void (*disable)(struct rkisp1_capture *cap);
-	void (*set_data_path)(struct rkisp1_capture *cap);
-	bool (*is_stopped)(struct rkisp1_capture *cap);
-};
-
-struct rkisp1_capture_config {
-	const struct rkisp1_capture_fmt_cfg *fmts;
-	int fmt_size;
-	struct {
-		u32 y_size_init;
-		u32 cb_size_init;
-		u32 cr_size_init;
-		u32 y_base_ad_init;
-		u32 cb_base_ad_init;
-		u32 cr_base_ad_init;
-		u32 y_offs_cnt_init;
-		u32 cb_offs_cnt_init;
-		u32 cr_offs_cnt_init;
-	} mi;
-};
-
-/*
- * The supported pixel formats for mainpath. NOTE, pixel formats with identical 'mbus'
- * are grouped together. This is assumed and used by the function rkisp1_cap_enum_mbus_codes
- */
-static const struct rkisp1_capture_fmt_cfg rkisp1_mp_fmts[] = {
-	/* yuv422 */
-	{
-		.fourcc = V4L2_PIX_FMT_YUYV,
-		.uv_swap = 0,
-		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUVINT,
-		.mbus = MEDIA_BUS_FMT_YUYV8_2X8,
-	}, {
-		.fourcc = V4L2_PIX_FMT_YUV422P,
-		.uv_swap = 0,
-		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_PLA_OR_RAW8,
-		.mbus = MEDIA_BUS_FMT_YUYV8_2X8,
-	}, {
-		.fourcc = V4L2_PIX_FMT_NV16,
-		.uv_swap = 0,
-		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_SPLA,
-		.mbus = MEDIA_BUS_FMT_YUYV8_2X8,
-	}, {
-		.fourcc = V4L2_PIX_FMT_NV61,
-		.uv_swap = 1,
-		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_SPLA,
-		.mbus = MEDIA_BUS_FMT_YUYV8_2X8,
-	}, {
-		.fourcc = V4L2_PIX_FMT_YVU422M,
-		.uv_swap = 1,
-		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_PLA_OR_RAW8,
-		.mbus = MEDIA_BUS_FMT_YUYV8_2X8,
-	},
-	/* yuv400 */
-	{
-		.fourcc = V4L2_PIX_FMT_GREY,
-		.uv_swap = 0,
-		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_PLA_OR_RAW8,
-		.mbus = MEDIA_BUS_FMT_YUYV8_2X8,
-	},
-	/* yuv420 */
-	{
-		.fourcc = V4L2_PIX_FMT_NV21,
-		.uv_swap = 1,
-		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_SPLA,
-		.mbus = MEDIA_BUS_FMT_YUYV8_1_5X8,
-	}, {
-		.fourcc = V4L2_PIX_FMT_NV12,
-		.uv_swap = 0,
-		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_SPLA,
-		.mbus = MEDIA_BUS_FMT_YUYV8_1_5X8,
-	}, {
-		.fourcc = V4L2_PIX_FMT_NV21M,
-		.uv_swap = 1,
-		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_SPLA,
-		.mbus = MEDIA_BUS_FMT_YUYV8_1_5X8,
-	}, {
-		.fourcc = V4L2_PIX_FMT_NV12M,
-		.uv_swap = 0,
-		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_SPLA,
-		.mbus = MEDIA_BUS_FMT_YUYV8_1_5X8,
-	}, {
-		.fourcc = V4L2_PIX_FMT_YUV420,
-		.uv_swap = 0,
-		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_PLA_OR_RAW8,
-		.mbus = MEDIA_BUS_FMT_YUYV8_1_5X8,
-	}, {
-		.fourcc = V4L2_PIX_FMT_YVU420,
-		.uv_swap = 1,
-		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_PLA_OR_RAW8,
-		.mbus = MEDIA_BUS_FMT_YUYV8_1_5X8,
-	},
-	/* raw */
-	{
-		.fourcc = V4L2_PIX_FMT_SRGGB8,
-		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_PLA_OR_RAW8,
-		.mbus = MEDIA_BUS_FMT_SRGGB8_1X8,
-	}, {
-		.fourcc = V4L2_PIX_FMT_SGRBG8,
-		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_PLA_OR_RAW8,
-		.mbus = MEDIA_BUS_FMT_SGRBG8_1X8,
-	}, {
-		.fourcc = V4L2_PIX_FMT_SGBRG8,
-		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_PLA_OR_RAW8,
-		.mbus = MEDIA_BUS_FMT_SGBRG8_1X8,
-	}, {
-		.fourcc = V4L2_PIX_FMT_SBGGR8,
-		.write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_PLA_OR_RAW8,
-		.mbus = MEDIA_BUS_FMT_SBGGR8_1X8,
-	}, {
-		.fourcc = V4L2_PIX_FMT_SRGGB10,
-		.write_format = RKISP1_MI_CTRL_MP_WRITE_RAW12,
-		.mbus = MEDIA_BUS_FMT_SRGGB10_1X10,
-	}, {
-		.fourcc = V4L2_PIX_FMT_SGRBG10,
-		.write_format = RKISP1_MI_CTRL_MP_WRITE_RAW12,
-		.mbus = MEDIA_BUS_FMT_SGRBG10_1X10,
-	}, {
-		.fourcc = V4L2_PIX_FMT_SGBRG10,
-		.write_format = RKISP1_MI_CTRL_MP_WRITE_RAW12,
-		.mbus = MEDIA_BUS_FMT_SGBRG10_1X10,
-	}, {
-		.fourcc = V4L2_PIX_FMT_SBGGR10,
-		.write_format = RKISP1_MI_CTRL_MP_WRITE_RAW12,
-		.mbus = MEDIA_BUS_FMT_SBGGR10_1X10,
-	}, {
-		.fourcc = V4L2_PIX_FMT_SRGGB12,
-		.write_format = RKISP1_MI_CTRL_MP_WRITE_RAW12,
-		.mbus = MEDIA_BUS_FMT_SRGGB12_1X12,
-	}, {
-		.fourcc = V4L2_PIX_FMT_SGRBG12,
-		.write_format = RKISP1_MI_CTRL_MP_WRITE_RAW12,
-		.mbus = MEDIA_BUS_FMT_SGRBG12_1X12,
-	}, {
-		.fourcc = V4L2_PIX_FMT_SGBRG12,
-		.write_format = RKISP1_MI_CTRL_MP_WRITE_RAW12,
-		.mbus = MEDIA_BUS_FMT_SGBRG12_1X12,
-	}, {
-		.fourcc = V4L2_PIX_FMT_SBGGR12,
-		.write_format = RKISP1_MI_CTRL_MP_WRITE_RAW12,
-		.mbus = MEDIA_BUS_FMT_SBGGR12_1X12,
-	},
-};
-
-/*
- * The supported pixel formats for selfpath. NOTE, pixel formats with identical 'mbus'
- * are grouped together. This is assumed and used by the function rkisp1_cap_enum_mbus_codes
- */
-static const struct rkisp1_capture_fmt_cfg rkisp1_sp_fmts[] = {
-	/* yuv422 */
-	{
-		.fourcc = V4L2_PIX_FMT_YUYV,
-		.uv_swap = 0,
-		.write_format = RKISP1_MI_CTRL_SP_WRITE_INT,
-		.output_format = RKISP1_MI_CTRL_SP_OUTPUT_YUV422,
-		.mbus = MEDIA_BUS_FMT_YUYV8_2X8,
-	}, {
-		.fourcc = V4L2_PIX_FMT_YUV422P,
-		.uv_swap = 0,
-		.write_format = RKISP1_MI_CTRL_SP_WRITE_PLA,
-		.output_format = RKISP1_MI_CTRL_SP_OUTPUT_YUV422,
-		.mbus = MEDIA_BUS_FMT_YUYV8_2X8,
-	}, {
-		.fourcc = V4L2_PIX_FMT_NV16,
-		.uv_swap = 0,
-		.write_format = RKISP1_MI_CTRL_SP_WRITE_SPLA,
-		.output_format = RKISP1_MI_CTRL_SP_OUTPUT_YUV422,
-		.mbus = MEDIA_BUS_FMT_YUYV8_2X8,
-	}, {
-		.fourcc = V4L2_PIX_FMT_NV61,
-		.uv_swap = 1,
-		.write_format = RKISP1_MI_CTRL_SP_WRITE_SPLA,
-		.output_format = RKISP1_MI_CTRL_SP_OUTPUT_YUV422,
-		.mbus = MEDIA_BUS_FMT_YUYV8_2X8,
-	}, {
-		.fourcc = V4L2_PIX_FMT_YVU422M,
-		.uv_swap = 1,
-		.write_format = RKISP1_MI_CTRL_SP_WRITE_PLA,
-		.output_format = RKISP1_MI_CTRL_SP_OUTPUT_YUV422,
-		.mbus = MEDIA_BUS_FMT_YUYV8_2X8,
-	},
-	/* yuv400 */
-	{
-		.fourcc = V4L2_PIX_FMT_GREY,
-		.uv_swap = 0,
-		.write_format = RKISP1_MI_CTRL_SP_WRITE_PLA,
-		.output_format = RKISP1_MI_CTRL_SP_OUTPUT_YUV400,
-		.mbus = MEDIA_BUS_FMT_YUYV8_2X8,
-	},
-	/* rgb */
-	{
-		.fourcc = V4L2_PIX_FMT_XBGR32,
-		.write_format = RKISP1_MI_CTRL_SP_WRITE_PLA,
-		.output_format = RKISP1_MI_CTRL_SP_OUTPUT_RGB888,
-		.mbus = MEDIA_BUS_FMT_YUYV8_2X8,
-	}, {
-		.fourcc = V4L2_PIX_FMT_RGB565,
-		.write_format = RKISP1_MI_CTRL_SP_WRITE_PLA,
-		.output_format = RKISP1_MI_CTRL_SP_OUTPUT_RGB565,
-		.mbus = MEDIA_BUS_FMT_YUYV8_2X8,
-	},
-	/* yuv420 */
-	{
-		.fourcc = V4L2_PIX_FMT_NV21,
-		.uv_swap = 1,
-		.write_format = RKISP1_MI_CTRL_SP_WRITE_SPLA,
-		.output_format = RKISP1_MI_CTRL_SP_OUTPUT_YUV420,
-		.mbus = MEDIA_BUS_FMT_YUYV8_1_5X8,
-	}, {
-		.fourcc = V4L2_PIX_FMT_NV12,
-		.uv_swap = 0,
-		.write_format = RKISP1_MI_CTRL_SP_WRITE_SPLA,
-		.output_format = RKISP1_MI_CTRL_SP_OUTPUT_YUV420,
-		.mbus = MEDIA_BUS_FMT_YUYV8_1_5X8,
-	}, {
-		.fourcc = V4L2_PIX_FMT_NV21M,
-		.uv_swap = 1,
-		.write_format = RKISP1_MI_CTRL_SP_WRITE_SPLA,
-		.output_format = RKISP1_MI_CTRL_SP_OUTPUT_YUV420,
-		.mbus = MEDIA_BUS_FMT_YUYV8_1_5X8,
-	}, {
-		.fourcc = V4L2_PIX_FMT_NV12M,
-		.uv_swap = 0,
-		.write_format = RKISP1_MI_CTRL_SP_WRITE_SPLA,
-		.output_format = RKISP1_MI_CTRL_SP_OUTPUT_YUV420,
-		.mbus = MEDIA_BUS_FMT_YUYV8_1_5X8,
-	}, {
-		.fourcc = V4L2_PIX_FMT_YUV420,
-		.uv_swap = 0,
-		.write_format = RKISP1_MI_CTRL_SP_WRITE_PLA,
-		.output_format = RKISP1_MI_CTRL_SP_OUTPUT_YUV420,
-		.mbus = MEDIA_BUS_FMT_YUYV8_1_5X8,
-	}, {
-		.fourcc = V4L2_PIX_FMT_YVU420,
-		.uv_swap = 1,
-		.write_format = RKISP1_MI_CTRL_SP_WRITE_PLA,
-		.output_format = RKISP1_MI_CTRL_SP_OUTPUT_YUV420,
-		.mbus = MEDIA_BUS_FMT_YUYV8_1_5X8,
-	},
-};
-
-static const struct rkisp1_capture_config rkisp1_capture_config_mp = {
-	.fmts = rkisp1_mp_fmts,
-	.fmt_size = ARRAY_SIZE(rkisp1_mp_fmts),
-	.mi = {
-		.y_size_init =		RKISP1_CIF_MI_MP_Y_SIZE_INIT,
-		.cb_size_init =		RKISP1_CIF_MI_MP_CB_SIZE_INIT,
-		.cr_size_init =		RKISP1_CIF_MI_MP_CR_SIZE_INIT,
-		.y_base_ad_init =	RKISP1_CIF_MI_MP_Y_BASE_AD_INIT,
-		.cb_base_ad_init =	RKISP1_CIF_MI_MP_CB_BASE_AD_INIT,
-		.cr_base_ad_init =	RKISP1_CIF_MI_MP_CR_BASE_AD_INIT,
-		.y_offs_cnt_init =	RKISP1_CIF_MI_MP_Y_OFFS_CNT_INIT,
-		.cb_offs_cnt_init =	RKISP1_CIF_MI_MP_CB_OFFS_CNT_INIT,
-		.cr_offs_cnt_init =	RKISP1_CIF_MI_MP_CR_OFFS_CNT_INIT,
-	},
-};
-
-static const struct rkisp1_capture_config rkisp1_capture_config_sp = {
-	.fmts = rkisp1_sp_fmts,
-	.fmt_size = ARRAY_SIZE(rkisp1_sp_fmts),
-	.mi = {
-		.y_size_init =		RKISP1_CIF_MI_SP_Y_SIZE_INIT,
-		.cb_size_init =		RKISP1_CIF_MI_SP_CB_SIZE_INIT,
-		.cr_size_init =		RKISP1_CIF_MI_SP_CR_SIZE_INIT,
-		.y_base_ad_init =	RKISP1_CIF_MI_SP_Y_BASE_AD_INIT,
-		.cb_base_ad_init =	RKISP1_CIF_MI_SP_CB_BASE_AD_INIT,
-		.cr_base_ad_init =	RKISP1_CIF_MI_SP_CR_BASE_AD_INIT,
-		.y_offs_cnt_init =	RKISP1_CIF_MI_SP_Y_OFFS_CNT_INIT,
-		.cb_offs_cnt_init =	RKISP1_CIF_MI_SP_CB_OFFS_CNT_INIT,
-		.cr_offs_cnt_init =	RKISP1_CIF_MI_SP_CR_OFFS_CNT_INIT,
-	},
-};
-
-static inline struct rkisp1_vdev_node *
-rkisp1_vdev_to_node(struct video_device *vdev)
-{
-	return container_of(vdev, struct rkisp1_vdev_node, vdev);
-}
-
-int rkisp1_cap_enum_mbus_codes(struct rkisp1_capture *cap,
-			       struct v4l2_subdev_mbus_code_enum *code)
-{
-	const struct rkisp1_capture_fmt_cfg *fmts = cap->config->fmts;
-	/*
-	 * initialize curr_mbus to non existing mbus code 0 to ensure it is
-	 * different from fmts[0].mbus
-	 */
-	u32 curr_mbus = 0;
-	int i, n = 0;
-
-	for (i = 0; i < cap->config->fmt_size; i++) {
-		if (fmts[i].mbus == curr_mbus)
-			continue;
-
-		curr_mbus = fmts[i].mbus;
-		if (n++ == code->index) {
-			code->code = curr_mbus;
-			return 0;
-		}
-	}
-	return -EINVAL;
-}
-
-/* ----------------------------------------------------------------------------
- * Stream operations for self-picture path (sp) and main-picture path (mp)
- */
-
-static void rkisp1_mi_config_ctrl(struct rkisp1_capture *cap)
-{
-	u32 mi_ctrl = rkisp1_read(cap->rkisp1, RKISP1_CIF_MI_CTRL);
-
-	mi_ctrl &= ~GENMASK(17, 16);
-	mi_ctrl |= RKISP1_CIF_MI_CTRL_BURST_LEN_LUM_64;
-
-	mi_ctrl &= ~GENMASK(19, 18);
-	mi_ctrl |= RKISP1_CIF_MI_CTRL_BURST_LEN_CHROM_64;
-
-	mi_ctrl |= RKISP1_CIF_MI_CTRL_INIT_BASE_EN |
-		   RKISP1_CIF_MI_CTRL_INIT_OFFSET_EN;
-
-	rkisp1_write(cap->rkisp1, mi_ctrl, RKISP1_CIF_MI_CTRL);
-}
-
-static u32 rkisp1_pixfmt_comp_size(const struct v4l2_pix_format_mplane *pixm,
-				   unsigned int component)
-{
-	/*
-	 * If packed format, then plane_fmt[0].sizeimage is the sum of all
-	 * components, so we need to calculate just the size of Y component.
-	 * See rkisp1_fill_pixfmt().
-	 */
-	if (!component && pixm->num_planes == 1)
-		return pixm->plane_fmt[0].bytesperline * pixm->height;
-	return pixm->plane_fmt[component].sizeimage;
-}
-
-static void rkisp1_irq_frame_end_enable(struct rkisp1_capture *cap)
-{
-	u32 mi_imsc = rkisp1_read(cap->rkisp1, RKISP1_CIF_MI_IMSC);
-
-	mi_imsc |= RKISP1_CIF_MI_FRAME(cap);
-	rkisp1_write(cap->rkisp1, mi_imsc, RKISP1_CIF_MI_IMSC);
-}
-
-static void rkisp1_mp_config(struct rkisp1_capture *cap)
-{
-	const struct v4l2_pix_format_mplane *pixm = &cap->pix.fmt;
-	struct rkisp1_device *rkisp1 = cap->rkisp1;
-	u32 reg;
-
-	rkisp1_write(rkisp1, rkisp1_pixfmt_comp_size(pixm, RKISP1_PLANE_Y),
-		     cap->config->mi.y_size_init);
-	rkisp1_write(rkisp1, rkisp1_pixfmt_comp_size(pixm, RKISP1_PLANE_CB),
-		     cap->config->mi.cb_size_init);
-	rkisp1_write(rkisp1, rkisp1_pixfmt_comp_size(pixm, RKISP1_PLANE_CR),
-		     cap->config->mi.cr_size_init);
-
-	rkisp1_irq_frame_end_enable(cap);
-
-	/* set uv swapping for semiplanar formats */
-	if (cap->pix.info->comp_planes == 2) {
-		reg = rkisp1_read(rkisp1, RKISP1_CIF_MI_XTD_FORMAT_CTRL);
-		if (cap->pix.cfg->uv_swap)
-			reg |= RKISP1_CIF_MI_XTD_FMT_CTRL_MP_CB_CR_SWAP;
-		else
-			reg &= ~RKISP1_CIF_MI_XTD_FMT_CTRL_MP_CB_CR_SWAP;
-		rkisp1_write(rkisp1, reg, RKISP1_CIF_MI_XTD_FORMAT_CTRL);
-	}
-
-	rkisp1_mi_config_ctrl(cap);
-
-	reg = rkisp1_read(rkisp1, RKISP1_CIF_MI_CTRL);
-	reg &= ~RKISP1_MI_CTRL_MP_FMT_MASK;
-	reg |= cap->pix.cfg->write_format;
-	rkisp1_write(rkisp1, reg, RKISP1_CIF_MI_CTRL);
-
-	reg = rkisp1_read(rkisp1, RKISP1_CIF_MI_CTRL);
-	reg |= RKISP1_CIF_MI_MP_AUTOUPDATE_ENABLE;
-	rkisp1_write(rkisp1, reg, RKISP1_CIF_MI_CTRL);
-}
-
-static void rkisp1_sp_config(struct rkisp1_capture *cap)
-{
-	const struct v4l2_pix_format_mplane *pixm = &cap->pix.fmt;
-	struct rkisp1_device *rkisp1 = cap->rkisp1;
-	u32 mi_ctrl, reg;
-
-	rkisp1_write(rkisp1, rkisp1_pixfmt_comp_size(pixm, RKISP1_PLANE_Y),
-		     cap->config->mi.y_size_init);
-	rkisp1_write(rkisp1, rkisp1_pixfmt_comp_size(pixm, RKISP1_PLANE_CB),
-		     cap->config->mi.cb_size_init);
-	rkisp1_write(rkisp1, rkisp1_pixfmt_comp_size(pixm, RKISP1_PLANE_CR),
-		     cap->config->mi.cr_size_init);
-
-	rkisp1_write(rkisp1, pixm->width, RKISP1_CIF_MI_SP_Y_PIC_WIDTH);
-	rkisp1_write(rkisp1, pixm->height, RKISP1_CIF_MI_SP_Y_PIC_HEIGHT);
-	rkisp1_write(rkisp1, cap->sp_y_stride, RKISP1_CIF_MI_SP_Y_LLENGTH);
-
-	rkisp1_irq_frame_end_enable(cap);
-
-	/* set uv swapping for semiplanar formats */
-	if (cap->pix.info->comp_planes == 2) {
-		reg = rkisp1_read(rkisp1, RKISP1_CIF_MI_XTD_FORMAT_CTRL);
-		if (cap->pix.cfg->uv_swap)
-			reg |= RKISP1_CIF_MI_XTD_FMT_CTRL_SP_CB_CR_SWAP;
-		else
-			reg &= ~RKISP1_CIF_MI_XTD_FMT_CTRL_SP_CB_CR_SWAP;
-		rkisp1_write(rkisp1, reg, RKISP1_CIF_MI_XTD_FORMAT_CTRL);
-	}
-
-	rkisp1_mi_config_ctrl(cap);
-
-	mi_ctrl = rkisp1_read(rkisp1, RKISP1_CIF_MI_CTRL);
-	mi_ctrl &= ~RKISP1_MI_CTRL_SP_FMT_MASK;
-	mi_ctrl |= cap->pix.cfg->write_format |
-		   RKISP1_MI_CTRL_SP_INPUT_YUV422 |
-		   cap->pix.cfg->output_format |
-		   RKISP1_CIF_MI_SP_AUTOUPDATE_ENABLE;
-	rkisp1_write(rkisp1, mi_ctrl, RKISP1_CIF_MI_CTRL);
-}
-
-static void rkisp1_mp_disable(struct rkisp1_capture *cap)
-{
-	u32 mi_ctrl = rkisp1_read(cap->rkisp1, RKISP1_CIF_MI_CTRL);
-
-	mi_ctrl &= ~(RKISP1_CIF_MI_CTRL_MP_ENABLE |
-		     RKISP1_CIF_MI_CTRL_RAW_ENABLE);
-	rkisp1_write(cap->rkisp1, mi_ctrl, RKISP1_CIF_MI_CTRL);
-}
-
-static void rkisp1_sp_disable(struct rkisp1_capture *cap)
-{
-	u32 mi_ctrl = rkisp1_read(cap->rkisp1, RKISP1_CIF_MI_CTRL);
-
-	mi_ctrl &= ~RKISP1_CIF_MI_CTRL_SP_ENABLE;
-	rkisp1_write(cap->rkisp1, mi_ctrl, RKISP1_CIF_MI_CTRL);
-}
-
-static void rkisp1_mp_enable(struct rkisp1_capture *cap)
-{
-	u32 mi_ctrl;
-
-	rkisp1_mp_disable(cap);
-
-	mi_ctrl = rkisp1_read(cap->rkisp1, RKISP1_CIF_MI_CTRL);
-	if (v4l2_is_format_bayer(cap->pix.info))
-		mi_ctrl |= RKISP1_CIF_MI_CTRL_RAW_ENABLE;
-	/* YUV */
-	else
-		mi_ctrl |= RKISP1_CIF_MI_CTRL_MP_ENABLE;
-
-	rkisp1_write(cap->rkisp1, mi_ctrl, RKISP1_CIF_MI_CTRL);
-}
-
-static void rkisp1_sp_enable(struct rkisp1_capture *cap)
-{
-	u32 mi_ctrl = rkisp1_read(cap->rkisp1, RKISP1_CIF_MI_CTRL);
-
-	mi_ctrl |= RKISP1_CIF_MI_CTRL_SP_ENABLE;
-	rkisp1_write(cap->rkisp1, mi_ctrl, RKISP1_CIF_MI_CTRL);
-}
-
-static void rkisp1_mp_sp_stop(struct rkisp1_capture *cap)
-{
-	if (!cap->is_streaming)
-		return;
-	rkisp1_write(cap->rkisp1,
-		     RKISP1_CIF_MI_FRAME(cap), RKISP1_CIF_MI_ICR);
-	cap->ops->disable(cap);
-}
-
-static bool rkisp1_mp_is_stopped(struct rkisp1_capture *cap)
-{
-	u32 en = RKISP1_CIF_MI_CTRL_SHD_MP_IN_ENABLED |
-		 RKISP1_CIF_MI_CTRL_SHD_RAW_OUT_ENABLED;
-
-	return !(rkisp1_read(cap->rkisp1, RKISP1_CIF_MI_CTRL_SHD) & en);
-}
-
-static bool rkisp1_sp_is_stopped(struct rkisp1_capture *cap)
-{
-	return !(rkisp1_read(cap->rkisp1, RKISP1_CIF_MI_CTRL_SHD) &
-		 RKISP1_CIF_MI_CTRL_SHD_SP_IN_ENABLED);
-}
-
-static void rkisp1_mp_set_data_path(struct rkisp1_capture *cap)
-{
-	u32 dpcl = rkisp1_read(cap->rkisp1, RKISP1_CIF_VI_DPCL);
-
-	dpcl = dpcl | RKISP1_CIF_VI_DPCL_CHAN_MODE_MP |
-	       RKISP1_CIF_VI_DPCL_MP_MUX_MRSZ_MI;
-	rkisp1_write(cap->rkisp1, dpcl, RKISP1_CIF_VI_DPCL);
-}
-
-static void rkisp1_sp_set_data_path(struct rkisp1_capture *cap)
-{
-	u32 dpcl = rkisp1_read(cap->rkisp1, RKISP1_CIF_VI_DPCL);
-
-	dpcl |= RKISP1_CIF_VI_DPCL_CHAN_MODE_SP;
-	rkisp1_write(cap->rkisp1, dpcl, RKISP1_CIF_VI_DPCL);
-}
-
-static struct rkisp1_capture_ops rkisp1_capture_ops_mp = {
-	.config = rkisp1_mp_config,
-	.enable = rkisp1_mp_enable,
-	.disable = rkisp1_mp_disable,
-	.stop = rkisp1_mp_sp_stop,
-	.set_data_path = rkisp1_mp_set_data_path,
-	.is_stopped = rkisp1_mp_is_stopped,
-};
-
-static struct rkisp1_capture_ops rkisp1_capture_ops_sp = {
-	.config = rkisp1_sp_config,
-	.enable = rkisp1_sp_enable,
-	.disable = rkisp1_sp_disable,
-	.stop = rkisp1_mp_sp_stop,
-	.set_data_path = rkisp1_sp_set_data_path,
-	.is_stopped = rkisp1_sp_is_stopped,
-};
-
-/* ----------------------------------------------------------------------------
- * Frame buffer operations
- */
-
-static int rkisp1_dummy_buf_create(struct rkisp1_capture *cap)
-{
-	const struct v4l2_pix_format_mplane *pixm = &cap->pix.fmt;
-	struct rkisp1_dummy_buffer *dummy_buf = &cap->buf.dummy;
-
-	dummy_buf->size = max3(rkisp1_pixfmt_comp_size(pixm, RKISP1_PLANE_Y),
-			       rkisp1_pixfmt_comp_size(pixm, RKISP1_PLANE_CB),
-			       rkisp1_pixfmt_comp_size(pixm, RKISP1_PLANE_CR));
-
-	/* The driver never access vaddr, no mapping is required */
-	dummy_buf->vaddr = dma_alloc_attrs(cap->rkisp1->dev,
-					   dummy_buf->size,
-					   &dummy_buf->dma_addr,
-					   GFP_KERNEL,
-					   DMA_ATTR_NO_KERNEL_MAPPING);
-	if (!dummy_buf->vaddr)
-		return -ENOMEM;
-
-	return 0;
-}
-
-static void rkisp1_dummy_buf_destroy(struct rkisp1_capture *cap)
-{
-	dma_free_attrs(cap->rkisp1->dev,
-		       cap->buf.dummy.size, cap->buf.dummy.vaddr,
-		       cap->buf.dummy.dma_addr, DMA_ATTR_NO_KERNEL_MAPPING);
-}
-
-static void rkisp1_set_next_buf(struct rkisp1_capture *cap)
-{
-	cap->buf.curr = cap->buf.next;
-	cap->buf.next = NULL;
-
-	if (!list_empty(&cap->buf.queue)) {
-		u32 *buff_addr;
-
-		cap->buf.next = list_first_entry(&cap->buf.queue, struct rkisp1_buffer, queue);
-		list_del(&cap->buf.next->queue);
-
-		buff_addr = cap->buf.next->buff_addr;
-
-		rkisp1_write(cap->rkisp1,
-			     buff_addr[RKISP1_PLANE_Y],
-			     cap->config->mi.y_base_ad_init);
-		rkisp1_write(cap->rkisp1,
-			     buff_addr[RKISP1_PLANE_CB],
-			     cap->config->mi.cb_base_ad_init);
-		rkisp1_write(cap->rkisp1,
-			     buff_addr[RKISP1_PLANE_CR],
-			     cap->config->mi.cr_base_ad_init);
-	} else {
-		/*
-		 * Use the dummy space allocated by dma_alloc_coherent to
-		 * throw data if there is no available buffer.
-		 */
-		rkisp1_write(cap->rkisp1,
-			     cap->buf.dummy.dma_addr,
-			     cap->config->mi.y_base_ad_init);
-		rkisp1_write(cap->rkisp1,
-			     cap->buf.dummy.dma_addr,
-			     cap->config->mi.cb_base_ad_init);
-		rkisp1_write(cap->rkisp1,
-			     cap->buf.dummy.dma_addr,
-			     cap->config->mi.cr_base_ad_init);
-	}
-
-	/* Set plane offsets */
-	rkisp1_write(cap->rkisp1, 0, cap->config->mi.y_offs_cnt_init);
-	rkisp1_write(cap->rkisp1, 0, cap->config->mi.cb_offs_cnt_init);
-	rkisp1_write(cap->rkisp1, 0, cap->config->mi.cr_offs_cnt_init);
-}
-
-/*
- * This function is called when a frame end comes. The next frame
- * is processing and we should set up buffer for next-next frame,
- * otherwise it will overflow.
- */
-static void rkisp1_handle_buffer(struct rkisp1_capture *cap)
-{
-	struct rkisp1_isp *isp = &cap->rkisp1->isp;
-	struct rkisp1_buffer *curr_buf;
-
-	spin_lock(&cap->buf.lock);
-	curr_buf = cap->buf.curr;
-
-	if (curr_buf) {
-		curr_buf->vb.sequence = isp->frame_sequence;
-		curr_buf->vb.vb2_buf.timestamp = ktime_get_boottime_ns();
-		curr_buf->vb.field = V4L2_FIELD_NONE;
-		vb2_buffer_done(&curr_buf->vb.vb2_buf, VB2_BUF_STATE_DONE);
-	} else {
-		cap->rkisp1->debug.frame_drop[cap->id]++;
-	}
-
-	rkisp1_set_next_buf(cap);
-	spin_unlock(&cap->buf.lock);
-}
-
-void rkisp1_capture_isr(struct rkisp1_device *rkisp1)
-{
-	unsigned int i;
-	u32 status;
-
-	status = rkisp1_read(rkisp1, RKISP1_CIF_MI_MIS);
-	rkisp1_write(rkisp1, status, RKISP1_CIF_MI_ICR);
-
-	for (i = 0; i < ARRAY_SIZE(rkisp1->capture_devs); ++i) {
-		struct rkisp1_capture *cap = &rkisp1->capture_devs[i];
-
-		if (!(status & RKISP1_CIF_MI_FRAME(cap)))
-			continue;
-		if (!cap->is_stopping) {
-			rkisp1_handle_buffer(cap);
-			continue;
-		}
-		/*
-		 * Make sure stream is actually stopped, whose state
-		 * can be read from the shadow register, before
-		 * wake_up() thread which would immediately free all
-		 * frame buffers. stop() takes effect at the next
-		 * frame end that sync the configurations to shadow
-		 * regs.
-		 */
-		if (!cap->ops->is_stopped(cap)) {
-			cap->ops->stop(cap);
-			continue;
-		}
-		cap->is_stopping = false;
-		cap->is_streaming = false;
-		wake_up(&cap->done);
-	}
-}
-
-/* ----------------------------------------------------------------------------
- * Vb2 operations
- */
-
-static int rkisp1_vb2_queue_setup(struct vb2_queue *queue,
-				  unsigned int *num_buffers,
-				  unsigned int *num_planes,
-				  unsigned int sizes[],
-				  struct device *alloc_devs[])
-{
-	struct rkisp1_capture *cap = queue->drv_priv;
-	const struct v4l2_pix_format_mplane *pixm = &cap->pix.fmt;
-	unsigned int i;
-
-	if (*num_planes) {
-		if (*num_planes != pixm->num_planes)
-			return -EINVAL;
-
-		for (i = 0; i < pixm->num_planes; i++)
-			if (sizes[i] < pixm->plane_fmt[i].sizeimage)
-				return -EINVAL;
-	} else {
-		*num_planes = pixm->num_planes;
-		for (i = 0; i < pixm->num_planes; i++)
-			sizes[i] = pixm->plane_fmt[i].sizeimage;
-	}
-
-	return 0;
-}
-
-static void rkisp1_vb2_buf_queue(struct vb2_buffer *vb)
-{
-	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
-	struct rkisp1_buffer *ispbuf =
-		container_of(vbuf, struct rkisp1_buffer, vb);
-	struct rkisp1_capture *cap = vb->vb2_queue->drv_priv;
-	const struct v4l2_pix_format_mplane *pixm = &cap->pix.fmt;
-	unsigned int i;
-
-	memset(ispbuf->buff_addr, 0, sizeof(ispbuf->buff_addr));
-	for (i = 0; i < pixm->num_planes; i++)
-		ispbuf->buff_addr[i] = vb2_dma_contig_plane_dma_addr(vb, i);
-
-	/* Convert to non-MPLANE */
-	if (pixm->num_planes == 1) {
-		ispbuf->buff_addr[RKISP1_PLANE_CB] =
-			ispbuf->buff_addr[RKISP1_PLANE_Y] +
-			rkisp1_pixfmt_comp_size(pixm, RKISP1_PLANE_Y);
-		ispbuf->buff_addr[RKISP1_PLANE_CR] =
-			ispbuf->buff_addr[RKISP1_PLANE_CB] +
-			rkisp1_pixfmt_comp_size(pixm, RKISP1_PLANE_CB);
-	}
-
-	/*
-	 * uv swap can be supported for planar formats by switching
-	 * the address of cb and cr
-	 */
-	if (cap->pix.info->comp_planes == 3 && cap->pix.cfg->uv_swap)
-		swap(ispbuf->buff_addr[RKISP1_PLANE_CR],
-		     ispbuf->buff_addr[RKISP1_PLANE_CB]);
-
-	spin_lock_irq(&cap->buf.lock);
-	list_add_tail(&ispbuf->queue, &cap->buf.queue);
-	spin_unlock_irq(&cap->buf.lock);
-}
-
-static int rkisp1_vb2_buf_prepare(struct vb2_buffer *vb)
-{
-	struct rkisp1_capture *cap = vb->vb2_queue->drv_priv;
-	unsigned int i;
-
-	for (i = 0; i < cap->pix.fmt.num_planes; i++) {
-		unsigned long size = cap->pix.fmt.plane_fmt[i].sizeimage;
-
-		if (vb2_plane_size(vb, i) < size) {
-			dev_err(cap->rkisp1->dev,
-				"User buffer too small (%ld < %ld)\n",
-				vb2_plane_size(vb, i), size);
-			return -EINVAL;
-		}
-		vb2_set_plane_payload(vb, i, size);
-	}
-
-	return 0;
-}
-
-static void rkisp1_return_all_buffers(struct rkisp1_capture *cap,
-				      enum vb2_buffer_state state)
-{
-	struct rkisp1_buffer *buf;
-
-	spin_lock_irq(&cap->buf.lock);
-	if (cap->buf.curr) {
-		vb2_buffer_done(&cap->buf.curr->vb.vb2_buf, state);
-		cap->buf.curr = NULL;
-	}
-	if (cap->buf.next) {
-		vb2_buffer_done(&cap->buf.next->vb.vb2_buf, state);
-		cap->buf.next = NULL;
-	}
-	while (!list_empty(&cap->buf.queue)) {
-		buf = list_first_entry(&cap->buf.queue,
-				       struct rkisp1_buffer, queue);
-		list_del(&buf->queue);
-		vb2_buffer_done(&buf->vb.vb2_buf, state);
-	}
-	spin_unlock_irq(&cap->buf.lock);
-}
-
-/*
- * Most of registers inside rockchip ISP1 have shadow register since
- * they must be not be changed during processing a frame.
- * Usually, each sub-module updates its shadow register after
- * processing the last pixel of a frame.
- */
-static void rkisp1_cap_stream_enable(struct rkisp1_capture *cap)
-{
-	struct rkisp1_device *rkisp1 = cap->rkisp1;
-	struct rkisp1_capture *other = &rkisp1->capture_devs[cap->id ^ 1];
-
-	cap->ops->set_data_path(cap);
-	cap->ops->config(cap);
-
-	/* Setup a buffer for the next frame */
-	spin_lock_irq(&cap->buf.lock);
-	rkisp1_set_next_buf(cap);
-	cap->ops->enable(cap);
-	/* It's safe to config ACTIVE and SHADOW regs for the
-	 * first stream. While when the second is starting, do NOT
-	 * force update because it also update the first one.
-	 *
-	 * The latter case would drop one more buf(that is 2) since
-	 * there's not buf in shadow when the second FE received. This's
-	 * also required because the second FE maybe corrupt especially
-	 * when run at 120fps.
-	 */
-	if (!other->is_streaming) {
-		/* force cfg update */
-		rkisp1_write(rkisp1,
-			     RKISP1_CIF_MI_INIT_SOFT_UPD, RKISP1_CIF_MI_INIT);
-		rkisp1_set_next_buf(cap);
-	}
-	spin_unlock_irq(&cap->buf.lock);
-	cap->is_streaming = true;
-}
-
-static void rkisp1_cap_stream_disable(struct rkisp1_capture *cap)
-{
-	int ret;
-
-	/* Stream should stop in interrupt. If it dosn't, stop it by force. */
-	cap->is_stopping = true;
-	ret = wait_event_timeout(cap->done,
-				 !cap->is_streaming,
-				 msecs_to_jiffies(1000));
-	if (!ret) {
-		cap->rkisp1->debug.stop_timeout[cap->id]++;
-		cap->ops->stop(cap);
-		cap->is_stopping = false;
-		cap->is_streaming = false;
-	}
-}
-
-/*
- * rkisp1_pipeline_stream_disable - disable nodes in the pipeline
- *
- * Call s_stream(false) in the reverse order from
- * rkisp1_pipeline_stream_enable() and disable the DMA engine.
- * Should be called before media_pipeline_stop()
- */
-static void rkisp1_pipeline_stream_disable(struct rkisp1_capture *cap)
-	__must_hold(&cap->rkisp1->stream_lock)
-{
-	struct rkisp1_device *rkisp1 = cap->rkisp1;
-
-	rkisp1_cap_stream_disable(cap);
-
-	/*
-	 * If the other capture is streaming, isp and sensor nodes shouldn't
-	 * be disabled, skip them.
-	 */
-	if (rkisp1->pipe.streaming_count < 2) {
-		v4l2_subdev_call(rkisp1->active_sensor->sd, video, s_stream,
-				 false);
-		v4l2_subdev_call(&rkisp1->isp.sd, video, s_stream, false);
-	}
-
-	v4l2_subdev_call(&rkisp1->resizer_devs[cap->id].sd, video, s_stream,
-			 false);
-}
-
-/*
- * rkisp1_pipeline_stream_enable - enable nodes in the pipeline
- *
- * Enable the DMA Engine and call s_stream(true) through the pipeline.
- * Should be called after media_pipeline_start()
- */
-static int rkisp1_pipeline_stream_enable(struct rkisp1_capture *cap)
-	__must_hold(&cap->rkisp1->stream_lock)
-{
-	struct rkisp1_device *rkisp1 = cap->rkisp1;
-	int ret;
-
-	rkisp1_cap_stream_enable(cap);
-
-	ret = v4l2_subdev_call(&rkisp1->resizer_devs[cap->id].sd, video,
-			       s_stream, true);
-	if (ret)
-		goto err_disable_cap;
-
-	/*
-	 * If the other capture is streaming, isp and sensor nodes are already
-	 * enabled, skip them.
-	 */
-	if (rkisp1->pipe.streaming_count > 1)
-		return 0;
-
-	ret = v4l2_subdev_call(&rkisp1->isp.sd, video, s_stream, true);
-	if (ret)
-		goto err_disable_rsz;
-
-	ret = v4l2_subdev_call(rkisp1->active_sensor->sd, video, s_stream,
-			       true);
-	if (ret)
-		goto err_disable_isp;
-
-	return 0;
-
-err_disable_isp:
-	v4l2_subdev_call(&rkisp1->isp.sd, video, s_stream, false);
-err_disable_rsz:
-	v4l2_subdev_call(&rkisp1->resizer_devs[cap->id].sd, video, s_stream,
-			 false);
-err_disable_cap:
-	rkisp1_cap_stream_disable(cap);
-
-	return ret;
-}
-
-static void rkisp1_vb2_stop_streaming(struct vb2_queue *queue)
-{
-	struct rkisp1_capture *cap = queue->drv_priv;
-	struct rkisp1_vdev_node *node = &cap->vnode;
-	struct rkisp1_device *rkisp1 = cap->rkisp1;
-	int ret;
-
-	mutex_lock(&cap->rkisp1->stream_lock);
-
-	rkisp1_pipeline_stream_disable(cap);
-
-	rkisp1_return_all_buffers(cap, VB2_BUF_STATE_ERROR);
-
-	v4l2_pipeline_pm_put(&node->vdev.entity);
-	ret = pm_runtime_put(rkisp1->dev);
-	if (ret < 0)
-		dev_err(rkisp1->dev, "power down failed error:%d\n", ret);
-
-	rkisp1_dummy_buf_destroy(cap);
-
-	media_pipeline_stop(&node->vdev.entity);
-
-	mutex_unlock(&cap->rkisp1->stream_lock);
-}
-
-static int
-rkisp1_vb2_start_streaming(struct vb2_queue *queue, unsigned int count)
-{
-	struct rkisp1_capture *cap = queue->drv_priv;
-	struct media_entity *entity = &cap->vnode.vdev.entity;
-	int ret;
-
-	mutex_lock(&cap->rkisp1->stream_lock);
-
-	ret = media_pipeline_start(entity, &cap->rkisp1->pipe);
-	if (ret) {
-		dev_err(cap->rkisp1->dev, "start pipeline failed %d\n", ret);
-		goto err_ret_buffers;
-	}
-
-	ret = rkisp1_dummy_buf_create(cap);
-	if (ret)
-		goto err_pipeline_stop;
-
-	ret = pm_runtime_get_sync(cap->rkisp1->dev);
-	if (ret < 0) {
-		pm_runtime_put_noidle(cap->rkisp1->dev);
-		dev_err(cap->rkisp1->dev, "power up failed %d\n", ret);
-		goto err_destroy_dummy;
-	}
-	ret = v4l2_pipeline_pm_get(entity);
-	if (ret) {
-		dev_err(cap->rkisp1->dev, "open cif pipeline failed %d\n", ret);
-		goto err_pipe_pm_put;
-	}
-
-	ret = rkisp1_pipeline_stream_enable(cap);
-	if (ret)
-		goto err_v4l2_pm_put;
-
-	mutex_unlock(&cap->rkisp1->stream_lock);
-
-	return 0;
-
-err_v4l2_pm_put:
-	v4l2_pipeline_pm_put(entity);
-err_pipe_pm_put:
-	pm_runtime_put(cap->rkisp1->dev);
-err_destroy_dummy:
-	rkisp1_dummy_buf_destroy(cap);
-err_pipeline_stop:
-	media_pipeline_stop(entity);
-err_ret_buffers:
-	rkisp1_return_all_buffers(cap, VB2_BUF_STATE_QUEUED);
-	mutex_unlock(&cap->rkisp1->stream_lock);
-
-	return ret;
-}
-
-static struct vb2_ops rkisp1_vb2_ops = {
-	.queue_setup = rkisp1_vb2_queue_setup,
-	.buf_queue = rkisp1_vb2_buf_queue,
-	.buf_prepare = rkisp1_vb2_buf_prepare,
-	.wait_prepare = vb2_ops_wait_prepare,
-	.wait_finish = vb2_ops_wait_finish,
-	.stop_streaming = rkisp1_vb2_stop_streaming,
-	.start_streaming = rkisp1_vb2_start_streaming,
-};
-
-/* ----------------------------------------------------------------------------
- * IOCTLs operations
- */
-
-static const struct v4l2_format_info *
-rkisp1_fill_pixfmt(struct v4l2_pix_format_mplane *pixm,
-		   enum rkisp1_stream_id id)
-{
-	struct v4l2_plane_pix_format *plane_y = &pixm->plane_fmt[0];
-	const struct v4l2_format_info *info;
-	unsigned int i;
-	u32 stride;
-
-	memset(pixm->plane_fmt, 0, sizeof(pixm->plane_fmt));
-	info = v4l2_format_info(pixm->pixelformat);
-	pixm->num_planes = info->mem_planes;
-	stride = info->bpp[0] * pixm->width;
-	/* Self path supports custom stride but Main path doesn't */
-	if (id == RKISP1_MAINPATH || plane_y->bytesperline < stride)
-		plane_y->bytesperline = stride;
-	plane_y->sizeimage = plane_y->bytesperline * pixm->height;
-
-	/* normalize stride to pixels per line */
-	stride = DIV_ROUND_UP(plane_y->bytesperline, info->bpp[0]);
-
-	for (i = 1; i < info->comp_planes; i++) {
-		struct v4l2_plane_pix_format *plane = &pixm->plane_fmt[i];
-
-		/* bytesperline for other components derive from Y component */
-		plane->bytesperline = DIV_ROUND_UP(stride, info->hdiv) *
-				      info->bpp[i];
-		plane->sizeimage = plane->bytesperline *
-				   DIV_ROUND_UP(pixm->height, info->vdiv);
-	}
-
-	/*
-	 * If pixfmt is packed, then plane_fmt[0] should contain the total size
-	 * considering all components. plane_fmt[i] for i > 0 should be ignored
-	 * by userspace as mem_planes == 1, but we are keeping information there
-	 * for convenience.
-	 */
-	if (info->mem_planes == 1)
-		for (i = 1; i < info->comp_planes; i++)
-			plane_y->sizeimage += pixm->plane_fmt[i].sizeimage;
-
-	return info;
-}
-
-static const struct rkisp1_capture_fmt_cfg *
-rkisp1_find_fmt_cfg(const struct rkisp1_capture *cap, const u32 pixelfmt)
-{
-	unsigned int i;
-
-	for (i = 0; i < cap->config->fmt_size; i++) {
-		if (cap->config->fmts[i].fourcc == pixelfmt)
-			return &cap->config->fmts[i];
-	}
-	return NULL;
-}
-
-static void rkisp1_try_fmt(const struct rkisp1_capture *cap,
-			   struct v4l2_pix_format_mplane *pixm,
-			   const struct rkisp1_capture_fmt_cfg **fmt_cfg,
-			   const struct v4l2_format_info **fmt_info)
-{
-	const struct rkisp1_capture_config *config = cap->config;
-	const struct rkisp1_capture_fmt_cfg *fmt;
-	const struct v4l2_format_info *info;
-	const unsigned int max_widths[] = { RKISP1_RSZ_MP_SRC_MAX_WIDTH,
-					    RKISP1_RSZ_SP_SRC_MAX_WIDTH };
-	const unsigned int max_heights[] = { RKISP1_RSZ_MP_SRC_MAX_HEIGHT,
-					     RKISP1_RSZ_SP_SRC_MAX_HEIGHT};
-
-	fmt = rkisp1_find_fmt_cfg(cap, pixm->pixelformat);
-	if (!fmt) {
-		fmt = config->fmts;
-		pixm->pixelformat = fmt->fourcc;
-	}
-
-	pixm->width = clamp_t(u32, pixm->width,
-			      RKISP1_RSZ_SRC_MIN_WIDTH, max_widths[cap->id]);
-	pixm->height = clamp_t(u32, pixm->height,
-			       RKISP1_RSZ_SRC_MIN_HEIGHT, max_heights[cap->id]);
-
-	pixm->field = V4L2_FIELD_NONE;
-	pixm->colorspace = V4L2_COLORSPACE_DEFAULT;
-	pixm->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
-	pixm->quantization = V4L2_QUANTIZATION_DEFAULT;
-
-	info = rkisp1_fill_pixfmt(pixm, cap->id);
-
-	if (fmt_cfg)
-		*fmt_cfg = fmt;
-	if (fmt_info)
-		*fmt_info = info;
-}
-
-static void rkisp1_set_fmt(struct rkisp1_capture *cap,
-			   struct v4l2_pix_format_mplane *pixm)
-{
-	rkisp1_try_fmt(cap, pixm, &cap->pix.cfg, &cap->pix.info);
-	cap->pix.fmt = *pixm;
-
-	/* SP supports custom stride in number of pixels of the Y plane */
-	if (cap->id == RKISP1_SELFPATH)
-		cap->sp_y_stride = pixm->plane_fmt[0].bytesperline /
-				   cap->pix.info->bpp[0];
-}
-
-static int rkisp1_try_fmt_vid_cap_mplane(struct file *file, void *fh,
-					 struct v4l2_format *f)
-{
-	struct rkisp1_capture *cap = video_drvdata(file);
-
-	rkisp1_try_fmt(cap, &f->fmt.pix_mp, NULL, NULL);
-
-	return 0;
-}
-
-static int rkisp1_enum_fmt_vid_cap_mplane(struct file *file, void *priv,
-					  struct v4l2_fmtdesc *f)
-{
-	struct rkisp1_capture *cap = video_drvdata(file);
-	const struct rkisp1_capture_fmt_cfg *fmt = NULL;
-	unsigned int i, n = 0;
-
-	if (!f->mbus_code) {
-		if (f->index >= cap->config->fmt_size)
-			return -EINVAL;
-
-		fmt = &cap->config->fmts[f->index];
-		f->pixelformat = fmt->fourcc;
-		return 0;
-	}
-
-	for (i = 0; i < cap->config->fmt_size; i++) {
-		if (cap->config->fmts[i].mbus != f->mbus_code)
-			continue;
-
-		if (n++ == f->index) {
-			f->pixelformat = cap->config->fmts[i].fourcc;
-			return 0;
-		}
-	}
-	return -EINVAL;
-}
-
-static int rkisp1_s_fmt_vid_cap_mplane(struct file *file,
-				       void *priv, struct v4l2_format *f)
-{
-	struct rkisp1_capture *cap = video_drvdata(file);
-	struct rkisp1_vdev_node *node =
-				rkisp1_vdev_to_node(&cap->vnode.vdev);
-
-	if (vb2_is_busy(&node->buf_queue))
-		return -EBUSY;
-
-	rkisp1_set_fmt(cap, &f->fmt.pix_mp);
-
-	return 0;
-}
-
-static int rkisp1_g_fmt_vid_cap_mplane(struct file *file, void *fh,
-				       struct v4l2_format *f)
-{
-	struct rkisp1_capture *cap = video_drvdata(file);
-
-	f->fmt.pix_mp = cap->pix.fmt;
-
-	return 0;
-}
-
-static int
-rkisp1_querycap(struct file *file, void *priv, struct v4l2_capability *cap)
-{
-	struct rkisp1_capture *cap_dev = video_drvdata(file);
-	struct rkisp1_device *rkisp1 = cap_dev->rkisp1;
-
-	strscpy(cap->driver, rkisp1->dev->driver->name, sizeof(cap->driver));
-	strscpy(cap->card, rkisp1->dev->driver->name, sizeof(cap->card));
-	strscpy(cap->bus_info, RKISP1_BUS_INFO, sizeof(cap->bus_info));
-
-	return 0;
-}
-
-static const struct v4l2_ioctl_ops rkisp1_v4l2_ioctl_ops = {
-	.vidioc_reqbufs = vb2_ioctl_reqbufs,
-	.vidioc_querybuf = vb2_ioctl_querybuf,
-	.vidioc_create_bufs = vb2_ioctl_create_bufs,
-	.vidioc_qbuf = vb2_ioctl_qbuf,
-	.vidioc_expbuf = vb2_ioctl_expbuf,
-	.vidioc_dqbuf = vb2_ioctl_dqbuf,
-	.vidioc_prepare_buf = vb2_ioctl_prepare_buf,
-	.vidioc_streamon = vb2_ioctl_streamon,
-	.vidioc_streamoff = vb2_ioctl_streamoff,
-	.vidioc_try_fmt_vid_cap_mplane = rkisp1_try_fmt_vid_cap_mplane,
-	.vidioc_s_fmt_vid_cap_mplane = rkisp1_s_fmt_vid_cap_mplane,
-	.vidioc_g_fmt_vid_cap_mplane = rkisp1_g_fmt_vid_cap_mplane,
-	.vidioc_enum_fmt_vid_cap = rkisp1_enum_fmt_vid_cap_mplane,
-	.vidioc_querycap = rkisp1_querycap,
-	.vidioc_subscribe_event = v4l2_ctrl_subscribe_event,
-	.vidioc_unsubscribe_event = v4l2_event_unsubscribe,
-};
-
-static int rkisp1_capture_link_validate(struct media_link *link)
-{
-	struct video_device *vdev =
-		media_entity_to_video_device(link->sink->entity);
-	struct v4l2_subdev *sd =
-		media_entity_to_v4l2_subdev(link->source->entity);
-	struct rkisp1_capture *cap = video_get_drvdata(vdev);
-	const struct rkisp1_capture_fmt_cfg *fmt =
-		rkisp1_find_fmt_cfg(cap, cap->pix.fmt.pixelformat);
-	struct v4l2_subdev_format sd_fmt;
-	int ret;
-
-	sd_fmt.which = V4L2_SUBDEV_FORMAT_ACTIVE;
-	sd_fmt.pad = link->source->index;
-	ret = v4l2_subdev_call(sd, pad, get_fmt, NULL, &sd_fmt);
-	if (ret)
-		return ret;
-
-	if (sd_fmt.format.height != cap->pix.fmt.height ||
-	    sd_fmt.format.width != cap->pix.fmt.width ||
-	    sd_fmt.format.code != fmt->mbus)
-		return -EPIPE;
-
-	return 0;
-}
-
-/* ----------------------------------------------------------------------------
- * core functions
- */
-
-static const struct media_entity_operations rkisp1_media_ops = {
-	.link_validate = rkisp1_capture_link_validate,
-};
-
-static const struct v4l2_file_operations rkisp1_fops = {
-	.open = v4l2_fh_open,
-	.release = vb2_fop_release,
-	.unlocked_ioctl = video_ioctl2,
-	.poll = vb2_fop_poll,
-	.mmap = vb2_fop_mmap,
-};
-
-static void rkisp1_unregister_capture(struct rkisp1_capture *cap)
-{
-	media_entity_cleanup(&cap->vnode.vdev.entity);
-	vb2_video_unregister_device(&cap->vnode.vdev);
-}
-
-void rkisp1_capture_devs_unregister(struct rkisp1_device *rkisp1)
-{
-	struct rkisp1_capture *mp = &rkisp1->capture_devs[RKISP1_MAINPATH];
-	struct rkisp1_capture *sp = &rkisp1->capture_devs[RKISP1_SELFPATH];
-
-	rkisp1_unregister_capture(mp);
-	rkisp1_unregister_capture(sp);
-}
-
-static int rkisp1_register_capture(struct rkisp1_capture *cap)
-{
-	const char * const dev_names[] = {RKISP1_MP_DEV_NAME,
-					  RKISP1_SP_DEV_NAME};
-	struct v4l2_device *v4l2_dev = &cap->rkisp1->v4l2_dev;
-	struct video_device *vdev = &cap->vnode.vdev;
-	struct rkisp1_vdev_node *node;
-	struct vb2_queue *q;
-	int ret;
-
-	strscpy(vdev->name, dev_names[cap->id], sizeof(vdev->name));
-	node = rkisp1_vdev_to_node(vdev);
-	mutex_init(&node->vlock);
-
-	vdev->ioctl_ops = &rkisp1_v4l2_ioctl_ops;
-	vdev->release = video_device_release_empty;
-	vdev->fops = &rkisp1_fops;
-	vdev->minor = -1;
-	vdev->v4l2_dev = v4l2_dev;
-	vdev->lock = &node->vlock;
-	vdev->device_caps = V4L2_CAP_VIDEO_CAPTURE_MPLANE |
-			    V4L2_CAP_STREAMING | V4L2_CAP_IO_MC;
-	vdev->entity.ops = &rkisp1_media_ops;
-	video_set_drvdata(vdev, cap);
-	vdev->vfl_dir = VFL_DIR_RX;
-	node->pad.flags = MEDIA_PAD_FL_SINK;
-
-	q = &node->buf_queue;
-	q->type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
-	q->io_modes = VB2_MMAP | VB2_DMABUF;
-	q->drv_priv = cap;
-	q->ops = &rkisp1_vb2_ops;
-	q->mem_ops = &vb2_dma_contig_memops;
-	q->buf_struct_size = sizeof(struct rkisp1_buffer);
-	q->min_buffers_needed = RKISP1_MIN_BUFFERS_NEEDED;
-	q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
-	q->lock = &node->vlock;
-	q->dev = cap->rkisp1->dev;
-	ret = vb2_queue_init(q);
-	if (ret) {
-		dev_err(cap->rkisp1->dev,
-			"vb2 queue init failed (err=%d)\n", ret);
-		return ret;
-	}
-
-	vdev->queue = q;
-
-	ret = video_register_device(vdev, VFL_TYPE_VIDEO, -1);
-	if (ret) {
-		dev_err(cap->rkisp1->dev,
-			"failed to register %s, ret=%d\n", vdev->name, ret);
-		return ret;
-	}
-	v4l2_info(v4l2_dev, "registered %s as /dev/video%d\n", vdev->name,
-		  vdev->num);
-
-	ret = media_entity_pads_init(&vdev->entity, 1, &node->pad);
-	if (ret) {
-		video_unregister_device(vdev);
-		return ret;
-	}
-
-	return 0;
-}
-
-static void
-rkisp1_capture_init(struct rkisp1_device *rkisp1, enum rkisp1_stream_id id)
-{
-	struct rkisp1_capture *cap = &rkisp1->capture_devs[id];
-	struct v4l2_pix_format_mplane pixm;
-
-	memset(cap, 0, sizeof(*cap));
-	cap->id = id;
-	cap->rkisp1 = rkisp1;
-
-	INIT_LIST_HEAD(&cap->buf.queue);
-	init_waitqueue_head(&cap->done);
-	spin_lock_init(&cap->buf.lock);
-	if (cap->id == RKISP1_SELFPATH) {
-		cap->ops = &rkisp1_capture_ops_sp;
-		cap->config = &rkisp1_capture_config_sp;
-	} else {
-		cap->ops = &rkisp1_capture_ops_mp;
-		cap->config = &rkisp1_capture_config_mp;
-	}
-
-	cap->is_streaming = false;
-
-	memset(&pixm, 0, sizeof(pixm));
-	pixm.pixelformat = V4L2_PIX_FMT_YUYV;
-	pixm.width = RKISP1_DEFAULT_WIDTH;
-	pixm.height = RKISP1_DEFAULT_HEIGHT;
-	rkisp1_set_fmt(cap, &pixm);
-}
-
-int rkisp1_capture_devs_register(struct rkisp1_device *rkisp1)
-{
-	struct rkisp1_capture *cap;
-	unsigned int i, j;
-	int ret;
-
-	for (i = 0; i < ARRAY_SIZE(rkisp1->capture_devs); i++) {
-		rkisp1_capture_init(rkisp1, i);
-		cap = &rkisp1->capture_devs[i];
-		cap->rkisp1 = rkisp1;
-		ret = rkisp1_register_capture(cap);
-		if (ret)
-			goto err_unreg_capture_devs;
-	}
-
-	return 0;
-
-err_unreg_capture_devs:
-	for (j = 0; j < i; j++) {
-		cap = &rkisp1->capture_devs[j];
-		rkisp1_unregister_capture(cap);
-	}
-
-	return ret;
-}
diff --git a/drivers/staging/media/rkisp1/rkisp1-common.c b/drivers/staging/media/rkisp1/rkisp1-common.c
deleted file mode 100644
index cf889666e166..000000000000
--- a/drivers/staging/media/rkisp1/rkisp1-common.c
+++ /dev/null
@@ -1,37 +0,0 @@
-// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
-/*
- * Rockchip ISP1 Driver - Common definitions
- *
- * Copyright (C) 2019 Collabora, Ltd.
- */
-
-#include <media/v4l2-rect.h>
-
-#include "rkisp1-common.h"
-
-static const struct v4l2_rect rkisp1_sd_min_crop = {
-	.width = RKISP1_ISP_MIN_WIDTH,
-	.height = RKISP1_ISP_MIN_HEIGHT,
-	.top = 0,
-	.left = 0,
-};
-
-void rkisp1_sd_adjust_crop_rect(struct v4l2_rect *crop,
-				const struct v4l2_rect *bounds)
-{
-	v4l2_rect_set_min_size(crop, &rkisp1_sd_min_crop);
-	v4l2_rect_map_inside(crop, bounds);
-}
-
-void rkisp1_sd_adjust_crop(struct v4l2_rect *crop,
-			   const struct v4l2_mbus_framefmt *bounds)
-{
-	struct v4l2_rect crop_bounds = {
-		.left = 0,
-		.top = 0,
-		.width = bounds->width,
-		.height = bounds->height,
-	};
-
-	rkisp1_sd_adjust_crop_rect(crop, &crop_bounds);
-}
diff --git a/drivers/staging/media/rkisp1/rkisp1-common.h b/drivers/staging/media/rkisp1/rkisp1-common.h
deleted file mode 100644
index 692333c66f9d..000000000000
--- a/drivers/staging/media/rkisp1/rkisp1-common.h
+++ /dev/null
@@ -1,485 +0,0 @@
-/* SPDX-License-Identifier: (GPL-2.0+ OR MIT) */
-/*
- * Rockchip ISP1 Driver - Common definitions
- *
- * Copyright (C) 2019 Collabora, Ltd.
- *
- * Based on Rockchip ISP1 driver by Rockchip Electronics Co., Ltd.
- * Copyright (C) 2017 Rockchip Electronics Co., Ltd.
- */
-
-#ifndef _RKISP1_COMMON_H
-#define _RKISP1_COMMON_H
-
-#include <linux/clk.h>
-#include <linux/mutex.h>
-#include <media/media-device.h>
-#include <media/media-entity.h>
-#include <media/v4l2-ctrls.h>
-#include <media/v4l2-device.h>
-#include <media/videobuf2-v4l2.h>
-
-#include "rkisp1-regs.h"
-#include "uapi/rkisp1-config.h"
-
-/*
- * flags on the 'direction' field in struct 'rkisp1_isp_mbus_info' that indicate
- * on which pad the media bus format is supported
- */
-#define RKISP1_ISP_SD_SRC BIT(0)
-#define RKISP1_ISP_SD_SINK BIT(1)
-
-/* min and max values for the widths and heights of the entities */
-#define RKISP1_ISP_MAX_WIDTH		4032
-#define RKISP1_ISP_MAX_HEIGHT		3024
-#define RKISP1_ISP_MIN_WIDTH		32
-#define RKISP1_ISP_MIN_HEIGHT		32
-
-#define RKISP1_RSZ_MP_SRC_MAX_WIDTH		4416
-#define RKISP1_RSZ_MP_SRC_MAX_HEIGHT		3312
-#define RKISP1_RSZ_SP_SRC_MAX_WIDTH		1920
-#define RKISP1_RSZ_SP_SRC_MAX_HEIGHT		1920
-#define RKISP1_RSZ_SRC_MIN_WIDTH		32
-#define RKISP1_RSZ_SRC_MIN_HEIGHT		16
-
-/* the default width and height of all the entities */
-#define RKISP1_DEFAULT_WIDTH		800
-#define RKISP1_DEFAULT_HEIGHT		600
-
-#define RKISP1_DRIVER_NAME	"rkisp1"
-#define RKISP1_BUS_INFO		"platform:" RKISP1_DRIVER_NAME
-
-/* maximum number of clocks */
-#define RKISP1_MAX_BUS_CLK	8
-
-/* a bitmask of the ready stats */
-#define RKISP1_STATS_MEAS_MASK		(RKISP1_CIF_ISP_AWB_DONE |	\
-					 RKISP1_CIF_ISP_AFM_FIN |	\
-					 RKISP1_CIF_ISP_EXP_END |	\
-					 RKISP1_CIF_ISP_HIST_MEASURE_RDY)
-
-/* enum for the resizer pads */
-enum rkisp1_rsz_pad {
-	RKISP1_RSZ_PAD_SINK,
-	RKISP1_RSZ_PAD_SRC,
-	RKISP1_RSZ_PAD_MAX
-};
-
-/* enum for the capture id */
-enum rkisp1_stream_id {
-	RKISP1_MAINPATH,
-	RKISP1_SELFPATH,
-};
-
-/* bayer patterns */
-enum rkisp1_fmt_raw_pat_type {
-	RKISP1_RAW_RGGB = 0,
-	RKISP1_RAW_GRBG,
-	RKISP1_RAW_GBRG,
-	RKISP1_RAW_BGGR,
-};
-
-/* enum for the isp pads */
-enum rkisp1_isp_pad {
-	RKISP1_ISP_PAD_SINK_VIDEO,
-	RKISP1_ISP_PAD_SINK_PARAMS,
-	RKISP1_ISP_PAD_SOURCE_VIDEO,
-	RKISP1_ISP_PAD_SOURCE_STATS,
-	RKISP1_ISP_PAD_MAX
-};
-
-/*
- * struct rkisp1_sensor_async - A container for the v4l2_async_subdev to add to the notifier
- *				of the v4l2-async API
- *
- * @asd:		async_subdev variable for the sensor
- * @lanes:		number of lanes
- * @mbus_type:		type of bus (currently only CSI2 is supported)
- * @mbus_flags:		media bus (V4L2_MBUS_*) flags
- * @sd:			a pointer to v4l2_subdev struct of the sensor
- * @pixel_rate_ctrl:	pixel rate of the sensor, used to initialize the phy
- * @dphy:		a pointer to the phy
- */
-struct rkisp1_sensor_async {
-	struct v4l2_async_subdev asd;
-	unsigned int lanes;
-	enum v4l2_mbus_type mbus_type;
-	unsigned int mbus_flags;
-	struct v4l2_subdev *sd;
-	struct v4l2_ctrl *pixel_rate_ctrl;
-	struct phy *dphy;
-};
-
-/*
- * struct rkisp1_isp - ISP subdev entity
- *
- * @sd:				v4l2_subdev variable
- * @rkisp1:			pointer to rkisp1_device
- * @pads:			media pads
- * @pad_cfg:			pads configurations
- * @sink_fmt:			input format
- * @src_fmt:			output format
- * @ops_lock:			ops serialization
- * @is_dphy_errctrl_disabled:	if dphy errctrl is disabled (avoid endless interrupt)
- * @frame_sequence:		used to synchronize frame_id between video devices.
- */
-struct rkisp1_isp {
-	struct v4l2_subdev sd;
-	struct media_pad pads[RKISP1_ISP_PAD_MAX];
-	struct v4l2_subdev_pad_config pad_cfg[RKISP1_ISP_PAD_MAX];
-	const struct rkisp1_isp_mbus_info *sink_fmt;
-	const struct rkisp1_isp_mbus_info *src_fmt;
-	struct mutex ops_lock; /* serialize the subdevice ops */
-	bool is_dphy_errctrl_disabled;
-	__u32 frame_sequence;
-};
-
-/*
- * struct rkisp1_vdev_node - Container for the video nodes: params, stats, mainpath, selfpath
- *
- * @buf_queue:	queue of buffers
- * @vlock:	lock of the video node
- * @vdev:	video node
- * @pad:	media pad
- */
-struct rkisp1_vdev_node {
-	struct vb2_queue buf_queue;
-	struct mutex vlock; /* ioctl serialization mutex */
-	struct video_device vdev;
-	struct media_pad pad;
-};
-
-/*
- * struct rkisp1_buffer - A container for the vb2 buffers used by the video devices:
- *			  params, stats, mainpath, selfpath
- *
- * @vb:		vb2 buffer
- * @queue:	entry of the buffer in the queue
- * @buff_addr:	dma addresses of each plane, used only by the capture devices: selfpath, mainpath
- * @vaddr:	virtual address for buffers used by params and stats devices
- */
-struct rkisp1_buffer {
-	struct vb2_v4l2_buffer vb;
-	struct list_head queue;
-	union {
-		u32 buff_addr[VIDEO_MAX_PLANES];
-		void *vaddr;
-	};
-};
-
-/*
- * struct rkisp1_dummy_buffer - A buffer to write the next frame to in case
- *				there are no vb2 buffers available.
- *
- * @vaddr:	return value of call to dma_alloc_attrs.
- * @dma_addr:	dma address of the buffer.
- * @size:	size of the buffer.
- */
-struct rkisp1_dummy_buffer {
-	void *vaddr;
-	dma_addr_t dma_addr;
-	u32 size;
-};
-
-struct rkisp1_device;
-
-/*
- * struct rkisp1_capture - ISP capture video device
- *
- * @vnode:	  video node
- * @rkisp1:	  pointer to rkisp1_device
- * @id:		  id of the capture, one of RKISP1_SELFPATH, RKISP1_MAINPATH
- * @ops:	  list of callbacks to configure the capture device.
- * @config:	  a pointer to the list of registers to configure the capture format.
- * @is_streaming: device is streaming
- * @is_stopping:  stop_streaming callback was called and the device is in the process of
- *		  stopping the streaming.
- * @done:	  when stop_streaming callback is called, the device waits for the next irq
- *		  handler to stop the streaming by waiting on the 'done' wait queue.
- *		  If the irq handler is not called, the stream is stopped by the callback
- *		  after timeout.
- * @sp_y_stride:  the selfpath allows to configure a y stride that is longer than the image width.
- * @buf.lock:	  lock to protect buf.queue
- * @buf.queue:	  queued buffer list
- * @buf.dummy:	  dummy space to store dropped data
- *
- * rkisp1 uses shadow registers, so it needs two buffers at a time
- * @buf.curr:	  the buffer used for current frame
- * @buf.next:	  the buffer used for next frame
- * @pix.cfg:	  pixel configuration
- * @pix.info:	  a pointer to the v4l2_format_info of the pixel format
- * @pix.fmt:	  buffer format
- */
-struct rkisp1_capture {
-	struct rkisp1_vdev_node vnode;
-	struct rkisp1_device *rkisp1;
-	enum rkisp1_stream_id id;
-	struct rkisp1_capture_ops *ops;
-	const struct rkisp1_capture_config *config;
-	bool is_streaming;
-	bool is_stopping;
-	wait_queue_head_t done;
-	unsigned int sp_y_stride;
-	struct {
-		/* protects queue, curr and next */
-		spinlock_t lock;
-		struct list_head queue;
-		struct rkisp1_dummy_buffer dummy;
-		struct rkisp1_buffer *curr;
-		struct rkisp1_buffer *next;
-	} buf;
-	struct {
-		const struct rkisp1_capture_fmt_cfg *cfg;
-		const struct v4l2_format_info *info;
-		struct v4l2_pix_format_mplane fmt;
-	} pix;
-};
-
-/*
- * struct rkisp1_stats - ISP Statistics device
- *
- * @vnode:	  video node
- * @rkisp1:	  pointer to the rkisp1 device
- * @lock:	  locks the buffer list 'stat'
- * @stat:	  queue of rkisp1_buffer
- * @vdev_fmt:	  v4l2_format of the metadata format
- */
-struct rkisp1_stats {
-	struct rkisp1_vdev_node vnode;
-	struct rkisp1_device *rkisp1;
-
-	spinlock_t lock; /* locks the buffers list 'stats' */
-	struct list_head stat;
-	struct v4l2_format vdev_fmt;
-};
-
-/*
- * struct rkisp1_params - ISP input parameters device
- *
- * @vnode:		video node
- * @rkisp1:		pointer to the rkisp1 device
- * @config_lock:	locks the buffer list 'params'
- * @params:		queue of rkisp1_buffer
- * @vdev_fmt:		v4l2_format of the metadata format
- * @quantization:	the quantization configured on the isp's src pad
- * @raw_type:		the bayer pattern on the isp video sink pad
- */
-struct rkisp1_params {
-	struct rkisp1_vdev_node vnode;
-	struct rkisp1_device *rkisp1;
-
-	spinlock_t config_lock; /* locks the buffers list 'params' */
-	struct list_head params;
-	struct v4l2_format vdev_fmt;
-
-	enum v4l2_quantization quantization;
-	enum rkisp1_fmt_raw_pat_type raw_type;
-};
-
-/*
- * struct rkisp1_resizer - Resizer subdev
- *
- * @sd:	       v4l2_subdev variable
- * @id:	       id of the resizer, one of RKISP1_SELFPATH, RKISP1_MAINPATH
- * @rkisp1:    pointer to the rkisp1 device
- * @pads:      media pads
- * @pad_cfg:   configurations for the pads
- * @config:    the set of registers to configure the resizer
- * @pixel_enc: pixel encoding of the resizer
- * @ops_lock:  a lock for the subdev ops
- */
-struct rkisp1_resizer {
-	struct v4l2_subdev sd;
-	enum rkisp1_stream_id id;
-	struct rkisp1_device *rkisp1;
-	struct media_pad pads[RKISP1_RSZ_PAD_MAX];
-	struct v4l2_subdev_pad_config pad_cfg[RKISP1_RSZ_PAD_MAX];
-	const struct rkisp1_rsz_config *config;
-	enum v4l2_pixel_encoding pixel_enc;
-	struct mutex ops_lock; /* serialize the subdevice ops */
-};
-
-/*
- * struct rkisp1_debug - Values to be exposed on debugfs.
- *			 The parameters are counters of the number of times the
- *			 event occurred since the driver was loaded.
- *
- * @data_loss:			  loss of data occurred within a line, processing failure
- * @outform_size_error:		  size error is generated in outmux submodule
- * @img_stabilization_size_error: size error is generated in image stabilization submodule
- * @inform_size_err:		  size error is generated in inform submodule
- * @mipi_error:			  mipi error occurred
- * @stats_error:		  writing to the 'Interrupt clear register' did not clear
- *				  it in the register 'Masked interrupt status'
- * @stop_timeout:		  upon stream stop, the capture waits 1 second for the isr to stop
- *				  the stream. This param is incremented in case of timeout.
- * @frame_drop:			  a frame was ready but the buffer queue was empty so the frame
- *				  was not sent to userspace
- */
-struct rkisp1_debug {
-	struct dentry *debugfs_dir;
-	unsigned long data_loss;
-	unsigned long outform_size_error;
-	unsigned long img_stabilization_size_error;
-	unsigned long inform_size_error;
-	unsigned long irq_delay;
-	unsigned long mipi_error;
-	unsigned long stats_error;
-	unsigned long stop_timeout[2];
-	unsigned long frame_drop[2];
-};
-
-/*
- * struct rkisp1_device - ISP platform device
- *
- * @base_addr:	   base register address
- * @irq:	   the irq number
- * @dev:	   a pointer to the struct device
- * @clk_size:	   number of clocks
- * @clks:	   array of clocks
- * @v4l2_dev:	   v4l2_device variable
- * @media_dev:	   media_device variable
- * @notifier:	   a notifier to register on the v4l2-async API to be notified on the sensor
- * @active_sensor: sensor in-use, set when streaming on
- * @isp:	   ISP sub-device
- * @resizer_devs:  resizer sub-devices
- * @capture_devs:  capture devices
- * @stats:	   ISP statistics metadata capture device
- * @params:	   ISP parameters metadata output device
- * @pipe:	   media pipeline
- * @stream_lock:   serializes {start/stop}_streaming callbacks between the capture devices.
- * @debug:	   debug params to be exposed on debugfs
- */
-struct rkisp1_device {
-	void __iomem *base_addr;
-	int irq;
-	struct device *dev;
-	unsigned int clk_size;
-	struct clk_bulk_data clks[RKISP1_MAX_BUS_CLK];
-	struct v4l2_device v4l2_dev;
-	struct media_device media_dev;
-	struct v4l2_async_notifier notifier;
-	struct rkisp1_sensor_async *active_sensor;
-	struct rkisp1_isp isp;
-	struct rkisp1_resizer resizer_devs[2];
-	struct rkisp1_capture capture_devs[2];
-	struct rkisp1_stats stats;
-	struct rkisp1_params params;
-	struct media_pipeline pipe;
-	struct mutex stream_lock; /* serialize {start/stop}_streaming cb between capture devices */
-	struct rkisp1_debug debug;
-};
-
-/*
- * struct rkisp1_isp_mbus_info - ISP media bus info, Translates media bus code to hardware
- *				 format values
- *
- * @mbus_code: media bus code
- * @pixel_enc: pixel encoding
- * @mipi_dt:   mipi data type
- * @yuv_seq:   the order of the Y, Cb, Cr values
- * @bus_width: bus width
- * @bayer_pat: bayer pattern
- * @direction: a bitmask of the flags indicating on which pad the format is supported on
- */
-struct rkisp1_isp_mbus_info {
-	u32 mbus_code;
-	enum v4l2_pixel_encoding pixel_enc;
-	u32 mipi_dt;
-	u32 yuv_seq;
-	u8 bus_width;
-	enum rkisp1_fmt_raw_pat_type bayer_pat;
-	unsigned int direction;
-};
-
-static inline void
-rkisp1_write(struct rkisp1_device *rkisp1, u32 val, unsigned int addr)
-{
-	writel(val, rkisp1->base_addr + addr);
-}
-
-static inline u32 rkisp1_read(struct rkisp1_device *rkisp1, unsigned int addr)
-{
-	return readl(rkisp1->base_addr + addr);
-}
-
-/*
- * rkisp1_cap_enum_mbus_codes - A helper function that return the i'th supported mbus code
- *				of the capture entity. This is used to enumerate the supported
- *				mbus codes on the source pad of the resizer.
- *
- * @cap:  the capture entity
- * @code: the mbus code, the function reads the code->index and fills the code->code
- */
-int rkisp1_cap_enum_mbus_codes(struct rkisp1_capture *cap,
-			       struct v4l2_subdev_mbus_code_enum *code);
-
-/*
- * rkisp1_sd_adjust_crop_rect - adjust a rectangle to fit into another rectangle.
- *
- * @crop:   rectangle to adjust.
- * @bounds: rectangle used as bounds.
- */
-void rkisp1_sd_adjust_crop_rect(struct v4l2_rect *crop,
-				const struct v4l2_rect *bounds);
-
-/*
- * rkisp1_sd_adjust_crop - adjust a rectangle to fit into media bus format
- *
- * @crop:   rectangle to adjust.
- * @bounds: media bus format used as bounds.
- */
-void rkisp1_sd_adjust_crop(struct v4l2_rect *crop,
-			   const struct v4l2_mbus_framefmt *bounds);
-
-/*
- * rkisp1_isp_mbus_info - get the isp info of the media bus code
- *
- * @mbus_code: the media bus code
- */
-const struct rkisp1_isp_mbus_info *rkisp1_isp_mbus_info_get(u32 mbus_code);
-
-/* rkisp1_params_configure - configure the params when stream starts.
- *			     This function is called by the isp entity upon stream starts.
- *			     The function applies the initial configuration of the parameters.
- *
- * @params:	  pointer to rkisp1_params.
- * @bayer_pat:	  the bayer pattern on the isp video sink pad
- * @quantization: the quantization configured on the isp's src pad
- */
-void rkisp1_params_configure(struct rkisp1_params *params,
-			     enum rkisp1_fmt_raw_pat_type bayer_pat,
-			     enum v4l2_quantization quantization);
-
-/* rkisp1_params_disable - disable all parameters.
- *			   This function is called by the isp entity upon stream start
- *			   when capturing bayer format.
- *
- * @params: pointer to rkisp1_params.
- */
-void rkisp1_params_disable(struct rkisp1_params *params);
-
-/* irq handlers */
-void rkisp1_isp_isr(struct rkisp1_device *rkisp1);
-void rkisp1_mipi_isr(struct rkisp1_device *rkisp1);
-void rkisp1_capture_isr(struct rkisp1_device *rkisp1);
-void rkisp1_stats_isr(struct rkisp1_stats *stats, u32 isp_ris);
-void rkisp1_params_isr(struct rkisp1_device *rkisp1);
-
-/* register/unregisters functions of the entities */
-int rkisp1_capture_devs_register(struct rkisp1_device *rkisp1);
-void rkisp1_capture_devs_unregister(struct rkisp1_device *rkisp1);
-
-int rkisp1_isp_register(struct rkisp1_device *rkisp1);
-void rkisp1_isp_unregister(struct rkisp1_device *rkisp1);
-
-int rkisp1_resizer_devs_register(struct rkisp1_device *rkisp1);
-void rkisp1_resizer_devs_unregister(struct rkisp1_device *rkisp1);
-
-int rkisp1_stats_register(struct rkisp1_device *rkisp1);
-void rkisp1_stats_unregister(struct rkisp1_device *rkisp1);
-
-int rkisp1_params_register(struct rkisp1_device *rkisp1);
-void rkisp1_params_unregister(struct rkisp1_device *rkisp1);
-
-#endif /* _RKISP1_COMMON_H */
diff --git a/drivers/staging/media/rkisp1/rkisp1-dev.c b/drivers/staging/media/rkisp1/rkisp1-dev.c
deleted file mode 100644
index 90d654346556..000000000000
--- a/drivers/staging/media/rkisp1/rkisp1-dev.c
+++ /dev/null
@@ -1,580 +0,0 @@
-// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
-/*
- * Rockchip ISP1 Driver - Base driver
- *
- * Copyright (C) 2019 Collabora, Ltd.
- *
- * Based on Rockchip ISP1 driver by Rockchip Electronics Co., Ltd.
- * Copyright (C) 2017 Rockchip Electronics Co., Ltd.
- */
-
-#include <linux/clk.h>
-#include <linux/debugfs.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_graph.h>
-#include <linux/of_platform.h>
-#include <linux/pinctrl/consumer.h>
-#include <linux/phy/phy.h>
-#include <linux/phy/phy-mipi-dphy.h>
-#include <media/v4l2-fwnode.h>
-
-#include "rkisp1-common.h"
-
-/*
- * ISP Details
- * -----------
- *
- * ISP Comprises with:
- *	MIPI serial camera interface
- *	Image Signal Processing
- *	Many Image Enhancement Blocks
- *	Crop
- *	Resizer
- *	RBG display ready image
- *	Image Rotation
- *
- * ISP Block Diagram
- * -----------------
- *                                                             rkisp1-resizer.c          rkisp1-capture.c
- *                                                          |====================|  |=======================|
- *                                rkisp1-isp.c                              Main Picture Path
- *                        |==========================|      |===============================================|
- *                        +-----------+  +--+--+--+--+      +--------+  +--------+              +-----------+
- *                        |           |  |  |  |  |  |      |        |  |        |              |           |
- * +--------+    |\       |           |  |  |  |  |  |   -->|  Crop  |->|  RSZ   |------------->|           |
- * |  MIPI  |--->|  \     |           |  |  |  |  |  |   |  |        |  |        |              |           |
- * +--------+    |   |    |           |  |IE|IE|IE|IE|   |  +--------+  +--------+              |  Memory   |
- *               |MUX|--->|    ISP    |->|0 |1 |2 |3 |---+                                      | Interface |
- * +--------+    |   |    |           |  |  |  |  |  |   |  +--------+  +--------+  +--------+  |           |
- * |Parallel|--->|  /     |           |  |  |  |  |  |   |  |        |  |        |  |        |  |           |
- * +--------+    |/       |           |  |  |  |  |  |   -->|  Crop  |->|  RSZ   |->|  RGB   |->|           |
- *                        |           |  |  |  |  |  |      |        |  |        |  | Rotate |  |           |
- *                        +-----------+  +--+--+--+--+      +--------+  +--------+  +--------+  +-----------+
- *                                               ^
- * +--------+                                    |          |===============================================|
- * |  DMA   |------------------------------------+                          Self Picture Path
- * +--------+
- *
- *         rkisp1-stats.c        rkisp1-params.c
- *       |===============|      |===============|
- *       +---------------+      +---------------+
- *       |               |      |               |
- *       |      ISP      |      |      ISP      |
- *       |               |      |               |
- *       +---------------+      +---------------+
- *
- *
- * Media Topology
- * --------------
- *      +----------+     +----------+
- *      | Sensor 2 |     | Sensor X |
- *      ------------ ... ------------
- *      |    0     |     |    0     |
- *      +----------+     +----------+      +-----------+
- *                  \      |               |  params   |
- *                   \     |               | (output)  |
- *    +----------+    \    |               +-----------+
- *    | Sensor 1 |     v   v                     |
- *    ------------      +------+------+          |
- *    |    0     |----->|  0   |  1   |<---------+
- *    +----------+      |------+------|
- *                      |     ISP     |
- *                      |------+------|
- *        +-------------|  2   |  3   |----------+
- *        |             +------+------+          |
- *        |                |                     |
- *        v                v                     v
- *  +- ---------+    +-----------+         +-----------+
- *  |     0     |    |     0     |         |   stats   |
- *  -------------    -------------         | (capture) |
- *  |  Resizer  |    |  Resizer  |         +-----------+
- *  ------------|    ------------|
- *  |     1     |    |     1     |
- *  +-----------+    +-----------+
- *        |                |
- *        v                v
- *  +-----------+    +-----------+
- *  | selfpath  |    | mainpath  |
- *  | (capture) |    | (capture) |
- *  +-----------+    +-----------+
- */
-
-struct rkisp1_match_data {
-	const char * const *clks;
-	unsigned int size;
-};
-
-/* ----------------------------------------------------------------------------
- * Sensor DT bindings
- */
-
-static int rkisp1_create_links(struct rkisp1_device *rkisp1)
-{
-	struct media_entity *source, *sink;
-	unsigned int flags, source_pad;
-	struct v4l2_subdev *sd;
-	unsigned int i;
-	int ret;
-
-	/* sensor links */
-	flags = MEDIA_LNK_FL_ENABLED;
-	list_for_each_entry(sd, &rkisp1->v4l2_dev.subdevs, list) {
-		if (sd == &rkisp1->isp.sd ||
-		    sd == &rkisp1->resizer_devs[RKISP1_MAINPATH].sd ||
-		    sd == &rkisp1->resizer_devs[RKISP1_SELFPATH].sd)
-			continue;
-
-		ret = media_entity_get_fwnode_pad(&sd->entity, sd->fwnode,
-						  MEDIA_PAD_FL_SOURCE);
-		if (ret < 0) {
-			dev_err(rkisp1->dev, "failed to find src pad for %s\n",
-				sd->name);
-			return ret;
-		}
-		source_pad = ret;
-
-		ret = media_create_pad_link(&sd->entity, source_pad,
-					    &rkisp1->isp.sd.entity,
-					    RKISP1_ISP_PAD_SINK_VIDEO,
-					    flags);
-		if (ret)
-			return ret;
-
-		flags = 0;
-	}
-
-	flags = MEDIA_LNK_FL_ENABLED | MEDIA_LNK_FL_IMMUTABLE;
-
-	/* create ISP->RSZ->CAP links */
-	for (i = 0; i < 2; i++) {
-		source = &rkisp1->isp.sd.entity;
-		sink = &rkisp1->resizer_devs[i].sd.entity;
-		ret = media_create_pad_link(source, RKISP1_ISP_PAD_SOURCE_VIDEO,
-					    sink, RKISP1_RSZ_PAD_SINK,
-					    MEDIA_LNK_FL_ENABLED);
-		if (ret)
-			return ret;
-
-		source = sink;
-		sink = &rkisp1->capture_devs[i].vnode.vdev.entity;
-		ret = media_create_pad_link(source, RKISP1_RSZ_PAD_SRC,
-					    sink, 0, flags);
-		if (ret)
-			return ret;
-	}
-
-	/* params links */
-	source = &rkisp1->params.vnode.vdev.entity;
-	sink = &rkisp1->isp.sd.entity;
-	ret = media_create_pad_link(source, 0, sink,
-				    RKISP1_ISP_PAD_SINK_PARAMS, flags);
-	if (ret)
-		return ret;
-
-	/* 3A stats links */
-	source = &rkisp1->isp.sd.entity;
-	sink = &rkisp1->stats.vnode.vdev.entity;
-	return media_create_pad_link(source, RKISP1_ISP_PAD_SOURCE_STATS,
-				     sink, 0, flags);
-}
-
-static int rkisp1_subdev_notifier_bound(struct v4l2_async_notifier *notifier,
-					struct v4l2_subdev *sd,
-					struct v4l2_async_subdev *asd)
-{
-	struct rkisp1_device *rkisp1 =
-		container_of(notifier, struct rkisp1_device, notifier);
-	struct rkisp1_sensor_async *s_asd =
-		container_of(asd, struct rkisp1_sensor_async, asd);
-
-	s_asd->pixel_rate_ctrl = v4l2_ctrl_find(sd->ctrl_handler,
-						V4L2_CID_PIXEL_RATE);
-	s_asd->sd = sd;
-	s_asd->dphy = devm_phy_get(rkisp1->dev, "dphy");
-	if (IS_ERR(s_asd->dphy)) {
-		if (PTR_ERR(s_asd->dphy) != -EPROBE_DEFER)
-			dev_err(rkisp1->dev, "Couldn't get the MIPI D-PHY\n");
-		return PTR_ERR(s_asd->dphy);
-	}
-
-	phy_init(s_asd->dphy);
-
-	return 0;
-}
-
-static void rkisp1_subdev_notifier_unbind(struct v4l2_async_notifier *notifier,
-					  struct v4l2_subdev *sd,
-					  struct v4l2_async_subdev *asd)
-{
-	struct rkisp1_sensor_async *s_asd =
-		container_of(asd, struct rkisp1_sensor_async, asd);
-
-	phy_exit(s_asd->dphy);
-}
-
-static int rkisp1_subdev_notifier_complete(struct v4l2_async_notifier *notifier)
-{
-	struct rkisp1_device *rkisp1 =
-		container_of(notifier, struct rkisp1_device, notifier);
-	int ret;
-
-	ret = rkisp1_create_links(rkisp1);
-	if (ret)
-		return ret;
-
-	ret = v4l2_device_register_subdev_nodes(&rkisp1->v4l2_dev);
-	if (ret)
-		return ret;
-
-	dev_dbg(rkisp1->dev, "Async subdev notifier completed\n");
-
-	return 0;
-}
-
-static const struct v4l2_async_notifier_operations rkisp1_subdev_notifier_ops = {
-	.bound = rkisp1_subdev_notifier_bound,
-	.unbind = rkisp1_subdev_notifier_unbind,
-	.complete = rkisp1_subdev_notifier_complete,
-};
-
-static int rkisp1_subdev_notifier(struct rkisp1_device *rkisp1)
-{
-	struct v4l2_async_notifier *ntf = &rkisp1->notifier;
-	unsigned int next_id = 0;
-	int ret;
-
-	v4l2_async_notifier_init(ntf);
-
-	while (1) {
-		struct v4l2_fwnode_endpoint vep = {
-			.bus_type = V4L2_MBUS_CSI2_DPHY
-		};
-		struct rkisp1_sensor_async *rk_asd = NULL;
-		struct fwnode_handle *ep;
-
-		ep = fwnode_graph_get_endpoint_by_id(dev_fwnode(rkisp1->dev),
-			0, next_id, FWNODE_GRAPH_ENDPOINT_NEXT);
-		if (!ep)
-			break;
-
-		ret = v4l2_fwnode_endpoint_parse(ep, &vep);
-		if (ret)
-			goto err_parse;
-
-		rk_asd = kzalloc(sizeof(*rk_asd), GFP_KERNEL);
-		if (!rk_asd) {
-			ret = -ENOMEM;
-			goto err_parse;
-		}
-
-		rk_asd->mbus_type = vep.bus_type;
-		rk_asd->mbus_flags = vep.bus.mipi_csi2.flags;
-		rk_asd->lanes = vep.bus.mipi_csi2.num_data_lanes;
-
-		ret = v4l2_async_notifier_add_fwnode_remote_subdev(ntf, ep,
-								   &rk_asd->asd);
-		if (ret)
-			goto err_parse;
-
-		dev_dbg(rkisp1->dev, "registered ep id %d with %d lanes\n",
-			vep.base.id, rk_asd->lanes);
-
-		next_id = vep.base.id + 1;
-
-		fwnode_handle_put(ep);
-
-		continue;
-err_parse:
-		fwnode_handle_put(ep);
-		kfree(rk_asd);
-		v4l2_async_notifier_cleanup(ntf);
-		return ret;
-	}
-
-	if (next_id == 0)
-		dev_dbg(rkisp1->dev, "no remote subdevice found\n");
-	ntf->ops = &rkisp1_subdev_notifier_ops;
-	ret = v4l2_async_notifier_register(&rkisp1->v4l2_dev, ntf);
-	if (ret) {
-		v4l2_async_notifier_cleanup(ntf);
-		return ret;
-	}
-	return 0;
-}
-
-/* ----------------------------------------------------------------------------
- * Power
- */
-
-static int __maybe_unused rkisp1_runtime_suspend(struct device *dev)
-{
-	struct rkisp1_device *rkisp1 = dev_get_drvdata(dev);
-
-	clk_bulk_disable_unprepare(rkisp1->clk_size, rkisp1->clks);
-	return pinctrl_pm_select_sleep_state(dev);
-}
-
-static int __maybe_unused rkisp1_runtime_resume(struct device *dev)
-{
-	struct rkisp1_device *rkisp1 = dev_get_drvdata(dev);
-	int ret;
-
-	ret = pinctrl_pm_select_default_state(dev);
-	if (ret)
-		return ret;
-	ret = clk_bulk_prepare_enable(rkisp1->clk_size, rkisp1->clks);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-static const struct dev_pm_ops rkisp1_pm_ops = {
-	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
-				pm_runtime_force_resume)
-	SET_RUNTIME_PM_OPS(rkisp1_runtime_suspend, rkisp1_runtime_resume, NULL)
-};
-
-/* ----------------------------------------------------------------------------
- * Core
- */
-
-static int rkisp1_entities_register(struct rkisp1_device *rkisp1)
-{
-	int ret;
-
-	ret = rkisp1_isp_register(rkisp1);
-	if (ret)
-		return ret;
-
-	ret = rkisp1_resizer_devs_register(rkisp1);
-	if (ret)
-		goto err_unreg_isp_subdev;
-
-	ret = rkisp1_capture_devs_register(rkisp1);
-	if (ret)
-		goto err_unreg_resizer_devs;
-
-	ret = rkisp1_stats_register(rkisp1);
-	if (ret)
-		goto err_unreg_capture_devs;
-
-	ret = rkisp1_params_register(rkisp1);
-	if (ret)
-		goto err_unreg_stats;
-
-	ret = rkisp1_subdev_notifier(rkisp1);
-	if (ret) {
-		dev_err(rkisp1->dev,
-			"Failed to register subdev notifier(%d)\n", ret);
-		goto err_unreg_params;
-	}
-
-	return 0;
-err_unreg_params:
-	rkisp1_params_unregister(rkisp1);
-err_unreg_stats:
-	rkisp1_stats_unregister(rkisp1);
-err_unreg_capture_devs:
-	rkisp1_capture_devs_unregister(rkisp1);
-err_unreg_resizer_devs:
-	rkisp1_resizer_devs_unregister(rkisp1);
-err_unreg_isp_subdev:
-	rkisp1_isp_unregister(rkisp1);
-	return ret;
-}
-
-static irqreturn_t rkisp1_isr(int irq, void *ctx)
-{
-	struct device *dev = ctx;
-	struct rkisp1_device *rkisp1 = dev_get_drvdata(dev);
-
-	/*
-	 * Call rkisp1_capture_isr() first to handle the frame that
-	 * potentially completed using the current frame_sequence number before
-	 * it is potentially incremented by rkisp1_isp_isr() in the vertical
-	 * sync.
-	 */
-	rkisp1_capture_isr(rkisp1);
-	rkisp1_isp_isr(rkisp1);
-	rkisp1_mipi_isr(rkisp1);
-
-	return IRQ_HANDLED;
-}
-
-static const char * const rk3399_isp_clks[] = {
-	"isp",
-	"aclk",
-	"hclk",
-};
-
-static const struct rkisp1_match_data rk3399_isp_clk_data = {
-	.clks = rk3399_isp_clks,
-	.size = ARRAY_SIZE(rk3399_isp_clks),
-};
-
-static const struct of_device_id rkisp1_of_match[] = {
-	{
-		.compatible = "rockchip,rk3399-cif-isp",
-		.data = &rk3399_isp_clk_data,
-	},
-	{},
-};
-MODULE_DEVICE_TABLE(of, rkisp1_of_match);
-
-static void rkisp1_debug_init(struct rkisp1_device *rkisp1)
-{
-	struct rkisp1_debug *debug = &rkisp1->debug;
-
-	debug->debugfs_dir = debugfs_create_dir(RKISP1_DRIVER_NAME, NULL);
-	if (!debug->debugfs_dir) {
-		dev_dbg(rkisp1->dev, "failed to create debugfs directory\n");
-		return;
-	}
-	debugfs_create_ulong("data_loss", 0444, debug->debugfs_dir,
-			     &debug->data_loss);
-	debugfs_create_ulong("outform_size_err", 0444,  debug->debugfs_dir,
-			     &debug->outform_size_error);
-	debugfs_create_ulong("img_stabilization_size_error", 0444,
-			     debug->debugfs_dir,
-			     &debug->img_stabilization_size_error);
-	debugfs_create_ulong("inform_size_error", 0444,  debug->debugfs_dir,
-			     &debug->inform_size_error);
-	debugfs_create_ulong("irq_delay", 0444,  debug->debugfs_dir,
-			     &debug->irq_delay);
-	debugfs_create_ulong("mipi_error", 0444, debug->debugfs_dir,
-			     &debug->mipi_error);
-	debugfs_create_ulong("stats_error", 0444, debug->debugfs_dir,
-			     &debug->stats_error);
-	debugfs_create_ulong("mp_stop_timeout", 0444, debug->debugfs_dir,
-			     &debug->stop_timeout[RKISP1_MAINPATH]);
-	debugfs_create_ulong("sp_stop_timeout", 0444, debug->debugfs_dir,
-			     &debug->stop_timeout[RKISP1_SELFPATH]);
-	debugfs_create_ulong("mp_frame_drop", 0444, debug->debugfs_dir,
-			     &debug->frame_drop[RKISP1_MAINPATH]);
-	debugfs_create_ulong("sp_frame_drop", 0444, debug->debugfs_dir,
-			     &debug->frame_drop[RKISP1_SELFPATH]);
-}
-
-static int rkisp1_probe(struct platform_device *pdev)
-{
-	const struct rkisp1_match_data *clk_data;
-	struct device *dev = &pdev->dev;
-	struct rkisp1_device *rkisp1;
-	struct v4l2_device *v4l2_dev;
-	unsigned int i;
-	int ret, irq;
-
-	clk_data = of_device_get_match_data(&pdev->dev);
-	if (!clk_data)
-		return -ENODEV;
-
-	rkisp1 = devm_kzalloc(dev, sizeof(*rkisp1), GFP_KERNEL);
-	if (!rkisp1)
-		return -ENOMEM;
-
-	dev_set_drvdata(dev, rkisp1);
-	rkisp1->dev = dev;
-
-	mutex_init(&rkisp1->stream_lock);
-
-	rkisp1->base_addr = devm_platform_ioremap_resource(pdev, 0);
-	if (IS_ERR(rkisp1->base_addr))
-		return PTR_ERR(rkisp1->base_addr);
-
-	irq = platform_get_irq(pdev, 0);
-	if (irq < 0)
-		return irq;
-
-	ret = devm_request_irq(dev, irq, rkisp1_isr, IRQF_SHARED,
-			       dev_driver_string(dev), dev);
-	if (ret) {
-		dev_err(dev, "request irq failed: %d\n", ret);
-		return ret;
-	}
-
-	rkisp1->irq = irq;
-
-	for (i = 0; i < clk_data->size; i++)
-		rkisp1->clks[i].id = clk_data->clks[i];
-	ret = devm_clk_bulk_get(dev, clk_data->size, rkisp1->clks);
-	if (ret)
-		return ret;
-	rkisp1->clk_size = clk_data->size;
-
-	pm_runtime_enable(&pdev->dev);
-
-	strscpy(rkisp1->media_dev.model, RKISP1_DRIVER_NAME,
-		sizeof(rkisp1->media_dev.model));
-	rkisp1->media_dev.dev = &pdev->dev;
-	strscpy(rkisp1->media_dev.bus_info, RKISP1_BUS_INFO,
-		sizeof(rkisp1->media_dev.bus_info));
-	media_device_init(&rkisp1->media_dev);
-
-	v4l2_dev = &rkisp1->v4l2_dev;
-	v4l2_dev->mdev = &rkisp1->media_dev;
-	strscpy(v4l2_dev->name, RKISP1_DRIVER_NAME, sizeof(v4l2_dev->name));
-
-	ret = v4l2_device_register(rkisp1->dev, &rkisp1->v4l2_dev);
-	if (ret)
-		return ret;
-
-	ret = media_device_register(&rkisp1->media_dev);
-	if (ret) {
-		dev_err(dev, "Failed to register media device: %d\n", ret);
-		goto err_unreg_v4l2_dev;
-	}
-
-	ret = rkisp1_entities_register(rkisp1);
-	if (ret)
-		goto err_unreg_media_dev;
-
-	rkisp1_debug_init(rkisp1);
-
-	return 0;
-
-err_unreg_media_dev:
-	media_device_unregister(&rkisp1->media_dev);
-err_unreg_v4l2_dev:
-	v4l2_device_unregister(&rkisp1->v4l2_dev);
-	pm_runtime_disable(&pdev->dev);
-	return ret;
-}
-
-static int rkisp1_remove(struct platform_device *pdev)
-{
-	struct rkisp1_device *rkisp1 = platform_get_drvdata(pdev);
-
-	v4l2_async_notifier_unregister(&rkisp1->notifier);
-	v4l2_async_notifier_cleanup(&rkisp1->notifier);
-
-	rkisp1_params_unregister(rkisp1);
-	rkisp1_stats_unregister(rkisp1);
-	rkisp1_capture_devs_unregister(rkisp1);
-	rkisp1_resizer_devs_unregister(rkisp1);
-	rkisp1_isp_unregister(rkisp1);
-
-	media_device_unregister(&rkisp1->media_dev);
-	v4l2_device_unregister(&rkisp1->v4l2_dev);
-
-	pm_runtime_disable(&pdev->dev);
-
-	debugfs_remove_recursive(rkisp1->debug.debugfs_dir);
-	return 0;
-}
-
-static struct platform_driver rkisp1_drv = {
-	.driver = {
-		.name = RKISP1_DRIVER_NAME,
-		.of_match_table = of_match_ptr(rkisp1_of_match),
-		.pm = &rkisp1_pm_ops,
-	},
-	.probe = rkisp1_probe,
-	.remove = rkisp1_remove,
-};
-
-module_platform_driver(rkisp1_drv);
-MODULE_DESCRIPTION("Rockchip ISP1 platform driver");
-MODULE_LICENSE("Dual MIT/GPL");
diff --git a/drivers/staging/media/rkisp1/rkisp1-isp.c b/drivers/staging/media/rkisp1/rkisp1-isp.c
deleted file mode 100644
index 48d08ff87da2..000000000000
--- a/drivers/staging/media/rkisp1/rkisp1-isp.c
+++ /dev/null
@@ -1,1161 +0,0 @@
-// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
-/*
- * Rockchip ISP1 Driver - ISP Subdevice
- *
- * Copyright (C) 2019 Collabora, Ltd.
- *
- * Based on Rockchip ISP1 driver by Rockchip Electronics Co., Ltd.
- * Copyright (C) 2017 Rockchip Electronics Co., Ltd.
- */
-
-#include <linux/iopoll.h>
-#include <linux/phy/phy.h>
-#include <linux/phy/phy-mipi-dphy.h>
-#include <linux/pm_runtime.h>
-#include <linux/videodev2.h>
-#include <linux/vmalloc.h>
-#include <media/v4l2-event.h>
-
-#include "rkisp1-common.h"
-
-#define RKISP1_DEF_SINK_PAD_FMT MEDIA_BUS_FMT_SRGGB10_1X10
-#define RKISP1_DEF_SRC_PAD_FMT MEDIA_BUS_FMT_YUYV8_2X8
-
-#define RKISP1_ISP_DEV_NAME	RKISP1_DRIVER_NAME "_isp"
-
-/*
- * NOTE: MIPI controller and input MUX are also configured in this file.
- * This is because ISP Subdev describes not only ISP submodule (input size,
- * format, output size, format), but also a virtual route device.
- */
-
-/*
- * There are many variables named with format/frame in below code,
- * please see here for their meaning.
- * Cropping in the sink pad defines the image region from the sensor.
- * Cropping in the source pad defines the region for the Image Stabilizer (IS)
- *
- * Cropping regions of ISP
- *
- * +---------------------------------------------------------+
- * | Sensor image                                            |
- * | +---------------------------------------------------+   |
- * | | CIF_ISP_ACQ (for black level)                     |   |
- * | | sink pad format                                   |   |
- * | | +--------------------------------------------+    |   |
- * | | |    CIF_ISP_OUT                             |    |   |
- * | | |    sink pad crop                           |    |   |
- * | | |    +---------------------------------+     |    |   |
- * | | |    |   CIF_ISP_IS                    |     |    |   |
- * | | |    |   source pad crop and format    |     |    |   |
- * | | |    +---------------------------------+     |    |   |
- * | | +--------------------------------------------+    |   |
- * | +---------------------------------------------------+   |
- * +---------------------------------------------------------+
- */
-
-static const struct rkisp1_isp_mbus_info rkisp1_isp_formats[] = {
-	{
-		.mbus_code	= MEDIA_BUS_FMT_YUYV8_2X8,
-		.pixel_enc	= V4L2_PIXEL_ENC_YUV,
-		.direction	= RKISP1_ISP_SD_SRC,
-	}, {
-		.mbus_code	= MEDIA_BUS_FMT_SRGGB10_1X10,
-		.pixel_enc	= V4L2_PIXEL_ENC_BAYER,
-		.mipi_dt	= RKISP1_CIF_CSI2_DT_RAW10,
-		.bayer_pat	= RKISP1_RAW_RGGB,
-		.bus_width	= 10,
-		.direction	= RKISP1_ISP_SD_SINK | RKISP1_ISP_SD_SRC,
-	}, {
-		.mbus_code	= MEDIA_BUS_FMT_SBGGR10_1X10,
-		.pixel_enc	= V4L2_PIXEL_ENC_BAYER,
-		.mipi_dt	= RKISP1_CIF_CSI2_DT_RAW10,
-		.bayer_pat	= RKISP1_RAW_BGGR,
-		.bus_width	= 10,
-		.direction	= RKISP1_ISP_SD_SINK | RKISP1_ISP_SD_SRC,
-	}, {
-		.mbus_code	= MEDIA_BUS_FMT_SGBRG10_1X10,
-		.pixel_enc	= V4L2_PIXEL_ENC_BAYER,
-		.mipi_dt	= RKISP1_CIF_CSI2_DT_RAW10,
-		.bayer_pat	= RKISP1_RAW_GBRG,
-		.bus_width	= 10,
-		.direction	= RKISP1_ISP_SD_SINK | RKISP1_ISP_SD_SRC,
-	}, {
-		.mbus_code	= MEDIA_BUS_FMT_SGRBG10_1X10,
-		.pixel_enc	= V4L2_PIXEL_ENC_BAYER,
-		.mipi_dt	= RKISP1_CIF_CSI2_DT_RAW10,
-		.bayer_pat	= RKISP1_RAW_GRBG,
-		.bus_width	= 10,
-		.direction	= RKISP1_ISP_SD_SINK | RKISP1_ISP_SD_SRC,
-	}, {
-		.mbus_code	= MEDIA_BUS_FMT_SRGGB12_1X12,
-		.pixel_enc	= V4L2_PIXEL_ENC_BAYER,
-		.mipi_dt	= RKISP1_CIF_CSI2_DT_RAW12,
-		.bayer_pat	= RKISP1_RAW_RGGB,
-		.bus_width	= 12,
-		.direction	= RKISP1_ISP_SD_SINK | RKISP1_ISP_SD_SRC,
-	}, {
-		.mbus_code	= MEDIA_BUS_FMT_SBGGR12_1X12,
-		.pixel_enc	= V4L2_PIXEL_ENC_BAYER,
-		.mipi_dt	= RKISP1_CIF_CSI2_DT_RAW12,
-		.bayer_pat	= RKISP1_RAW_BGGR,
-		.bus_width	= 12,
-		.direction	= RKISP1_ISP_SD_SINK | RKISP1_ISP_SD_SRC,
-	}, {
-		.mbus_code	= MEDIA_BUS_FMT_SGBRG12_1X12,
-		.pixel_enc	= V4L2_PIXEL_ENC_BAYER,
-		.mipi_dt	= RKISP1_CIF_CSI2_DT_RAW12,
-		.bayer_pat	= RKISP1_RAW_GBRG,
-		.bus_width	= 12,
-		.direction	= RKISP1_ISP_SD_SINK | RKISP1_ISP_SD_SRC,
-	}, {
-		.mbus_code	= MEDIA_BUS_FMT_SGRBG12_1X12,
-		.pixel_enc	= V4L2_PIXEL_ENC_BAYER,
-		.mipi_dt	= RKISP1_CIF_CSI2_DT_RAW12,
-		.bayer_pat	= RKISP1_RAW_GRBG,
-		.bus_width	= 12,
-		.direction	= RKISP1_ISP_SD_SINK | RKISP1_ISP_SD_SRC,
-	}, {
-		.mbus_code	= MEDIA_BUS_FMT_SRGGB8_1X8,
-		.pixel_enc	= V4L2_PIXEL_ENC_BAYER,
-		.mipi_dt	= RKISP1_CIF_CSI2_DT_RAW8,
-		.bayer_pat	= RKISP1_RAW_RGGB,
-		.bus_width	= 8,
-		.direction	= RKISP1_ISP_SD_SINK | RKISP1_ISP_SD_SRC,
-	}, {
-		.mbus_code	= MEDIA_BUS_FMT_SBGGR8_1X8,
-		.pixel_enc	= V4L2_PIXEL_ENC_BAYER,
-		.mipi_dt	= RKISP1_CIF_CSI2_DT_RAW8,
-		.bayer_pat	= RKISP1_RAW_BGGR,
-		.bus_width	= 8,
-		.direction	= RKISP1_ISP_SD_SINK | RKISP1_ISP_SD_SRC,
-	}, {
-		.mbus_code	= MEDIA_BUS_FMT_SGBRG8_1X8,
-		.pixel_enc	= V4L2_PIXEL_ENC_BAYER,
-		.mipi_dt	= RKISP1_CIF_CSI2_DT_RAW8,
-		.bayer_pat	= RKISP1_RAW_GBRG,
-		.bus_width	= 8,
-		.direction	= RKISP1_ISP_SD_SINK | RKISP1_ISP_SD_SRC,
-	}, {
-		.mbus_code	= MEDIA_BUS_FMT_SGRBG8_1X8,
-		.pixel_enc	= V4L2_PIXEL_ENC_BAYER,
-		.mipi_dt	= RKISP1_CIF_CSI2_DT_RAW8,
-		.bayer_pat	= RKISP1_RAW_GRBG,
-		.bus_width	= 8,
-		.direction	= RKISP1_ISP_SD_SINK | RKISP1_ISP_SD_SRC,
-	}, {
-		.mbus_code	= MEDIA_BUS_FMT_YUYV8_1X16,
-		.pixel_enc	= V4L2_PIXEL_ENC_YUV,
-		.mipi_dt	= RKISP1_CIF_CSI2_DT_YUV422_8b,
-		.yuv_seq	= RKISP1_CIF_ISP_ACQ_PROP_YCBYCR,
-		.bus_width	= 16,
-		.direction	= RKISP1_ISP_SD_SINK,
-	}, {
-		.mbus_code	= MEDIA_BUS_FMT_YVYU8_1X16,
-		.pixel_enc	= V4L2_PIXEL_ENC_YUV,
-		.mipi_dt	= RKISP1_CIF_CSI2_DT_YUV422_8b,
-		.yuv_seq	= RKISP1_CIF_ISP_ACQ_PROP_YCRYCB,
-		.bus_width	= 16,
-		.direction	= RKISP1_ISP_SD_SINK,
-	}, {
-		.mbus_code	= MEDIA_BUS_FMT_UYVY8_1X16,
-		.pixel_enc	= V4L2_PIXEL_ENC_YUV,
-		.mipi_dt	= RKISP1_CIF_CSI2_DT_YUV422_8b,
-		.yuv_seq	= RKISP1_CIF_ISP_ACQ_PROP_CBYCRY,
-		.bus_width	= 16,
-		.direction	= RKISP1_ISP_SD_SINK,
-	}, {
-		.mbus_code	= MEDIA_BUS_FMT_VYUY8_1X16,
-		.pixel_enc	= V4L2_PIXEL_ENC_YUV,
-		.mipi_dt	= RKISP1_CIF_CSI2_DT_YUV422_8b,
-		.yuv_seq	= RKISP1_CIF_ISP_ACQ_PROP_CRYCBY,
-		.bus_width	= 16,
-		.direction	= RKISP1_ISP_SD_SINK,
-	},
-};
-
-/* ----------------------------------------------------------------------------
- * Helpers
- */
-
-const struct rkisp1_isp_mbus_info *rkisp1_isp_mbus_info_get(u32 mbus_code)
-{
-	unsigned int i;
-
-	for (i = 0; i < ARRAY_SIZE(rkisp1_isp_formats); i++) {
-		const struct rkisp1_isp_mbus_info *fmt = &rkisp1_isp_formats[i];
-
-		if (fmt->mbus_code == mbus_code)
-			return fmt;
-	}
-
-	return NULL;
-}
-
-static struct v4l2_subdev *rkisp1_get_remote_sensor(struct v4l2_subdev *sd)
-{
-	struct media_pad *local, *remote;
-	struct media_entity *sensor_me;
-
-	local = &sd->entity.pads[RKISP1_ISP_PAD_SINK_VIDEO];
-	remote = media_entity_remote_pad(local);
-	if (!remote)
-		return NULL;
-
-	sensor_me = remote->entity;
-	return media_entity_to_v4l2_subdev(sensor_me);
-}
-
-static struct v4l2_mbus_framefmt *
-rkisp1_isp_get_pad_fmt(struct rkisp1_isp *isp,
-		       struct v4l2_subdev_pad_config *cfg,
-		       unsigned int pad, u32 which)
-{
-	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_format(&isp->sd, cfg, pad);
-	else
-		return v4l2_subdev_get_try_format(&isp->sd, isp->pad_cfg, pad);
-}
-
-static struct v4l2_rect *
-rkisp1_isp_get_pad_crop(struct rkisp1_isp *isp,
-			struct v4l2_subdev_pad_config *cfg,
-			unsigned int pad, u32 which)
-{
-	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_crop(&isp->sd, cfg, pad);
-	else
-		return v4l2_subdev_get_try_crop(&isp->sd, isp->pad_cfg, pad);
-}
-
-/* ----------------------------------------------------------------------------
- * Camera Interface registers configurations
- */
-
-/*
- * Image Stabilization.
- * This should only be called when configuring CIF
- * or at the frame end interrupt
- */
-static void rkisp1_config_ism(struct rkisp1_device *rkisp1)
-{
-	struct v4l2_rect *src_crop =
-		rkisp1_isp_get_pad_crop(&rkisp1->isp, NULL,
-					RKISP1_ISP_PAD_SOURCE_VIDEO,
-					V4L2_SUBDEV_FORMAT_ACTIVE);
-	u32 val;
-
-	rkisp1_write(rkisp1, 0, RKISP1_CIF_ISP_IS_RECENTER);
-	rkisp1_write(rkisp1, 0, RKISP1_CIF_ISP_IS_MAX_DX);
-	rkisp1_write(rkisp1, 0, RKISP1_CIF_ISP_IS_MAX_DY);
-	rkisp1_write(rkisp1, 0, RKISP1_CIF_ISP_IS_DISPLACE);
-	rkisp1_write(rkisp1, src_crop->left, RKISP1_CIF_ISP_IS_H_OFFS);
-	rkisp1_write(rkisp1, src_crop->top, RKISP1_CIF_ISP_IS_V_OFFS);
-	rkisp1_write(rkisp1, src_crop->width, RKISP1_CIF_ISP_IS_H_SIZE);
-	rkisp1_write(rkisp1, src_crop->height, RKISP1_CIF_ISP_IS_V_SIZE);
-
-	/* IS(Image Stabilization) is always on, working as output crop */
-	rkisp1_write(rkisp1, 1, RKISP1_CIF_ISP_IS_CTRL);
-	val = rkisp1_read(rkisp1, RKISP1_CIF_ISP_CTRL);
-	val |= RKISP1_CIF_ISP_CTRL_ISP_CFG_UPD;
-	rkisp1_write(rkisp1, val, RKISP1_CIF_ISP_CTRL);
-}
-
-/*
- * configure ISP blocks with input format, size......
- */
-static int rkisp1_config_isp(struct rkisp1_device *rkisp1)
-{
-	u32 isp_ctrl = 0, irq_mask = 0, acq_mult = 0, signal = 0;
-	const struct rkisp1_isp_mbus_info *src_fmt, *sink_fmt;
-	struct rkisp1_sensor_async *sensor;
-	struct v4l2_mbus_framefmt *sink_frm;
-	struct v4l2_rect *sink_crop;
-
-	sensor = rkisp1->active_sensor;
-	sink_fmt = rkisp1->isp.sink_fmt;
-	src_fmt = rkisp1->isp.src_fmt;
-	sink_frm = rkisp1_isp_get_pad_fmt(&rkisp1->isp, NULL,
-					  RKISP1_ISP_PAD_SINK_VIDEO,
-					  V4L2_SUBDEV_FORMAT_ACTIVE);
-	sink_crop = rkisp1_isp_get_pad_crop(&rkisp1->isp, NULL,
-					    RKISP1_ISP_PAD_SINK_VIDEO,
-					    V4L2_SUBDEV_FORMAT_ACTIVE);
-
-	if (sink_fmt->pixel_enc == V4L2_PIXEL_ENC_BAYER) {
-		acq_mult = 1;
-		if (src_fmt->pixel_enc == V4L2_PIXEL_ENC_BAYER) {
-			if (sensor->mbus_type == V4L2_MBUS_BT656)
-				isp_ctrl = RKISP1_CIF_ISP_CTRL_ISP_MODE_RAW_PICT_ITU656;
-			else
-				isp_ctrl = RKISP1_CIF_ISP_CTRL_ISP_MODE_RAW_PICT;
-		} else {
-			rkisp1_write(rkisp1, RKISP1_CIF_ISP_DEMOSAIC_TH(0xc),
-				     RKISP1_CIF_ISP_DEMOSAIC);
-
-			if (sensor->mbus_type == V4L2_MBUS_BT656)
-				isp_ctrl = RKISP1_CIF_ISP_CTRL_ISP_MODE_BAYER_ITU656;
-			else
-				isp_ctrl = RKISP1_CIF_ISP_CTRL_ISP_MODE_BAYER_ITU601;
-		}
-	} else if (sink_fmt->pixel_enc == V4L2_PIXEL_ENC_YUV) {
-		acq_mult = 2;
-		if (sensor->mbus_type == V4L2_MBUS_CSI2_DPHY) {
-			isp_ctrl = RKISP1_CIF_ISP_CTRL_ISP_MODE_ITU601;
-		} else {
-			if (sensor->mbus_type == V4L2_MBUS_BT656)
-				isp_ctrl = RKISP1_CIF_ISP_CTRL_ISP_MODE_ITU656;
-			else
-				isp_ctrl = RKISP1_CIF_ISP_CTRL_ISP_MODE_ITU601;
-		}
-
-		irq_mask |= RKISP1_CIF_ISP_DATA_LOSS;
-	}
-
-	/* Set up input acquisition properties */
-	if (sensor->mbus_type == V4L2_MBUS_BT656 ||
-	    sensor->mbus_type == V4L2_MBUS_PARALLEL) {
-		if (sensor->mbus_flags & V4L2_MBUS_PCLK_SAMPLE_RISING)
-			signal = RKISP1_CIF_ISP_ACQ_PROP_POS_EDGE;
-	}
-
-	if (sensor->mbus_type == V4L2_MBUS_PARALLEL) {
-		if (sensor->mbus_flags & V4L2_MBUS_VSYNC_ACTIVE_LOW)
-			signal |= RKISP1_CIF_ISP_ACQ_PROP_VSYNC_LOW;
-
-		if (sensor->mbus_flags & V4L2_MBUS_HSYNC_ACTIVE_LOW)
-			signal |= RKISP1_CIF_ISP_ACQ_PROP_HSYNC_LOW;
-	}
-
-	rkisp1_write(rkisp1, isp_ctrl, RKISP1_CIF_ISP_CTRL);
-	rkisp1_write(rkisp1, signal | sink_fmt->yuv_seq |
-		     RKISP1_CIF_ISP_ACQ_PROP_BAYER_PAT(sink_fmt->bayer_pat) |
-		     RKISP1_CIF_ISP_ACQ_PROP_FIELD_SEL_ALL,
-		     RKISP1_CIF_ISP_ACQ_PROP);
-	rkisp1_write(rkisp1, 0, RKISP1_CIF_ISP_ACQ_NR_FRAMES);
-
-	/* Acquisition Size */
-	rkisp1_write(rkisp1, 0, RKISP1_CIF_ISP_ACQ_H_OFFS);
-	rkisp1_write(rkisp1, 0, RKISP1_CIF_ISP_ACQ_V_OFFS);
-	rkisp1_write(rkisp1,
-		     acq_mult * sink_frm->width, RKISP1_CIF_ISP_ACQ_H_SIZE);
-	rkisp1_write(rkisp1, sink_frm->height, RKISP1_CIF_ISP_ACQ_V_SIZE);
-
-	/* ISP Out Area */
-	rkisp1_write(rkisp1, sink_crop->left, RKISP1_CIF_ISP_OUT_H_OFFS);
-	rkisp1_write(rkisp1, sink_crop->top, RKISP1_CIF_ISP_OUT_V_OFFS);
-	rkisp1_write(rkisp1, sink_crop->width, RKISP1_CIF_ISP_OUT_H_SIZE);
-	rkisp1_write(rkisp1, sink_crop->height, RKISP1_CIF_ISP_OUT_V_SIZE);
-
-	irq_mask |= RKISP1_CIF_ISP_FRAME | RKISP1_CIF_ISP_V_START |
-		    RKISP1_CIF_ISP_PIC_SIZE_ERROR;
-	rkisp1_write(rkisp1, irq_mask, RKISP1_CIF_ISP_IMSC);
-
-	if (src_fmt->pixel_enc == V4L2_PIXEL_ENC_BAYER) {
-		rkisp1_params_disable(&rkisp1->params);
-	} else {
-		struct v4l2_mbus_framefmt *src_frm;
-
-		src_frm = rkisp1_isp_get_pad_fmt(&rkisp1->isp, NULL,
-						 RKISP1_ISP_PAD_SINK_VIDEO,
-						 V4L2_SUBDEV_FORMAT_ACTIVE);
-		rkisp1_params_configure(&rkisp1->params, sink_fmt->bayer_pat,
-					src_frm->quantization);
-	}
-
-	return 0;
-}
-
-static int rkisp1_config_dvp(struct rkisp1_device *rkisp1)
-{
-	const struct rkisp1_isp_mbus_info *sink_fmt = rkisp1->isp.sink_fmt;
-	u32 val, input_sel;
-
-	switch (sink_fmt->bus_width) {
-	case 8:
-		input_sel = RKISP1_CIF_ISP_ACQ_PROP_IN_SEL_8B_ZERO;
-		break;
-	case 10:
-		input_sel = RKISP1_CIF_ISP_ACQ_PROP_IN_SEL_10B_ZERO;
-		break;
-	case 12:
-		input_sel = RKISP1_CIF_ISP_ACQ_PROP_IN_SEL_12B;
-		break;
-	default:
-		dev_err(rkisp1->dev, "Invalid bus width\n");
-		return -EINVAL;
-	}
-
-	val = rkisp1_read(rkisp1, RKISP1_CIF_ISP_ACQ_PROP);
-	rkisp1_write(rkisp1, val | input_sel, RKISP1_CIF_ISP_ACQ_PROP);
-
-	return 0;
-}
-
-static int rkisp1_config_mipi(struct rkisp1_device *rkisp1)
-{
-	const struct rkisp1_isp_mbus_info *sink_fmt = rkisp1->isp.sink_fmt;
-	unsigned int lanes = rkisp1->active_sensor->lanes;
-	u32 mipi_ctrl;
-
-	if (lanes < 1 || lanes > 4)
-		return -EINVAL;
-
-	mipi_ctrl = RKISP1_CIF_MIPI_CTRL_NUM_LANES(lanes - 1) |
-		    RKISP1_CIF_MIPI_CTRL_SHUTDOWNLANES(0xf) |
-		    RKISP1_CIF_MIPI_CTRL_ERR_SOT_SYNC_HS_SKIP |
-		    RKISP1_CIF_MIPI_CTRL_CLOCKLANE_ENA;
-
-	rkisp1_write(rkisp1, mipi_ctrl, RKISP1_CIF_MIPI_CTRL);
-
-	/* Configure Data Type and Virtual Channel */
-	rkisp1_write(rkisp1,
-		     RKISP1_CIF_MIPI_DATA_SEL_DT(sink_fmt->mipi_dt) |
-		     RKISP1_CIF_MIPI_DATA_SEL_VC(0),
-		     RKISP1_CIF_MIPI_IMG_DATA_SEL);
-
-	/* Clear MIPI interrupts */
-	rkisp1_write(rkisp1, ~0, RKISP1_CIF_MIPI_ICR);
-	/*
-	 * Disable RKISP1_CIF_MIPI_ERR_DPHY interrupt here temporary for
-	 * isp bus may be dead when switch isp.
-	 */
-	rkisp1_write(rkisp1,
-		     RKISP1_CIF_MIPI_FRAME_END | RKISP1_CIF_MIPI_ERR_CSI |
-		     RKISP1_CIF_MIPI_ERR_DPHY |
-		     RKISP1_CIF_MIPI_SYNC_FIFO_OVFLW(0x03) |
-		     RKISP1_CIF_MIPI_ADD_DATA_OVFLW,
-		     RKISP1_CIF_MIPI_IMSC);
-
-	dev_dbg(rkisp1->dev, "\n  MIPI_CTRL 0x%08x\n"
-		"  MIPI_IMG_DATA_SEL 0x%08x\n"
-		"  MIPI_STATUS 0x%08x\n"
-		"  MIPI_IMSC 0x%08x\n",
-		rkisp1_read(rkisp1, RKISP1_CIF_MIPI_CTRL),
-		rkisp1_read(rkisp1, RKISP1_CIF_MIPI_IMG_DATA_SEL),
-		rkisp1_read(rkisp1, RKISP1_CIF_MIPI_STATUS),
-		rkisp1_read(rkisp1, RKISP1_CIF_MIPI_IMSC));
-
-	return 0;
-}
-
-/* Configure MUX */
-static int rkisp1_config_path(struct rkisp1_device *rkisp1)
-{
-	struct rkisp1_sensor_async *sensor = rkisp1->active_sensor;
-	u32 dpcl = rkisp1_read(rkisp1, RKISP1_CIF_VI_DPCL);
-	int ret = 0;
-
-	if (sensor->mbus_type == V4L2_MBUS_BT656 ||
-	    sensor->mbus_type == V4L2_MBUS_PARALLEL) {
-		ret = rkisp1_config_dvp(rkisp1);
-		dpcl |= RKISP1_CIF_VI_DPCL_IF_SEL_PARALLEL;
-	} else if (sensor->mbus_type == V4L2_MBUS_CSI2_DPHY) {
-		ret = rkisp1_config_mipi(rkisp1);
-		dpcl |= RKISP1_CIF_VI_DPCL_IF_SEL_MIPI;
-	}
-
-	rkisp1_write(rkisp1, dpcl, RKISP1_CIF_VI_DPCL);
-
-	return ret;
-}
-
-/* Hardware configure Entry */
-static int rkisp1_config_cif(struct rkisp1_device *rkisp1)
-{
-	u32 cif_id;
-	int ret;
-
-	cif_id = rkisp1_read(rkisp1, RKISP1_CIF_VI_ID);
-	dev_dbg(rkisp1->dev, "CIF_ID 0x%08x\n", cif_id);
-
-	ret = rkisp1_config_isp(rkisp1);
-	if (ret)
-		return ret;
-	ret = rkisp1_config_path(rkisp1);
-	if (ret)
-		return ret;
-	rkisp1_config_ism(rkisp1);
-
-	return 0;
-}
-
-static void rkisp1_isp_stop(struct rkisp1_device *rkisp1)
-{
-	u32 val;
-
-	/*
-	 * ISP(mi) stop in mi frame end -> Stop ISP(mipi) ->
-	 * Stop ISP(isp) ->wait for ISP isp off
-	 */
-	/* stop and clear MI, MIPI, and ISP interrupts */
-	rkisp1_write(rkisp1, 0, RKISP1_CIF_MIPI_IMSC);
-	rkisp1_write(rkisp1, ~0, RKISP1_CIF_MIPI_ICR);
-
-	rkisp1_write(rkisp1, 0, RKISP1_CIF_ISP_IMSC);
-	rkisp1_write(rkisp1, ~0, RKISP1_CIF_ISP_ICR);
-
-	rkisp1_write(rkisp1, 0, RKISP1_CIF_MI_IMSC);
-	rkisp1_write(rkisp1, ~0, RKISP1_CIF_MI_ICR);
-	val = rkisp1_read(rkisp1, RKISP1_CIF_MIPI_CTRL);
-	rkisp1_write(rkisp1, val & (~RKISP1_CIF_MIPI_CTRL_OUTPUT_ENA),
-		     RKISP1_CIF_MIPI_CTRL);
-	/* stop ISP */
-	val = rkisp1_read(rkisp1, RKISP1_CIF_ISP_CTRL);
-	val &= ~(RKISP1_CIF_ISP_CTRL_ISP_INFORM_ENABLE |
-		 RKISP1_CIF_ISP_CTRL_ISP_ENABLE);
-	rkisp1_write(rkisp1, val, RKISP1_CIF_ISP_CTRL);
-
-	val = rkisp1_read(rkisp1,	RKISP1_CIF_ISP_CTRL);
-	rkisp1_write(rkisp1, val | RKISP1_CIF_ISP_CTRL_ISP_CFG_UPD,
-		     RKISP1_CIF_ISP_CTRL);
-
-	readx_poll_timeout(readl, rkisp1->base_addr + RKISP1_CIF_ISP_RIS,
-			   val, val & RKISP1_CIF_ISP_OFF, 20, 100);
-	rkisp1_write(rkisp1,
-		     RKISP1_CIF_IRCL_MIPI_SW_RST | RKISP1_CIF_IRCL_ISP_SW_RST,
-		     RKISP1_CIF_IRCL);
-	rkisp1_write(rkisp1, 0x0, RKISP1_CIF_IRCL);
-}
-
-static void rkisp1_config_clk(struct rkisp1_device *rkisp1)
-{
-	u32 val = RKISP1_CIF_ICCL_ISP_CLK | RKISP1_CIF_ICCL_CP_CLK |
-		  RKISP1_CIF_ICCL_MRSZ_CLK | RKISP1_CIF_ICCL_SRSZ_CLK |
-		  RKISP1_CIF_ICCL_JPEG_CLK | RKISP1_CIF_ICCL_MI_CLK |
-		  RKISP1_CIF_ICCL_IE_CLK | RKISP1_CIF_ICCL_MIPI_CLK |
-		  RKISP1_CIF_ICCL_DCROP_CLK;
-
-	rkisp1_write(rkisp1, val, RKISP1_CIF_ICCL);
-}
-
-static void rkisp1_isp_start(struct rkisp1_device *rkisp1)
-{
-	struct rkisp1_sensor_async *sensor = rkisp1->active_sensor;
-	u32 val;
-
-	rkisp1_config_clk(rkisp1);
-
-	/* Activate MIPI */
-	if (sensor->mbus_type == V4L2_MBUS_CSI2_DPHY) {
-		val = rkisp1_read(rkisp1, RKISP1_CIF_MIPI_CTRL);
-		rkisp1_write(rkisp1, val | RKISP1_CIF_MIPI_CTRL_OUTPUT_ENA,
-			     RKISP1_CIF_MIPI_CTRL);
-	}
-	/* Activate ISP */
-	val = rkisp1_read(rkisp1, RKISP1_CIF_ISP_CTRL);
-	val |= RKISP1_CIF_ISP_CTRL_ISP_CFG_UPD |
-	       RKISP1_CIF_ISP_CTRL_ISP_ENABLE |
-	       RKISP1_CIF_ISP_CTRL_ISP_INFORM_ENABLE;
-	rkisp1_write(rkisp1, val, RKISP1_CIF_ISP_CTRL);
-
-	/*
-	 * CIF spec says to wait for sufficient time after enabling
-	 * the MIPI interface and before starting the sensor output.
-	 */
-	usleep_range(1000, 1200);
-}
-
-/* ----------------------------------------------------------------------------
- * Subdev pad operations
- */
-
-static int rkisp1_isp_enum_mbus_code(struct v4l2_subdev *sd,
-				     struct v4l2_subdev_pad_config *cfg,
-				     struct v4l2_subdev_mbus_code_enum *code)
-{
-	unsigned int i, dir;
-	int pos = 0;
-
-	if (code->pad == RKISP1_ISP_PAD_SINK_VIDEO) {
-		dir = RKISP1_ISP_SD_SINK;
-	} else if (code->pad == RKISP1_ISP_PAD_SOURCE_VIDEO) {
-		dir = RKISP1_ISP_SD_SRC;
-	} else {
-		if (code->index > 0)
-			return -EINVAL;
-		code->code = MEDIA_BUS_FMT_METADATA_FIXED;
-		return 0;
-	}
-
-	if (code->index >= ARRAY_SIZE(rkisp1_isp_formats))
-		return -EINVAL;
-
-	for (i = 0; i < ARRAY_SIZE(rkisp1_isp_formats); i++) {
-		const struct rkisp1_isp_mbus_info *fmt = &rkisp1_isp_formats[i];
-
-		if (fmt->direction & dir)
-			pos++;
-
-		if (code->index == pos - 1) {
-			code->code = fmt->mbus_code;
-			if (fmt->pixel_enc == V4L2_PIXEL_ENC_YUV &&
-			    dir == RKISP1_ISP_SD_SRC)
-				code->flags =
-					V4L2_SUBDEV_MBUS_CODE_CSC_QUANTIZATION;
-			return 0;
-		}
-	}
-
-	return -EINVAL;
-}
-
-static int rkisp1_isp_init_config(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg)
-{
-	struct v4l2_mbus_framefmt *sink_fmt, *src_fmt;
-	struct v4l2_rect *sink_crop, *src_crop;
-
-	sink_fmt = v4l2_subdev_get_try_format(sd, cfg,
-					      RKISP1_ISP_PAD_SINK_VIDEO);
-	sink_fmt->width = RKISP1_DEFAULT_WIDTH;
-	sink_fmt->height = RKISP1_DEFAULT_HEIGHT;
-	sink_fmt->field = V4L2_FIELD_NONE;
-	sink_fmt->code = RKISP1_DEF_SINK_PAD_FMT;
-
-	sink_crop = v4l2_subdev_get_try_crop(sd, cfg,
-					     RKISP1_ISP_PAD_SINK_VIDEO);
-	sink_crop->width = RKISP1_DEFAULT_WIDTH;
-	sink_crop->height = RKISP1_DEFAULT_HEIGHT;
-	sink_crop->left = 0;
-	sink_crop->top = 0;
-
-	src_fmt = v4l2_subdev_get_try_format(sd, cfg,
-					     RKISP1_ISP_PAD_SOURCE_VIDEO);
-	*src_fmt = *sink_fmt;
-	src_fmt->code = RKISP1_DEF_SRC_PAD_FMT;
-
-	src_crop = v4l2_subdev_get_try_crop(sd, cfg,
-					    RKISP1_ISP_PAD_SOURCE_VIDEO);
-	*src_crop = *sink_crop;
-
-	sink_fmt = v4l2_subdev_get_try_format(sd, cfg,
-					      RKISP1_ISP_PAD_SINK_PARAMS);
-	src_fmt = v4l2_subdev_get_try_format(sd, cfg,
-					     RKISP1_ISP_PAD_SOURCE_STATS);
-	sink_fmt->width = 0;
-	sink_fmt->height = 0;
-	sink_fmt->field = V4L2_FIELD_NONE;
-	sink_fmt->code = MEDIA_BUS_FMT_METADATA_FIXED;
-	*src_fmt = *sink_fmt;
-
-	return 0;
-}
-
-static void rkisp1_isp_set_src_fmt(struct rkisp1_isp *isp,
-				   struct v4l2_subdev_pad_config *cfg,
-				   struct v4l2_mbus_framefmt *format,
-				   unsigned int which)
-{
-	const struct rkisp1_isp_mbus_info *mbus_info;
-	struct v4l2_mbus_framefmt *src_fmt;
-	const struct v4l2_rect *src_crop;
-
-	src_fmt = rkisp1_isp_get_pad_fmt(isp, cfg,
-					 RKISP1_ISP_PAD_SOURCE_VIDEO, which);
-	src_crop = rkisp1_isp_get_pad_crop(isp, cfg,
-					   RKISP1_ISP_PAD_SOURCE_VIDEO, which);
-
-	src_fmt->code = format->code;
-	mbus_info = rkisp1_isp_mbus_info_get(src_fmt->code);
-	if (!mbus_info || !(mbus_info->direction & RKISP1_ISP_SD_SRC)) {
-		src_fmt->code = RKISP1_DEF_SRC_PAD_FMT;
-		mbus_info = rkisp1_isp_mbus_info_get(src_fmt->code);
-	}
-	if (which == V4L2_SUBDEV_FORMAT_ACTIVE)
-		isp->src_fmt = mbus_info;
-	src_fmt->width  = src_crop->width;
-	src_fmt->height = src_crop->height;
-
-	/*
-	 * The CSC API is used to allow userspace to force full
-	 * quantization on YUV formats.
-	 */
-	if (format->flags & V4L2_MBUS_FRAMEFMT_SET_CSC &&
-	    format->quantization == V4L2_QUANTIZATION_FULL_RANGE &&
-	    mbus_info->pixel_enc == V4L2_PIXEL_ENC_YUV)
-		src_fmt->quantization = V4L2_QUANTIZATION_FULL_RANGE;
-	else if (mbus_info->pixel_enc == V4L2_PIXEL_ENC_YUV)
-		src_fmt->quantization = V4L2_QUANTIZATION_LIM_RANGE;
-	else
-		src_fmt->quantization = V4L2_QUANTIZATION_FULL_RANGE;
-
-	*format = *src_fmt;
-}
-
-static void rkisp1_isp_set_src_crop(struct rkisp1_isp *isp,
-				    struct v4l2_subdev_pad_config *cfg,
-				    struct v4l2_rect *r, unsigned int which)
-{
-	struct v4l2_mbus_framefmt *src_fmt;
-	const struct v4l2_rect *sink_crop;
-	struct v4l2_rect *src_crop;
-
-	src_crop = rkisp1_isp_get_pad_crop(isp, cfg,
-					   RKISP1_ISP_PAD_SOURCE_VIDEO,
-					   which);
-	sink_crop = rkisp1_isp_get_pad_crop(isp, cfg,
-					    RKISP1_ISP_PAD_SINK_VIDEO,
-					    which);
-
-	src_crop->left = ALIGN(r->left, 2);
-	src_crop->width = ALIGN(r->width, 2);
-	src_crop->top = r->top;
-	src_crop->height = r->height;
-	rkisp1_sd_adjust_crop_rect(src_crop, sink_crop);
-
-	*r = *src_crop;
-
-	/* Propagate to out format */
-	src_fmt = rkisp1_isp_get_pad_fmt(isp, cfg,
-					 RKISP1_ISP_PAD_SOURCE_VIDEO, which);
-	rkisp1_isp_set_src_fmt(isp, cfg, src_fmt, which);
-}
-
-static void rkisp1_isp_set_sink_crop(struct rkisp1_isp *isp,
-				     struct v4l2_subdev_pad_config *cfg,
-				     struct v4l2_rect *r, unsigned int which)
-{
-	struct v4l2_rect *sink_crop, *src_crop;
-	struct v4l2_mbus_framefmt *sink_fmt;
-
-	sink_crop = rkisp1_isp_get_pad_crop(isp, cfg, RKISP1_ISP_PAD_SINK_VIDEO,
-					    which);
-	sink_fmt = rkisp1_isp_get_pad_fmt(isp, cfg, RKISP1_ISP_PAD_SINK_VIDEO,
-					  which);
-
-	sink_crop->left = ALIGN(r->left, 2);
-	sink_crop->width = ALIGN(r->width, 2);
-	sink_crop->top = r->top;
-	sink_crop->height = r->height;
-	rkisp1_sd_adjust_crop(sink_crop, sink_fmt);
-
-	*r = *sink_crop;
-
-	/* Propagate to out crop */
-	src_crop = rkisp1_isp_get_pad_crop(isp, cfg,
-					   RKISP1_ISP_PAD_SOURCE_VIDEO, which);
-	rkisp1_isp_set_src_crop(isp, cfg, src_crop, which);
-}
-
-static void rkisp1_isp_set_sink_fmt(struct rkisp1_isp *isp,
-				    struct v4l2_subdev_pad_config *cfg,
-				    struct v4l2_mbus_framefmt *format,
-				    unsigned int which)
-{
-	const struct rkisp1_isp_mbus_info *mbus_info;
-	struct v4l2_mbus_framefmt *sink_fmt;
-	struct v4l2_rect *sink_crop;
-
-	sink_fmt = rkisp1_isp_get_pad_fmt(isp, cfg, RKISP1_ISP_PAD_SINK_VIDEO,
-					  which);
-	sink_fmt->code = format->code;
-	mbus_info = rkisp1_isp_mbus_info_get(sink_fmt->code);
-	if (!mbus_info || !(mbus_info->direction & RKISP1_ISP_SD_SINK)) {
-		sink_fmt->code = RKISP1_DEF_SINK_PAD_FMT;
-		mbus_info = rkisp1_isp_mbus_info_get(sink_fmt->code);
-	}
-	if (which == V4L2_SUBDEV_FORMAT_ACTIVE)
-		isp->sink_fmt = mbus_info;
-
-	sink_fmt->width = clamp_t(u32, format->width,
-				  RKISP1_ISP_MIN_WIDTH,
-				  RKISP1_ISP_MAX_WIDTH);
-	sink_fmt->height = clamp_t(u32, format->height,
-				   RKISP1_ISP_MIN_HEIGHT,
-				   RKISP1_ISP_MAX_HEIGHT);
-
-	*format = *sink_fmt;
-
-	/* Propagate to in crop */
-	sink_crop = rkisp1_isp_get_pad_crop(isp, cfg, RKISP1_ISP_PAD_SINK_VIDEO,
-					    which);
-	rkisp1_isp_set_sink_crop(isp, cfg, sink_crop, which);
-}
-
-static int rkisp1_isp_get_fmt(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
-			      struct v4l2_subdev_format *fmt)
-{
-	struct rkisp1_isp *isp = container_of(sd, struct rkisp1_isp, sd);
-
-	mutex_lock(&isp->ops_lock);
-	fmt->format = *rkisp1_isp_get_pad_fmt(isp, cfg, fmt->pad, fmt->which);
-	mutex_unlock(&isp->ops_lock);
-	return 0;
-}
-
-static int rkisp1_isp_set_fmt(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
-			      struct v4l2_subdev_format *fmt)
-{
-	struct rkisp1_isp *isp = container_of(sd, struct rkisp1_isp, sd);
-
-	mutex_lock(&isp->ops_lock);
-	if (fmt->pad == RKISP1_ISP_PAD_SINK_VIDEO)
-		rkisp1_isp_set_sink_fmt(isp, cfg, &fmt->format, fmt->which);
-	else if (fmt->pad == RKISP1_ISP_PAD_SOURCE_VIDEO)
-		rkisp1_isp_set_src_fmt(isp, cfg, &fmt->format, fmt->which);
-	else
-		fmt->format = *rkisp1_isp_get_pad_fmt(isp, cfg, fmt->pad,
-						      fmt->which);
-
-	mutex_unlock(&isp->ops_lock);
-	return 0;
-}
-
-static int rkisp1_isp_get_selection(struct v4l2_subdev *sd,
-				    struct v4l2_subdev_pad_config *cfg,
-				    struct v4l2_subdev_selection *sel)
-{
-	struct rkisp1_isp *isp = container_of(sd, struct rkisp1_isp, sd);
-	int ret = 0;
-
-	if (sel->pad != RKISP1_ISP_PAD_SOURCE_VIDEO &&
-	    sel->pad != RKISP1_ISP_PAD_SINK_VIDEO)
-		return -EINVAL;
-
-	mutex_lock(&isp->ops_lock);
-	switch (sel->target) {
-	case V4L2_SEL_TGT_CROP_BOUNDS:
-		if (sel->pad == RKISP1_ISP_PAD_SINK_VIDEO) {
-			struct v4l2_mbus_framefmt *fmt;
-
-			fmt = rkisp1_isp_get_pad_fmt(isp, cfg, sel->pad,
-						     sel->which);
-			sel->r.height = fmt->height;
-			sel->r.width = fmt->width;
-			sel->r.left = 0;
-			sel->r.top = 0;
-		} else {
-			sel->r = *rkisp1_isp_get_pad_crop(isp, cfg,
-						RKISP1_ISP_PAD_SINK_VIDEO,
-						sel->which);
-		}
-		break;
-	case V4L2_SEL_TGT_CROP:
-		sel->r = *rkisp1_isp_get_pad_crop(isp, cfg, sel->pad,
-						  sel->which);
-		break;
-	default:
-		ret = -EINVAL;
-	}
-	mutex_unlock(&isp->ops_lock);
-	return ret;
-}
-
-static int rkisp1_isp_set_selection(struct v4l2_subdev *sd,
-				    struct v4l2_subdev_pad_config *cfg,
-				    struct v4l2_subdev_selection *sel)
-{
-	struct rkisp1_device *rkisp1 =
-		container_of(sd->v4l2_dev, struct rkisp1_device, v4l2_dev);
-	struct rkisp1_isp *isp = container_of(sd, struct rkisp1_isp, sd);
-	int ret = 0;
-
-	if (sel->target != V4L2_SEL_TGT_CROP)
-		return -EINVAL;
-
-	dev_dbg(rkisp1->dev, "%s: pad: %d sel(%d,%d)/%dx%d\n", __func__,
-		sel->pad, sel->r.left, sel->r.top, sel->r.width, sel->r.height);
-	mutex_lock(&isp->ops_lock);
-	if (sel->pad == RKISP1_ISP_PAD_SINK_VIDEO)
-		rkisp1_isp_set_sink_crop(isp, cfg, &sel->r, sel->which);
-	else if (sel->pad == RKISP1_ISP_PAD_SOURCE_VIDEO)
-		rkisp1_isp_set_src_crop(isp, cfg, &sel->r, sel->which);
-	else
-		ret = -EINVAL;
-
-	mutex_unlock(&isp->ops_lock);
-	return ret;
-}
-
-static int rkisp1_subdev_link_validate(struct media_link *link)
-{
-	if (link->sink->index == RKISP1_ISP_PAD_SINK_PARAMS)
-		return 0;
-
-	return v4l2_subdev_link_validate(link);
-}
-
-static const struct v4l2_subdev_pad_ops rkisp1_isp_pad_ops = {
-	.enum_mbus_code = rkisp1_isp_enum_mbus_code,
-	.get_selection = rkisp1_isp_get_selection,
-	.set_selection = rkisp1_isp_set_selection,
-	.init_cfg = rkisp1_isp_init_config,
-	.get_fmt = rkisp1_isp_get_fmt,
-	.set_fmt = rkisp1_isp_set_fmt,
-	.link_validate = v4l2_subdev_link_validate_default,
-};
-
-/* ----------------------------------------------------------------------------
- * Stream operations
- */
-
-static int rkisp1_mipi_csi2_start(struct rkisp1_isp *isp,
-				  struct rkisp1_sensor_async *sensor)
-{
-	struct rkisp1_device *rkisp1 =
-		container_of(isp->sd.v4l2_dev, struct rkisp1_device, v4l2_dev);
-	union phy_configure_opts opts;
-	struct phy_configure_opts_mipi_dphy *cfg = &opts.mipi_dphy;
-	s64 pixel_clock;
-
-	if (!sensor->pixel_rate_ctrl) {
-		dev_warn(rkisp1->dev, "No pixel rate control in sensor subdev\n");
-		return -EPIPE;
-	}
-
-	pixel_clock = v4l2_ctrl_g_ctrl_int64(sensor->pixel_rate_ctrl);
-	if (!pixel_clock) {
-		dev_err(rkisp1->dev, "Invalid pixel rate value\n");
-		return -EINVAL;
-	}
-
-	phy_mipi_dphy_get_default_config(pixel_clock, isp->sink_fmt->bus_width,
-					 sensor->lanes, cfg);
-	phy_set_mode(sensor->dphy, PHY_MODE_MIPI_DPHY);
-	phy_configure(sensor->dphy, &opts);
-	phy_power_on(sensor->dphy);
-
-	return 0;
-}
-
-static void rkisp1_mipi_csi2_stop(struct rkisp1_sensor_async *sensor)
-{
-	phy_power_off(sensor->dphy);
-}
-
-static int rkisp1_isp_s_stream(struct v4l2_subdev *sd, int enable)
-{
-	struct rkisp1_device *rkisp1 =
-		container_of(sd->v4l2_dev, struct rkisp1_device, v4l2_dev);
-	struct rkisp1_isp *isp = &rkisp1->isp;
-	struct v4l2_subdev *sensor_sd;
-	int ret = 0;
-
-	if (!enable) {
-		rkisp1_isp_stop(rkisp1);
-		rkisp1_mipi_csi2_stop(rkisp1->active_sensor);
-		return 0;
-	}
-
-	sensor_sd = rkisp1_get_remote_sensor(sd);
-	if (!sensor_sd) {
-		dev_warn(rkisp1->dev, "No link between isp and sensor\n");
-		return -ENODEV;
-	}
-
-	rkisp1->active_sensor = container_of(sensor_sd->asd,
-					     struct rkisp1_sensor_async, asd);
-
-	if (rkisp1->active_sensor->mbus_type != V4L2_MBUS_CSI2_DPHY)
-		return -EINVAL;
-
-	rkisp1->isp.frame_sequence = -1;
-	mutex_lock(&isp->ops_lock);
-	ret = rkisp1_config_cif(rkisp1);
-	if (ret)
-		goto mutex_unlock;
-
-	ret = rkisp1_mipi_csi2_start(&rkisp1->isp, rkisp1->active_sensor);
-	if (ret)
-		goto mutex_unlock;
-
-	rkisp1_isp_start(rkisp1);
-
-mutex_unlock:
-	mutex_unlock(&isp->ops_lock);
-	return ret;
-}
-
-static int rkisp1_isp_subs_evt(struct v4l2_subdev *sd, struct v4l2_fh *fh,
-			       struct v4l2_event_subscription *sub)
-{
-	if (sub->type != V4L2_EVENT_FRAME_SYNC)
-		return -EINVAL;
-
-	/* V4L2_EVENT_FRAME_SYNC doesn't require an id, so zero should be set */
-	if (sub->id != 0)
-		return -EINVAL;
-
-	return v4l2_event_subscribe(fh, sub, 0, NULL);
-}
-
-static const struct media_entity_operations rkisp1_isp_media_ops = {
-	.link_validate = rkisp1_subdev_link_validate,
-};
-
-static const struct v4l2_subdev_video_ops rkisp1_isp_video_ops = {
-	.s_stream = rkisp1_isp_s_stream,
-};
-
-static const struct v4l2_subdev_core_ops rkisp1_isp_core_ops = {
-	.subscribe_event = rkisp1_isp_subs_evt,
-	.unsubscribe_event = v4l2_event_subdev_unsubscribe,
-};
-
-static const struct v4l2_subdev_ops rkisp1_isp_ops = {
-	.core = &rkisp1_isp_core_ops,
-	.video = &rkisp1_isp_video_ops,
-	.pad = &rkisp1_isp_pad_ops,
-};
-
-int rkisp1_isp_register(struct rkisp1_device *rkisp1)
-{
-	struct rkisp1_isp *isp = &rkisp1->isp;
-	struct media_pad *pads = isp->pads;
-	struct v4l2_subdev *sd = &isp->sd;
-	int ret;
-
-	v4l2_subdev_init(sd, &rkisp1_isp_ops);
-	sd->flags |= V4L2_SUBDEV_FL_HAS_DEVNODE | V4L2_SUBDEV_FL_HAS_EVENTS;
-	sd->entity.ops = &rkisp1_isp_media_ops;
-	sd->entity.function = MEDIA_ENT_F_PROC_VIDEO_PIXEL_FORMATTER;
-	sd->owner = THIS_MODULE;
-	strscpy(sd->name, RKISP1_ISP_DEV_NAME, sizeof(sd->name));
-
-	pads[RKISP1_ISP_PAD_SINK_VIDEO].flags = MEDIA_PAD_FL_SINK |
-						MEDIA_PAD_FL_MUST_CONNECT;
-	pads[RKISP1_ISP_PAD_SINK_PARAMS].flags = MEDIA_PAD_FL_SINK;
-	pads[RKISP1_ISP_PAD_SOURCE_VIDEO].flags = MEDIA_PAD_FL_SOURCE;
-	pads[RKISP1_ISP_PAD_SOURCE_STATS].flags = MEDIA_PAD_FL_SOURCE;
-
-	isp->sink_fmt = rkisp1_isp_mbus_info_get(RKISP1_DEF_SINK_PAD_FMT);
-	isp->src_fmt = rkisp1_isp_mbus_info_get(RKISP1_DEF_SRC_PAD_FMT);
-
-	mutex_init(&isp->ops_lock);
-	ret = media_entity_pads_init(&sd->entity, RKISP1_ISP_PAD_MAX, pads);
-	if (ret)
-		return ret;
-
-	ret = v4l2_device_register_subdev(&rkisp1->v4l2_dev, sd);
-	if (ret) {
-		dev_err(rkisp1->dev, "Failed to register isp subdev\n");
-		goto err_cleanup_media_entity;
-	}
-
-	rkisp1_isp_init_config(sd, rkisp1->isp.pad_cfg);
-	return 0;
-
-err_cleanup_media_entity:
-	media_entity_cleanup(&sd->entity);
-
-	return ret;
-}
-
-void rkisp1_isp_unregister(struct rkisp1_device *rkisp1)
-{
-	struct v4l2_subdev *sd = &rkisp1->isp.sd;
-
-	v4l2_device_unregister_subdev(sd);
-	media_entity_cleanup(&sd->entity);
-}
-
-/* ----------------------------------------------------------------------------
- * Interrupt handlers
- */
-
-void rkisp1_mipi_isr(struct rkisp1_device *rkisp1)
-{
-	u32 val, status;
-
-	status = rkisp1_read(rkisp1, RKISP1_CIF_MIPI_MIS);
-	if (!status)
-		return;
-
-	rkisp1_write(rkisp1, status, RKISP1_CIF_MIPI_ICR);
-
-	/*
-	 * Disable DPHY errctrl interrupt, because this dphy
-	 * erctrl signal is asserted until the next changes
-	 * of line state. This time is may be too long and cpu
-	 * is hold in this interrupt.
-	 */
-	if (status & RKISP1_CIF_MIPI_ERR_CTRL(0x0f)) {
-		val = rkisp1_read(rkisp1, RKISP1_CIF_MIPI_IMSC);
-		rkisp1_write(rkisp1, val & ~RKISP1_CIF_MIPI_ERR_CTRL(0x0f),
-			     RKISP1_CIF_MIPI_IMSC);
-		rkisp1->isp.is_dphy_errctrl_disabled = true;
-	}
-
-	/*
-	 * Enable DPHY errctrl interrupt again, if mipi have receive
-	 * the whole frame without any error.
-	 */
-	if (status == RKISP1_CIF_MIPI_FRAME_END) {
-		/*
-		 * Enable DPHY errctrl interrupt again, if mipi have receive
-		 * the whole frame without any error.
-		 */
-		if (rkisp1->isp.is_dphy_errctrl_disabled) {
-			val = rkisp1_read(rkisp1, RKISP1_CIF_MIPI_IMSC);
-			val |= RKISP1_CIF_MIPI_ERR_CTRL(0x0f);
-			rkisp1_write(rkisp1, val, RKISP1_CIF_MIPI_IMSC);
-			rkisp1->isp.is_dphy_errctrl_disabled = false;
-		}
-	} else {
-		rkisp1->debug.mipi_error++;
-	}
-}
-
-static void rkisp1_isp_queue_event_sof(struct rkisp1_isp *isp)
-{
-	struct v4l2_event event = {
-		.type = V4L2_EVENT_FRAME_SYNC,
-	};
-	event.u.frame_sync.frame_sequence = isp->frame_sequence;
-
-	v4l2_event_queue(isp->sd.devnode, &event);
-}
-
-void rkisp1_isp_isr(struct rkisp1_device *rkisp1)
-{
-	u32 status, isp_err;
-
-	status = rkisp1_read(rkisp1, RKISP1_CIF_ISP_MIS);
-	if (!status)
-		return;
-
-	rkisp1_write(rkisp1, status, RKISP1_CIF_ISP_ICR);
-
-	/* Vertical sync signal, starting generating new frame */
-	if (status & RKISP1_CIF_ISP_V_START) {
-		rkisp1->isp.frame_sequence++;
-		rkisp1_isp_queue_event_sof(&rkisp1->isp);
-		if (status & RKISP1_CIF_ISP_FRAME) {
-			WARN_ONCE(1, "irq delay is too long, buffers might not be in sync\n");
-			rkisp1->debug.irq_delay++;
-		}
-	}
-	if (status & RKISP1_CIF_ISP_PIC_SIZE_ERROR) {
-		/* Clear pic_size_error */
-		isp_err = rkisp1_read(rkisp1, RKISP1_CIF_ISP_ERR);
-		if (isp_err & RKISP1_CIF_ISP_ERR_INFORM_SIZE)
-			rkisp1->debug.inform_size_error++;
-		if (isp_err & RKISP1_CIF_ISP_ERR_IS_SIZE)
-			rkisp1->debug.img_stabilization_size_error++;
-		if (isp_err & RKISP1_CIF_ISP_ERR_OUTFORM_SIZE)
-			rkisp1->debug.outform_size_error++;
-		rkisp1_write(rkisp1, isp_err, RKISP1_CIF_ISP_ERR_CLR);
-	} else if (status & RKISP1_CIF_ISP_DATA_LOSS) {
-		/* keep track of data_loss in debugfs */
-		rkisp1->debug.data_loss++;
-	}
-
-	if (status & RKISP1_CIF_ISP_FRAME) {
-		u32 isp_ris;
-
-		/* New frame from the sensor received */
-		isp_ris = rkisp1_read(rkisp1, RKISP1_CIF_ISP_RIS);
-		if (isp_ris & RKISP1_STATS_MEAS_MASK)
-			rkisp1_stats_isr(&rkisp1->stats, isp_ris);
-		/*
-		 * Then update changed configs. Some of them involve
-		 * lot of register writes. Do those only one per frame.
-		 * Do the updates in the order of the processing flow.
-		 */
-		rkisp1_params_isr(rkisp1);
-	}
-
-}
diff --git a/drivers/staging/media/rkisp1/rkisp1-params.c b/drivers/staging/media/rkisp1/rkisp1-params.c
deleted file mode 100644
index 298c16736b1c..000000000000
--- a/drivers/staging/media/rkisp1/rkisp1-params.c
+++ /dev/null
@@ -1,1572 +0,0 @@
-// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
-/*
- * Rockchip ISP1 Driver - Params subdevice
- *
- * Copyright (C) 2017 Rockchip Electronics Co., Ltd.
- */
-
-#include <media/v4l2-common.h>
-#include <media/v4l2-event.h>
-#include <media/v4l2-ioctl.h>
-#include <media/videobuf2-core.h>
-#include <media/videobuf2-vmalloc.h>	/* for ISP params */
-
-#include "rkisp1-common.h"
-
-#define RKISP1_PARAMS_DEV_NAME	RKISP1_DRIVER_NAME "_params"
-
-#define RKISP1_ISP_PARAMS_REQ_BUFS_MIN	2
-#define RKISP1_ISP_PARAMS_REQ_BUFS_MAX	8
-
-#define RKISP1_ISP_DPCC_LINE_THRESH(n) \
-			(RKISP1_CIF_ISP_DPCC_LINE_THRESH_1 + 0x14 * (n))
-#define RKISP1_ISP_DPCC_LINE_MAD_FAC(n) \
-			(RKISP1_CIF_ISP_DPCC_LINE_MAD_FAC_1 + 0x14 * (n))
-#define RKISP1_ISP_DPCC_PG_FAC(n) \
-			(RKISP1_CIF_ISP_DPCC_PG_FAC_1 + 0x14 * (n))
-#define RKISP1_ISP_DPCC_RND_THRESH(n) \
-			(RKISP1_CIF_ISP_DPCC_RND_THRESH_1 + 0x14 * (n))
-#define RKISP1_ISP_DPCC_RG_FAC(n) \
-			(RKISP1_CIF_ISP_DPCC_RG_FAC_1 + 0x14 * (n))
-#define RKISP1_ISP_CC_COEFF(n) \
-			(RKISP1_CIF_ISP_CC_COEFF_0 + (n) * 4)
-
-static inline void
-rkisp1_param_set_bits(struct rkisp1_params *params, u32 reg, u32 bit_mask)
-{
-	u32 val;
-
-	val = rkisp1_read(params->rkisp1, reg);
-	rkisp1_write(params->rkisp1, val | bit_mask, reg);
-}
-
-static inline void
-rkisp1_param_clear_bits(struct rkisp1_params *params, u32 reg, u32 bit_mask)
-{
-	u32 val;
-
-	val = rkisp1_read(params->rkisp1, reg);
-	rkisp1_write(params->rkisp1, val & ~bit_mask, reg);
-}
-
-/* ISP BP interface function */
-static void rkisp1_dpcc_config(struct rkisp1_params *params,
-			       const struct rkisp1_cif_isp_dpcc_config *arg)
-{
-	unsigned int i;
-	u32 mode;
-
-	/* avoid to override the old enable value */
-	mode = rkisp1_read(params->rkisp1, RKISP1_CIF_ISP_DPCC_MODE);
-	mode &= RKISP1_CIF_ISP_DPCC_ENA;
-	mode |= arg->mode & ~RKISP1_CIF_ISP_DPCC_ENA;
-	rkisp1_write(params->rkisp1, mode, RKISP1_CIF_ISP_DPCC_MODE);
-	rkisp1_write(params->rkisp1, arg->output_mode,
-		     RKISP1_CIF_ISP_DPCC_OUTPUT_MODE);
-	rkisp1_write(params->rkisp1, arg->set_use,
-		     RKISP1_CIF_ISP_DPCC_SET_USE);
-
-	rkisp1_write(params->rkisp1, arg->methods[0].method,
-		     RKISP1_CIF_ISP_DPCC_METHODS_SET_1);
-	rkisp1_write(params->rkisp1, arg->methods[1].method,
-		     RKISP1_CIF_ISP_DPCC_METHODS_SET_2);
-	rkisp1_write(params->rkisp1, arg->methods[2].method,
-		     RKISP1_CIF_ISP_DPCC_METHODS_SET_3);
-	for (i = 0; i < RKISP1_CIF_ISP_DPCC_METHODS_MAX; i++) {
-		rkisp1_write(params->rkisp1, arg->methods[i].line_thresh,
-			     RKISP1_ISP_DPCC_LINE_THRESH(i));
-		rkisp1_write(params->rkisp1, arg->methods[i].line_mad_fac,
-			     RKISP1_ISP_DPCC_LINE_MAD_FAC(i));
-		rkisp1_write(params->rkisp1, arg->methods[i].pg_fac,
-			     RKISP1_ISP_DPCC_PG_FAC(i));
-		rkisp1_write(params->rkisp1, arg->methods[i].rnd_thresh,
-			     RKISP1_ISP_DPCC_RND_THRESH(i));
-		rkisp1_write(params->rkisp1, arg->methods[i].rg_fac,
-			     RKISP1_ISP_DPCC_RG_FAC(i));
-	}
-
-	rkisp1_write(params->rkisp1, arg->rnd_offs,
-		     RKISP1_CIF_ISP_DPCC_RND_OFFS);
-	rkisp1_write(params->rkisp1, arg->ro_limits,
-		     RKISP1_CIF_ISP_DPCC_RO_LIMITS);
-}
-
-/* ISP black level subtraction interface function */
-static void rkisp1_bls_config(struct rkisp1_params *params,
-			      const struct rkisp1_cif_isp_bls_config *arg)
-{
-	/* avoid to override the old enable value */
-	u32 new_control;
-
-	new_control = rkisp1_read(params->rkisp1, RKISP1_CIF_ISP_BLS_CTRL);
-	new_control &= RKISP1_CIF_ISP_BLS_ENA;
-	/* fixed subtraction values */
-	if (!arg->enable_auto) {
-		const struct rkisp1_cif_isp_bls_fixed_val *pval =
-								&arg->fixed_val;
-
-		switch (params->raw_type) {
-		case RKISP1_RAW_BGGR:
-			rkisp1_write(params->rkisp1,
-				     pval->r, RKISP1_CIF_ISP_BLS_D_FIXED);
-			rkisp1_write(params->rkisp1,
-				     pval->gr, RKISP1_CIF_ISP_BLS_C_FIXED);
-			rkisp1_write(params->rkisp1,
-				     pval->gb, RKISP1_CIF_ISP_BLS_B_FIXED);
-			rkisp1_write(params->rkisp1,
-				     pval->b, RKISP1_CIF_ISP_BLS_A_FIXED);
-			break;
-		case RKISP1_RAW_GBRG:
-			rkisp1_write(params->rkisp1,
-				     pval->r, RKISP1_CIF_ISP_BLS_C_FIXED);
-			rkisp1_write(params->rkisp1,
-				     pval->gr, RKISP1_CIF_ISP_BLS_D_FIXED);
-			rkisp1_write(params->rkisp1,
-				     pval->gb, RKISP1_CIF_ISP_BLS_A_FIXED);
-			rkisp1_write(params->rkisp1,
-				     pval->b, RKISP1_CIF_ISP_BLS_B_FIXED);
-			break;
-		case RKISP1_RAW_GRBG:
-			rkisp1_write(params->rkisp1,
-				     pval->r, RKISP1_CIF_ISP_BLS_B_FIXED);
-			rkisp1_write(params->rkisp1,
-				     pval->gr, RKISP1_CIF_ISP_BLS_A_FIXED);
-			rkisp1_write(params->rkisp1,
-				     pval->gb, RKISP1_CIF_ISP_BLS_D_FIXED);
-			rkisp1_write(params->rkisp1,
-				     pval->b, RKISP1_CIF_ISP_BLS_C_FIXED);
-			break;
-		case RKISP1_RAW_RGGB:
-			rkisp1_write(params->rkisp1,
-				     pval->r, RKISP1_CIF_ISP_BLS_A_FIXED);
-			rkisp1_write(params->rkisp1,
-				     pval->gr, RKISP1_CIF_ISP_BLS_B_FIXED);
-			rkisp1_write(params->rkisp1,
-				     pval->gb, RKISP1_CIF_ISP_BLS_C_FIXED);
-			rkisp1_write(params->rkisp1,
-				     pval->b, RKISP1_CIF_ISP_BLS_D_FIXED);
-			break;
-		default:
-			break;
-		}
-
-	} else {
-		if (arg->en_windows & BIT(1)) {
-			rkisp1_write(params->rkisp1, arg->bls_window2.h_offs,
-				     RKISP1_CIF_ISP_BLS_H2_START);
-			rkisp1_write(params->rkisp1, arg->bls_window2.h_size,
-				     RKISP1_CIF_ISP_BLS_H2_STOP);
-			rkisp1_write(params->rkisp1, arg->bls_window2.v_offs,
-				     RKISP1_CIF_ISP_BLS_V2_START);
-			rkisp1_write(params->rkisp1, arg->bls_window2.v_size,
-				     RKISP1_CIF_ISP_BLS_V2_STOP);
-			new_control |= RKISP1_CIF_ISP_BLS_WINDOW_2;
-		}
-
-		if (arg->en_windows & BIT(0)) {
-			rkisp1_write(params->rkisp1, arg->bls_window1.h_offs,
-				     RKISP1_CIF_ISP_BLS_H1_START);
-			rkisp1_write(params->rkisp1, arg->bls_window1.h_size,
-				     RKISP1_CIF_ISP_BLS_H1_STOP);
-			rkisp1_write(params->rkisp1, arg->bls_window1.v_offs,
-				     RKISP1_CIF_ISP_BLS_V1_START);
-			rkisp1_write(params->rkisp1, arg->bls_window1.v_size,
-				     RKISP1_CIF_ISP_BLS_V1_STOP);
-			new_control |= RKISP1_CIF_ISP_BLS_WINDOW_1;
-		}
-
-		rkisp1_write(params->rkisp1, arg->bls_samples,
-			     RKISP1_CIF_ISP_BLS_SAMPLES);
-
-		new_control |= RKISP1_CIF_ISP_BLS_MODE_MEASURED;
-	}
-	rkisp1_write(params->rkisp1, new_control, RKISP1_CIF_ISP_BLS_CTRL);
-}
-
-/* ISP LS correction interface function */
-static void
-rkisp1_lsc_correct_matrix_config(struct rkisp1_params *params,
-				const struct rkisp1_cif_isp_lsc_config *pconfig)
-{
-	unsigned int isp_lsc_status, sram_addr, isp_lsc_table_sel, i, j, data;
-
-	isp_lsc_status = rkisp1_read(params->rkisp1, RKISP1_CIF_ISP_LSC_STATUS);
-
-	/* RKISP1_CIF_ISP_LSC_TABLE_ADDRESS_153 = ( 17 * 18 ) >> 1 */
-	sram_addr = (isp_lsc_status & RKISP1_CIF_ISP_LSC_ACTIVE_TABLE) ?
-		    RKISP1_CIF_ISP_LSC_TABLE_ADDRESS_0 :
-		    RKISP1_CIF_ISP_LSC_TABLE_ADDRESS_153;
-	rkisp1_write(params->rkisp1, sram_addr,
-		     RKISP1_CIF_ISP_LSC_R_TABLE_ADDR);
-	rkisp1_write(params->rkisp1, sram_addr,
-		     RKISP1_CIF_ISP_LSC_GR_TABLE_ADDR);
-	rkisp1_write(params->rkisp1, sram_addr,
-		     RKISP1_CIF_ISP_LSC_GB_TABLE_ADDR);
-	rkisp1_write(params->rkisp1, sram_addr,
-		     RKISP1_CIF_ISP_LSC_B_TABLE_ADDR);
-
-	/* program data tables (table size is 9 * 17 = 153) */
-	for (i = 0; i < RKISP1_CIF_ISP_LSC_SAMPLES_MAX; i++) {
-		/*
-		 * 17 sectors with 2 values in one DWORD = 9
-		 * DWORDs (2nd value of last DWORD unused)
-		 */
-		for (j = 0; j < RKISP1_CIF_ISP_LSC_SAMPLES_MAX - 1; j += 2) {
-			data = RKISP1_CIF_ISP_LSC_TABLE_DATA(pconfig->r_data_tbl[i][j],
-							     pconfig->r_data_tbl[i][j + 1]);
-			rkisp1_write(params->rkisp1, data,
-				     RKISP1_CIF_ISP_LSC_R_TABLE_DATA);
-
-			data = RKISP1_CIF_ISP_LSC_TABLE_DATA(pconfig->gr_data_tbl[i][j],
-							     pconfig->gr_data_tbl[i][j + 1]);
-			rkisp1_write(params->rkisp1, data,
-				     RKISP1_CIF_ISP_LSC_GR_TABLE_DATA);
-
-			data = RKISP1_CIF_ISP_LSC_TABLE_DATA(pconfig->gb_data_tbl[i][j],
-							     pconfig->gb_data_tbl[i][j + 1]);
-			rkisp1_write(params->rkisp1, data,
-				     RKISP1_CIF_ISP_LSC_GB_TABLE_DATA);
-
-			data = RKISP1_CIF_ISP_LSC_TABLE_DATA(pconfig->b_data_tbl[i][j],
-							     pconfig->b_data_tbl[i][j + 1]);
-			rkisp1_write(params->rkisp1, data,
-				     RKISP1_CIF_ISP_LSC_B_TABLE_DATA);
-		}
-		data = RKISP1_CIF_ISP_LSC_TABLE_DATA(pconfig->r_data_tbl[i][j], 0);
-		rkisp1_write(params->rkisp1, data,
-			     RKISP1_CIF_ISP_LSC_R_TABLE_DATA);
-
-		data = RKISP1_CIF_ISP_LSC_TABLE_DATA(pconfig->gr_data_tbl[i][j], 0);
-		rkisp1_write(params->rkisp1, data,
-			     RKISP1_CIF_ISP_LSC_GR_TABLE_DATA);
-
-		data = RKISP1_CIF_ISP_LSC_TABLE_DATA(pconfig->gb_data_tbl[i][j], 0);
-		rkisp1_write(params->rkisp1, data,
-			     RKISP1_CIF_ISP_LSC_GB_TABLE_DATA);
-
-		data = RKISP1_CIF_ISP_LSC_TABLE_DATA(pconfig->b_data_tbl[i][j], 0);
-		rkisp1_write(params->rkisp1, data,
-			     RKISP1_CIF_ISP_LSC_B_TABLE_DATA);
-	}
-	isp_lsc_table_sel = (isp_lsc_status & RKISP1_CIF_ISP_LSC_ACTIVE_TABLE) ?
-			    RKISP1_CIF_ISP_LSC_TABLE_0 :
-			    RKISP1_CIF_ISP_LSC_TABLE_1;
-	rkisp1_write(params->rkisp1, isp_lsc_table_sel,
-		     RKISP1_CIF_ISP_LSC_TABLE_SEL);
-}
-
-static void rkisp1_lsc_config(struct rkisp1_params *params,
-			      const struct rkisp1_cif_isp_lsc_config *arg)
-{
-	unsigned int i, data;
-	u32 lsc_ctrl;
-
-	/* To config must be off , store the current status firstly */
-	lsc_ctrl = rkisp1_read(params->rkisp1, RKISP1_CIF_ISP_LSC_CTRL);
-	rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_LSC_CTRL,
-				RKISP1_CIF_ISP_LSC_CTRL_ENA);
-	rkisp1_lsc_correct_matrix_config(params, arg);
-
-	for (i = 0; i < RKISP1_CIF_ISP_LSC_SECTORS_TBL_SIZE / 2; i++) {
-		/* program x size tables */
-		data = RKISP1_CIF_ISP_LSC_SECT_SIZE(arg->x_size_tbl[i * 2],
-						    arg->x_size_tbl[i * 2 + 1]);
-		rkisp1_write(params->rkisp1, data,
-			     RKISP1_CIF_ISP_LSC_XSIZE_01 + i * 4);
-
-		/* program x grad tables */
-		data = RKISP1_CIF_ISP_LSC_SECT_SIZE(arg->x_grad_tbl[i * 2],
-						    arg->x_grad_tbl[i * 2 + 1]);
-		rkisp1_write(params->rkisp1, data,
-			     RKISP1_CIF_ISP_LSC_XGRAD_01 + i * 4);
-
-		/* program y size tables */
-		data = RKISP1_CIF_ISP_LSC_SECT_SIZE(arg->y_size_tbl[i * 2],
-						    arg->y_size_tbl[i * 2 + 1]);
-		rkisp1_write(params->rkisp1, data,
-			     RKISP1_CIF_ISP_LSC_YSIZE_01 + i * 4);
-
-		/* program y grad tables */
-		data = RKISP1_CIF_ISP_LSC_SECT_SIZE(arg->y_grad_tbl[i * 2],
-						    arg->y_grad_tbl[i * 2 + 1]);
-		rkisp1_write(params->rkisp1, data,
-			     RKISP1_CIF_ISP_LSC_YGRAD_01 + i * 4);
-	}
-
-	/* restore the lsc ctrl status */
-	if (lsc_ctrl & RKISP1_CIF_ISP_LSC_CTRL_ENA) {
-		rkisp1_param_set_bits(params,
-				      RKISP1_CIF_ISP_LSC_CTRL,
-				      RKISP1_CIF_ISP_LSC_CTRL_ENA);
-	} else {
-		rkisp1_param_clear_bits(params,
-					RKISP1_CIF_ISP_LSC_CTRL,
-					RKISP1_CIF_ISP_LSC_CTRL_ENA);
-	}
-}
-
-/* ISP Filtering function */
-static void rkisp1_flt_config(struct rkisp1_params *params,
-			      const struct rkisp1_cif_isp_flt_config *arg)
-{
-	u32 filt_mode;
-
-	rkisp1_write(params->rkisp1,
-		     arg->thresh_bl0, RKISP1_CIF_ISP_FILT_THRESH_BL0);
-	rkisp1_write(params->rkisp1,
-		     arg->thresh_bl1, RKISP1_CIF_ISP_FILT_THRESH_BL1);
-	rkisp1_write(params->rkisp1,
-		     arg->thresh_sh0, RKISP1_CIF_ISP_FILT_THRESH_SH0);
-	rkisp1_write(params->rkisp1,
-		     arg->thresh_sh1, RKISP1_CIF_ISP_FILT_THRESH_SH1);
-	rkisp1_write(params->rkisp1, arg->fac_bl0, RKISP1_CIF_ISP_FILT_FAC_BL0);
-	rkisp1_write(params->rkisp1, arg->fac_bl1, RKISP1_CIF_ISP_FILT_FAC_BL1);
-	rkisp1_write(params->rkisp1, arg->fac_mid, RKISP1_CIF_ISP_FILT_FAC_MID);
-	rkisp1_write(params->rkisp1, arg->fac_sh0, RKISP1_CIF_ISP_FILT_FAC_SH0);
-	rkisp1_write(params->rkisp1, arg->fac_sh1, RKISP1_CIF_ISP_FILT_FAC_SH1);
-	rkisp1_write(params->rkisp1,
-		     arg->lum_weight, RKISP1_CIF_ISP_FILT_LUM_WEIGHT);
-
-	rkisp1_write(params->rkisp1,
-		     (arg->mode ? RKISP1_CIF_ISP_FLT_MODE_DNR : 0) |
-		     RKISP1_CIF_ISP_FLT_CHROMA_V_MODE(arg->chr_v_mode) |
-		     RKISP1_CIF_ISP_FLT_CHROMA_H_MODE(arg->chr_h_mode) |
-		     RKISP1_CIF_ISP_FLT_GREEN_STAGE1(arg->grn_stage1),
-		     RKISP1_CIF_ISP_FILT_MODE);
-
-	/* avoid to override the old enable value */
-	filt_mode = rkisp1_read(params->rkisp1, RKISP1_CIF_ISP_FILT_MODE);
-	filt_mode &= RKISP1_CIF_ISP_FLT_ENA;
-	if (arg->mode)
-		filt_mode |= RKISP1_CIF_ISP_FLT_MODE_DNR;
-	filt_mode |= RKISP1_CIF_ISP_FLT_CHROMA_V_MODE(arg->chr_v_mode) |
-		     RKISP1_CIF_ISP_FLT_CHROMA_H_MODE(arg->chr_h_mode) |
-		     RKISP1_CIF_ISP_FLT_GREEN_STAGE1(arg->grn_stage1);
-	rkisp1_write(params->rkisp1, filt_mode, RKISP1_CIF_ISP_FILT_MODE);
-}
-
-/* ISP demosaic interface function */
-static int rkisp1_bdm_config(struct rkisp1_params *params,
-			     const struct rkisp1_cif_isp_bdm_config *arg)
-{
-	u32 bdm_th;
-
-	/* avoid to override the old enable value */
-	bdm_th = rkisp1_read(params->rkisp1, RKISP1_CIF_ISP_DEMOSAIC);
-	bdm_th &= RKISP1_CIF_ISP_DEMOSAIC_BYPASS;
-	bdm_th |= arg->demosaic_th & ~RKISP1_CIF_ISP_DEMOSAIC_BYPASS;
-	/* set demosaic threshold */
-	rkisp1_write(params->rkisp1, bdm_th, RKISP1_CIF_ISP_DEMOSAIC);
-	return 0;
-}
-
-/* ISP GAMMA correction interface function */
-static void rkisp1_sdg_config(struct rkisp1_params *params,
-			      const struct rkisp1_cif_isp_sdg_config *arg)
-{
-	unsigned int i;
-
-	rkisp1_write(params->rkisp1,
-		     arg->xa_pnts.gamma_dx0, RKISP1_CIF_ISP_GAMMA_DX_LO);
-	rkisp1_write(params->rkisp1,
-		     arg->xa_pnts.gamma_dx1, RKISP1_CIF_ISP_GAMMA_DX_HI);
-
-	for (i = 0; i < RKISP1_CIF_ISP_DEGAMMA_CURVE_SIZE; i++) {
-		rkisp1_write(params->rkisp1, arg->curve_r.gamma_y[i],
-			     RKISP1_CIF_ISP_GAMMA_R_Y0 + i * 4);
-		rkisp1_write(params->rkisp1, arg->curve_g.gamma_y[i],
-			     RKISP1_CIF_ISP_GAMMA_G_Y0 + i * 4);
-		rkisp1_write(params->rkisp1, arg->curve_b.gamma_y[i],
-			     RKISP1_CIF_ISP_GAMMA_B_Y0 + i * 4);
-	}
-}
-
-/* ISP GAMMA correction interface function */
-static void rkisp1_goc_config(struct rkisp1_params *params,
-			      const struct rkisp1_cif_isp_goc_config *arg)
-{
-	unsigned int i;
-
-	rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_CTRL,
-				RKISP1_CIF_ISP_CTRL_ISP_GAMMA_OUT_ENA);
-	rkisp1_write(params->rkisp1, arg->mode, RKISP1_CIF_ISP_GAMMA_OUT_MODE);
-
-	for (i = 0; i < RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES; i++)
-		rkisp1_write(params->rkisp1, arg->gamma_y[i],
-			     RKISP1_CIF_ISP_GAMMA_OUT_Y_0 + i * 4);
-}
-
-/* ISP Cross Talk */
-static void rkisp1_ctk_config(struct rkisp1_params *params,
-			      const struct rkisp1_cif_isp_ctk_config *arg)
-{
-	unsigned int i, j, k = 0;
-
-	for (i = 0; i < 3; i++)
-		for (j = 0; j < 3; j++)
-			rkisp1_write(params->rkisp1, arg->coeff[i][j],
-				     RKISP1_CIF_ISP_CT_COEFF_0 + 4 * k++);
-	for (i = 0; i < 3; i++)
-		rkisp1_write(params->rkisp1, arg->ct_offset[i],
-			     RKISP1_CIF_ISP_CT_OFFSET_R + i * 4);
-}
-
-static void rkisp1_ctk_enable(struct rkisp1_params *params, bool en)
-{
-	if (en)
-		return;
-
-	/* Write back the default values. */
-	rkisp1_write(params->rkisp1, 0x80, RKISP1_CIF_ISP_CT_COEFF_0);
-	rkisp1_write(params->rkisp1, 0, RKISP1_CIF_ISP_CT_COEFF_1);
-	rkisp1_write(params->rkisp1, 0, RKISP1_CIF_ISP_CT_COEFF_2);
-	rkisp1_write(params->rkisp1, 0, RKISP1_CIF_ISP_CT_COEFF_3);
-	rkisp1_write(params->rkisp1, 0x80, RKISP1_CIF_ISP_CT_COEFF_4);
-	rkisp1_write(params->rkisp1, 0, RKISP1_CIF_ISP_CT_COEFF_5);
-	rkisp1_write(params->rkisp1, 0, RKISP1_CIF_ISP_CT_COEFF_6);
-	rkisp1_write(params->rkisp1, 0, RKISP1_CIF_ISP_CT_COEFF_7);
-	rkisp1_write(params->rkisp1, 0x80, RKISP1_CIF_ISP_CT_COEFF_8);
-
-	rkisp1_write(params->rkisp1, 0, RKISP1_CIF_ISP_CT_OFFSET_R);
-	rkisp1_write(params->rkisp1, 0, RKISP1_CIF_ISP_CT_OFFSET_G);
-	rkisp1_write(params->rkisp1, 0, RKISP1_CIF_ISP_CT_OFFSET_B);
-}
-
-/* ISP White Balance Mode */
-static void rkisp1_awb_meas_config(struct rkisp1_params *params,
-			const struct rkisp1_cif_isp_awb_meas_config *arg)
-{
-	u32 reg_val = 0;
-	/* based on the mode,configure the awb module */
-	if (arg->awb_mode == RKISP1_CIF_ISP_AWB_MODE_YCBCR) {
-		/* Reference Cb and Cr */
-		rkisp1_write(params->rkisp1,
-			     RKISP1_CIF_ISP_AWB_REF_CR_SET(arg->awb_ref_cr) |
-			     arg->awb_ref_cb, RKISP1_CIF_ISP_AWB_REF);
-		/* Yc Threshold */
-		rkisp1_write(params->rkisp1,
-			     RKISP1_CIF_ISP_AWB_MAX_Y_SET(arg->max_y) |
-			     RKISP1_CIF_ISP_AWB_MIN_Y_SET(arg->min_y) |
-			     RKISP1_CIF_ISP_AWB_MAX_CS_SET(arg->max_csum) |
-			     arg->min_c, RKISP1_CIF_ISP_AWB_THRESH);
-	}
-
-	reg_val = rkisp1_read(params->rkisp1, RKISP1_CIF_ISP_AWB_PROP);
-	if (arg->enable_ymax_cmp)
-		reg_val |= RKISP1_CIF_ISP_AWB_YMAX_CMP_EN;
-	else
-		reg_val &= ~RKISP1_CIF_ISP_AWB_YMAX_CMP_EN;
-	rkisp1_write(params->rkisp1, reg_val, RKISP1_CIF_ISP_AWB_PROP);
-
-	/* window offset */
-	rkisp1_write(params->rkisp1,
-		     arg->awb_wnd.v_offs, RKISP1_CIF_ISP_AWB_WND_V_OFFS);
-	rkisp1_write(params->rkisp1,
-		     arg->awb_wnd.h_offs, RKISP1_CIF_ISP_AWB_WND_H_OFFS);
-	/* AWB window size */
-	rkisp1_write(params->rkisp1,
-		     arg->awb_wnd.v_size, RKISP1_CIF_ISP_AWB_WND_V_SIZE);
-	rkisp1_write(params->rkisp1,
-		     arg->awb_wnd.h_size, RKISP1_CIF_ISP_AWB_WND_H_SIZE);
-	/* Number of frames */
-	rkisp1_write(params->rkisp1,
-		     arg->frames, RKISP1_CIF_ISP_AWB_FRAMES);
-}
-
-static void
-rkisp1_awb_meas_enable(struct rkisp1_params *params,
-		       const struct rkisp1_cif_isp_awb_meas_config *arg,
-		       bool en)
-{
-	u32 reg_val = rkisp1_read(params->rkisp1, RKISP1_CIF_ISP_AWB_PROP);
-
-	/* switch off */
-	reg_val &= RKISP1_CIF_ISP_AWB_MODE_MASK_NONE;
-
-	if (en) {
-		if (arg->awb_mode == RKISP1_CIF_ISP_AWB_MODE_RGB)
-			reg_val |= RKISP1_CIF_ISP_AWB_MODE_RGB_EN;
-		else
-			reg_val |= RKISP1_CIF_ISP_AWB_MODE_YCBCR_EN;
-
-		rkisp1_write(params->rkisp1, reg_val, RKISP1_CIF_ISP_AWB_PROP);
-
-		/* Measurements require AWB block be active. */
-		rkisp1_param_set_bits(params, RKISP1_CIF_ISP_CTRL,
-				      RKISP1_CIF_ISP_CTRL_ISP_AWB_ENA);
-	} else {
-		rkisp1_write(params->rkisp1,
-			     reg_val, RKISP1_CIF_ISP_AWB_PROP);
-		rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_CTRL,
-					RKISP1_CIF_ISP_CTRL_ISP_AWB_ENA);
-	}
-}
-
-static void
-rkisp1_awb_gain_config(struct rkisp1_params *params,
-		       const struct rkisp1_cif_isp_awb_gain_config *arg)
-{
-	rkisp1_write(params->rkisp1,
-		     RKISP1_CIF_ISP_AWB_GAIN_R_SET(arg->gain_green_r) |
-		     arg->gain_green_b, RKISP1_CIF_ISP_AWB_GAIN_G);
-
-	rkisp1_write(params->rkisp1,
-		     RKISP1_CIF_ISP_AWB_GAIN_R_SET(arg->gain_red) |
-		     arg->gain_blue, RKISP1_CIF_ISP_AWB_GAIN_RB);
-}
-
-static void rkisp1_aec_config(struct rkisp1_params *params,
-			      const struct rkisp1_cif_isp_aec_config *arg)
-{
-	unsigned int block_hsize, block_vsize;
-	u32 exp_ctrl;
-
-	/* avoid to override the old enable value */
-	exp_ctrl = rkisp1_read(params->rkisp1, RKISP1_CIF_ISP_EXP_CTRL);
-	exp_ctrl &= RKISP1_CIF_ISP_EXP_ENA;
-	if (arg->autostop)
-		exp_ctrl |= RKISP1_CIF_ISP_EXP_CTRL_AUTOSTOP;
-	if (arg->mode == RKISP1_CIF_ISP_EXP_MEASURING_MODE_1)
-		exp_ctrl |= RKISP1_CIF_ISP_EXP_CTRL_MEASMODE_1;
-	rkisp1_write(params->rkisp1, exp_ctrl, RKISP1_CIF_ISP_EXP_CTRL);
-
-	rkisp1_write(params->rkisp1,
-		     arg->meas_window.h_offs, RKISP1_CIF_ISP_EXP_H_OFFSET);
-	rkisp1_write(params->rkisp1,
-		     arg->meas_window.v_offs, RKISP1_CIF_ISP_EXP_V_OFFSET);
-
-	block_hsize = arg->meas_window.h_size /
-		      RKISP1_CIF_ISP_EXP_COLUMN_NUM - 1;
-	block_vsize = arg->meas_window.v_size /
-		      RKISP1_CIF_ISP_EXP_ROW_NUM - 1;
-
-	rkisp1_write(params->rkisp1,
-		     RKISP1_CIF_ISP_EXP_H_SIZE_SET(block_hsize),
-		     RKISP1_CIF_ISP_EXP_H_SIZE);
-	rkisp1_write(params->rkisp1,
-		     RKISP1_CIF_ISP_EXP_V_SIZE_SET(block_vsize),
-		     RKISP1_CIF_ISP_EXP_V_SIZE);
-}
-
-static void rkisp1_cproc_config(struct rkisp1_params *params,
-				const struct rkisp1_cif_isp_cproc_config *arg)
-{
-	struct rkisp1_cif_isp_isp_other_cfg *cur_other_cfg =
-		container_of(arg, struct rkisp1_cif_isp_isp_other_cfg, cproc_config);
-	struct rkisp1_cif_isp_ie_config *cur_ie_config =
-						&cur_other_cfg->ie_config;
-	u32 effect = cur_ie_config->effect;
-	u32 quantization = params->quantization;
-
-	rkisp1_write(params->rkisp1, arg->contrast, RKISP1_CIF_C_PROC_CONTRAST);
-	rkisp1_write(params->rkisp1, arg->hue, RKISP1_CIF_C_PROC_HUE);
-	rkisp1_write(params->rkisp1, arg->sat, RKISP1_CIF_C_PROC_SATURATION);
-	rkisp1_write(params->rkisp1, arg->brightness,
-		     RKISP1_CIF_C_PROC_BRIGHTNESS);
-
-	if (quantization != V4L2_QUANTIZATION_FULL_RANGE ||
-	    effect != V4L2_COLORFX_NONE) {
-		rkisp1_param_clear_bits(params, RKISP1_CIF_C_PROC_CTRL,
-					RKISP1_CIF_C_PROC_YOUT_FULL |
-					RKISP1_CIF_C_PROC_YIN_FULL |
-					RKISP1_CIF_C_PROC_COUT_FULL);
-	} else {
-		rkisp1_param_set_bits(params, RKISP1_CIF_C_PROC_CTRL,
-				      RKISP1_CIF_C_PROC_YOUT_FULL |
-				      RKISP1_CIF_C_PROC_YIN_FULL |
-				      RKISP1_CIF_C_PROC_COUT_FULL);
-	}
-}
-
-static void rkisp1_hst_config(struct rkisp1_params *params,
-			      const struct rkisp1_cif_isp_hst_config *arg)
-{
-	unsigned int block_hsize, block_vsize;
-	static const u32 hist_weight_regs[] = {
-		RKISP1_CIF_ISP_HIST_WEIGHT_00TO30,
-		RKISP1_CIF_ISP_HIST_WEIGHT_40TO21,
-		RKISP1_CIF_ISP_HIST_WEIGHT_31TO12,
-		RKISP1_CIF_ISP_HIST_WEIGHT_22TO03,
-		RKISP1_CIF_ISP_HIST_WEIGHT_13TO43,
-		RKISP1_CIF_ISP_HIST_WEIGHT_04TO34,
-		RKISP1_CIF_ISP_HIST_WEIGHT_44,
-	};
-	const u8 *weight;
-	unsigned int i;
-	u32 hist_prop;
-
-	/* avoid to override the old enable value */
-	hist_prop = rkisp1_read(params->rkisp1, RKISP1_CIF_ISP_HIST_PROP);
-	hist_prop &= RKISP1_CIF_ISP_HIST_PROP_MODE_MASK;
-	hist_prop |= RKISP1_CIF_ISP_HIST_PREDIV_SET(arg->histogram_predivider);
-	rkisp1_write(params->rkisp1, hist_prop, RKISP1_CIF_ISP_HIST_PROP);
-	rkisp1_write(params->rkisp1,
-		     arg->meas_window.h_offs,
-		     RKISP1_CIF_ISP_HIST_H_OFFS);
-	rkisp1_write(params->rkisp1,
-		     arg->meas_window.v_offs,
-		     RKISP1_CIF_ISP_HIST_V_OFFS);
-
-	block_hsize = arg->meas_window.h_size /
-		      RKISP1_CIF_ISP_HIST_COLUMN_NUM - 1;
-	block_vsize = arg->meas_window.v_size / RKISP1_CIF_ISP_HIST_ROW_NUM - 1;
-
-	rkisp1_write(params->rkisp1, block_hsize, RKISP1_CIF_ISP_HIST_H_SIZE);
-	rkisp1_write(params->rkisp1, block_vsize, RKISP1_CIF_ISP_HIST_V_SIZE);
-
-	weight = arg->hist_weight;
-	for (i = 0; i < ARRAY_SIZE(hist_weight_regs); ++i, weight += 4)
-		rkisp1_write(params->rkisp1,
-			     RKISP1_CIF_ISP_HIST_WEIGHT_SET(weight[0],
-							    weight[1],
-							    weight[2],
-							    weight[3]),
-				 hist_weight_regs[i]);
-}
-
-static void
-rkisp1_hst_enable(struct rkisp1_params *params,
-		  const struct rkisp1_cif_isp_hst_config *arg, bool en)
-{
-	if (en)	{
-		u32 hist_prop = rkisp1_read(params->rkisp1,
-					    RKISP1_CIF_ISP_HIST_PROP);
-
-		hist_prop &= ~RKISP1_CIF_ISP_HIST_PROP_MODE_MASK;
-		hist_prop |= arg->mode;
-		rkisp1_param_set_bits(params, RKISP1_CIF_ISP_HIST_PROP,
-				      hist_prop);
-	} else {
-		rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_HIST_PROP,
-					RKISP1_CIF_ISP_HIST_PROP_MODE_MASK);
-	}
-}
-
-static void rkisp1_afm_config(struct rkisp1_params *params,
-			      const struct rkisp1_cif_isp_afc_config *arg)
-{
-	size_t num_of_win = min_t(size_t, ARRAY_SIZE(arg->afm_win),
-				  arg->num_afm_win);
-	u32 afm_ctrl = rkisp1_read(params->rkisp1, RKISP1_CIF_ISP_AFM_CTRL);
-	unsigned int i;
-
-	/* Switch off to configure. */
-	rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_AFM_CTRL,
-				RKISP1_CIF_ISP_AFM_ENA);
-
-	for (i = 0; i < num_of_win; i++) {
-		rkisp1_write(params->rkisp1,
-			     RKISP1_CIF_ISP_AFM_WINDOW_X(arg->afm_win[i].h_offs) |
-			     RKISP1_CIF_ISP_AFM_WINDOW_Y(arg->afm_win[i].v_offs),
-			     RKISP1_CIF_ISP_AFM_LT_A + i * 8);
-		rkisp1_write(params->rkisp1,
-			     RKISP1_CIF_ISP_AFM_WINDOW_X(arg->afm_win[i].h_size +
-							 arg->afm_win[i].h_offs) |
-			     RKISP1_CIF_ISP_AFM_WINDOW_Y(arg->afm_win[i].v_size +
-							 arg->afm_win[i].v_offs),
-			     RKISP1_CIF_ISP_AFM_RB_A + i * 8);
-	}
-	rkisp1_write(params->rkisp1, arg->thres, RKISP1_CIF_ISP_AFM_THRES);
-	rkisp1_write(params->rkisp1, arg->var_shift,
-		     RKISP1_CIF_ISP_AFM_VAR_SHIFT);
-	/* restore afm status */
-	rkisp1_write(params->rkisp1, afm_ctrl, RKISP1_CIF_ISP_AFM_CTRL);
-}
-
-static void rkisp1_ie_config(struct rkisp1_params *params,
-			     const struct rkisp1_cif_isp_ie_config *arg)
-{
-	u32 eff_ctrl;
-
-	eff_ctrl = rkisp1_read(params->rkisp1, RKISP1_CIF_IMG_EFF_CTRL);
-	eff_ctrl &= ~RKISP1_CIF_IMG_EFF_CTRL_MODE_MASK;
-
-	if (params->quantization == V4L2_QUANTIZATION_FULL_RANGE)
-		eff_ctrl |= RKISP1_CIF_IMG_EFF_CTRL_YCBCR_FULL;
-
-	switch (arg->effect) {
-	case V4L2_COLORFX_SEPIA:
-		eff_ctrl |= RKISP1_CIF_IMG_EFF_CTRL_MODE_SEPIA;
-		break;
-	case V4L2_COLORFX_SET_CBCR:
-		rkisp1_write(params->rkisp1, arg->eff_tint,
-			     RKISP1_CIF_IMG_EFF_TINT);
-		eff_ctrl |= RKISP1_CIF_IMG_EFF_CTRL_MODE_SEPIA;
-		break;
-		/*
-		 * Color selection is similar to water color(AQUA):
-		 * grayscale + selected color w threshold
-		 */
-	case V4L2_COLORFX_AQUA:
-		eff_ctrl |= RKISP1_CIF_IMG_EFF_CTRL_MODE_COLOR_SEL;
-		rkisp1_write(params->rkisp1, arg->color_sel,
-			     RKISP1_CIF_IMG_EFF_COLOR_SEL);
-		break;
-	case V4L2_COLORFX_EMBOSS:
-		eff_ctrl |= RKISP1_CIF_IMG_EFF_CTRL_MODE_EMBOSS;
-		rkisp1_write(params->rkisp1, arg->eff_mat_1,
-			     RKISP1_CIF_IMG_EFF_MAT_1);
-		rkisp1_write(params->rkisp1, arg->eff_mat_2,
-			     RKISP1_CIF_IMG_EFF_MAT_2);
-		rkisp1_write(params->rkisp1, arg->eff_mat_3,
-			     RKISP1_CIF_IMG_EFF_MAT_3);
-		break;
-	case V4L2_COLORFX_SKETCH:
-		eff_ctrl |= RKISP1_CIF_IMG_EFF_CTRL_MODE_SKETCH;
-		rkisp1_write(params->rkisp1, arg->eff_mat_3,
-			     RKISP1_CIF_IMG_EFF_MAT_3);
-		rkisp1_write(params->rkisp1, arg->eff_mat_4,
-			     RKISP1_CIF_IMG_EFF_MAT_4);
-		rkisp1_write(params->rkisp1, arg->eff_mat_5,
-			     RKISP1_CIF_IMG_EFF_MAT_5);
-		break;
-	case V4L2_COLORFX_BW:
-		eff_ctrl |= RKISP1_CIF_IMG_EFF_CTRL_MODE_BLACKWHITE;
-		break;
-	case V4L2_COLORFX_NEGATIVE:
-		eff_ctrl |= RKISP1_CIF_IMG_EFF_CTRL_MODE_NEGATIVE;
-		break;
-	default:
-		break;
-	}
-
-	rkisp1_write(params->rkisp1, eff_ctrl, RKISP1_CIF_IMG_EFF_CTRL);
-}
-
-static void rkisp1_ie_enable(struct rkisp1_params *params, bool en)
-{
-	if (en) {
-		rkisp1_param_set_bits(params, RKISP1_CIF_ICCL,
-				      RKISP1_CIF_ICCL_IE_CLK);
-		rkisp1_write(params->rkisp1, RKISP1_CIF_IMG_EFF_CTRL_ENABLE,
-			     RKISP1_CIF_IMG_EFF_CTRL);
-		rkisp1_param_set_bits(params, RKISP1_CIF_IMG_EFF_CTRL,
-				      RKISP1_CIF_IMG_EFF_CTRL_CFG_UPD);
-	} else {
-		rkisp1_param_clear_bits(params, RKISP1_CIF_IMG_EFF_CTRL,
-					RKISP1_CIF_IMG_EFF_CTRL_ENABLE);
-		rkisp1_param_clear_bits(params, RKISP1_CIF_ICCL,
-					RKISP1_CIF_ICCL_IE_CLK);
-	}
-}
-
-static void rkisp1_csm_config(struct rkisp1_params *params, bool full_range)
-{
-	static const u16 full_range_coeff[] = {
-		0x0026, 0x004b, 0x000f,
-		0x01ea, 0x01d6, 0x0040,
-		0x0040, 0x01ca, 0x01f6
-	};
-	static const u16 limited_range_coeff[] = {
-		0x0021, 0x0040, 0x000d,
-		0x01ed, 0x01db, 0x0038,
-		0x0038, 0x01d1, 0x01f7,
-	};
-	unsigned int i;
-
-	if (full_range) {
-		for (i = 0; i < ARRAY_SIZE(full_range_coeff); i++)
-			rkisp1_write(params->rkisp1, full_range_coeff[i],
-				     RKISP1_CIF_ISP_CC_COEFF_0 + i * 4);
-
-		rkisp1_param_set_bits(params, RKISP1_CIF_ISP_CTRL,
-				      RKISP1_CIF_ISP_CTRL_ISP_CSM_Y_FULL_ENA |
-				      RKISP1_CIF_ISP_CTRL_ISP_CSM_C_FULL_ENA);
-	} else {
-		for (i = 0; i < ARRAY_SIZE(limited_range_coeff); i++)
-			rkisp1_write(params->rkisp1, limited_range_coeff[i],
-				     RKISP1_CIF_ISP_CC_COEFF_0 + i * 4);
-
-		rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_CTRL,
-					RKISP1_CIF_ISP_CTRL_ISP_CSM_Y_FULL_ENA |
-					RKISP1_CIF_ISP_CTRL_ISP_CSM_C_FULL_ENA);
-	}
-}
-
-/* ISP De-noise Pre-Filter(DPF) function */
-static void rkisp1_dpf_config(struct rkisp1_params *params,
-			      const struct rkisp1_cif_isp_dpf_config *arg)
-{
-	unsigned int isp_dpf_mode, spatial_coeff, i;
-
-	switch (arg->gain.mode) {
-	case RKISP1_CIF_ISP_DPF_GAIN_USAGE_NF_GAINS:
-		isp_dpf_mode = RKISP1_CIF_ISP_DPF_MODE_USE_NF_GAIN |
-			       RKISP1_CIF_ISP_DPF_MODE_AWB_GAIN_COMP;
-		break;
-	case RKISP1_CIF_ISP_DPF_GAIN_USAGE_LSC_GAINS:
-		isp_dpf_mode = RKISP1_CIF_ISP_DPF_MODE_LSC_GAIN_COMP;
-		break;
-	case RKISP1_CIF_ISP_DPF_GAIN_USAGE_NF_LSC_GAINS:
-		isp_dpf_mode = RKISP1_CIF_ISP_DPF_MODE_USE_NF_GAIN |
-			       RKISP1_CIF_ISP_DPF_MODE_AWB_GAIN_COMP |
-			       RKISP1_CIF_ISP_DPF_MODE_LSC_GAIN_COMP;
-		break;
-	case RKISP1_CIF_ISP_DPF_GAIN_USAGE_AWB_GAINS:
-		isp_dpf_mode = RKISP1_CIF_ISP_DPF_MODE_AWB_GAIN_COMP;
-		break;
-	case RKISP1_CIF_ISP_DPF_GAIN_USAGE_AWB_LSC_GAINS:
-		isp_dpf_mode = RKISP1_CIF_ISP_DPF_MODE_LSC_GAIN_COMP |
-			       RKISP1_CIF_ISP_DPF_MODE_AWB_GAIN_COMP;
-		break;
-	case RKISP1_CIF_ISP_DPF_GAIN_USAGE_DISABLED:
-	default:
-		isp_dpf_mode = 0;
-		break;
-	}
-
-	if (arg->nll.scale_mode == RKISP1_CIF_ISP_NLL_SCALE_LOGARITHMIC)
-		isp_dpf_mode |= RKISP1_CIF_ISP_DPF_MODE_NLL_SEGMENTATION;
-	if (arg->rb_flt.fltsize == RKISP1_CIF_ISP_DPF_RB_FILTERSIZE_9x9)
-		isp_dpf_mode |= RKISP1_CIF_ISP_DPF_MODE_RB_FLTSIZE_9x9;
-	if (!arg->rb_flt.r_enable)
-		isp_dpf_mode |= RKISP1_CIF_ISP_DPF_MODE_R_FLT_DIS;
-	if (!arg->rb_flt.b_enable)
-		isp_dpf_mode |= RKISP1_CIF_ISP_DPF_MODE_B_FLT_DIS;
-	if (!arg->g_flt.gb_enable)
-		isp_dpf_mode |= RKISP1_CIF_ISP_DPF_MODE_GB_FLT_DIS;
-	if (!arg->g_flt.gr_enable)
-		isp_dpf_mode |= RKISP1_CIF_ISP_DPF_MODE_GR_FLT_DIS;
-
-	rkisp1_param_set_bits(params, RKISP1_CIF_ISP_DPF_MODE,
-			      isp_dpf_mode);
-	rkisp1_write(params->rkisp1, arg->gain.nf_b_gain,
-		     RKISP1_CIF_ISP_DPF_NF_GAIN_B);
-	rkisp1_write(params->rkisp1, arg->gain.nf_r_gain,
-		     RKISP1_CIF_ISP_DPF_NF_GAIN_R);
-	rkisp1_write(params->rkisp1, arg->gain.nf_gb_gain,
-		     RKISP1_CIF_ISP_DPF_NF_GAIN_GB);
-	rkisp1_write(params->rkisp1, arg->gain.nf_gr_gain,
-		     RKISP1_CIF_ISP_DPF_NF_GAIN_GR);
-
-	for (i = 0; i < RKISP1_CIF_ISP_DPF_MAX_NLF_COEFFS; i++) {
-		rkisp1_write(params->rkisp1, arg->nll.coeff[i],
-			     RKISP1_CIF_ISP_DPF_NULL_COEFF_0 + i * 4);
-	}
-
-	spatial_coeff = arg->g_flt.spatial_coeff[0] |
-			(arg->g_flt.spatial_coeff[1] << 8) |
-			(arg->g_flt.spatial_coeff[2] << 16) |
-			(arg->g_flt.spatial_coeff[3] << 24);
-	rkisp1_write(params->rkisp1, spatial_coeff,
-		     RKISP1_CIF_ISP_DPF_S_WEIGHT_G_1_4);
-
-	spatial_coeff = arg->g_flt.spatial_coeff[4] |
-			(arg->g_flt.spatial_coeff[5] << 8);
-	rkisp1_write(params->rkisp1, spatial_coeff,
-		     RKISP1_CIF_ISP_DPF_S_WEIGHT_G_5_6);
-
-	spatial_coeff = arg->rb_flt.spatial_coeff[0] |
-			(arg->rb_flt.spatial_coeff[1] << 8) |
-			(arg->rb_flt.spatial_coeff[2] << 16) |
-			(arg->rb_flt.spatial_coeff[3] << 24);
-	rkisp1_write(params->rkisp1, spatial_coeff,
-		     RKISP1_CIF_ISP_DPF_S_WEIGHT_RB_1_4);
-
-	spatial_coeff = arg->rb_flt.spatial_coeff[4] |
-			(arg->rb_flt.spatial_coeff[5] << 8);
-	rkisp1_write(params->rkisp1, spatial_coeff,
-		     RKISP1_CIF_ISP_DPF_S_WEIGHT_RB_5_6);
-}
-
-static void
-rkisp1_dpf_strength_config(struct rkisp1_params *params,
-			   const struct rkisp1_cif_isp_dpf_strength_config *arg)
-{
-	rkisp1_write(params->rkisp1, arg->b, RKISP1_CIF_ISP_DPF_STRENGTH_B);
-	rkisp1_write(params->rkisp1, arg->g, RKISP1_CIF_ISP_DPF_STRENGTH_G);
-	rkisp1_write(params->rkisp1, arg->r, RKISP1_CIF_ISP_DPF_STRENGTH_R);
-}
-
-static void
-rkisp1_isp_isr_other_config(struct rkisp1_params *params,
-			    const struct rkisp1_params_cfg *new_params)
-{
-	unsigned int module_en_update, module_cfg_update, module_ens;
-
-	module_en_update = new_params->module_en_update;
-	module_cfg_update = new_params->module_cfg_update;
-	module_ens = new_params->module_ens;
-
-	if ((module_en_update & RKISP1_CIF_ISP_MODULE_DPCC) ||
-	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_DPCC)) {
-		/*update dpc config */
-		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_DPCC)
-			rkisp1_dpcc_config(params,
-					   &new_params->others.dpcc_config);
-
-		if (module_en_update & RKISP1_CIF_ISP_MODULE_DPCC) {
-			if (module_ens & RKISP1_CIF_ISP_MODULE_DPCC)
-				rkisp1_param_set_bits(params,
-						      RKISP1_CIF_ISP_DPCC_MODE,
-						      RKISP1_CIF_ISP_DPCC_ENA);
-			else
-				rkisp1_param_clear_bits(params,
-						RKISP1_CIF_ISP_DPCC_MODE,
-						RKISP1_CIF_ISP_DPCC_ENA);
-		}
-	}
-
-	if ((module_en_update & RKISP1_CIF_ISP_MODULE_BLS) ||
-	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_BLS)) {
-		/* update bls config */
-		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_BLS)
-			rkisp1_bls_config(params,
-					  &new_params->others.bls_config);
-
-		if (module_en_update & RKISP1_CIF_ISP_MODULE_BLS) {
-			if (module_ens & RKISP1_CIF_ISP_MODULE_BLS)
-				rkisp1_param_set_bits(params,
-						      RKISP1_CIF_ISP_BLS_CTRL,
-						      RKISP1_CIF_ISP_BLS_ENA);
-			else
-				rkisp1_param_clear_bits(params,
-							RKISP1_CIF_ISP_BLS_CTRL,
-							RKISP1_CIF_ISP_BLS_ENA);
-		}
-	}
-
-	if ((module_en_update & RKISP1_CIF_ISP_MODULE_SDG) ||
-	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_SDG)) {
-		/* update sdg config */
-		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_SDG)
-			rkisp1_sdg_config(params,
-					  &new_params->others.sdg_config);
-
-		if (module_en_update & RKISP1_CIF_ISP_MODULE_SDG) {
-			if (module_ens & RKISP1_CIF_ISP_MODULE_SDG)
-				rkisp1_param_set_bits(params,
-					RKISP1_CIF_ISP_CTRL,
-					RKISP1_CIF_ISP_CTRL_ISP_GAMMA_IN_ENA);
-			else
-				rkisp1_param_clear_bits(params,
-					RKISP1_CIF_ISP_CTRL,
-					RKISP1_CIF_ISP_CTRL_ISP_GAMMA_IN_ENA);
-		}
-	}
-
-	if ((module_en_update & RKISP1_CIF_ISP_MODULE_LSC) ||
-	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_LSC)) {
-		/* update lsc config */
-		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_LSC)
-			rkisp1_lsc_config(params,
-					  &new_params->others.lsc_config);
-
-		if (module_en_update & RKISP1_CIF_ISP_MODULE_LSC) {
-			if (module_ens & RKISP1_CIF_ISP_MODULE_LSC)
-				rkisp1_param_set_bits(params,
-						RKISP1_CIF_ISP_LSC_CTRL,
-						RKISP1_CIF_ISP_LSC_CTRL_ENA);
-			else
-				rkisp1_param_clear_bits(params,
-						RKISP1_CIF_ISP_LSC_CTRL,
-						RKISP1_CIF_ISP_LSC_CTRL_ENA);
-		}
-	}
-
-	if ((module_en_update & RKISP1_CIF_ISP_MODULE_AWB_GAIN) ||
-	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_AWB_GAIN)) {
-		/* update awb gains */
-		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_AWB_GAIN)
-			rkisp1_awb_gain_config(params,
-					&new_params->others.awb_gain_config);
-
-		if (module_en_update & RKISP1_CIF_ISP_MODULE_AWB_GAIN) {
-			if (module_ens & RKISP1_CIF_ISP_MODULE_AWB_GAIN)
-				rkisp1_param_set_bits(params,
-					RKISP1_CIF_ISP_CTRL,
-					RKISP1_CIF_ISP_CTRL_ISP_AWB_ENA);
-			else
-				rkisp1_param_clear_bits(params,
-					RKISP1_CIF_ISP_CTRL,
-					RKISP1_CIF_ISP_CTRL_ISP_AWB_ENA);
-		}
-	}
-
-	if ((module_en_update & RKISP1_CIF_ISP_MODULE_BDM) ||
-	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_BDM)) {
-		/* update bdm config */
-		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_BDM)
-			rkisp1_bdm_config(params,
-					  &new_params->others.bdm_config);
-
-		if (module_en_update & RKISP1_CIF_ISP_MODULE_BDM) {
-			if (module_ens & RKISP1_CIF_ISP_MODULE_BDM)
-				rkisp1_param_set_bits(params,
-						RKISP1_CIF_ISP_DEMOSAIC,
-						RKISP1_CIF_ISP_DEMOSAIC_BYPASS);
-			else
-				rkisp1_param_clear_bits(params,
-						RKISP1_CIF_ISP_DEMOSAIC,
-						RKISP1_CIF_ISP_DEMOSAIC_BYPASS);
-		}
-	}
-
-	if ((module_en_update & RKISP1_CIF_ISP_MODULE_FLT) ||
-	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_FLT)) {
-		/* update filter config */
-		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_FLT)
-			rkisp1_flt_config(params,
-					  &new_params->others.flt_config);
-
-		if (module_en_update & RKISP1_CIF_ISP_MODULE_FLT) {
-			if (module_ens & RKISP1_CIF_ISP_MODULE_FLT)
-				rkisp1_param_set_bits(params,
-						      RKISP1_CIF_ISP_FILT_MODE,
-						      RKISP1_CIF_ISP_FLT_ENA);
-			else
-				rkisp1_param_clear_bits(params,
-						RKISP1_CIF_ISP_FILT_MODE,
-						RKISP1_CIF_ISP_FLT_ENA);
-		}
-	}
-
-	if ((module_en_update & RKISP1_CIF_ISP_MODULE_CTK) ||
-	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_CTK)) {
-		/* update ctk config */
-		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_CTK)
-			rkisp1_ctk_config(params,
-					  &new_params->others.ctk_config);
-
-		if (module_en_update & RKISP1_CIF_ISP_MODULE_CTK)
-			rkisp1_ctk_enable(params,
-				!!(module_ens & RKISP1_CIF_ISP_MODULE_CTK));
-	}
-
-	if ((module_en_update & RKISP1_CIF_ISP_MODULE_GOC) ||
-	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_GOC)) {
-		/* update goc config */
-		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_GOC)
-			rkisp1_goc_config(params,
-					  &new_params->others.goc_config);
-
-		if (module_en_update & RKISP1_CIF_ISP_MODULE_GOC) {
-			if (module_ens & RKISP1_CIF_ISP_MODULE_GOC)
-				rkisp1_param_set_bits(params,
-					RKISP1_CIF_ISP_CTRL,
-					RKISP1_CIF_ISP_CTRL_ISP_GAMMA_OUT_ENA);
-			else
-				rkisp1_param_clear_bits(params,
-					RKISP1_CIF_ISP_CTRL,
-					RKISP1_CIF_ISP_CTRL_ISP_GAMMA_OUT_ENA);
-		}
-	}
-
-	if ((module_en_update & RKISP1_CIF_ISP_MODULE_CPROC) ||
-	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_CPROC)) {
-		/* update cproc config */
-		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_CPROC) {
-			rkisp1_cproc_config(params,
-					    &new_params->others.cproc_config);
-		}
-
-		if (module_en_update & RKISP1_CIF_ISP_MODULE_CPROC) {
-			if (module_ens & RKISP1_CIF_ISP_MODULE_CPROC)
-				rkisp1_param_set_bits(params,
-						RKISP1_CIF_C_PROC_CTRL,
-						RKISP1_CIF_C_PROC_CTR_ENABLE);
-			else
-				rkisp1_param_clear_bits(params,
-						RKISP1_CIF_C_PROC_CTRL,
-						RKISP1_CIF_C_PROC_CTR_ENABLE);
-		}
-	}
-
-	if ((module_en_update & RKISP1_CIF_ISP_MODULE_IE) ||
-	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_IE)) {
-		/* update ie config */
-		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_IE)
-			rkisp1_ie_config(params,
-					 &new_params->others.ie_config);
-
-		if (module_en_update & RKISP1_CIF_ISP_MODULE_IE)
-			rkisp1_ie_enable(params,
-				!!(module_ens & RKISP1_CIF_ISP_MODULE_IE));
-	}
-
-	if ((module_en_update & RKISP1_CIF_ISP_MODULE_DPF) ||
-	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_DPF)) {
-		/* update dpf  config */
-		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_DPF)
-			rkisp1_dpf_config(params,
-					  &new_params->others.dpf_config);
-
-		if (module_en_update & RKISP1_CIF_ISP_MODULE_DPF) {
-			if (module_ens & RKISP1_CIF_ISP_MODULE_DPF)
-				rkisp1_param_set_bits(params,
-						   RKISP1_CIF_ISP_DPF_MODE,
-						   RKISP1_CIF_ISP_DPF_MODE_EN);
-			else
-				rkisp1_param_clear_bits(params,
-						RKISP1_CIF_ISP_DPF_MODE,
-						RKISP1_CIF_ISP_DPF_MODE_EN);
-		}
-	}
-
-	if ((module_en_update & RKISP1_CIF_ISP_MODULE_DPF_STRENGTH) ||
-	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_DPF_STRENGTH)) {
-		/* update dpf strength config */
-		rkisp1_dpf_strength_config(params,
-				&new_params->others.dpf_strength_config);
-	}
-}
-
-static void rkisp1_isp_isr_meas_config(struct rkisp1_params *params,
-				       struct  rkisp1_params_cfg *new_params)
-{
-	unsigned int module_en_update, module_cfg_update, module_ens;
-
-	module_en_update = new_params->module_en_update;
-	module_cfg_update = new_params->module_cfg_update;
-	module_ens = new_params->module_ens;
-
-	if ((module_en_update & RKISP1_CIF_ISP_MODULE_AWB) ||
-	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_AWB)) {
-		/* update awb config */
-		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_AWB)
-			rkisp1_awb_meas_config(params,
-					&new_params->meas.awb_meas_config);
-
-		if (module_en_update & RKISP1_CIF_ISP_MODULE_AWB)
-			rkisp1_awb_meas_enable(params,
-				&new_params->meas.awb_meas_config,
-				!!(module_ens & RKISP1_CIF_ISP_MODULE_AWB));
-	}
-
-	if ((module_en_update & RKISP1_CIF_ISP_MODULE_AFC) ||
-	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_AFC)) {
-		/* update afc config */
-		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_AFC)
-			rkisp1_afm_config(params,
-					  &new_params->meas.afc_config);
-
-		if (module_en_update & RKISP1_CIF_ISP_MODULE_AFC) {
-			if (module_ens & RKISP1_CIF_ISP_MODULE_AFC)
-				rkisp1_param_set_bits(params,
-						      RKISP1_CIF_ISP_AFM_CTRL,
-						      RKISP1_CIF_ISP_AFM_ENA);
-			else
-				rkisp1_param_clear_bits(params,
-							RKISP1_CIF_ISP_AFM_CTRL,
-							RKISP1_CIF_ISP_AFM_ENA);
-		}
-	}
-
-	if ((module_en_update & RKISP1_CIF_ISP_MODULE_HST) ||
-	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_HST)) {
-		/* update hst config */
-		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_HST)
-			rkisp1_hst_config(params,
-					  &new_params->meas.hst_config);
-
-		if (module_en_update & RKISP1_CIF_ISP_MODULE_HST)
-			rkisp1_hst_enable(params,
-				&new_params->meas.hst_config,
-				!!(module_ens & RKISP1_CIF_ISP_MODULE_HST));
-	}
-
-	if ((module_en_update & RKISP1_CIF_ISP_MODULE_AEC) ||
-	    (module_cfg_update & RKISP1_CIF_ISP_MODULE_AEC)) {
-		/* update aec config */
-		if (module_cfg_update & RKISP1_CIF_ISP_MODULE_AEC)
-			rkisp1_aec_config(params,
-					  &new_params->meas.aec_config);
-
-		if (module_en_update & RKISP1_CIF_ISP_MODULE_AEC) {
-			if (module_ens & RKISP1_CIF_ISP_MODULE_AEC)
-				rkisp1_param_set_bits(params,
-						      RKISP1_CIF_ISP_EXP_CTRL,
-						      RKISP1_CIF_ISP_EXP_ENA);
-			else
-				rkisp1_param_clear_bits(params,
-							RKISP1_CIF_ISP_EXP_CTRL,
-							RKISP1_CIF_ISP_EXP_ENA);
-		}
-	}
-}
-
-static void rkisp1_params_apply_params_cfg(struct rkisp1_params *params,
-					   unsigned int frame_sequence)
-{
-	struct rkisp1_params_cfg *new_params;
-	struct rkisp1_buffer *cur_buf = NULL;
-
-	if (list_empty(&params->params))
-		return;
-
-	cur_buf = list_first_entry(&params->params,
-				   struct rkisp1_buffer, queue);
-
-	new_params = (struct rkisp1_params_cfg *)(cur_buf->vaddr);
-
-	rkisp1_isp_isr_other_config(params, new_params);
-	rkisp1_isp_isr_meas_config(params, new_params);
-
-	/* update shadow register immediately */
-	rkisp1_param_set_bits(params, RKISP1_CIF_ISP_CTRL, RKISP1_CIF_ISP_CTRL_ISP_CFG_UPD);
-
-	list_del(&cur_buf->queue);
-
-	cur_buf->vb.sequence = frame_sequence;
-	vb2_buffer_done(&cur_buf->vb.vb2_buf, VB2_BUF_STATE_DONE);
-}
-
-void rkisp1_params_isr(struct rkisp1_device *rkisp1)
-{
-	/*
-	 * This isr is called when the ISR finishes processing a frame (RKISP1_CIF_ISP_FRAME).
-	 * Configurations performed here will be applied on the next frame.
-	 * Since frame_sequence is updated on the vertical sync signal, we should use
-	 * frame_sequence + 1 here to indicate to userspace on which frame these parameters
-	 * are being applied.
-	 */
-	unsigned int frame_sequence = rkisp1->isp.frame_sequence + 1;
-	struct rkisp1_params *params = &rkisp1->params;
-
-	spin_lock(&params->config_lock);
-	rkisp1_params_apply_params_cfg(params, frame_sequence);
-
-	spin_unlock(&params->config_lock);
-}
-
-static const struct rkisp1_cif_isp_awb_meas_config rkisp1_awb_params_default_config = {
-	{
-		0, 0, RKISP1_DEFAULT_WIDTH, RKISP1_DEFAULT_HEIGHT
-	},
-	RKISP1_CIF_ISP_AWB_MODE_YCBCR, 200, 30, 20, 20, 0, 128, 128
-};
-
-static const struct rkisp1_cif_isp_aec_config rkisp1_aec_params_default_config = {
-	RKISP1_CIF_ISP_EXP_MEASURING_MODE_0,
-	RKISP1_CIF_ISP_EXP_CTRL_AUTOSTOP_0,
-	{
-		RKISP1_DEFAULT_WIDTH >> 2, RKISP1_DEFAULT_HEIGHT >> 2,
-		RKISP1_DEFAULT_WIDTH >> 1, RKISP1_DEFAULT_HEIGHT >> 1
-	}
-};
-
-static const struct rkisp1_cif_isp_hst_config rkisp1_hst_params_default_config = {
-	RKISP1_CIF_ISP_HISTOGRAM_MODE_RGB_COMBINED,
-	3,
-	{
-		RKISP1_DEFAULT_WIDTH >> 2, RKISP1_DEFAULT_HEIGHT >> 2,
-		RKISP1_DEFAULT_WIDTH >> 1, RKISP1_DEFAULT_HEIGHT >> 1
-	},
-	{
-		0, /* To be filled in with 0x01 at runtime. */
-	}
-};
-
-static const struct rkisp1_cif_isp_afc_config rkisp1_afc_params_default_config = {
-	1,
-	{
-		{
-			300, 225, 200, 150
-		}
-	},
-	4,
-	14
-};
-
-static void rkisp1_params_config_parameter(struct rkisp1_params *params)
-{
-	struct rkisp1_cif_isp_hst_config hst = rkisp1_hst_params_default_config;
-
-	rkisp1_awb_meas_config(params, &rkisp1_awb_params_default_config);
-	rkisp1_awb_meas_enable(params, &rkisp1_awb_params_default_config,
-			       true);
-
-	rkisp1_aec_config(params, &rkisp1_aec_params_default_config);
-	rkisp1_param_set_bits(params, RKISP1_CIF_ISP_EXP_CTRL,
-			      RKISP1_CIF_ISP_EXP_ENA);
-
-	rkisp1_afm_config(params, &rkisp1_afc_params_default_config);
-	rkisp1_param_set_bits(params, RKISP1_CIF_ISP_AFM_CTRL,
-			      RKISP1_CIF_ISP_AFM_ENA);
-
-	memset(hst.hist_weight, 0x01, sizeof(hst.hist_weight));
-	rkisp1_hst_config(params, &hst);
-	rkisp1_param_set_bits(params, RKISP1_CIF_ISP_HIST_PROP,
-			      ~RKISP1_CIF_ISP_HIST_PROP_MODE_MASK |
-			      rkisp1_hst_params_default_config.mode);
-
-	/* set the  range */
-	if (params->quantization == V4L2_QUANTIZATION_FULL_RANGE)
-		rkisp1_csm_config(params, true);
-	else
-		rkisp1_csm_config(params, false);
-
-	spin_lock_irq(&params->config_lock);
-
-	/* apply the first buffer if there is one already */
-	rkisp1_params_apply_params_cfg(params, 0);
-
-	spin_unlock_irq(&params->config_lock);
-}
-
-void rkisp1_params_configure(struct rkisp1_params *params,
-			     enum rkisp1_fmt_raw_pat_type bayer_pat,
-			     enum v4l2_quantization quantization)
-{
-	params->quantization = quantization;
-	params->raw_type = bayer_pat;
-	rkisp1_params_config_parameter(params);
-}
-
-/* Not called when the camera active, thus not isr protection. */
-void rkisp1_params_disable(struct rkisp1_params *params)
-{
-	rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_DPCC_MODE,
-				RKISP1_CIF_ISP_DPCC_ENA);
-	rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_LSC_CTRL,
-				RKISP1_CIF_ISP_LSC_CTRL_ENA);
-	rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_BLS_CTRL,
-				RKISP1_CIF_ISP_BLS_ENA);
-	rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_CTRL,
-				RKISP1_CIF_ISP_CTRL_ISP_GAMMA_IN_ENA);
-	rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_CTRL,
-				RKISP1_CIF_ISP_CTRL_ISP_GAMMA_OUT_ENA);
-	rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_DEMOSAIC,
-				RKISP1_CIF_ISP_DEMOSAIC_BYPASS);
-	rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_FILT_MODE,
-				RKISP1_CIF_ISP_FLT_ENA);
-	rkisp1_awb_meas_enable(params, NULL, false);
-	rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_CTRL,
-				RKISP1_CIF_ISP_CTRL_ISP_AWB_ENA);
-	rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_EXP_CTRL,
-				RKISP1_CIF_ISP_EXP_ENA);
-	rkisp1_ctk_enable(params, false);
-	rkisp1_param_clear_bits(params, RKISP1_CIF_C_PROC_CTRL,
-				RKISP1_CIF_C_PROC_CTR_ENABLE);
-	rkisp1_hst_enable(params, NULL, false);
-	rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_AFM_CTRL,
-				RKISP1_CIF_ISP_AFM_ENA);
-	rkisp1_ie_enable(params, false);
-	rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_DPF_MODE,
-				RKISP1_CIF_ISP_DPF_MODE_EN);
-}
-
-static int rkisp1_params_enum_fmt_meta_out(struct file *file, void *priv,
-					   struct v4l2_fmtdesc *f)
-{
-	struct video_device *video = video_devdata(file);
-	struct rkisp1_params *params = video_get_drvdata(video);
-
-	if (f->index > 0 || f->type != video->queue->type)
-		return -EINVAL;
-
-	f->pixelformat = params->vdev_fmt.fmt.meta.dataformat;
-
-	return 0;
-}
-
-static int rkisp1_params_g_fmt_meta_out(struct file *file, void *fh,
-					struct v4l2_format *f)
-{
-	struct video_device *video = video_devdata(file);
-	struct rkisp1_params *params = video_get_drvdata(video);
-	struct v4l2_meta_format *meta = &f->fmt.meta;
-
-	if (f->type != video->queue->type)
-		return -EINVAL;
-
-	memset(meta, 0, sizeof(*meta));
-	meta->dataformat = params->vdev_fmt.fmt.meta.dataformat;
-	meta->buffersize = params->vdev_fmt.fmt.meta.buffersize;
-
-	return 0;
-}
-
-static int rkisp1_params_querycap(struct file *file,
-				  void *priv, struct v4l2_capability *cap)
-{
-	struct video_device *vdev = video_devdata(file);
-
-	strscpy(cap->driver, RKISP1_DRIVER_NAME, sizeof(cap->driver));
-	strscpy(cap->card, vdev->name, sizeof(cap->card));
-	strscpy(cap->bus_info, RKISP1_BUS_INFO, sizeof(cap->bus_info));
-
-	return 0;
-}
-
-/* ISP params video device IOCTLs */
-static const struct v4l2_ioctl_ops rkisp1_params_ioctl = {
-	.vidioc_reqbufs = vb2_ioctl_reqbufs,
-	.vidioc_querybuf = vb2_ioctl_querybuf,
-	.vidioc_create_bufs = vb2_ioctl_create_bufs,
-	.vidioc_qbuf = vb2_ioctl_qbuf,
-	.vidioc_dqbuf = vb2_ioctl_dqbuf,
-	.vidioc_prepare_buf = vb2_ioctl_prepare_buf,
-	.vidioc_expbuf = vb2_ioctl_expbuf,
-	.vidioc_streamon = vb2_ioctl_streamon,
-	.vidioc_streamoff = vb2_ioctl_streamoff,
-	.vidioc_enum_fmt_meta_out = rkisp1_params_enum_fmt_meta_out,
-	.vidioc_g_fmt_meta_out = rkisp1_params_g_fmt_meta_out,
-	.vidioc_s_fmt_meta_out = rkisp1_params_g_fmt_meta_out,
-	.vidioc_try_fmt_meta_out = rkisp1_params_g_fmt_meta_out,
-	.vidioc_querycap = rkisp1_params_querycap,
-	.vidioc_subscribe_event = v4l2_ctrl_subscribe_event,
-	.vidioc_unsubscribe_event = v4l2_event_unsubscribe,
-};
-
-static int rkisp1_params_vb2_queue_setup(struct vb2_queue *vq,
-					 unsigned int *num_buffers,
-					 unsigned int *num_planes,
-					 unsigned int sizes[],
-					 struct device *alloc_devs[])
-{
-	*num_buffers = clamp_t(u32, *num_buffers,
-			       RKISP1_ISP_PARAMS_REQ_BUFS_MIN,
-			       RKISP1_ISP_PARAMS_REQ_BUFS_MAX);
-
-	*num_planes = 1;
-
-	sizes[0] = sizeof(struct rkisp1_params_cfg);
-
-	return 0;
-}
-
-static void rkisp1_params_vb2_buf_queue(struct vb2_buffer *vb)
-{
-	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
-	struct rkisp1_buffer *params_buf =
-		container_of(vbuf, struct rkisp1_buffer, vb);
-	struct vb2_queue *vq = vb->vb2_queue;
-	struct rkisp1_params *params = vq->drv_priv;
-
-	params_buf->vaddr = vb2_plane_vaddr(vb, 0);
-	spin_lock_irq(&params->config_lock);
-	list_add_tail(&params_buf->queue, &params->params);
-	spin_unlock_irq(&params->config_lock);
-}
-
-static int rkisp1_params_vb2_buf_prepare(struct vb2_buffer *vb)
-{
-	if (vb2_plane_size(vb, 0) < sizeof(struct rkisp1_params_cfg))
-		return -EINVAL;
-
-	vb2_set_plane_payload(vb, 0, sizeof(struct rkisp1_params_cfg));
-
-	return 0;
-}
-
-static void rkisp1_params_vb2_stop_streaming(struct vb2_queue *vq)
-{
-	struct rkisp1_params *params = vq->drv_priv;
-	struct rkisp1_buffer *buf;
-	LIST_HEAD(tmp_list);
-
-	/*
-	 * we first move the buffers into a local list 'tmp_list'
-	 * and then we can iterate it and call vb2_buffer_done
-	 * without holding the lock
-	 */
-	spin_lock_irq(&params->config_lock);
-	list_splice_init(&params->params, &tmp_list);
-	spin_unlock_irq(&params->config_lock);
-
-	list_for_each_entry(buf, &tmp_list, queue)
-		vb2_buffer_done(&buf->vb.vb2_buf, VB2_BUF_STATE_ERROR);
-}
-
-static struct vb2_ops rkisp1_params_vb2_ops = {
-	.queue_setup = rkisp1_params_vb2_queue_setup,
-	.wait_prepare = vb2_ops_wait_prepare,
-	.wait_finish = vb2_ops_wait_finish,
-	.buf_queue = rkisp1_params_vb2_buf_queue,
-	.buf_prepare = rkisp1_params_vb2_buf_prepare,
-	.stop_streaming = rkisp1_params_vb2_stop_streaming,
-
-};
-
-static struct v4l2_file_operations rkisp1_params_fops = {
-	.mmap = vb2_fop_mmap,
-	.unlocked_ioctl = video_ioctl2,
-	.poll = vb2_fop_poll,
-	.open = v4l2_fh_open,
-	.release = vb2_fop_release
-};
-
-static int rkisp1_params_init_vb2_queue(struct vb2_queue *q,
-					struct rkisp1_params *params)
-{
-	struct rkisp1_vdev_node *node;
-
-	node = container_of(q, struct rkisp1_vdev_node, buf_queue);
-
-	q->type = V4L2_BUF_TYPE_META_OUTPUT;
-	q->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF;
-	q->drv_priv = params;
-	q->ops = &rkisp1_params_vb2_ops;
-	q->mem_ops = &vb2_vmalloc_memops;
-	q->buf_struct_size = sizeof(struct rkisp1_buffer);
-	q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
-	q->lock = &node->vlock;
-
-	return vb2_queue_init(q);
-}
-
-static void rkisp1_init_params(struct rkisp1_params *params)
-{
-	params->vdev_fmt.fmt.meta.dataformat =
-		V4L2_META_FMT_RK_ISP1_PARAMS;
-	params->vdev_fmt.fmt.meta.buffersize =
-		sizeof(struct rkisp1_params_cfg);
-}
-
-int rkisp1_params_register(struct rkisp1_device *rkisp1)
-{
-	struct rkisp1_params *params = &rkisp1->params;
-	struct rkisp1_vdev_node *node = &params->vnode;
-	struct video_device *vdev = &node->vdev;
-	int ret;
-
-	params->rkisp1 = rkisp1;
-	mutex_init(&node->vlock);
-	INIT_LIST_HEAD(&params->params);
-	spin_lock_init(&params->config_lock);
-
-	strscpy(vdev->name, RKISP1_PARAMS_DEV_NAME, sizeof(vdev->name));
-
-	video_set_drvdata(vdev, params);
-	vdev->ioctl_ops = &rkisp1_params_ioctl;
-	vdev->fops = &rkisp1_params_fops;
-	vdev->release = video_device_release_empty;
-	/*
-	 * Provide a mutex to v4l2 core. It will be used
-	 * to protect all fops and v4l2 ioctls.
-	 */
-	vdev->lock = &node->vlock;
-	vdev->v4l2_dev = &rkisp1->v4l2_dev;
-	vdev->queue = &node->buf_queue;
-	vdev->device_caps = V4L2_CAP_STREAMING | V4L2_CAP_META_OUTPUT;
-	vdev->vfl_dir = VFL_DIR_TX;
-	rkisp1_params_init_vb2_queue(vdev->queue, params);
-	rkisp1_init_params(params);
-	video_set_drvdata(vdev, params);
-
-	node->pad.flags = MEDIA_PAD_FL_SOURCE;
-	ret = media_entity_pads_init(&vdev->entity, 1, &node->pad);
-	if (ret)
-		return ret;
-	ret = video_register_device(vdev, VFL_TYPE_VIDEO, -1);
-	if (ret) {
-		dev_err(rkisp1->dev,
-			"failed to register %s, ret=%d\n", vdev->name, ret);
-		goto err_cleanup_media_entity;
-	}
-	return 0;
-err_cleanup_media_entity:
-	media_entity_cleanup(&vdev->entity);
-	return ret;
-}
-
-void rkisp1_params_unregister(struct rkisp1_device *rkisp1)
-{
-	struct rkisp1_params *params = &rkisp1->params;
-	struct rkisp1_vdev_node *node = &params->vnode;
-	struct video_device *vdev = &node->vdev;
-
-	vb2_video_unregister_device(vdev);
-	media_entity_cleanup(&vdev->entity);
-}
diff --git a/drivers/staging/media/rkisp1/rkisp1-regs.h b/drivers/staging/media/rkisp1/rkisp1-regs.h
deleted file mode 100644
index 049f6c3a11df..000000000000
--- a/drivers/staging/media/rkisp1/rkisp1-regs.h
+++ /dev/null
@@ -1,1262 +0,0 @@
-/* SPDX-License-Identifier: (GPL-2.0+ OR MIT) */
-/*
- * Rockchip ISP1 Driver - Registers header
- *
- * Copyright (C) 2017 Rockchip Electronics Co., Ltd.
- */
-
-#ifndef _RKISP1_REGS_H
-#define _RKISP1_REGS_H
-
-/* ISP_CTRL */
-#define RKISP1_CIF_ISP_CTRL_ISP_ENABLE			BIT(0)
-#define RKISP1_CIF_ISP_CTRL_ISP_MODE_RAW_PICT		(0 << 1)
-#define RKISP1_CIF_ISP_CTRL_ISP_MODE_ITU656		BIT(1)
-#define RKISP1_CIF_ISP_CTRL_ISP_MODE_ITU601		(2 << 1)
-#define RKISP1_CIF_ISP_CTRL_ISP_MODE_BAYER_ITU601	(3 << 1)
-#define RKISP1_CIF_ISP_CTRL_ISP_MODE_DATA_MODE		(4 << 1)
-#define RKISP1_CIF_ISP_CTRL_ISP_MODE_BAYER_ITU656	(5 << 1)
-#define RKISP1_CIF_ISP_CTRL_ISP_MODE_RAW_PICT_ITU656	(6 << 1)
-#define RKISP1_CIF_ISP_CTRL_ISP_INFORM_ENABLE		BIT(4)
-#define RKISP1_CIF_ISP_CTRL_ISP_GAMMA_IN_ENA		BIT(6)
-#define RKISP1_CIF_ISP_CTRL_ISP_AWB_ENA			BIT(7)
-#define RKISP1_CIF_ISP_CTRL_ISP_CFG_UPD_PERMANENT	BIT(8)
-#define RKISP1_CIF_ISP_CTRL_ISP_CFG_UPD			BIT(9)
-#define RKISP1_CIF_ISP_CTRL_ISP_GEN_CFG_UPD		BIT(10)
-#define RKISP1_CIF_ISP_CTRL_ISP_GAMMA_OUT_ENA		BIT(11)
-#define RKISP1_CIF_ISP_CTRL_ISP_FLASH_MODE_ENA		BIT(12)
-#define RKISP1_CIF_ISP_CTRL_ISP_CSM_Y_FULL_ENA		BIT(13)
-#define RKISP1_CIF_ISP_CTRL_ISP_CSM_C_FULL_ENA		BIT(14)
-
-/* ISP_ACQ_PROP */
-#define RKISP1_CIF_ISP_ACQ_PROP_POS_EDGE		BIT(0)
-#define RKISP1_CIF_ISP_ACQ_PROP_HSYNC_LOW		BIT(1)
-#define RKISP1_CIF_ISP_ACQ_PROP_VSYNC_LOW		BIT(2)
-#define RKISP1_CIF_ISP_ACQ_PROP_BAYER_PAT_RGGB		(0 << 3)
-#define RKISP1_CIF_ISP_ACQ_PROP_BAYER_PAT_GRBG		BIT(3)
-#define RKISP1_CIF_ISP_ACQ_PROP_BAYER_PAT_GBRG		(2 << 3)
-#define RKISP1_CIF_ISP_ACQ_PROP_BAYER_PAT_BGGR		(3 << 3)
-#define RKISP1_CIF_ISP_ACQ_PROP_BAYER_PAT(pat)		((pat) << 3)
-#define RKISP1_CIF_ISP_ACQ_PROP_YCBYCR			(0 << 7)
-#define RKISP1_CIF_ISP_ACQ_PROP_YCRYCB			BIT(7)
-#define RKISP1_CIF_ISP_ACQ_PROP_CBYCRY			(2 << 7)
-#define RKISP1_CIF_ISP_ACQ_PROP_CRYCBY			(3 << 7)
-#define RKISP1_CIF_ISP_ACQ_PROP_FIELD_SEL_ALL		(0 << 9)
-#define RKISP1_CIF_ISP_ACQ_PROP_FIELD_SEL_EVEN		BIT(9)
-#define RKISP1_CIF_ISP_ACQ_PROP_FIELD_SEL_ODD		(2 << 9)
-#define RKISP1_CIF_ISP_ACQ_PROP_IN_SEL_12B		(0 << 12)
-#define RKISP1_CIF_ISP_ACQ_PROP_IN_SEL_10B_ZERO		BIT(12)
-#define RKISP1_CIF_ISP_ACQ_PROP_IN_SEL_10B_MSB		(2 << 12)
-#define RKISP1_CIF_ISP_ACQ_PROP_IN_SEL_8B_ZERO		(3 << 12)
-#define RKISP1_CIF_ISP_ACQ_PROP_IN_SEL_8B_MSB		(4 << 12)
-
-/* VI_DPCL */
-#define RKISP1_CIF_VI_DPCL_DMA_JPEG			(0 << 0)
-#define RKISP1_CIF_VI_DPCL_MP_MUX_MRSZ_MI		BIT(0)
-#define RKISP1_CIF_VI_DPCL_MP_MUX_MRSZ_JPEG		(2 << 0)
-#define RKISP1_CIF_VI_DPCL_CHAN_MODE_MP			BIT(2)
-#define RKISP1_CIF_VI_DPCL_CHAN_MODE_SP			(2 << 2)
-#define RKISP1_CIF_VI_DPCL_CHAN_MODE_MPSP		(3 << 2)
-#define RKISP1_CIF_VI_DPCL_DMA_SW_SPMUX			(0 << 4)
-#define RKISP1_CIF_VI_DPCL_DMA_SW_SI			BIT(4)
-#define RKISP1_CIF_VI_DPCL_DMA_SW_IE			(2 << 4)
-#define RKISP1_CIF_VI_DPCL_DMA_SW_JPEG			(3 << 4)
-#define RKISP1_CIF_VI_DPCL_DMA_SW_ISP			(4 << 4)
-#define RKISP1_CIF_VI_DPCL_IF_SEL_PARALLEL		(0 << 8)
-#define RKISP1_CIF_VI_DPCL_IF_SEL_SMIA			BIT(8)
-#define RKISP1_CIF_VI_DPCL_IF_SEL_MIPI			(2 << 8)
-#define RKISP1_CIF_VI_DPCL_DMA_IE_MUX_DMA		BIT(10)
-#define RKISP1_CIF_VI_DPCL_DMA_SP_MUX_DMA		BIT(11)
-
-/* ISP_IMSC - ISP_MIS - ISP_RIS - ISP_ICR - ISP_ISR */
-#define RKISP1_CIF_ISP_OFF				BIT(0)
-#define RKISP1_CIF_ISP_FRAME				BIT(1)
-#define RKISP1_CIF_ISP_DATA_LOSS			BIT(2)
-#define RKISP1_CIF_ISP_PIC_SIZE_ERROR			BIT(3)
-#define RKISP1_CIF_ISP_AWB_DONE				BIT(4)
-#define RKISP1_CIF_ISP_FRAME_IN				BIT(5)
-#define RKISP1_CIF_ISP_V_START				BIT(6)
-#define RKISP1_CIF_ISP_H_START				BIT(7)
-#define RKISP1_CIF_ISP_FLASH_ON				BIT(8)
-#define RKISP1_CIF_ISP_FLASH_OFF			BIT(9)
-#define RKISP1_CIF_ISP_SHUTTER_ON			BIT(10)
-#define RKISP1_CIF_ISP_SHUTTER_OFF			BIT(11)
-#define RKISP1_CIF_ISP_AFM_SUM_OF			BIT(12)
-#define RKISP1_CIF_ISP_AFM_LUM_OF			BIT(13)
-#define RKISP1_CIF_ISP_AFM_FIN				BIT(14)
-#define RKISP1_CIF_ISP_HIST_MEASURE_RDY			BIT(15)
-#define RKISP1_CIF_ISP_FLASH_CAP			BIT(17)
-#define RKISP1_CIF_ISP_EXP_END				BIT(18)
-#define RKISP1_CIF_ISP_VSM_END				BIT(19)
-
-/* ISP_ERR */
-#define RKISP1_CIF_ISP_ERR_INFORM_SIZE			BIT(0)
-#define RKISP1_CIF_ISP_ERR_IS_SIZE			BIT(1)
-#define RKISP1_CIF_ISP_ERR_OUTFORM_SIZE			BIT(2)
-
-/* MI_CTRL */
-#define RKISP1_CIF_MI_CTRL_MP_ENABLE			BIT(0)
-#define RKISP1_CIF_MI_CTRL_SP_ENABLE			(2 << 0)
-#define RKISP1_CIF_MI_CTRL_JPEG_ENABLE			(4 << 0)
-#define RKISP1_CIF_MI_CTRL_RAW_ENABLE			(8 << 0)
-#define RKISP1_CIF_MI_CTRL_HFLIP			BIT(4)
-#define RKISP1_CIF_MI_CTRL_VFLIP			BIT(5)
-#define RKISP1_CIF_MI_CTRL_ROT				BIT(6)
-#define RKISP1_CIF_MI_BYTE_SWAP				BIT(7)
-#define RKISP1_CIF_MI_SP_Y_FULL_YUV2RGB			BIT(8)
-#define RKISP1_CIF_MI_SP_CBCR_FULL_YUV2RGB		BIT(9)
-#define RKISP1_CIF_MI_SP_422NONCOSITEED			BIT(10)
-#define RKISP1_CIF_MI_MP_PINGPONG_ENABEL		BIT(11)
-#define RKISP1_CIF_MI_SP_PINGPONG_ENABEL		BIT(12)
-#define RKISP1_CIF_MI_MP_AUTOUPDATE_ENABLE		BIT(13)
-#define RKISP1_CIF_MI_SP_AUTOUPDATE_ENABLE		BIT(14)
-#define RKISP1_CIF_MI_LAST_PIXEL_SIG_ENABLE		BIT(15)
-#define RKISP1_CIF_MI_CTRL_BURST_LEN_LUM_16		(0 << 16)
-#define RKISP1_CIF_MI_CTRL_BURST_LEN_LUM_32		BIT(16)
-#define RKISP1_CIF_MI_CTRL_BURST_LEN_LUM_64		(2 << 16)
-#define RKISP1_CIF_MI_CTRL_BURST_LEN_CHROM_16		(0 << 18)
-#define RKISP1_CIF_MI_CTRL_BURST_LEN_CHROM_32		BIT(18)
-#define RKISP1_CIF_MI_CTRL_BURST_LEN_CHROM_64		(2 << 18)
-#define RKISP1_CIF_MI_CTRL_INIT_BASE_EN			BIT(20)
-#define RKISP1_CIF_MI_CTRL_INIT_OFFSET_EN		BIT(21)
-#define RKISP1_MI_CTRL_MP_WRITE_YUV_PLA_OR_RAW8		(0 << 22)
-#define RKISP1_MI_CTRL_MP_WRITE_YUV_SPLA		BIT(22)
-#define RKISP1_MI_CTRL_MP_WRITE_YUVINT			(2 << 22)
-#define RKISP1_MI_CTRL_MP_WRITE_RAW12			(2 << 22)
-#define RKISP1_MI_CTRL_SP_WRITE_PLA			(0 << 24)
-#define RKISP1_MI_CTRL_SP_WRITE_SPLA			BIT(24)
-#define RKISP1_MI_CTRL_SP_WRITE_INT			(2 << 24)
-#define RKISP1_MI_CTRL_SP_INPUT_YUV400			(0 << 26)
-#define RKISP1_MI_CTRL_SP_INPUT_YUV420			BIT(26)
-#define RKISP1_MI_CTRL_SP_INPUT_YUV422			(2 << 26)
-#define RKISP1_MI_CTRL_SP_INPUT_YUV444			(3 << 26)
-#define RKISP1_MI_CTRL_SP_OUTPUT_YUV400			(0 << 28)
-#define RKISP1_MI_CTRL_SP_OUTPUT_YUV420			BIT(28)
-#define RKISP1_MI_CTRL_SP_OUTPUT_YUV422			(2 << 28)
-#define RKISP1_MI_CTRL_SP_OUTPUT_YUV444			(3 << 28)
-#define RKISP1_MI_CTRL_SP_OUTPUT_RGB565			(4 << 28)
-#define RKISP1_MI_CTRL_SP_OUTPUT_RGB666			(5 << 28)
-#define RKISP1_MI_CTRL_SP_OUTPUT_RGB888			(6 << 28)
-
-#define RKISP1_MI_CTRL_MP_FMT_MASK			GENMASK(23, 22)
-#define RKISP1_MI_CTRL_SP_FMT_MASK			GENMASK(30, 24)
-
-/* MI_INIT */
-#define RKISP1_CIF_MI_INIT_SKIP				BIT(2)
-#define RKISP1_CIF_MI_INIT_SOFT_UPD			BIT(4)
-
-/* MI_CTRL_SHD */
-#define RKISP1_CIF_MI_CTRL_SHD_MP_IN_ENABLED		BIT(0)
-#define RKISP1_CIF_MI_CTRL_SHD_SP_IN_ENABLED		BIT(1)
-#define RKISP1_CIF_MI_CTRL_SHD_JPEG_IN_ENABLED		BIT(2)
-#define RKISP1_CIF_MI_CTRL_SHD_RAW_IN_ENABLED		BIT(3)
-#define RKISP1_CIF_MI_CTRL_SHD_MP_OUT_ENABLED		BIT(16)
-#define RKISP1_CIF_MI_CTRL_SHD_SP_OUT_ENABLED		BIT(17)
-#define RKISP1_CIF_MI_CTRL_SHD_JPEG_OUT_ENABLED		BIT(18)
-#define RKISP1_CIF_MI_CTRL_SHD_RAW_OUT_ENABLED		BIT(19)
-
-/* RSZ_CTRL */
-#define RKISP1_CIF_RSZ_CTRL_SCALE_HY_ENABLE		BIT(0)
-#define RKISP1_CIF_RSZ_CTRL_SCALE_HC_ENABLE		BIT(1)
-#define RKISP1_CIF_RSZ_CTRL_SCALE_VY_ENABLE		BIT(2)
-#define RKISP1_CIF_RSZ_CTRL_SCALE_VC_ENABLE		BIT(3)
-#define RKISP1_CIF_RSZ_CTRL_SCALE_HY_UP			BIT(4)
-#define RKISP1_CIF_RSZ_CTRL_SCALE_HC_UP			BIT(5)
-#define RKISP1_CIF_RSZ_CTRL_SCALE_VY_UP			BIT(6)
-#define RKISP1_CIF_RSZ_CTRL_SCALE_VC_UP			BIT(7)
-#define RKISP1_CIF_RSZ_CTRL_CFG_UPD			BIT(8)
-#define RKISP1_CIF_RSZ_CTRL_CFG_UPD_AUTO		BIT(9)
-#define RKISP1_CIF_RSZ_SCALER_FACTOR			BIT(16)
-
-/* MI_IMSC - MI_MIS - MI_RIS - MI_ICR - MI_ISR */
-#define RKISP1_CIF_MI_FRAME(stream)			BIT((stream)->id)
-#define RKISP1_CIF_MI_MBLK_LINE				BIT(2)
-#define RKISP1_CIF_MI_FILL_MP_Y				BIT(3)
-#define RKISP1_CIF_MI_WRAP_MP_Y				BIT(4)
-#define RKISP1_CIF_MI_WRAP_MP_CB			BIT(5)
-#define RKISP1_CIF_MI_WRAP_MP_CR			BIT(6)
-#define RKISP1_CIF_MI_WRAP_SP_Y				BIT(7)
-#define RKISP1_CIF_MI_WRAP_SP_CB			BIT(8)
-#define RKISP1_CIF_MI_WRAP_SP_CR			BIT(9)
-#define RKISP1_CIF_MI_DMA_READY				BIT(11)
-
-/* MI_STATUS */
-#define RKISP1_CIF_MI_STATUS_MP_Y_FIFO_FULL		BIT(0)
-#define RKISP1_CIF_MI_STATUS_SP_Y_FIFO_FULL		BIT(4)
-
-/* MI_DMA_CTRL */
-#define RKISP1_CIF_MI_DMA_CTRL_BURST_LEN_LUM_16		(0 << 0)
-#define RKISP1_CIF_MI_DMA_CTRL_BURST_LEN_LUM_32		BIT(0)
-#define RKISP1_CIF_MI_DMA_CTRL_BURST_LEN_LUM_64		(2 << 0)
-#define RKISP1_CIF_MI_DMA_CTRL_BURST_LEN_CHROM_16	(0 << 2)
-#define RKISP1_CIF_MI_DMA_CTRL_BURST_LEN_CHROM_32	BIT(2)
-#define RKISP1_CIF_MI_DMA_CTRL_BURST_LEN_CHROM_64	(2 << 2)
-#define RKISP1_CIF_MI_DMA_CTRL_READ_FMT_PLANAR		(0 << 4)
-#define RKISP1_CIF_MI_DMA_CTRL_READ_FMT_SPLANAR		BIT(4)
-#define RKISP1_CIF_MI_DMA_CTRL_FMT_YUV400		(0 << 6)
-#define RKISP1_CIF_MI_DMA_CTRL_FMT_YUV420		BIT(6)
-#define RKISP1_CIF_MI_DMA_CTRL_READ_FMT_PACKED		(2 << 4)
-#define RKISP1_CIF_MI_DMA_CTRL_FMT_YUV422		(2 << 6)
-#define RKISP1_CIF_MI_DMA_CTRL_FMT_YUV444		(3 << 6)
-#define RKISP1_CIF_MI_DMA_CTRL_BYTE_SWAP		BIT(8)
-#define RKISP1_CIF_MI_DMA_CTRL_CONTINUOUS_ENA		BIT(9)
-#define RKISP1_CIF_MI_DMA_CTRL_RGB_BAYER_NO		(0 << 12)
-#define RKISP1_CIF_MI_DMA_CTRL_RGB_BAYER_8BIT		BIT(12)
-#define RKISP1_CIF_MI_DMA_CTRL_RGB_BAYER_16BIT		(2 << 12)
-/* MI_DMA_START */
-#define RKISP1_CIF_MI_DMA_START_ENABLE			BIT(0)
-/* MI_XTD_FORMAT_CTRL  */
-#define RKISP1_CIF_MI_XTD_FMT_CTRL_MP_CB_CR_SWAP	BIT(0)
-#define RKISP1_CIF_MI_XTD_FMT_CTRL_SP_CB_CR_SWAP	BIT(1)
-#define RKISP1_CIF_MI_XTD_FMT_CTRL_DMA_CB_CR_SWAP	BIT(2)
-
-/* CCL */
-#define RKISP1_CIF_CCL_CIF_CLK_DIS			BIT(2)
-/* ICCL */
-#define RKISP1_CIF_ICCL_ISP_CLK				BIT(0)
-#define RKISP1_CIF_ICCL_CP_CLK				BIT(1)
-#define RKISP1_CIF_ICCL_RES_2				BIT(2)
-#define RKISP1_CIF_ICCL_MRSZ_CLK			BIT(3)
-#define RKISP1_CIF_ICCL_SRSZ_CLK			BIT(4)
-#define RKISP1_CIF_ICCL_JPEG_CLK			BIT(5)
-#define RKISP1_CIF_ICCL_MI_CLK				BIT(6)
-#define RKISP1_CIF_ICCL_RES_7				BIT(7)
-#define RKISP1_CIF_ICCL_IE_CLK				BIT(8)
-#define RKISP1_CIF_ICCL_SIMP_CLK			BIT(9)
-#define RKISP1_CIF_ICCL_SMIA_CLK			BIT(10)
-#define RKISP1_CIF_ICCL_MIPI_CLK			BIT(11)
-#define RKISP1_CIF_ICCL_DCROP_CLK			BIT(12)
-/* IRCL */
-#define RKISP1_CIF_IRCL_ISP_SW_RST			BIT(0)
-#define RKISP1_CIF_IRCL_CP_SW_RST			BIT(1)
-#define RKISP1_CIF_IRCL_YCS_SW_RST			BIT(2)
-#define RKISP1_CIF_IRCL_MRSZ_SW_RST			BIT(3)
-#define RKISP1_CIF_IRCL_SRSZ_SW_RST			BIT(4)
-#define RKISP1_CIF_IRCL_JPEG_SW_RST			BIT(5)
-#define RKISP1_CIF_IRCL_MI_SW_RST			BIT(6)
-#define RKISP1_CIF_IRCL_CIF_SW_RST			BIT(7)
-#define RKISP1_CIF_IRCL_IE_SW_RST			BIT(8)
-#define RKISP1_CIF_IRCL_SI_SW_RST			BIT(9)
-#define RKISP1_CIF_IRCL_MIPI_SW_RST			BIT(11)
-
-/* C_PROC_CTR */
-#define RKISP1_CIF_C_PROC_CTR_ENABLE			BIT(0)
-#define RKISP1_CIF_C_PROC_YOUT_FULL			BIT(1)
-#define RKISP1_CIF_C_PROC_YIN_FULL			BIT(2)
-#define RKISP1_CIF_C_PROC_COUT_FULL			BIT(3)
-#define RKISP1_CIF_C_PROC_CTRL_RESERVED			0xFFFFFFFE
-#define RKISP1_CIF_C_PROC_CONTRAST_RESERVED		0xFFFFFF00
-#define RKISP1_CIF_C_PROC_BRIGHTNESS_RESERVED		0xFFFFFF00
-#define RKISP1_CIF_C_PROC_HUE_RESERVED			0xFFFFFF00
-#define RKISP1_CIF_C_PROC_SATURATION_RESERVED		0xFFFFFF00
-#define RKISP1_CIF_C_PROC_MACC_RESERVED			0xE000E000
-#define RKISP1_CIF_C_PROC_TONE_RESERVED			0xF000
-/* DUAL_CROP_CTRL */
-#define RKISP1_CIF_DUAL_CROP_MP_MODE_BYPASS		(0 << 0)
-#define RKISP1_CIF_DUAL_CROP_MP_MODE_YUV		BIT(0)
-#define RKISP1_CIF_DUAL_CROP_MP_MODE_RAW		(2 << 0)
-#define RKISP1_CIF_DUAL_CROP_SP_MODE_BYPASS		(0 << 2)
-#define RKISP1_CIF_DUAL_CROP_SP_MODE_YUV		BIT(2)
-#define RKISP1_CIF_DUAL_CROP_SP_MODE_RAW		(2 << 2)
-#define RKISP1_CIF_DUAL_CROP_CFG_UPD_PERMANENT		BIT(4)
-#define RKISP1_CIF_DUAL_CROP_CFG_UPD			BIT(5)
-#define RKISP1_CIF_DUAL_CROP_GEN_CFG_UPD		BIT(6)
-
-/* IMG_EFF_CTRL */
-#define RKISP1_CIF_IMG_EFF_CTRL_ENABLE			BIT(0)
-#define RKISP1_CIF_IMG_EFF_CTRL_MODE_BLACKWHITE		(0 << 1)
-#define RKISP1_CIF_IMG_EFF_CTRL_MODE_NEGATIVE		BIT(1)
-#define RKISP1_CIF_IMG_EFF_CTRL_MODE_SEPIA		(2 << 1)
-#define RKISP1_CIF_IMG_EFF_CTRL_MODE_COLOR_SEL		(3 << 1)
-#define RKISP1_CIF_IMG_EFF_CTRL_MODE_EMBOSS		(4 << 1)
-#define RKISP1_CIF_IMG_EFF_CTRL_MODE_SKETCH		(5 << 1)
-#define RKISP1_CIF_IMG_EFF_CTRL_MODE_SHARPEN		(6 << 1)
-#define RKISP1_CIF_IMG_EFF_CTRL_CFG_UPD			BIT(4)
-#define RKISP1_CIF_IMG_EFF_CTRL_YCBCR_FULL		BIT(5)
-
-#define RKISP1_CIF_IMG_EFF_CTRL_MODE_BLACKWHITE_SHIFT	0
-#define RKISP1_CIF_IMG_EFF_CTRL_MODE_NEGATIVE_SHIFT	1
-#define RKISP1_CIF_IMG_EFF_CTRL_MODE_SEPIA_SHIFT	2
-#define RKISP1_CIF_IMG_EFF_CTRL_MODE_COLOR_SEL_SHIFT	3
-#define RKISP1_CIF_IMG_EFF_CTRL_MODE_EMBOSS_SHIFT	4
-#define RKISP1_CIF_IMG_EFF_CTRL_MODE_SKETCH_SHIFT	5
-#define RKISP1_CIF_IMG_EFF_CTRL_MODE_SHARPEN_SHIFT	6
-#define RKISP1_CIF_IMG_EFF_CTRL_MODE_MASK		0xE
-
-/* IMG_EFF_COLOR_SEL */
-#define RKISP1_CIF_IMG_EFF_COLOR_RGB			0
-#define RKISP1_CIF_IMG_EFF_COLOR_B			BIT(0)
-#define RKISP1_CIF_IMG_EFF_COLOR_G			(2 << 0)
-#define RKISP1_CIF_IMG_EFF_COLOR_GB			(3 << 0)
-#define RKISP1_CIF_IMG_EFF_COLOR_R			(4 << 0)
-#define RKISP1_CIF_IMG_EFF_COLOR_RB			(5 << 0)
-#define RKISP1_CIF_IMG_EFF_COLOR_RG			(6 << 0)
-#define RKISP1_CIF_IMG_EFF_COLOR_RGB2			(7 << 0)
-
-/* MIPI_CTRL */
-#define RKISP1_CIF_MIPI_CTRL_OUTPUT_ENA			BIT(0)
-#define RKISP1_CIF_MIPI_CTRL_SHUTDOWNLANES(a)		(((a) & 0xF) << 8)
-#define RKISP1_CIF_MIPI_CTRL_NUM_LANES(a)		(((a) & 0x3) << 12)
-#define RKISP1_CIF_MIPI_CTRL_ERR_SOT_HS_SKIP		BIT(16)
-#define RKISP1_CIF_MIPI_CTRL_ERR_SOT_SYNC_HS_SKIP	BIT(17)
-#define RKISP1_CIF_MIPI_CTRL_CLOCKLANE_ENA		BIT(18)
-
-/* MIPI_DATA_SEL */
-#define RKISP1_CIF_MIPI_DATA_SEL_VC(a)			(((a) & 0x3) << 6)
-#define RKISP1_CIF_MIPI_DATA_SEL_DT(a)			(((a) & 0x3F) << 0)
-/* MIPI DATA_TYPE */
-#define RKISP1_CIF_CSI2_DT_YUV420_8b			0x18
-#define RKISP1_CIF_CSI2_DT_YUV420_10b			0x19
-#define RKISP1_CIF_CSI2_DT_YUV422_8b			0x1E
-#define RKISP1_CIF_CSI2_DT_YUV422_10b			0x1F
-#define RKISP1_CIF_CSI2_DT_RGB565			0x22
-#define RKISP1_CIF_CSI2_DT_RGB666			0x23
-#define RKISP1_CIF_CSI2_DT_RGB888			0x24
-#define RKISP1_CIF_CSI2_DT_RAW8				0x2A
-#define RKISP1_CIF_CSI2_DT_RAW10			0x2B
-#define RKISP1_CIF_CSI2_DT_RAW12			0x2C
-
-/* MIPI_IMSC, MIPI_RIS, MIPI_MIS, MIPI_ICR, MIPI_ISR */
-#define RKISP1_CIF_MIPI_SYNC_FIFO_OVFLW(a)		(((a) & 0xF) << 0)
-#define RKISP1_CIF_MIPI_ERR_SOT(a)			(((a) & 0xF) << 4)
-#define RKISP1_CIF_MIPI_ERR_SOT_SYNC(a)			(((a) & 0xF) << 8)
-#define RKISP1_CIF_MIPI_ERR_EOT_SYNC(a)			(((a) & 0xF) << 12)
-#define RKISP1_CIF_MIPI_ERR_CTRL(a)			(((a) & 0xF) << 16)
-#define RKISP1_CIF_MIPI_ERR_PROTOCOL			BIT(20)
-#define RKISP1_CIF_MIPI_ERR_ECC1			BIT(21)
-#define RKISP1_CIF_MIPI_ERR_ECC2			BIT(22)
-#define RKISP1_CIF_MIPI_ERR_CS				BIT(23)
-#define RKISP1_CIF_MIPI_FRAME_END			BIT(24)
-#define RKISP1_CIF_MIPI_ADD_DATA_OVFLW			BIT(25)
-#define RKISP1_CIF_MIPI_ADD_DATA_WATER_MARK		BIT(26)
-
-#define RKISP1_CIF_MIPI_ERR_CSI  (RKISP1_CIF_MIPI_ERR_PROTOCOL | \
-	RKISP1_CIF_MIPI_ERR_ECC1 | \
-	RKISP1_CIF_MIPI_ERR_ECC2 | \
-	RKISP1_CIF_MIPI_ERR_CS)
-
-#define RKISP1_CIF_MIPI_ERR_DPHY  (RKISP1_CIF_MIPI_ERR_SOT(3) | \
-	RKISP1_CIF_MIPI_ERR_SOT_SYNC(3) | \
-	RKISP1_CIF_MIPI_ERR_EOT_SYNC(3) | \
-	RKISP1_CIF_MIPI_ERR_CTRL(3))
-
-/* SUPER_IMPOSE */
-#define RKISP1_CIF_SUPER_IMP_CTRL_NORMAL_MODE		BIT(0)
-#define RKISP1_CIF_SUPER_IMP_CTRL_REF_IMG_MEM		BIT(1)
-#define RKISP1_CIF_SUPER_IMP_CTRL_TRANSP_DIS		BIT(2)
-
-/* ISP HISTOGRAM CALCULATION : ISP_HIST_PROP */
-#define RKISP1_CIF_ISP_HIST_PROP_MODE_DIS		(0 << 0)
-#define RKISP1_CIF_ISP_HIST_PROP_MODE_RGB		BIT(0)
-#define RKISP1_CIF_ISP_HIST_PROP_MODE_RED		(2 << 0)
-#define RKISP1_CIF_ISP_HIST_PROP_MODE_GREEN		(3 << 0)
-#define RKISP1_CIF_ISP_HIST_PROP_MODE_BLUE		(4 << 0)
-#define RKISP1_CIF_ISP_HIST_PROP_MODE_LUM		(5 << 0)
-#define RKISP1_CIF_ISP_HIST_PROP_MODE_MASK		0x7
-#define RKISP1_CIF_ISP_HIST_PREDIV_SET(x)		(((x) & 0x7F) << 3)
-#define RKISP1_CIF_ISP_HIST_WEIGHT_SET(v0, v1, v2, v3)	\
-				     (((v0) & 0x1F) | (((v1) & 0x1F) << 8)  |\
-				     (((v2) & 0x1F) << 16) | \
-				     (((v3) & 0x1F) << 24))
-
-#define RKISP1_CIF_ISP_HIST_WINDOW_OFFSET_RESERVED	0xFFFFF000
-#define RKISP1_CIF_ISP_HIST_WINDOW_SIZE_RESERVED	0xFFFFF800
-#define RKISP1_CIF_ISP_HIST_WEIGHT_RESERVED		0xE0E0E0E0
-#define RKISP1_CIF_ISP_MAX_HIST_PREDIVIDER		0x0000007F
-#define RKISP1_CIF_ISP_HIST_ROW_NUM			5
-#define RKISP1_CIF_ISP_HIST_COLUMN_NUM			5
-
-/* AUTO FOCUS MEASUREMENT:  ISP_AFM_CTRL */
-#define RKISP1_ISP_AFM_CTRL_ENABLE			BIT(0)
-
-/* SHUTTER CONTROL */
-#define RKISP1_CIF_ISP_SH_CTRL_SH_ENA			BIT(0)
-#define RKISP1_CIF_ISP_SH_CTRL_REP_EN			BIT(1)
-#define RKISP1_CIF_ISP_SH_CTRL_SRC_SH_TRIG		BIT(2)
-#define RKISP1_CIF_ISP_SH_CTRL_EDGE_POS			BIT(3)
-#define RKISP1_CIF_ISP_SH_CTRL_POL_LOW			BIT(4)
-
-/* FLASH MODULE */
-/* ISP_FLASH_CMD */
-#define RKISP1_CIFFLASH_CMD_PRELIGHT_ON			BIT(0)
-#define RKISP1_CIFFLASH_CMD_FLASH_ON			BIT(1)
-#define RKISP1_CIFFLASH_CMD_PRE_FLASH_ON		BIT(2)
-/* ISP_FLASH_CONFIG */
-#define RKISP1_CIFFLASH_CONFIG_PRELIGHT_END		BIT(0)
-#define RKISP1_CIFFLASH_CONFIG_VSYNC_POS		BIT(1)
-#define RKISP1_CIFFLASH_CONFIG_PRELIGHT_LOW		BIT(2)
-#define RKISP1_CIFFLASH_CONFIG_SRC_FL_TRIG		BIT(3)
-#define RKISP1_CIFFLASH_CONFIG_DELAY(a)			(((a) & 0xF) << 4)
-
-/* Demosaic:  ISP_DEMOSAIC */
-#define RKISP1_CIF_ISP_DEMOSAIC_BYPASS			BIT(10)
-#define RKISP1_CIF_ISP_DEMOSAIC_TH(x)			((x) & 0xFF)
-
-/* AWB */
-/* ISP_AWB_PROP */
-#define RKISP1_CIF_ISP_AWB_YMAX_CMP_EN			BIT(2)
-#define RKISP1_CIF_ISP_AWB_YMAX_READ(x)			(((x) >> 2) & 1)
-#define RKISP1_CIF_ISP_AWB_MODE_RGB_EN			((1 << 31) | (0x2 << 0))
-#define RKISP1_CIF_ISP_AWB_MODE_YCBCR_EN		((0 << 31) | (0x2 << 0))
-#define RKISP1_CIF_ISP_AWB_MODE_MASK_NONE		0xFFFFFFFC
-#define RKISP1_CIF_ISP_AWB_MODE_READ(x)			((x) & 3)
-/* ISP_AWB_GAIN_RB, ISP_AWB_GAIN_G  */
-#define RKISP1_CIF_ISP_AWB_GAIN_R_SET(x)		(((x) & 0x3FF) << 16)
-#define RKISP1_CIF_ISP_AWB_GAIN_R_READ(x)		(((x) >> 16) & 0x3FF)
-#define RKISP1_CIF_ISP_AWB_GAIN_B_SET(x)		((x) & 0x3FFF)
-#define RKISP1_CIF_ISP_AWB_GAIN_B_READ(x)		((x) & 0x3FFF)
-/* ISP_AWB_REF */
-#define RKISP1_CIF_ISP_AWB_REF_CR_SET(x)		(((x) & 0xFF) << 8)
-#define RKISP1_CIF_ISP_AWB_REF_CR_READ(x)		(((x) >> 8) & 0xFF)
-#define RKISP1_CIF_ISP_AWB_REF_CB_READ(x)		((x) & 0xFF)
-/* ISP_AWB_THRESH */
-#define RKISP1_CIF_ISP_AWB_MAX_CS_SET(x)		(((x) & 0xFF) << 8)
-#define RKISP1_CIF_ISP_AWB_MAX_CS_READ(x)		(((x) >> 8) & 0xFF)
-#define RKISP1_CIF_ISP_AWB_MIN_C_READ(x)		((x) & 0xFF)
-#define RKISP1_CIF_ISP_AWB_MIN_Y_SET(x)			(((x) & 0xFF) << 16)
-#define RKISP1_CIF_ISP_AWB_MIN_Y_READ(x)		(((x) >> 16) & 0xFF)
-#define RKISP1_CIF_ISP_AWB_MAX_Y_SET(x)			(((x) & 0xFF) << 24)
-#define RKISP1_CIF_ISP_AWB_MAX_Y_READ(x)			(((x) >> 24) & 0xFF)
-/* ISP_AWB_MEAN */
-#define RKISP1_CIF_ISP_AWB_GET_MEAN_CR_R(x)		((x) & 0xFF)
-#define RKISP1_CIF_ISP_AWB_GET_MEAN_CB_B(x)		(((x) >> 8) & 0xFF)
-#define RKISP1_CIF_ISP_AWB_GET_MEAN_Y_G(x)		(((x) >> 16) & 0xFF)
-/* ISP_AWB_WHITE_CNT */
-#define RKISP1_CIF_ISP_AWB_GET_PIXEL_CNT(x)		((x) & 0x3FFFFFF)
-
-#define RKISP1_CIF_ISP_AWB_GAINS_MAX_VAL		0x000003FF
-#define RKISP1_CIF_ISP_AWB_WINDOW_OFFSET_MAX		0x00000FFF
-#define RKISP1_CIF_ISP_AWB_WINDOW_MAX_SIZE		0x00001FFF
-#define RKISP1_CIF_ISP_AWB_CBCR_MAX_REF			0x000000FF
-#define RKISP1_CIF_ISP_AWB_THRES_MAX_YC			0x000000FF
-
-/* AE */
-/* ISP_EXP_CTRL */
-#define RKISP1_CIF_ISP_EXP_ENA				BIT(0)
-#define RKISP1_CIF_ISP_EXP_CTRL_AUTOSTOP		BIT(1)
-/*
- *'1' luminance calculation according to  Y=(R+G+B) x 0.332 (85/256)
- *'0' luminance calculation according to Y=16+0.25R+0.5G+0.1094B
- */
-#define RKISP1_CIF_ISP_EXP_CTRL_MEASMODE_1		BIT(31)
-
-/* ISP_EXP_H_SIZE */
-#define RKISP1_CIF_ISP_EXP_H_SIZE_SET(x)		((x) & 0x7FF)
-#define RKISP1_CIF_ISP_EXP_HEIGHT_MASK			0x000007FF
-/* ISP_EXP_V_SIZE : vertical size must be a multiple of 2). */
-#define RKISP1_CIF_ISP_EXP_V_SIZE_SET(x)		((x) & 0x7FE)
-
-/* ISP_EXP_H_OFFSET */
-#define RKISP1_CIF_ISP_EXP_H_OFFSET_SET(x)		((x) & 0x1FFF)
-#define RKISP1_CIF_ISP_EXP_MAX_HOFFS			2424
-/* ISP_EXP_V_OFFSET */
-#define RKISP1_CIF_ISP_EXP_V_OFFSET_SET(x)		((x) & 0x1FFF)
-#define RKISP1_CIF_ISP_EXP_MAX_VOFFS			1806
-
-#define RKISP1_CIF_ISP_EXP_ROW_NUM			5
-#define RKISP1_CIF_ISP_EXP_COLUMN_NUM			5
-#define RKISP1_CIF_ISP_EXP_NUM_LUMA_REGS \
-	(RKISP1_CIF_ISP_EXP_ROW_NUM * RKISP1_CIF_ISP_EXP_COLUMN_NUM)
-#define RKISP1_CIF_ISP_EXP_BLOCK_MAX_HSIZE		516
-#define RKISP1_CIF_ISP_EXP_BLOCK_MIN_HSIZE		35
-#define RKISP1_CIF_ISP_EXP_BLOCK_MAX_VSIZE		390
-#define RKISP1_CIF_ISP_EXP_BLOCK_MIN_VSIZE		28
-#define RKISP1_CIF_ISP_EXP_MAX_HSIZE	\
-	(RKISP1_CIF_ISP_EXP_BLOCK_MAX_HSIZE * RKISP1_CIF_ISP_EXP_COLUMN_NUM + 1)
-#define RKISP1_CIF_ISP_EXP_MIN_HSIZE	\
-	(RKISP1_CIF_ISP_EXP_BLOCK_MIN_HSIZE * RKISP1_CIF_ISP_EXP_COLUMN_NUM + 1)
-#define RKISP1_CIF_ISP_EXP_MAX_VSIZE	\
-	(RKISP1_CIF_ISP_EXP_BLOCK_MAX_VSIZE * RKISP1_CIF_ISP_EXP_ROW_NUM + 1)
-#define RKISP1_CIF_ISP_EXP_MIN_VSIZE	\
-	(RKISP1_CIF_ISP_EXP_BLOCK_MIN_VSIZE * RKISP1_CIF_ISP_EXP_ROW_NUM + 1)
-
-/* LSC: ISP_LSC_CTRL */
-#define RKISP1_CIF_ISP_LSC_CTRL_ENA			BIT(0)
-#define RKISP1_CIF_ISP_LSC_SECT_SIZE_RESERVED		0xFC00FC00
-#define RKISP1_CIF_ISP_LSC_GRAD_RESERVED		0xF000F000
-#define RKISP1_CIF_ISP_LSC_SAMPLE_RESERVED		0xF000F000
-#define RKISP1_CIF_ISP_LSC_TABLE_DATA(v0, v1)     \
-	(((v0) & 0xFFF) | (((v1) & 0xFFF) << 12))
-#define RKISP1_CIF_ISP_LSC_SECT_SIZE(v0, v1)      \
-	(((v0) & 0xFFF) | (((v1) & 0xFFF) << 16))
-#define RKISP1_CIF_ISP_LSC_GRAD_SIZE(v0, v1)      \
-	(((v0) & 0xFFF) | (((v1) & 0xFFF) << 16))
-
-/* LSC: ISP_LSC_TABLE_SEL */
-#define RKISP1_CIF_ISP_LSC_TABLE_0			0
-#define RKISP1_CIF_ISP_LSC_TABLE_1			1
-
-/* LSC: ISP_LSC_STATUS */
-#define RKISP1_CIF_ISP_LSC_ACTIVE_TABLE			BIT(1)
-#define RKISP1_CIF_ISP_LSC_TABLE_ADDRESS_0		0
-#define RKISP1_CIF_ISP_LSC_TABLE_ADDRESS_153		153
-
-/* FLT */
-/* ISP_FILT_MODE */
-#define RKISP1_CIF_ISP_FLT_ENA				BIT(0)
-
-/*
- * 0: green filter static mode (active filter factor = FILT_FAC_MID)
- * 1: dynamic noise reduction/sharpen Default
- */
-#define RKISP1_CIF_ISP_FLT_MODE_DNR			BIT(1)
-#define RKISP1_CIF_ISP_FLT_MODE_MAX			1
-#define RKISP1_CIF_ISP_FLT_CHROMA_V_MODE(x)		(((x) & 0x3) << 4)
-#define RKISP1_CIF_ISP_FLT_CHROMA_H_MODE(x)		(((x) & 0x3) << 6)
-#define RKISP1_CIF_ISP_FLT_CHROMA_MODE_MAX		3
-#define RKISP1_CIF_ISP_FLT_GREEN_STAGE1(x)		(((x) & 0xF) << 8)
-#define RKISP1_CIF_ISP_FLT_GREEN_STAGE1_MAX		8
-#define RKISP1_CIF_ISP_FLT_THREAD_RESERVED		0xFFFFFC00
-#define RKISP1_CIF_ISP_FLT_FAC_RESERVED			0xFFFFFFC0
-#define RKISP1_CIF_ISP_FLT_LUM_WEIGHT_RESERVED		0xFFF80000
-
-#define RKISP1_CIF_ISP_CTK_COEFF_RESERVED		0xFFFFF800
-#define RKISP1_CIF_ISP_XTALK_OFFSET_RESERVED		0xFFFFF000
-
-/* GOC */
-#define RKISP1_CIF_ISP_GAMMA_OUT_MODE_EQU		BIT(0)
-#define RKISP1_CIF_ISP_GOC_MODE_MAX			1
-#define RKISP1_CIF_ISP_GOC_RESERVED			0xFFFFF800
-/* ISP_CTRL BIT 11*/
-#define RKISP1_CIF_ISP_CTRL_ISP_GAMMA_OUT_ENA_READ(x)	(((x) >> 11) & 1)
-
-/* DPCC */
-/* ISP_DPCC_MODE */
-#define RKISP1_CIF_ISP_DPCC_ENA				BIT(0)
-#define RKISP1_CIF_ISP_DPCC_MODE_MAX			0x07
-#define RKISP1_CIF_ISP_DPCC_OUTPUTMODE_MAX		0x0F
-#define RKISP1_CIF_ISP_DPCC_SETUSE_MAX			0x0F
-#define RKISP1_CIF_ISP_DPCC_METHODS_SET_RESERVED	0xFFFFE000
-#define RKISP1_CIF_ISP_DPCC_LINE_THRESH_RESERVED	0xFFFF0000
-#define RKISP1_CIF_ISP_DPCC_LINE_MAD_FAC_RESERVED	0xFFFFC0C0
-#define RKISP1_CIF_ISP_DPCC_PG_FAC_RESERVED		0xFFFFC0C0
-#define RKISP1_CIF_ISP_DPCC_RND_THRESH_RESERVED		0xFFFF0000
-#define RKISP1_CIF_ISP_DPCC_RG_FAC_RESERVED		0xFFFFC0C0
-#define RKISP1_CIF_ISP_DPCC_RO_LIMIT_RESERVED		0xFFFFF000
-#define RKISP1_CIF_ISP_DPCC_RND_OFFS_RESERVED		0xFFFFF000
-
-/* BLS */
-/* ISP_BLS_CTRL */
-#define RKISP1_CIF_ISP_BLS_ENA				BIT(0)
-#define RKISP1_CIF_ISP_BLS_MODE_MEASURED		BIT(1)
-#define RKISP1_CIF_ISP_BLS_MODE_FIXED			0
-#define RKISP1_CIF_ISP_BLS_WINDOW_1			BIT(2)
-#define RKISP1_CIF_ISP_BLS_WINDOW_2			(2 << 2)
-
-/* GAMMA-IN */
-#define RKISP1_CIFISP_DEGAMMA_X_RESERVED	\
-	((1 << 31) | (1 << 27) | (1 << 23) | (1 << 19) |\
-	(1 << 15) | (1 << 11) | (1 << 7) | (1 << 3))
-#define RKISP1_CIFISP_DEGAMMA_Y_RESERVED		0xFFFFF000
-
-/* AFM */
-#define RKISP1_CIF_ISP_AFM_ENA				BIT(0)
-#define RKISP1_CIF_ISP_AFM_THRES_RESERVED		0xFFFF0000
-#define RKISP1_CIF_ISP_AFM_VAR_SHIFT_RESERVED		0xFFF8FFF8
-#define RKISP1_CIF_ISP_AFM_WINDOW_X_RESERVED		0xE000
-#define RKISP1_CIF_ISP_AFM_WINDOW_Y_RESERVED		0xF000
-#define RKISP1_CIF_ISP_AFM_WINDOW_X_MIN			0x5
-#define RKISP1_CIF_ISP_AFM_WINDOW_Y_MIN			0x2
-#define RKISP1_CIF_ISP_AFM_WINDOW_X(x)			(((x) & 0x1FFF) << 16)
-#define RKISP1_CIF_ISP_AFM_WINDOW_Y(x)			((x) & 0x1FFF)
-
-/* DPF */
-#define RKISP1_CIF_ISP_DPF_MODE_EN			BIT(0)
-#define RKISP1_CIF_ISP_DPF_MODE_B_FLT_DIS		BIT(1)
-#define RKISP1_CIF_ISP_DPF_MODE_GB_FLT_DIS		BIT(2)
-#define RKISP1_CIF_ISP_DPF_MODE_GR_FLT_DIS		BIT(3)
-#define RKISP1_CIF_ISP_DPF_MODE_R_FLT_DIS		BIT(4)
-#define RKISP1_CIF_ISP_DPF_MODE_RB_FLTSIZE_9x9		BIT(5)
-#define RKISP1_CIF_ISP_DPF_MODE_NLL_SEGMENTATION	BIT(6)
-#define RKISP1_CIF_ISP_DPF_MODE_AWB_GAIN_COMP		BIT(7)
-#define RKISP1_CIF_ISP_DPF_MODE_LSC_GAIN_COMP		BIT(8)
-#define RKISP1_CIF_ISP_DPF_MODE_USE_NF_GAIN		BIT(9)
-#define RKISP1_CIF_ISP_DPF_NF_GAIN_RESERVED		0xFFFFF000
-#define RKISP1_CIF_ISP_DPF_SPATIAL_COEFF_MAX		0x1F
-#define RKISP1_CIF_ISP_DPF_NLL_COEFF_N_MAX		0x3FF
-
-/* =================================================================== */
-/*                            CIF Registers                            */
-/* =================================================================== */
-#define RKISP1_CIF_CTRL_BASE			0x00000000
-#define RKISP1_CIF_CCL				(RKISP1_CIF_CTRL_BASE + 0x00000000)
-#define RKISP1_CIF_VI_ID			(RKISP1_CIF_CTRL_BASE + 0x00000008)
-#define RKISP1_CIF_ICCL				(RKISP1_CIF_CTRL_BASE + 0x00000010)
-#define RKISP1_CIF_IRCL				(RKISP1_CIF_CTRL_BASE + 0x00000014)
-#define RKISP1_CIF_VI_DPCL			(RKISP1_CIF_CTRL_BASE + 0x00000018)
-
-#define RKISP1_CIF_IMG_EFF_BASE			0x00000200
-#define RKISP1_CIF_IMG_EFF_CTRL			(RKISP1_CIF_IMG_EFF_BASE + 0x00000000)
-#define RKISP1_CIF_IMG_EFF_COLOR_SEL		(RKISP1_CIF_IMG_EFF_BASE + 0x00000004)
-#define RKISP1_CIF_IMG_EFF_MAT_1		(RKISP1_CIF_IMG_EFF_BASE + 0x00000008)
-#define RKISP1_CIF_IMG_EFF_MAT_2		(RKISP1_CIF_IMG_EFF_BASE + 0x0000000C)
-#define RKISP1_CIF_IMG_EFF_MAT_3		(RKISP1_CIF_IMG_EFF_BASE + 0x00000010)
-#define RKISP1_CIF_IMG_EFF_MAT_4		(RKISP1_CIF_IMG_EFF_BASE + 0x00000014)
-#define RKISP1_CIF_IMG_EFF_MAT_5		(RKISP1_CIF_IMG_EFF_BASE + 0x00000018)
-#define RKISP1_CIF_IMG_EFF_TINT			(RKISP1_CIF_IMG_EFF_BASE + 0x0000001C)
-#define RKISP1_CIF_IMG_EFF_CTRL_SHD		(RKISP1_CIF_IMG_EFF_BASE + 0x00000020)
-#define RKISP1_CIF_IMG_EFF_SHARPEN		(RKISP1_CIF_IMG_EFF_BASE + 0x00000024)
-
-#define RKISP1_CIF_SUPER_IMP_BASE		0x00000300
-#define RKISP1_CIF_SUPER_IMP_CTRL		(RKISP1_CIF_SUPER_IMP_BASE + 0x00000000)
-#define RKISP1_CIF_SUPER_IMP_OFFSET_X		(RKISP1_CIF_SUPER_IMP_BASE + 0x00000004)
-#define RKISP1_CIF_SUPER_IMP_OFFSET_Y		(RKISP1_CIF_SUPER_IMP_BASE + 0x00000008)
-#define RKISP1_CIF_SUPER_IMP_COLOR_Y		(RKISP1_CIF_SUPER_IMP_BASE + 0x0000000C)
-#define RKISP1_CIF_SUPER_IMP_COLOR_CB		(RKISP1_CIF_SUPER_IMP_BASE + 0x00000010)
-#define RKISP1_CIF_SUPER_IMP_COLOR_CR		(RKISP1_CIF_SUPER_IMP_BASE + 0x00000014)
-
-#define RKISP1_CIF_ISP_BASE			0x00000400
-#define RKISP1_CIF_ISP_CTRL			(RKISP1_CIF_ISP_BASE + 0x00000000)
-#define RKISP1_CIF_ISP_ACQ_PROP			(RKISP1_CIF_ISP_BASE + 0x00000004)
-#define RKISP1_CIF_ISP_ACQ_H_OFFS		(RKISP1_CIF_ISP_BASE + 0x00000008)
-#define RKISP1_CIF_ISP_ACQ_V_OFFS		(RKISP1_CIF_ISP_BASE + 0x0000000C)
-#define RKISP1_CIF_ISP_ACQ_H_SIZE		(RKISP1_CIF_ISP_BASE + 0x00000010)
-#define RKISP1_CIF_ISP_ACQ_V_SIZE		(RKISP1_CIF_ISP_BASE + 0x00000014)
-#define RKISP1_CIF_ISP_ACQ_NR_FRAMES		(RKISP1_CIF_ISP_BASE + 0x00000018)
-#define RKISP1_CIF_ISP_GAMMA_DX_LO		(RKISP1_CIF_ISP_BASE + 0x0000001C)
-#define RKISP1_CIF_ISP_GAMMA_DX_HI		(RKISP1_CIF_ISP_BASE + 0x00000020)
-#define RKISP1_CIF_ISP_GAMMA_R_Y0		(RKISP1_CIF_ISP_BASE + 0x00000024)
-#define RKISP1_CIF_ISP_GAMMA_R_Y1		(RKISP1_CIF_ISP_BASE + 0x00000028)
-#define RKISP1_CIF_ISP_GAMMA_R_Y2		(RKISP1_CIF_ISP_BASE + 0x0000002C)
-#define RKISP1_CIF_ISP_GAMMA_R_Y3		(RKISP1_CIF_ISP_BASE + 0x00000030)
-#define RKISP1_CIF_ISP_GAMMA_R_Y4		(RKISP1_CIF_ISP_BASE + 0x00000034)
-#define RKISP1_CIF_ISP_GAMMA_R_Y5		(RKISP1_CIF_ISP_BASE + 0x00000038)
-#define RKISP1_CIF_ISP_GAMMA_R_Y6		(RKISP1_CIF_ISP_BASE + 0x0000003C)
-#define RKISP1_CIF_ISP_GAMMA_R_Y7		(RKISP1_CIF_ISP_BASE + 0x00000040)
-#define RKISP1_CIF_ISP_GAMMA_R_Y8		(RKISP1_CIF_ISP_BASE + 0x00000044)
-#define RKISP1_CIF_ISP_GAMMA_R_Y9		(RKISP1_CIF_ISP_BASE + 0x00000048)
-#define RKISP1_CIF_ISP_GAMMA_R_Y10		(RKISP1_CIF_ISP_BASE + 0x0000004C)
-#define RKISP1_CIF_ISP_GAMMA_R_Y11		(RKISP1_CIF_ISP_BASE + 0x00000050)
-#define RKISP1_CIF_ISP_GAMMA_R_Y12		(RKISP1_CIF_ISP_BASE + 0x00000054)
-#define RKISP1_CIF_ISP_GAMMA_R_Y13		(RKISP1_CIF_ISP_BASE + 0x00000058)
-#define RKISP1_CIF_ISP_GAMMA_R_Y14		(RKISP1_CIF_ISP_BASE + 0x0000005C)
-#define RKISP1_CIF_ISP_GAMMA_R_Y15		(RKISP1_CIF_ISP_BASE + 0x00000060)
-#define RKISP1_CIF_ISP_GAMMA_R_Y16		(RKISP1_CIF_ISP_BASE + 0x00000064)
-#define RKISP1_CIF_ISP_GAMMA_G_Y0		(RKISP1_CIF_ISP_BASE + 0x00000068)
-#define RKISP1_CIF_ISP_GAMMA_G_Y1		(RKISP1_CIF_ISP_BASE + 0x0000006C)
-#define RKISP1_CIF_ISP_GAMMA_G_Y2		(RKISP1_CIF_ISP_BASE + 0x00000070)
-#define RKISP1_CIF_ISP_GAMMA_G_Y3		(RKISP1_CIF_ISP_BASE + 0x00000074)
-#define RKISP1_CIF_ISP_GAMMA_G_Y4		(RKISP1_CIF_ISP_BASE + 0x00000078)
-#define RKISP1_CIF_ISP_GAMMA_G_Y5		(RKISP1_CIF_ISP_BASE + 0x0000007C)
-#define RKISP1_CIF_ISP_GAMMA_G_Y6		(RKISP1_CIF_ISP_BASE + 0x00000080)
-#define RKISP1_CIF_ISP_GAMMA_G_Y7		(RKISP1_CIF_ISP_BASE + 0x00000084)
-#define RKISP1_CIF_ISP_GAMMA_G_Y8		(RKISP1_CIF_ISP_BASE + 0x00000088)
-#define RKISP1_CIF_ISP_GAMMA_G_Y9		(RKISP1_CIF_ISP_BASE + 0x0000008C)
-#define RKISP1_CIF_ISP_GAMMA_G_Y10		(RKISP1_CIF_ISP_BASE + 0x00000090)
-#define RKISP1_CIF_ISP_GAMMA_G_Y11		(RKISP1_CIF_ISP_BASE + 0x00000094)
-#define RKISP1_CIF_ISP_GAMMA_G_Y12		(RKISP1_CIF_ISP_BASE + 0x00000098)
-#define RKISP1_CIF_ISP_GAMMA_G_Y13		(RKISP1_CIF_ISP_BASE + 0x0000009C)
-#define RKISP1_CIF_ISP_GAMMA_G_Y14		(RKISP1_CIF_ISP_BASE + 0x000000A0)
-#define RKISP1_CIF_ISP_GAMMA_G_Y15		(RKISP1_CIF_ISP_BASE + 0x000000A4)
-#define RKISP1_CIF_ISP_GAMMA_G_Y16		(RKISP1_CIF_ISP_BASE + 0x000000A8)
-#define RKISP1_CIF_ISP_GAMMA_B_Y0		(RKISP1_CIF_ISP_BASE + 0x000000AC)
-#define RKISP1_CIF_ISP_GAMMA_B_Y1		(RKISP1_CIF_ISP_BASE + 0x000000B0)
-#define RKISP1_CIF_ISP_GAMMA_B_Y2		(RKISP1_CIF_ISP_BASE + 0x000000B4)
-#define RKISP1_CIF_ISP_GAMMA_B_Y3		(RKISP1_CIF_ISP_BASE + 0x000000B8)
-#define RKISP1_CIF_ISP_GAMMA_B_Y4		(RKISP1_CIF_ISP_BASE + 0x000000BC)
-#define RKISP1_CIF_ISP_GAMMA_B_Y5		(RKISP1_CIF_ISP_BASE + 0x000000C0)
-#define RKISP1_CIF_ISP_GAMMA_B_Y6		(RKISP1_CIF_ISP_BASE + 0x000000C4)
-#define RKISP1_CIF_ISP_GAMMA_B_Y7		(RKISP1_CIF_ISP_BASE + 0x000000C8)
-#define RKISP1_CIF_ISP_GAMMA_B_Y8		(RKISP1_CIF_ISP_BASE + 0x000000CC)
-#define RKISP1_CIF_ISP_GAMMA_B_Y9		(RKISP1_CIF_ISP_BASE + 0x000000D0)
-#define RKISP1_CIF_ISP_GAMMA_B_Y10		(RKISP1_CIF_ISP_BASE + 0x000000D4)
-#define RKISP1_CIF_ISP_GAMMA_B_Y11		(RKISP1_CIF_ISP_BASE + 0x000000D8)
-#define RKISP1_CIF_ISP_GAMMA_B_Y12		(RKISP1_CIF_ISP_BASE + 0x000000DC)
-#define RKISP1_CIF_ISP_GAMMA_B_Y13		(RKISP1_CIF_ISP_BASE + 0x000000E0)
-#define RKISP1_CIF_ISP_GAMMA_B_Y14		(RKISP1_CIF_ISP_BASE + 0x000000E4)
-#define RKISP1_CIF_ISP_GAMMA_B_Y15		(RKISP1_CIF_ISP_BASE + 0x000000E8)
-#define RKISP1_CIF_ISP_GAMMA_B_Y16		(RKISP1_CIF_ISP_BASE + 0x000000EC)
-#define RKISP1_CIF_ISP_AWB_PROP			(RKISP1_CIF_ISP_BASE + 0x00000110)
-#define RKISP1_CIF_ISP_AWB_WND_H_OFFS		(RKISP1_CIF_ISP_BASE + 0x00000114)
-#define RKISP1_CIF_ISP_AWB_WND_V_OFFS		(RKISP1_CIF_ISP_BASE + 0x00000118)
-#define RKISP1_CIF_ISP_AWB_WND_H_SIZE		(RKISP1_CIF_ISP_BASE + 0x0000011C)
-#define RKISP1_CIF_ISP_AWB_WND_V_SIZE		(RKISP1_CIF_ISP_BASE + 0x00000120)
-#define RKISP1_CIF_ISP_AWB_FRAMES		(RKISP1_CIF_ISP_BASE + 0x00000124)
-#define RKISP1_CIF_ISP_AWB_REF			(RKISP1_CIF_ISP_BASE + 0x00000128)
-#define RKISP1_CIF_ISP_AWB_THRESH		(RKISP1_CIF_ISP_BASE + 0x0000012C)
-#define RKISP1_CIF_ISP_AWB_GAIN_G		(RKISP1_CIF_ISP_BASE + 0x00000138)
-#define RKISP1_CIF_ISP_AWB_GAIN_RB		(RKISP1_CIF_ISP_BASE + 0x0000013C)
-#define RKISP1_CIF_ISP_AWB_WHITE_CNT		(RKISP1_CIF_ISP_BASE + 0x00000140)
-#define RKISP1_CIF_ISP_AWB_MEAN			(RKISP1_CIF_ISP_BASE + 0x00000144)
-#define RKISP1_CIF_ISP_CC_COEFF_0		(RKISP1_CIF_ISP_BASE + 0x00000170)
-#define RKISP1_CIF_ISP_CC_COEFF_1		(RKISP1_CIF_ISP_BASE + 0x00000174)
-#define RKISP1_CIF_ISP_CC_COEFF_2		(RKISP1_CIF_ISP_BASE + 0x00000178)
-#define RKISP1_CIF_ISP_CC_COEFF_3		(RKISP1_CIF_ISP_BASE + 0x0000017C)
-#define RKISP1_CIF_ISP_CC_COEFF_4		(RKISP1_CIF_ISP_BASE + 0x00000180)
-#define RKISP1_CIF_ISP_CC_COEFF_5		(RKISP1_CIF_ISP_BASE + 0x00000184)
-#define RKISP1_CIF_ISP_CC_COEFF_6		(RKISP1_CIF_ISP_BASE + 0x00000188)
-#define RKISP1_CIF_ISP_CC_COEFF_7		(RKISP1_CIF_ISP_BASE + 0x0000018C)
-#define RKISP1_CIF_ISP_CC_COEFF_8		(RKISP1_CIF_ISP_BASE + 0x00000190)
-#define RKISP1_CIF_ISP_OUT_H_OFFS		(RKISP1_CIF_ISP_BASE + 0x00000194)
-#define RKISP1_CIF_ISP_OUT_V_OFFS		(RKISP1_CIF_ISP_BASE + 0x00000198)
-#define RKISP1_CIF_ISP_OUT_H_SIZE		(RKISP1_CIF_ISP_BASE + 0x0000019C)
-#define RKISP1_CIF_ISP_OUT_V_SIZE		(RKISP1_CIF_ISP_BASE + 0x000001A0)
-#define RKISP1_CIF_ISP_DEMOSAIC			(RKISP1_CIF_ISP_BASE + 0x000001A4)
-#define RKISP1_CIF_ISP_FLAGS_SHD		(RKISP1_CIF_ISP_BASE + 0x000001A8)
-#define RKISP1_CIF_ISP_OUT_H_OFFS_SHD		(RKISP1_CIF_ISP_BASE + 0x000001AC)
-#define RKISP1_CIF_ISP_OUT_V_OFFS_SHD		(RKISP1_CIF_ISP_BASE + 0x000001B0)
-#define RKISP1_CIF_ISP_OUT_H_SIZE_SHD		(RKISP1_CIF_ISP_BASE + 0x000001B4)
-#define RKISP1_CIF_ISP_OUT_V_SIZE_SHD		(RKISP1_CIF_ISP_BASE + 0x000001B8)
-#define RKISP1_CIF_ISP_IMSC			(RKISP1_CIF_ISP_BASE + 0x000001BC)
-#define RKISP1_CIF_ISP_RIS			(RKISP1_CIF_ISP_BASE + 0x000001C0)
-#define RKISP1_CIF_ISP_MIS			(RKISP1_CIF_ISP_BASE + 0x000001C4)
-#define RKISP1_CIF_ISP_ICR			(RKISP1_CIF_ISP_BASE + 0x000001C8)
-#define RKISP1_CIF_ISP_ISR			(RKISP1_CIF_ISP_BASE + 0x000001CC)
-#define RKISP1_CIF_ISP_CT_COEFF_0		(RKISP1_CIF_ISP_BASE + 0x000001D0)
-#define RKISP1_CIF_ISP_CT_COEFF_1		(RKISP1_CIF_ISP_BASE + 0x000001D4)
-#define RKISP1_CIF_ISP_CT_COEFF_2		(RKISP1_CIF_ISP_BASE + 0x000001D8)
-#define RKISP1_CIF_ISP_CT_COEFF_3		(RKISP1_CIF_ISP_BASE + 0x000001DC)
-#define RKISP1_CIF_ISP_CT_COEFF_4		(RKISP1_CIF_ISP_BASE + 0x000001E0)
-#define RKISP1_CIF_ISP_CT_COEFF_5		(RKISP1_CIF_ISP_BASE + 0x000001E4)
-#define RKISP1_CIF_ISP_CT_COEFF_6		(RKISP1_CIF_ISP_BASE + 0x000001E8)
-#define RKISP1_CIF_ISP_CT_COEFF_7		(RKISP1_CIF_ISP_BASE + 0x000001EC)
-#define RKISP1_CIF_ISP_CT_COEFF_8		(RKISP1_CIF_ISP_BASE + 0x000001F0)
-#define RKISP1_CIF_ISP_GAMMA_OUT_MODE		(RKISP1_CIF_ISP_BASE + 0x000001F4)
-#define RKISP1_CIF_ISP_GAMMA_OUT_Y_0		(RKISP1_CIF_ISP_BASE + 0x000001F8)
-#define RKISP1_CIF_ISP_GAMMA_OUT_Y_1		(RKISP1_CIF_ISP_BASE + 0x000001FC)
-#define RKISP1_CIF_ISP_GAMMA_OUT_Y_2		(RKISP1_CIF_ISP_BASE + 0x00000200)
-#define RKISP1_CIF_ISP_GAMMA_OUT_Y_3		(RKISP1_CIF_ISP_BASE + 0x00000204)
-#define RKISP1_CIF_ISP_GAMMA_OUT_Y_4		(RKISP1_CIF_ISP_BASE + 0x00000208)
-#define RKISP1_CIF_ISP_GAMMA_OUT_Y_5		(RKISP1_CIF_ISP_BASE + 0x0000020C)
-#define RKISP1_CIF_ISP_GAMMA_OUT_Y_6		(RKISP1_CIF_ISP_BASE + 0x00000210)
-#define RKISP1_CIF_ISP_GAMMA_OUT_Y_7		(RKISP1_CIF_ISP_BASE + 0x00000214)
-#define RKISP1_CIF_ISP_GAMMA_OUT_Y_8		(RKISP1_CIF_ISP_BASE + 0x00000218)
-#define RKISP1_CIF_ISP_GAMMA_OUT_Y_9		(RKISP1_CIF_ISP_BASE + 0x0000021C)
-#define RKISP1_CIF_ISP_GAMMA_OUT_Y_10		(RKISP1_CIF_ISP_BASE + 0x00000220)
-#define RKISP1_CIF_ISP_GAMMA_OUT_Y_11		(RKISP1_CIF_ISP_BASE + 0x00000224)
-#define RKISP1_CIF_ISP_GAMMA_OUT_Y_12		(RKISP1_CIF_ISP_BASE + 0x00000228)
-#define RKISP1_CIF_ISP_GAMMA_OUT_Y_13		(RKISP1_CIF_ISP_BASE + 0x0000022C)
-#define RKISP1_CIF_ISP_GAMMA_OUT_Y_14		(RKISP1_CIF_ISP_BASE + 0x00000230)
-#define RKISP1_CIF_ISP_GAMMA_OUT_Y_15		(RKISP1_CIF_ISP_BASE + 0x00000234)
-#define RKISP1_CIF_ISP_GAMMA_OUT_Y_16		(RKISP1_CIF_ISP_BASE + 0x00000238)
-#define RKISP1_CIF_ISP_ERR			(RKISP1_CIF_ISP_BASE + 0x0000023C)
-#define RKISP1_CIF_ISP_ERR_CLR			(RKISP1_CIF_ISP_BASE + 0x00000240)
-#define RKISP1_CIF_ISP_FRAME_COUNT		(RKISP1_CIF_ISP_BASE + 0x00000244)
-#define RKISP1_CIF_ISP_CT_OFFSET_R		(RKISP1_CIF_ISP_BASE + 0x00000248)
-#define RKISP1_CIF_ISP_CT_OFFSET_G		(RKISP1_CIF_ISP_BASE + 0x0000024C)
-#define RKISP1_CIF_ISP_CT_OFFSET_B		(RKISP1_CIF_ISP_BASE + 0x00000250)
-
-#define RKISP1_CIF_ISP_FLASH_BASE		0x00000660
-#define RKISP1_CIF_ISP_FLASH_CMD		(RKISP1_CIF_ISP_FLASH_BASE + 0x00000000)
-#define RKISP1_CIF_ISP_FLASH_CONFIG		(RKISP1_CIF_ISP_FLASH_BASE + 0x00000004)
-#define RKISP1_CIF_ISP_FLASH_PREDIV		(RKISP1_CIF_ISP_FLASH_BASE + 0x00000008)
-#define RKISP1_CIF_ISP_FLASH_DELAY		(RKISP1_CIF_ISP_FLASH_BASE + 0x0000000C)
-#define RKISP1_CIF_ISP_FLASH_TIME		(RKISP1_CIF_ISP_FLASH_BASE + 0x00000010)
-#define RKISP1_CIF_ISP_FLASH_MAXP		(RKISP1_CIF_ISP_FLASH_BASE + 0x00000014)
-
-#define RKISP1_CIF_ISP_SH_BASE			0x00000680
-#define RKISP1_CIF_ISP_SH_CTRL			(RKISP1_CIF_ISP_SH_BASE + 0x00000000)
-#define RKISP1_CIF_ISP_SH_PREDIV		(RKISP1_CIF_ISP_SH_BASE + 0x00000004)
-#define RKISP1_CIF_ISP_SH_DELAY			(RKISP1_CIF_ISP_SH_BASE + 0x00000008)
-#define RKISP1_CIF_ISP_SH_TIME			(RKISP1_CIF_ISP_SH_BASE + 0x0000000C)
-
-#define RKISP1_CIF_C_PROC_BASE			0x00000800
-#define RKISP1_CIF_C_PROC_CTRL			(RKISP1_CIF_C_PROC_BASE + 0x00000000)
-#define RKISP1_CIF_C_PROC_CONTRAST		(RKISP1_CIF_C_PROC_BASE + 0x00000004)
-#define RKISP1_CIF_C_PROC_BRIGHTNESS		(RKISP1_CIF_C_PROC_BASE + 0x00000008)
-#define RKISP1_CIF_C_PROC_SATURATION		(RKISP1_CIF_C_PROC_BASE + 0x0000000C)
-#define RKISP1_CIF_C_PROC_HUE			(RKISP1_CIF_C_PROC_BASE + 0x00000010)
-
-#define RKISP1_CIF_DUAL_CROP_BASE		0x00000880
-#define RKISP1_CIF_DUAL_CROP_CTRL		(RKISP1_CIF_DUAL_CROP_BASE + 0x00000000)
-#define RKISP1_CIF_DUAL_CROP_M_H_OFFS		(RKISP1_CIF_DUAL_CROP_BASE + 0x00000004)
-#define RKISP1_CIF_DUAL_CROP_M_V_OFFS		(RKISP1_CIF_DUAL_CROP_BASE + 0x00000008)
-#define RKISP1_CIF_DUAL_CROP_M_H_SIZE		(RKISP1_CIF_DUAL_CROP_BASE + 0x0000000C)
-#define RKISP1_CIF_DUAL_CROP_M_V_SIZE		(RKISP1_CIF_DUAL_CROP_BASE + 0x00000010)
-#define RKISP1_CIF_DUAL_CROP_S_H_OFFS		(RKISP1_CIF_DUAL_CROP_BASE + 0x00000014)
-#define RKISP1_CIF_DUAL_CROP_S_V_OFFS		(RKISP1_CIF_DUAL_CROP_BASE + 0x00000018)
-#define RKISP1_CIF_DUAL_CROP_S_H_SIZE		(RKISP1_CIF_DUAL_CROP_BASE + 0x0000001C)
-#define RKISP1_CIF_DUAL_CROP_S_V_SIZE		(RKISP1_CIF_DUAL_CROP_BASE + 0x00000020)
-#define RKISP1_CIF_DUAL_CROP_M_H_OFFS_SHD	(RKISP1_CIF_DUAL_CROP_BASE + 0x00000024)
-#define RKISP1_CIF_DUAL_CROP_M_V_OFFS_SHD	(RKISP1_CIF_DUAL_CROP_BASE + 0x00000028)
-#define RKISP1_CIF_DUAL_CROP_M_H_SIZE_SHD	(RKISP1_CIF_DUAL_CROP_BASE + 0x0000002C)
-#define RKISP1_CIF_DUAL_CROP_M_V_SIZE_SHD	(RKISP1_CIF_DUAL_CROP_BASE + 0x00000030)
-#define RKISP1_CIF_DUAL_CROP_S_H_OFFS_SHD	(RKISP1_CIF_DUAL_CROP_BASE + 0x00000034)
-#define RKISP1_CIF_DUAL_CROP_S_V_OFFS_SHD	(RKISP1_CIF_DUAL_CROP_BASE + 0x00000038)
-#define RKISP1_CIF_DUAL_CROP_S_H_SIZE_SHD	(RKISP1_CIF_DUAL_CROP_BASE + 0x0000003C)
-#define RKISP1_CIF_DUAL_CROP_S_V_SIZE_SHD	(RKISP1_CIF_DUAL_CROP_BASE + 0x00000040)
-
-#define RKISP1_CIF_MRSZ_BASE			0x00000C00
-#define RKISP1_CIF_MRSZ_CTRL			(RKISP1_CIF_MRSZ_BASE + 0x00000000)
-#define RKISP1_CIF_MRSZ_SCALE_HY		(RKISP1_CIF_MRSZ_BASE + 0x00000004)
-#define RKISP1_CIF_MRSZ_SCALE_HCB		(RKISP1_CIF_MRSZ_BASE + 0x00000008)
-#define RKISP1_CIF_MRSZ_SCALE_HCR		(RKISP1_CIF_MRSZ_BASE + 0x0000000C)
-#define RKISP1_CIF_MRSZ_SCALE_VY		(RKISP1_CIF_MRSZ_BASE + 0x00000010)
-#define RKISP1_CIF_MRSZ_SCALE_VC		(RKISP1_CIF_MRSZ_BASE + 0x00000014)
-#define RKISP1_CIF_MRSZ_PHASE_HY		(RKISP1_CIF_MRSZ_BASE + 0x00000018)
-#define RKISP1_CIF_MRSZ_PHASE_HC		(RKISP1_CIF_MRSZ_BASE + 0x0000001C)
-#define RKISP1_CIF_MRSZ_PHASE_VY		(RKISP1_CIF_MRSZ_BASE + 0x00000020)
-#define RKISP1_CIF_MRSZ_PHASE_VC		(RKISP1_CIF_MRSZ_BASE + 0x00000024)
-#define RKISP1_CIF_MRSZ_SCALE_LUT_ADDR		(RKISP1_CIF_MRSZ_BASE + 0x00000028)
-#define RKISP1_CIF_MRSZ_SCALE_LUT		(RKISP1_CIF_MRSZ_BASE + 0x0000002C)
-#define RKISP1_CIF_MRSZ_CTRL_SHD		(RKISP1_CIF_MRSZ_BASE + 0x00000030)
-#define RKISP1_CIF_MRSZ_SCALE_HY_SHD		(RKISP1_CIF_MRSZ_BASE + 0x00000034)
-#define RKISP1_CIF_MRSZ_SCALE_HCB_SHD		(RKISP1_CIF_MRSZ_BASE + 0x00000038)
-#define RKISP1_CIF_MRSZ_SCALE_HCR_SHD		(RKISP1_CIF_MRSZ_BASE + 0x0000003C)
-#define RKISP1_CIF_MRSZ_SCALE_VY_SHD		(RKISP1_CIF_MRSZ_BASE + 0x00000040)
-#define RKISP1_CIF_MRSZ_SCALE_VC_SHD		(RKISP1_CIF_MRSZ_BASE + 0x00000044)
-#define RKISP1_CIF_MRSZ_PHASE_HY_SHD		(RKISP1_CIF_MRSZ_BASE + 0x00000048)
-#define RKISP1_CIF_MRSZ_PHASE_HC_SHD		(RKISP1_CIF_MRSZ_BASE + 0x0000004C)
-#define RKISP1_CIF_MRSZ_PHASE_VY_SHD		(RKISP1_CIF_MRSZ_BASE + 0x00000050)
-#define RKISP1_CIF_MRSZ_PHASE_VC_SHD		(RKISP1_CIF_MRSZ_BASE + 0x00000054)
-
-#define RKISP1_CIF_SRSZ_BASE			0x00001000
-#define RKISP1_CIF_SRSZ_CTRL			(RKISP1_CIF_SRSZ_BASE + 0x00000000)
-#define RKISP1_CIF_SRSZ_SCALE_HY		(RKISP1_CIF_SRSZ_BASE + 0x00000004)
-#define RKISP1_CIF_SRSZ_SCALE_HCB		(RKISP1_CIF_SRSZ_BASE + 0x00000008)
-#define RKISP1_CIF_SRSZ_SCALE_HCR		(RKISP1_CIF_SRSZ_BASE + 0x0000000C)
-#define RKISP1_CIF_SRSZ_SCALE_VY		(RKISP1_CIF_SRSZ_BASE + 0x00000010)
-#define RKISP1_CIF_SRSZ_SCALE_VC		(RKISP1_CIF_SRSZ_BASE + 0x00000014)
-#define RKISP1_CIF_SRSZ_PHASE_HY		(RKISP1_CIF_SRSZ_BASE + 0x00000018)
-#define RKISP1_CIF_SRSZ_PHASE_HC		(RKISP1_CIF_SRSZ_BASE + 0x0000001C)
-#define RKISP1_CIF_SRSZ_PHASE_VY		(RKISP1_CIF_SRSZ_BASE + 0x00000020)
-#define RKISP1_CIF_SRSZ_PHASE_VC		(RKISP1_CIF_SRSZ_BASE + 0x00000024)
-#define RKISP1_CIF_SRSZ_SCALE_LUT_ADDR		(RKISP1_CIF_SRSZ_BASE + 0x00000028)
-#define RKISP1_CIF_SRSZ_SCALE_LUT		(RKISP1_CIF_SRSZ_BASE + 0x0000002C)
-#define RKISP1_CIF_SRSZ_CTRL_SHD		(RKISP1_CIF_SRSZ_BASE + 0x00000030)
-#define RKISP1_CIF_SRSZ_SCALE_HY_SHD		(RKISP1_CIF_SRSZ_BASE + 0x00000034)
-#define RKISP1_CIF_SRSZ_SCALE_HCB_SHD		(RKISP1_CIF_SRSZ_BASE + 0x00000038)
-#define RKISP1_CIF_SRSZ_SCALE_HCR_SHD		(RKISP1_CIF_SRSZ_BASE + 0x0000003C)
-#define RKISP1_CIF_SRSZ_SCALE_VY_SHD		(RKISP1_CIF_SRSZ_BASE + 0x00000040)
-#define RKISP1_CIF_SRSZ_SCALE_VC_SHD		(RKISP1_CIF_SRSZ_BASE + 0x00000044)
-#define RKISP1_CIF_SRSZ_PHASE_HY_SHD		(RKISP1_CIF_SRSZ_BASE + 0x00000048)
-#define RKISP1_CIF_SRSZ_PHASE_HC_SHD		(RKISP1_CIF_SRSZ_BASE + 0x0000004C)
-#define RKISP1_CIF_SRSZ_PHASE_VY_SHD		(RKISP1_CIF_SRSZ_BASE + 0x00000050)
-#define RKISP1_CIF_SRSZ_PHASE_VC_SHD		(RKISP1_CIF_SRSZ_BASE + 0x00000054)
-
-#define RKISP1_CIF_MI_BASE			0x00001400
-#define RKISP1_CIF_MI_CTRL			(RKISP1_CIF_MI_BASE + 0x00000000)
-#define RKISP1_CIF_MI_INIT			(RKISP1_CIF_MI_BASE + 0x00000004)
-#define RKISP1_CIF_MI_MP_Y_BASE_AD_INIT		(RKISP1_CIF_MI_BASE + 0x00000008)
-#define RKISP1_CIF_MI_MP_Y_SIZE_INIT		(RKISP1_CIF_MI_BASE + 0x0000000C)
-#define RKISP1_CIF_MI_MP_Y_OFFS_CNT_INIT	(RKISP1_CIF_MI_BASE + 0x00000010)
-#define RKISP1_CIF_MI_MP_Y_OFFS_CNT_START	(RKISP1_CIF_MI_BASE + 0x00000014)
-#define RKISP1_CIF_MI_MP_Y_IRQ_OFFS_INIT	(RKISP1_CIF_MI_BASE + 0x00000018)
-#define RKISP1_CIF_MI_MP_CB_BASE_AD_INIT	(RKISP1_CIF_MI_BASE + 0x0000001C)
-#define RKISP1_CIF_MI_MP_CB_SIZE_INIT		(RKISP1_CIF_MI_BASE + 0x00000020)
-#define RKISP1_CIF_MI_MP_CB_OFFS_CNT_INIT	(RKISP1_CIF_MI_BASE + 0x00000024)
-#define RKISP1_CIF_MI_MP_CB_OFFS_CNT_START	(RKISP1_CIF_MI_BASE + 0x00000028)
-#define RKISP1_CIF_MI_MP_CR_BASE_AD_INIT	(RKISP1_CIF_MI_BASE + 0x0000002C)
-#define RKISP1_CIF_MI_MP_CR_SIZE_INIT		(RKISP1_CIF_MI_BASE + 0x00000030)
-#define RKISP1_CIF_MI_MP_CR_OFFS_CNT_INIT	(RKISP1_CIF_MI_BASE + 0x00000034)
-#define RKISP1_CIF_MI_MP_CR_OFFS_CNT_START	(RKISP1_CIF_MI_BASE + 0x00000038)
-#define RKISP1_CIF_MI_SP_Y_BASE_AD_INIT		(RKISP1_CIF_MI_BASE + 0x0000003C)
-#define RKISP1_CIF_MI_SP_Y_SIZE_INIT		(RKISP1_CIF_MI_BASE + 0x00000040)
-#define RKISP1_CIF_MI_SP_Y_OFFS_CNT_INIT	(RKISP1_CIF_MI_BASE + 0x00000044)
-#define RKISP1_CIF_MI_SP_Y_OFFS_CNT_START	(RKISP1_CIF_MI_BASE + 0x00000048)
-#define RKISP1_CIF_MI_SP_Y_LLENGTH		(RKISP1_CIF_MI_BASE + 0x0000004C)
-#define RKISP1_CIF_MI_SP_CB_BASE_AD_INIT	(RKISP1_CIF_MI_BASE + 0x00000050)
-#define RKISP1_CIF_MI_SP_CB_SIZE_INIT		(RKISP1_CIF_MI_BASE + 0x00000054)
-#define RKISP1_CIF_MI_SP_CB_OFFS_CNT_INIT	(RKISP1_CIF_MI_BASE + 0x00000058)
-#define RKISP1_CIF_MI_SP_CB_OFFS_CNT_START	(RKISP1_CIF_MI_BASE + 0x0000005C)
-#define RKISP1_CIF_MI_SP_CR_BASE_AD_INIT	(RKISP1_CIF_MI_BASE + 0x00000060)
-#define RKISP1_CIF_MI_SP_CR_SIZE_INIT		(RKISP1_CIF_MI_BASE + 0x00000064)
-#define RKISP1_CIF_MI_SP_CR_OFFS_CNT_INIT	(RKISP1_CIF_MI_BASE + 0x00000068)
-#define RKISP1_CIF_MI_SP_CR_OFFS_CNT_START	(RKISP1_CIF_MI_BASE + 0x0000006C)
-#define RKISP1_CIF_MI_BYTE_CNT			(RKISP1_CIF_MI_BASE + 0x00000070)
-#define RKISP1_CIF_MI_CTRL_SHD			(RKISP1_CIF_MI_BASE + 0x00000074)
-#define RKISP1_CIF_MI_MP_Y_BASE_AD_SHD		(RKISP1_CIF_MI_BASE + 0x00000078)
-#define RKISP1_CIF_MI_MP_Y_SIZE_SHD		(RKISP1_CIF_MI_BASE + 0x0000007C)
-#define RKISP1_CIF_MI_MP_Y_OFFS_CNT_SHD		(RKISP1_CIF_MI_BASE + 0x00000080)
-#define RKISP1_CIF_MI_MP_Y_IRQ_OFFS_SHD		(RKISP1_CIF_MI_BASE + 0x00000084)
-#define RKISP1_CIF_MI_MP_CB_BASE_AD_SHD		(RKISP1_CIF_MI_BASE + 0x00000088)
-#define RKISP1_CIF_MI_MP_CB_SIZE_SHD		(RKISP1_CIF_MI_BASE + 0x0000008C)
-#define RKISP1_CIF_MI_MP_CB_OFFS_CNT_SHD	(RKISP1_CIF_MI_BASE + 0x00000090)
-#define RKISP1_CIF_MI_MP_CR_BASE_AD_SHD		(RKISP1_CIF_MI_BASE + 0x00000094)
-#define RKISP1_CIF_MI_MP_CR_SIZE_SHD		(RKISP1_CIF_MI_BASE + 0x00000098)
-#define RKISP1_CIF_MI_MP_CR_OFFS_CNT_SHD	(RKISP1_CIF_MI_BASE + 0x0000009C)
-#define RKISP1_CIF_MI_SP_Y_BASE_AD_SHD		(RKISP1_CIF_MI_BASE + 0x000000A0)
-#define RKISP1_CIF_MI_SP_Y_SIZE_SHD		(RKISP1_CIF_MI_BASE + 0x000000A4)
-#define RKISP1_CIF_MI_SP_Y_OFFS_CNT_SHD		(RKISP1_CIF_MI_BASE + 0x000000A8)
-#define RKISP1_CIF_MI_SP_CB_BASE_AD_SHD		(RKISP1_CIF_MI_BASE + 0x000000B0)
-#define RKISP1_CIF_MI_SP_CB_SIZE_SHD		(RKISP1_CIF_MI_BASE + 0x000000B4)
-#define RKISP1_CIF_MI_SP_CB_OFFS_CNT_SHD	(RKISP1_CIF_MI_BASE + 0x000000B8)
-#define RKISP1_CIF_MI_SP_CR_BASE_AD_SHD		(RKISP1_CIF_MI_BASE + 0x000000BC)
-#define RKISP1_CIF_MI_SP_CR_SIZE_SHD		(RKISP1_CIF_MI_BASE + 0x000000C0)
-#define RKISP1_CIF_MI_SP_CR_OFFS_CNT_SHD	(RKISP1_CIF_MI_BASE + 0x000000C4)
-#define RKISP1_CIF_MI_DMA_Y_PIC_START_AD	(RKISP1_CIF_MI_BASE + 0x000000C8)
-#define RKISP1_CIF_MI_DMA_Y_PIC_WIDTH		(RKISP1_CIF_MI_BASE + 0x000000CC)
-#define RKISP1_CIF_MI_DMA_Y_LLENGTH		(RKISP1_CIF_MI_BASE + 0x000000D0)
-#define RKISP1_CIF_MI_DMA_Y_PIC_SIZE		(RKISP1_CIF_MI_BASE + 0x000000D4)
-#define RKISP1_CIF_MI_DMA_CB_PIC_START_AD	(RKISP1_CIF_MI_BASE + 0x000000D8)
-#define RKISP1_CIF_MI_DMA_CR_PIC_START_AD	(RKISP1_CIF_MI_BASE + 0x000000E8)
-#define RKISP1_CIF_MI_IMSC			(RKISP1_CIF_MI_BASE + 0x000000F8)
-#define RKISP1_CIF_MI_RIS			(RKISP1_CIF_MI_BASE + 0x000000FC)
-#define RKISP1_CIF_MI_MIS			(RKISP1_CIF_MI_BASE + 0x00000100)
-#define RKISP1_CIF_MI_ICR			(RKISP1_CIF_MI_BASE + 0x00000104)
-#define RKISP1_CIF_MI_ISR			(RKISP1_CIF_MI_BASE + 0x00000108)
-#define RKISP1_CIF_MI_STATUS			(RKISP1_CIF_MI_BASE + 0x0000010C)
-#define RKISP1_CIF_MI_STATUS_CLR		(RKISP1_CIF_MI_BASE + 0x00000110)
-#define RKISP1_CIF_MI_SP_Y_PIC_WIDTH		(RKISP1_CIF_MI_BASE + 0x00000114)
-#define RKISP1_CIF_MI_SP_Y_PIC_HEIGHT		(RKISP1_CIF_MI_BASE + 0x00000118)
-#define RKISP1_CIF_MI_SP_Y_PIC_SIZE		(RKISP1_CIF_MI_BASE + 0x0000011C)
-#define RKISP1_CIF_MI_DMA_CTRL			(RKISP1_CIF_MI_BASE + 0x00000120)
-#define RKISP1_CIF_MI_DMA_START			(RKISP1_CIF_MI_BASE + 0x00000124)
-#define RKISP1_CIF_MI_DMA_STATUS		(RKISP1_CIF_MI_BASE + 0x00000128)
-#define RKISP1_CIF_MI_PIXEL_COUNT		(RKISP1_CIF_MI_BASE + 0x0000012C)
-#define RKISP1_CIF_MI_MP_Y_BASE_AD_INIT2	(RKISP1_CIF_MI_BASE + 0x00000130)
-#define RKISP1_CIF_MI_MP_CB_BASE_AD_INIT2	(RKISP1_CIF_MI_BASE + 0x00000134)
-#define RKISP1_CIF_MI_MP_CR_BASE_AD_INIT2	(RKISP1_CIF_MI_BASE + 0x00000138)
-#define RKISP1_CIF_MI_SP_Y_BASE_AD_INIT2	(RKISP1_CIF_MI_BASE + 0x0000013C)
-#define RKISP1_CIF_MI_SP_CB_BASE_AD_INIT2	(RKISP1_CIF_MI_BASE + 0x00000140)
-#define RKISP1_CIF_MI_SP_CR_BASE_AD_INIT2	(RKISP1_CIF_MI_BASE + 0x00000144)
-#define RKISP1_CIF_MI_XTD_FORMAT_CTRL		(RKISP1_CIF_MI_BASE + 0x00000148)
-
-#define RKISP1_CIF_SMIA_BASE			0x00001A00
-#define RKISP1_CIF_SMIA_CTRL			(RKISP1_CIF_SMIA_BASE + 0x00000000)
-#define RKISP1_CIF_SMIA_STATUS			(RKISP1_CIF_SMIA_BASE + 0x00000004)
-#define RKISP1_CIF_SMIA_IMSC			(RKISP1_CIF_SMIA_BASE + 0x00000008)
-#define RKISP1_CIF_SMIA_RIS			(RKISP1_CIF_SMIA_BASE + 0x0000000C)
-#define RKISP1_CIF_SMIA_MIS			(RKISP1_CIF_SMIA_BASE + 0x00000010)
-#define RKISP1_CIF_SMIA_ICR			(RKISP1_CIF_SMIA_BASE + 0x00000014)
-#define RKISP1_CIF_SMIA_ISR			(RKISP1_CIF_SMIA_BASE + 0x00000018)
-#define RKISP1_CIF_SMIA_DATA_FORMAT_SEL		(RKISP1_CIF_SMIA_BASE + 0x0000001C)
-#define RKISP1_CIF_SMIA_SOF_EMB_DATA_LINES	(RKISP1_CIF_SMIA_BASE + 0x00000020)
-#define RKISP1_CIF_SMIA_EMB_HSTART		(RKISP1_CIF_SMIA_BASE + 0x00000024)
-#define RKISP1_CIF_SMIA_EMB_HSIZE		(RKISP1_CIF_SMIA_BASE + 0x00000028)
-#define RKISP1_CIF_SMIA_EMB_VSTART		(RKISP1_CIF_SMIA_BASE + 0x0000002c)
-#define RKISP1_CIF_SMIA_NUM_LINES		(RKISP1_CIF_SMIA_BASE + 0x00000030)
-#define RKISP1_CIF_SMIA_EMB_DATA_FIFO		(RKISP1_CIF_SMIA_BASE + 0x00000034)
-#define RKISP1_CIF_SMIA_EMB_DATA_WATERMARK	(RKISP1_CIF_SMIA_BASE + 0x00000038)
-
-#define RKISP1_CIF_MIPI_BASE			0x00001C00
-#define RKISP1_CIF_MIPI_CTRL			(RKISP1_CIF_MIPI_BASE + 0x00000000)
-#define RKISP1_CIF_MIPI_STATUS			(RKISP1_CIF_MIPI_BASE + 0x00000004)
-#define RKISP1_CIF_MIPI_IMSC			(RKISP1_CIF_MIPI_BASE + 0x00000008)
-#define RKISP1_CIF_MIPI_RIS			(RKISP1_CIF_MIPI_BASE + 0x0000000C)
-#define RKISP1_CIF_MIPI_MIS			(RKISP1_CIF_MIPI_BASE + 0x00000010)
-#define RKISP1_CIF_MIPI_ICR			(RKISP1_CIF_MIPI_BASE + 0x00000014)
-#define RKISP1_CIF_MIPI_ISR			(RKISP1_CIF_MIPI_BASE + 0x00000018)
-#define RKISP1_CIF_MIPI_CUR_DATA_ID		(RKISP1_CIF_MIPI_BASE + 0x0000001C)
-#define RKISP1_CIF_MIPI_IMG_DATA_SEL		(RKISP1_CIF_MIPI_BASE + 0x00000020)
-#define RKISP1_CIF_MIPI_ADD_DATA_SEL_1		(RKISP1_CIF_MIPI_BASE + 0x00000024)
-#define RKISP1_CIF_MIPI_ADD_DATA_SEL_2		(RKISP1_CIF_MIPI_BASE + 0x00000028)
-#define RKISP1_CIF_MIPI_ADD_DATA_SEL_3		(RKISP1_CIF_MIPI_BASE + 0x0000002C)
-#define RKISP1_CIF_MIPI_ADD_DATA_SEL_4		(RKISP1_CIF_MIPI_BASE + 0x00000030)
-#define RKISP1_CIF_MIPI_ADD_DATA_FIFO		(RKISP1_CIF_MIPI_BASE + 0x00000034)
-#define RKISP1_CIF_MIPI_FIFO_FILL_LEVEL		(RKISP1_CIF_MIPI_BASE + 0x00000038)
-#define RKISP1_CIF_MIPI_COMPRESSED_MODE		(RKISP1_CIF_MIPI_BASE + 0x0000003C)
-#define RKISP1_CIF_MIPI_FRAME			(RKISP1_CIF_MIPI_BASE + 0x00000040)
-#define RKISP1_CIF_MIPI_GEN_SHORT_DT		(RKISP1_CIF_MIPI_BASE + 0x00000044)
-#define RKISP1_CIF_MIPI_GEN_SHORT_8_9		(RKISP1_CIF_MIPI_BASE + 0x00000048)
-#define RKISP1_CIF_MIPI_GEN_SHORT_A_B		(RKISP1_CIF_MIPI_BASE + 0x0000004C)
-#define RKISP1_CIF_MIPI_GEN_SHORT_C_D		(RKISP1_CIF_MIPI_BASE + 0x00000050)
-#define RKISP1_CIF_MIPI_GEN_SHORT_E_F		(RKISP1_CIF_MIPI_BASE + 0x00000054)
-
-#define RKISP1_CIF_ISP_AFM_BASE			0x00002000
-#define RKISP1_CIF_ISP_AFM_CTRL			(RKISP1_CIF_ISP_AFM_BASE + 0x00000000)
-#define RKISP1_CIF_ISP_AFM_LT_A			(RKISP1_CIF_ISP_AFM_BASE + 0x00000004)
-#define RKISP1_CIF_ISP_AFM_RB_A			(RKISP1_CIF_ISP_AFM_BASE + 0x00000008)
-#define RKISP1_CIF_ISP_AFM_LT_B			(RKISP1_CIF_ISP_AFM_BASE + 0x0000000C)
-#define RKISP1_CIF_ISP_AFM_RB_B			(RKISP1_CIF_ISP_AFM_BASE + 0x00000010)
-#define RKISP1_CIF_ISP_AFM_LT_C			(RKISP1_CIF_ISP_AFM_BASE + 0x00000014)
-#define RKISP1_CIF_ISP_AFM_RB_C			(RKISP1_CIF_ISP_AFM_BASE + 0x00000018)
-#define RKISP1_CIF_ISP_AFM_THRES		(RKISP1_CIF_ISP_AFM_BASE + 0x0000001C)
-#define RKISP1_CIF_ISP_AFM_VAR_SHIFT		(RKISP1_CIF_ISP_AFM_BASE + 0x00000020)
-#define RKISP1_CIF_ISP_AFM_SUM_A		(RKISP1_CIF_ISP_AFM_BASE + 0x00000024)
-#define RKISP1_CIF_ISP_AFM_SUM_B		(RKISP1_CIF_ISP_AFM_BASE + 0x00000028)
-#define RKISP1_CIF_ISP_AFM_SUM_C		(RKISP1_CIF_ISP_AFM_BASE + 0x0000002C)
-#define RKISP1_CIF_ISP_AFM_LUM_A		(RKISP1_CIF_ISP_AFM_BASE + 0x00000030)
-#define RKISP1_CIF_ISP_AFM_LUM_B		(RKISP1_CIF_ISP_AFM_BASE + 0x00000034)
-#define RKISP1_CIF_ISP_AFM_LUM_C		(RKISP1_CIF_ISP_AFM_BASE + 0x00000038)
-
-#define RKISP1_CIF_ISP_LSC_BASE			0x00002200
-#define RKISP1_CIF_ISP_LSC_CTRL			(RKISP1_CIF_ISP_LSC_BASE + 0x00000000)
-#define RKISP1_CIF_ISP_LSC_R_TABLE_ADDR		(RKISP1_CIF_ISP_LSC_BASE + 0x00000004)
-#define RKISP1_CIF_ISP_LSC_GR_TABLE_ADDR	(RKISP1_CIF_ISP_LSC_BASE + 0x00000008)
-#define RKISP1_CIF_ISP_LSC_B_TABLE_ADDR		(RKISP1_CIF_ISP_LSC_BASE + 0x0000000C)
-#define RKISP1_CIF_ISP_LSC_GB_TABLE_ADDR	(RKISP1_CIF_ISP_LSC_BASE + 0x00000010)
-#define RKISP1_CIF_ISP_LSC_R_TABLE_DATA		(RKISP1_CIF_ISP_LSC_BASE + 0x00000014)
-#define RKISP1_CIF_ISP_LSC_GR_TABLE_DATA	(RKISP1_CIF_ISP_LSC_BASE + 0x00000018)
-#define RKISP1_CIF_ISP_LSC_B_TABLE_DATA		(RKISP1_CIF_ISP_LSC_BASE + 0x0000001C)
-#define RKISP1_CIF_ISP_LSC_GB_TABLE_DATA	(RKISP1_CIF_ISP_LSC_BASE + 0x00000020)
-#define RKISP1_CIF_ISP_LSC_XGRAD_01		(RKISP1_CIF_ISP_LSC_BASE + 0x00000024)
-#define RKISP1_CIF_ISP_LSC_XGRAD_23		(RKISP1_CIF_ISP_LSC_BASE + 0x00000028)
-#define RKISP1_CIF_ISP_LSC_XGRAD_45		(RKISP1_CIF_ISP_LSC_BASE + 0x0000002C)
-#define RKISP1_CIF_ISP_LSC_XGRAD_67		(RKISP1_CIF_ISP_LSC_BASE + 0x00000030)
-#define RKISP1_CIF_ISP_LSC_YGRAD_01		(RKISP1_CIF_ISP_LSC_BASE + 0x00000034)
-#define RKISP1_CIF_ISP_LSC_YGRAD_23		(RKISP1_CIF_ISP_LSC_BASE + 0x00000038)
-#define RKISP1_CIF_ISP_LSC_YGRAD_45		(RKISP1_CIF_ISP_LSC_BASE + 0x0000003C)
-#define RKISP1_CIF_ISP_LSC_YGRAD_67		(RKISP1_CIF_ISP_LSC_BASE + 0x00000040)
-#define RKISP1_CIF_ISP_LSC_XSIZE_01		(RKISP1_CIF_ISP_LSC_BASE + 0x00000044)
-#define RKISP1_CIF_ISP_LSC_XSIZE_23		(RKISP1_CIF_ISP_LSC_BASE + 0x00000048)
-#define RKISP1_CIF_ISP_LSC_XSIZE_45		(RKISP1_CIF_ISP_LSC_BASE + 0x0000004C)
-#define RKISP1_CIF_ISP_LSC_XSIZE_67		(RKISP1_CIF_ISP_LSC_BASE + 0x00000050)
-#define RKISP1_CIF_ISP_LSC_YSIZE_01		(RKISP1_CIF_ISP_LSC_BASE + 0x00000054)
-#define RKISP1_CIF_ISP_LSC_YSIZE_23		(RKISP1_CIF_ISP_LSC_BASE + 0x00000058)
-#define RKISP1_CIF_ISP_LSC_YSIZE_45		(RKISP1_CIF_ISP_LSC_BASE + 0x0000005C)
-#define RKISP1_CIF_ISP_LSC_YSIZE_67		(RKISP1_CIF_ISP_LSC_BASE + 0x00000060)
-#define RKISP1_CIF_ISP_LSC_TABLE_SEL		(RKISP1_CIF_ISP_LSC_BASE + 0x00000064)
-#define RKISP1_CIF_ISP_LSC_STATUS		(RKISP1_CIF_ISP_LSC_BASE + 0x00000068)
-
-#define RKISP1_CIF_ISP_IS_BASE			0x00002300
-#define RKISP1_CIF_ISP_IS_CTRL			(RKISP1_CIF_ISP_IS_BASE + 0x00000000)
-#define RKISP1_CIF_ISP_IS_RECENTER		(RKISP1_CIF_ISP_IS_BASE + 0x00000004)
-#define RKISP1_CIF_ISP_IS_H_OFFS		(RKISP1_CIF_ISP_IS_BASE + 0x00000008)
-#define RKISP1_CIF_ISP_IS_V_OFFS		(RKISP1_CIF_ISP_IS_BASE + 0x0000000C)
-#define RKISP1_CIF_ISP_IS_H_SIZE		(RKISP1_CIF_ISP_IS_BASE + 0x00000010)
-#define RKISP1_CIF_ISP_IS_V_SIZE		(RKISP1_CIF_ISP_IS_BASE + 0x00000014)
-#define RKISP1_CIF_ISP_IS_MAX_DX		(RKISP1_CIF_ISP_IS_BASE + 0x00000018)
-#define RKISP1_CIF_ISP_IS_MAX_DY		(RKISP1_CIF_ISP_IS_BASE + 0x0000001C)
-#define RKISP1_CIF_ISP_IS_DISPLACE		(RKISP1_CIF_ISP_IS_BASE + 0x00000020)
-#define RKISP1_CIF_ISP_IS_H_OFFS_SHD		(RKISP1_CIF_ISP_IS_BASE + 0x00000024)
-#define RKISP1_CIF_ISP_IS_V_OFFS_SHD		(RKISP1_CIF_ISP_IS_BASE + 0x00000028)
-#define RKISP1_CIF_ISP_IS_H_SIZE_SHD		(RKISP1_CIF_ISP_IS_BASE + 0x0000002C)
-#define RKISP1_CIF_ISP_IS_V_SIZE_SHD		(RKISP1_CIF_ISP_IS_BASE + 0x00000030)
-
-#define RKISP1_CIF_ISP_HIST_BASE		0x00002400
-
-#define RKISP1_CIF_ISP_HIST_PROP		(RKISP1_CIF_ISP_HIST_BASE + 0x00000000)
-#define RKISP1_CIF_ISP_HIST_H_OFFS		(RKISP1_CIF_ISP_HIST_BASE + 0x00000004)
-#define RKISP1_CIF_ISP_HIST_V_OFFS		(RKISP1_CIF_ISP_HIST_BASE + 0x00000008)
-#define RKISP1_CIF_ISP_HIST_H_SIZE		(RKISP1_CIF_ISP_HIST_BASE + 0x0000000C)
-#define RKISP1_CIF_ISP_HIST_V_SIZE		(RKISP1_CIF_ISP_HIST_BASE + 0x00000010)
-#define RKISP1_CIF_ISP_HIST_BIN_0		(RKISP1_CIF_ISP_HIST_BASE + 0x00000014)
-#define RKISP1_CIF_ISP_HIST_BIN_1		(RKISP1_CIF_ISP_HIST_BASE + 0x00000018)
-#define RKISP1_CIF_ISP_HIST_BIN_2		(RKISP1_CIF_ISP_HIST_BASE + 0x0000001C)
-#define RKISP1_CIF_ISP_HIST_BIN_3		(RKISP1_CIF_ISP_HIST_BASE + 0x00000020)
-#define RKISP1_CIF_ISP_HIST_BIN_4		(RKISP1_CIF_ISP_HIST_BASE + 0x00000024)
-#define RKISP1_CIF_ISP_HIST_BIN_5		(RKISP1_CIF_ISP_HIST_BASE + 0x00000028)
-#define RKISP1_CIF_ISP_HIST_BIN_6		(RKISP1_CIF_ISP_HIST_BASE + 0x0000002C)
-#define RKISP1_CIF_ISP_HIST_BIN_7		(RKISP1_CIF_ISP_HIST_BASE + 0x00000030)
-#define RKISP1_CIF_ISP_HIST_BIN_8		(RKISP1_CIF_ISP_HIST_BASE + 0x00000034)
-#define RKISP1_CIF_ISP_HIST_BIN_9		(RKISP1_CIF_ISP_HIST_BASE + 0x00000038)
-#define RKISP1_CIF_ISP_HIST_BIN_10		(RKISP1_CIF_ISP_HIST_BASE + 0x0000003C)
-#define RKISP1_CIF_ISP_HIST_BIN_11		(RKISP1_CIF_ISP_HIST_BASE + 0x00000040)
-#define RKISP1_CIF_ISP_HIST_BIN_12		(RKISP1_CIF_ISP_HIST_BASE + 0x00000044)
-#define RKISP1_CIF_ISP_HIST_BIN_13		(RKISP1_CIF_ISP_HIST_BASE + 0x00000048)
-#define RKISP1_CIF_ISP_HIST_BIN_14		(RKISP1_CIF_ISP_HIST_BASE + 0x0000004C)
-#define RKISP1_CIF_ISP_HIST_BIN_15		(RKISP1_CIF_ISP_HIST_BASE + 0x00000050)
-#define RKISP1_CIF_ISP_HIST_WEIGHT_00TO30	(RKISP1_CIF_ISP_HIST_BASE + 0x00000054)
-#define RKISP1_CIF_ISP_HIST_WEIGHT_40TO21	(RKISP1_CIF_ISP_HIST_BASE + 0x00000058)
-#define RKISP1_CIF_ISP_HIST_WEIGHT_31TO12	(RKISP1_CIF_ISP_HIST_BASE + 0x0000005C)
-#define RKISP1_CIF_ISP_HIST_WEIGHT_22TO03	(RKISP1_CIF_ISP_HIST_BASE + 0x00000060)
-#define RKISP1_CIF_ISP_HIST_WEIGHT_13TO43	(RKISP1_CIF_ISP_HIST_BASE + 0x00000064)
-#define RKISP1_CIF_ISP_HIST_WEIGHT_04TO34	(RKISP1_CIF_ISP_HIST_BASE + 0x00000068)
-#define RKISP1_CIF_ISP_HIST_WEIGHT_44		(RKISP1_CIF_ISP_HIST_BASE + 0x0000006C)
-
-#define RKISP1_CIF_ISP_FILT_BASE		0x00002500
-#define RKISP1_CIF_ISP_FILT_MODE		(RKISP1_CIF_ISP_FILT_BASE + 0x00000000)
-#define RKISP1_CIF_ISP_FILT_THRESH_BL0		(RKISP1_CIF_ISP_FILT_BASE + 0x00000028)
-#define RKISP1_CIF_ISP_FILT_THRESH_BL1		(RKISP1_CIF_ISP_FILT_BASE + 0x0000002c)
-#define RKISP1_CIF_ISP_FILT_THRESH_SH0		(RKISP1_CIF_ISP_FILT_BASE + 0x00000030)
-#define RKISP1_CIF_ISP_FILT_THRESH_SH1		(RKISP1_CIF_ISP_FILT_BASE + 0x00000034)
-#define RKISP1_CIF_ISP_FILT_LUM_WEIGHT		(RKISP1_CIF_ISP_FILT_BASE + 0x00000038)
-#define RKISP1_CIF_ISP_FILT_FAC_SH1		(RKISP1_CIF_ISP_FILT_BASE + 0x0000003c)
-#define RKISP1_CIF_ISP_FILT_FAC_SH0		(RKISP1_CIF_ISP_FILT_BASE + 0x00000040)
-#define RKISP1_CIF_ISP_FILT_FAC_MID		(RKISP1_CIF_ISP_FILT_BASE + 0x00000044)
-#define RKISP1_CIF_ISP_FILT_FAC_BL0		(RKISP1_CIF_ISP_FILT_BASE + 0x00000048)
-#define RKISP1_CIF_ISP_FILT_FAC_BL1		(RKISP1_CIF_ISP_FILT_BASE + 0x0000004C)
-
-#define RKISP1_CIF_ISP_CAC_BASE			0x00002580
-#define RKISP1_CIF_ISP_CAC_CTRL			(RKISP1_CIF_ISP_CAC_BASE + 0x00000000)
-#define RKISP1_CIF_ISP_CAC_COUNT_START		(RKISP1_CIF_ISP_CAC_BASE + 0x00000004)
-#define RKISP1_CIF_ISP_CAC_A			(RKISP1_CIF_ISP_CAC_BASE + 0x00000008)
-#define RKISP1_CIF_ISP_CAC_B			(RKISP1_CIF_ISP_CAC_BASE + 0x0000000C)
-#define RKISP1_CIF_ISP_CAC_C			(RKISP1_CIF_ISP_CAC_BASE + 0x00000010)
-#define RKISP1_CIF_ISP_X_NORM			(RKISP1_CIF_ISP_CAC_BASE + 0x00000014)
-#define RKISP1_CIF_ISP_Y_NORM			(RKISP1_CIF_ISP_CAC_BASE + 0x00000018)
-
-#define RKISP1_CIF_ISP_EXP_BASE			0x00002600
-#define RKISP1_CIF_ISP_EXP_CTRL			(RKISP1_CIF_ISP_EXP_BASE + 0x00000000)
-#define RKISP1_CIF_ISP_EXP_H_OFFSET		(RKISP1_CIF_ISP_EXP_BASE + 0x00000004)
-#define RKISP1_CIF_ISP_EXP_V_OFFSET		(RKISP1_CIF_ISP_EXP_BASE + 0x00000008)
-#define RKISP1_CIF_ISP_EXP_H_SIZE		(RKISP1_CIF_ISP_EXP_BASE + 0x0000000C)
-#define RKISP1_CIF_ISP_EXP_V_SIZE		(RKISP1_CIF_ISP_EXP_BASE + 0x00000010)
-#define RKISP1_CIF_ISP_EXP_MEAN_00		(RKISP1_CIF_ISP_EXP_BASE + 0x00000014)
-#define RKISP1_CIF_ISP_EXP_MEAN_10		(RKISP1_CIF_ISP_EXP_BASE + 0x00000018)
-#define RKISP1_CIF_ISP_EXP_MEAN_20		(RKISP1_CIF_ISP_EXP_BASE + 0x0000001c)
-#define RKISP1_CIF_ISP_EXP_MEAN_30		(RKISP1_CIF_ISP_EXP_BASE + 0x00000020)
-#define RKISP1_CIF_ISP_EXP_MEAN_40		(RKISP1_CIF_ISP_EXP_BASE + 0x00000024)
-#define RKISP1_CIF_ISP_EXP_MEAN_01		(RKISP1_CIF_ISP_EXP_BASE + 0x00000028)
-#define RKISP1_CIF_ISP_EXP_MEAN_11		(RKISP1_CIF_ISP_EXP_BASE + 0x0000002c)
-#define RKISP1_CIF_ISP_EXP_MEAN_21		(RKISP1_CIF_ISP_EXP_BASE + 0x00000030)
-#define RKISP1_CIF_ISP_EXP_MEAN_31		(RKISP1_CIF_ISP_EXP_BASE + 0x00000034)
-#define RKISP1_CIF_ISP_EXP_MEAN_41		(RKISP1_CIF_ISP_EXP_BASE + 0x00000038)
-#define RKISP1_CIF_ISP_EXP_MEAN_02		(RKISP1_CIF_ISP_EXP_BASE + 0x0000003c)
-#define RKISP1_CIF_ISP_EXP_MEAN_12		(RKISP1_CIF_ISP_EXP_BASE + 0x00000040)
-#define RKISP1_CIF_ISP_EXP_MEAN_22		(RKISP1_CIF_ISP_EXP_BASE + 0x00000044)
-#define RKISP1_CIF_ISP_EXP_MEAN_32		(RKISP1_CIF_ISP_EXP_BASE + 0x00000048)
-#define RKISP1_CIF_ISP_EXP_MEAN_42		(RKISP1_CIF_ISP_EXP_BASE + 0x0000004c)
-#define RKISP1_CIF_ISP_EXP_MEAN_03		(RKISP1_CIF_ISP_EXP_BASE + 0x00000050)
-#define RKISP1_CIF_ISP_EXP_MEAN_13		(RKISP1_CIF_ISP_EXP_BASE + 0x00000054)
-#define RKISP1_CIF_ISP_EXP_MEAN_23		(RKISP1_CIF_ISP_EXP_BASE + 0x00000058)
-#define RKISP1_CIF_ISP_EXP_MEAN_33		(RKISP1_CIF_ISP_EXP_BASE + 0x0000005c)
-#define RKISP1_CIF_ISP_EXP_MEAN_43		(RKISP1_CIF_ISP_EXP_BASE + 0x00000060)
-#define RKISP1_CIF_ISP_EXP_MEAN_04		(RKISP1_CIF_ISP_EXP_BASE + 0x00000064)
-#define RKISP1_CIF_ISP_EXP_MEAN_14		(RKISP1_CIF_ISP_EXP_BASE + 0x00000068)
-#define RKISP1_CIF_ISP_EXP_MEAN_24		(RKISP1_CIF_ISP_EXP_BASE + 0x0000006c)
-#define RKISP1_CIF_ISP_EXP_MEAN_34		(RKISP1_CIF_ISP_EXP_BASE + 0x00000070)
-#define RKISP1_CIF_ISP_EXP_MEAN_44		(RKISP1_CIF_ISP_EXP_BASE + 0x00000074)
-
-#define RKISP1_CIF_ISP_BLS_BASE			0x00002700
-#define RKISP1_CIF_ISP_BLS_CTRL			(RKISP1_CIF_ISP_BLS_BASE + 0x00000000)
-#define RKISP1_CIF_ISP_BLS_SAMPLES		(RKISP1_CIF_ISP_BLS_BASE + 0x00000004)
-#define RKISP1_CIF_ISP_BLS_H1_START		(RKISP1_CIF_ISP_BLS_BASE + 0x00000008)
-#define RKISP1_CIF_ISP_BLS_H1_STOP		(RKISP1_CIF_ISP_BLS_BASE + 0x0000000c)
-#define RKISP1_CIF_ISP_BLS_V1_START		(RKISP1_CIF_ISP_BLS_BASE + 0x00000010)
-#define RKISP1_CIF_ISP_BLS_V1_STOP		(RKISP1_CIF_ISP_BLS_BASE + 0x00000014)
-#define RKISP1_CIF_ISP_BLS_H2_START		(RKISP1_CIF_ISP_BLS_BASE + 0x00000018)
-#define RKISP1_CIF_ISP_BLS_H2_STOP		(RKISP1_CIF_ISP_BLS_BASE + 0x0000001c)
-#define RKISP1_CIF_ISP_BLS_V2_START		(RKISP1_CIF_ISP_BLS_BASE + 0x00000020)
-#define RKISP1_CIF_ISP_BLS_V2_STOP		(RKISP1_CIF_ISP_BLS_BASE + 0x00000024)
-#define RKISP1_CIF_ISP_BLS_A_FIXED		(RKISP1_CIF_ISP_BLS_BASE + 0x00000028)
-#define RKISP1_CIF_ISP_BLS_B_FIXED		(RKISP1_CIF_ISP_BLS_BASE + 0x0000002c)
-#define RKISP1_CIF_ISP_BLS_C_FIXED		(RKISP1_CIF_ISP_BLS_BASE + 0x00000030)
-#define RKISP1_CIF_ISP_BLS_D_FIXED		(RKISP1_CIF_ISP_BLS_BASE + 0x00000034)
-#define RKISP1_CIF_ISP_BLS_A_MEASURED		(RKISP1_CIF_ISP_BLS_BASE + 0x00000038)
-#define RKISP1_CIF_ISP_BLS_B_MEASURED		(RKISP1_CIF_ISP_BLS_BASE + 0x0000003c)
-#define RKISP1_CIF_ISP_BLS_C_MEASURED		(RKISP1_CIF_ISP_BLS_BASE + 0x00000040)
-#define RKISP1_CIF_ISP_BLS_D_MEASURED		(RKISP1_CIF_ISP_BLS_BASE + 0x00000044)
-
-#define RKISP1_CIF_ISP_DPF_BASE			0x00002800
-#define RKISP1_CIF_ISP_DPF_MODE			(RKISP1_CIF_ISP_DPF_BASE + 0x00000000)
-#define RKISP1_CIF_ISP_DPF_STRENGTH_R		(RKISP1_CIF_ISP_DPF_BASE + 0x00000004)
-#define RKISP1_CIF_ISP_DPF_STRENGTH_G		(RKISP1_CIF_ISP_DPF_BASE + 0x00000008)
-#define RKISP1_CIF_ISP_DPF_STRENGTH_B		(RKISP1_CIF_ISP_DPF_BASE + 0x0000000C)
-#define RKISP1_CIF_ISP_DPF_S_WEIGHT_G_1_4	(RKISP1_CIF_ISP_DPF_BASE + 0x00000010)
-#define RKISP1_CIF_ISP_DPF_S_WEIGHT_G_5_6	(RKISP1_CIF_ISP_DPF_BASE + 0x00000014)
-#define RKISP1_CIF_ISP_DPF_S_WEIGHT_RB_1_4	(RKISP1_CIF_ISP_DPF_BASE + 0x00000018)
-#define RKISP1_CIF_ISP_DPF_S_WEIGHT_RB_5_6	(RKISP1_CIF_ISP_DPF_BASE + 0x0000001C)
-#define RKISP1_CIF_ISP_DPF_NULL_COEFF_0		(RKISP1_CIF_ISP_DPF_BASE + 0x00000020)
-#define RKISP1_CIF_ISP_DPF_NULL_COEFF_1		(RKISP1_CIF_ISP_DPF_BASE + 0x00000024)
-#define RKISP1_CIF_ISP_DPF_NULL_COEFF_2		(RKISP1_CIF_ISP_DPF_BASE + 0x00000028)
-#define RKISP1_CIF_ISP_DPF_NULL_COEFF_3		(RKISP1_CIF_ISP_DPF_BASE + 0x0000002C)
-#define RKISP1_CIF_ISP_DPF_NULL_COEFF_4		(RKISP1_CIF_ISP_DPF_BASE + 0x00000030)
-#define RKISP1_CIF_ISP_DPF_NULL_COEFF_5		(RKISP1_CIF_ISP_DPF_BASE + 0x00000034)
-#define RKISP1_CIF_ISP_DPF_NULL_COEFF_6		(RKISP1_CIF_ISP_DPF_BASE + 0x00000038)
-#define RKISP1_CIF_ISP_DPF_NULL_COEFF_7		(RKISP1_CIF_ISP_DPF_BASE + 0x0000003C)
-#define RKISP1_CIF_ISP_DPF_NULL_COEFF_8		(RKISP1_CIF_ISP_DPF_BASE + 0x00000040)
-#define RKISP1_CIF_ISP_DPF_NULL_COEFF_9		(RKISP1_CIF_ISP_DPF_BASE + 0x00000044)
-#define RKISP1_CIF_ISP_DPF_NULL_COEFF_10	(RKISP1_CIF_ISP_DPF_BASE + 0x00000048)
-#define RKISP1_CIF_ISP_DPF_NULL_COEFF_11	(RKISP1_CIF_ISP_DPF_BASE + 0x0000004C)
-#define RKISP1_CIF_ISP_DPF_NULL_COEFF_12	(RKISP1_CIF_ISP_DPF_BASE + 0x00000050)
-#define RKISP1_CIF_ISP_DPF_NULL_COEFF_13	(RKISP1_CIF_ISP_DPF_BASE + 0x00000054)
-#define RKISP1_CIF_ISP_DPF_NULL_COEFF_14	(RKISP1_CIF_ISP_DPF_BASE + 0x00000058)
-#define RKISP1_CIF_ISP_DPF_NULL_COEFF_15	(RKISP1_CIF_ISP_DPF_BASE + 0x0000005C)
-#define RKISP1_CIF_ISP_DPF_NULL_COEFF_16	(RKISP1_CIF_ISP_DPF_BASE + 0x00000060)
-#define RKISP1_CIF_ISP_DPF_NF_GAIN_R		(RKISP1_CIF_ISP_DPF_BASE + 0x00000064)
-#define RKISP1_CIF_ISP_DPF_NF_GAIN_GR		(RKISP1_CIF_ISP_DPF_BASE + 0x00000068)
-#define RKISP1_CIF_ISP_DPF_NF_GAIN_GB		(RKISP1_CIF_ISP_DPF_BASE + 0x0000006C)
-#define RKISP1_CIF_ISP_DPF_NF_GAIN_B		(RKISP1_CIF_ISP_DPF_BASE + 0x00000070)
-
-#define RKISP1_CIF_ISP_DPCC_BASE		0x00002900
-#define RKISP1_CIF_ISP_DPCC_MODE		(RKISP1_CIF_ISP_DPCC_BASE + 0x00000000)
-#define RKISP1_CIF_ISP_DPCC_OUTPUT_MODE		(RKISP1_CIF_ISP_DPCC_BASE + 0x00000004)
-#define RKISP1_CIF_ISP_DPCC_SET_USE		(RKISP1_CIF_ISP_DPCC_BASE + 0x00000008)
-#define RKISP1_CIF_ISP_DPCC_METHODS_SET_1	(RKISP1_CIF_ISP_DPCC_BASE + 0x0000000C)
-#define RKISP1_CIF_ISP_DPCC_METHODS_SET_2	(RKISP1_CIF_ISP_DPCC_BASE + 0x00000010)
-#define RKISP1_CIF_ISP_DPCC_METHODS_SET_3	(RKISP1_CIF_ISP_DPCC_BASE + 0x00000014)
-#define RKISP1_CIF_ISP_DPCC_LINE_THRESH_1	(RKISP1_CIF_ISP_DPCC_BASE + 0x00000018)
-#define RKISP1_CIF_ISP_DPCC_LINE_MAD_FAC_1	(RKISP1_CIF_ISP_DPCC_BASE + 0x0000001C)
-#define RKISP1_CIF_ISP_DPCC_PG_FAC_1		(RKISP1_CIF_ISP_DPCC_BASE + 0x00000020)
-#define RKISP1_CIF_ISP_DPCC_RND_THRESH_1	(RKISP1_CIF_ISP_DPCC_BASE + 0x00000024)
-#define RKISP1_CIF_ISP_DPCC_RG_FAC_1		(RKISP1_CIF_ISP_DPCC_BASE + 0x00000028)
-#define RKISP1_CIF_ISP_DPCC_LINE_THRESH_2	(RKISP1_CIF_ISP_DPCC_BASE + 0x0000002C)
-#define RKISP1_CIF_ISP_DPCC_LINE_MAD_FAC_2	(RKISP1_CIF_ISP_DPCC_BASE + 0x00000030)
-#define RKISP1_CIF_ISP_DPCC_PG_FAC_2		(RKISP1_CIF_ISP_DPCC_BASE + 0x00000034)
-#define RKISP1_CIF_ISP_DPCC_RND_THRESH_2	(RKISP1_CIF_ISP_DPCC_BASE + 0x00000038)
-#define RKISP1_CIF_ISP_DPCC_RG_FAC_2		(RKISP1_CIF_ISP_DPCC_BASE + 0x0000003C)
-#define RKISP1_CIF_ISP_DPCC_LINE_THRESH_3	(RKISP1_CIF_ISP_DPCC_BASE + 0x00000040)
-#define RKISP1_CIF_ISP_DPCC_LINE_MAD_FAC_3	(RKISP1_CIF_ISP_DPCC_BASE + 0x00000044)
-#define RKISP1_CIF_ISP_DPCC_PG_FAC_3		(RKISP1_CIF_ISP_DPCC_BASE + 0x00000048)
-#define RKISP1_CIF_ISP_DPCC_RND_THRESH_3	(RKISP1_CIF_ISP_DPCC_BASE + 0x0000004C)
-#define RKISP1_CIF_ISP_DPCC_RG_FAC_3		(RKISP1_CIF_ISP_DPCC_BASE + 0x00000050)
-#define RKISP1_CIF_ISP_DPCC_RO_LIMITS		(RKISP1_CIF_ISP_DPCC_BASE + 0x00000054)
-#define RKISP1_CIF_ISP_DPCC_RND_OFFS		(RKISP1_CIF_ISP_DPCC_BASE + 0x00000058)
-#define RKISP1_CIF_ISP_DPCC_BPT_CTRL		(RKISP1_CIF_ISP_DPCC_BASE + 0x0000005C)
-#define RKISP1_CIF_ISP_DPCC_BPT_NUMBER		(RKISP1_CIF_ISP_DPCC_BASE + 0x00000060)
-#define RKISP1_CIF_ISP_DPCC_BPT_ADDR		(RKISP1_CIF_ISP_DPCC_BASE + 0x00000064)
-#define RKISP1_CIF_ISP_DPCC_BPT_DATA		(RKISP1_CIF_ISP_DPCC_BASE + 0x00000068)
-
-#define RKISP1_CIF_ISP_WDR_BASE			0x00002A00
-#define RKISP1_CIF_ISP_WDR_CTRL			(RKISP1_CIF_ISP_WDR_BASE + 0x00000000)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_1		(RKISP1_CIF_ISP_WDR_BASE + 0x00000004)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_2		(RKISP1_CIF_ISP_WDR_BASE + 0x00000008)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_3		(RKISP1_CIF_ISP_WDR_BASE + 0x0000000C)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_4		(RKISP1_CIF_ISP_WDR_BASE + 0x00000010)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_0	(RKISP1_CIF_ISP_WDR_BASE + 0x00000014)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_1	(RKISP1_CIF_ISP_WDR_BASE + 0x00000018)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_2	(RKISP1_CIF_ISP_WDR_BASE + 0x0000001C)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_3	(RKISP1_CIF_ISP_WDR_BASE + 0x00000020)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_4	(RKISP1_CIF_ISP_WDR_BASE + 0x00000024)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_5	(RKISP1_CIF_ISP_WDR_BASE + 0x00000028)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_6	(RKISP1_CIF_ISP_WDR_BASE + 0x0000002C)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_7	(RKISP1_CIF_ISP_WDR_BASE + 0x00000030)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_8	(RKISP1_CIF_ISP_WDR_BASE + 0x00000034)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_9	(RKISP1_CIF_ISP_WDR_BASE + 0x00000038)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_10	(RKISP1_CIF_ISP_WDR_BASE + 0x0000003C)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_11	(RKISP1_CIF_ISP_WDR_BASE + 0x00000040)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_12	(RKISP1_CIF_ISP_WDR_BASE + 0x00000044)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_13	(RKISP1_CIF_ISP_WDR_BASE + 0x00000048)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_14	(RKISP1_CIF_ISP_WDR_BASE + 0x0000004C)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_15	(RKISP1_CIF_ISP_WDR_BASE + 0x00000050)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_16	(RKISP1_CIF_ISP_WDR_BASE + 0x00000054)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_17	(RKISP1_CIF_ISP_WDR_BASE + 0x00000058)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_18	(RKISP1_CIF_ISP_WDR_BASE + 0x0000005C)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_19	(RKISP1_CIF_ISP_WDR_BASE + 0x00000060)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_20	(RKISP1_CIF_ISP_WDR_BASE + 0x00000064)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_21	(RKISP1_CIF_ISP_WDR_BASE + 0x00000068)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_22	(RKISP1_CIF_ISP_WDR_BASE + 0x0000006C)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_23	(RKISP1_CIF_ISP_WDR_BASE + 0x00000070)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_24	(RKISP1_CIF_ISP_WDR_BASE + 0x00000074)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_25	(RKISP1_CIF_ISP_WDR_BASE + 0x00000078)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_26	(RKISP1_CIF_ISP_WDR_BASE + 0x0000007C)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_27	(RKISP1_CIF_ISP_WDR_BASE + 0x00000080)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_28	(RKISP1_CIF_ISP_WDR_BASE + 0x00000084)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_29	(RKISP1_CIF_ISP_WDR_BASE + 0x00000088)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_30	(RKISP1_CIF_ISP_WDR_BASE + 0x0000008C)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_31	(RKISP1_CIF_ISP_WDR_BASE + 0x00000090)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_32	(RKISP1_CIF_ISP_WDR_BASE + 0x00000094)
-#define RKISP1_CIF_ISP_WDR_OFFSET		(RKISP1_CIF_ISP_WDR_BASE + 0x00000098)
-#define RKISP1_CIF_ISP_WDR_DELTAMIN		(RKISP1_CIF_ISP_WDR_BASE + 0x0000009C)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_1_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000A0)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_2_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000A4)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_3_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000A8)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_4_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000AC)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_0_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000B0)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_1_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000B4)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_2_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000B8)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_3_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000BC)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_4_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000C0)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_5_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000C4)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_6_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000C8)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_7_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000CC)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_8_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000D0)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_9_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000D4)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_10_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000D8)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_11_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000DC)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_12_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000E0)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_13_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000E4)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_14_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000E8)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_15_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000EC)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_16_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000F0)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_17_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000F4)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_18_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000F8)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_19_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x000000FC)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_20_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x00000100)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_21_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x00000104)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_22_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x00000108)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_23_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x0000010C)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_24_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x00000110)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_25_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x00000114)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_26_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x00000118)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_27_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x0000011C)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_28_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x00000120)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_29_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x00000124)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_30_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x00000128)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_31_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x0000012C)
-#define RKISP1_CIF_ISP_WDR_TONECURVE_YM_32_SHD	(RKISP1_CIF_ISP_WDR_BASE + 0x00000130)
-
-#define RKISP1_CIF_ISP_VSM_BASE			0x00002F00
-#define RKISP1_CIF_ISP_VSM_MODE			(RKISP1_CIF_ISP_VSM_BASE + 0x00000000)
-#define RKISP1_CIF_ISP_VSM_H_OFFS		(RKISP1_CIF_ISP_VSM_BASE + 0x00000004)
-#define RKISP1_CIF_ISP_VSM_V_OFFS		(RKISP1_CIF_ISP_VSM_BASE + 0x00000008)
-#define RKISP1_CIF_ISP_VSM_H_SIZE		(RKISP1_CIF_ISP_VSM_BASE + 0x0000000C)
-#define RKISP1_CIF_ISP_VSM_V_SIZE		(RKISP1_CIF_ISP_VSM_BASE + 0x00000010)
-#define RKISP1_CIF_ISP_VSM_H_SEGMENTS		(RKISP1_CIF_ISP_VSM_BASE + 0x00000014)
-#define RKISP1_CIF_ISP_VSM_V_SEGMENTS		(RKISP1_CIF_ISP_VSM_BASE + 0x00000018)
-#define RKISP1_CIF_ISP_VSM_DELTA_H		(RKISP1_CIF_ISP_VSM_BASE + 0x0000001C)
-#define RKISP1_CIF_ISP_VSM_DELTA_V		(RKISP1_CIF_ISP_VSM_BASE + 0x00000020)
-
-#endif /* _RKISP1_REGS_H */
diff --git a/drivers/staging/media/rkisp1/rkisp1-resizer.c b/drivers/staging/media/rkisp1/rkisp1-resizer.c
deleted file mode 100644
index 7ca5b47c5bf5..000000000000
--- a/drivers/staging/media/rkisp1/rkisp1-resizer.c
+++ /dev/null
@@ -1,846 +0,0 @@
-// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
-/*
- * Rockchip ISP1 Driver - V4l resizer device
- *
- * Copyright (C) 2019 Collabora, Ltd.
- *
- * Based on Rockchip ISP1 driver by Rockchip Electronics Co., Ltd.
- * Copyright (C) 2017 Rockchip Electronics Co., Ltd.
- */
-
-#include "rkisp1-common.h"
-
-#define RKISP1_RSZ_SP_DEV_NAME	RKISP1_DRIVER_NAME "_resizer_selfpath"
-#define RKISP1_RSZ_MP_DEV_NAME	RKISP1_DRIVER_NAME "_resizer_mainpath"
-
-#define RKISP1_DEF_FMT MEDIA_BUS_FMT_YUYV8_2X8
-#define RKISP1_DEF_PIXEL_ENC V4L2_PIXEL_ENC_YUV
-
-struct rkisp1_rsz_yuv_mbus_info {
-	u32 mbus_code;
-	u32 hdiv;
-	u32 vdiv;
-};
-
-static const struct rkisp1_rsz_yuv_mbus_info rkisp1_rsz_yuv_src_formats[] = {
-	{
-		.mbus_code	= MEDIA_BUS_FMT_YUYV8_2X8, /* YUV422 */
-		.hdiv		= 2,
-		.vdiv		= 1,
-	},
-	{
-		.mbus_code	= MEDIA_BUS_FMT_YUYV8_1_5X8, /* YUV420 */
-		.hdiv		= 2,
-		.vdiv		= 2,
-	},
-};
-
-static const struct rkisp1_rsz_yuv_mbus_info *rkisp1_rsz_get_yuv_mbus_info(u32 mbus_code)
-{
-	unsigned int i;
-
-	for (i = 0; i < ARRAY_SIZE(rkisp1_rsz_yuv_src_formats); i++) {
-		if (rkisp1_rsz_yuv_src_formats[i].mbus_code == mbus_code)
-			return &rkisp1_rsz_yuv_src_formats[i];
-	}
-
-	return NULL;
-}
-
-enum rkisp1_shadow_regs_when {
-	RKISP1_SHADOW_REGS_SYNC,
-	RKISP1_SHADOW_REGS_ASYNC,
-};
-
-struct rkisp1_rsz_config {
-	/* constrains */
-	const int max_rsz_width;
-	const int max_rsz_height;
-	const int min_rsz_width;
-	const int min_rsz_height;
-	/* registers */
-	struct {
-		u32 ctrl;
-		u32 ctrl_shd;
-		u32 scale_hy;
-		u32 scale_hcr;
-		u32 scale_hcb;
-		u32 scale_vy;
-		u32 scale_vc;
-		u32 scale_lut;
-		u32 scale_lut_addr;
-		u32 scale_hy_shd;
-		u32 scale_hcr_shd;
-		u32 scale_hcb_shd;
-		u32 scale_vy_shd;
-		u32 scale_vc_shd;
-		u32 phase_hy;
-		u32 phase_hc;
-		u32 phase_vy;
-		u32 phase_vc;
-		u32 phase_hy_shd;
-		u32 phase_hc_shd;
-		u32 phase_vy_shd;
-		u32 phase_vc_shd;
-	} rsz;
-	struct {
-		u32 ctrl;
-		u32 yuvmode_mask;
-		u32 rawmode_mask;
-		u32 h_offset;
-		u32 v_offset;
-		u32 h_size;
-		u32 v_size;
-	} dual_crop;
-};
-
-static const struct rkisp1_rsz_config rkisp1_rsz_config_mp = {
-	/* constraints */
-	.max_rsz_width = RKISP1_RSZ_MP_SRC_MAX_WIDTH,
-	.max_rsz_height = RKISP1_RSZ_MP_SRC_MAX_HEIGHT,
-	.min_rsz_width = RKISP1_RSZ_SRC_MIN_WIDTH,
-	.min_rsz_height = RKISP1_RSZ_SRC_MIN_HEIGHT,
-	/* registers */
-	.rsz = {
-		.ctrl =			RKISP1_CIF_MRSZ_CTRL,
-		.scale_hy =		RKISP1_CIF_MRSZ_SCALE_HY,
-		.scale_hcr =		RKISP1_CIF_MRSZ_SCALE_HCR,
-		.scale_hcb =		RKISP1_CIF_MRSZ_SCALE_HCB,
-		.scale_vy =		RKISP1_CIF_MRSZ_SCALE_VY,
-		.scale_vc =		RKISP1_CIF_MRSZ_SCALE_VC,
-		.scale_lut =		RKISP1_CIF_MRSZ_SCALE_LUT,
-		.scale_lut_addr =	RKISP1_CIF_MRSZ_SCALE_LUT_ADDR,
-		.scale_hy_shd =		RKISP1_CIF_MRSZ_SCALE_HY_SHD,
-		.scale_hcr_shd =	RKISP1_CIF_MRSZ_SCALE_HCR_SHD,
-		.scale_hcb_shd =	RKISP1_CIF_MRSZ_SCALE_HCB_SHD,
-		.scale_vy_shd =		RKISP1_CIF_MRSZ_SCALE_VY_SHD,
-		.scale_vc_shd =		RKISP1_CIF_MRSZ_SCALE_VC_SHD,
-		.phase_hy =		RKISP1_CIF_MRSZ_PHASE_HY,
-		.phase_hc =		RKISP1_CIF_MRSZ_PHASE_HC,
-		.phase_vy =		RKISP1_CIF_MRSZ_PHASE_VY,
-		.phase_vc =		RKISP1_CIF_MRSZ_PHASE_VC,
-		.ctrl_shd =		RKISP1_CIF_MRSZ_CTRL_SHD,
-		.phase_hy_shd =		RKISP1_CIF_MRSZ_PHASE_HY_SHD,
-		.phase_hc_shd =		RKISP1_CIF_MRSZ_PHASE_HC_SHD,
-		.phase_vy_shd =		RKISP1_CIF_MRSZ_PHASE_VY_SHD,
-		.phase_vc_shd =		RKISP1_CIF_MRSZ_PHASE_VC_SHD,
-	},
-	.dual_crop = {
-		.ctrl =			RKISP1_CIF_DUAL_CROP_CTRL,
-		.yuvmode_mask =		RKISP1_CIF_DUAL_CROP_MP_MODE_YUV,
-		.rawmode_mask =		RKISP1_CIF_DUAL_CROP_MP_MODE_RAW,
-		.h_offset =		RKISP1_CIF_DUAL_CROP_M_H_OFFS,
-		.v_offset =		RKISP1_CIF_DUAL_CROP_M_V_OFFS,
-		.h_size =		RKISP1_CIF_DUAL_CROP_M_H_SIZE,
-		.v_size =		RKISP1_CIF_DUAL_CROP_M_V_SIZE,
-	},
-};
-
-static const struct rkisp1_rsz_config rkisp1_rsz_config_sp = {
-	/* constraints */
-	.max_rsz_width = RKISP1_RSZ_SP_SRC_MAX_WIDTH,
-	.max_rsz_height = RKISP1_RSZ_SP_SRC_MAX_HEIGHT,
-	.min_rsz_width = RKISP1_RSZ_SRC_MIN_WIDTH,
-	.min_rsz_height = RKISP1_RSZ_SRC_MIN_HEIGHT,
-	/* registers */
-	.rsz = {
-		.ctrl =			RKISP1_CIF_SRSZ_CTRL,
-		.scale_hy =		RKISP1_CIF_SRSZ_SCALE_HY,
-		.scale_hcr =		RKISP1_CIF_SRSZ_SCALE_HCR,
-		.scale_hcb =		RKISP1_CIF_SRSZ_SCALE_HCB,
-		.scale_vy =		RKISP1_CIF_SRSZ_SCALE_VY,
-		.scale_vc =		RKISP1_CIF_SRSZ_SCALE_VC,
-		.scale_lut =		RKISP1_CIF_SRSZ_SCALE_LUT,
-		.scale_lut_addr =	RKISP1_CIF_SRSZ_SCALE_LUT_ADDR,
-		.scale_hy_shd =		RKISP1_CIF_SRSZ_SCALE_HY_SHD,
-		.scale_hcr_shd =	RKISP1_CIF_SRSZ_SCALE_HCR_SHD,
-		.scale_hcb_shd =	RKISP1_CIF_SRSZ_SCALE_HCB_SHD,
-		.scale_vy_shd =		RKISP1_CIF_SRSZ_SCALE_VY_SHD,
-		.scale_vc_shd =		RKISP1_CIF_SRSZ_SCALE_VC_SHD,
-		.phase_hy =		RKISP1_CIF_SRSZ_PHASE_HY,
-		.phase_hc =		RKISP1_CIF_SRSZ_PHASE_HC,
-		.phase_vy =		RKISP1_CIF_SRSZ_PHASE_VY,
-		.phase_vc =		RKISP1_CIF_SRSZ_PHASE_VC,
-		.ctrl_shd =		RKISP1_CIF_SRSZ_CTRL_SHD,
-		.phase_hy_shd =		RKISP1_CIF_SRSZ_PHASE_HY_SHD,
-		.phase_hc_shd =		RKISP1_CIF_SRSZ_PHASE_HC_SHD,
-		.phase_vy_shd =		RKISP1_CIF_SRSZ_PHASE_VY_SHD,
-		.phase_vc_shd =		RKISP1_CIF_SRSZ_PHASE_VC_SHD,
-	},
-	.dual_crop = {
-		.ctrl =			RKISP1_CIF_DUAL_CROP_CTRL,
-		.yuvmode_mask =		RKISP1_CIF_DUAL_CROP_SP_MODE_YUV,
-		.rawmode_mask =		RKISP1_CIF_DUAL_CROP_SP_MODE_RAW,
-		.h_offset =		RKISP1_CIF_DUAL_CROP_S_H_OFFS,
-		.v_offset =		RKISP1_CIF_DUAL_CROP_S_V_OFFS,
-		.h_size =		RKISP1_CIF_DUAL_CROP_S_H_SIZE,
-		.v_size =		RKISP1_CIF_DUAL_CROP_S_V_SIZE,
-	},
-};
-
-static struct v4l2_mbus_framefmt *
-rkisp1_rsz_get_pad_fmt(struct rkisp1_resizer *rsz,
-		       struct v4l2_subdev_pad_config *cfg,
-		       unsigned int pad, u32 which)
-{
-	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_format(&rsz->sd, cfg, pad);
-	else
-		return v4l2_subdev_get_try_format(&rsz->sd, rsz->pad_cfg, pad);
-}
-
-static struct v4l2_rect *
-rkisp1_rsz_get_pad_crop(struct rkisp1_resizer *rsz,
-			struct v4l2_subdev_pad_config *cfg,
-			unsigned int pad, u32 which)
-{
-	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_crop(&rsz->sd, cfg, pad);
-	else
-		return v4l2_subdev_get_try_crop(&rsz->sd, rsz->pad_cfg, pad);
-}
-
-/* ----------------------------------------------------------------------------
- * Dual crop hw configs
- */
-
-static void rkisp1_dcrop_disable(struct rkisp1_resizer *rsz,
-				 enum rkisp1_shadow_regs_when when)
-{
-	u32 dc_ctrl = rkisp1_read(rsz->rkisp1, rsz->config->dual_crop.ctrl);
-	u32 mask = ~(rsz->config->dual_crop.yuvmode_mask |
-		     rsz->config->dual_crop.rawmode_mask);
-
-	dc_ctrl &= mask;
-	if (when == RKISP1_SHADOW_REGS_ASYNC)
-		dc_ctrl |= RKISP1_CIF_DUAL_CROP_GEN_CFG_UPD;
-	else
-		dc_ctrl |= RKISP1_CIF_DUAL_CROP_CFG_UPD;
-	rkisp1_write(rsz->rkisp1, dc_ctrl, rsz->config->dual_crop.ctrl);
-}
-
-/* configure dual-crop unit */
-static void rkisp1_dcrop_config(struct rkisp1_resizer *rsz)
-{
-	struct rkisp1_device *rkisp1 = rsz->rkisp1;
-	struct v4l2_mbus_framefmt *sink_fmt;
-	struct v4l2_rect *sink_crop;
-	u32 dc_ctrl;
-
-	sink_crop = rkisp1_rsz_get_pad_crop(rsz, NULL, RKISP1_RSZ_PAD_SINK,
-					    V4L2_SUBDEV_FORMAT_ACTIVE);
-	sink_fmt = rkisp1_rsz_get_pad_fmt(rsz, NULL, RKISP1_RSZ_PAD_SINK,
-					  V4L2_SUBDEV_FORMAT_ACTIVE);
-
-	if (sink_crop->width == sink_fmt->width &&
-	    sink_crop->height == sink_fmt->height &&
-	    sink_crop->left == 0 && sink_crop->top == 0) {
-		rkisp1_dcrop_disable(rsz, RKISP1_SHADOW_REGS_SYNC);
-		dev_dbg(rkisp1->dev, "capture %d crop disabled\n", rsz->id);
-		return;
-	}
-
-	dc_ctrl = rkisp1_read(rkisp1, rsz->config->dual_crop.ctrl);
-	rkisp1_write(rkisp1, sink_crop->left, rsz->config->dual_crop.h_offset);
-	rkisp1_write(rkisp1, sink_crop->top, rsz->config->dual_crop.v_offset);
-	rkisp1_write(rkisp1, sink_crop->width, rsz->config->dual_crop.h_size);
-	rkisp1_write(rkisp1, sink_crop->height, rsz->config->dual_crop.v_size);
-	dc_ctrl |= rsz->config->dual_crop.yuvmode_mask;
-	dc_ctrl |= RKISP1_CIF_DUAL_CROP_CFG_UPD;
-	rkisp1_write(rkisp1, dc_ctrl, rsz->config->dual_crop.ctrl);
-
-	dev_dbg(rkisp1->dev, "stream %d crop: %dx%d -> %dx%d\n", rsz->id,
-		sink_fmt->width, sink_fmt->height,
-		sink_crop->width, sink_crop->height);
-}
-
-/* ----------------------------------------------------------------------------
- * Resizer hw configs
- */
-
-static void rkisp1_rsz_dump_regs(struct rkisp1_resizer *rsz)
-{
-	dev_dbg(rsz->rkisp1->dev,
-		"RSZ_CTRL 0x%08x/0x%08x\n"
-		"RSZ_SCALE_HY %d/%d\n"
-		"RSZ_SCALE_HCB %d/%d\n"
-		"RSZ_SCALE_HCR %d/%d\n"
-		"RSZ_SCALE_VY %d/%d\n"
-		"RSZ_SCALE_VC %d/%d\n"
-		"RSZ_PHASE_HY %d/%d\n"
-		"RSZ_PHASE_HC %d/%d\n"
-		"RSZ_PHASE_VY %d/%d\n"
-		"RSZ_PHASE_VC %d/%d\n",
-		rkisp1_read(rsz->rkisp1, rsz->config->rsz.ctrl),
-		rkisp1_read(rsz->rkisp1, rsz->config->rsz.ctrl_shd),
-		rkisp1_read(rsz->rkisp1, rsz->config->rsz.scale_hy),
-		rkisp1_read(rsz->rkisp1, rsz->config->rsz.scale_hy_shd),
-		rkisp1_read(rsz->rkisp1, rsz->config->rsz.scale_hcb),
-		rkisp1_read(rsz->rkisp1, rsz->config->rsz.scale_hcb_shd),
-		rkisp1_read(rsz->rkisp1, rsz->config->rsz.scale_hcr),
-		rkisp1_read(rsz->rkisp1, rsz->config->rsz.scale_hcr_shd),
-		rkisp1_read(rsz->rkisp1, rsz->config->rsz.scale_vy),
-		rkisp1_read(rsz->rkisp1, rsz->config->rsz.scale_vy_shd),
-		rkisp1_read(rsz->rkisp1, rsz->config->rsz.scale_vc),
-		rkisp1_read(rsz->rkisp1, rsz->config->rsz.scale_vc_shd),
-		rkisp1_read(rsz->rkisp1, rsz->config->rsz.phase_hy),
-		rkisp1_read(rsz->rkisp1, rsz->config->rsz.phase_hy_shd),
-		rkisp1_read(rsz->rkisp1, rsz->config->rsz.phase_hc),
-		rkisp1_read(rsz->rkisp1, rsz->config->rsz.phase_hc_shd),
-		rkisp1_read(rsz->rkisp1, rsz->config->rsz.phase_vy),
-		rkisp1_read(rsz->rkisp1, rsz->config->rsz.phase_vy_shd),
-		rkisp1_read(rsz->rkisp1, rsz->config->rsz.phase_vc),
-		rkisp1_read(rsz->rkisp1, rsz->config->rsz.phase_vc_shd));
-}
-
-static void rkisp1_rsz_update_shadow(struct rkisp1_resizer *rsz,
-				     enum rkisp1_shadow_regs_when when)
-{
-	u32 ctrl_cfg = rkisp1_read(rsz->rkisp1, rsz->config->rsz.ctrl);
-
-	if (when == RKISP1_SHADOW_REGS_ASYNC)
-		ctrl_cfg |= RKISP1_CIF_RSZ_CTRL_CFG_UPD_AUTO;
-	else
-		ctrl_cfg |= RKISP1_CIF_RSZ_CTRL_CFG_UPD;
-
-	rkisp1_write(rsz->rkisp1, ctrl_cfg, rsz->config->rsz.ctrl);
-}
-
-static u32 rkisp1_rsz_calc_ratio(u32 len_sink, u32 len_src)
-{
-	if (len_sink < len_src)
-		return ((len_sink - 1) * RKISP1_CIF_RSZ_SCALER_FACTOR) /
-		       (len_src - 1);
-
-	return ((len_src - 1) * RKISP1_CIF_RSZ_SCALER_FACTOR) /
-	       (len_sink - 1) + 1;
-}
-
-static void rkisp1_rsz_disable(struct rkisp1_resizer *rsz,
-			       enum rkisp1_shadow_regs_when when)
-{
-	rkisp1_write(rsz->rkisp1, 0, rsz->config->rsz.ctrl);
-
-	if (when == RKISP1_SHADOW_REGS_SYNC)
-		rkisp1_rsz_update_shadow(rsz, when);
-}
-
-static void rkisp1_rsz_config_regs(struct rkisp1_resizer *rsz,
-				   struct v4l2_rect *sink_y,
-				   struct v4l2_rect *sink_c,
-				   struct v4l2_rect *src_y,
-				   struct v4l2_rect *src_c,
-				   enum rkisp1_shadow_regs_when when)
-{
-	struct rkisp1_device *rkisp1 = rsz->rkisp1;
-	u32 ratio, rsz_ctrl = 0;
-	unsigned int i;
-
-	/* No phase offset */
-	rkisp1_write(rkisp1, 0, rsz->config->rsz.phase_hy);
-	rkisp1_write(rkisp1, 0, rsz->config->rsz.phase_hc);
-	rkisp1_write(rkisp1, 0, rsz->config->rsz.phase_vy);
-	rkisp1_write(rkisp1, 0, rsz->config->rsz.phase_vc);
-
-	/* Linear interpolation */
-	for (i = 0; i < 64; i++) {
-		rkisp1_write(rkisp1, i, rsz->config->rsz.scale_lut_addr);
-		rkisp1_write(rkisp1, i, rsz->config->rsz.scale_lut);
-	}
-
-	if (sink_y->width != src_y->width) {
-		rsz_ctrl |= RKISP1_CIF_RSZ_CTRL_SCALE_HY_ENABLE;
-		if (sink_y->width < src_y->width)
-			rsz_ctrl |= RKISP1_CIF_RSZ_CTRL_SCALE_HY_UP;
-		ratio = rkisp1_rsz_calc_ratio(sink_y->width, src_y->width);
-		rkisp1_write(rkisp1, ratio, rsz->config->rsz.scale_hy);
-	}
-
-	if (sink_c->width != src_c->width) {
-		rsz_ctrl |= RKISP1_CIF_RSZ_CTRL_SCALE_HC_ENABLE;
-		if (sink_c->width < src_c->width)
-			rsz_ctrl |= RKISP1_CIF_RSZ_CTRL_SCALE_HC_UP;
-		ratio = rkisp1_rsz_calc_ratio(sink_c->width, src_c->width);
-		rkisp1_write(rkisp1, ratio, rsz->config->rsz.scale_hcb);
-		rkisp1_write(rkisp1, ratio, rsz->config->rsz.scale_hcr);
-	}
-
-	if (sink_y->height != src_y->height) {
-		rsz_ctrl |= RKISP1_CIF_RSZ_CTRL_SCALE_VY_ENABLE;
-		if (sink_y->height < src_y->height)
-			rsz_ctrl |= RKISP1_CIF_RSZ_CTRL_SCALE_VY_UP;
-		ratio = rkisp1_rsz_calc_ratio(sink_y->height, src_y->height);
-		rkisp1_write(rkisp1, ratio, rsz->config->rsz.scale_vy);
-	}
-
-	if (sink_c->height != src_c->height) {
-		rsz_ctrl |= RKISP1_CIF_RSZ_CTRL_SCALE_VC_ENABLE;
-		if (sink_c->height < src_c->height)
-			rsz_ctrl |= RKISP1_CIF_RSZ_CTRL_SCALE_VC_UP;
-		ratio = rkisp1_rsz_calc_ratio(sink_c->height, src_c->height);
-		rkisp1_write(rkisp1, ratio, rsz->config->rsz.scale_vc);
-	}
-
-	rkisp1_write(rkisp1, rsz_ctrl, rsz->config->rsz.ctrl);
-
-	rkisp1_rsz_update_shadow(rsz, when);
-}
-
-static void rkisp1_rsz_config(struct rkisp1_resizer *rsz,
-			      enum rkisp1_shadow_regs_when when)
-{
-	const struct rkisp1_rsz_yuv_mbus_info *sink_yuv_info, *src_yuv_info;
-	struct v4l2_rect sink_y, sink_c, src_y, src_c;
-	struct v4l2_mbus_framefmt *src_fmt, *sink_fmt;
-	struct v4l2_rect *sink_crop;
-
-	sink_crop = rkisp1_rsz_get_pad_crop(rsz, NULL, RKISP1_RSZ_PAD_SINK,
-					    V4L2_SUBDEV_FORMAT_ACTIVE);
-	src_fmt = rkisp1_rsz_get_pad_fmt(rsz, NULL, RKISP1_RSZ_PAD_SRC,
-					 V4L2_SUBDEV_FORMAT_ACTIVE);
-	src_yuv_info = rkisp1_rsz_get_yuv_mbus_info(src_fmt->code);
-	sink_fmt = rkisp1_rsz_get_pad_fmt(rsz, NULL, RKISP1_RSZ_PAD_SINK,
-					  V4L2_SUBDEV_FORMAT_ACTIVE);
-	sink_yuv_info = rkisp1_rsz_get_yuv_mbus_info(sink_fmt->code);
-
-	/*
-	 * The resizer only works on yuv formats,
-	 * so return if it is bayer format.
-	 */
-	if (rsz->pixel_enc == V4L2_PIXEL_ENC_BAYER) {
-		rkisp1_rsz_disable(rsz, when);
-		return;
-	}
-
-	sink_y.width = sink_crop->width;
-	sink_y.height = sink_crop->height;
-	src_y.width = src_fmt->width;
-	src_y.height = src_fmt->height;
-
-	sink_c.width = sink_y.width / sink_yuv_info->hdiv;
-	sink_c.height = sink_y.height / sink_yuv_info->vdiv;
-
-	/*
-	 * The resizer is used not only to change the dimensions of the frame
-	 * but also to change the scale for YUV formats,
-	 * (4:2:2 -> 4:2:0 for example). So the width/height of the CbCr
-	 * streams should be set according to the media bus format in the src pad.
-	 */
-	src_c.width = src_y.width / src_yuv_info->hdiv;
-	src_c.height = src_y.height / src_yuv_info->vdiv;
-
-	if (sink_c.width == src_c.width && sink_c.height == src_c.height) {
-		rkisp1_rsz_disable(rsz, when);
-		return;
-	}
-
-	dev_dbg(rsz->rkisp1->dev, "stream %d rsz/scale: %dx%d -> %dx%d\n",
-		rsz->id, sink_crop->width, sink_crop->height,
-		src_fmt->width, src_fmt->height);
-	dev_dbg(rsz->rkisp1->dev, "chroma scaling %dx%d -> %dx%d\n",
-		sink_c.width, sink_c.height, src_c.width, src_c.height);
-
-	/* set values in the hw */
-	rkisp1_rsz_config_regs(rsz, &sink_y, &sink_c, &src_y, &src_c, when);
-
-	rkisp1_rsz_dump_regs(rsz);
-}
-
-/* ----------------------------------------------------------------------------
- * Subdev pad operations
- */
-
-static int rkisp1_rsz_enum_mbus_code(struct v4l2_subdev *sd,
-				     struct v4l2_subdev_pad_config *cfg,
-				     struct v4l2_subdev_mbus_code_enum *code)
-{
-	struct rkisp1_resizer *rsz =
-		container_of(sd, struct rkisp1_resizer, sd);
-	struct v4l2_subdev_pad_config dummy_cfg;
-	u32 pad = code->pad;
-	int ret;
-
-	if (code->pad == RKISP1_RSZ_PAD_SRC) {
-		/* supported mbus codes on the src are the same as in the capture */
-		struct rkisp1_capture *cap = &rsz->rkisp1->capture_devs[rsz->id];
-
-		return rkisp1_cap_enum_mbus_codes(cap, code);
-	}
-
-	/*
-	 * The selfpath capture doesn't support bayer formats. Therefore the selfpath resizer
-	 * should support only YUV422 on the sink pad
-	 */
-	if (rsz->id == RKISP1_SELFPATH) {
-		if (code->index > 0)
-			return -EINVAL;
-		code->code = MEDIA_BUS_FMT_YUYV8_2X8;
-		return 0;
-	}
-
-	/* supported mbus codes on the sink pad are the same as isp src pad */
-	code->pad = RKISP1_ISP_PAD_SOURCE_VIDEO;
-	ret = v4l2_subdev_call(&rsz->rkisp1->isp.sd, pad, enum_mbus_code,
-			       &dummy_cfg, code);
-
-	/* restore pad */
-	code->pad = pad;
-	code->flags = 0;
-	return ret;
-}
-
-static int rkisp1_rsz_init_config(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg)
-{
-	struct v4l2_mbus_framefmt *sink_fmt, *src_fmt;
-	struct v4l2_rect *sink_crop;
-
-	sink_fmt = v4l2_subdev_get_try_format(sd, cfg, RKISP1_RSZ_PAD_SRC);
-	sink_fmt->width = RKISP1_DEFAULT_WIDTH;
-	sink_fmt->height = RKISP1_DEFAULT_HEIGHT;
-	sink_fmt->field = V4L2_FIELD_NONE;
-	sink_fmt->code = RKISP1_DEF_FMT;
-
-	sink_crop = v4l2_subdev_get_try_crop(sd, cfg, RKISP1_RSZ_PAD_SINK);
-	sink_crop->width = RKISP1_DEFAULT_WIDTH;
-	sink_crop->height = RKISP1_DEFAULT_HEIGHT;
-	sink_crop->left = 0;
-	sink_crop->top = 0;
-
-	src_fmt = v4l2_subdev_get_try_format(sd, cfg, RKISP1_RSZ_PAD_SINK);
-	*src_fmt = *sink_fmt;
-
-	/* NOTE: there is no crop in the source pad, only in the sink */
-
-	return 0;
-}
-
-static void rkisp1_rsz_set_src_fmt(struct rkisp1_resizer *rsz,
-				   struct v4l2_subdev_pad_config *cfg,
-				   struct v4l2_mbus_framefmt *format,
-				   unsigned int which)
-{
-	const struct rkisp1_isp_mbus_info *mbus_info;
-	struct v4l2_mbus_framefmt *src_fmt;
-
-	src_fmt = rkisp1_rsz_get_pad_fmt(rsz, cfg, RKISP1_RSZ_PAD_SRC, which);
-	mbus_info = rkisp1_isp_mbus_info_get(src_fmt->code);
-
-	/* for YUV formats, userspace can change the mbus code on the src pad if it is supported */
-	if (mbus_info->pixel_enc == V4L2_PIXEL_ENC_YUV &&
-	    rkisp1_rsz_get_yuv_mbus_info(format->code))
-		src_fmt->code = format->code;
-
-	src_fmt->width = clamp_t(u32, format->width,
-				 rsz->config->min_rsz_width,
-				 rsz->config->max_rsz_width);
-	src_fmt->height = clamp_t(u32, format->height,
-				  rsz->config->min_rsz_height,
-				  rsz->config->max_rsz_height);
-
-	*format = *src_fmt;
-}
-
-static void rkisp1_rsz_set_sink_crop(struct rkisp1_resizer *rsz,
-				     struct v4l2_subdev_pad_config *cfg,
-				     struct v4l2_rect *r,
-				     unsigned int which)
-{
-	const struct rkisp1_isp_mbus_info *mbus_info;
-	struct v4l2_mbus_framefmt *sink_fmt;
-	struct v4l2_rect *sink_crop;
-
-	sink_fmt = rkisp1_rsz_get_pad_fmt(rsz, cfg, RKISP1_RSZ_PAD_SINK, which);
-	sink_crop = rkisp1_rsz_get_pad_crop(rsz, cfg, RKISP1_RSZ_PAD_SINK,
-					    which);
-
-	/* Not crop for MP bayer raw data */
-	mbus_info = rkisp1_isp_mbus_info_get(sink_fmt->code);
-
-	if (rsz->id == RKISP1_MAINPATH &&
-	    mbus_info->pixel_enc == V4L2_PIXEL_ENC_BAYER) {
-		sink_crop->left = 0;
-		sink_crop->top = 0;
-		sink_crop->width = sink_fmt->width;
-		sink_crop->height = sink_fmt->height;
-
-		*r = *sink_crop;
-		return;
-	}
-
-	sink_crop->left = ALIGN(r->left, 2);
-	sink_crop->width = ALIGN(r->width, 2);
-	sink_crop->top = r->top;
-	sink_crop->height = r->height;
-	rkisp1_sd_adjust_crop(sink_crop, sink_fmt);
-
-	*r = *sink_crop;
-}
-
-static void rkisp1_rsz_set_sink_fmt(struct rkisp1_resizer *rsz,
-				    struct v4l2_subdev_pad_config *cfg,
-				    struct v4l2_mbus_framefmt *format,
-				    unsigned int which)
-{
-	const struct rkisp1_isp_mbus_info *mbus_info;
-	struct v4l2_mbus_framefmt *sink_fmt, *src_fmt;
-	struct v4l2_rect *sink_crop;
-
-	sink_fmt = rkisp1_rsz_get_pad_fmt(rsz, cfg, RKISP1_RSZ_PAD_SINK, which);
-	src_fmt = rkisp1_rsz_get_pad_fmt(rsz, cfg, RKISP1_RSZ_PAD_SRC, which);
-	sink_crop = rkisp1_rsz_get_pad_crop(rsz, cfg, RKISP1_RSZ_PAD_SINK,
-					    which);
-	if (rsz->id == RKISP1_SELFPATH)
-		sink_fmt->code = MEDIA_BUS_FMT_YUYV8_2X8;
-	else
-		sink_fmt->code = format->code;
-
-	mbus_info = rkisp1_isp_mbus_info_get(sink_fmt->code);
-	if (!mbus_info || !(mbus_info->direction & RKISP1_ISP_SD_SRC)) {
-		sink_fmt->code = RKISP1_DEF_FMT;
-		mbus_info = rkisp1_isp_mbus_info_get(sink_fmt->code);
-	}
-	if (which == V4L2_SUBDEV_FORMAT_ACTIVE)
-		rsz->pixel_enc = mbus_info->pixel_enc;
-
-	/* Propagete to source pad */
-	src_fmt->code = sink_fmt->code;
-
-	sink_fmt->width = clamp_t(u32, format->width,
-				  RKISP1_ISP_MIN_WIDTH,
-				  RKISP1_ISP_MAX_WIDTH);
-	sink_fmt->height = clamp_t(u32, format->height,
-				  RKISP1_ISP_MIN_HEIGHT,
-				  RKISP1_ISP_MAX_HEIGHT);
-
-	*format = *sink_fmt;
-
-	/* Update sink crop */
-	rkisp1_rsz_set_sink_crop(rsz, cfg, sink_crop, which);
-}
-
-static int rkisp1_rsz_get_fmt(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
-			      struct v4l2_subdev_format *fmt)
-{
-	struct rkisp1_resizer *rsz =
-		container_of(sd, struct rkisp1_resizer, sd);
-
-	mutex_lock(&rsz->ops_lock);
-	fmt->format = *rkisp1_rsz_get_pad_fmt(rsz, cfg, fmt->pad, fmt->which);
-	mutex_unlock(&rsz->ops_lock);
-	return 0;
-}
-
-static int rkisp1_rsz_set_fmt(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
-			      struct v4l2_subdev_format *fmt)
-{
-	struct rkisp1_resizer *rsz =
-		container_of(sd, struct rkisp1_resizer, sd);
-
-	mutex_lock(&rsz->ops_lock);
-	if (fmt->pad == RKISP1_RSZ_PAD_SINK)
-		rkisp1_rsz_set_sink_fmt(rsz, cfg, &fmt->format, fmt->which);
-	else
-		rkisp1_rsz_set_src_fmt(rsz, cfg, &fmt->format, fmt->which);
-
-	mutex_unlock(&rsz->ops_lock);
-	return 0;
-}
-
-static int rkisp1_rsz_get_selection(struct v4l2_subdev *sd,
-				    struct v4l2_subdev_pad_config *cfg,
-				    struct v4l2_subdev_selection *sel)
-{
-	struct rkisp1_resizer *rsz =
-		container_of(sd, struct rkisp1_resizer, sd);
-	struct v4l2_mbus_framefmt *mf_sink;
-	int ret = 0;
-
-	if (sel->pad == RKISP1_RSZ_PAD_SRC)
-		return -EINVAL;
-
-	mutex_lock(&rsz->ops_lock);
-	switch (sel->target) {
-	case V4L2_SEL_TGT_CROP_BOUNDS:
-		mf_sink = rkisp1_rsz_get_pad_fmt(rsz, cfg, RKISP1_RSZ_PAD_SINK,
-						 sel->which);
-		sel->r.height = mf_sink->height;
-		sel->r.width = mf_sink->width;
-		sel->r.left = 0;
-		sel->r.top = 0;
-		break;
-	case V4L2_SEL_TGT_CROP:
-		sel->r = *rkisp1_rsz_get_pad_crop(rsz, cfg, RKISP1_RSZ_PAD_SINK,
-						  sel->which);
-		break;
-	default:
-		ret = -EINVAL;
-	}
-
-	mutex_unlock(&rsz->ops_lock);
-	return ret;
-}
-
-static int rkisp1_rsz_set_selection(struct v4l2_subdev *sd,
-				    struct v4l2_subdev_pad_config *cfg,
-				    struct v4l2_subdev_selection *sel)
-{
-	struct rkisp1_resizer *rsz =
-		container_of(sd, struct rkisp1_resizer, sd);
-
-	if (sel->target != V4L2_SEL_TGT_CROP || sel->pad == RKISP1_RSZ_PAD_SRC)
-		return -EINVAL;
-
-	dev_dbg(rsz->rkisp1->dev, "%s: pad: %d sel(%d,%d)/%dx%d\n", __func__,
-		sel->pad, sel->r.left, sel->r.top, sel->r.width, sel->r.height);
-
-	mutex_lock(&rsz->ops_lock);
-	rkisp1_rsz_set_sink_crop(rsz, cfg, &sel->r, sel->which);
-	mutex_unlock(&rsz->ops_lock);
-
-	return 0;
-}
-
-static const struct media_entity_operations rkisp1_rsz_media_ops = {
-	.link_validate = v4l2_subdev_link_validate,
-};
-
-static const struct v4l2_subdev_pad_ops rkisp1_rsz_pad_ops = {
-	.enum_mbus_code = rkisp1_rsz_enum_mbus_code,
-	.get_selection = rkisp1_rsz_get_selection,
-	.set_selection = rkisp1_rsz_set_selection,
-	.init_cfg = rkisp1_rsz_init_config,
-	.get_fmt = rkisp1_rsz_get_fmt,
-	.set_fmt = rkisp1_rsz_set_fmt,
-	.link_validate = v4l2_subdev_link_validate_default,
-};
-
-/* ----------------------------------------------------------------------------
- * Stream operations
- */
-
-static int rkisp1_rsz_s_stream(struct v4l2_subdev *sd, int enable)
-{
-	struct rkisp1_resizer *rsz =
-		container_of(sd, struct rkisp1_resizer, sd);
-	struct rkisp1_device *rkisp1 = rsz->rkisp1;
-	struct rkisp1_capture *other = &rkisp1->capture_devs[rsz->id ^ 1];
-	enum rkisp1_shadow_regs_when when = RKISP1_SHADOW_REGS_SYNC;
-
-	if (!enable) {
-		rkisp1_dcrop_disable(rsz, RKISP1_SHADOW_REGS_ASYNC);
-		rkisp1_rsz_disable(rsz, RKISP1_SHADOW_REGS_ASYNC);
-		return 0;
-	}
-
-	if (other->is_streaming)
-		when = RKISP1_SHADOW_REGS_ASYNC;
-
-	mutex_lock(&rsz->ops_lock);
-	rkisp1_rsz_config(rsz, when);
-	rkisp1_dcrop_config(rsz);
-
-	mutex_unlock(&rsz->ops_lock);
-	return 0;
-}
-
-static const struct v4l2_subdev_video_ops rkisp1_rsz_video_ops = {
-	.s_stream = rkisp1_rsz_s_stream,
-};
-
-static const struct v4l2_subdev_ops rkisp1_rsz_ops = {
-	.video = &rkisp1_rsz_video_ops,
-	.pad = &rkisp1_rsz_pad_ops,
-};
-
-static void rkisp1_rsz_unregister(struct rkisp1_resizer *rsz)
-{
-	v4l2_device_unregister_subdev(&rsz->sd);
-	media_entity_cleanup(&rsz->sd.entity);
-}
-
-static int rkisp1_rsz_register(struct rkisp1_resizer *rsz)
-{
-	static const char * const dev_names[] = {
-		RKISP1_RSZ_MP_DEV_NAME,
-		RKISP1_RSZ_SP_DEV_NAME
-	};
-	struct media_pad *pads = rsz->pads;
-	struct v4l2_subdev *sd = &rsz->sd;
-	int ret;
-
-	if (rsz->id == RKISP1_SELFPATH)
-		rsz->config = &rkisp1_rsz_config_sp;
-	else
-		rsz->config = &rkisp1_rsz_config_mp;
-
-	v4l2_subdev_init(sd, &rkisp1_rsz_ops);
-	sd->flags |= V4L2_SUBDEV_FL_HAS_DEVNODE;
-	sd->entity.ops = &rkisp1_rsz_media_ops;
-	sd->entity.function = MEDIA_ENT_F_PROC_VIDEO_SCALER;
-	sd->owner = THIS_MODULE;
-	strscpy(sd->name, dev_names[rsz->id], sizeof(sd->name));
-
-	pads[RKISP1_RSZ_PAD_SINK].flags = MEDIA_PAD_FL_SINK |
-					  MEDIA_PAD_FL_MUST_CONNECT;
-	pads[RKISP1_RSZ_PAD_SRC].flags = MEDIA_PAD_FL_SOURCE |
-					 MEDIA_PAD_FL_MUST_CONNECT;
-
-	rsz->pixel_enc = RKISP1_DEF_PIXEL_ENC;
-
-	mutex_init(&rsz->ops_lock);
-	ret = media_entity_pads_init(&sd->entity, RKISP1_RSZ_PAD_MAX, pads);
-	if (ret)
-		return ret;
-
-	ret = v4l2_device_register_subdev(&rsz->rkisp1->v4l2_dev, sd);
-	if (ret) {
-		dev_err(sd->dev, "Failed to register resizer subdev\n");
-		goto err_cleanup_media_entity;
-	}
-
-	rkisp1_rsz_init_config(sd, rsz->pad_cfg);
-	return 0;
-
-err_cleanup_media_entity:
-	media_entity_cleanup(&sd->entity);
-
-	return ret;
-}
-
-int rkisp1_resizer_devs_register(struct rkisp1_device *rkisp1)
-{
-	struct rkisp1_resizer *rsz;
-	unsigned int i, j;
-	int ret;
-
-	for (i = 0; i < ARRAY_SIZE(rkisp1->resizer_devs); i++) {
-		rsz = &rkisp1->resizer_devs[i];
-		rsz->rkisp1 = rkisp1;
-		rsz->id = i;
-		ret = rkisp1_rsz_register(rsz);
-		if (ret)
-			goto err_unreg_resizer_devs;
-	}
-
-	return 0;
-
-err_unreg_resizer_devs:
-	for (j = 0; j < i; j++) {
-		rsz = &rkisp1->resizer_devs[j];
-		rkisp1_rsz_unregister(rsz);
-	}
-
-	return ret;
-}
-
-void rkisp1_resizer_devs_unregister(struct rkisp1_device *rkisp1)
-{
-	struct rkisp1_resizer *mp = &rkisp1->resizer_devs[RKISP1_MAINPATH];
-	struct rkisp1_resizer *sp = &rkisp1->resizer_devs[RKISP1_SELFPATH];
-
-	rkisp1_rsz_unregister(mp);
-	rkisp1_rsz_unregister(sp);
-}
diff --git a/drivers/staging/media/rkisp1/rkisp1-stats.c b/drivers/staging/media/rkisp1/rkisp1-stats.c
deleted file mode 100644
index 3ddab8fa8f2d..000000000000
--- a/drivers/staging/media/rkisp1/rkisp1-stats.c
+++ /dev/null
@@ -1,415 +0,0 @@
-// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
-/*
- * Rockchip ISP1 Driver - Stats subdevice
- *
- * Copyright (C) 2017 Rockchip Electronics Co., Ltd.
- */
-
-#include <media/v4l2-common.h>
-#include <media/v4l2-event.h>
-#include <media/v4l2-ioctl.h>
-#include <media/videobuf2-core.h>
-#include <media/videobuf2-vmalloc.h>	/* for ISP statistics */
-
-#include "rkisp1-common.h"
-
-#define RKISP1_STATS_DEV_NAME	RKISP1_DRIVER_NAME "_stats"
-
-#define RKISP1_ISP_STATS_REQ_BUFS_MIN 2
-#define RKISP1_ISP_STATS_REQ_BUFS_MAX 8
-
-static int rkisp1_stats_enum_fmt_meta_cap(struct file *file, void *priv,
-					  struct v4l2_fmtdesc *f)
-{
-	struct video_device *video = video_devdata(file);
-	struct rkisp1_stats *stats = video_get_drvdata(video);
-
-	if (f->index > 0 || f->type != video->queue->type)
-		return -EINVAL;
-
-	f->pixelformat = stats->vdev_fmt.fmt.meta.dataformat;
-	return 0;
-}
-
-static int rkisp1_stats_g_fmt_meta_cap(struct file *file, void *priv,
-				       struct v4l2_format *f)
-{
-	struct video_device *video = video_devdata(file);
-	struct rkisp1_stats *stats = video_get_drvdata(video);
-	struct v4l2_meta_format *meta = &f->fmt.meta;
-
-	if (f->type != video->queue->type)
-		return -EINVAL;
-
-	memset(meta, 0, sizeof(*meta));
-	meta->dataformat = stats->vdev_fmt.fmt.meta.dataformat;
-	meta->buffersize = stats->vdev_fmt.fmt.meta.buffersize;
-
-	return 0;
-}
-
-static int rkisp1_stats_querycap(struct file *file,
-				 void *priv, struct v4l2_capability *cap)
-{
-	struct video_device *vdev = video_devdata(file);
-
-	strscpy(cap->driver, RKISP1_DRIVER_NAME, sizeof(cap->driver));
-	strscpy(cap->card, vdev->name, sizeof(cap->card));
-	strscpy(cap->bus_info, RKISP1_BUS_INFO, sizeof(cap->bus_info));
-
-	return 0;
-}
-
-/* ISP video device IOCTLs */
-static const struct v4l2_ioctl_ops rkisp1_stats_ioctl = {
-	.vidioc_reqbufs = vb2_ioctl_reqbufs,
-	.vidioc_querybuf = vb2_ioctl_querybuf,
-	.vidioc_create_bufs = vb2_ioctl_create_bufs,
-	.vidioc_qbuf = vb2_ioctl_qbuf,
-	.vidioc_dqbuf = vb2_ioctl_dqbuf,
-	.vidioc_prepare_buf = vb2_ioctl_prepare_buf,
-	.vidioc_expbuf = vb2_ioctl_expbuf,
-	.vidioc_streamon = vb2_ioctl_streamon,
-	.vidioc_streamoff = vb2_ioctl_streamoff,
-	.vidioc_enum_fmt_meta_cap = rkisp1_stats_enum_fmt_meta_cap,
-	.vidioc_g_fmt_meta_cap = rkisp1_stats_g_fmt_meta_cap,
-	.vidioc_s_fmt_meta_cap = rkisp1_stats_g_fmt_meta_cap,
-	.vidioc_try_fmt_meta_cap = rkisp1_stats_g_fmt_meta_cap,
-	.vidioc_querycap = rkisp1_stats_querycap,
-	.vidioc_subscribe_event = v4l2_ctrl_subscribe_event,
-	.vidioc_unsubscribe_event = v4l2_event_unsubscribe,
-};
-
-static const struct v4l2_file_operations rkisp1_stats_fops = {
-	.mmap = vb2_fop_mmap,
-	.unlocked_ioctl = video_ioctl2,
-	.poll = vb2_fop_poll,
-	.open = v4l2_fh_open,
-	.release = vb2_fop_release
-};
-
-static int rkisp1_stats_vb2_queue_setup(struct vb2_queue *vq,
-					unsigned int *num_buffers,
-					unsigned int *num_planes,
-					unsigned int sizes[],
-					struct device *alloc_devs[])
-{
-	*num_planes = 1;
-
-	*num_buffers = clamp_t(u32, *num_buffers, RKISP1_ISP_STATS_REQ_BUFS_MIN,
-			       RKISP1_ISP_STATS_REQ_BUFS_MAX);
-
-	sizes[0] = sizeof(struct rkisp1_stat_buffer);
-
-	return 0;
-}
-
-static void rkisp1_stats_vb2_buf_queue(struct vb2_buffer *vb)
-{
-	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
-	struct rkisp1_buffer *stats_buf =
-		container_of(vbuf, struct rkisp1_buffer, vb);
-	struct vb2_queue *vq = vb->vb2_queue;
-	struct rkisp1_stats *stats_dev = vq->drv_priv;
-
-	stats_buf->vaddr = vb2_plane_vaddr(vb, 0);
-
-	spin_lock_irq(&stats_dev->lock);
-	list_add_tail(&stats_buf->queue, &stats_dev->stat);
-	spin_unlock_irq(&stats_dev->lock);
-}
-
-static int rkisp1_stats_vb2_buf_prepare(struct vb2_buffer *vb)
-{
-	if (vb2_plane_size(vb, 0) < sizeof(struct rkisp1_stat_buffer))
-		return -EINVAL;
-
-	vb2_set_plane_payload(vb, 0, sizeof(struct rkisp1_stat_buffer));
-
-	return 0;
-}
-
-static void rkisp1_stats_vb2_stop_streaming(struct vb2_queue *vq)
-{
-	struct rkisp1_stats *stats = vq->drv_priv;
-	struct rkisp1_buffer *buf;
-	unsigned int i;
-
-	spin_lock_irq(&stats->lock);
-	for (i = 0; i < RKISP1_ISP_STATS_REQ_BUFS_MAX; i++) {
-		if (list_empty(&stats->stat))
-			break;
-		buf = list_first_entry(&stats->stat,
-				       struct rkisp1_buffer, queue);
-		list_del(&buf->queue);
-		vb2_buffer_done(&buf->vb.vb2_buf, VB2_BUF_STATE_ERROR);
-	}
-	spin_unlock_irq(&stats->lock);
-}
-
-static const struct vb2_ops rkisp1_stats_vb2_ops = {
-	.queue_setup = rkisp1_stats_vb2_queue_setup,
-	.buf_queue = rkisp1_stats_vb2_buf_queue,
-	.buf_prepare = rkisp1_stats_vb2_buf_prepare,
-	.wait_prepare = vb2_ops_wait_prepare,
-	.wait_finish = vb2_ops_wait_finish,
-	.stop_streaming = rkisp1_stats_vb2_stop_streaming,
-};
-
-static int
-rkisp1_stats_init_vb2_queue(struct vb2_queue *q, struct rkisp1_stats *stats)
-{
-	struct rkisp1_vdev_node *node;
-
-	node = container_of(q, struct rkisp1_vdev_node, buf_queue);
-
-	q->type = V4L2_BUF_TYPE_META_CAPTURE;
-	q->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF;
-	q->drv_priv = stats;
-	q->ops = &rkisp1_stats_vb2_ops;
-	q->mem_ops = &vb2_vmalloc_memops;
-	q->buf_struct_size = sizeof(struct rkisp1_buffer);
-	q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
-	q->lock = &node->vlock;
-
-	return vb2_queue_init(q);
-}
-
-static void rkisp1_stats_get_awb_meas(struct rkisp1_stats *stats,
-				      struct rkisp1_stat_buffer *pbuf)
-{
-	/* Protect against concurrent access from ISR? */
-	struct rkisp1_device *rkisp1 = stats->rkisp1;
-	u32 reg_val;
-
-	pbuf->meas_type |= RKISP1_CIF_ISP_STAT_AWB;
-	reg_val = rkisp1_read(rkisp1, RKISP1_CIF_ISP_AWB_WHITE_CNT);
-	pbuf->params.awb.awb_mean[0].cnt =
-				RKISP1_CIF_ISP_AWB_GET_PIXEL_CNT(reg_val);
-	reg_val = rkisp1_read(rkisp1, RKISP1_CIF_ISP_AWB_MEAN);
-
-	pbuf->params.awb.awb_mean[0].mean_cr_or_r =
-				RKISP1_CIF_ISP_AWB_GET_MEAN_CR_R(reg_val);
-	pbuf->params.awb.awb_mean[0].mean_cb_or_b =
-				RKISP1_CIF_ISP_AWB_GET_MEAN_CB_B(reg_val);
-	pbuf->params.awb.awb_mean[0].mean_y_or_g =
-				RKISP1_CIF_ISP_AWB_GET_MEAN_Y_G(reg_val);
-}
-
-static void rkisp1_stats_get_aec_meas(struct rkisp1_stats *stats,
-				      struct rkisp1_stat_buffer *pbuf)
-{
-	struct rkisp1_device *rkisp1 = stats->rkisp1;
-	unsigned int i;
-
-	pbuf->meas_type |= RKISP1_CIF_ISP_STAT_AUTOEXP;
-	for (i = 0; i < RKISP1_CIF_ISP_AE_MEAN_MAX; i++)
-		pbuf->params.ae.exp_mean[i] =
-			(u8)rkisp1_read(rkisp1,
-					RKISP1_CIF_ISP_EXP_MEAN_00 + i * 4);
-}
-
-static void rkisp1_stats_get_afc_meas(struct rkisp1_stats *stats,
-				      struct rkisp1_stat_buffer *pbuf)
-{
-	struct rkisp1_device *rkisp1 = stats->rkisp1;
-	struct rkisp1_cif_isp_af_stat *af;
-
-	pbuf->meas_type |= RKISP1_CIF_ISP_STAT_AFM;
-
-	af = &pbuf->params.af;
-	af->window[0].sum = rkisp1_read(rkisp1, RKISP1_CIF_ISP_AFM_SUM_A);
-	af->window[0].lum = rkisp1_read(rkisp1, RKISP1_CIF_ISP_AFM_LUM_A);
-	af->window[1].sum = rkisp1_read(rkisp1, RKISP1_CIF_ISP_AFM_SUM_B);
-	af->window[1].lum = rkisp1_read(rkisp1, RKISP1_CIF_ISP_AFM_LUM_B);
-	af->window[2].sum = rkisp1_read(rkisp1, RKISP1_CIF_ISP_AFM_SUM_C);
-	af->window[2].lum = rkisp1_read(rkisp1, RKISP1_CIF_ISP_AFM_LUM_C);
-}
-
-static void rkisp1_stats_get_hst_meas(struct rkisp1_stats *stats,
-				      struct rkisp1_stat_buffer *pbuf)
-{
-	struct rkisp1_device *rkisp1 = stats->rkisp1;
-	unsigned int i;
-
-	pbuf->meas_type |= RKISP1_CIF_ISP_STAT_HIST;
-	for (i = 0; i < RKISP1_CIF_ISP_HIST_BIN_N_MAX; i++)
-		pbuf->params.hist.hist_bins[i] =
-			(u8)rkisp1_read(rkisp1,
-					RKISP1_CIF_ISP_HIST_BIN_0 + i * 4);
-}
-
-static void rkisp1_stats_get_bls_meas(struct rkisp1_stats *stats,
-				      struct rkisp1_stat_buffer *pbuf)
-{
-	struct rkisp1_device *rkisp1 = stats->rkisp1;
-	const struct rkisp1_isp_mbus_info *in_fmt = rkisp1->isp.sink_fmt;
-	struct rkisp1_cif_isp_bls_meas_val *bls_val;
-
-	bls_val = &pbuf->params.ae.bls_val;
-	if (in_fmt->bayer_pat == RKISP1_RAW_BGGR) {
-		bls_val->meas_b =
-			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_A_MEASURED);
-		bls_val->meas_gb =
-			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_B_MEASURED);
-		bls_val->meas_gr =
-			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_C_MEASURED);
-		bls_val->meas_r =
-			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_D_MEASURED);
-	} else if (in_fmt->bayer_pat == RKISP1_RAW_GBRG) {
-		bls_val->meas_gb =
-			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_A_MEASURED);
-		bls_val->meas_b =
-			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_B_MEASURED);
-		bls_val->meas_r =
-			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_C_MEASURED);
-		bls_val->meas_gr =
-			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_D_MEASURED);
-	} else if (in_fmt->bayer_pat == RKISP1_RAW_GRBG) {
-		bls_val->meas_gr =
-			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_A_MEASURED);
-		bls_val->meas_r =
-			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_B_MEASURED);
-		bls_val->meas_b =
-			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_C_MEASURED);
-		bls_val->meas_gb =
-			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_D_MEASURED);
-	} else if (in_fmt->bayer_pat == RKISP1_RAW_RGGB) {
-		bls_val->meas_r =
-			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_A_MEASURED);
-		bls_val->meas_gr =
-			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_B_MEASURED);
-		bls_val->meas_gb =
-			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_C_MEASURED);
-		bls_val->meas_b =
-			rkisp1_read(rkisp1, RKISP1_CIF_ISP_BLS_D_MEASURED);
-	}
-}
-
-static void
-rkisp1_stats_send_measurement(struct rkisp1_stats *stats, u32 isp_ris)
-{
-	struct rkisp1_stat_buffer *cur_stat_buf;
-	struct rkisp1_buffer *cur_buf = NULL;
-	unsigned int frame_sequence = stats->rkisp1->isp.frame_sequence;
-	u64 timestamp = ktime_get_ns();
-
-	/* get one empty buffer */
-	if (!list_empty(&stats->stat)) {
-		cur_buf = list_first_entry(&stats->stat,
-					   struct rkisp1_buffer, queue);
-		list_del(&cur_buf->queue);
-	}
-
-	if (!cur_buf)
-		return;
-
-	cur_stat_buf =
-		(struct rkisp1_stat_buffer *)(cur_buf->vaddr);
-
-	if (isp_ris & RKISP1_CIF_ISP_AWB_DONE)
-		rkisp1_stats_get_awb_meas(stats, cur_stat_buf);
-
-	if (isp_ris & RKISP1_CIF_ISP_AFM_FIN)
-		rkisp1_stats_get_afc_meas(stats, cur_stat_buf);
-
-	if (isp_ris & RKISP1_CIF_ISP_EXP_END) {
-		rkisp1_stats_get_aec_meas(stats, cur_stat_buf);
-		rkisp1_stats_get_bls_meas(stats, cur_stat_buf);
-	}
-
-	if (isp_ris & RKISP1_CIF_ISP_HIST_MEASURE_RDY)
-		rkisp1_stats_get_hst_meas(stats, cur_stat_buf);
-
-	vb2_set_plane_payload(&cur_buf->vb.vb2_buf, 0,
-			      sizeof(struct rkisp1_stat_buffer));
-	cur_buf->vb.sequence = frame_sequence;
-	cur_buf->vb.vb2_buf.timestamp = timestamp;
-	vb2_buffer_done(&cur_buf->vb.vb2_buf, VB2_BUF_STATE_DONE);
-}
-
-void rkisp1_stats_isr(struct rkisp1_stats *stats, u32 isp_ris)
-{
-	struct rkisp1_device *rkisp1 = stats->rkisp1;
-	unsigned int isp_mis_tmp = 0;
-
-	spin_lock(&stats->lock);
-
-	rkisp1_write(rkisp1, RKISP1_STATS_MEAS_MASK, RKISP1_CIF_ISP_ICR);
-
-	isp_mis_tmp = rkisp1_read(rkisp1, RKISP1_CIF_ISP_MIS);
-	if (isp_mis_tmp & RKISP1_STATS_MEAS_MASK)
-		rkisp1->debug.stats_error++;
-
-	if (isp_ris & RKISP1_STATS_MEAS_MASK)
-		rkisp1_stats_send_measurement(stats, isp_ris);
-
-	spin_unlock(&stats->lock);
-}
-
-static void rkisp1_init_stats(struct rkisp1_stats *stats)
-{
-	stats->vdev_fmt.fmt.meta.dataformat =
-		V4L2_META_FMT_RK_ISP1_STAT_3A;
-	stats->vdev_fmt.fmt.meta.buffersize =
-		sizeof(struct rkisp1_stat_buffer);
-}
-
-int rkisp1_stats_register(struct rkisp1_device *rkisp1)
-{
-	struct rkisp1_stats *stats = &rkisp1->stats;
-	struct rkisp1_vdev_node *node = &stats->vnode;
-	struct video_device *vdev = &node->vdev;
-	int ret;
-
-	stats->rkisp1 = rkisp1;
-	mutex_init(&node->vlock);
-	INIT_LIST_HEAD(&stats->stat);
-	spin_lock_init(&stats->lock);
-
-	strscpy(vdev->name, RKISP1_STATS_DEV_NAME, sizeof(vdev->name));
-
-	video_set_drvdata(vdev, stats);
-	vdev->ioctl_ops = &rkisp1_stats_ioctl;
-	vdev->fops = &rkisp1_stats_fops;
-	vdev->release = video_device_release_empty;
-	vdev->lock = &node->vlock;
-	vdev->v4l2_dev = &rkisp1->v4l2_dev;
-	vdev->queue = &node->buf_queue;
-	vdev->device_caps = V4L2_CAP_META_CAPTURE | V4L2_CAP_STREAMING;
-	vdev->vfl_dir =  VFL_DIR_RX;
-	rkisp1_stats_init_vb2_queue(vdev->queue, stats);
-	rkisp1_init_stats(stats);
-	video_set_drvdata(vdev, stats);
-
-	node->pad.flags = MEDIA_PAD_FL_SINK;
-	ret = media_entity_pads_init(&vdev->entity, 1, &node->pad);
-	if (ret)
-		goto err_mutex_destroy;
-
-	ret = video_register_device(vdev, VFL_TYPE_VIDEO, -1);
-	if (ret) {
-		dev_err(&vdev->dev,
-			"failed to register %s, ret=%d\n", vdev->name, ret);
-		goto err_cleanup_media_entity;
-	}
-
-	return 0;
-
-err_cleanup_media_entity:
-	media_entity_cleanup(&vdev->entity);
-err_mutex_destroy:
-	mutex_destroy(&node->vlock);
-	return ret;
-}
-
-void rkisp1_stats_unregister(struct rkisp1_device *rkisp1)
-{
-	struct rkisp1_stats *stats = &rkisp1->stats;
-	struct rkisp1_vdev_node *node = &stats->vnode;
-	struct video_device *vdev = &node->vdev;
-
-	vb2_video_unregister_device(vdev);
-	media_entity_cleanup(&vdev->entity);
-	mutex_destroy(&node->vlock);
-}
diff --git a/drivers/staging/media/rkisp1/uapi/rkisp1-config.h b/drivers/staging/media/rkisp1/uapi/rkisp1-config.h
deleted file mode 100644
index 6e449e784260..000000000000
--- a/drivers/staging/media/rkisp1/uapi/rkisp1-config.h
+++ /dev/null
@@ -1,884 +0,0 @@
-/* SPDX-License-Identifier: ((GPL-2.0+ WITH Linux-syscall-note) OR MIT) */
-/*
- * Rockchip ISP1 userspace API
- * Copyright (C) 2017 Rockchip Electronics Co., Ltd.
- */
-
-#ifndef _UAPI_RKISP1_CONFIG_H
-#define _UAPI_RKISP1_CONFIG_H
-
-#include <linux/types.h>
-
-/* Defect Pixel Cluster Detection */
-#define RKISP1_CIF_ISP_MODULE_DPCC		(1U << 0)
-/* Black Level Subtraction */
-#define RKISP1_CIF_ISP_MODULE_BLS		(1U << 1)
-/* Sensor De-gamma */
-#define RKISP1_CIF_ISP_MODULE_SDG		(1U << 2)
-/* Histogram */
-#define RKISP1_CIF_ISP_MODULE_HST		(1U << 3)
-/* Lens Shade Control */
-#define RKISP1_CIF_ISP_MODULE_LSC		(1U << 4)
-/* Auto White Balance Gain */
-#define RKISP1_CIF_ISP_MODULE_AWB_GAIN		(1U << 5)
-/* Filter */
-#define RKISP1_CIF_ISP_MODULE_FLT		(1U << 6)
-/* Bayer Demosaic */
-#define RKISP1_CIF_ISP_MODULE_BDM		(1U << 7)
-/* Cross Talk */
-#define RKISP1_CIF_ISP_MODULE_CTK		(1U << 8)
-/* Gamma Out Curve */
-#define RKISP1_CIF_ISP_MODULE_GOC		(1U << 9)
-/* Color Processing */
-#define RKISP1_CIF_ISP_MODULE_CPROC		(1U << 10)
-/* Auto Focus Control */
-#define RKISP1_CIF_ISP_MODULE_AFC		(1U << 11)
-/* Auto White Balancing */
-#define RKISP1_CIF_ISP_MODULE_AWB		(1U << 12)
-/* Image Effect */
-#define RKISP1_CIF_ISP_MODULE_IE		(1U << 13)
-/* Auto Exposure Control */
-#define RKISP1_CIF_ISP_MODULE_AEC		(1U << 14)
-/* Wide Dynamic Range */
-#define RKISP1_CIF_ISP_MODULE_WDR		(1U << 15)
-/* Denoise Pre-Filter */
-#define RKISP1_CIF_ISP_MODULE_DPF		(1U << 16)
-/* Denoise Pre-Filter Strength */
-#define RKISP1_CIF_ISP_MODULE_DPF_STRENGTH	(1U << 17)
-
-#define RKISP1_CIF_ISP_CTK_COEFF_MAX            0x100
-#define RKISP1_CIF_ISP_CTK_OFFSET_MAX           0x800
-
-#define RKISP1_CIF_ISP_AE_MEAN_MAX              25
-#define RKISP1_CIF_ISP_HIST_BIN_N_MAX           16
-#define RKISP1_CIF_ISP_AFM_MAX_WINDOWS          3
-#define RKISP1_CIF_ISP_DEGAMMA_CURVE_SIZE       17
-
-#define RKISP1_CIF_ISP_BDM_MAX_TH               0xff
-
-/*
- * Black level compensation
- */
-/* maximum value for horizontal start address */
-#define RKISP1_CIF_ISP_BLS_START_H_MAX             0x00000fff
-/* maximum value for horizontal stop address */
-#define RKISP1_CIF_ISP_BLS_STOP_H_MAX              0x00000fff
-/* maximum value for vertical start address */
-#define RKISP1_CIF_ISP_BLS_START_V_MAX             0x00000fff
-/* maximum value for vertical stop address */
-#define RKISP1_CIF_ISP_BLS_STOP_V_MAX              0x00000fff
-/* maximum is 2^18 = 262144*/
-#define RKISP1_CIF_ISP_BLS_SAMPLES_MAX             0x00000012
-/* maximum value for fixed black level */
-#define RKISP1_CIF_ISP_BLS_FIX_SUB_MAX             0x00000fff
-/* minimum value for fixed black level */
-#define RKISP1_CIF_ISP_BLS_FIX_SUB_MIN             0xfffff000
-/* 13 bit range (signed)*/
-#define RKISP1_CIF_ISP_BLS_FIX_MASK                0x00001fff
-
-/*
- * Automatic white balance measurements
- */
-#define RKISP1_CIF_ISP_AWB_MAX_GRID                1
-#define RKISP1_CIF_ISP_AWB_MAX_FRAMES              7
-
-/*
- * Gamma out
- */
-/* Maximum number of color samples supported */
-#define RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES       17
-
-/*
- * Lens shade correction
- */
-#define RKISP1_CIF_ISP_LSC_SECTORS_TBL_SIZE        8
-
-/*
- * The following matches the tuning process,
- * not the max capabilities of the chip.
- */
-#define RKISP1_CIF_ISP_LSC_SAMPLES_MAX             17
-
-/*
- * Histogram calculation
- */
-/* Last 3 values unused. */
-#define RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE 28
-
-/*
- * Defect Pixel Cluster Correction
- */
-#define RKISP1_CIF_ISP_DPCC_METHODS_MAX       3
-
-/*
- * Denoising pre filter
- */
-#define RKISP1_CIF_ISP_DPF_MAX_NLF_COEFFS      17
-#define RKISP1_CIF_ISP_DPF_MAX_SPATIAL_COEFFS  6
-
-/*
- * Measurement types
- */
-#define RKISP1_CIF_ISP_STAT_AWB           (1U << 0)
-#define RKISP1_CIF_ISP_STAT_AUTOEXP       (1U << 1)
-#define RKISP1_CIF_ISP_STAT_AFM           (1U << 2)
-#define RKISP1_CIF_ISP_STAT_HIST          (1U << 3)
-
-enum rkisp1_cif_isp_histogram_mode {
-	RKISP1_CIF_ISP_HISTOGRAM_MODE_DISABLE,
-	RKISP1_CIF_ISP_HISTOGRAM_MODE_RGB_COMBINED,
-	RKISP1_CIF_ISP_HISTOGRAM_MODE_R_HISTOGRAM,
-	RKISP1_CIF_ISP_HISTOGRAM_MODE_G_HISTOGRAM,
-	RKISP1_CIF_ISP_HISTOGRAM_MODE_B_HISTOGRAM,
-	RKISP1_CIF_ISP_HISTOGRAM_MODE_Y_HISTOGRAM
-};
-
-enum rkisp1_cif_isp_awb_mode_type {
-	RKISP1_CIF_ISP_AWB_MODE_MANUAL,
-	RKISP1_CIF_ISP_AWB_MODE_RGB,
-	RKISP1_CIF_ISP_AWB_MODE_YCBCR
-};
-
-enum rkisp1_cif_isp_flt_mode {
-	RKISP1_CIF_ISP_FLT_STATIC_MODE,
-	RKISP1_CIF_ISP_FLT_DYNAMIC_MODE
-};
-
-/**
- * enum rkisp1_cif_isp_exp_ctrl_autostop - stop modes
- * @RKISP1_CIF_ISP_EXP_CTRL_AUTOSTOP_0: continuous measurement
- * @RKISP1_CIF_ISP_EXP_CTRL_AUTOSTOP_1: stop measuring after a complete frame
- */
-enum rkisp1_cif_isp_exp_ctrl_autostop {
-	RKISP1_CIF_ISP_EXP_CTRL_AUTOSTOP_0 = 0,
-	RKISP1_CIF_ISP_EXP_CTRL_AUTOSTOP_1 = 1,
-};
-
-/**
- * enum rkisp1_cif_isp_exp_meas_mode - Exposure measure mode
- * @RKISP1_CIF_ISP_EXP_MEASURING_MODE_0: Y = 16 + 0.25R + 0.5G + 0.1094B
- * @RKISP1_CIF_ISP_EXP_MEASURING_MODE_1: Y = (R + G + B) x (85/256)
- */
-enum rkisp1_cif_isp_exp_meas_mode {
-	RKISP1_CIF_ISP_EXP_MEASURING_MODE_0,
-	RKISP1_CIF_ISP_EXP_MEASURING_MODE_1,
-};
-
-/*---------- PART1: Input Parameters ------------*/
-
-/**
- * struct rkisp1_cif_isp_window -  measurement window.
- *
- * Measurements are calculated per window inside the frame.
- * This struct represents a window for a measurement.
- *
- * @h_offs: the horizontal offset of the window from the left of the frame in pixels.
- * @v_offs: the vertical offset of the window from the top of the frame in pixels.
- * @h_size: the horizontal size of the window in pixels
- * @v_size: the vertical size of the window in pixels.
- */
-struct rkisp1_cif_isp_window {
-	__u16 h_offs;
-	__u16 v_offs;
-	__u16 h_size;
-	__u16 v_size;
-};
-
-/**
- * struct rkisp1_cif_isp_bls_fixed_val - BLS fixed subtraction values
- *
- * The values will be subtracted from the sensor
- * values. Therefore a negative value means addition instead of subtraction!
- *
- * @r: Fixed (signed!) subtraction value for Bayer pattern R
- * @gr: Fixed (signed!) subtraction value for Bayer pattern Gr
- * @gb: Fixed (signed!) subtraction value for Bayer pattern Gb
- * @b: Fixed (signed!) subtraction value for Bayer pattern B
- */
-struct rkisp1_cif_isp_bls_fixed_val {
-	__s16 r;
-	__s16 gr;
-	__s16 gb;
-	__s16 b;
-};
-
-/**
- * struct rkisp1_cif_isp_bls_config - Configuration used by black level subtraction
- *
- * @enable_auto: Automatic mode activated means that the measured values
- *		 are subtracted. Otherwise the fixed subtraction
- *		 values will be subtracted.
- * @en_windows: enabled window
- * @bls_window1: Measurement window 1 size
- * @bls_window2: Measurement window 2 size
- * @bls_samples: Set amount of measured pixels for each Bayer position
- *		 (A, B,C and D) to 2^bls_samples.
- * @fixed_val: Fixed subtraction values
- */
-struct rkisp1_cif_isp_bls_config {
-	__u8 enable_auto;
-	__u8 en_windows;
-	struct rkisp1_cif_isp_window bls_window1;
-	struct rkisp1_cif_isp_window bls_window2;
-	__u8 bls_samples;
-	struct rkisp1_cif_isp_bls_fixed_val fixed_val;
-};
-
-/**
- * struct rkisp1_cif_isp_dpcc_methods_config - Methods Configuration used by DPCC
- *
- * Methods Configuration used by Defect Pixel Cluster Correction
- *
- * @method: Method enable bits
- * @line_thresh: Line threshold
- * @line_mad_fac: Line MAD factor
- * @pg_fac: Peak gradient factor
- * @rnd_thresh: Rank Neighbor Difference threshold
- * @rg_fac: Rank gradient factor
- */
-struct rkisp1_cif_isp_dpcc_methods_config {
-	__u32 method;
-	__u32 line_thresh;
-	__u32 line_mad_fac;
-	__u32 pg_fac;
-	__u32 rnd_thresh;
-	__u32 rg_fac;
-};
-
-/**
- * struct rkisp1_cif_isp_dpcc_config - Configuration used by DPCC
- *
- * Configuration used by Defect Pixel Cluster Correction
- *
- * @mode: dpcc output mode
- * @output_mode: whether use hard coded methods
- * @set_use: stage1 methods set
- * @methods: methods config
- * @ro_limits: rank order limits
- * @rnd_offs: differential rank offsets for rank neighbor difference
- */
-struct rkisp1_cif_isp_dpcc_config {
-	__u32 mode;
-	__u32 output_mode;
-	__u32 set_use;
-	struct rkisp1_cif_isp_dpcc_methods_config methods[RKISP1_CIF_ISP_DPCC_METHODS_MAX];
-	__u32 ro_limits;
-	__u32 rnd_offs;
-};
-
-/**
- * struct rkisp1_cif_isp_gamma_corr_curve - gamma curve point definition y-axis (output).
- *
- * The reset values define a linear curve which has the same effect as bypass. Reset values are:
- * gamma_y[0] = 0x0000, gamma_y[1] = 0x0100, ... gamma_y[15] = 0x0f00, gamma_y[16] = 0xfff
- *
- * @gamma_y: the values for the y-axis of gamma curve points. Each value is 12 bit.
- */
-struct rkisp1_cif_isp_gamma_corr_curve {
-	__u16 gamma_y[RKISP1_CIF_ISP_DEGAMMA_CURVE_SIZE];
-};
-
-/**
- * struct rkisp1_cif_isp_gamma_curve_x_axis_pnts - De-Gamma Curve definition x increments
- *		(sampling points). gamma_dx0 is for the lower samples (1-8), gamma_dx1 is for the
- *		higher samples (9-16). The reset values for both fields is 0x44444444. This means
- *		that each sample is 4 units away from the previous one on the x-axis.
- *
- * @gamma_dx0: gamma curve sample points definitions. Bits 0:2 for sample 1. Bit 3 unused.
- *		Bits 4:6 for sample 2. bit 7 unused ... Bits 28:30 for sample 8. Bit 31 unused
- * @gamma_dx1: gamma curve sample points definitions. Bits 0:2 for sample 9. Bit 3 unused.
- *		Bits 4:6 for sample 10. bit 7 unused ... Bits 28:30 for sample 16. Bit 31 unused
- */
-struct rkisp1_cif_isp_gamma_curve_x_axis_pnts {
-	__u32 gamma_dx0;
-	__u32 gamma_dx1;
-};
-
-/**
- * struct rkisp1_cif_isp_sdg_config - Configuration used by sensor degamma
- *
- * @curve_r: gamma curve point definition axis for red
- * @curve_g: gamma curve point definition axis for green
- * @curve_b: gamma curve point definition axis for blue
- * @xa_pnts: x axis increments
- */
-struct rkisp1_cif_isp_sdg_config {
-	struct rkisp1_cif_isp_gamma_corr_curve curve_r;
-	struct rkisp1_cif_isp_gamma_corr_curve curve_g;
-	struct rkisp1_cif_isp_gamma_corr_curve curve_b;
-	struct rkisp1_cif_isp_gamma_curve_x_axis_pnts xa_pnts;
-};
-
-/**
- * struct rkisp1_cif_isp_lsc_config - Configuration used by Lens shading correction
- *
- * @r_data_tbl: sample table red
- * @gr_data_tbl: sample table green (red)
- * @gb_data_tbl: sample table green (blue)
- * @b_data_tbl: sample table blue
- * @x_grad_tbl: gradient table x
- * @y_grad_tbl: gradient table y
- * @x_size_tbl: size table x
- * @y_size_tbl: size table y
- * @config_width: not used at the moment
- * @config_height: not used at the moment
- */
-struct rkisp1_cif_isp_lsc_config {
-	__u16 r_data_tbl[RKISP1_CIF_ISP_LSC_SAMPLES_MAX][RKISP1_CIF_ISP_LSC_SAMPLES_MAX];
-	__u16 gr_data_tbl[RKISP1_CIF_ISP_LSC_SAMPLES_MAX][RKISP1_CIF_ISP_LSC_SAMPLES_MAX];
-	__u16 gb_data_tbl[RKISP1_CIF_ISP_LSC_SAMPLES_MAX][RKISP1_CIF_ISP_LSC_SAMPLES_MAX];
-	__u16 b_data_tbl[RKISP1_CIF_ISP_LSC_SAMPLES_MAX][RKISP1_CIF_ISP_LSC_SAMPLES_MAX];
-
-	__u16 x_grad_tbl[RKISP1_CIF_ISP_LSC_SECTORS_TBL_SIZE];
-	__u16 y_grad_tbl[RKISP1_CIF_ISP_LSC_SECTORS_TBL_SIZE];
-
-	__u16 x_size_tbl[RKISP1_CIF_ISP_LSC_SECTORS_TBL_SIZE];
-	__u16 y_size_tbl[RKISP1_CIF_ISP_LSC_SECTORS_TBL_SIZE];
-	__u16 config_width;
-	__u16 config_height;
-};
-
-/**
- * struct rkisp1_cif_isp_ie_config - Configuration used by image effects
- *
- * @effect: values from 'enum v4l2_colorfx'. Possible values are: V4L2_COLORFX_SEPIA,
- *		V4L2_COLORFX_SET_CBCR, V4L2_COLORFX_AQUA, V4L2_COLORFX_EMBOSS,
- *		V4L2_COLORFX_SKETCH,   V4L2_COLORFX_BW,   V4L2_COLORFX_NEGATIVE
- * @color_sel: bits 0:2 - colors bitmask (001 - blue, 010 - green, 100 - red).
- *		bits 8:15 - Threshold value of the RGB colors for the color selection effect.
- * @eff_mat_1: 3x3 Matrix Coefficients for Emboss Effect 1
- * @eff_mat_2: 3x3 Matrix Coefficients for Emboss Effect 2
- * @eff_mat_3: 3x3 Matrix Coefficients for Emboss 3/Sketch 1
- * @eff_mat_4: 3x3 Matrix Coefficients for Sketch Effect 2
- * @eff_mat_5: 3x3 Matrix Coefficients for Sketch Effect 3
- * @eff_tint: Chrominance increment values of tint (used for sepia effect)
- */
-struct rkisp1_cif_isp_ie_config {
-	__u16 effect;
-	__u16 color_sel;
-	__u16 eff_mat_1;
-	__u16 eff_mat_2;
-	__u16 eff_mat_3;
-	__u16 eff_mat_4;
-	__u16 eff_mat_5;
-	__u16 eff_tint;
-};
-
-/**
- * struct rkisp1_cif_isp_cproc_config - Configuration used by Color Processing
- *
- * @c_out_range: Chrominance pixel clipping range at output.
- *		 (0 for limit, 1 for full)
- * @y_in_range: Luminance pixel clipping range at output.
- * @y_out_range: Luminance pixel clipping range at output.
- * @contrast: 00~ff, 0.0~1.992
- * @brightness: 80~7F, -128~+127
- * @sat: saturation, 00~FF, 0.0~1.992
- * @hue: 80~7F, -90~+87.188
- */
-struct rkisp1_cif_isp_cproc_config {
-	__u8 c_out_range;
-	__u8 y_in_range;
-	__u8 y_out_range;
-	__u8 contrast;
-	__u8 brightness;
-	__u8 sat;
-	__u8 hue;
-};
-
-/**
- * struct rkisp1_cif_isp_awb_meas_config - Configuration used by auto white balance
- *
- * @awb_mode: the awb meas mode. From enum rkisp1_cif_isp_awb_mode_type.
- * @awb_wnd: white balance measurement window (in pixels)
- * @max_y: only pixels values < max_y contribute to awb measurement, set to 0
- *	   to disable this feature
- * @min_y: only pixels values > min_y contribute to awb measurement
- * @max_csum: Chrominance sum maximum value, only consider pixels with Cb+Cr,
- *	      smaller than threshold for awb measurements
- * @min_c: Chrominance minimum value, only consider pixels with Cb/Cr
- *	   each greater than threshold value for awb measurements
- * @frames: number of frames - 1 used for mean value calculation
- *	    (ucFrames=0 means 1 Frame)
- * @awb_ref_cr: reference Cr value for AWB regulation, target for AWB
- * @awb_ref_cb: reference Cb value for AWB regulation, target for AWB
- * @enable_ymax_cmp: enable Y_MAX compare (Not valid in RGB measurement mode.)
- */
-struct rkisp1_cif_isp_awb_meas_config {
-	/*
-	 * Note: currently the h and v offsets are mapped to grid offsets
-	 */
-	struct rkisp1_cif_isp_window awb_wnd;
-	__u32 awb_mode;
-	__u8 max_y;
-	__u8 min_y;
-	__u8 max_csum;
-	__u8 min_c;
-	__u8 frames;
-	__u8 awb_ref_cr;
-	__u8 awb_ref_cb;
-	__u8 enable_ymax_cmp;
-};
-
-/**
- * struct rkisp1_cif_isp_awb_gain_config - Configuration used by auto white balance gain
- *
- * All fields in this struct are 10 bit, where:
- * 0x100h = 1, unsigned integer value, range 0 to 4 with 8 bit fractional part.
- *
- * out_data_x = ( AWB_GAIN_X * in_data + 128) >> 8
- *
- * @gain_red: gain value for red component.
- * @gain_green_r: gain value for green component in red line.
- * @gain_blue: gain value for blue component.
- * @gain_green_b: gain value for green component in blue line.
- */
-struct rkisp1_cif_isp_awb_gain_config {
-	__u16 gain_red;
-	__u16 gain_green_r;
-	__u16 gain_blue;
-	__u16 gain_green_b;
-};
-
-/**
- * struct rkisp1_cif_isp_flt_config - Configuration used by ISP filtering
- *
- * All 4 threshold fields (thresh_*) are 10 bits.
- * All 6 factor fields (fac_*) are 6 bits.
- *
- * @mode: ISP_FILT_MODE register fields (from enum rkisp1_cif_isp_flt_mode)
- * @grn_stage1: Green filter stage 1 select (range 0x0...0x8)
- * @chr_h_mode: Chroma filter horizontal mode
- * @chr_v_mode: Chroma filter vertical mode
- * @thresh_bl0: If thresh_bl1 < sum_grad < thresh_bl0 then fac_bl0 is selected (blurring th)
- * @thresh_bl1: If sum_grad < thresh_bl1 then fac_bl1 is selected (blurring th)
- * @thresh_sh0: If thresh_sh0 < sum_grad < thresh_sh1 then thresh_sh0 is selected (sharpening th)
- * @thresh_sh1: If thresh_sh1 < sum_grad then thresh_sh1 is selected (sharpening th)
- * @lum_weight: Parameters for luminance weight function.
- * @fac_sh1: filter factor for sharp1 level
- * @fac_sh0: filter factor for sharp0 level
- * @fac_mid: filter factor for mid level and for static filter mode
- * @fac_bl0: filter factor for blur 0 level
- * @fac_bl1: filter factor for blur 1 level (max blur)
- */
-struct rkisp1_cif_isp_flt_config {
-	__u32 mode;
-	__u8 grn_stage1;
-	__u8 chr_h_mode;
-	__u8 chr_v_mode;
-	__u32 thresh_bl0;
-	__u32 thresh_bl1;
-	__u32 thresh_sh0;
-	__u32 thresh_sh1;
-	__u32 lum_weight;
-	__u32 fac_sh1;
-	__u32 fac_sh0;
-	__u32 fac_mid;
-	__u32 fac_bl0;
-	__u32 fac_bl1;
-};
-
-/**
- * struct rkisp1_cif_isp_bdm_config - Configuration used by Bayer DeMosaic
- *
- * @demosaic_th: threshold for bayer demosaicing texture detection
- */
-struct rkisp1_cif_isp_bdm_config {
-	__u8 demosaic_th;
-};
-
-/**
- * struct rkisp1_cif_isp_ctk_config - Configuration used by Cross Talk correction
- *
- * @coeff: color correction matrix. Values are 11-bit signed fixed-point numbers with 4 bit integer
- *		and 7 bit fractional part, ranging from -8 (0x400) to +7.992 (0x3FF). 0 is
- *		represented by 0x000 and a coefficient value of 1 as 0x080.
- * @ct_offset: Red, Green, Blue offsets for the crosstalk correction matrix
- */
-struct rkisp1_cif_isp_ctk_config {
-	__u16 coeff[3][3];
-	__u16 ct_offset[3];
-};
-
-enum rkisp1_cif_isp_goc_mode {
-	RKISP1_CIF_ISP_GOC_MODE_LOGARITHMIC,
-	RKISP1_CIF_ISP_GOC_MODE_EQUIDISTANT
-};
-
-/**
- * struct rkisp1_cif_isp_goc_config - Configuration used by Gamma Out correction
- *
- * @mode: goc mode (from enum rkisp1_cif_isp_goc_mode)
- * @gamma_y: gamma out curve y-axis for all color components
- */
-struct rkisp1_cif_isp_goc_config {
-	__u32 mode;
-	__u16 gamma_y[RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES];
-};
-
-/**
- * struct rkisp1_cif_isp_hst_config - Configuration used by Histogram
- *
- * @mode: histogram mode (from enum rkisp1_cif_isp_histogram_mode)
- * @histogram_predivider: process every stepsize pixel, all other pixels are
- *			  skipped
- * @meas_window: coordinates of the measure window
- * @hist_weight: weighting factor for sub-windows
- */
-struct rkisp1_cif_isp_hst_config {
-	__u32 mode;
-	__u8 histogram_predivider;
-	struct rkisp1_cif_isp_window meas_window;
-	__u8 hist_weight[RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE];
-};
-
-/**
- * struct rkisp1_cif_isp_aec_config - Configuration used by Auto Exposure Control
- *
- * @mode: Exposure measure mode (from enum rkisp1_cif_isp_exp_meas_mode)
- * @autostop: stop mode (from enum rkisp1_cif_isp_exp_ctrl_autostop)
- * @meas_window: coordinates of the measure window
- */
-struct rkisp1_cif_isp_aec_config {
-	__u32 mode;
-	__u32 autostop;
-	struct rkisp1_cif_isp_window meas_window;
-};
-
-/**
- * struct rkisp1_cif_isp_afc_config - Configuration used by Auto Focus Control
- *
- * @num_afm_win: max RKISP1_CIF_ISP_AFM_MAX_WINDOWS
- * @afm_win: coordinates of the meas window
- * @thres: threshold used for minimizing the influence of noise
- * @var_shift: the number of bits for the shift operation at the end of the
- *	       calculation chain.
- */
-struct rkisp1_cif_isp_afc_config {
-	__u8 num_afm_win;
-	struct rkisp1_cif_isp_window afm_win[RKISP1_CIF_ISP_AFM_MAX_WINDOWS];
-	__u32 thres;
-	__u32 var_shift;
-};
-
-/**
- * enum rkisp1_cif_isp_dpf_gain_usage - dpf gain usage
- * @RKISP1_CIF_ISP_DPF_GAIN_USAGE_DISABLED: don't use any gains in preprocessing stage
- * @RKISP1_CIF_ISP_DPF_GAIN_USAGE_NF_GAINS: use only the noise function gains from
- *				    registers DPF_NF_GAIN_R, ...
- * @RKISP1_CIF_ISP_DPF_GAIN_USAGE_LSC_GAINS:  use only the gains from LSC module
- * @RKISP1_CIF_ISP_DPF_GAIN_USAGE_NF_LSC_GAINS: use the noise function gains and the
- *					gains from LSC module
- * @RKISP1_CIF_ISP_DPF_GAIN_USAGE_AWB_GAINS: use only the gains from AWB module
- * @RKISP1_CIF_ISP_DPF_GAIN_USAGE_AWB_LSC_GAINS: use the gains from AWB and LSC module
- * @RKISP1_CIF_ISP_DPF_GAIN_USAGE_MAX: upper border (only for an internal evaluation)
- */
-enum rkisp1_cif_isp_dpf_gain_usage {
-	RKISP1_CIF_ISP_DPF_GAIN_USAGE_DISABLED,
-	RKISP1_CIF_ISP_DPF_GAIN_USAGE_NF_GAINS,
-	RKISP1_CIF_ISP_DPF_GAIN_USAGE_LSC_GAINS,
-	RKISP1_CIF_ISP_DPF_GAIN_USAGE_NF_LSC_GAINS,
-	RKISP1_CIF_ISP_DPF_GAIN_USAGE_AWB_GAINS,
-	RKISP1_CIF_ISP_DPF_GAIN_USAGE_AWB_LSC_GAINS,
-	RKISP1_CIF_ISP_DPF_GAIN_USAGE_MAX
-};
-
-/**
- * enum rkisp1_cif_isp_dpf_rb_filtersize - Red and blue filter sizes
- * @RKISP1_CIF_ISP_DPF_RB_FILTERSIZE_13x9: red and blue filter kernel size 13x9
- *				   (means 7x5 active pixel)
- * @RKISP1_CIF_ISP_DPF_RB_FILTERSIZE_9x9: red and blue filter kernel size 9x9
- *				   (means 5x5 active pixel)
- */
-enum rkisp1_cif_isp_dpf_rb_filtersize {
-	RKISP1_CIF_ISP_DPF_RB_FILTERSIZE_13x9,
-	RKISP1_CIF_ISP_DPF_RB_FILTERSIZE_9x9,
-};
-
-/**
- * enum rkisp1_cif_isp_dpf_nll_scale_mode - dpf noise level scale mode
- * @RKISP1_CIF_ISP_NLL_SCALE_LINEAR: use a linear scaling
- * @RKISP1_CIF_ISP_NLL_SCALE_LOGARITHMIC: use a logarithmic scaling
- */
-enum rkisp1_cif_isp_dpf_nll_scale_mode {
-	RKISP1_CIF_ISP_NLL_SCALE_LINEAR,
-	RKISP1_CIF_ISP_NLL_SCALE_LOGARITHMIC,
-};
-
-/**
- * struct rkisp1_cif_isp_dpf_nll - Noise level lookup
- *
- * @coeff: Noise level Lookup coefficient
- * @scale_mode: dpf noise level scale mode (from enum rkisp1_cif_isp_dpf_nll_scale_mode)
- */
-struct rkisp1_cif_isp_dpf_nll {
-	__u16 coeff[RKISP1_CIF_ISP_DPF_MAX_NLF_COEFFS];
-	__u32 scale_mode;
-};
-
-/**
- * struct rkisp1_cif_isp_dpf_rb_flt - Red blue filter config
- *
- * @fltsize: The filter size for the red and blue pixels
- *	     (from enum rkisp1_cif_isp_dpf_rb_filtersize)
- * @spatial_coeff: Spatial weights
- * @r_enable: enable filter processing for red pixels
- * @b_enable: enable filter processing for blue pixels
- */
-struct rkisp1_cif_isp_dpf_rb_flt {
-	__u32 fltsize;
-	__u8 spatial_coeff[RKISP1_CIF_ISP_DPF_MAX_SPATIAL_COEFFS];
-	__u8 r_enable;
-	__u8 b_enable;
-};
-
-/**
- * struct rkisp1_cif_isp_dpf_g_flt - Green filter Configuration
- *
- * @spatial_coeff: Spatial weights
- * @gr_enable: enable filter processing for green pixels in green/red lines
- * @gb_enable: enable filter processing for green pixels in green/blue lines
- */
-struct rkisp1_cif_isp_dpf_g_flt {
-	__u8 spatial_coeff[RKISP1_CIF_ISP_DPF_MAX_SPATIAL_COEFFS];
-	__u8 gr_enable;
-	__u8 gb_enable;
-};
-
-/**
- * struct rkisp1_cif_isp_dpf_gain - Noise function Configuration
- *
- * @mode: dpf gain usage  (from enum rkisp1_cif_isp_dpf_gain_usage)
- * @nf_r_gain: Noise function Gain that replaces the AWB gain for red pixels
- * @nf_b_gain: Noise function Gain that replaces the AWB gain for blue pixels
- * @nf_gr_gain: Noise function Gain that replaces the AWB gain
- *		for green pixels in a red line
- * @nf_gb_gain: Noise function Gain that replaces the AWB gain
- *		for green pixels in a blue line
- */
-struct rkisp1_cif_isp_dpf_gain {
-	__u32 mode;
-	__u16 nf_r_gain;
-	__u16 nf_b_gain;
-	__u16 nf_gr_gain;
-	__u16 nf_gb_gain;
-};
-
-/**
- * struct rkisp1_cif_isp_dpf_config - Configuration used by De-noising pre-filter
- *
- * @gain: noise function gain
- * @g_flt: green filter config
- * @rb_flt: red blue filter config
- * @nll: noise level lookup
- */
-struct rkisp1_cif_isp_dpf_config {
-	struct rkisp1_cif_isp_dpf_gain gain;
-	struct rkisp1_cif_isp_dpf_g_flt g_flt;
-	struct rkisp1_cif_isp_dpf_rb_flt rb_flt;
-	struct rkisp1_cif_isp_dpf_nll nll;
-};
-
-/**
- * struct rkisp1_cif_isp_dpf_strength_config - strength of the filter
- *
- * @r: filter strength of the RED filter
- * @g: filter strength of the GREEN filter
- * @b: filter strength of the BLUE filter
- */
-struct rkisp1_cif_isp_dpf_strength_config {
-	__u8 r;
-	__u8 g;
-	__u8 b;
-};
-
-/**
- * struct rkisp1_cif_isp_isp_other_cfg - Parameters for some blocks in rockchip isp1
- *
- * @dpcc_config: Defect Pixel Cluster Correction config
- * @bls_config: Black Level Subtraction config
- * @sdg_config: sensor degamma config
- * @lsc_config: Lens Shade config
- * @awb_gain_config: Auto White balance gain config
- * @flt_config: filter config
- * @bdm_config: demosaic config
- * @ctk_config: cross talk config
- * @goc_config: gamma out config
- * @bls_config: black level subtraction config
- * @dpf_config: De-noising pre-filter config
- * @dpf_strength_config: dpf strength config
- * @cproc_config: color process config
- * @ie_config: image effects config
- */
-struct rkisp1_cif_isp_isp_other_cfg {
-	struct rkisp1_cif_isp_dpcc_config dpcc_config;
-	struct rkisp1_cif_isp_bls_config bls_config;
-	struct rkisp1_cif_isp_sdg_config sdg_config;
-	struct rkisp1_cif_isp_lsc_config lsc_config;
-	struct rkisp1_cif_isp_awb_gain_config awb_gain_config;
-	struct rkisp1_cif_isp_flt_config flt_config;
-	struct rkisp1_cif_isp_bdm_config bdm_config;
-	struct rkisp1_cif_isp_ctk_config ctk_config;
-	struct rkisp1_cif_isp_goc_config goc_config;
-	struct rkisp1_cif_isp_dpf_config dpf_config;
-	struct rkisp1_cif_isp_dpf_strength_config dpf_strength_config;
-	struct rkisp1_cif_isp_cproc_config cproc_config;
-	struct rkisp1_cif_isp_ie_config ie_config;
-};
-
-/**
- * struct rkisp1_cif_isp_isp_meas_cfg - Rockchip ISP1 Measure Parameters
- *
- * @awb_meas_config: auto white balance config
- * @hst_config: histogram config
- * @aec_config: auto exposure config
- * @afc_config: auto focus config
- */
-struct rkisp1_cif_isp_isp_meas_cfg {
-	struct rkisp1_cif_isp_awb_meas_config awb_meas_config;
-	struct rkisp1_cif_isp_hst_config hst_config;
-	struct rkisp1_cif_isp_aec_config aec_config;
-	struct rkisp1_cif_isp_afc_config afc_config;
-};
-
-/**
- * struct rkisp1_params_cfg - Rockchip ISP1 Input Parameters Meta Data
- *
- * @module_en_update: mask the enable bits of which module should be updated
- * @module_ens: mask the enable value of each module, only update the module
- *		which correspond bit was set in module_en_update
- * @module_cfg_update: mask the config bits of which module should be updated
- * @meas: measurement config
- * @others: other config
- */
-struct rkisp1_params_cfg {
-	__u32 module_en_update;
-	__u32 module_ens;
-	__u32 module_cfg_update;
-
-	struct rkisp1_cif_isp_isp_meas_cfg meas;
-	struct rkisp1_cif_isp_isp_other_cfg others;
-};
-
-/*---------- PART2: Measurement Statistics ------------*/
-
-/**
- * struct rkisp1_cif_isp_awb_meas - AWB measured values
- *
- * @cnt: White pixel count, number of "white pixels" found during last
- *	 measurement
- * @mean_y_or_g: Mean value of Y within window and frames,
- *		 Green if RGB is selected.
- * @mean_cb_or_b: Mean value of Cb within window and frames,
- *		  Blue if RGB is selected.
- * @mean_cr_or_r: Mean value of Cr within window and frames,
- *		  Red if RGB is selected.
- */
-struct rkisp1_cif_isp_awb_meas {
-	__u32 cnt;
-	__u8 mean_y_or_g;
-	__u8 mean_cb_or_b;
-	__u8 mean_cr_or_r;
-};
-
-/**
- * struct rkisp1_cif_isp_awb_stat - statistics automatic white balance data
- *
- * @awb_mean: Mean measured data
- */
-struct rkisp1_cif_isp_awb_stat {
-	struct rkisp1_cif_isp_awb_meas awb_mean[RKISP1_CIF_ISP_AWB_MAX_GRID];
-};
-
-/**
- * struct rkisp1_cif_isp_bls_meas_val - BLS measured values
- *
- * @meas_r: Mean measured value for Bayer pattern R
- * @meas_gr: Mean measured value for Bayer pattern Gr
- * @meas_gb: Mean measured value for Bayer pattern Gb
- * @meas_b: Mean measured value for Bayer pattern B
- */
-struct rkisp1_cif_isp_bls_meas_val {
-	__u16 meas_r;
-	__u16 meas_gr;
-	__u16 meas_gb;
-	__u16 meas_b;
-};
-
-/**
- * struct rkisp1_cif_isp_ae_stat - statistics auto exposure data
- *
- * @exp_mean: Mean luminance value of block xx
- * @bls_val:  BLS measured values
- *
- * Image is divided into 5x5 blocks.
- */
-struct rkisp1_cif_isp_ae_stat {
-	__u8 exp_mean[RKISP1_CIF_ISP_AE_MEAN_MAX];
-	struct rkisp1_cif_isp_bls_meas_val bls_val;
-};
-
-/**
- * struct rkisp1_cif_isp_af_meas_val - AF measured values
- *
- * @sum: sharpness value
- * @lum: luminance value
- */
-struct rkisp1_cif_isp_af_meas_val {
-	__u32 sum;
-	__u32 lum;
-};
-
-/**
- * struct rkisp1_cif_isp_af_stat - statistics auto focus data
- *
- * @window: AF measured value of window x
- *
- * The module measures the sharpness in 3 windows of selectable size via
- * register settings(ISP_AFM_*_A/B/C)
- */
-struct rkisp1_cif_isp_af_stat {
-	struct rkisp1_cif_isp_af_meas_val window[RKISP1_CIF_ISP_AFM_MAX_WINDOWS];
-};
-
-/**
- * struct rkisp1_cif_isp_hist_stat - statistics histogram data
- *
- * @hist_bins: measured bin counters
- *
- * Measurement window divided into 25 sub-windows, set
- * with ISP_HIST_XXX
- */
-struct rkisp1_cif_isp_hist_stat {
-	__u16 hist_bins[RKISP1_CIF_ISP_HIST_BIN_N_MAX];
-};
-
-/**
- * struct rkisp1_cif_isp_stat - Rockchip ISP1 Statistics Data
- *
- * @awb: statistics data for automatic white balance
- * @ae: statistics data for auto exposure
- * @af: statistics data for auto focus
- * @hist: statistics histogram data
- */
-struct rkisp1_cif_isp_stat {
-	struct rkisp1_cif_isp_awb_stat awb;
-	struct rkisp1_cif_isp_ae_stat ae;
-	struct rkisp1_cif_isp_af_stat af;
-	struct rkisp1_cif_isp_hist_stat hist;
-};
-
-/**
- * struct rkisp1_stat_buffer - Rockchip ISP1 Statistics Meta Data
- *
- * @meas_type: measurement types (RKISP1_CIF_ISP_STAT_* definitions)
- * @frame_id: frame ID for sync
- * @params: statistics data
- */
-struct rkisp1_stat_buffer {
-	__u32 meas_type;
-	__u32 frame_id;
-	struct rkisp1_cif_isp_stat params;
-};
-
-#endif /* _UAPI_RKISP1_CONFIG_H */
diff --git a/include/uapi/linux/rkisp1-config.h b/include/uapi/linux/rkisp1-config.h
new file mode 100644
index 000000000000..6e449e784260
--- /dev/null
+++ b/include/uapi/linux/rkisp1-config.h
@@ -0,0 +1,884 @@
+/* SPDX-License-Identifier: ((GPL-2.0+ WITH Linux-syscall-note) OR MIT) */
+/*
+ * Rockchip ISP1 userspace API
+ * Copyright (C) 2017 Rockchip Electronics Co., Ltd.
+ */
+
+#ifndef _UAPI_RKISP1_CONFIG_H
+#define _UAPI_RKISP1_CONFIG_H
+
+#include <linux/types.h>
+
+/* Defect Pixel Cluster Detection */
+#define RKISP1_CIF_ISP_MODULE_DPCC		(1U << 0)
+/* Black Level Subtraction */
+#define RKISP1_CIF_ISP_MODULE_BLS		(1U << 1)
+/* Sensor De-gamma */
+#define RKISP1_CIF_ISP_MODULE_SDG		(1U << 2)
+/* Histogram */
+#define RKISP1_CIF_ISP_MODULE_HST		(1U << 3)
+/* Lens Shade Control */
+#define RKISP1_CIF_ISP_MODULE_LSC		(1U << 4)
+/* Auto White Balance Gain */
+#define RKISP1_CIF_ISP_MODULE_AWB_GAIN		(1U << 5)
+/* Filter */
+#define RKISP1_CIF_ISP_MODULE_FLT		(1U << 6)
+/* Bayer Demosaic */
+#define RKISP1_CIF_ISP_MODULE_BDM		(1U << 7)
+/* Cross Talk */
+#define RKISP1_CIF_ISP_MODULE_CTK		(1U << 8)
+/* Gamma Out Curve */
+#define RKISP1_CIF_ISP_MODULE_GOC		(1U << 9)
+/* Color Processing */
+#define RKISP1_CIF_ISP_MODULE_CPROC		(1U << 10)
+/* Auto Focus Control */
+#define RKISP1_CIF_ISP_MODULE_AFC		(1U << 11)
+/* Auto White Balancing */
+#define RKISP1_CIF_ISP_MODULE_AWB		(1U << 12)
+/* Image Effect */
+#define RKISP1_CIF_ISP_MODULE_IE		(1U << 13)
+/* Auto Exposure Control */
+#define RKISP1_CIF_ISP_MODULE_AEC		(1U << 14)
+/* Wide Dynamic Range */
+#define RKISP1_CIF_ISP_MODULE_WDR		(1U << 15)
+/* Denoise Pre-Filter */
+#define RKISP1_CIF_ISP_MODULE_DPF		(1U << 16)
+/* Denoise Pre-Filter Strength */
+#define RKISP1_CIF_ISP_MODULE_DPF_STRENGTH	(1U << 17)
+
+#define RKISP1_CIF_ISP_CTK_COEFF_MAX            0x100
+#define RKISP1_CIF_ISP_CTK_OFFSET_MAX           0x800
+
+#define RKISP1_CIF_ISP_AE_MEAN_MAX              25
+#define RKISP1_CIF_ISP_HIST_BIN_N_MAX           16
+#define RKISP1_CIF_ISP_AFM_MAX_WINDOWS          3
+#define RKISP1_CIF_ISP_DEGAMMA_CURVE_SIZE       17
+
+#define RKISP1_CIF_ISP_BDM_MAX_TH               0xff
+
+/*
+ * Black level compensation
+ */
+/* maximum value for horizontal start address */
+#define RKISP1_CIF_ISP_BLS_START_H_MAX             0x00000fff
+/* maximum value for horizontal stop address */
+#define RKISP1_CIF_ISP_BLS_STOP_H_MAX              0x00000fff
+/* maximum value for vertical start address */
+#define RKISP1_CIF_ISP_BLS_START_V_MAX             0x00000fff
+/* maximum value for vertical stop address */
+#define RKISP1_CIF_ISP_BLS_STOP_V_MAX              0x00000fff
+/* maximum exponent is 18 (0x12), i.e. 2^18 = 262144 samples */
+#define RKISP1_CIF_ISP_BLS_SAMPLES_MAX             0x00000012
+/* maximum value for fixed black level */
+#define RKISP1_CIF_ISP_BLS_FIX_SUB_MAX             0x00000fff
+/* minimum value for fixed black level */
+#define RKISP1_CIF_ISP_BLS_FIX_SUB_MIN             0xfffff000
+/* 13 bit range (signed)*/
+#define RKISP1_CIF_ISP_BLS_FIX_MASK                0x00001fff
+
+/*
+ * Automatic white balance measurements
+ */
+#define RKISP1_CIF_ISP_AWB_MAX_GRID                1
+#define RKISP1_CIF_ISP_AWB_MAX_FRAMES              7
+
+/*
+ * Gamma out
+ */
+/* Maximum number of color samples supported */
+#define RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES       17
+
+/*
+ * Lens shade correction
+ */
+#define RKISP1_CIF_ISP_LSC_SECTORS_TBL_SIZE        8
+
+/*
+ * The following matches the tuning process,
+ * not the max capabilities of the chip.
+ */
+#define RKISP1_CIF_ISP_LSC_SAMPLES_MAX             17
+
+/*
+ * Histogram calculation
+ */
+/* Last 3 values unused. */
+#define RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE 28
+
+/*
+ * Defect Pixel Cluster Correction
+ */
+#define RKISP1_CIF_ISP_DPCC_METHODS_MAX       3
+
+/*
+ * Denoising pre filter
+ */
+#define RKISP1_CIF_ISP_DPF_MAX_NLF_COEFFS      17
+#define RKISP1_CIF_ISP_DPF_MAX_SPATIAL_COEFFS  6
+
+/*
+ * Measurement types
+ */
+#define RKISP1_CIF_ISP_STAT_AWB           (1U << 0)
+#define RKISP1_CIF_ISP_STAT_AUTOEXP       (1U << 1)
+#define RKISP1_CIF_ISP_STAT_AFM           (1U << 2)
+#define RKISP1_CIF_ISP_STAT_HIST          (1U << 3)
+
+enum rkisp1_cif_isp_histogram_mode {
+	RKISP1_CIF_ISP_HISTOGRAM_MODE_DISABLE,
+	RKISP1_CIF_ISP_HISTOGRAM_MODE_RGB_COMBINED,
+	RKISP1_CIF_ISP_HISTOGRAM_MODE_R_HISTOGRAM,
+	RKISP1_CIF_ISP_HISTOGRAM_MODE_G_HISTOGRAM,
+	RKISP1_CIF_ISP_HISTOGRAM_MODE_B_HISTOGRAM,
+	RKISP1_CIF_ISP_HISTOGRAM_MODE_Y_HISTOGRAM
+};
+
+enum rkisp1_cif_isp_awb_mode_type {
+	RKISP1_CIF_ISP_AWB_MODE_MANUAL,
+	RKISP1_CIF_ISP_AWB_MODE_RGB,
+	RKISP1_CIF_ISP_AWB_MODE_YCBCR
+};
+
+enum rkisp1_cif_isp_flt_mode {
+	RKISP1_CIF_ISP_FLT_STATIC_MODE,
+	RKISP1_CIF_ISP_FLT_DYNAMIC_MODE
+};
+
+/**
+ * enum rkisp1_cif_isp_exp_ctrl_autostop - stop modes
+ * @RKISP1_CIF_ISP_EXP_CTRL_AUTOSTOP_0: continuous measurement
+ * @RKISP1_CIF_ISP_EXP_CTRL_AUTOSTOP_1: stop measuring after a complete frame
+ */
+enum rkisp1_cif_isp_exp_ctrl_autostop {
+	RKISP1_CIF_ISP_EXP_CTRL_AUTOSTOP_0 = 0,
+	RKISP1_CIF_ISP_EXP_CTRL_AUTOSTOP_1 = 1,
+};
+
+/**
+ * enum rkisp1_cif_isp_exp_meas_mode - Exposure measure mode
+ * @RKISP1_CIF_ISP_EXP_MEASURING_MODE_0: Y = 16 + 0.25R + 0.5G + 0.1094B
+ * @RKISP1_CIF_ISP_EXP_MEASURING_MODE_1: Y = (R + G + B) x (85/256)
+ */
+enum rkisp1_cif_isp_exp_meas_mode {
+	RKISP1_CIF_ISP_EXP_MEASURING_MODE_0,
+	RKISP1_CIF_ISP_EXP_MEASURING_MODE_1,
+};
+
+/*---------- PART1: Input Parameters ------------*/
+
+/**
+ * struct rkisp1_cif_isp_window - measurement window.
+ *
+ * Measurements are calculated per window inside the frame.
+ * This struct represents a window for a measurement.
+ *
+ * @h_offs: the horizontal offset of the window from the left of the frame in pixels.
+ * @v_offs: the vertical offset of the window from the top of the frame in pixels.
+ * @h_size: the horizontal size of the window in pixels
+ * @v_size: the vertical size of the window in pixels.
+ */
+struct rkisp1_cif_isp_window {
+	__u16 h_offs;
+	__u16 v_offs;
+	__u16 h_size;
+	__u16 v_size;
+};
+
+/**
+ * struct rkisp1_cif_isp_bls_fixed_val - BLS fixed subtraction values
+ *
+ * The values will be subtracted from the sensor
+ * values. Therefore a negative value means addition instead of subtraction!
+ *
+ * @r: Fixed (signed!) subtraction value for Bayer pattern R
+ * @gr: Fixed (signed!) subtraction value for Bayer pattern Gr
+ * @gb: Fixed (signed!) subtraction value for Bayer pattern Gb
+ * @b: Fixed (signed!) subtraction value for Bayer pattern B
+ */
+struct rkisp1_cif_isp_bls_fixed_val {
+	__s16 r;
+	__s16 gr;
+	__s16 gb;
+	__s16 b;
+};
+
+/**
+ * struct rkisp1_cif_isp_bls_config - Configuration used by black level subtraction
+ *
+ * @enable_auto: Automatic mode activated means that the measured values
+ *		 are subtracted. Otherwise the fixed subtraction
+ *		 values will be subtracted.
+ * @en_windows: enabled window
+ * @bls_window1: Measurement window 1 size
+ * @bls_window2: Measurement window 2 size
+ * @bls_samples: Set amount of measured pixels for each Bayer position
+ *		 (A, B, C and D) to 2^bls_samples.
+ * @fixed_val: Fixed subtraction values
+ */
+struct rkisp1_cif_isp_bls_config {
+	__u8 enable_auto;
+	__u8 en_windows;
+	struct rkisp1_cif_isp_window bls_window1;
+	struct rkisp1_cif_isp_window bls_window2;
+	__u8 bls_samples;
+	struct rkisp1_cif_isp_bls_fixed_val fixed_val;
+};
+
+/**
+ * struct rkisp1_cif_isp_dpcc_methods_config - Methods Configuration used by DPCC
+ *
+ * Methods Configuration used by Defect Pixel Cluster Correction
+ *
+ * @method: Method enable bits
+ * @line_thresh: Line threshold
+ * @line_mad_fac: Line MAD factor
+ * @pg_fac: Peak gradient factor
+ * @rnd_thresh: Rank Neighbor Difference threshold
+ * @rg_fac: Rank gradient factor
+ */
+struct rkisp1_cif_isp_dpcc_methods_config {
+	__u32 method;
+	__u32 line_thresh;
+	__u32 line_mad_fac;
+	__u32 pg_fac;
+	__u32 rnd_thresh;
+	__u32 rg_fac;
+};
+
+/**
+ * struct rkisp1_cif_isp_dpcc_config - Configuration used by DPCC
+ *
+ * Configuration used by Defect Pixel Cluster Correction
+ *
+ * @mode: dpcc output mode
+ * @output_mode: whether use hard coded methods
+ * @set_use: stage1 methods set
+ * @methods: methods config
+ * @ro_limits: rank order limits
+ * @rnd_offs: differential rank offsets for rank neighbor difference
+ */
+struct rkisp1_cif_isp_dpcc_config {
+	__u32 mode;
+	__u32 output_mode;
+	__u32 set_use;
+	struct rkisp1_cif_isp_dpcc_methods_config methods[RKISP1_CIF_ISP_DPCC_METHODS_MAX];
+	__u32 ro_limits;
+	__u32 rnd_offs;
+};
+
+/**
+ * struct rkisp1_cif_isp_gamma_corr_curve - gamma curve point definition y-axis (output).
+ *
+ * The reset values define a linear curve which has the same effect as bypass. Reset values are:
+ * gamma_y[0] = 0x0000, gamma_y[1] = 0x0100, ... gamma_y[15] = 0x0f00, gamma_y[16] = 0xfff
+ *
+ * @gamma_y: the values for the y-axis of gamma curve points. Each value is 12 bit.
+ */
+struct rkisp1_cif_isp_gamma_corr_curve {
+	__u16 gamma_y[RKISP1_CIF_ISP_DEGAMMA_CURVE_SIZE];
+};
+
+/**
+ * struct rkisp1_cif_isp_gamma_curve_x_axis_pnts - De-Gamma Curve definition x increments
+ *		(sampling points). gamma_dx0 is for the lower samples (1-8), gamma_dx1 is for the
+ *		higher samples (9-16). The reset values for both fields is 0x44444444. This means
+ *		that each sample is 4 units away from the previous one on the x-axis.
+ *
+ * @gamma_dx0: gamma curve sample points definitions. Bits 0:2 for sample 1. Bit 3 unused.
+ *		Bits 4:6 for sample 2. bit 7 unused ... Bits 28:30 for sample 8. Bit 31 unused
+ * @gamma_dx1: gamma curve sample points definitions. Bits 0:2 for sample 9. Bit 3 unused.
+ *		Bits 4:6 for sample 10. bit 7 unused ... Bits 28:30 for sample 16. Bit 31 unused
+ */
+struct rkisp1_cif_isp_gamma_curve_x_axis_pnts {
+	__u32 gamma_dx0;
+	__u32 gamma_dx1;
+};
+
+/**
+ * struct rkisp1_cif_isp_sdg_config - Configuration used by sensor degamma
+ *
+ * @curve_r: gamma curve point definition axis for red
+ * @curve_g: gamma curve point definition axis for green
+ * @curve_b: gamma curve point definition axis for blue
+ * @xa_pnts: x axis increments
+ */
+struct rkisp1_cif_isp_sdg_config {
+	struct rkisp1_cif_isp_gamma_corr_curve curve_r;
+	struct rkisp1_cif_isp_gamma_corr_curve curve_g;
+	struct rkisp1_cif_isp_gamma_corr_curve curve_b;
+	struct rkisp1_cif_isp_gamma_curve_x_axis_pnts xa_pnts;
+};
+
+/**
+ * struct rkisp1_cif_isp_lsc_config - Configuration used by Lens shading correction
+ *
+ * @r_data_tbl: sample table red
+ * @gr_data_tbl: sample table green (red)
+ * @gb_data_tbl: sample table green (blue)
+ * @b_data_tbl: sample table blue
+ * @x_grad_tbl: gradient table x
+ * @y_grad_tbl: gradient table y
+ * @x_size_tbl: size table x
+ * @y_size_tbl: size table y
+ * @config_width: not used at the moment
+ * @config_height: not used at the moment
+ */
+struct rkisp1_cif_isp_lsc_config {
+	__u16 r_data_tbl[RKISP1_CIF_ISP_LSC_SAMPLES_MAX][RKISP1_CIF_ISP_LSC_SAMPLES_MAX];
+	__u16 gr_data_tbl[RKISP1_CIF_ISP_LSC_SAMPLES_MAX][RKISP1_CIF_ISP_LSC_SAMPLES_MAX];
+	__u16 gb_data_tbl[RKISP1_CIF_ISP_LSC_SAMPLES_MAX][RKISP1_CIF_ISP_LSC_SAMPLES_MAX];
+	__u16 b_data_tbl[RKISP1_CIF_ISP_LSC_SAMPLES_MAX][RKISP1_CIF_ISP_LSC_SAMPLES_MAX];
+
+	__u16 x_grad_tbl[RKISP1_CIF_ISP_LSC_SECTORS_TBL_SIZE];
+	__u16 y_grad_tbl[RKISP1_CIF_ISP_LSC_SECTORS_TBL_SIZE];
+
+	__u16 x_size_tbl[RKISP1_CIF_ISP_LSC_SECTORS_TBL_SIZE];
+	__u16 y_size_tbl[RKISP1_CIF_ISP_LSC_SECTORS_TBL_SIZE];
+	__u16 config_width;
+	__u16 config_height;
+};
+
+/**
+ * struct rkisp1_cif_isp_ie_config - Configuration used by image effects
+ *
+ * @effect: values from 'enum v4l2_colorfx'. Possible values are: V4L2_COLORFX_SEPIA,
+ *		V4L2_COLORFX_SET_CBCR, V4L2_COLORFX_AQUA, V4L2_COLORFX_EMBOSS,
+ *		V4L2_COLORFX_SKETCH,   V4L2_COLORFX_BW,   V4L2_COLORFX_NEGATIVE
+ * @color_sel: bits 0:2 - colors bitmask (001 - blue, 010 - green, 100 - red).
+ *		bits 8:15 - Threshold value of the RGB colors for the color selection effect.
+ * @eff_mat_1: 3x3 Matrix Coefficients for Emboss Effect 1
+ * @eff_mat_2: 3x3 Matrix Coefficients for Emboss Effect 2
+ * @eff_mat_3: 3x3 Matrix Coefficients for Emboss 3/Sketch 1
+ * @eff_mat_4: 3x3 Matrix Coefficients for Sketch Effect 2
+ * @eff_mat_5: 3x3 Matrix Coefficients for Sketch Effect 3
+ * @eff_tint: Chrominance increment values of tint (used for sepia effect)
+ */
+struct rkisp1_cif_isp_ie_config {
+	__u16 effect;
+	__u16 color_sel;
+	__u16 eff_mat_1;
+	__u16 eff_mat_2;
+	__u16 eff_mat_3;
+	__u16 eff_mat_4;
+	__u16 eff_mat_5;
+	__u16 eff_tint;
+};
+
+/**
+ * struct rkisp1_cif_isp_cproc_config - Configuration used by Color Processing
+ *
+ * @c_out_range: Chrominance pixel clipping range at output.
+ *		 (0 for limit, 1 for full)
+ * @y_in_range: Luminance pixel clipping range at input.
+ * @y_out_range: Luminance pixel clipping range at output.
+ * @contrast: 00~ff, 0.0~1.992
+ * @brightness: 80~7F, -128~+127
+ * @sat: saturation, 00~FF, 0.0~1.992
+ * @hue: 80~7F, -90~+87.188
+ */
+struct rkisp1_cif_isp_cproc_config {
+	__u8 c_out_range;
+	__u8 y_in_range;
+	__u8 y_out_range;
+	__u8 contrast;
+	__u8 brightness;
+	__u8 sat;
+	__u8 hue;
+};
+
+/**
+ * struct rkisp1_cif_isp_awb_meas_config - Configuration used by auto white balance
+ *
+ * @awb_mode: the awb meas mode. From enum rkisp1_cif_isp_awb_mode_type.
+ * @awb_wnd: white balance measurement window (in pixels)
+ * @max_y: only pixels values < max_y contribute to awb measurement, set to 0
+ *	   to disable this feature
+ * @min_y: only pixels values > min_y contribute to awb measurement
+ * @max_csum: Chrominance sum maximum value, only consider pixels with Cb+Cr,
+ *	      smaller than threshold for awb measurements
+ * @min_c: Chrominance minimum value, only consider pixels with Cb/Cr
+ *	   each greater than threshold value for awb measurements
+ * @frames: number of frames - 1 used for mean value calculation
+ *	    (frames=0 means 1 frame)
+ * @awb_ref_cr: reference Cr value for AWB regulation, target for AWB
+ * @awb_ref_cb: reference Cb value for AWB regulation, target for AWB
+ * @enable_ymax_cmp: enable Y_MAX compare (Not valid in RGB measurement mode.)
+ */
+struct rkisp1_cif_isp_awb_meas_config {
+	/*
+	 * Note: currently the h and v offsets are mapped to grid offsets
+	 */
+	struct rkisp1_cif_isp_window awb_wnd;
+	__u32 awb_mode;
+	__u8 max_y;
+	__u8 min_y;
+	__u8 max_csum;
+	__u8 min_c;
+	__u8 frames;
+	__u8 awb_ref_cr;
+	__u8 awb_ref_cb;
+	__u8 enable_ymax_cmp;
+};
+
+/**
+ * struct rkisp1_cif_isp_awb_gain_config - Configuration used by auto white balance gain
+ *
+ * All fields in this struct are 10 bit, where:
+ * 0x100 = 1, unsigned integer value, range 0 to 4 with 8 bit fractional part.
+ *
+ * out_data_x = ( AWB_GAIN_X * in_data + 128) >> 8
+ *
+ * @gain_red: gain value for red component.
+ * @gain_green_r: gain value for green component in red line.
+ * @gain_blue: gain value for blue component.
+ * @gain_green_b: gain value for green component in blue line.
+ */
+struct rkisp1_cif_isp_awb_gain_config {
+	__u16 gain_red;
+	__u16 gain_green_r;
+	__u16 gain_blue;
+	__u16 gain_green_b;
+};
+
+/**
+ * struct rkisp1_cif_isp_flt_config - Configuration used by ISP filtering
+ *
+ * All 4 threshold fields (thresh_*) are 10 bits.
+ * All 6 factor fields (fac_*) are 6 bits.
+ *
+ * @mode: ISP_FILT_MODE register fields (from enum rkisp1_cif_isp_flt_mode)
+ * @grn_stage1: Green filter stage 1 select (range 0x0...0x8)
+ * @chr_h_mode: Chroma filter horizontal mode
+ * @chr_v_mode: Chroma filter vertical mode
+ * @thresh_bl0: If thresh_bl1 < sum_grad < thresh_bl0 then fac_bl0 is selected (blurring th)
+ * @thresh_bl1: If sum_grad < thresh_bl1 then fac_bl1 is selected (blurring th)
+ * @thresh_sh0: If thresh_sh0 < sum_grad < thresh_sh1 then fac_sh0 is selected (sharpening th)
+ * @thresh_sh1: If thresh_sh1 < sum_grad then fac_sh1 is selected (sharpening th)
+ * @lum_weight: Parameters for luminance weight function.
+ * @fac_sh1: filter factor for sharp1 level
+ * @fac_sh0: filter factor for sharp0 level
+ * @fac_mid: filter factor for mid level and for static filter mode
+ * @fac_bl0: filter factor for blur 0 level
+ * @fac_bl1: filter factor for blur 1 level (max blur)
+ */
+struct rkisp1_cif_isp_flt_config {
+	__u32 mode;
+	__u8 grn_stage1;
+	__u8 chr_h_mode;
+	__u8 chr_v_mode;
+	__u32 thresh_bl0;
+	__u32 thresh_bl1;
+	__u32 thresh_sh0;
+	__u32 thresh_sh1;
+	__u32 lum_weight;
+	__u32 fac_sh1;
+	__u32 fac_sh0;
+	__u32 fac_mid;
+	__u32 fac_bl0;
+	__u32 fac_bl1;
+};
+
+/**
+ * struct rkisp1_cif_isp_bdm_config - Configuration used by Bayer DeMosaic
+ *
+ * @demosaic_th: threshold for bayer demosaicing texture detection
+ */
+struct rkisp1_cif_isp_bdm_config {
+	__u8 demosaic_th;
+};
+
+/**
+ * struct rkisp1_cif_isp_ctk_config - Configuration used by Cross Talk correction
+ *
+ * @coeff: color correction matrix. Values are 11-bit signed fixed-point numbers with 4 bit integer
+ *		and 7 bit fractional part, ranging from -8 (0x400) to +7.992 (0x3FF). 0 is
+ *		represented by 0x000 and a coefficient value of 1 as 0x080.
+ * @ct_offset: Red, Green, Blue offsets for the crosstalk correction matrix
+ */
+struct rkisp1_cif_isp_ctk_config {
+	__u16 coeff[3][3];
+	__u16 ct_offset[3];
+};
+
+enum rkisp1_cif_isp_goc_mode {
+	RKISP1_CIF_ISP_GOC_MODE_LOGARITHMIC,
+	RKISP1_CIF_ISP_GOC_MODE_EQUIDISTANT
+};
+
+/**
+ * struct rkisp1_cif_isp_goc_config - Configuration used by Gamma Out correction
+ *
+ * @mode: goc mode (from enum rkisp1_cif_isp_goc_mode)
+ * @gamma_y: gamma out curve y-axis for all color components
+ */
+struct rkisp1_cif_isp_goc_config {
+	__u32 mode;
+	__u16 gamma_y[RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES];
+};
+
+/**
+ * struct rkisp1_cif_isp_hst_config - Configuration used by Histogram
+ *
+ * @mode: histogram mode (from enum rkisp1_cif_isp_histogram_mode)
+ * @histogram_predivider: process every stepsize pixel, all other pixels are
+ *			  skipped
+ * @meas_window: coordinates of the measure window
+ * @hist_weight: weighting factor for sub-windows
+ */
+struct rkisp1_cif_isp_hst_config {
+	__u32 mode;
+	__u8 histogram_predivider;
+	struct rkisp1_cif_isp_window meas_window;
+	__u8 hist_weight[RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE];
+};
+
+/**
+ * struct rkisp1_cif_isp_aec_config - Configuration used by Auto Exposure Control
+ *
+ * @mode: Exposure measure mode (from enum rkisp1_cif_isp_exp_meas_mode)
+ * @autostop: stop mode (from enum rkisp1_cif_isp_exp_ctrl_autostop)
+ * @meas_window: coordinates of the measure window
+ */
+struct rkisp1_cif_isp_aec_config {
+	__u32 mode;
+	__u32 autostop;
+	struct rkisp1_cif_isp_window meas_window;
+};
+
+/**
+ * struct rkisp1_cif_isp_afc_config - Configuration used by Auto Focus Control
+ *
+ * @num_afm_win: max RKISP1_CIF_ISP_AFM_MAX_WINDOWS
+ * @afm_win: coordinates of the meas window
+ * @thres: threshold used for minimizing the influence of noise
+ * @var_shift: the number of bits for the shift operation at the end of the
+ *	       calculation chain.
+ */
+struct rkisp1_cif_isp_afc_config {
+	__u8 num_afm_win;
+	struct rkisp1_cif_isp_window afm_win[RKISP1_CIF_ISP_AFM_MAX_WINDOWS];
+	__u32 thres;
+	__u32 var_shift;
+};
+
+/**
+ * enum rkisp1_cif_isp_dpf_gain_usage - dpf gain usage
+ * @RKISP1_CIF_ISP_DPF_GAIN_USAGE_DISABLED: don't use any gains in preprocessing stage
+ * @RKISP1_CIF_ISP_DPF_GAIN_USAGE_NF_GAINS: use only the noise function gains from
+ *				    registers DPF_NF_GAIN_R, ...
+ * @RKISP1_CIF_ISP_DPF_GAIN_USAGE_LSC_GAINS:  use only the gains from LSC module
+ * @RKISP1_CIF_ISP_DPF_GAIN_USAGE_NF_LSC_GAINS: use the noise function gains and the
+ *					gains from LSC module
+ * @RKISP1_CIF_ISP_DPF_GAIN_USAGE_AWB_GAINS: use only the gains from AWB module
+ * @RKISP1_CIF_ISP_DPF_GAIN_USAGE_AWB_LSC_GAINS: use the gains from AWB and LSC module
+ * @RKISP1_CIF_ISP_DPF_GAIN_USAGE_MAX: upper border (only for an internal evaluation)
+ */
+enum rkisp1_cif_isp_dpf_gain_usage {
+	RKISP1_CIF_ISP_DPF_GAIN_USAGE_DISABLED,
+	RKISP1_CIF_ISP_DPF_GAIN_USAGE_NF_GAINS,
+	RKISP1_CIF_ISP_DPF_GAIN_USAGE_LSC_GAINS,
+	RKISP1_CIF_ISP_DPF_GAIN_USAGE_NF_LSC_GAINS,
+	RKISP1_CIF_ISP_DPF_GAIN_USAGE_AWB_GAINS,
+	RKISP1_CIF_ISP_DPF_GAIN_USAGE_AWB_LSC_GAINS,
+	RKISP1_CIF_ISP_DPF_GAIN_USAGE_MAX
+};
+
+/**
+ * enum rkisp1_cif_isp_dpf_rb_filtersize - Red and blue filter sizes
+ * @RKISP1_CIF_ISP_DPF_RB_FILTERSIZE_13x9: red and blue filter kernel size 13x9
+ *				   (means 7x5 active pixel)
+ * @RKISP1_CIF_ISP_DPF_RB_FILTERSIZE_9x9: red and blue filter kernel size 9x9
+ *				   (means 5x5 active pixel)
+ */
+enum rkisp1_cif_isp_dpf_rb_filtersize {
+	RKISP1_CIF_ISP_DPF_RB_FILTERSIZE_13x9,
+	RKISP1_CIF_ISP_DPF_RB_FILTERSIZE_9x9,
+};
+
+/**
+ * enum rkisp1_cif_isp_dpf_nll_scale_mode - dpf noise level scale mode
+ * @RKISP1_CIF_ISP_NLL_SCALE_LINEAR: use a linear scaling
+ * @RKISP1_CIF_ISP_NLL_SCALE_LOGARITHMIC: use a logarithmic scaling
+ */
+enum rkisp1_cif_isp_dpf_nll_scale_mode {
+	RKISP1_CIF_ISP_NLL_SCALE_LINEAR,
+	RKISP1_CIF_ISP_NLL_SCALE_LOGARITHMIC,
+};
+
+/**
+ * struct rkisp1_cif_isp_dpf_nll - Noise level lookup
+ *
+ * @coeff: Noise level Lookup coefficient
+ * @scale_mode: dpf noise level scale mode (from enum rkisp1_cif_isp_dpf_nll_scale_mode)
+ */
+struct rkisp1_cif_isp_dpf_nll {
+	__u16 coeff[RKISP1_CIF_ISP_DPF_MAX_NLF_COEFFS];
+	__u32 scale_mode;
+};
+
+/**
+ * struct rkisp1_cif_isp_dpf_rb_flt - Red blue filter config
+ *
+ * @fltsize: The filter size for the red and blue pixels
+ *	     (from enum rkisp1_cif_isp_dpf_rb_filtersize)
+ * @spatial_coeff: Spatial weights
+ * @r_enable: enable filter processing for red pixels
+ * @b_enable: enable filter processing for blue pixels
+ */
+struct rkisp1_cif_isp_dpf_rb_flt {
+	__u32 fltsize;
+	__u8 spatial_coeff[RKISP1_CIF_ISP_DPF_MAX_SPATIAL_COEFFS];
+	__u8 r_enable;
+	__u8 b_enable;
+};
+
+/**
+ * struct rkisp1_cif_isp_dpf_g_flt - Green filter Configuration
+ *
+ * @spatial_coeff: Spatial weights
+ * @gr_enable: enable filter processing for green pixels in green/red lines
+ * @gb_enable: enable filter processing for green pixels in green/blue lines
+ */
+struct rkisp1_cif_isp_dpf_g_flt {
+	__u8 spatial_coeff[RKISP1_CIF_ISP_DPF_MAX_SPATIAL_COEFFS];
+	__u8 gr_enable;
+	__u8 gb_enable;
+};
+
+/**
+ * struct rkisp1_cif_isp_dpf_gain - Noise function Configuration
+ *
+ * @mode: dpf gain usage  (from enum rkisp1_cif_isp_dpf_gain_usage)
+ * @nf_r_gain: Noise function Gain that replaces the AWB gain for red pixels
+ * @nf_b_gain: Noise function Gain that replaces the AWB gain for blue pixels
+ * @nf_gr_gain: Noise function Gain that replaces the AWB gain
+ *		for green pixels in a red line
+ * @nf_gb_gain: Noise function Gain that replaces the AWB gain
+ *		for green pixels in a blue line
+ */
+struct rkisp1_cif_isp_dpf_gain {
+	__u32 mode;
+	__u16 nf_r_gain;
+	__u16 nf_b_gain;
+	__u16 nf_gr_gain;
+	__u16 nf_gb_gain;
+};
+
+/**
+ * struct rkisp1_cif_isp_dpf_config - Configuration used by De-noising pre-filter
+ *
+ * @gain: noise function gain
+ * @g_flt: green filter config
+ * @rb_flt: red blue filter config
+ * @nll: noise level lookup
+ */
+struct rkisp1_cif_isp_dpf_config {
+	struct rkisp1_cif_isp_dpf_gain gain;
+	struct rkisp1_cif_isp_dpf_g_flt g_flt;
+	struct rkisp1_cif_isp_dpf_rb_flt rb_flt;
+	struct rkisp1_cif_isp_dpf_nll nll;
+};
+
+/**
+ * struct rkisp1_cif_isp_dpf_strength_config - strength of the filter
+ *
+ * @r: filter strength of the RED filter
+ * @g: filter strength of the GREEN filter
+ * @b: filter strength of the BLUE filter
+ */
+struct rkisp1_cif_isp_dpf_strength_config {
+	__u8 r;
+	__u8 g;
+	__u8 b;
+};
+
+/**
+ * struct rkisp1_cif_isp_isp_other_cfg - Parameters for some blocks in rockchip isp1
+ *
+ * @dpcc_config: Defect Pixel Cluster Correction config
+ * @bls_config: Black Level Subtraction config
+ * @sdg_config: sensor degamma config
+ * @lsc_config: Lens Shade config
+ * @awb_gain_config: Auto White balance gain config
+ * @flt_config: filter config
+ * @bdm_config: demosaic config
+ * @ctk_config: cross talk config
+ * @goc_config: gamma out config
+ * @bls_config: black level subtraction config
+ * @dpf_config: De-noising pre-filter config
+ * @dpf_strength_config: dpf strength config
+ * @cproc_config: color process config
+ * @ie_config: image effects config
+ */
+struct rkisp1_cif_isp_isp_other_cfg {
+	struct rkisp1_cif_isp_dpcc_config dpcc_config;
+	struct rkisp1_cif_isp_bls_config bls_config;
+	struct rkisp1_cif_isp_sdg_config sdg_config;
+	struct rkisp1_cif_isp_lsc_config lsc_config;
+	struct rkisp1_cif_isp_awb_gain_config awb_gain_config;
+	struct rkisp1_cif_isp_flt_config flt_config;
+	struct rkisp1_cif_isp_bdm_config bdm_config;
+	struct rkisp1_cif_isp_ctk_config ctk_config;
+	struct rkisp1_cif_isp_goc_config goc_config;
+	struct rkisp1_cif_isp_dpf_config dpf_config;
+	struct rkisp1_cif_isp_dpf_strength_config dpf_strength_config;
+	struct rkisp1_cif_isp_cproc_config cproc_config;
+	struct rkisp1_cif_isp_ie_config ie_config;
+};
+
+/**
+ * struct rkisp1_cif_isp_isp_meas_cfg - Rockchip ISP1 Measure Parameters
+ *
+ * @awb_meas_config: auto white balance config
+ * @hst_config: histogram config
+ * @aec_config: auto exposure config
+ * @afc_config: auto focus config
+ */
+struct rkisp1_cif_isp_isp_meas_cfg {
+	struct rkisp1_cif_isp_awb_meas_config awb_meas_config;
+	struct rkisp1_cif_isp_hst_config hst_config;
+	struct rkisp1_cif_isp_aec_config aec_config;
+	struct rkisp1_cif_isp_afc_config afc_config;
+};
+
+/**
+ * struct rkisp1_params_cfg - Rockchip ISP1 Input Parameters Meta Data
+ *
+ * @module_en_update: mask selecting which modules' enable bits should be updated
+ * @module_ens: mask of the enable value of each module; only modules whose
+ *		corresponding bit is set in module_en_update are updated
+ * @module_cfg_update: mask selecting which modules' configuration should be updated
+ * @meas: measurement config
+ * @others: other config
+ */
+struct rkisp1_params_cfg {
+	__u32 module_en_update;
+	__u32 module_ens;
+	__u32 module_cfg_update;
+
+	struct rkisp1_cif_isp_isp_meas_cfg meas;
+	struct rkisp1_cif_isp_isp_other_cfg others;
+};
+
+/*---------- PART2: Measurement Statistics ------------*/
+
+/**
+ * struct rkisp1_cif_isp_awb_meas - AWB measured values
+ *
+ * @cnt: White pixel count, number of "white pixels" found during last
+ *	 measurement
+ * @mean_y_or_g: Mean value of Y within window and frames,
+ *		 Green if RGB is selected.
+ * @mean_cb_or_b: Mean value of Cb within window and frames,
+ *		  Blue if RGB is selected.
+ * @mean_cr_or_r: Mean value of Cr within window and frames,
+ *		  Red if RGB is selected.
+ */
+struct rkisp1_cif_isp_awb_meas {
+	__u32 cnt;
+	__u8 mean_y_or_g;
+	__u8 mean_cb_or_b;
+	__u8 mean_cr_or_r;
+};
+
+/**
+ * struct rkisp1_cif_isp_awb_stat - statistics automatic white balance data
+ *
+ * @awb_mean: Mean measured data
+ */
+struct rkisp1_cif_isp_awb_stat {
+	struct rkisp1_cif_isp_awb_meas awb_mean[RKISP1_CIF_ISP_AWB_MAX_GRID];
+};
+
+/**
+ * struct rkisp1_cif_isp_bls_meas_val - BLS measured values
+ *
+ * @meas_r: Mean measured value for Bayer pattern R
+ * @meas_gr: Mean measured value for Bayer pattern Gr
+ * @meas_gb: Mean measured value for Bayer pattern Gb
+ * @meas_b: Mean measured value for Bayer pattern B
+ */
+struct rkisp1_cif_isp_bls_meas_val {
+	__u16 meas_r;
+	__u16 meas_gr;
+	__u16 meas_gb;
+	__u16 meas_b;
+};
+
+/**
+ * struct rkisp1_cif_isp_ae_stat - statistics auto exposure data
+ *
+ * @exp_mean: Mean luminance value of block xx
+ * @bls_val:  BLS measured values
+ *
+ * Image is divided into 5x5 blocks.
+ */
+struct rkisp1_cif_isp_ae_stat {
+	__u8 exp_mean[RKISP1_CIF_ISP_AE_MEAN_MAX];
+	struct rkisp1_cif_isp_bls_meas_val bls_val;
+};
+
+/**
+ * struct rkisp1_cif_isp_af_meas_val - AF measured values
+ *
+ * @sum: sharpness value
+ * @lum: luminance value
+ */
+struct rkisp1_cif_isp_af_meas_val {
+	__u32 sum;
+	__u32 lum;
+};
+
+/**
+ * struct rkisp1_cif_isp_af_stat - statistics auto focus data
+ *
+ * @window: AF measured value of window x
+ *
+ * The module measures the sharpness in 3 windows of selectable size via
+ * register settings(ISP_AFM_*_A/B/C)
+ */
+struct rkisp1_cif_isp_af_stat {
+	struct rkisp1_cif_isp_af_meas_val window[RKISP1_CIF_ISP_AFM_MAX_WINDOWS];
+};
+
+/**
+ * struct rkisp1_cif_isp_hist_stat - statistics histogram data
+ *
+ * @hist_bins: measured bin counters
+ *
+ * Measurement window divided into 25 sub-windows, set
+ * with ISP_HIST_XXX
+ */
+struct rkisp1_cif_isp_hist_stat {
+	__u16 hist_bins[RKISP1_CIF_ISP_HIST_BIN_N_MAX];
+};
+
+/**
+ * struct rkisp1_cif_isp_stat - Rockchip ISP1 Statistics Data
+ *
+ * @awb: statistics data for automatic white balance
+ * @ae: statistics data for auto exposure
+ * @af: statistics data for auto focus
+ * @hist: statistics histogram data
+ */
+struct rkisp1_cif_isp_stat {
+	struct rkisp1_cif_isp_awb_stat awb;
+	struct rkisp1_cif_isp_ae_stat ae;
+	struct rkisp1_cif_isp_af_stat af;
+	struct rkisp1_cif_isp_hist_stat hist;
+};
+
+/**
+ * struct rkisp1_stat_buffer - Rockchip ISP1 Statistics Meta Data
+ *
+ * @meas_type: measurement types (RKISP1_CIF_ISP_STAT_* definitions)
+ * @frame_id: frame ID for sync
+ * @params: statistics data
+ */
+struct rkisp1_stat_buffer {
+	__u32 meas_type;
+	__u32 frame_id;
+	struct rkisp1_cif_isp_stat params;
+};
+
+#endif /* _UAPI_RKISP1_CONFIG_H */
-- 
cgit v1.2.3


From 63ee07d5a8d7f279187984dbdf6bbfd571c937dd Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Sun, 1 Nov 2020 09:31:49 +0000
Subject: media: uapi: Add MEDIA_BUS_FMT_RGB888_3X8_DELTA media bus format

Add media bus format for 24-bit panels that expect their pixel data to
be sent serially on a 8-bit bus, in RGB ordering on odd lines, and in
GBR ordering on even lines (aka delta-RGB).

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Reviewed-by: Sam Ravnborg <sam@ravnborg.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20201101093150.8071-4-paul@crapouillou.net
---
 include/uapi/linux/media-bus-format.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h
index 84fa53ffb13f..5d905ad6dbb2 100644
--- a/include/uapi/linux/media-bus-format.h
+++ b/include/uapi/linux/media-bus-format.h
@@ -34,7 +34,7 @@
 
 #define MEDIA_BUS_FMT_FIXED			0x0001
 
-/* RGB - next is	0x101d */
+/* RGB - next is	0x101e */
 #define MEDIA_BUS_FMT_RGB444_1X12		0x1016
 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_BE	0x1001
 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_LE	0x1002
@@ -56,6 +56,7 @@
 #define MEDIA_BUS_FMT_RGB888_2X12_BE		0x100b
 #define MEDIA_BUS_FMT_RGB888_2X12_LE		0x100c
 #define MEDIA_BUS_FMT_RGB888_3X8		0x101c
+#define MEDIA_BUS_FMT_RGB888_3X8_DELTA		0x101d
 #define MEDIA_BUS_FMT_RGB888_1X7X4_SPWG		0x1011
 #define MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA	0x1012
 #define MEDIA_BUS_FMT_ARGB8888_1X32		0x100d
-- 
cgit v1.2.3


From f73659192b0bdf7bad826587b3530cef43cc048d Mon Sep 17 00:00:00 2001
From: Xie He <xie.he.0141@gmail.com>
Date: Sat, 14 Nov 2020 07:09:21 -0800
Subject: net: wan: Delete the DLCI / SDLA drivers

The DLCI driver (dlci.c) implements the Frame Relay protocol. However,
we already have another newer and better implementation of Frame Relay
provided by the HDLC_FR driver (hdlc_fr.c).

The DLCI driver's implementation of Frame Relay is used by only one
hardware driver in the kernel - the SDLA driver (sdla.c).

The SDLA driver provides Frame Relay support for the Sangoma S50x devices.
However, the vendor provides their own driver (along with their own
multi-WAN-protocol implementations including Frame Relay), called WANPIPE.
I believe most users of the hardware would use the vendor-provided WANPIPE
driver instead.

(The WANPIPE driver was even once in the kernel, but was deleted in
commit 8db60bcf3021 ("[WAN]: Remove broken and unmaintained Sangoma
drivers.") because the vendor no longer updated the in-kernel WANPIPE
driver.)

Cc: Mike McLagan <mike.mclagan@linux.org>
Signed-off-by: Xie He <xie.he.0141@gmail.com>
Link: https://lore.kernel.org/r/20201114150921.685594-1-xie.he.0141@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 CREDITS                                 |    9 -
 Documentation/networking/framerelay.rst |   44 -
 MAINTAINERS                             |    6 -
 arch/arm/configs/ixp4xx_defconfig       |    1 -
 arch/mips/configs/gpr_defconfig         |    1 -
 arch/mips/configs/mtx1_defconfig        |    1 -
 drivers/net/wan/Kconfig                 |   45 -
 drivers/net/wan/Makefile                |    2 -
 drivers/net/wan/dlci.c                  |  541 ----------
 drivers/net/wan/sdla.c                  | 1655 -------------------------------
 include/linux/if_frad.h                 |   92 --
 include/linux/sdla.h                    |  240 -----
 include/uapi/linux/if_frad.h            |  123 ---
 include/uapi/linux/sdla.h               |  117 ---
 net/socket.c                            |   25 -
 15 files changed, 2902 deletions(-)
 delete mode 100644 Documentation/networking/framerelay.rst
 delete mode 100644 drivers/net/wan/dlci.c
 delete mode 100644 drivers/net/wan/sdla.c
 delete mode 100644 include/linux/if_frad.h
 delete mode 100644 include/linux/sdla.h
 delete mode 100644 include/uapi/linux/if_frad.h
 delete mode 100644 include/uapi/linux/sdla.h

(limited to 'include/uapi')

diff --git a/CREDITS b/CREDITS
index 8592e45e3932..67421adb747c 100644
--- a/CREDITS
+++ b/CREDITS
@@ -2499,15 +2499,6 @@ W: http://www.rdrop.com/users/paulmck/
 D: RCU and variants
 D: rcutorture module
 
-N: Mike McLagan
-E: mike.mclagan@linux.org
-W: http://www.invlogic.com/~mmclagan
-D: DLCI/FRAD drivers for Sangoma SDLAs
-S: Innovative Logic Corp
-S: Post Office Box 1068
-S: Laurel, Maryland 20732
-S: USA
-
 N: Bradley McLean
 E: brad@bradpc.gaylord.com
 D: Device driver hacker
diff --git a/Documentation/networking/framerelay.rst b/Documentation/networking/framerelay.rst
deleted file mode 100644
index 6d904399ec6d..000000000000
--- a/Documentation/networking/framerelay.rst
+++ /dev/null
@@ -1,44 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-================
-Frame Relay (FR)
-================
-
-Frame Relay (FR) support for linux is built into a two tiered system of device
-drivers.  The upper layer implements RFC1490 FR specification, and uses the
-Data Link Connection Identifier (DLCI) as its hardware address.  Usually these
-are assigned by your network supplier, they give you the number/numbers of
-the Virtual Connections (VC) assigned to you.
-
-Each DLCI is a point-to-point link between your machine and a remote one.
-As such, a separate device is needed to accommodate the routing.  Within the
-net-tools archives is 'dlcicfg'.  This program will communicate with the
-base "DLCI" device, and create new net devices named 'dlci00', 'dlci01'...
-The configuration script will ask you how many DLCIs you need, as well as
-how many DLCIs you want to assign to each Frame Relay Access Device (FRAD).
-
-The DLCI uses a number of function calls to communicate with the FRAD, all
-of which are stored in the FRAD's private data area.  assoc/deassoc,
-activate/deactivate and dlci_config.  The DLCI supplies a receive function
-to the FRAD to accept incoming packets.
-
-With this initial offering, only 1 FRAD driver is available.  With many thanks
-to Sangoma Technologies, David Mandelstam & Gene Kozin, the S502A, S502E &
-S508 are supported.  This driver is currently set up for only FR, but as
-Sangoma makes more firmware modules available, it can be updated to provide
-them as well.
-
-Configuration of the FRAD makes use of another net-tools program, 'fradcfg'.
-This program makes use of a configuration file (which dlcicfg can also read)
-to specify the types of boards to be configured as FRADs, as well as perform
-any board specific configuration.  The Sangoma module of fradcfg loads the
-FR firmware into the card, sets the irq/port/memory information, and provides
-an initial configuration.
-
-Additional FRAD device drivers can be added as hardware is available.
-
-At this time, the dlcicfg and fradcfg programs have not been incorporated into
-the net-tools distribution.  They can be found at ftp.invlogic.com, in
-/pub/linux.  Note that with OS/2 FTPD, you end up in /pub by default, so just
-use 'cd linux'.  v0.10 is for use on pre-2.0.3 and earlier, v0.15 is for
-pre-2.0.4 and later.
diff --git a/MAINTAINERS b/MAINTAINERS
index af9f6a3ab100..3341959af0c7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6905,12 +6905,6 @@ S:	Maintained
 W:	http://floatingpoint.sourceforge.net/emulator/index.html
 F:	arch/x86/math-emu/
 
-FRAME RELAY DLCI/FRAD (Sangoma drivers too)
-L:	netdev@vger.kernel.org
-S:	Orphan
-F:	drivers/net/wan/dlci.c
-F:	drivers/net/wan/sdla.c
-
 FRAMEBUFFER LAYER
 M:	Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
 L:	dri-devel@lists.freedesktop.org
diff --git a/arch/arm/configs/ixp4xx_defconfig b/arch/arm/configs/ixp4xx_defconfig
index 27e7c0714b96..0d6edeb27659 100644
--- a/arch/arm/configs/ixp4xx_defconfig
+++ b/arch/arm/configs/ixp4xx_defconfig
@@ -141,7 +141,6 @@ CONFIG_HDLC_CISCO=m
 CONFIG_HDLC_FR=m
 CONFIG_HDLC_PPP=m
 CONFIG_HDLC_X25=m
-CONFIG_DLCI=m
 CONFIG_WAN_ROUTER_DRIVERS=m
 CONFIG_ATM_TCP=m
 # CONFIG_INPUT_KEYBOARD is not set
diff --git a/arch/mips/configs/gpr_defconfig b/arch/mips/configs/gpr_defconfig
index 599d5604aabe..8a921c8ac233 100644
--- a/arch/mips/configs/gpr_defconfig
+++ b/arch/mips/configs/gpr_defconfig
@@ -228,7 +228,6 @@ CONFIG_FARSYNC=m
 CONFIG_DSCC4=m
 CONFIG_DSCC4_PCISYNC=y
 CONFIG_DSCC4_PCI_RST=y
-CONFIG_DLCI=m
 CONFIG_LAPBETHER=m
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig
index dc69b054181c..30dacce94198 100644
--- a/arch/mips/configs/mtx1_defconfig
+++ b/arch/mips/configs/mtx1_defconfig
@@ -378,7 +378,6 @@ CONFIG_FARSYNC=m
 CONFIG_DSCC4=m
 CONFIG_DSCC4_PCISYNC=y
 CONFIG_DSCC4_PCI_RST=y
-CONFIG_DLCI=m
 CONFIG_LAPBETHER=m
 # CONFIG_KEYBOARD_ATKBD is not set
 CONFIG_KEYBOARD_GPIO=y
diff --git a/drivers/net/wan/Kconfig b/drivers/net/wan/Kconfig
index 2cf98a732a26..4029fde71a9e 100644
--- a/drivers/net/wan/Kconfig
+++ b/drivers/net/wan/Kconfig
@@ -321,51 +321,6 @@ config IXP4XX_HSS
 	  Say Y here if you want to use built-in HSS ports
 	  on IXP4xx processor.
 
-config DLCI
-	tristate "Frame Relay DLCI support"
-	help
-	  Support for the Frame Relay protocol.
-
-	  Frame Relay is a fast low-cost way to connect to a remote Internet
-	  access provider or to form a private wide area network. The one
-	  physical line from your box to the local "switch" (i.e. the entry
-	  point to the Frame Relay network, usually at the phone company) can
-	  carry several logical point-to-point connections to other computers
-	  connected to the Frame Relay network. For a general explanation of
-	  the protocol, check out <http://www.mplsforum.org/>.
-
-	  To use frame relay, you need supporting hardware (called FRAD) and
-	  certain programs from the net-tools package as explained in
-	  <file:Documentation/networking/framerelay.rst>.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called dlci.
-
-config DLCI_MAX
-	int "Max DLCI per device"
-	depends on DLCI
-	default "8"
-	help
-	  How many logical point-to-point frame relay connections (the
-	  identifiers of which are called DCLIs) should be handled by each
-	  of your hardware frame relay access devices.
-
-	  Go with the default.
-
-config SDLA
-	tristate "SDLA (Sangoma S502/S508) support"
-	depends on DLCI && ISA
-	help
-	  Driver for the Sangoma S502A, S502E, and S508 Frame Relay Access
-	  Devices.
-
-	  These are multi-protocol cards, but only Frame Relay is supported
-	  by the driver at this time. Please read
-	  <file:Documentation/networking/framerelay.rst>.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called sdla.
-
 # X.25 network drivers
 config LAPBETHER
 	tristate "LAPB over Ethernet driver"
diff --git a/drivers/net/wan/Makefile b/drivers/net/wan/Makefile
index 5b9dc85eae34..081666c36ca2 100644
--- a/drivers/net/wan/Makefile
+++ b/drivers/net/wan/Makefile
@@ -21,8 +21,6 @@ obj-$(CONFIG_FARSYNC)		+= farsync.o
 
 obj-$(CONFIG_LANMEDIA)		+= lmc/
 
-obj-$(CONFIG_DLCI)		+= dlci.o 
-obj-$(CONFIG_SDLA)		+= sdla.o
 obj-$(CONFIG_LAPBETHER)		+= lapbether.o
 obj-$(CONFIG_SBNI)		+= sbni.o
 obj-$(CONFIG_N2)		+= n2.o
diff --git a/drivers/net/wan/dlci.c b/drivers/net/wan/dlci.c
deleted file mode 100644
index 3ca4daf63389..000000000000
--- a/drivers/net/wan/dlci.c
+++ /dev/null
@@ -1,541 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * DLCI		Implementation of Frame Relay protocol for Linux, according to
- *		RFC 1490.  This generic device provides en/decapsulation for an
- *		underlying hardware driver.  Routes & IPs are assigned to these
- *		interfaces.  Requires 'dlcicfg' program to create usable 
- *		interfaces, the initial one, 'dlci' is for IOCTL use only.
- *
- * Version:	@(#)dlci.c	0.35	4 Jan 1997
- *
- * Author:	Mike McLagan <mike.mclagan@linux.org>
- *
- * Changes:
- *
- *		0.15	Mike Mclagan	Packet freeing, bug in kmalloc call
- *					DLCI_RET handling
- *		0.20	Mike McLagan	More conservative on which packets
- *					are returned for retry and which are
- *					are dropped.  If DLCI_RET_DROP is
- *					returned from the FRAD, the packet is
- *				 	sent back to Linux for re-transmission
- *		0.25	Mike McLagan	Converted to use SIOC IOCTL calls
- *		0.30	Jim Freeman	Fixed to allow IPX traffic
- *		0.35	Michael Elizabeth	Fixed incorrect memcpy_fromfs
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/fcntl.h>
-#include <linux/interrupt.h>
-#include <linux/ptrace.h>
-#include <linux/ioport.h>
-#include <linux/in.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <linux/if_arp.h>
-#include <linux/if_frad.h>
-#include <linux/bitops.h>
-
-#include <net/sock.h>
-
-#include <asm/io.h>
-#include <asm/dma.h>
-#include <linux/uaccess.h>
-
-static const char version[] = "DLCI driver v0.35, 4 Jan 1997, mike.mclagan@linux.org";
-
-static LIST_HEAD(dlci_devs);
-
-static void dlci_setup(struct net_device *);
-
-/* 
- * these encapsulate the RFC 1490 requirements as well as 
- * deal with packet transmission and reception, working with
- * the upper network layers 
- */
-
-static int dlci_header(struct sk_buff *skb, struct net_device *dev, 
-		       unsigned short type, const void *daddr,
-		       const void *saddr, unsigned len)
-{
-	struct frhdr		hdr;
-	unsigned int		hlen;
-	char			*dest;
-
-	hdr.control = FRAD_I_UI;
-	switch (type)
-	{
-		case ETH_P_IP:
-			hdr.IP_NLPID = FRAD_P_IP;
-			hlen = sizeof(hdr.control) + sizeof(hdr.IP_NLPID);
-			break;
-
-		/* feel free to add other types, if necessary */
-
-		default:
-			hdr.pad = FRAD_P_PADDING;
-			hdr.NLPID = FRAD_P_SNAP;
-			memset(hdr.OUI, 0, sizeof(hdr.OUI));
-			hdr.PID = htons(type);
-			hlen = sizeof(hdr);
-			break;
-	}
-
-	dest = skb_push(skb, hlen);
-	if (!dest)
-		return 0;
-
-	memcpy(dest, &hdr, hlen);
-
-	return hlen;
-}
-
-static void dlci_receive(struct sk_buff *skb, struct net_device *dev)
-{
-	struct frhdr		*hdr;
-	int					process, header;
-
-	if (!pskb_may_pull(skb, sizeof(*hdr))) {
-		netdev_notice(dev, "invalid data no header\n");
-		dev->stats.rx_errors++;
-		kfree_skb(skb);
-		return;
-	}
-
-	hdr = (struct frhdr *) skb->data;
-	process = 0;
-	header = 0;
-	skb->dev = dev;
-
-	if (hdr->control != FRAD_I_UI)
-	{
-		netdev_notice(dev, "Invalid header flag 0x%02X\n",
-			      hdr->control);
-		dev->stats.rx_errors++;
-	}
-	else
-		switch (hdr->IP_NLPID)
-		{
-			case FRAD_P_PADDING:
-				if (hdr->NLPID != FRAD_P_SNAP)
-				{
-					netdev_notice(dev, "Unsupported NLPID 0x%02X\n",
-						      hdr->NLPID);
-					dev->stats.rx_errors++;
-					break;
-				}
-	 
-				if (hdr->OUI[0] + hdr->OUI[1] + hdr->OUI[2] != 0)
-				{
-					netdev_notice(dev, "Unsupported organizationally unique identifier 0x%02X-%02X-%02X\n",
-						      hdr->OUI[0],
-						      hdr->OUI[1],
-						      hdr->OUI[2]);
-					dev->stats.rx_errors++;
-					break;
-				}
-
-				/* at this point, it's an EtherType frame */
-				header = sizeof(struct frhdr);
-				/* Already in network order ! */
-				skb->protocol = hdr->PID;
-				process = 1;
-				break;
-
-			case FRAD_P_IP:
-				header = sizeof(hdr->control) + sizeof(hdr->IP_NLPID);
-				skb->protocol = htons(ETH_P_IP);
-				process = 1;
-				break;
-
-			case FRAD_P_SNAP:
-			case FRAD_P_Q933:
-			case FRAD_P_CLNP:
-				netdev_notice(dev, "Unsupported NLPID 0x%02X\n",
-					      hdr->pad);
-				dev->stats.rx_errors++;
-				break;
-
-			default:
-				netdev_notice(dev, "Invalid pad byte 0x%02X\n",
-					      hdr->pad);
-				dev->stats.rx_errors++;
-				break;				
-		}
-
-	if (process)
-	{
-		/* we've set up the protocol, so discard the header */
-		skb_reset_mac_header(skb);
-		skb_pull(skb, header);
-		dev->stats.rx_bytes += skb->len;
-		netif_rx(skb);
-		dev->stats.rx_packets++;
-	}
-	else
-		dev_kfree_skb(skb);
-}
-
-static netdev_tx_t dlci_transmit(struct sk_buff *skb, struct net_device *dev)
-{
-	struct dlci_local *dlp = netdev_priv(dev);
-
-	if (skb) {
-		struct netdev_queue *txq = skb_get_tx_queue(dev, skb);
-		netdev_start_xmit(skb, dlp->slave, txq, false);
-	}
-	return NETDEV_TX_OK;
-}
-
-static int dlci_config(struct net_device *dev, struct dlci_conf __user *conf, int get)
-{
-	struct dlci_conf	config;
-	struct dlci_local	*dlp;
-	struct frad_local	*flp;
-	int			err;
-
-	dlp = netdev_priv(dev);
-
-	flp = netdev_priv(dlp->slave);
-
-	if (!get)
-	{
-		if (copy_from_user(&config, conf, sizeof(struct dlci_conf)))
-			return -EFAULT;
-		if (config.flags & ~DLCI_VALID_FLAGS)
-			return -EINVAL;
-		memcpy(&dlp->config, &config, sizeof(struct dlci_conf));
-		dlp->configured = 1;
-	}
-
-	err = (*flp->dlci_conf)(dlp->slave, dev, get);
-	if (err)
-		return err;
-
-	if (get)
-	{
-		if (copy_to_user(conf, &dlp->config, sizeof(struct dlci_conf)))
-			return -EFAULT;
-	}
-
-	return 0;
-}
-
-static int dlci_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
-{
-	struct dlci_local *dlp;
-
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
-
-	dlp = netdev_priv(dev);
-
-	switch (cmd)
-	{
-		case DLCI_GET_SLAVE:
-			if (!*(short *)(dev->dev_addr))
-				return -EINVAL;
-
-			strncpy(ifr->ifr_slave, dlp->slave->name, sizeof(ifr->ifr_slave));
-			break;
-
-		case DLCI_GET_CONF:
-		case DLCI_SET_CONF:
-			if (!*(short *)(dev->dev_addr))
-				return -EINVAL;
-
-			return dlci_config(dev, ifr->ifr_data, cmd == DLCI_GET_CONF);
-
-		default: 
-			return -EOPNOTSUPP;
-	}
-	return 0;
-}
-
-static int dlci_change_mtu(struct net_device *dev, int new_mtu)
-{
-	struct dlci_local *dlp = netdev_priv(dev);
-
-	return dev_set_mtu(dlp->slave, new_mtu);
-}
-
-static int dlci_open(struct net_device *dev)
-{
-	struct dlci_local	*dlp;
-	struct frad_local	*flp;
-	int			err;
-
-	dlp = netdev_priv(dev);
-
-	if (!*(short *)(dev->dev_addr))
-		return -EINVAL;
-
-	if (!netif_running(dlp->slave))
-		return -ENOTCONN;
-
-	flp = netdev_priv(dlp->slave);
-	err = (*flp->activate)(dlp->slave, dev);
-	if (err)
-		return err;
-
-	netif_start_queue(dev);
-
-	return 0;
-}
-
-static int dlci_close(struct net_device *dev)
-{
-	struct dlci_local	*dlp;
-	struct frad_local	*flp;
-
-	netif_stop_queue(dev);
-
-	dlp = netdev_priv(dev);
-
-	flp = netdev_priv(dlp->slave);
-	(*flp->deactivate)(dlp->slave, dev);
-
-	return 0;
-}
-
-static int dlci_add(struct dlci_add *dlci)
-{
-	struct net_device	*master, *slave;
-	struct dlci_local	*dlp;
-	struct frad_local	*flp;
-	int			err = -EINVAL;
-
-
-	/* validate slave device */
-	slave = dev_get_by_name(&init_net, dlci->devname);
-	if (!slave)
-		return -ENODEV;
-
-	if (slave->type != ARPHRD_FRAD || netdev_priv(slave) == NULL)
-		goto err1;
-
-	/* create device name */
-	master = alloc_netdev(sizeof(struct dlci_local), "dlci%d",
-			      NET_NAME_UNKNOWN, dlci_setup);
-	if (!master) {
-		err = -ENOMEM;
-		goto err1;
-	}
-
-	/* make sure same slave not already registered */
-	rtnl_lock();
-	list_for_each_entry(dlp, &dlci_devs, list) {
-		if (dlp->slave == slave) {
-			err = -EBUSY;
-			goto err2;
-		}
-	}
-
-	*(short *)(master->dev_addr) = dlci->dlci;
-
-	dlp = netdev_priv(master);
-	dlp->slave = slave;
-	dlp->master = master;
-
-	flp = netdev_priv(slave);
-	err = (*flp->assoc)(slave, master);
-	if (err < 0)
-		goto err2;
-
-	err = register_netdevice(master);
-	if (err < 0) 
-		goto err2;
-
-	strcpy(dlci->devname, master->name);
-
-	list_add(&dlp->list, &dlci_devs);
-	rtnl_unlock();
-
-	return 0;
-
- err2:
-	rtnl_unlock();
-	free_netdev(master);
- err1:
-	dev_put(slave);
-	return err;
-}
-
-static int dlci_del(struct dlci_add *dlci)
-{
-	struct dlci_local	*dlp;
-	struct frad_local	*flp;
-	struct net_device	*master, *slave;
-	int			err;
-	bool			found = false;
-
-	rtnl_lock();
-
-	/* validate slave device */
-	master = __dev_get_by_name(&init_net, dlci->devname);
-	if (!master) {
-		err = -ENODEV;
-		goto out;
-	}
-
-	list_for_each_entry(dlp, &dlci_devs, list) {
-		if (dlp->master == master) {
-			found = true;
-			break;
-		}
-	}
-	if (!found) {
-		err = -ENODEV;
-		goto out;
-	}
-
-	if (netif_running(master)) {
-		err = -EBUSY;
-		goto out;
-	}
-
-	dlp = netdev_priv(master);
-	slave = dlp->slave;
-	flp = netdev_priv(slave);
-
-	err = (*flp->deassoc)(slave, master);
-	if (!err) {
-		list_del(&dlp->list);
-
-		unregister_netdevice(master);
-
-		dev_put(slave);
-	}
-out:
-	rtnl_unlock();
-	return err;
-}
-
-static int dlci_ioctl(unsigned int cmd, void __user *arg)
-{
-	struct dlci_add add;
-	int err;
-	
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
-
-	if (copy_from_user(&add, arg, sizeof(struct dlci_add)))
-		return -EFAULT;
-
-	switch (cmd)
-	{
-		case SIOCADDDLCI:
-			err = dlci_add(&add);
-
-			if (!err)
-				if (copy_to_user(arg, &add, sizeof(struct dlci_add)))
-					return -EFAULT;
-			break;
-
-		case SIOCDELDLCI:
-			err = dlci_del(&add);
-			break;
-
-		default:
-			err = -EINVAL;
-	}
-
-	return err;
-}
-
-static const struct header_ops dlci_header_ops = {
-	.create	= dlci_header,
-};
-
-static const struct net_device_ops dlci_netdev_ops = {
-	.ndo_open	= dlci_open,
-	.ndo_stop	= dlci_close,
-	.ndo_do_ioctl	= dlci_dev_ioctl,
-	.ndo_start_xmit	= dlci_transmit,
-	.ndo_change_mtu	= dlci_change_mtu,
-};
-
-static void dlci_setup(struct net_device *dev)
-{
-	struct dlci_local *dlp = netdev_priv(dev);
-
-	dev->flags		= 0;
-	dev->header_ops		= &dlci_header_ops;
-	dev->netdev_ops		= &dlci_netdev_ops;
-	dev->needs_free_netdev	= true;
-
-	dlp->receive		= dlci_receive;
-
-	dev->type		= ARPHRD_DLCI;
-	dev->hard_header_len	= sizeof(struct frhdr);
-	dev->addr_len		= sizeof(short);
-
-}
-
-/* if slave is unregistering, then cleanup master */
-static int dlci_dev_event(struct notifier_block *unused,
-			  unsigned long event, void *ptr)
-{
-	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-
-	if (dev_net(dev) != &init_net)
-		return NOTIFY_DONE;
-
-	if (event == NETDEV_UNREGISTER) {
-		struct dlci_local *dlp;
-
-		list_for_each_entry(dlp, &dlci_devs, list) {
-			if (dlp->slave == dev) {
-				list_del(&dlp->list);
-				unregister_netdevice(dlp->master);
-				dev_put(dlp->slave);
-				break;
-			}
-		}
-	}
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block dlci_notifier = {
-	.notifier_call = dlci_dev_event,
-};
-
-static int __init init_dlci(void)
-{
-	dlci_ioctl_set(dlci_ioctl);
-	register_netdevice_notifier(&dlci_notifier);
-
-	printk("%s.\n", version);
-
-	return 0;
-}
-
-static void __exit dlci_exit(void)
-{
-	struct dlci_local	*dlp, *nxt;
-	
-	dlci_ioctl_set(NULL);
-	unregister_netdevice_notifier(&dlci_notifier);
-
-	rtnl_lock();
-	list_for_each_entry_safe(dlp, nxt, &dlci_devs, list) {
-		unregister_netdevice(dlp->master);
-		dev_put(dlp->slave);
-	}
-	rtnl_unlock();
-}
-
-module_init(init_dlci);
-module_exit(dlci_exit);
-
-MODULE_AUTHOR("Mike McLagan");
-MODULE_DESCRIPTION("Frame Relay DLCI layer");
-MODULE_LICENSE("GPL");
diff --git a/drivers/net/wan/sdla.c b/drivers/net/wan/sdla.c
deleted file mode 100644
index bc2c1c7fb1a4..000000000000
--- a/drivers/net/wan/sdla.c
+++ /dev/null
@@ -1,1655 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * SDLA		An implementation of a driver for the Sangoma S502/S508 series
- *		multi-protocol PC interface card.  Initial offering is with 
- *		the DLCI driver, providing Frame Relay support for linux.
- *
- *		Global definitions for the Frame relay interface.
- *
- * Version:	@(#)sdla.c   0.30	12 Sep 1996
- *
- * Credits:	Sangoma Technologies, for the use of 2 cards for an extended
- *			period of time.
- *		David Mandelstam <dm@sangoma.com> for getting me started on 
- *			this project, and incentive to complete it.
- *		Gene Kozen <74604.152@compuserve.com> for providing me with
- *			important information about the cards.
- *
- * Author:	Mike McLagan <mike.mclagan@linux.org>
- *
- * Changes:
- *		0.15	Mike McLagan	Improved error handling, packet dropping
- *		0.20	Mike McLagan	New transmit/receive flags for config
- *					If in FR mode, don't accept packets from
- *					non DLCI devices.
- *		0.25	Mike McLagan	Fixed problem with rejecting packets
- *					from non DLCI devices.
- *		0.30	Mike McLagan	Fixed kernel panic when used with modified
- *					ifconfig
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/fcntl.h>
-#include <linux/interrupt.h>
-#include <linux/ptrace.h>
-#include <linux/ioport.h>
-#include <linux/in.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/timer.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <linux/if_arp.h>
-#include <linux/if_frad.h>
-#include <linux/sdla.h>
-#include <linux/bitops.h>
-
-#include <asm/io.h>
-#include <asm/dma.h>
-#include <linux/uaccess.h>
-
-static const char* version = "SDLA driver v0.30, 12 Sep 1996, mike.mclagan@linux.org";
-
-static unsigned int valid_port[] = { 0x250, 0x270, 0x280, 0x300, 0x350, 0x360, 0x380, 0x390};
-
-static unsigned int valid_mem[] = {
-				    0xA0000, 0xA2000, 0xA4000, 0xA6000, 0xA8000, 0xAA000, 0xAC000, 0xAE000, 
-                                    0xB0000, 0xB2000, 0xB4000, 0xB6000, 0xB8000, 0xBA000, 0xBC000, 0xBE000,
-                                    0xC0000, 0xC2000, 0xC4000, 0xC6000, 0xC8000, 0xCA000, 0xCC000, 0xCE000,
-                                    0xD0000, 0xD2000, 0xD4000, 0xD6000, 0xD8000, 0xDA000, 0xDC000, 0xDE000,
-                                    0xE0000, 0xE2000, 0xE4000, 0xE6000, 0xE8000, 0xEA000, 0xEC000, 0xEE000}; 
-
-static DEFINE_SPINLOCK(sdla_lock);
-
-/*********************************************************
- *
- * these are the core routines that access the card itself 
- *
- *********************************************************/
-
-#define SDLA_WINDOW(dev,addr) outb((((addr) >> 13) & 0x1F), (dev)->base_addr + SDLA_REG_Z80_WINDOW)
-
-static void __sdla_read(struct net_device *dev, int addr, void *buf, short len)
-{
-	char          *temp;
-	const void    *base;
-	int           offset, bytes;
-
-	temp = buf;
-	while(len)
-	{	
-		offset = addr & SDLA_ADDR_MASK;
-		bytes = offset + len > SDLA_WINDOW_SIZE ? SDLA_WINDOW_SIZE - offset : len;
-		base = (const void *) (dev->mem_start + offset);
-
-		SDLA_WINDOW(dev, addr);
-		memcpy(temp, base, bytes);
-
-		addr += bytes;
-		temp += bytes;
-		len  -= bytes;
-	}  
-}
-
-static void sdla_read(struct net_device *dev, int addr, void *buf, short len)
-{
-	unsigned long flags;
-	spin_lock_irqsave(&sdla_lock, flags);
-	__sdla_read(dev, addr, buf, len);
-	spin_unlock_irqrestore(&sdla_lock, flags);
-}
-
-static void __sdla_write(struct net_device *dev, int addr, 
-			 const void *buf, short len)
-{
-	const char    *temp;
-	void 	      *base;
-	int           offset, bytes;
-
-	temp = buf;
-	while(len)
-	{
-		offset = addr & SDLA_ADDR_MASK;
-		bytes = offset + len > SDLA_WINDOW_SIZE ? SDLA_WINDOW_SIZE - offset : len;
-		base = (void *) (dev->mem_start + offset);
-
-		SDLA_WINDOW(dev, addr);
-		memcpy(base, temp, bytes);
-
-		addr += bytes;
-		temp += bytes;
-		len  -= bytes;
-	}
-}
-
-static void sdla_write(struct net_device *dev, int addr, 
-		       const void *buf, short len)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&sdla_lock, flags);
-	__sdla_write(dev, addr, buf, len);
-	spin_unlock_irqrestore(&sdla_lock, flags);
-}
-
-
-static void sdla_clear(struct net_device *dev)
-{
-	unsigned long flags;
-	char          *base;
-	int           len, addr, bytes;
-
-	len = 65536;	
-	addr = 0;
-	bytes = SDLA_WINDOW_SIZE;
-	base = (void *) dev->mem_start;
-
-	spin_lock_irqsave(&sdla_lock, flags);
-	while(len)
-	{
-		SDLA_WINDOW(dev, addr);
-		memset(base, 0, bytes);
-
-		addr += bytes;
-		len  -= bytes;
-	}
-	spin_unlock_irqrestore(&sdla_lock, flags);
-
-}
-
-static char sdla_byte(struct net_device *dev, int addr)
-{
-	unsigned long flags;
-	char          byte, *temp;
-
-	temp = (void *) (dev->mem_start + (addr & SDLA_ADDR_MASK));
-
-	spin_lock_irqsave(&sdla_lock, flags);
-	SDLA_WINDOW(dev, addr);
-	byte = *temp;
-	spin_unlock_irqrestore(&sdla_lock, flags);
-
-	return byte;
-}
-
-static void sdla_stop(struct net_device *dev)
-{
-	struct frad_local *flp;
-
-	flp = netdev_priv(dev);
-	switch(flp->type)
-	{
-		case SDLA_S502A:
-			outb(SDLA_S502A_HALT, dev->base_addr + SDLA_REG_CONTROL);
-			flp->state = SDLA_HALT;
-			break;
-		case SDLA_S502E:
-			outb(SDLA_HALT, dev->base_addr + SDLA_REG_Z80_CONTROL);
-			outb(SDLA_S502E_ENABLE, dev->base_addr + SDLA_REG_CONTROL);
-			flp->state = SDLA_S502E_ENABLE;
-			break;
-		case SDLA_S507:
-			flp->state &= ~SDLA_CPUEN;
-			outb(flp->state, dev->base_addr + SDLA_REG_CONTROL);
-			break;
-		case SDLA_S508:
-			flp->state &= ~SDLA_CPUEN;
-			outb(flp->state, dev->base_addr + SDLA_REG_CONTROL);
-			break;
-	}
-}
-
-static void sdla_start(struct net_device *dev)
-{
-	struct frad_local *flp;
-
-	flp = netdev_priv(dev);
-	switch(flp->type)
-	{
-		case SDLA_S502A:
-			outb(SDLA_S502A_NMI, dev->base_addr + SDLA_REG_CONTROL);
-			outb(SDLA_S502A_START, dev->base_addr + SDLA_REG_CONTROL);
-			flp->state = SDLA_S502A_START;
-			break;
-		case SDLA_S502E:
-			outb(SDLA_S502E_CPUEN, dev->base_addr + SDLA_REG_Z80_CONTROL);
-			outb(0x00, dev->base_addr + SDLA_REG_CONTROL);
-			flp->state = 0;
-			break;
-		case SDLA_S507:
-			flp->state |= SDLA_CPUEN;
-			outb(flp->state, dev->base_addr + SDLA_REG_CONTROL);
-			break;
-		case SDLA_S508:
-			flp->state |= SDLA_CPUEN;
-			outb(flp->state, dev->base_addr + SDLA_REG_CONTROL);
-			break;
-	}
-}
-
-/****************************************************
- *
- * this is used for the S502A/E cards to determine
- * the speed of the onboard CPU.  Calibration is
- * necessary for the Frame Relay code uploaded 
- * later.  Incorrect results cause timing problems
- * with link checks & status messages
- *
- ***************************************************/
-
-static int sdla_z80_poll(struct net_device *dev, int z80_addr, int jiffs, char resp1, char resp2)
-{
-	unsigned long start, done, now;
-	char          resp, *temp;
-
-	start = now = jiffies;
-	done = jiffies + jiffs;
-
-	temp = (void *)dev->mem_start;
-	temp += z80_addr & SDLA_ADDR_MASK;
-	
-	resp = ~resp1;
-	while (time_before(jiffies, done) && (resp != resp1) && (!resp2 || (resp != resp2)))
-	{
-		if (jiffies != now)
-		{
-			SDLA_WINDOW(dev, z80_addr);
-			now = jiffies;
-			resp = *temp;
-		}
-	}
-	return time_before(jiffies, done) ? jiffies - start : -1;
-}
-
-/* constants for Z80 CPU speed */
-#define Z80_READY 		'1'	/* Z80 is ready to begin */
-#define LOADER_READY 		'2'	/* driver is ready to begin */
-#define Z80_SCC_OK 		'3'	/* SCC is on board */
-#define Z80_SCC_BAD	 	'4'	/* SCC was not found */
-
-static int sdla_cpuspeed(struct net_device *dev, struct ifreq *ifr)
-{
-	int  jiffs;
-	char data;
-
-	sdla_start(dev);
-	if (sdla_z80_poll(dev, 0, 3*HZ, Z80_READY, 0) < 0)
-		return -EIO;
-
-	data = LOADER_READY;
-	sdla_write(dev, 0, &data, 1);
-
-	if ((jiffs = sdla_z80_poll(dev, 0, 8*HZ, Z80_SCC_OK, Z80_SCC_BAD)) < 0)
-		return -EIO;
-
-	sdla_stop(dev);
-	sdla_read(dev, 0, &data, 1);
-
-	if (data == Z80_SCC_BAD)
-	{
-		printk("%s: SCC bad\n", dev->name);
-		return -EIO;
-	}
-
-	if (data != Z80_SCC_OK)
-		return -EINVAL;
-
-	if (jiffs < 165)
-		ifr->ifr_mtu = SDLA_CPU_16M;
-	else if (jiffs < 220)
-		ifr->ifr_mtu = SDLA_CPU_10M;
-	else if (jiffs < 258)
-		ifr->ifr_mtu = SDLA_CPU_8M;
-	else if (jiffs < 357)
-		ifr->ifr_mtu = SDLA_CPU_7M;
-	else if (jiffs < 467)
-		ifr->ifr_mtu = SDLA_CPU_5M;
-	else
-		ifr->ifr_mtu = SDLA_CPU_3M;
- 
-	return 0;
-}
-
-/************************************************
- *
- *  Direct interaction with the Frame Relay code 
- *  starts here.
- *
- ************************************************/
-
-struct _dlci_stat 
-{
-	short dlci;
-	char  flags;
-} __packed;
-
-struct _frad_stat 
-{
-	char    flags;
-	struct _dlci_stat dlcis[SDLA_MAX_DLCI];
-};
-
-static void sdla_errors(struct net_device *dev, int cmd, int dlci, int ret, int len, void *data) 
-{
-	struct _dlci_stat *pstatus;
-	short             *pdlci;
-	int               i;
-	char              *state, line[30];
-
-	switch (ret)
-	{
-		case SDLA_RET_MODEM:
-			state = data;
-			if (*state & SDLA_MODEM_DCD_LOW)
-				netdev_info(dev, "Modem DCD unexpectedly low!\n");
-			if (*state & SDLA_MODEM_CTS_LOW)
-				netdev_info(dev, "Modem CTS unexpectedly low!\n");
-			/* I should probably do something about this! */
-			break;
-
-		case SDLA_RET_CHANNEL_OFF:
-			netdev_info(dev, "Channel became inoperative!\n");
-			/* same here */
-			break;
-
-		case SDLA_RET_CHANNEL_ON:
-			netdev_info(dev, "Channel became operative!\n");
-			/* same here */
-			break;
-
-		case SDLA_RET_DLCI_STATUS:
-			netdev_info(dev, "Status change reported by Access Node\n");
-			len /= sizeof(struct _dlci_stat);
-			for(pstatus = data, i=0;i < len;i++,pstatus++)
-			{
-				if (pstatus->flags & SDLA_DLCI_NEW)
-					state = "new";
-				else if (pstatus->flags & SDLA_DLCI_DELETED)
-					state = "deleted";
-				else if (pstatus->flags & SDLA_DLCI_ACTIVE)
-					state = "active";
-				else
-				{
-					sprintf(line, "unknown status: %02X", pstatus->flags);
-					state = line;
-				}
-				netdev_info(dev, "DLCI %i: %s\n",
-					    pstatus->dlci, state);
-				/* same here */
-			}
-			break;
-
-		case SDLA_RET_DLCI_UNKNOWN:
-			netdev_info(dev, "Received unknown DLCIs:");
-			len /= sizeof(short);
-			for(pdlci = data,i=0;i < len;i++,pdlci++)
-				pr_cont(" %i", *pdlci);
-			pr_cont("\n");
-			break;
-
-		case SDLA_RET_TIMEOUT:
-			netdev_err(dev, "Command timed out!\n");
-			break;
-
-		case SDLA_RET_BUF_OVERSIZE:
-			netdev_info(dev, "Bc/CIR overflow, acceptable size is %i\n",
-				    len);
-			break;
-
-		case SDLA_RET_BUF_TOO_BIG:
-			netdev_info(dev, "Buffer size over specified max of %i\n",
-				    len);
-			break;
-
-		case SDLA_RET_CHANNEL_INACTIVE:
-		case SDLA_RET_DLCI_INACTIVE:
-		case SDLA_RET_CIR_OVERFLOW:
-		case SDLA_RET_NO_BUFS:
-			if (cmd == SDLA_INFORMATION_WRITE)
-				break;
-			fallthrough;
-
-		default: 
-			netdev_dbg(dev, "Cmd 0x%02X generated return code 0x%02X\n",
-				   cmd, ret);
-			/* Further processing could be done here */
-			break;
-	}
-}
-
-static int sdla_cmd(struct net_device *dev, int cmd, short dlci, short flags, 
-                        void *inbuf, short inlen, void *outbuf, short *outlen)
-{
-	static struct _frad_stat status;
-	struct frad_local        *flp;
-	struct sdla_cmd          *cmd_buf;
-	unsigned long            pflags;
-	unsigned long		 jiffs;
-	int                      ret, waiting, len;
-	long                     window;
-
-	flp = netdev_priv(dev);
-	window = flp->type == SDLA_S508 ? SDLA_508_CMD_BUF : SDLA_502_CMD_BUF;
-	cmd_buf = (struct sdla_cmd *)(dev->mem_start + (window & SDLA_ADDR_MASK));
-	ret = 0;
-	len = 0;
-	jiffs = jiffies + HZ;  /* 1 second is plenty */
-
-	spin_lock_irqsave(&sdla_lock, pflags);
-	SDLA_WINDOW(dev, window);
-	cmd_buf->cmd = cmd;
-	cmd_buf->dlci = dlci;
-	cmd_buf->flags = flags;
-
-	if (inbuf)
-		memcpy(cmd_buf->data, inbuf, inlen);
-
-	cmd_buf->length = inlen;
-
-	cmd_buf->opp_flag = 1;
-	spin_unlock_irqrestore(&sdla_lock, pflags);
-
-	waiting = 1;
-	len = 0;
-	while (waiting && time_before_eq(jiffies, jiffs))
-	{
-		if (waiting++ % 3) 
-		{
-			spin_lock_irqsave(&sdla_lock, pflags);
-			SDLA_WINDOW(dev, window);
-			waiting = ((volatile int)(cmd_buf->opp_flag));
-			spin_unlock_irqrestore(&sdla_lock, pflags);
-		}
-	}
-	
-	if (!waiting)
-	{
-
-		spin_lock_irqsave(&sdla_lock, pflags);
-		SDLA_WINDOW(dev, window);
-		ret = cmd_buf->retval;
-		len = cmd_buf->length;
-		if (outbuf && outlen)
-		{
-			*outlen = *outlen >= len ? len : *outlen;
-
-			if (*outlen)
-				memcpy(outbuf, cmd_buf->data, *outlen);
-		}
-
-		/* This is a local copy that's used for error handling */
-		if (ret)
-			memcpy(&status, cmd_buf->data, len > sizeof(status) ? sizeof(status) : len);
-
-		spin_unlock_irqrestore(&sdla_lock, pflags);
-	}
-	else
-		ret = SDLA_RET_TIMEOUT;
-
-	if (ret != SDLA_RET_OK)
-	   	sdla_errors(dev, cmd, dlci, ret, len, &status);
-
-	return ret;
-}
-
-/***********************************************
- *
- * these functions are called by the DLCI driver 
- *
- ***********************************************/
-
-static int sdla_reconfig(struct net_device *dev);
-
-static int sdla_activate(struct net_device *slave, struct net_device *master)
-{
-	struct frad_local *flp;
-	int i;
-
-	flp = netdev_priv(slave);
-
-	for(i=0;i<CONFIG_DLCI_MAX;i++)
-		if (flp->master[i] == master)
-			break;
-
-	if (i == CONFIG_DLCI_MAX)
-		return -ENODEV;
-
-	flp->dlci[i] = abs(flp->dlci[i]);
-
-	if (netif_running(slave) && (flp->config.station == FRAD_STATION_NODE))
-		sdla_cmd(slave, SDLA_ACTIVATE_DLCI, 0, 0, &flp->dlci[i], sizeof(short), NULL, NULL);
-
-	return 0;
-}
-
-static int sdla_deactivate(struct net_device *slave, struct net_device *master)
-{
-	struct frad_local *flp;
-	int               i;
-
-	flp = netdev_priv(slave);
-
-	for(i=0;i<CONFIG_DLCI_MAX;i++)
-		if (flp->master[i] == master)
-			break;
-
-	if (i == CONFIG_DLCI_MAX)
-		return -ENODEV;
-
-	flp->dlci[i] = -abs(flp->dlci[i]);
-
-	if (netif_running(slave) && (flp->config.station == FRAD_STATION_NODE))
-		sdla_cmd(slave, SDLA_DEACTIVATE_DLCI, 0, 0, &flp->dlci[i], sizeof(short), NULL, NULL);
-
-	return 0;
-}
-
-static int sdla_assoc(struct net_device *slave, struct net_device *master)
-{
-	struct frad_local *flp;
-	int               i;
-
-	if (master->type != ARPHRD_DLCI)
-		return -EINVAL;
-
-	flp = netdev_priv(slave);
-
-	for(i=0;i<CONFIG_DLCI_MAX;i++)
-	{
-		if (!flp->master[i])
-			break;
-		if (abs(flp->dlci[i]) == *(short *)(master->dev_addr))
-			return -EADDRINUSE;
-	} 
-
-	if (i == CONFIG_DLCI_MAX)
-		return -EMLINK;  /* #### Alan: Comments on this ?? */
-
-
-	flp->master[i] = master;
-	flp->dlci[i] = -*(short *)(master->dev_addr);
-	master->mtu = slave->mtu;
-
-	if (netif_running(slave)) {
-		if (flp->config.station == FRAD_STATION_CPE)
-			sdla_reconfig(slave);
-		else
-			sdla_cmd(slave, SDLA_ADD_DLCI, 0, 0, master->dev_addr, sizeof(short), NULL, NULL);
-	}
-
-	return 0;
-}
-
-static int sdla_deassoc(struct net_device *slave, struct net_device *master)
-{
-	struct frad_local *flp;
-	int               i;
-
-	flp = netdev_priv(slave);
-
-	for(i=0;i<CONFIG_DLCI_MAX;i++)
-		if (flp->master[i] == master)
-			break;
-
-	if (i == CONFIG_DLCI_MAX)
-		return -ENODEV;
-
-	flp->master[i] = NULL;
-	flp->dlci[i] = 0;
-
-
-	if (netif_running(slave)) {
-		if (flp->config.station == FRAD_STATION_CPE)
-			sdla_reconfig(slave);
-		else
-			sdla_cmd(slave, SDLA_DELETE_DLCI, 0, 0, master->dev_addr, sizeof(short), NULL, NULL);
-	}
-
-	return 0;
-}
-
-static int sdla_dlci_conf(struct net_device *slave, struct net_device *master, int get)
-{
-	struct frad_local *flp;
-	struct dlci_local *dlp;
-	int               i;
-	short             len, ret;
-
-	flp = netdev_priv(slave);
-
-	for(i=0;i<CONFIG_DLCI_MAX;i++)
-		if (flp->master[i] == master)
-			break;
-
-	if (i == CONFIG_DLCI_MAX)
-		return -ENODEV;
-
-	dlp = netdev_priv(master);
-
-	ret = SDLA_RET_OK;
-	len = sizeof(struct dlci_conf);
-	if (netif_running(slave)) {
-		if (get)
-			ret = sdla_cmd(slave, SDLA_READ_DLCI_CONFIGURATION, abs(flp->dlci[i]), 0,  
-			            NULL, 0, &dlp->config, &len);
-		else
-			ret = sdla_cmd(slave, SDLA_SET_DLCI_CONFIGURATION, abs(flp->dlci[i]), 0,  
-			            &dlp->config, sizeof(struct dlci_conf) - 4 * sizeof(short), NULL, NULL);
-	}
-
-	return ret == SDLA_RET_OK ? 0 : -EIO;
-}
-
-/**************************
- *
- * now for the Linux driver 
- *
- **************************/
-
-/* NOTE: the DLCI driver deals with freeing the SKB!! */
-static netdev_tx_t sdla_transmit(struct sk_buff *skb,
-				 struct net_device *dev)
-{
-	struct frad_local *flp;
-	int               ret, addr, accept, i;
-	short             size;
-	unsigned long     flags;
-	struct buf_entry  *pbuf;
-
-	flp = netdev_priv(dev);
-	ret = 0;
-	accept = 1;
-
-	netif_stop_queue(dev);
-
-	/*
-	 * stupid GateD insists on setting up the multicast router thru us
-	 * and we're ill equipped to handle a non Frame Relay packet at this
-	 * time!
-	 */
-
-	accept = 1;
-	switch (dev->type)
-	{
-		case ARPHRD_FRAD:
-			if (skb->dev->type != ARPHRD_DLCI)
-			{
-				netdev_warn(dev, "Non DLCI device, type %i, tried to send on FRAD module\n",
-					    skb->dev->type);
-				accept = 0;
-			}
-			break;
-		default:
-			netdev_warn(dev, "unknown firmware type 0x%04X\n",
-				    dev->type);
-			accept = 0;
-			break;
-	}
-	if (accept)
-	{
-		/* this is frame specific, but till there's a PPP module, it's the default */
-		switch (flp->type)
-		{
-			case SDLA_S502A:
-			case SDLA_S502E:
-				ret = sdla_cmd(dev, SDLA_INFORMATION_WRITE, *(short *)(skb->dev->dev_addr), 0, skb->data, skb->len, NULL, NULL);
-				break;
-				case SDLA_S508:
-				size = sizeof(addr);
-				ret = sdla_cmd(dev, SDLA_INFORMATION_WRITE, *(short *)(skb->dev->dev_addr), 0, NULL, skb->len, &addr, &size);
-				if (ret == SDLA_RET_OK)
-				{
-
-					spin_lock_irqsave(&sdla_lock, flags);
-					SDLA_WINDOW(dev, addr);
-					pbuf = (void *)(dev->mem_start + (addr & SDLA_ADDR_MASK));
-					__sdla_write(dev, pbuf->buf_addr, skb->data, skb->len);
-					SDLA_WINDOW(dev, addr);
-					pbuf->opp_flag = 1;
-					spin_unlock_irqrestore(&sdla_lock, flags);
-				}
-				break;
-		}
-
-		switch (ret)
-		{
-			case SDLA_RET_OK:
-				dev->stats.tx_packets++;
-				break;
-
-			case SDLA_RET_CIR_OVERFLOW:
-			case SDLA_RET_BUF_OVERSIZE:
-			case SDLA_RET_NO_BUFS:
-				dev->stats.tx_dropped++;
-				break;
-
-			default:
-				dev->stats.tx_errors++;
-				break;
-		}
-	}
-	netif_wake_queue(dev);
-	for(i=0;i<CONFIG_DLCI_MAX;i++)
-	{
-		if(flp->master[i]!=NULL)
-			netif_wake_queue(flp->master[i]);
-	}		
-
-	dev_kfree_skb(skb);
-	return NETDEV_TX_OK;
-}
-
-static void sdla_receive(struct net_device *dev)
-{
-	struct net_device	  *master;
-	struct frad_local *flp;
-	struct dlci_local *dlp;
-	struct sk_buff	 *skb;
-
-	struct sdla_cmd	*cmd;
-	struct buf_info	*pbufi;
-	struct buf_entry  *pbuf;
-
-	unsigned long	  flags;
-	int               i=0, received, success, addr, buf_base, buf_top;
-	short             dlci, len, len2, split;
-
-	flp = netdev_priv(dev);
-	success = 1;
-	received = addr = buf_top = buf_base = 0;
-	len = dlci = 0;
-	skb = NULL;
-	master = NULL;
-	cmd = NULL;
-	pbufi = NULL;
-	pbuf = NULL;
-
-	spin_lock_irqsave(&sdla_lock, flags);
-
-	switch (flp->type)
-	{
-		case SDLA_S502A:
-		case SDLA_S502E:
-			cmd = (void *) (dev->mem_start + (SDLA_502_RCV_BUF & SDLA_ADDR_MASK));
-			SDLA_WINDOW(dev, SDLA_502_RCV_BUF);
-			success = cmd->opp_flag;
-			if (!success)
-				break;
-
-			dlci = cmd->dlci;
-			len = cmd->length;
-			break;
-
-		case SDLA_S508:
-			pbufi = (void *) (dev->mem_start + (SDLA_508_RXBUF_INFO & SDLA_ADDR_MASK));
-			SDLA_WINDOW(dev, SDLA_508_RXBUF_INFO);
-			pbuf = (void *) (dev->mem_start + ((pbufi->rse_base + flp->buffer * sizeof(struct buf_entry)) & SDLA_ADDR_MASK));
-			success = pbuf->opp_flag;
-			if (!success)
-				break;
-
-			buf_top = pbufi->buf_top;
-			buf_base = pbufi->buf_base;
-			dlci = pbuf->dlci;
-			len = pbuf->length;
-			addr = pbuf->buf_addr;
-			break;
-	}
-
-	/* common code, find the DLCI and get the SKB */
-	if (success)
-	{
-		for (i=0;i<CONFIG_DLCI_MAX;i++)
-			if (flp->dlci[i] == dlci)
-				break;
-
-		if (i == CONFIG_DLCI_MAX)
-		{
-			netdev_notice(dev, "Received packet from invalid DLCI %i, ignoring\n",
-				      dlci);
-			dev->stats.rx_errors++;
-			success = 0;
-		}
-	}
-
-	if (success)
-	{
-		master = flp->master[i];
-		skb = dev_alloc_skb(len + sizeof(struct frhdr));
-		if (skb == NULL) 
-		{
-			netdev_notice(dev, "Memory squeeze, dropping packet\n");
-			dev->stats.rx_dropped++;
-			success = 0;
-		}
-		else
-			skb_reserve(skb, sizeof(struct frhdr));
-	}
-
-	/* pick up the data */
-	switch (flp->type)
-	{
-		case SDLA_S502A:
-		case SDLA_S502E:
-			if (success)
-				__sdla_read(dev, SDLA_502_RCV_BUF + SDLA_502_DATA_OFS, skb_put(skb,len), len);
-
-			SDLA_WINDOW(dev, SDLA_502_RCV_BUF);
-			cmd->opp_flag = 0;
-			break;
-
-		case SDLA_S508:
-			if (success)
-			{
-				/* is this buffer split off the end of the internal ring buffer */
-				split = addr + len > buf_top + 1 ? len - (buf_top - addr + 1) : 0;
-				len2 = len - split;
-
-				__sdla_read(dev, addr, skb_put(skb, len2), len2);
-				if (split)
-					__sdla_read(dev, buf_base, skb_put(skb, split), split);
-			}
-
-			/* increment the buffer we're looking at */
-			SDLA_WINDOW(dev, SDLA_508_RXBUF_INFO);
-			flp->buffer = (flp->buffer + 1) % pbufi->rse_num;
-			pbuf->opp_flag = 0;
-			break;
-	}
-
-	if (success)
-	{
-		dev->stats.rx_packets++;
-		dlp = netdev_priv(master);
-		(*dlp->receive)(skb, master);
-	}
-
-	spin_unlock_irqrestore(&sdla_lock, flags);
-}
-
-static irqreturn_t sdla_isr(int dummy, void *dev_id)
-{
-	struct net_device     *dev;
-	struct frad_local *flp;
-	char              byte;
-
-	dev = dev_id;
-
-	flp = netdev_priv(dev);
-
-	if (!flp->initialized)
-	{
-		netdev_warn(dev, "irq %d for uninitialized device\n", dev->irq);
-		return IRQ_NONE;
-	}
-
-	byte = sdla_byte(dev, flp->type == SDLA_S508 ? SDLA_508_IRQ_INTERFACE : SDLA_502_IRQ_INTERFACE);
-	switch (byte)
-	{
-		case SDLA_INTR_RX:
-			sdla_receive(dev);
-			break;
-
-		/* the command will get an error return, which is processed above */
-		case SDLA_INTR_MODEM:
-		case SDLA_INTR_STATUS:
-			sdla_cmd(dev, SDLA_READ_DLC_STATUS, 0, 0, NULL, 0, NULL, NULL);
-			break;
-
-		case SDLA_INTR_TX:
-		case SDLA_INTR_COMPLETE:
-		case SDLA_INTR_TIMER:
-			netdev_warn(dev, "invalid irq flag 0x%02X\n", byte);
-			break;
-	}
-
-	/* the S502E requires a manual acknowledgement of the interrupt */ 
-	if (flp->type == SDLA_S502E)
-	{
-		flp->state &= ~SDLA_S502E_INTACK;
-		outb(flp->state, dev->base_addr + SDLA_REG_CONTROL);
-		flp->state |= SDLA_S502E_INTACK;
-		outb(flp->state, dev->base_addr + SDLA_REG_CONTROL);
-	}
-
-	/* this clears the byte, informing the Z80 we're done */
-	byte = 0;
-	sdla_write(dev, flp->type == SDLA_S508 ? SDLA_508_IRQ_INTERFACE : SDLA_502_IRQ_INTERFACE, &byte, sizeof(byte));
-	return IRQ_HANDLED;
-}
-
-static void sdla_poll(struct timer_list *t)
-{
-	struct frad_local *flp = from_timer(flp, t, timer);
-	struct net_device *dev = flp->dev;
-
-	if (sdla_byte(dev, SDLA_502_RCV_BUF))
-		sdla_receive(dev);
-
-	flp->timer.expires = 1;
-	add_timer(&flp->timer);
-}
-
-static int sdla_close(struct net_device *dev)
-{
-	struct frad_local *flp;
-	struct intr_info  intr;
-	int               len, i;
-	short             dlcis[CONFIG_DLCI_MAX];
-
-	flp = netdev_priv(dev);
-
-	len = 0;
-	for(i=0;i<CONFIG_DLCI_MAX;i++)
-		if (flp->dlci[i])
-			dlcis[len++] = abs(flp->dlci[i]);
-	len *= 2;
-
-	if (flp->config.station == FRAD_STATION_NODE)
-	{
-		for(i=0;i<CONFIG_DLCI_MAX;i++)
-			if (flp->dlci[i] > 0) 
-				sdla_cmd(dev, SDLA_DEACTIVATE_DLCI, 0, 0, dlcis, len, NULL, NULL);
-		sdla_cmd(dev, SDLA_DELETE_DLCI, 0, 0, &flp->dlci[i], sizeof(flp->dlci[i]), NULL, NULL);
-	}
-
-	memset(&intr, 0, sizeof(intr));
-	/* let's start up the reception */
-	switch(flp->type)
-	{
-		case SDLA_S502A:
-			del_timer(&flp->timer); 
-			break;
-
-		case SDLA_S502E:
-			sdla_cmd(dev, SDLA_SET_IRQ_TRIGGER, 0, 0, &intr, sizeof(char) + sizeof(short), NULL, NULL);
-			flp->state &= ~SDLA_S502E_INTACK;
-			outb(flp->state, dev->base_addr + SDLA_REG_CONTROL);
-			break;
-
-		case SDLA_S507:
-			break;
-
-		case SDLA_S508:
-			sdla_cmd(dev, SDLA_SET_IRQ_TRIGGER, 0, 0, &intr, sizeof(struct intr_info), NULL, NULL);
-			flp->state &= ~SDLA_S508_INTEN;
-			outb(flp->state, dev->base_addr + SDLA_REG_CONTROL);
-			break;
-	}
-
-	sdla_cmd(dev, SDLA_DISABLE_COMMUNICATIONS, 0, 0, NULL, 0, NULL, NULL);
-
-	netif_stop_queue(dev);
-	
-	return 0;
-}
-
-struct conf_data {
-	struct frad_conf config;
-	short            dlci[CONFIG_DLCI_MAX];
-};
-
-static int sdla_open(struct net_device *dev)
-{
-	struct frad_local *flp;
-	struct dlci_local *dlp;
-	struct conf_data  data;
-	struct intr_info  intr;
-	int               len, i;
-	char              byte;
-
-	flp = netdev_priv(dev);
-
-	if (!flp->initialized)
-		return -EPERM;
-
-	if (!flp->configured)
-		return -EPERM;
-
-	/* time to send in the configuration */
-	len = 0;
-	for(i=0;i<CONFIG_DLCI_MAX;i++)
-		if (flp->dlci[i])
-			data.dlci[len++] = abs(flp->dlci[i]);
-	len *= 2;
-
-	memcpy(&data.config, &flp->config, sizeof(struct frad_conf));
-	len += sizeof(struct frad_conf);
-
-	sdla_cmd(dev, SDLA_DISABLE_COMMUNICATIONS, 0, 0, NULL, 0, NULL, NULL);
-	sdla_cmd(dev, SDLA_SET_DLCI_CONFIGURATION, 0, 0, &data, len, NULL, NULL);
-
-	if (flp->type == SDLA_S508)
-		flp->buffer = 0;
-
-	sdla_cmd(dev, SDLA_ENABLE_COMMUNICATIONS, 0, 0, NULL, 0, NULL, NULL);
-
-	/* let's start up the reception */
-	memset(&intr, 0, sizeof(intr));
-	switch(flp->type)
-	{
-		case SDLA_S502A:
-			flp->timer.expires = 1;
-			add_timer(&flp->timer);
-			break;
-
-		case SDLA_S502E:
-			flp->state |= SDLA_S502E_ENABLE;
-			outb(flp->state, dev->base_addr + SDLA_REG_CONTROL);
-			flp->state |= SDLA_S502E_INTACK;
-			outb(flp->state, dev->base_addr + SDLA_REG_CONTROL);
-			byte = 0;
-			sdla_write(dev, SDLA_502_IRQ_INTERFACE, &byte, sizeof(byte));
-			intr.flags = SDLA_INTR_RX | SDLA_INTR_STATUS | SDLA_INTR_MODEM;
-			sdla_cmd(dev, SDLA_SET_IRQ_TRIGGER, 0, 0, &intr, sizeof(char) + sizeof(short), NULL, NULL);
-			break;
-
-		case SDLA_S507:
-			break;
-
-		case SDLA_S508:
-			flp->state |= SDLA_S508_INTEN;
-			outb(flp->state, dev->base_addr + SDLA_REG_CONTROL);
-			byte = 0;
-			sdla_write(dev, SDLA_508_IRQ_INTERFACE, &byte, sizeof(byte));
-			intr.flags = SDLA_INTR_RX | SDLA_INTR_STATUS | SDLA_INTR_MODEM;
-			intr.irq = dev->irq;
-			sdla_cmd(dev, SDLA_SET_IRQ_TRIGGER, 0, 0, &intr, sizeof(struct intr_info), NULL, NULL);
-			break;
-	}
-
-	if (flp->config.station == FRAD_STATION_CPE)
-	{
-		byte = SDLA_ICS_STATUS_ENQ;
-		sdla_cmd(dev, SDLA_ISSUE_IN_CHANNEL_SIGNAL, 0, 0, &byte, sizeof(byte), NULL, NULL);
-	}
-	else
-	{
-		sdla_cmd(dev, SDLA_ADD_DLCI, 0, 0, data.dlci, len - sizeof(struct frad_conf), NULL, NULL);
-		for(i=0;i<CONFIG_DLCI_MAX;i++)
-			if (flp->dlci[i] > 0)
-				sdla_cmd(dev, SDLA_ACTIVATE_DLCI, 0, 0, &flp->dlci[i], 2*sizeof(flp->dlci[i]), NULL, NULL);
-	}
-
-	/* configure any specific DLCI settings */
-	for(i=0;i<CONFIG_DLCI_MAX;i++)
-		if (flp->dlci[i])
-		{
-			dlp = netdev_priv(flp->master[i]);
-			if (dlp->configured)
-				sdla_cmd(dev, SDLA_SET_DLCI_CONFIGURATION, abs(flp->dlci[i]), 0, &dlp->config, sizeof(struct dlci_conf), NULL, NULL);
-		}
-
-	netif_start_queue(dev);
-	
-	return 0;
-}
-
-static int sdla_config(struct net_device *dev, struct frad_conf __user *conf, int get)
-{
-	struct frad_local *flp;
-	struct conf_data  data;
-	int               i;
-	short             size;
-
-	if (dev->type == 0xFFFF)
-		return -EUNATCH;
-
-	flp = netdev_priv(dev);
-
-	if (!get)
-	{
-		if (netif_running(dev))
-			return -EBUSY;
-
-		if(copy_from_user(&data.config, conf, sizeof(struct frad_conf)))
-			return -EFAULT;
-
-		if (data.config.station & ~FRAD_STATION_NODE)
-			return -EINVAL;
-
-		if (data.config.flags & ~FRAD_VALID_FLAGS)
-			return -EINVAL;
-
-		if ((data.config.kbaud < 0) || 
-			 ((data.config.kbaud > 128) && (flp->type != SDLA_S508)))
-			return -EINVAL;
-
-		if (data.config.clocking & ~(FRAD_CLOCK_INT | SDLA_S508_PORT_RS232))
-			return -EINVAL;
-
-		if ((data.config.mtu < 0) || (data.config.mtu > SDLA_MAX_MTU))
-			return -EINVAL;
-
-		if ((data.config.T391 < 5) || (data.config.T391 > 30))
-			return -EINVAL;
-
-		if ((data.config.T392 < 5) || (data.config.T392 > 30))
-			return -EINVAL;
-
-		if ((data.config.N391 < 1) || (data.config.N391 > 255))
-			return -EINVAL;
-
-		if ((data.config.N392 < 1) || (data.config.N392 > 10))
-			return -EINVAL;
-
-		if ((data.config.N393 < 1) || (data.config.N393 > 10))
-			return -EINVAL;
-
-		memcpy(&flp->config, &data.config, sizeof(struct frad_conf));
-		flp->config.flags |= SDLA_DIRECT_RECV;
-
-		if (flp->type == SDLA_S508)
-			flp->config.flags |= SDLA_TX70_RX30;
-
-		if (dev->mtu != flp->config.mtu)
-		{
-			/* this is required to change the MTU */
-			dev->mtu = flp->config.mtu;
-			for(i=0;i<CONFIG_DLCI_MAX;i++)
-				if (flp->master[i])
-					flp->master[i]->mtu = flp->config.mtu;
-		}
-
-		flp->config.mtu += sizeof(struct frhdr);
-
-		/* off to the races! */
-		if (!flp->configured)
-			sdla_start(dev);
-
-		flp->configured = 1;
-	}
-	else
-	{
-		/* no sense reading if the CPU isn't started */
-		if (netif_running(dev))
-		{
-			size = sizeof(data);
-			if (sdla_cmd(dev, SDLA_READ_DLCI_CONFIGURATION, 0, 0, NULL, 0, &data, &size) != SDLA_RET_OK)
-				return -EIO;
-		}
-		else
-			if (flp->configured)
-				memcpy(&data.config, &flp->config, sizeof(struct frad_conf));
-			else
-				memset(&data.config, 0, sizeof(struct frad_conf));
-
-		memcpy(&flp->config, &data.config, sizeof(struct frad_conf));
-		data.config.flags &= FRAD_VALID_FLAGS;
-		data.config.mtu -= data.config.mtu > sizeof(struct frhdr) ? sizeof(struct frhdr) : data.config.mtu;
-		return copy_to_user(conf, &data.config, sizeof(struct frad_conf))?-EFAULT:0;
-	}
-
-	return 0;
-}
-
-static int sdla_xfer(struct net_device *dev, struct sdla_mem __user *info, int read)
-{
-	struct sdla_mem mem;
-	char	*temp;
-
-	if(copy_from_user(&mem, info, sizeof(mem)))
-		return -EFAULT;
-		
-	if (read)
-	{	
-		temp = kzalloc(mem.len, GFP_KERNEL);
-		if (!temp)
-			return -ENOMEM;
-		sdla_read(dev, mem.addr, temp, mem.len);
-		if(copy_to_user(mem.data, temp, mem.len))
-		{
-			kfree(temp);
-			return -EFAULT;
-		}
-		kfree(temp);
-	}
-	else
-	{
-		temp = memdup_user(mem.data, mem.len);
-		if (IS_ERR(temp))
-			return PTR_ERR(temp);
-		sdla_write(dev, mem.addr, temp, mem.len);
-		kfree(temp);
-	}
-	return 0;
-}
-
-static int sdla_reconfig(struct net_device *dev)
-{
-	struct frad_local *flp;
-	struct conf_data  data;
-	int               i, len;
-
-	flp = netdev_priv(dev);
-
-	len = 0;
-	for(i=0;i<CONFIG_DLCI_MAX;i++)
-		if (flp->dlci[i])
-			data.dlci[len++] = flp->dlci[i];
-	len *= 2;
-
-	memcpy(&data, &flp->config, sizeof(struct frad_conf));
-	len += sizeof(struct frad_conf);
-
-	sdla_cmd(dev, SDLA_DISABLE_COMMUNICATIONS, 0, 0, NULL, 0, NULL, NULL);
-	sdla_cmd(dev, SDLA_SET_DLCI_CONFIGURATION, 0, 0, &data, len, NULL, NULL);
-	sdla_cmd(dev, SDLA_ENABLE_COMMUNICATIONS, 0, 0, NULL, 0, NULL, NULL);
-
-	return 0;
-}
-
-static int sdla_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
-{
-	struct frad_local *flp;
-
-	if(!capable(CAP_NET_ADMIN))
-		return -EPERM;
-		
-	flp = netdev_priv(dev);
-
-	if (!flp->initialized)
-		return -EINVAL;
-
-	switch (cmd)
-	{
-		case FRAD_GET_CONF:
-		case FRAD_SET_CONF:
-			return sdla_config(dev, ifr->ifr_data, cmd == FRAD_GET_CONF);
-
-		case SDLA_IDENTIFY:
-			ifr->ifr_flags = flp->type;
-			break;
-
-		case SDLA_CPUSPEED:
-			return sdla_cpuspeed(dev, ifr);
-
-/* ==========================================================
-NOTE:  This is rather a useless action right now, as the
-       current driver does not support protocols other than
-       FR.  However, Sangoma has modules for a number of
-       other protocols in the works.
-============================================================*/
-		case SDLA_PROTOCOL:
-			if (flp->configured)
-				return -EALREADY;
-
-			switch (ifr->ifr_flags)
-			{
-				case ARPHRD_FRAD:
-					dev->type = ifr->ifr_flags;
-					break;
-				default:
-					return -ENOPROTOOPT;
-			}
-			break;
-
-		case SDLA_CLEARMEM:
-			sdla_clear(dev);
-			break;
-
-		case SDLA_WRITEMEM:
-		case SDLA_READMEM:
-			if(!capable(CAP_SYS_RAWIO))
-				return -EPERM;
-			return sdla_xfer(dev, ifr->ifr_data, cmd == SDLA_READMEM);
-
-		case SDLA_START:
-			sdla_start(dev);
-			break;
-
-		case SDLA_STOP:
-			sdla_stop(dev);
-			break;
-
-		default:
-			return -EOPNOTSUPP;
-	}
-	return 0;
-}
-
-static int sdla_change_mtu(struct net_device *dev, int new_mtu)
-{
-	if (netif_running(dev))
-		return -EBUSY;
-
-	/* for now, you can't change the MTU! */
-	return -EOPNOTSUPP;
-}
-
-static int sdla_set_config(struct net_device *dev, struct ifmap *map)
-{
-	struct frad_local *flp;
-	int               i;
-	char              byte;
-	unsigned base;
-	int err = -EINVAL;
-
-	flp = netdev_priv(dev);
-
-	if (flp->initialized)
-		return -EINVAL;
-
-	for(i=0; i < ARRAY_SIZE(valid_port); i++)
-		if (valid_port[i] == map->base_addr)
-			break;   
-
-	if (i == ARRAY_SIZE(valid_port))
-		return -EINVAL;
-
-	if (!request_region(map->base_addr, SDLA_IO_EXTENTS, dev->name)){
-		pr_warn("io-port 0x%04lx in use\n", dev->base_addr);
-		return -EINVAL;
-	}
-	base = map->base_addr;
-
-	/* test for card types, S502A, S502E, S507, S508                 */
-	/* these tests shut down the card completely, so clear the state */
-	flp->type = SDLA_UNKNOWN;
-	flp->state = 0;
-   
-	for(i=1;i<SDLA_IO_EXTENTS;i++)
-		if (inb(base + i) != 0xFF)
-			break;
-
-	if (i == SDLA_IO_EXTENTS) {   
-		outb(SDLA_HALT, base + SDLA_REG_Z80_CONTROL);
-		if ((inb(base + SDLA_S502_STS) & 0x0F) == 0x08) {
-			outb(SDLA_S502E_INTACK, base + SDLA_REG_CONTROL);
-			if ((inb(base + SDLA_S502_STS) & 0x0F) == 0x0C) {
-				outb(SDLA_HALT, base + SDLA_REG_CONTROL);
-				flp->type = SDLA_S502E;
-				goto got_type;
-			}
-		}
-	}
-
-	for(byte=inb(base),i=0;i<SDLA_IO_EXTENTS;i++)
-		if (inb(base + i) != byte)
-			break;
-
-	if (i == SDLA_IO_EXTENTS) {
-		outb(SDLA_HALT, base + SDLA_REG_CONTROL);
-		if ((inb(base + SDLA_S502_STS) & 0x7E) == 0x30) {
-			outb(SDLA_S507_ENABLE, base + SDLA_REG_CONTROL);
-			if ((inb(base + SDLA_S502_STS) & 0x7E) == 0x32) {
-				outb(SDLA_HALT, base + SDLA_REG_CONTROL);
-				flp->type = SDLA_S507;
-				goto got_type;
-			}
-		}
-	}
-
-	outb(SDLA_HALT, base + SDLA_REG_CONTROL);
-	if ((inb(base + SDLA_S508_STS) & 0x3F) == 0x00) {
-		outb(SDLA_S508_INTEN, base + SDLA_REG_CONTROL);
-		if ((inb(base + SDLA_S508_STS) & 0x3F) == 0x10) {
-			outb(SDLA_HALT, base + SDLA_REG_CONTROL);
-			flp->type = SDLA_S508;
-			goto got_type;
-		}
-	}
-
-	outb(SDLA_S502A_HALT, base + SDLA_REG_CONTROL);
-	if (inb(base + SDLA_S502_STS) == 0x40) {
-		outb(SDLA_S502A_START, base + SDLA_REG_CONTROL);
-		if (inb(base + SDLA_S502_STS) == 0x40) {
-			outb(SDLA_S502A_INTEN, base + SDLA_REG_CONTROL);
-			if (inb(base + SDLA_S502_STS) == 0x44) {
-				outb(SDLA_S502A_START, base + SDLA_REG_CONTROL);
-				flp->type = SDLA_S502A;
-				goto got_type;
-			}
-		}
-	}
-
-	netdev_notice(dev, "Unknown card type\n");
-	err = -ENODEV;
-	goto fail;
-
-got_type:
-	switch(base) {
-		case 0x270:
-		case 0x280:
-		case 0x380: 
-		case 0x390:
-			if (flp->type != SDLA_S508 && flp->type != SDLA_S507)
-				goto fail;
-	}
-
-	switch (map->irq) {
-		case 2:
-			if (flp->type != SDLA_S502E)
-				goto fail;
-			break;
-
-		case 10:
-		case 11:
-		case 12:
-		case 15:
-		case 4:
-			if (flp->type != SDLA_S508 && flp->type != SDLA_S507)
-				goto fail;
-			break;
-		case 3:
-		case 5:
-		case 7:
-			if (flp->type == SDLA_S502A)
-				goto fail;
-			break;
-
-		default:
-			goto fail;
-	}
-
-	err = -EAGAIN;
-	if (request_irq(dev->irq, sdla_isr, 0, dev->name, dev)) 
-		goto fail;
-
-	if (flp->type == SDLA_S507) {
-		switch(dev->irq) {
-			case 3:
-				flp->state = SDLA_S507_IRQ3;
-				break;
-			case 4:
-				flp->state = SDLA_S507_IRQ4;
-				break;
-			case 5:
-				flp->state = SDLA_S507_IRQ5;
-				break;
-			case 7:
-				flp->state = SDLA_S507_IRQ7;
-				break;
-			case 10:
-				flp->state = SDLA_S507_IRQ10;
-				break;
-			case 11:
-				flp->state = SDLA_S507_IRQ11;
-				break;
-			case 12:
-				flp->state = SDLA_S507_IRQ12;
-				break;
-			case 15:
-				flp->state = SDLA_S507_IRQ15;
-				break;
-		}
-	}
-
-	for(i=0; i < ARRAY_SIZE(valid_mem); i++)
-		if (valid_mem[i] == map->mem_start)
-			break;   
-
-	err = -EINVAL;
-	if (i == ARRAY_SIZE(valid_mem))
-		goto fail2;
-
-	if (flp->type == SDLA_S502A && (map->mem_start & 0xF000) >> 12 == 0x0E)
-		goto fail2;
-
-	if (flp->type != SDLA_S507 && map->mem_start >> 16 == 0x0B)
-		goto fail2;
-
-	if (flp->type == SDLA_S507 && map->mem_start >> 16 == 0x0D)
-		goto fail2;
-
-	byte = flp->type != SDLA_S508 ? SDLA_8K_WINDOW : 0;
-	byte |= (map->mem_start & 0xF000) >> (12 + (flp->type == SDLA_S508 ? 1 : 0));
-	switch(flp->type) {
-		case SDLA_S502A:
-		case SDLA_S502E:
-			switch (map->mem_start >> 16) {
-				case 0x0A:
-					byte |= SDLA_S502_SEG_A;
-					break;
-				case 0x0C:
-					byte |= SDLA_S502_SEG_C;
-					break;
-				case 0x0D:
-					byte |= SDLA_S502_SEG_D;
-					break;
-				case 0x0E:
-					byte |= SDLA_S502_SEG_E;
-					break;
-			}
-			break;
-		case SDLA_S507:
-			switch (map->mem_start >> 16) {
-				case 0x0A:
-					byte |= SDLA_S507_SEG_A;
-					break;
-				case 0x0B:
-					byte |= SDLA_S507_SEG_B;
-					break;
-				case 0x0C:
-					byte |= SDLA_S507_SEG_C;
-					break;
-				case 0x0E:
-					byte |= SDLA_S507_SEG_E;
-					break;
-			}
-			break;
-		case SDLA_S508:
-			switch (map->mem_start >> 16) {
-				case 0x0A:
-					byte |= SDLA_S508_SEG_A;
-					break;
-				case 0x0C:
-					byte |= SDLA_S508_SEG_C;
-					break;
-				case 0x0D:
-					byte |= SDLA_S508_SEG_D;
-					break;
-				case 0x0E:
-					byte |= SDLA_S508_SEG_E;
-					break;
-			}
-			break;
-	}
-
-	/* set the memory bits, and enable access */
-	outb(byte, base + SDLA_REG_PC_WINDOW);
-
-	switch(flp->type)
-	{
-		case SDLA_S502E:
-			flp->state = SDLA_S502E_ENABLE;
-			break;
-		case SDLA_S507:
-			flp->state |= SDLA_MEMEN;
-			break;
-		case SDLA_S508:
-			flp->state = SDLA_MEMEN;
-			break;
-	}
-	outb(flp->state, base + SDLA_REG_CONTROL);
-
-	dev->irq = map->irq;
-	dev->base_addr = base;
-	dev->mem_start = map->mem_start;
-	dev->mem_end = dev->mem_start + 0x2000;
-	flp->initialized = 1;
-	return 0;
-
-fail2:
-	free_irq(map->irq, dev);
-fail:
-	release_region(base, SDLA_IO_EXTENTS);
-	return err;
-}
- 
-static const struct net_device_ops sdla_netdev_ops = {
-	.ndo_open	= sdla_open,
-	.ndo_stop	= sdla_close,
-	.ndo_do_ioctl	= sdla_ioctl,
-	.ndo_set_config	= sdla_set_config,
-	.ndo_start_xmit	= sdla_transmit,
-	.ndo_change_mtu	= sdla_change_mtu,
-};
-
-static void setup_sdla(struct net_device *dev)
-{
-	struct frad_local *flp = netdev_priv(dev);
-
-	netdev_boot_setup_check(dev);
-
-	dev->netdev_ops		= &sdla_netdev_ops;
-	dev->flags		= 0;
-	dev->type		= 0xFFFF;
-	dev->hard_header_len	= 0;
-	dev->addr_len		= 0;
-	dev->mtu		= SDLA_MAX_MTU;
-
-	flp->activate		= sdla_activate;
-	flp->deactivate		= sdla_deactivate;
-	flp->assoc		= sdla_assoc;
-	flp->deassoc		= sdla_deassoc;
-	flp->dlci_conf		= sdla_dlci_conf;
-	flp->dev		= dev;
-
-	timer_setup(&flp->timer, sdla_poll, 0);
-	flp->timer.expires	= 1;
-}
-
-static struct net_device *sdla;
-
-static int __init init_sdla(void)
-{
-	int err;
-
-	printk("%s.\n", version);
-
-	sdla = alloc_netdev(sizeof(struct frad_local), "sdla0",
-			    NET_NAME_UNKNOWN, setup_sdla);
-	if (!sdla) 
-		return -ENOMEM;
-
-	err = register_netdev(sdla);
-	if (err) 
-		free_netdev(sdla);
-
-	return err;
-}
-
-static void __exit exit_sdla(void)
-{
-	struct frad_local *flp = netdev_priv(sdla);
-
-	unregister_netdev(sdla);
-	if (flp->initialized) {
-		free_irq(sdla->irq, sdla);
-		release_region(sdla->base_addr, SDLA_IO_EXTENTS);
-	}
-	del_timer_sync(&flp->timer);
-	free_netdev(sdla);
-}
-
-MODULE_LICENSE("GPL");
-
-module_init(init_sdla);
-module_exit(exit_sdla);
diff --git a/include/linux/if_frad.h b/include/linux/if_frad.h
deleted file mode 100644
index 52224de798aa..000000000000
--- a/include/linux/if_frad.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * DLCI/FRAD	Definitions for Frame Relay Access Devices.  DLCI devices are
- *		created for each DLCI associated with a FRAD.  The FRAD driver
- *		is not truly a network device, but the lower level device
- *		handler.  This allows other FRAD manufacturers to use the DLCI
- *		code, including its RFC1490 encapsulation alongside the current
- *		implementation for the Sangoma cards.
- *
- * Version:	@(#)if_ifrad.h	0.15	31 Mar 96
- *
- * Author:	Mike McLagan <mike.mclagan@linux.org>
- *
- * Changes:
- *		0.15	Mike McLagan	changed structure defs (packed)
- *					re-arranged flags
- *					added DLCI_RET vars
- */
-#ifndef _FRAD_H_
-#define _FRAD_H_
-
-#include <uapi/linux/if_frad.h>
-
-
-#if defined(CONFIG_DLCI) || defined(CONFIG_DLCI_MODULE)
-
-/* these are the fields of an RFC 1490 header */
-struct frhdr
-{
-   unsigned char  control;
-
-   /* for IP packets, this can be the NLPID */
-   unsigned char  pad;
-
-   unsigned char  NLPID;
-   unsigned char  OUI[3];
-   __be16 PID;
-
-#define IP_NLPID pad 
-} __packed;
-
-/* see RFC 1490 for the definition of the following */
-#define FRAD_I_UI		0x03
-
-#define FRAD_P_PADDING		0x00
-#define FRAD_P_Q933		0x08
-#define FRAD_P_SNAP		0x80
-#define FRAD_P_CLNP		0x81
-#define FRAD_P_IP		0xCC
-
-struct dlci_local
-{
-   struct net_device      *master;
-   struct net_device      *slave;
-   struct dlci_conf       config;
-   int                    configured;
-   struct list_head	  list;
-
-   /* callback function */
-   void              (*receive)(struct sk_buff *skb, struct net_device *);
-};
-
-struct frad_local
-{
-   /* devices which this FRAD is slaved to */
-   struct net_device     *master[CONFIG_DLCI_MAX];
-   short             dlci[CONFIG_DLCI_MAX];
-
-   struct frad_conf  config;
-   int               configured;	/* has this device been configured */
-   int               initialized;	/* mem_start, port, irq set ? */
-
-   /* callback functions */
-   int               (*activate)(struct net_device *, struct net_device *);
-   int               (*deactivate)(struct net_device *, struct net_device *);
-   int               (*assoc)(struct net_device *, struct net_device *);
-   int               (*deassoc)(struct net_device *, struct net_device *);
-   int               (*dlci_conf)(struct net_device *, struct net_device *, int get);
-
-   /* fields that are used by the Sangoma SDLA cards */
-   struct timer_list timer;
-   struct net_device *dev;
-   int               type;		/* adapter type */
-   int               state;		/* state of the S502/8 control latch */
-   int               buffer;		/* current buffer for S508 firmware */
-};
-
-#endif /* CONFIG_DLCI || CONFIG_DLCI_MODULE */
-
-extern void dlci_ioctl_set(int (*hook)(unsigned int, void __user *));
-
-#endif
diff --git a/include/linux/sdla.h b/include/linux/sdla.h
deleted file mode 100644
index 00e8b3b614f0..000000000000
--- a/include/linux/sdla.h
+++ /dev/null
@@ -1,240 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * INET		An implementation of the TCP/IP protocol suite for the LINUX
- *		operating system.  INET is implemented using the  BSD Socket
- *		interface as the means of communication with the user level.
- *
- *		Global definitions for the Frame relay interface.
- *
- * Version:	@(#)if_ifrad.h	0.20	13 Apr 96
- *
- * Author:	Mike McLagan <mike.mclagan@linux.org>
- *
- * Changes:
- *		0.15	Mike McLagan	Structure packing
- *
- *		0.20	Mike McLagan	New flags for S508 buffer handling
- */
-#ifndef SDLA_H
-#define SDLA_H
-
-#include <uapi/linux/sdla.h>
-
-
-/* important Z80 window addresses */
-#define SDLA_CONTROL_WND		0xE000
-
-#define SDLA_502_CMD_BUF		0xEF60
-#define SDLA_502_RCV_BUF		0xA900
-#define	SDLA_502_TXN_AVAIL		0xFFF1
-#define SDLA_502_RCV_AVAIL		0xFFF2
-#define SDLA_502_EVENT_FLAGS		0xFFF3
-#define SDLA_502_MDM_STATUS		0xFFF4
-#define SDLA_502_IRQ_INTERFACE		0xFFFD
-#define SDLA_502_IRQ_PERMISSION		0xFFFE
-#define SDLA_502_DATA_OFS		0x0010
-
-#define SDLA_508_CMD_BUF		0xE000
-#define SDLA_508_TXBUF_INFO		0xF100
-#define SDLA_508_RXBUF_INFO		0xF120
-#define SDLA_508_EVENT_FLAGS		0xF003
-#define SDLA_508_MDM_STATUS		0xF004
-#define SDLA_508_IRQ_INTERFACE		0xF010
-#define SDLA_508_IRQ_PERMISSION		0xF011
-#define SDLA_508_TSE_OFFSET		0xF012
-
-/* Event flags */
-#define SDLA_EVENT_STATUS		0x01
-#define SDLA_EVENT_DLCI_STATUS		0x02
-#define SDLA_EVENT_BAD_DLCI		0x04
-#define SDLA_EVENT_LINK_DOWN		0x40
-
-/* IRQ Trigger flags */
-#define SDLA_INTR_RX			0x01
-#define SDLA_INTR_TX			0x02
-#define SDLA_INTR_MODEM			0x04
-#define SDLA_INTR_COMPLETE		0x08
-#define SDLA_INTR_STATUS		0x10
-#define SDLA_INTR_TIMER			0x20
-
-/* DLCI status bits */
-#define SDLA_DLCI_DELETED		0x01
-#define SDLA_DLCI_ACTIVE		0x02
-#define SDLA_DLCI_WAITING		0x04
-#define SDLA_DLCI_NEW			0x08
-#define SDLA_DLCI_INCLUDED		0x40
-
-/* valid command codes */
-#define	SDLA_INFORMATION_WRITE		0x01
-#define	SDLA_INFORMATION_READ		0x02
-#define SDLA_ISSUE_IN_CHANNEL_SIGNAL	0x03
-#define	SDLA_SET_DLCI_CONFIGURATION	0x10
-#define	SDLA_READ_DLCI_CONFIGURATION	0x11
-#define	SDLA_DISABLE_COMMUNICATIONS	0x12
-#define	SDLA_ENABLE_COMMUNICATIONS	0x13
-#define	SDLA_READ_DLC_STATUS		0x14
-#define	SDLA_READ_DLC_STATISTICS	0x15
-#define	SDLA_FLUSH_DLC_STATISTICS	0x16
-#define	SDLA_LIST_ACTIVE_DLCI		0x17
-#define	SDLA_FLUSH_INFORMATION_BUFFERS	0x18
-#define	SDLA_ADD_DLCI			0x20
-#define	SDLA_DELETE_DLCI		0x21
-#define	SDLA_ACTIVATE_DLCI		0x22
-#define	SDLA_DEACTIVATE_DLCI		0x23
-#define	SDLA_READ_MODEM_STATUS		0x30
-#define	SDLA_SET_MODEM_STATUS		0x31
-#define	SDLA_READ_COMMS_ERR_STATS	0x32
-#define SDLA_FLUSH_COMMS_ERR_STATS	0x33
-#define	SDLA_READ_CODE_VERSION		0x40
-#define SDLA_SET_IRQ_TRIGGER		0x50
-#define SDLA_GET_IRQ_TRIGGER		0x51
-
-/* In channel signal types */
-#define SDLA_ICS_LINK_VERIFY		0x02
-#define SDLA_ICS_STATUS_ENQ		0x03
-
-/* modem status flags */
-#define SDLA_MODEM_DTR_HIGH		0x01
-#define SDLA_MODEM_RTS_HIGH		0x02
-#define SDLA_MODEM_DCD_HIGH		0x08
-#define SDLA_MODEM_CTS_HIGH		0x20
-
-/* used for RET_MODEM interpretation */
-#define SDLA_MODEM_DCD_LOW		0x01
-#define SDLA_MODEM_CTS_LOW		0x02
-
-/* return codes */
-#define SDLA_RET_OK			0x00
-#define SDLA_RET_COMMUNICATIONS		0x01
-#define SDLA_RET_CHANNEL_INACTIVE	0x02
-#define SDLA_RET_DLCI_INACTIVE		0x03
-#define SDLA_RET_DLCI_CONFIG		0x04
-#define SDLA_RET_BUF_TOO_BIG		0x05
-#define SDLA_RET_NO_DATA		0x05
-#define SDLA_RET_BUF_OVERSIZE		0x06
-#define SDLA_RET_CIR_OVERFLOW		0x07
-#define SDLA_RET_NO_BUFS		0x08
-#define SDLA_RET_TIMEOUT		0x0A
-#define SDLA_RET_MODEM			0x10
-#define SDLA_RET_CHANNEL_OFF		0x11
-#define SDLA_RET_CHANNEL_ON		0x12
-#define SDLA_RET_DLCI_STATUS		0x13
-#define SDLA_RET_DLCI_UNKNOWN       	0x14
-#define SDLA_RET_COMMAND_INVALID    	0x1F
-
-/* Configuration flags */
-#define SDLA_DIRECT_RECV		0x0080
-#define SDLA_TX_NO_EXCEPT		0x0020
-#define SDLA_NO_ICF_MSGS		0x1000
-#define SDLA_TX50_RX50			0x0000
-#define SDLA_TX70_RX30			0x2000
-#define SDLA_TX30_RX70			0x4000
-
-/* IRQ selection flags */
-#define SDLA_IRQ_RECEIVE		0x01
-#define SDLA_IRQ_TRANSMIT		0x02
-#define SDLA_IRQ_MODEM_STAT		0x04
-#define SDLA_IRQ_COMMAND		0x08
-#define SDLA_IRQ_CHANNEL		0x10
-#define SDLA_IRQ_TIMER			0x20
-
-/* definitions for PC memory mapping */
-#define SDLA_8K_WINDOW			0x01
-#define SDLA_S502_SEG_A			0x10
-#define SDLA_S502_SEG_C			0x20
-#define SDLA_S502_SEG_D			0x00
-#define SDLA_S502_SEG_E			0x30
-#define SDLA_S507_SEG_A			0x00
-#define SDLA_S507_SEG_B			0x40
-#define SDLA_S507_SEG_C			0x80
-#define SDLA_S507_SEG_E			0xC0
-#define SDLA_S508_SEG_A			0x00
-#define SDLA_S508_SEG_C			0x10
-#define SDLA_S508_SEG_D			0x08
-#define SDLA_S508_SEG_E			0x18
-
-/* SDLA adapter port constants */
-#define SDLA_IO_EXTENTS			0x04
-	
-#define SDLA_REG_CONTROL		0x00
-#define SDLA_REG_PC_WINDOW		0x01	/* offset for PC window select latch */
-#define SDLA_REG_Z80_WINDOW 		0x02	/* offset for Z80 window select latch */
-#define SDLA_REG_Z80_CONTROL		0x03	/* offset for Z80 control latch */
-	
-#define SDLA_S502_STS			0x00	/* status reg for 502, 502E, 507 */
-#define SDLA_S508_GNRL			0x00	/* general purp. reg for 508 */
-#define SDLA_S508_STS			0x01	/* status reg for 508 */
-#define SDLA_S508_IDR			0x02	/* ID reg for 508 */
-	
-/* control register flags */
-#define SDLA_S502A_START		0x00	/* start the CPU */
-#define SDLA_S502A_INTREQ		0x02
-#define SDLA_S502A_INTEN		0x04
-#define SDLA_S502A_HALT			0x08	/* halt the CPU */	
-#define SDLA_S502A_NMI			0x10	/* issue an NMI to the CPU */
-
-#define SDLA_S502E_CPUEN		0x01
-#define SDLA_S502E_ENABLE		0x02
-#define SDLA_S502E_INTACK		0x04
-	
-#define SDLA_S507_ENABLE		0x01
-#define SDLA_S507_IRQ3			0x00
-#define SDLA_S507_IRQ4			0x20
-#define SDLA_S507_IRQ5			0x40
-#define SDLA_S507_IRQ7			0x60
-#define SDLA_S507_IRQ10			0x80
-#define SDLA_S507_IRQ11			0xA0
-#define SDLA_S507_IRQ12			0xC0
-#define SDLA_S507_IRQ15			0xE0
-	
-#define SDLA_HALT			0x00
-#define SDLA_CPUEN			0x02
-#define SDLA_MEMEN			0x04
-#define SDLA_S507_EPROMWR		0x08
-#define SDLA_S507_EPROMCLK		0x10
-#define SDLA_S508_INTRQ			0x08
-#define SDLA_S508_INTEN			0x10
-
-struct sdla_cmd {
-   char  opp_flag;
-   char  cmd;
-   short length;
-   char  retval;
-   short dlci;
-   char  flags;
-   short rxlost_int;
-   long  rxlost_app;
-   char  reserve[2];
-   char  data[SDLA_MAX_DATA];	/* transfer data buffer */
-} __attribute__((packed));
-
-struct intr_info {
-   char  flags;
-   short txlen;
-   char  irq;
-   char  flags2;
-   short timeout;
-} __attribute__((packed));
-
-/* found in the 508's control window at RXBUF_INFO */
-struct buf_info {
-   unsigned short rse_num;
-   unsigned long  rse_base;
-   unsigned long  rse_next;
-   unsigned long  buf_base;
-   unsigned short reserved;
-   unsigned long  buf_top;
-} __attribute__((packed));
-
-/* structure pointed to by rse_base in RXBUF_INFO struct */
-struct buf_entry {
-   char  opp_flag;
-   short length;
-   short dlci;
-   char  flags;
-   short timestamp;
-   short reserved[2];
-   long  buf_addr;
-} __attribute__((packed));
-
-#endif
diff --git a/include/uapi/linux/if_frad.h b/include/uapi/linux/if_frad.h
deleted file mode 100644
index 3c6ee85f6262..000000000000
--- a/include/uapi/linux/if_frad.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
-/*
- * DLCI/FRAD	Definitions for Frame Relay Access Devices.  DLCI devices are
- *		created for each DLCI associated with a FRAD.  The FRAD driver
- *		is not truly a network device, but the lower level device
- *		handler.  This allows other FRAD manufacturers to use the DLCI
- *		code, including its RFC1490 encapsulation alongside the current
- *		implementation for the Sangoma cards.
- *
- * Version:	@(#)if_ifrad.h	0.15	31 Mar 96
- *
- * Author:	Mike McLagan <mike.mclagan@linux.org>
- *
- * Changes:
- *		0.15	Mike McLagan	changed structure defs (packed)
- *					re-arranged flags
- *					added DLCI_RET vars
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- */
-
-#ifndef _UAPI_FRAD_H_
-#define _UAPI_FRAD_H_
-
-#include <linux/if.h>
-
-/* Structures and constants associated with the DLCI device driver */
-
-struct dlci_add
-{
-   char  devname[IFNAMSIZ];
-   short dlci;
-};
-
-#define DLCI_GET_CONF	(SIOCDEVPRIVATE + 2)
-#define DLCI_SET_CONF	(SIOCDEVPRIVATE + 3)
-
-/* 
- * These are related to the Sangoma SDLA and should remain in order. 
- * Code within the SDLA module is based on the specifics of this 
- * structure.  Change at your own peril.
- */
-struct dlci_conf {
-   short flags;
-   short CIR_fwd;
-   short Bc_fwd;
-   short Be_fwd;
-   short CIR_bwd;
-   short Bc_bwd;
-   short Be_bwd; 
-
-/* these are part of the status read */
-   short Tc_fwd;
-   short Tc_bwd;
-   short Tf_max;
-   short Tb_max;
-
-/* add any new fields here above is a mirror of sdla_dlci_conf */
-};
-
-#define DLCI_GET_SLAVE	(SIOCDEVPRIVATE + 4)
-
-/* configuration flags for DLCI */
-#define DLCI_IGNORE_CIR_OUT	0x0001
-#define DLCI_ACCOUNT_CIR_IN	0x0002
-#define DLCI_BUFFER_IF		0x0008
-
-#define DLCI_VALID_FLAGS	0x000B
-
-/* defines for the actual Frame Relay hardware */
-#define FRAD_GET_CONF	(SIOCDEVPRIVATE)
-#define FRAD_SET_CONF	(SIOCDEVPRIVATE + 1)
-
-#define FRAD_LAST_IOCTL	FRAD_SET_CONF
-
-/*
- * Based on the setup for the Sangoma SDLA.  If changes are 
- * necessary to this structure, a routine will need to be 
- * added to that module to copy fields.
- */
-struct frad_conf 
-{
-   short station;
-   short flags;
-   short kbaud;
-   short clocking;
-   short mtu;
-   short T391;
-   short T392;
-   short N391;
-   short N392;
-   short N393;
-   short CIR_fwd;
-   short Bc_fwd;
-   short Be_fwd;
-   short CIR_bwd;
-   short Bc_bwd;
-   short Be_bwd;
-
-/* Add new fields here, above is a mirror of the sdla_conf */
-
-};
-
-#define FRAD_STATION_CPE	0x0000
-#define FRAD_STATION_NODE	0x0001
-
-#define FRAD_TX_IGNORE_CIR	0x0001
-#define FRAD_RX_ACCOUNT_CIR	0x0002
-#define FRAD_DROP_ABORTED	0x0004
-#define FRAD_BUFFERIF		0x0008
-#define FRAD_STATS		0x0010
-#define FRAD_MCI		0x0100
-#define FRAD_AUTODLCI		0x8000
-#define FRAD_VALID_FLAGS	0x811F
-
-#define FRAD_CLOCK_INT		0x0001
-#define FRAD_CLOCK_EXT		0x0000
-
-
-#endif /* _UAPI_FRAD_H_ */
diff --git a/include/uapi/linux/sdla.h b/include/uapi/linux/sdla.h
deleted file mode 100644
index 1e3735be6511..000000000000
--- a/include/uapi/linux/sdla.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
-/*
- * INET		An implementation of the TCP/IP protocol suite for the LINUX
- *		operating system.  INET is implemented using the  BSD Socket
- *		interface as the means of communication with the user level.
- *
- *		Global definitions for the Frame relay interface.
- *
- * Version:	@(#)if_ifrad.h	0.20	13 Apr 96
- *
- * Author:	Mike McLagan <mike.mclagan@linux.org>
- *
- * Changes:
- *		0.15	Mike McLagan	Structure packing
- *
- *		0.20	Mike McLagan	New flags for S508 buffer handling
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- */
-
-#ifndef _UAPISDLA_H
-#define _UAPISDLA_H
-
-/* adapter type */
-#define SDLA_TYPES
-#define SDLA_S502A			5020
-#define SDLA_S502E			5021
-#define SDLA_S503			5030
-#define SDLA_S507			5070
-#define SDLA_S508			5080
-#define SDLA_S509			5090
-#define SDLA_UNKNOWN			-1
-
-/* port selection flags for the S508 */
-#define SDLA_S508_PORT_V35		0x00
-#define SDLA_S508_PORT_RS232		0x02
-
-/* Z80 CPU speeds */
-#define SDLA_CPU_3M			0x00
-#define SDLA_CPU_5M			0x01
-#define SDLA_CPU_7M			0x02
-#define SDLA_CPU_8M			0x03
-#define SDLA_CPU_10M			0x04
-#define SDLA_CPU_16M			0x05
-#define SDLA_CPU_12M			0x06
-
-/* some private IOCTLs */
-#define SDLA_IDENTIFY			(FRAD_LAST_IOCTL + 1)
-#define SDLA_CPUSPEED			(FRAD_LAST_IOCTL + 2)
-#define SDLA_PROTOCOL			(FRAD_LAST_IOCTL + 3)
-
-#define SDLA_CLEARMEM			(FRAD_LAST_IOCTL + 4)
-#define SDLA_WRITEMEM			(FRAD_LAST_IOCTL + 5)
-#define SDLA_READMEM			(FRAD_LAST_IOCTL + 6)
-
-struct sdla_mem {
-   int  addr;
-   int  len;
-   void __user *data;
-};
-
-#define SDLA_START			(FRAD_LAST_IOCTL + 7)
-#define SDLA_STOP			(FRAD_LAST_IOCTL + 8)
-
-/* some offsets in the Z80's memory space */
-#define SDLA_NMIADDR			0x0000
-#define SDLA_CONF_ADDR			0x0010
-#define SDLA_S502A_NMIADDR		0x0066
-#define SDLA_CODE_BASEADDR		0x0100
-#define SDLA_WINDOW_SIZE		0x2000
-#define SDLA_ADDR_MASK			0x1FFF
-
-/* largest handleable block of data */
-#define SDLA_MAX_DATA			4080
-#define SDLA_MAX_MTU			4072	/* MAX_DATA - sizeof(fradhdr) */
-#define SDLA_MAX_DLCI			24
-
-/* this should be the same as frad_conf */
-struct sdla_conf {
-   short station;
-   short config;
-   short kbaud;
-   short clocking;
-   short max_frm;
-   short T391;
-   short T392;
-   short N391;
-   short N392;
-   short N393;
-   short CIR_fwd;
-   short Bc_fwd;
-   short Be_fwd;
-   short CIR_bwd;
-   short Bc_bwd;
-   short Be_bwd;
-};
-
-/* this should be the same as dlci_conf */
-struct sdla_dlci_conf {
-   short config;
-   short CIR_fwd;
-   short Bc_fwd;
-   short Be_fwd;
-   short CIR_bwd;
-   short Bc_bwd;
-   short Be_bwd; 
-   short Tc_fwd;
-   short Tc_bwd;
-   short Tf_max;
-   short Tb_max;
-};
-
-
-#endif /* _UAPISDLA_H */
diff --git a/net/socket.c b/net/socket.c
index 6e6cccc2104f..152b1dcf93c6 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -64,7 +64,6 @@
 #include <linux/seq_file.h>
 #include <linux/mutex.h>
 #include <linux/if_bridge.h>
-#include <linux/if_frad.h>
 #include <linux/if_vlan.h>
 #include <linux/ptp_classify.h>
 #include <linux/init.h>
@@ -1027,17 +1026,6 @@ void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
 }
 EXPORT_SYMBOL(vlan_ioctl_set);
 
-static DEFINE_MUTEX(dlci_ioctl_mutex);
-static int (*dlci_ioctl_hook) (unsigned int, void __user *);
-
-void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
-{
-	mutex_lock(&dlci_ioctl_mutex);
-	dlci_ioctl_hook = hook;
-	mutex_unlock(&dlci_ioctl_mutex);
-}
-EXPORT_SYMBOL(dlci_ioctl_set);
-
 static long sock_do_ioctl(struct net *net, struct socket *sock,
 			  unsigned int cmd, unsigned long arg)
 {
@@ -1156,17 +1144,6 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 				err = vlan_ioctl_hook(net, argp);
 			mutex_unlock(&vlan_ioctl_mutex);
 			break;
-		case SIOCADDDLCI:
-		case SIOCDELDLCI:
-			err = -ENOPKG;
-			if (!dlci_ioctl_hook)
-				request_module("dlci");
-
-			mutex_lock(&dlci_ioctl_mutex);
-			if (dlci_ioctl_hook)
-				err = dlci_ioctl_hook(cmd, argp);
-			mutex_unlock(&dlci_ioctl_mutex);
-			break;
 		case SIOCGSKNS:
 			err = -EPERM;
 			if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
@@ -3427,8 +3404,6 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
 	case SIOCBRDELBR:
 	case SIOCGIFVLAN:
 	case SIOCSIFVLAN:
-	case SIOCADDDLCI:
-	case SIOCDELDLCI:
 	case SIOCGSKNS:
 	case SIOCGSTAMP_NEW:
 	case SIOCGSTAMPNS_NEW:
-- 
cgit v1.2.3


From 3f6719c7b62f0327c9091e26d0da10e65668229e Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Tue, 17 Nov 2020 23:29:28 +0000
Subject: bpf: Add bpf_bprm_opts_set helper

The helper allows modification of certain bits on the linux_binprm
struct starting with the secureexec bit which can be updated using the
BPF_F_BPRM_SECUREEXEC flag.

secureexec can be set by the LSM for privilege gaining executions to set
the AT_SECURE auxv for glibc.  When set, the dynamic linker disables the
use of certain environment variables (like LD_PRELOAD).

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201117232929.2156341-1-kpsingh@chromium.org
---
 include/uapi/linux/bpf.h       | 16 ++++++++++++++++
 kernel/bpf/bpf_lsm.c           | 26 ++++++++++++++++++++++++++
 scripts/bpf_helpers_doc.py     |  2 ++
 tools/include/uapi/linux/bpf.h | 16 ++++++++++++++++
 4 files changed, 60 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 162999b12790..a52299b80b9d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3787,6 +3787,16 @@ union bpf_attr {
  *		*ARG_PTR_TO_BTF_ID* of type *task_struct*.
  *	Return
  *		Pointer to the current task.
+ *
+ * long bpf_bprm_opts_set(struct linux_binprm *bprm, u64 flags)
+ *	Description
+ *		Set or clear certain options on *bprm*:
+ *
+ *		**BPF_F_BPRM_SECUREEXEC** Set the secureexec bit
+ *		which sets the **AT_SECURE** auxv for glibc. The bit
+ *		is cleared if the flag is not specified.
+ *	Return
+ *		**-EINVAL** if invalid *flags* are passed, zero otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3948,6 +3958,7 @@ union bpf_attr {
 	FN(task_storage_get),		\
 	FN(task_storage_delete),	\
 	FN(get_current_task_btf),	\
+	FN(bprm_opts_set),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -4119,6 +4130,11 @@ enum bpf_lwt_encap_mode {
 	BPF_LWT_ENCAP_IP,
 };
 
+/* Flags for bpf_bprm_opts_set helper */
+enum {
+	BPF_F_BPRM_SECUREEXEC	= (1ULL << 0),
+};
+
 #define __bpf_md_ptr(type, name)	\
 union {					\
 	type name;			\
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 553107f4706a..b4f27a874092 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -7,6 +7,7 @@
 #include <linux/filter.h>
 #include <linux/bpf.h>
 #include <linux/btf.h>
+#include <linux/binfmts.h>
 #include <linux/lsm_hooks.h>
 #include <linux/bpf_lsm.h>
 #include <linux/kallsyms.h>
@@ -51,6 +52,29 @@ int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
 	return 0;
 }
 
+/* Mask for all the currently supported BPRM option flags */
+#define BPF_F_BRPM_OPTS_MASK	BPF_F_BPRM_SECUREEXEC
+
+BPF_CALL_2(bpf_bprm_opts_set, struct linux_binprm *, bprm, u64, flags)
+{
+	if (flags & ~BPF_F_BRPM_OPTS_MASK)
+		return -EINVAL;
+
+	bprm->secureexec = (flags & BPF_F_BPRM_SECUREEXEC);
+	return 0;
+}
+
+BTF_ID_LIST_SINGLE(bpf_bprm_opts_set_btf_ids, struct, linux_binprm)
+
+const static struct bpf_func_proto bpf_bprm_opts_set_proto = {
+	.func		= bpf_bprm_opts_set,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_BTF_ID,
+	.arg1_btf_id	= &bpf_bprm_opts_set_btf_ids[0],
+	.arg2_type	= ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *
 bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -71,6 +95,8 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_task_storage_get_proto;
 	case BPF_FUNC_task_storage_delete:
 		return &bpf_task_storage_delete_proto;
+	case BPF_FUNC_bprm_opts_set:
+		return &bpf_bprm_opts_set_proto;
 	default:
 		return tracing_prog_func_proto(func_id, prog);
 	}
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
index 31484377b8b1..c5bc947a70ad 100755
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
@@ -418,6 +418,7 @@ class PrinterHelpers(Printer):
             'struct bpf_tcp_sock',
             'struct bpf_tunnel_key',
             'struct bpf_xfrm_state',
+            'struct linux_binprm',
             'struct pt_regs',
             'struct sk_reuseport_md',
             'struct sockaddr',
@@ -465,6 +466,7 @@ class PrinterHelpers(Printer):
             'struct bpf_tcp_sock',
             'struct bpf_tunnel_key',
             'struct bpf_xfrm_state',
+            'struct linux_binprm',
             'struct pt_regs',
             'struct sk_reuseport_md',
             'struct sockaddr',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 162999b12790..a52299b80b9d 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3787,6 +3787,16 @@ union bpf_attr {
  *		*ARG_PTR_TO_BTF_ID* of type *task_struct*.
  *	Return
  *		Pointer to the current task.
+ *
+ * long bpf_bprm_opts_set(struct linux_binprm *bprm, u64 flags)
+ *	Description
+ *		Set or clear certain options on *bprm*:
+ *
+ *		**BPF_F_BPRM_SECUREEXEC** Set the secureexec bit
+ *		which sets the **AT_SECURE** auxv for glibc. The bit
+ *		is cleared if the flag is not specified.
+ *	Return
+ *		**-EINVAL** if invalid *flags* are passed, zero otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3948,6 +3958,7 @@ union bpf_attr {
 	FN(task_storage_get),		\
 	FN(task_storage_delete),	\
 	FN(get_current_task_btf),	\
+	FN(bprm_opts_set),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -4119,6 +4130,11 @@ enum bpf_lwt_encap_mode {
 	BPF_LWT_ENCAP_IP,
 };
 
+/* Flags for bpf_bprm_opts_set helper */
+enum {
+	BPF_F_BPRM_SECUREEXEC	= (1ULL << 0),
+};
+
 #define __bpf_md_ptr(type, name)	\
 union {					\
 	type name;			\
-- 
cgit v1.2.3


From d055126180564a57fe533728a4e93d0cb53d49b3 Mon Sep 17 00:00:00 2001
From: Dmitrii Banshchikov <me@ubique.spb.ru>
Date: Tue, 17 Nov 2020 18:45:49 +0000
Subject: bpf: Add bpf_ktime_get_coarse_ns helper

The helper uses CLOCK_MONOTONIC_COARSE source of time that is less
accurate but more performant.

We have a BPF CGROUP_SKB firewall that supports event logging through
bpf_perf_event_output(). Each event has a timestamp and currently we use
bpf_ktime_get_ns() for it. Use of bpf_ktime_get_coarse_ns() saves ~15-20
ns in time required for event logging.

bpf_ktime_get_ns():
EgressLogByRemoteEndpoint                              113.82ns    8.79M

bpf_ktime_get_coarse_ns():
EgressLogByRemoteEndpoint                               95.40ns   10.48M

Signed-off-by: Dmitrii Banshchikov <me@ubique.spb.ru>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201117184549.257280-1-me@ubique.spb.ru
---
 include/linux/bpf.h            |  1 +
 include/uapi/linux/bpf.h       | 11 +++++++++++
 kernel/bpf/core.c              |  1 +
 kernel/bpf/helpers.c           | 13 +++++++++++++
 kernel/trace/bpf_trace.c       |  2 ++
 tools/include/uapi/linux/bpf.h | 11 +++++++++++
 6 files changed, 39 insertions(+)

(limited to 'include/uapi')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 581b2a2e78eb..e1bcb6d7345c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1842,6 +1842,7 @@ extern const struct bpf_func_proto bpf_copy_from_user_proto;
 extern const struct bpf_func_proto bpf_snprintf_btf_proto;
 extern const struct bpf_func_proto bpf_per_cpu_ptr_proto;
 extern const struct bpf_func_proto bpf_this_cpu_ptr_proto;
+extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto;
 
 const struct bpf_func_proto *bpf_tracing_func_proto(
 	enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a52299b80b9d..3ca6146f001a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3797,6 +3797,16 @@ union bpf_attr {
  *		is cleared if the flag is not specified.
  *	Return
  *		**-EINVAL** if invalid *flags* are passed, zero otherwise.
+ *
+ * u64 bpf_ktime_get_coarse_ns(void)
+ * 	Description
+ * 		Return a coarse-grained version of the time elapsed since
+ * 		system boot, in nanoseconds. Does not include time the system
+ * 		was suspended.
+ *
+ * 		See: **clock_gettime**\ (**CLOCK_MONOTONIC_COARSE**)
+ * 	Return
+ * 		Current *ktime*.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3959,6 +3969,7 @@ union bpf_attr {
 	FN(task_storage_delete),	\
 	FN(get_current_task_btf),	\
 	FN(bprm_opts_set),		\
+	FN(ktime_get_coarse_ns),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 55454d2278b1..ff55cbcfbab4 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2211,6 +2211,7 @@ const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
 const struct bpf_func_proto bpf_get_numa_node_id_proto __weak;
 const struct bpf_func_proto bpf_ktime_get_ns_proto __weak;
 const struct bpf_func_proto bpf_ktime_get_boot_ns_proto __weak;
+const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto __weak;
 
 const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
 const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 25520f5eeaf6..2c395deae279 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -167,6 +167,17 @@ const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = {
 	.ret_type	= RET_INTEGER,
 };
 
+BPF_CALL_0(bpf_ktime_get_coarse_ns)
+{
+	return ktime_get_coarse_ns();
+}
+
+const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto = {
+	.func		= bpf_ktime_get_coarse_ns,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+};
+
 BPF_CALL_0(bpf_get_current_pid_tgid)
 {
 	struct task_struct *task = current;
@@ -685,6 +696,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_ktime_get_ns_proto;
 	case BPF_FUNC_ktime_get_boot_ns:
 		return &bpf_ktime_get_boot_ns_proto;
+	case BPF_FUNC_ktime_get_coarse_ns:
+		return &bpf_ktime_get_coarse_ns_proto;
 	case BPF_FUNC_ringbuf_output:
 		return &bpf_ringbuf_output_proto;
 	case BPF_FUNC_ringbuf_reserve:
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 02986c7b90eb..d255bc9b2bfa 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1280,6 +1280,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_ktime_get_ns_proto;
 	case BPF_FUNC_ktime_get_boot_ns:
 		return &bpf_ktime_get_boot_ns_proto;
+	case BPF_FUNC_ktime_get_coarse_ns:
+		return &bpf_ktime_get_coarse_ns_proto;
 	case BPF_FUNC_tail_call:
 		return &bpf_tail_call_proto;
 	case BPF_FUNC_get_current_pid_tgid:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index a52299b80b9d..3ca6146f001a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3797,6 +3797,16 @@ union bpf_attr {
  *		is cleared if the flag is not specified.
  *	Return
  *		**-EINVAL** if invalid *flags* are passed, zero otherwise.
+ *
+ * u64 bpf_ktime_get_coarse_ns(void)
+ * 	Description
+ * 		Return a coarse-grained version of the time elapsed since
+ * 		system boot, in nanoseconds. Does not include time the system
+ * 		was suspended.
+ *
+ * 		See: **clock_gettime**\ (**CLOCK_MONOTONIC_COARSE**)
+ * 	Return
+ * 		Current *ktime*.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3959,6 +3969,7 @@ union bpf_attr {
 	FN(task_storage_delete),	\
 	FN(get_current_task_btf),	\
 	FN(bprm_opts_set),		\
+	FN(ktime_get_coarse_ns),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 480a6ca2dc6ed82c783faf7e4a9644769b8397d8 Mon Sep 17 00:00:00 2001
From: Dietmar Eggemann <dietmar.eggemann@arm.com>
Date: Fri, 13 Nov 2020 12:34:54 +0100
Subject: sched/uclamp: Allow to reset a task uclamp constraint value

In case the user wants to stop controlling a uclamp constraint value
for a task, use the magic value -1 in sched_util_{min,max} with the
appropriate sched_flags (SCHED_FLAG_UTIL_CLAMP_{MIN,MAX}) to indicate
the reset.

The advantage over the 'additional flag' approach (i.e. introducing
SCHED_FLAG_UTIL_CLAMP_RESET) is that no additional flag has to be
exported via uapi. This avoids the need to document how this new flag
has to be used in conjunction with the existing uclamp related flags.

The following subtle issue is fixed as well. When a uclamp constraint
value is set on a !user_defined uclamp_se it is currently first reset
and then set.
Fix this by AND'ing !user_defined with !SCHED_FLAG_UTIL_CLAMP which
stands for the 'sched class change' case.
The related condition 'if (uc_se->user_defined)' moved from
__setscheduler_uclamp() into uclamp_reset().

Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Yun Hsiang <hsiang023167@gmail.com>
Link: https://lkml.kernel.org/r/20201113113454.25868-1-dietmar.eggemann@arm.com
---
 include/uapi/linux/sched/types.h |  2 ++
 kernel/sched/core.c              | 70 +++++++++++++++++++++++++++++-----------
 2 files changed, 53 insertions(+), 19 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/sched/types.h b/include/uapi/linux/sched/types.h
index c852153ddb0d..f2c4589d4dbf 100644
--- a/include/uapi/linux/sched/types.h
+++ b/include/uapi/linux/sched/types.h
@@ -96,6 +96,8 @@ struct sched_param {
  * on a CPU with a capacity big enough to fit the specified value.
  * A task with a max utilization value smaller than 1024 is more likely
  * scheduled on a CPU with no more capacity than the specified value.
+ *
+ * A task utilization boundary can be reset by setting the attribute to -1.
  */
 struct sched_attr {
 	__u32 size;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a9e6d630eb83..e6473ecaab3c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1413,17 +1413,24 @@ done:
 static int uclamp_validate(struct task_struct *p,
 			   const struct sched_attr *attr)
 {
-	unsigned int lower_bound = p->uclamp_req[UCLAMP_MIN].value;
-	unsigned int upper_bound = p->uclamp_req[UCLAMP_MAX].value;
+	int util_min = p->uclamp_req[UCLAMP_MIN].value;
+	int util_max = p->uclamp_req[UCLAMP_MAX].value;
 
-	if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN)
-		lower_bound = attr->sched_util_min;
-	if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX)
-		upper_bound = attr->sched_util_max;
+	if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
+		util_min = attr->sched_util_min;
 
-	if (lower_bound > upper_bound)
-		return -EINVAL;
-	if (upper_bound > SCHED_CAPACITY_SCALE)
+		if (util_min + 1 > SCHED_CAPACITY_SCALE + 1)
+			return -EINVAL;
+	}
+
+	if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) {
+		util_max = attr->sched_util_max;
+
+		if (util_max + 1 > SCHED_CAPACITY_SCALE + 1)
+			return -EINVAL;
+	}
+
+	if (util_min != -1 && util_max != -1 && util_min > util_max)
 		return -EINVAL;
 
 	/*
@@ -1438,20 +1445,41 @@ static int uclamp_validate(struct task_struct *p,
 	return 0;
 }
 
+static bool uclamp_reset(const struct sched_attr *attr,
+			 enum uclamp_id clamp_id,
+			 struct uclamp_se *uc_se)
+{
+	/* Reset on sched class change for a non user-defined clamp value. */
+	if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)) &&
+	    !uc_se->user_defined)
+		return true;
+
+	/* Reset on sched_util_{min,max} == -1. */
+	if (clamp_id == UCLAMP_MIN &&
+	    attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN &&
+	    attr->sched_util_min == -1) {
+		return true;
+	}
+
+	if (clamp_id == UCLAMP_MAX &&
+	    attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX &&
+	    attr->sched_util_max == -1) {
+		return true;
+	}
+
+	return false;
+}
+
 static void __setscheduler_uclamp(struct task_struct *p,
 				  const struct sched_attr *attr)
 {
 	enum uclamp_id clamp_id;
 
-	/*
-	 * On scheduling class change, reset to default clamps for tasks
-	 * without a task-specific value.
-	 */
 	for_each_clamp_id(clamp_id) {
 		struct uclamp_se *uc_se = &p->uclamp_req[clamp_id];
+		unsigned int value;
 
-		/* Keep using defined clamps across class changes */
-		if (uc_se->user_defined)
+		if (!uclamp_reset(attr, clamp_id, uc_se))
 			continue;
 
 		/*
@@ -1459,21 +1487,25 @@ static void __setscheduler_uclamp(struct task_struct *p,
 		 * at runtime.
 		 */
 		if (unlikely(rt_task(p) && clamp_id == UCLAMP_MIN))
-			__uclamp_update_util_min_rt_default(p);
+			value = sysctl_sched_uclamp_util_min_rt_default;
 		else
-			uclamp_se_set(uc_se, uclamp_none(clamp_id), false);
+			value = uclamp_none(clamp_id);
+
+		uclamp_se_set(uc_se, value, false);
 
 	}
 
 	if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)))
 		return;
 
-	if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
+	if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN &&
+	    attr->sched_util_min != -1) {
 		uclamp_se_set(&p->uclamp_req[UCLAMP_MIN],
 			      attr->sched_util_min, true);
 	}
 
-	if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) {
+	if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX &&
+	    attr->sched_util_max != -1) {
 		uclamp_se_set(&p->uclamp_req[UCLAMP_MAX],
 			      attr->sched_util_max, true);
 	}
-- 
cgit v1.2.3


From 52aa300fabe5ee17fd1dc85cef4723816b62f4b6 Mon Sep 17 00:00:00 2001
From: Simon Ser <contact@emersion.fr>
Date: Thu, 19 Nov 2020 10:03:20 +0000
Subject: drm: improve kernel-docs in drm_mode.h

- Remove duplicate doc-comments for struct members
- Add missing @member markers for in-line member comments

Signed-off-by: Simon Ser <contact@emersion.fr>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/grZIqIAOSUM7eNL0PurBsaWoILFwN2hEKd40Ylgzg@cp7-web-041.plabs.ch
---
 include/uapi/drm/drm_mode.h | 66 ++++++++++++++++++++++-----------------------
 1 file changed, 32 insertions(+), 34 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 5ad10ab2a577..f29c1d37be67 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -905,24 +905,23 @@ struct drm_format_modifier {
 
 /**
  * struct drm_mode_create_blob - Create New block property
- * @data: Pointer to data to copy.
- * @length: Length of data to copy.
- * @blob_id: new property ID.
+ *
  * Create a new 'blob' data property, copying length bytes from data pointer,
  * and returning new blob ID.
  */
 struct drm_mode_create_blob {
-	/** Pointer to data to copy. */
+	/** @data: Pointer to data to copy. */
 	__u64 data;
-	/** Length of data to copy. */
+	/** @length: Length of data to copy. */
 	__u32 length;
-	/** Return: new property ID. */
+	/** @blob_id: Return: new property ID. */
 	__u32 blob_id;
 };
 
 /**
  * struct drm_mode_destroy_blob - Destroy user blob
  * @blob_id: blob_id to destroy
+ *
  * Destroy a user-created blob property.
  *
  * User-space can release blobs as soon as they do not need to refer to them by
@@ -937,36 +936,32 @@ struct drm_mode_destroy_blob {
 
 /**
  * struct drm_mode_create_lease - Create lease
- * @object_ids: Pointer to array of object ids.
- * @object_count: Number of object ids.
- * @flags: flags for new FD.
- * @lessee_id: unique identifier for lessee.
- * @fd: file descriptor to new drm_master file.
+ *
  * Lease mode resources, creating another drm_master.
  */
 struct drm_mode_create_lease {
-	/** Pointer to array of object ids (__u32) */
+	/** @object_ids: Pointer to array of object ids (__u32) */
 	__u64 object_ids;
-	/** Number of object ids */
+	/** @object_count: Number of object ids */
 	__u32 object_count;
-	/** flags for new FD (O_CLOEXEC, etc) */
+	/** @flags: flags for new FD (O_CLOEXEC, etc) */
 	__u32 flags;
 
-	/** Return: unique identifier for lessee. */
+	/** @lessee_id: Return: unique identifier for lessee. */
 	__u32 lessee_id;
-	/** Return: file descriptor to new drm_master file */
+	/** @fd: Return: file descriptor to new drm_master file */
 	__u32 fd;
 };
 
 /**
  * struct drm_mode_list_lessees - List lessees
- * @count_lessees: Number of lessees.
- * @pad: pad.
- * @lessees_ptr: Pointer to lessess.
- * List lesses from a drm_master
+ *
+ * List lesses from a drm_master.
  */
 struct drm_mode_list_lessees {
-	/** Number of lessees.
+	/**
+	 * @count_lessees: Number of lessees.
+	 *
 	 * On input, provides length of the array.
 	 * On output, provides total number. No
 	 * more than the input number will be written
@@ -974,23 +969,26 @@ struct drm_mode_list_lessees {
 	 * the size and then the data.
 	 */
 	__u32 count_lessees;
+	/** @pad: Padding. */
 	__u32 pad;
 
-	/** Pointer to lessees.
-	 * pointer to __u64 array of lessee ids
+	/**
+	 * @lessees_ptr: Pointer to lessees.
+	 *
+	 * Pointer to __u64 array of lessee ids
 	 */
 	__u64 lessees_ptr;
 };
 
 /**
  * struct drm_mode_get_lease - Get Lease
- * @count_objects: Number of leased objects.
- * @pad: pad.
- * @objects_ptr: Pointer to objects.
- * Get leased objects
+ *
+ * Get leased objects.
  */
 struct drm_mode_get_lease {
-	/** Number of leased objects.
+	/**
+	 * @count_objects: Number of leased objects.
+	 *
 	 * On input, provides length of the array.
 	 * On output, provides total number. No
 	 * more than the input number will be written
@@ -998,22 +996,22 @@ struct drm_mode_get_lease {
 	 * the size and then the data.
 	 */
 	__u32 count_objects;
+	/** @pad: Padding. */
 	__u32 pad;
 
-	/** Pointer to objects.
-	 * pointer to __u32 array of object ids
+	/**
+	 * @objects_ptr: Pointer to objects.
+	 *
+	 * Pointer to __u32 array of object ids.
 	 */
 	__u64 objects_ptr;
 };
 
 /**
  * struct drm_mode_revoke_lease - Revoke lease
- * @lessee_id: Unique ID of lessee.
- * Revoke lease
  */
 struct drm_mode_revoke_lease {
-	/** Unique ID of lessee
-	 */
+	/** @lessee_id: Unique ID of lessee */
 	__u32 lessee_id;
 };
 
-- 
cgit v1.2.3


From ea7800565a128c1adafa1791ce80afd6016fe21c Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Tue, 10 Nov 2020 11:18:45 +0100
Subject: can: add optional DLC element to Classical CAN frame structure

ISO 11898-1 Chapter 8.4.2.3 defines a 4 bit data length code (DLC) table which
maps the DLC to the payload length of the CAN frame in bytes:

    DLC      ->  payload length
    0 .. 8   ->  0 .. 8
    9 .. 15  ->  8

Although the DLC values 8 .. 15 in Classical CAN always result in a payload
length of 8 bytes these DLC values are transparently transmitted on the CAN
bus. As the struct can_frame only provides a 'len' element (formerly 'can_dlc')
which contains the plain payload length ( 0 .. 8 ) of the CAN frame, the raw
DLC is not visible to the application programmer, e.g. for testing use-cases.

To access the raw DLC values 9 .. 15 the len8_dlc element is introduced, which
is only valid when the payload length 'len' is 8 and the DLC is greater than 8.

The len8_dlc element is filled by the CAN interface driver and used for CAN
frame creation by the CAN driver when the CAN_CTRLMODE_CC_LEN8_DLC flag is
supported by the driver and enabled via netlink configuration interface.

Reported-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Link: https://lore.kernel.org/r/20201110101852.1973-2-socketcan@hartkopp.net
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/uapi/linux/can.h         | 38 ++++++++++++++++++++++++--------------
 include/uapi/linux/can/netlink.h |  1 +
 2 files changed, 25 insertions(+), 14 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h
index 6a6d2c7655ff..f75238ac6dce 100644
--- a/include/uapi/linux/can.h
+++ b/include/uapi/linux/can.h
@@ -84,6 +84,7 @@ typedef __u32 can_err_mask_t;
 
 /* CAN payload length and DLC definitions according to ISO 11898-1 */
 #define CAN_MAX_DLC 8
+#define CAN_MAX_RAW_DLC 15
 #define CAN_MAX_DLEN 8
 
 /* CAN FD payload length and DLC definitions according to ISO 11898-7 */
@@ -91,23 +92,32 @@ typedef __u32 can_err_mask_t;
 #define CANFD_MAX_DLEN 64
 
 /**
- * struct can_frame - basic CAN frame structure
- * @can_id:  CAN ID of the frame and CAN_*_FLAG flags, see canid_t definition
- * @can_dlc: frame payload length in byte (0 .. 8) aka data length code
- *           N.B. the DLC field from ISO 11898-1 Chapter 8.4.2.3 has a 1:1
- *           mapping of the 'data length code' to the real payload length
- * @__pad:   padding
- * @__res0:  reserved / padding
- * @__res1:  reserved / padding
- * @data:    CAN frame payload (up to 8 byte)
+ * struct can_frame - Classical CAN frame structure (aka CAN 2.0B)
+ * @can_id:   CAN ID of the frame and CAN_*_FLAG flags, see canid_t definition
+ * @len:      CAN frame payload length in byte (0 .. 8)
+ * @can_dlc:  deprecated name for CAN frame payload length in byte (0 .. 8)
+ * @__pad:    padding
+ * @__res0:   reserved / padding
+ * @len8_dlc: optional DLC value (9 .. 15) at 8 byte payload length
+ *            len8_dlc contains values from 9 .. 15 when the payload length is
+ *            8 bytes but the DLC value (see ISO 11898-1) is greater then 8.
+ *            CAN_CTRLMODE_CC_LEN8_DLC flag has to be enabled in CAN driver.
+ * @data:     CAN frame payload (up to 8 byte)
  */
 struct can_frame {
 	canid_t can_id;  /* 32 bit CAN_ID + EFF/RTR/ERR flags */
-	__u8    can_dlc; /* frame payload length in byte (0 .. CAN_MAX_DLEN) */
-	__u8    __pad;   /* padding */
-	__u8    __res0;  /* reserved / padding */
-	__u8    __res1;  /* reserved / padding */
-	__u8    data[CAN_MAX_DLEN] __attribute__((aligned(8)));
+	union {
+		/* CAN frame payload length in byte (0 .. CAN_MAX_DLEN)
+		 * was previously named can_dlc so we need to carry that
+		 * name for legacy support
+		 */
+		__u8 len;
+		__u8 can_dlc; /* deprecated */
+	};
+	__u8 __pad; /* padding */
+	__u8 __res0; /* reserved / padding */
+	__u8 len8_dlc; /* optional DLC for 8 byte payload length (9 .. 15) */
+	__u8 data[CAN_MAX_DLEN] __attribute__((aligned(8)));
 };
 
 /*
diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h
index 6f598b73839e..f730d443b918 100644
--- a/include/uapi/linux/can/netlink.h
+++ b/include/uapi/linux/can/netlink.h
@@ -100,6 +100,7 @@ struct can_ctrlmode {
 #define CAN_CTRLMODE_FD			0x20	/* CAN FD mode */
 #define CAN_CTRLMODE_PRESUME_ACK	0x40	/* Ignore missing CAN ACKs */
 #define CAN_CTRLMODE_FD_NON_ISO		0x80	/* CAN FD in non-ISO mode */
+#define CAN_CTRLMODE_CC_LEN8_DLC	0x100	/* Classic CAN DLC option */
 
 /*
  * CAN device statistics
-- 
cgit v1.2.3


From c12096085b623024416d02db435005cdf79a71bb Mon Sep 17 00:00:00 2001
From: Anthoine Bourgeois <anthoine.bourgeois@gmail.com>
Date: Wed, 18 Nov 2020 17:08:07 -0800
Subject: virtio-gpu api: Add a comment on VIRTIO_GPU_SHM_ID_HOST_VISIBLE

This provides a description of how the kernel driver uses the
shmid to determine capabilities.

Signed-off-by: Anthoine Bourgeois <anthoine.bourgeois@gmail.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20201119010809.528-3-gurchetansingh@chromium.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 include/uapi/linux/virtio_gpu.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h
index 0ec6b610402c..97523a95781d 100644
--- a/include/uapi/linux/virtio_gpu.h
+++ b/include/uapi/linux/virtio_gpu.h
@@ -115,6 +115,10 @@ enum virtio_gpu_ctrl_type {
 
 enum virtio_gpu_shm_id {
 	VIRTIO_GPU_SHM_ID_UNDEFINED = 0,
+	/*
+	 * VIRTIO_GPU_CMD_RESOURCE_MAP_BLOB
+	 * VIRTIO_GPU_CMD_RESOURCE_UNMAP_BLOB
+	 */
 	VIRTIO_GPU_SHM_ID_HOST_VISIBLE = 1
 };
 
-- 
cgit v1.2.3


From 2ac5ef3b23629e9740000948c48f4141bacb5abb Mon Sep 17 00:00:00 2001
From: Simon Ser <contact@emersion.fr>
Date: Fri, 20 Nov 2020 08:57:33 +0000
Subject: drm: document drm_mode_get_connector

Document how to perform a GETCONNECTOR ioctl. Document the various
struct fields. Also document how to perform a forced probe, and when
user-space should do it.

Signed-off-by: Simon Ser <contact@emersion.fr>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Pekka Paalanen <ppaalanen@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/4NxrTtynzPiPX4SOCzxmA1sRB8fVLfeiabVpi5j3Y@cp7-web-041.plabs.ch
---
 include/uapi/drm/drm_mode.h | 78 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 73 insertions(+), 5 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index f29c1d37be67..3979389fcc4f 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -368,27 +368,95 @@ enum drm_mode_subconnector {
 #define DRM_MODE_CONNECTOR_WRITEBACK	18
 #define DRM_MODE_CONNECTOR_SPI		19
 
+/**
+ * struct drm_mode_get_connector - Get connector metadata.
+ *
+ * User-space can perform a GETCONNECTOR ioctl to retrieve information about a
+ * connector. User-space is expected to retrieve encoders, modes and properties
+ * by performing this ioctl at least twice: the first time to retrieve the
+ * number of elements, the second time to retrieve the elements themselves.
+ *
+ * To retrieve the number of elements, set @count_props and @count_encoders to
+ * zero, set @count_modes to 1, and set @modes_ptr to a temporary struct
+ * drm_mode_modeinfo element.
+ *
+ * To retrieve the elements, allocate arrays for @encoders_ptr, @modes_ptr,
+ * @props_ptr and @prop_values_ptr, then set @count_modes, @count_props and
+ * @count_encoders to their capacity.
+ *
+ * Performing the ioctl only twice may be racy: the number of elements may have
+ * changed with a hotplug event in-between the two ioctls. User-space is
+ * expected to retry the last ioctl until the number of elements stabilizes.
+ * The kernel won't fill any array which doesn't have the expected length.
+ *
+ * **Force-probing a connector**
+ *
+ * If the @count_modes field is set to zero, the kernel will perform a forced
+ * probe on the connector to refresh the connector status, modes and EDID.
+ * A forced-probe can be slow and the ioctl will block. A force-probe can cause
+ * flickering and temporary freezes, so it should not be performed
+ * automatically.
+ *
+ * User-space shouldn't need to force-probe connectors in general: the kernel
+ * will automatically take care of probing connectors that don't support
+ * hot-plug detection when appropriate. However, user-space may force-probe
+ * connectors on user request (e.g. clicking a "Scan connectors" button, or
+ * opening a UI to manage screens).
+ */
 struct drm_mode_get_connector {
-
+	/** @encoders_ptr: Pointer to ``__u32`` array of object IDs. */
 	__u64 encoders_ptr;
+	/** @modes_ptr: Pointer to struct drm_mode_modeinfo array. */
 	__u64 modes_ptr;
+	/** @props_ptr: Pointer to ``__u32`` array of property IDs. */
 	__u64 props_ptr;
+	/** @prop_values_ptr: Pointer to ``__u64`` array of property values. */
 	__u64 prop_values_ptr;
 
+	/** @count_modes: Number of modes. */
 	__u32 count_modes;
+	/** @count_props: Number of properties. */
 	__u32 count_props;
+	/** @count_encoders: Number of encoders. */
 	__u32 count_encoders;
 
-	__u32 encoder_id; /**< Current Encoder */
-	__u32 connector_id; /**< Id */
+	/** @encoder_id: Object ID of the current encoder. */
+	__u32 encoder_id;
+	/** @connector_id: Object ID of the connector. */
+	__u32 connector_id;
+	/**
+	 * @connector_type: Type of the connector.
+	 *
+	 * See DRM_MODE_CONNECTOR_* defines.
+	 */
 	__u32 connector_type;
+	/**
+	 * @connector_type_id: Type-specific connector number.
+	 *
+	 * This is not an object ID. This is a per-type connector number. Each
+	 * (type, type_id) combination is unique across all connectors of a DRM
+	 * device.
+	 */
 	__u32 connector_type_id;
 
+	/**
+	 * @connection: Status of the connector.
+	 *
+	 * See enum drm_connector_status.
+	 */
 	__u32 connection;
-	__u32 mm_width;  /**< width in millimeters */
-	__u32 mm_height; /**< height in millimeters */
+	/** @mm_width: Width of the connected sink in millimeters. */
+	__u32 mm_width;
+	/** @mm_height: Height of the connected sink in millimeters. */
+	__u32 mm_height;
+	/**
+	 * @subpixel: Subpixel order of the connected sink.
+	 *
+	 * See enum subpixel_order.
+	 */
 	__u32 subpixel;
 
+	/** @pad: Padding, must be zero. */
 	__u32 pad;
 };
 
-- 
cgit v1.2.3


From 22f0d89805a44c06a263f36a0d0f192f333df16e Mon Sep 17 00:00:00 2001
From: Simon Ser <contact@emersion.fr>
Date: Fri, 20 Nov 2020 09:46:28 +0000
Subject: drm: document drm_mode_modeinfo

This allows `struct drm_mode_modeinfo` references to be linkified.

Some descriptions are borrowed from struct drm_display_mode.

Signed-off-by: Simon Ser <contact@emersion.fr>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/BBtyuxgs3DvcrMtbRyb7KBEWUviGy1dtWO61eB4@cp3-web-016.plabs.ch
---
 include/drm/drm_modes.h     |  3 +++
 include/uapi/drm/drm_mode.h | 21 +++++++++++++++++++++
 2 files changed, 24 insertions(+)

(limited to 'include/uapi')

diff --git a/include/drm/drm_modes.h b/include/drm/drm_modes.h
index cdf2a299ccd4..a0d79d1c51e2 100644
--- a/include/drm/drm_modes.h
+++ b/include/drm/drm_modes.h
@@ -195,6 +195,9 @@ enum drm_mode_status {
  * @crtc_vsync_end: hardware mode vertical sync end
  * @crtc_vtotal: hardware mode vertical total size
  *
+ * This is the kernel API display mode information structure. For the
+ * user-space version see struct drm_mode_modeinfo.
+ *
  * The horizontal and vertical timings are defined per the following diagram.
  *
  * ::
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 3979389fcc4f..b49fbf2bdc40 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -218,6 +218,27 @@ extern "C" {
 #define DRM_MODE_CONTENT_PROTECTION_DESIRED     1
 #define DRM_MODE_CONTENT_PROTECTION_ENABLED     2
 
+/**
+ * struct drm_mode_modeinfo - Display mode information.
+ * @clock: pixel clock in kHz
+ * @hdisplay: horizontal display size
+ * @hsync_start: horizontal sync start
+ * @hsync_end: horizontal sync end
+ * @htotal: horizontal total size
+ * @hskew: horizontal skew
+ * @vdisplay: vertical display size
+ * @vsync_start: vertical sync start
+ * @vsync_end: vertical sync end
+ * @vtotal: vertical total size
+ * @vscan: vertical scan
+ * @vrefresh: approximate vertical refresh rate in Hz
+ * @flags: bitmask of misc. flags, see DRM_MODE_FLAG_* defines
+ * @type: bitmask of type flags, see DRM_MODE_TYPE_* defines
+ * @name: string describing the mode resolution
+ *
+ * This is the user-space API display mode information structure. For the
+ * kernel version see struct drm_display_mode.
+ */
 struct drm_mode_modeinfo {
 	__u32 clock;
 	__u16 hdisplay;
-- 
cgit v1.2.3


From 94c23097f991cd4568388564b3d2816b0b83f924 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Thu, 19 Nov 2020 09:49:21 +0100
Subject: can: gw: support modification of Classical CAN DLCs

Add support for data length code modifications for Classical CAN.

The netlink configuration interface has always allowed passing any value
that fits into a byte, therefore only the modification process had to be
extended to handle the raw DLC representation of Classical CAN frames.

When a DLC value from 0 .. F is provided for Classical CAN frame
modifications the 'len' value is modified as-is with the exception that
potentially existing 9 .. F DLC values in the len8_dlc element are moved
to the 'len' element for the modification operation by mod_retrieve_ccdlc().

After the modification the Classical CAN frame DLC information is brought
back into the correct format by mod_store_ccdlc() which is filling 'len'
and 'len8_dlc' accordingly.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Link: https://lore.kernel.org/r/20201119084921.2621-1-socketcan@hartkopp.net
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/uapi/linux/can/gw.h |  4 +--
 net/can/gw.c                | 78 ++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 72 insertions(+), 10 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/can/gw.h b/include/uapi/linux/can/gw.h
index c2190bbe21d8..e4f0957554f3 100644
--- a/include/uapi/linux/can/gw.h
+++ b/include/uapi/linux/can/gw.h
@@ -98,8 +98,8 @@ enum {
 
 /* CAN frame elements that are affected by curr. 3 CAN frame modifications */
 #define CGW_MOD_ID	0x01
-#define CGW_MOD_DLC	0x02		/* contains the data length in bytes */
-#define CGW_MOD_LEN	CGW_MOD_DLC	/* CAN FD length representation */
+#define CGW_MOD_DLC	0x02		/* Classical CAN data length code */
+#define CGW_MOD_LEN	CGW_MOD_DLC	/* CAN FD (plain) data length */
 #define CGW_MOD_DATA	0x04
 #define CGW_MOD_FLAGS	0x08		/* CAN FD flags */
 
diff --git a/net/can/gw.c b/net/can/gw.c
index de5e8859ec9b..8598d9da0e5f 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -199,6 +199,68 @@ static void mod_set_fddata(struct canfd_frame *cf, struct cf_mod *mod)
 	memcpy(cf->data, mod->modframe.set.data, CANFD_MAX_DLEN);
 }
 
+/* retrieve valid CC DLC value and store it into 'len' */
+static void mod_retrieve_ccdlc(struct canfd_frame *cf)
+{
+	struct can_frame *ccf = (struct can_frame *)cf;
+
+	/* len8_dlc is only valid if len == CAN_MAX_DLEN */
+	if (ccf->len != CAN_MAX_DLEN)
+		return;
+
+	/* do we have a valid len8_dlc value from 9 .. 15 ? */
+	if (ccf->len8_dlc > CAN_MAX_DLEN && ccf->len8_dlc <= CAN_MAX_RAW_DLC)
+		ccf->len = ccf->len8_dlc;
+}
+
+/* convert valid CC DLC value in 'len' into struct can_frame elements */
+static void mod_store_ccdlc(struct canfd_frame *cf)
+{
+	struct can_frame *ccf = (struct can_frame *)cf;
+
+	/* clear potential leftovers */
+	ccf->len8_dlc = 0;
+
+	/* plain data length 0 .. 8 - that was easy */
+	if (ccf->len <= CAN_MAX_DLEN)
+		return;
+
+	/* potentially broken values are caught in can_can_gw_rcv() */
+	if (ccf->len > CAN_MAX_RAW_DLC)
+		return;
+
+	/* we have a valid dlc value from 9 .. 15 in ccf->len */
+	ccf->len8_dlc = ccf->len;
+	ccf->len = CAN_MAX_DLEN;
+}
+
+static void mod_and_ccdlc(struct canfd_frame *cf, struct cf_mod *mod)
+{
+	mod_retrieve_ccdlc(cf);
+	mod_and_len(cf, mod);
+	mod_store_ccdlc(cf);
+}
+
+static void mod_or_ccdlc(struct canfd_frame *cf, struct cf_mod *mod)
+{
+	mod_retrieve_ccdlc(cf);
+	mod_or_len(cf, mod);
+	mod_store_ccdlc(cf);
+}
+
+static void mod_xor_ccdlc(struct canfd_frame *cf, struct cf_mod *mod)
+{
+	mod_retrieve_ccdlc(cf);
+	mod_xor_len(cf, mod);
+	mod_store_ccdlc(cf);
+}
+
+static void mod_set_ccdlc(struct canfd_frame *cf, struct cf_mod *mod)
+{
+	mod_set_len(cf, mod);
+	mod_store_ccdlc(cf);
+}
+
 static void canframecpy(struct canfd_frame *dst, struct can_frame *src)
 {
 	/* Copy the struct members separately to ensure that no uninitialized
@@ -842,8 +904,8 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
 			if (mb.modtype & CGW_MOD_ID)
 				mod->modfunc[modidx++] = mod_and_id;
 
-			if (mb.modtype & CGW_MOD_LEN)
-				mod->modfunc[modidx++] = mod_and_len;
+			if (mb.modtype & CGW_MOD_DLC)
+				mod->modfunc[modidx++] = mod_and_ccdlc;
 
 			if (mb.modtype & CGW_MOD_DATA)
 				mod->modfunc[modidx++] = mod_and_data;
@@ -858,8 +920,8 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
 			if (mb.modtype & CGW_MOD_ID)
 				mod->modfunc[modidx++] = mod_or_id;
 
-			if (mb.modtype & CGW_MOD_LEN)
-				mod->modfunc[modidx++] = mod_or_len;
+			if (mb.modtype & CGW_MOD_DLC)
+				mod->modfunc[modidx++] = mod_or_ccdlc;
 
 			if (mb.modtype & CGW_MOD_DATA)
 				mod->modfunc[modidx++] = mod_or_data;
@@ -874,8 +936,8 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
 			if (mb.modtype & CGW_MOD_ID)
 				mod->modfunc[modidx++] = mod_xor_id;
 
-			if (mb.modtype & CGW_MOD_LEN)
-				mod->modfunc[modidx++] = mod_xor_len;
+			if (mb.modtype & CGW_MOD_DLC)
+				mod->modfunc[modidx++] = mod_xor_ccdlc;
 
 			if (mb.modtype & CGW_MOD_DATA)
 				mod->modfunc[modidx++] = mod_xor_data;
@@ -890,8 +952,8 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
 			if (mb.modtype & CGW_MOD_ID)
 				mod->modfunc[modidx++] = mod_set_id;
 
-			if (mb.modtype & CGW_MOD_LEN)
-				mod->modfunc[modidx++] = mod_set_len;
+			if (mb.modtype & CGW_MOD_DLC)
+				mod->modfunc[modidx++] = mod_set_ccdlc;
 
 			if (mb.modtype & CGW_MOD_DATA)
 				mod->modfunc[modidx++] = mod_set_data;
-- 
cgit v1.2.3


From 341917490d7d68d2f7267a265b8820fc3f8ead1b Mon Sep 17 00:00:00 2001
From: Gustavo Pimentel <Gustavo.Pimentel@synopsys.com>
Date: Wed, 18 Nov 2020 23:49:20 +0100
Subject: PCI: Decode PCIe 64 GT/s link speed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PCIe r6.0, sec 7.5.3.18, defines a new 64.0 GT/s bit in the Supported Link
Speeds Vector of Link Capabilities 2.

This patch does not affect the speed of the link, which should be
negotiated automatically by the hardware; it only adds decoding when
showing the speed to the user.

Decode this new speed.  Previously, reading the speed of a link operating
at this speed showed "Unknown speed" instead of "64.0 GT/s".

Link: https://lore.kernel.org/r/aaaab33fe18975e123a84aebce2adb85f44e2bbe.1605739760.git.gustavo.pimentel@synopsys.com
Signed-off-by: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Krzysztof Wilczyński <kw@linux.com>
---
 drivers/pci/pci.h             | 6 ++++--
 drivers/pci/probe.c           | 3 ++-
 include/linux/pci.h           | 1 +
 include/uapi/linux/pci_regs.h | 4 ++++
 4 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index f86cae9aa1f4..81bf905b545c 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -294,7 +294,8 @@ void pci_bus_put(struct pci_bus *bus);
 
 /* PCIe link information from Link Capabilities 2 */
 #define PCIE_LNKCAP2_SLS2SPEED(lnkcap2) \
-	((lnkcap2) & PCI_EXP_LNKCAP2_SLS_32_0GB ? PCIE_SPEED_32_0GT : \
+	((lnkcap2) & PCI_EXP_LNKCAP2_SLS_64_0GB ? PCIE_SPEED_64_0GT : \
+	 (lnkcap2) & PCI_EXP_LNKCAP2_SLS_32_0GB ? PCIE_SPEED_32_0GT : \
 	 (lnkcap2) & PCI_EXP_LNKCAP2_SLS_16_0GB ? PCIE_SPEED_16_0GT : \
 	 (lnkcap2) & PCI_EXP_LNKCAP2_SLS_8_0GB ? PCIE_SPEED_8_0GT : \
 	 (lnkcap2) & PCI_EXP_LNKCAP2_SLS_5_0GB ? PCIE_SPEED_5_0GT : \
@@ -303,7 +304,8 @@ void pci_bus_put(struct pci_bus *bus);
 
 /* PCIe speed to Mb/s reduced by encoding overhead */
 #define PCIE_SPEED2MBS_ENC(speed) \
-	((speed) == PCIE_SPEED_32_0GT ? 32000*128/130 : \
+	((speed) == PCIE_SPEED_64_0GT ? 64000*128/130 : \
+	 (speed) == PCIE_SPEED_32_0GT ? 32000*128/130 : \
 	 (speed) == PCIE_SPEED_16_0GT ? 16000*128/130 : \
 	 (speed) == PCIE_SPEED_8_0GT  ?  8000*128/130 : \
 	 (speed) == PCIE_SPEED_5_0GT  ?  5000*8/10 : \
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 4289030b0fff..fe2e00f5fc4c 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -677,7 +677,7 @@ const unsigned char pcie_link_speed[] = {
 	PCIE_SPEED_8_0GT,		/* 3 */
 	PCIE_SPEED_16_0GT,		/* 4 */
 	PCIE_SPEED_32_0GT,		/* 5 */
-	PCI_SPEED_UNKNOWN,		/* 6 */
+	PCIE_SPEED_64_0GT,		/* 6 */
 	PCI_SPEED_UNKNOWN,		/* 7 */
 	PCI_SPEED_UNKNOWN,		/* 8 */
 	PCI_SPEED_UNKNOWN,		/* 9 */
@@ -719,6 +719,7 @@ const char *pci_speed_string(enum pci_bus_speed speed)
 	    "8.0 GT/s PCIe",		/* 0x16 */
 	    "16.0 GT/s PCIe",		/* 0x17 */
 	    "32.0 GT/s PCIe",		/* 0x18 */
+	    "64.0 GT/s PCIe",		/* 0x19 */
 	};
 
 	if (speed < ARRAY_SIZE(speed_strings))
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 22207a79762c..e007bc3e8b6e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -281,6 +281,7 @@ enum pci_bus_speed {
 	PCIE_SPEED_8_0GT		= 0x16,
 	PCIE_SPEED_16_0GT		= 0x17,
 	PCIE_SPEED_32_0GT		= 0x18,
+	PCIE_SPEED_64_0GT		= 0x19,
 	PCI_SPEED_UNKNOWN		= 0xff,
 };
 
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index a95d55f9f257..fe9d5dba2ba1 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -531,6 +531,7 @@
 #define  PCI_EXP_LNKCAP_SLS_8_0GB 0x00000003 /* LNKCAP2 SLS Vector bit 2 */
 #define  PCI_EXP_LNKCAP_SLS_16_0GB 0x00000004 /* LNKCAP2 SLS Vector bit 3 */
 #define  PCI_EXP_LNKCAP_SLS_32_0GB 0x00000005 /* LNKCAP2 SLS Vector bit 4 */
+#define  PCI_EXP_LNKCAP_SLS_64_0GB 0x00000006 /* LNKCAP2 SLS Vector bit 5 */
 #define  PCI_EXP_LNKCAP_MLW	0x000003f0 /* Maximum Link Width */
 #define  PCI_EXP_LNKCAP_ASPMS	0x00000c00 /* ASPM Support */
 #define  PCI_EXP_LNKCAP_ASPM_L0S 0x00000400 /* ASPM L0s Support */
@@ -562,6 +563,7 @@
 #define  PCI_EXP_LNKSTA_CLS_8_0GB 0x0003 /* Current Link Speed 8.0GT/s */
 #define  PCI_EXP_LNKSTA_CLS_16_0GB 0x0004 /* Current Link Speed 16.0GT/s */
 #define  PCI_EXP_LNKSTA_CLS_32_0GB 0x0005 /* Current Link Speed 32.0GT/s */
+#define  PCI_EXP_LNKSTA_CLS_64_0GB 0x0006 /* Current Link Speed 64.0GT/s */
 #define  PCI_EXP_LNKSTA_NLW	0x03f0	/* Negotiated Link Width */
 #define  PCI_EXP_LNKSTA_NLW_X1	0x0010	/* Current Link Width x1 */
 #define  PCI_EXP_LNKSTA_NLW_X2	0x0020	/* Current Link Width x2 */
@@ -670,6 +672,7 @@
 #define  PCI_EXP_LNKCAP2_SLS_8_0GB	0x00000008 /* Supported Speed 8GT/s */
 #define  PCI_EXP_LNKCAP2_SLS_16_0GB	0x00000010 /* Supported Speed 16GT/s */
 #define  PCI_EXP_LNKCAP2_SLS_32_0GB	0x00000020 /* Supported Speed 32GT/s */
+#define  PCI_EXP_LNKCAP2_SLS_64_0GB	0x00000040 /* Supported Speed 64GT/s */
 #define  PCI_EXP_LNKCAP2_CROSSLINK	0x00000100 /* Crosslink supported */
 #define PCI_EXP_LNKCTL2		48	/* Link Control 2 */
 #define  PCI_EXP_LNKCTL2_TLS		0x000f
@@ -678,6 +681,7 @@
 #define  PCI_EXP_LNKCTL2_TLS_8_0GT	0x0003 /* Supported Speed 8GT/s */
 #define  PCI_EXP_LNKCTL2_TLS_16_0GT	0x0004 /* Supported Speed 16GT/s */
 #define  PCI_EXP_LNKCTL2_TLS_32_0GT	0x0005 /* Supported Speed 32GT/s */
+#define  PCI_EXP_LNKCTL2_TLS_64_0GT	0x0006 /* Supported Speed 64GT/s */
 #define  PCI_EXP_LNKCTL2_ENTER_COMP	0x0010 /* Enter Compliance */
 #define  PCI_EXP_LNKCTL2_TX_MARGIN	0x0380 /* Transmit Margin */
 #define  PCI_EXP_LNKCTL2_HASD		0x0020 /* HW Autonomous Speed Disable */
-- 
cgit v1.2.3


From 36f4fa6886a81266d7c82b1c90a65205e73a7c85 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sat, 5 Sep 2020 11:14:22 -0600
Subject: io_uring: add support for shutdown(2)

This adds support for the shutdown(2) system call, which is useful for
dealing with sockets.

shutdown(2) may block, so we have to punt it to async context.

Suggested-by: Norman Maurer <norman.maurer@googlemail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c                 | 52 +++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/io_uring.h |  1 +
 2 files changed, 53 insertions(+)

(limited to 'include/uapi')

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 3cc1e59dd789..d17198733f6a 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -541,6 +541,11 @@ struct io_statx {
 	struct statx __user		*buffer;
 };
 
+struct io_shutdown {
+	struct file			*file;
+	int				how;
+};
+
 struct io_completion {
 	struct file			*file;
 	struct list_head		list;
@@ -667,6 +672,7 @@ struct io_kiocb {
 		struct io_splice	splice;
 		struct io_provide_buf	pbuf;
 		struct io_statx		statx;
+		struct io_shutdown	shutdown;
 		/* use only after cleaning per-op data, see io_clean_op() */
 		struct io_completion	compl;
 	};
@@ -934,6 +940,9 @@ static const struct io_op_def io_op_defs[] = {
 		.hash_reg_file		= 1,
 		.unbound_nonreg_file	= 1,
 	},
+	[IORING_OP_SHUTDOWN] = {
+		.needs_file		= 1,
+	},
 };
 
 enum io_mem_account {
@@ -3591,6 +3600,44 @@ out_free:
 	return ret;
 }
 
+static int io_shutdown_prep(struct io_kiocb *req,
+			    const struct io_uring_sqe *sqe)
+{
+#if defined(CONFIG_NET)
+	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+		return -EINVAL;
+	if (sqe->ioprio || sqe->off || sqe->addr || sqe->rw_flags ||
+	    sqe->buf_index)
+		return -EINVAL;
+
+	req->shutdown.how = READ_ONCE(sqe->len);
+	return 0;
+#else
+	return -EOPNOTSUPP;
+#endif
+}
+
+static int io_shutdown(struct io_kiocb *req, bool force_nonblock)
+{
+#if defined(CONFIG_NET)
+	struct socket *sock;
+	int ret;
+
+	if (force_nonblock)
+		return -EAGAIN;
+
+	sock = sock_from_file(req->file, &ret);
+	if (unlikely(!sock))
+		return ret;
+
+	ret = __sys_shutdown_sock(sock, req->shutdown.how);
+	io_req_complete(req, ret);
+	return 0;
+#else
+	return -EOPNOTSUPP;
+#endif
+}
+
 static int __io_splice_prep(struct io_kiocb *req,
 			    const struct io_uring_sqe *sqe)
 {
@@ -5775,6 +5822,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return io_remove_buffers_prep(req, sqe);
 	case IORING_OP_TEE:
 		return io_tee_prep(req, sqe);
+	case IORING_OP_SHUTDOWN:
+		return io_shutdown_prep(req, sqe);
 	}
 
 	printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
@@ -6018,6 +6067,9 @@ static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock,
 	case IORING_OP_TEE:
 		ret = io_tee(req, force_nonblock);
 		break;
+	case IORING_OP_SHUTDOWN:
+		ret = io_shutdown(req, force_nonblock);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 98d8e06dea22..e943bf07c959 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -132,6 +132,7 @@ enum {
 	IORING_OP_PROVIDE_BUFFERS,
 	IORING_OP_REMOVE_BUFFERS,
 	IORING_OP_TEE,
+	IORING_OP_SHUTDOWN,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
-- 
cgit v1.2.3


From 1d82b7898f2ad9cc414805aef23b99b742218f10 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Thu, 12 Nov 2020 18:53:33 -0800
Subject: arch: move SA_* definitions to generic headers

Most architectures with the exception of alpha, mips, parisc and
sparc use the same values for these flags. Move their definitions into
asm-generic/signal-defs.h and allow the architectures with non-standard
values to override them. Also, document the non-standard flag values
in order to make it easier to add new generic flags in the future.

A consequence of this change is that on powerpc and x86, the constants'
values aside from SA_RESETHAND change signedness from unsigned
to signed. This is not expected to impact realistic use of these
constants. In particular the typical use of the constants where they
are or'ed together and assigned to sa_flags (or another int variable)
would not be affected.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Link: https://linux-review.googlesource.com/id/Ia3849f18b8009bf41faca374e701cdca36974528
Link: https://lkml.kernel.org/r/b6d0d1ec34f9ee93e1105f14f288fba5f89d1f24.1605235762.git.pcc@google.com
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 arch/alpha/include/uapi/asm/signal.h   | 14 ----------
 arch/arm/include/uapi/asm/signal.h     | 27 +++----------------
 arch/h8300/include/uapi/asm/signal.h   | 24 -----------------
 arch/ia64/include/uapi/asm/signal.h    | 24 -----------------
 arch/m68k/include/uapi/asm/signal.h    | 24 -----------------
 arch/mips/include/uapi/asm/signal.h    | 12 ---------
 arch/parisc/include/uapi/asm/signal.h  | 13 ----------
 arch/powerpc/include/uapi/asm/signal.h | 24 -----------------
 arch/s390/include/uapi/asm/signal.h    | 24 -----------------
 arch/sparc/include/uapi/asm/signal.h   |  4 +--
 arch/x86/include/uapi/asm/signal.h     | 24 -----------------
 arch/xtensa/include/uapi/asm/signal.h  | 24 -----------------
 include/uapi/asm-generic/signal-defs.h | 47 ++++++++++++++++++++++++++++++++++
 include/uapi/asm-generic/signal.h      | 29 ---------------------
 14 files changed, 51 insertions(+), 263 deletions(-)

(limited to 'include/uapi')

diff --git a/arch/alpha/include/uapi/asm/signal.h b/arch/alpha/include/uapi/asm/signal.h
index 74c750bf1c1a..a69dd8d080a8 100644
--- a/arch/alpha/include/uapi/asm/signal.h
+++ b/arch/alpha/include/uapi/asm/signal.h
@@ -60,20 +60,6 @@ typedef unsigned long sigset_t;
 #define SIGRTMIN	32
 #define SIGRTMAX	_NSIG
 
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-
 #define SA_ONSTACK	0x00000001
 #define SA_RESTART	0x00000002
 #define SA_NOCLDSTOP	0x00000004
diff --git a/arch/arm/include/uapi/asm/signal.h b/arch/arm/include/uapi/asm/signal.h
index 9b4185ba4f8a..c9a3ea1d8d41 100644
--- a/arch/arm/include/uapi/asm/signal.h
+++ b/arch/arm/include/uapi/asm/signal.h
@@ -60,33 +60,12 @@ typedef unsigned long sigset_t;
 #define SIGSWI		32
 
 /*
- * SA_FLAGS values:
- *
- * SA_NOCLDSTOP		flag to turn off SIGCHLD when children stop.
- * SA_NOCLDWAIT		flag on SIGCHLD to inhibit zombies.
- * SA_SIGINFO		deliver the signal with SIGINFO structs
- * SA_THIRTYTWO		delivers the signal in 32-bit mode, even if the task 
- *			is running in 26-bit.
- * SA_ONSTACK		allows alternate signal stacks (see sigaltstack(2)).
- * SA_RESTART		flag to get restarting signals (which were the default long ago)
- * SA_NODEFER		prevents the current signal from being masked in the handler.
- * SA_RESETHAND		clears the handler when the signal is delivered.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
+ * SA_THIRTYTWO historically meant deliver the signal in 32-bit mode, even if
+ * the task is running in 26-bit. But since the kernel no longer supports
+ * 26-bit mode, the flag has no effect.
  */
-#define SA_NOCLDSTOP	0x00000001
-#define SA_NOCLDWAIT	0x00000002
-#define SA_SIGINFO	0x00000004
 #define SA_THIRTYTWO	0x02000000
 #define SA_RESTORER	0x04000000
-#define SA_ONSTACK	0x08000000
-#define SA_RESTART	0x10000000
-#define SA_NODEFER	0x40000000
-#define SA_RESETHAND	0x80000000
-
-#define SA_NOMASK	SA_NODEFER
-#define SA_ONESHOT	SA_RESETHAND
 
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
diff --git a/arch/h8300/include/uapi/asm/signal.h b/arch/h8300/include/uapi/asm/signal.h
index e15521037348..2cd0dce2b6a6 100644
--- a/arch/h8300/include/uapi/asm/signal.h
+++ b/arch/h8300/include/uapi/asm/signal.h
@@ -57,30 +57,6 @@ typedef unsigned long sigset_t;
 #define SIGRTMIN	32
 #define SIGRTMAX	_NSIG
 
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP	0x00000001
-#define SA_NOCLDWAIT	0x00000002 /* not supported yet */
-#define SA_SIGINFO	0x00000004
-#define SA_ONSTACK	0x08000000
-#define SA_RESTART	0x10000000
-#define SA_NODEFER	0x40000000
-#define SA_RESETHAND	0x80000000
-
-#define SA_NOMASK	SA_NODEFER
-#define SA_ONESHOT	SA_RESETHAND
-
 #define SA_RESTORER	0x04000000
 
 #define MINSIGSTKSZ	2048
diff --git a/arch/ia64/include/uapi/asm/signal.h b/arch/ia64/include/uapi/asm/signal.h
index aa98ff1b9e22..38166a88e4c9 100644
--- a/arch/ia64/include/uapi/asm/signal.h
+++ b/arch/ia64/include/uapi/asm/signal.h
@@ -53,30 +53,6 @@
 #define SIGRTMIN	32
 #define SIGRTMAX	_NSIG
 
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP	0x00000001
-#define SA_NOCLDWAIT	0x00000002
-#define SA_SIGINFO	0x00000004
-#define SA_ONSTACK	0x08000000
-#define SA_RESTART	0x10000000
-#define SA_NODEFER	0x40000000
-#define SA_RESETHAND	0x80000000
-
-#define SA_NOMASK	SA_NODEFER
-#define SA_ONESHOT	SA_RESETHAND
-
 #define SA_RESTORER	0x04000000
 
 /*
diff --git a/arch/m68k/include/uapi/asm/signal.h b/arch/m68k/include/uapi/asm/signal.h
index 915cc755a184..4619291df601 100644
--- a/arch/m68k/include/uapi/asm/signal.h
+++ b/arch/m68k/include/uapi/asm/signal.h
@@ -57,30 +57,6 @@ typedef unsigned long sigset_t;
 #define SIGRTMIN	32
 #define SIGRTMAX	_NSIG
 
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP	0x00000001
-#define SA_NOCLDWAIT	0x00000002
-#define SA_SIGINFO	0x00000004
-#define SA_ONSTACK	0x08000000
-#define SA_RESTART	0x10000000
-#define SA_NODEFER	0x40000000
-#define SA_RESETHAND	0x80000000
-
-#define SA_NOMASK	SA_NODEFER
-#define SA_ONESHOT	SA_RESETHAND
-
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
diff --git a/arch/mips/include/uapi/asm/signal.h b/arch/mips/include/uapi/asm/signal.h
index 53104b10aae2..e6c78a15cb2f 100644
--- a/arch/mips/include/uapi/asm/signal.h
+++ b/arch/mips/include/uapi/asm/signal.h
@@ -62,18 +62,6 @@ typedef unsigned long old_sigset_t;		/* at least 32 bits */
 #define SIGRTMAX	_NSIG
 
 /*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- *
  * SA_RESTORER used to be defined as 0x04000000 but only the O32 ABI ever
  * supported its use and no libc was using it, so the entire sa-restorer
  * functionality was removed with lmo commit 39bffc12c3580ab for 2.5.48
diff --git a/arch/parisc/include/uapi/asm/signal.h b/arch/parisc/include/uapi/asm/signal.h
index 9e6f87bc8a73..e5a2657477ac 100644
--- a/arch/parisc/include/uapi/asm/signal.h
+++ b/arch/parisc/include/uapi/asm/signal.h
@@ -41,19 +41,6 @@
 #define SIGRTMIN	32
 #define SIGRTMAX	_NSIG
 
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
 #define SA_ONSTACK	0x00000001
 #define SA_RESETHAND	0x00000004
 #define SA_NOCLDSTOP	0x00000008
diff --git a/arch/powerpc/include/uapi/asm/signal.h b/arch/powerpc/include/uapi/asm/signal.h
index 85b0a7aa43e7..04873dd311c2 100644
--- a/arch/powerpc/include/uapi/asm/signal.h
+++ b/arch/powerpc/include/uapi/asm/signal.h
@@ -60,30 +60,6 @@ typedef struct {
 #define SIGRTMIN	32
 #define SIGRTMAX	_NSIG
 
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK is not currently supported, but will allow sigaltstack(2).
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP	0x00000001U
-#define SA_NOCLDWAIT	0x00000002U
-#define SA_SIGINFO	0x00000004U
-#define SA_ONSTACK	0x08000000U
-#define SA_RESTART	0x10000000U
-#define SA_NODEFER	0x40000000U
-#define SA_RESETHAND	0x80000000U
-
-#define SA_NOMASK	SA_NODEFER
-#define SA_ONESHOT	SA_RESETHAND
-
 #define SA_RESTORER	0x04000000U
 
 #define MINSIGSTKSZ	2048
diff --git a/arch/s390/include/uapi/asm/signal.h b/arch/s390/include/uapi/asm/signal.h
index 9a14a611ed82..0189f326aac5 100644
--- a/arch/s390/include/uapi/asm/signal.h
+++ b/arch/s390/include/uapi/asm/signal.h
@@ -65,30 +65,6 @@ typedef unsigned long sigset_t;
 #define SIGRTMIN        32
 #define SIGRTMAX        _NSIG
 
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP    0x00000001
-#define SA_NOCLDWAIT    0x00000002
-#define SA_SIGINFO      0x00000004
-#define SA_ONSTACK      0x08000000
-#define SA_RESTART      0x10000000
-#define SA_NODEFER      0x40000000
-#define SA_RESETHAND    0x80000000
-
-#define SA_NOMASK       SA_NODEFER
-#define SA_ONESHOT      SA_RESETHAND
-
 #define SA_RESTORER     0x04000000
 
 #define MINSIGSTKSZ     2048
diff --git a/arch/sparc/include/uapi/asm/signal.h b/arch/sparc/include/uapi/asm/signal.h
index ff9505923b9a..53758d53ac0e 100644
--- a/arch/sparc/include/uapi/asm/signal.h
+++ b/arch/sparc/include/uapi/asm/signal.h
@@ -137,13 +137,11 @@ struct sigstack {
 #define SA_STACK	_SV_SSTACK
 #define SA_ONSTACK	_SV_SSTACK
 #define SA_RESTART	_SV_INTR
-#define SA_ONESHOT	_SV_RESET
+#define SA_RESETHAND	_SV_RESET
 #define SA_NODEFER	0x20u
 #define SA_NOCLDWAIT    0x100u
 #define SA_SIGINFO      0x200u
 
-#define SA_NOMASK	SA_NODEFER
-
 #define SIG_BLOCK          0x01	/* for blocking signals */
 #define SIG_UNBLOCK        0x02	/* for unblocking signals */
 #define SIG_SETMASK        0x04	/* for setting the signal mask */
diff --git a/arch/x86/include/uapi/asm/signal.h b/arch/x86/include/uapi/asm/signal.h
index e5745d593dc7..164a22a72984 100644
--- a/arch/x86/include/uapi/asm/signal.h
+++ b/arch/x86/include/uapi/asm/signal.h
@@ -62,30 +62,6 @@ typedef unsigned long sigset_t;
 #define SIGRTMIN	32
 #define SIGRTMAX	_NSIG
 
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP	0x00000001u
-#define SA_NOCLDWAIT	0x00000002u
-#define SA_SIGINFO	0x00000004u
-#define SA_ONSTACK	0x08000000u
-#define SA_RESTART	0x10000000u
-#define SA_NODEFER	0x40000000u
-#define SA_RESETHAND	0x80000000u
-
-#define SA_NOMASK	SA_NODEFER
-#define SA_ONESHOT	SA_RESETHAND
-
 #define SA_RESTORER	0x04000000
 
 #define MINSIGSTKSZ	2048
diff --git a/arch/xtensa/include/uapi/asm/signal.h b/arch/xtensa/include/uapi/asm/signal.h
index 005dec5bfde4..79ddabaa4e5d 100644
--- a/arch/xtensa/include/uapi/asm/signal.h
+++ b/arch/xtensa/include/uapi/asm/signal.h
@@ -72,30 +72,6 @@ typedef struct {
 #define SIGRTMIN	32
 #define SIGRTMAX	(_NSIG-1)
 
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP	0x00000001
-#define SA_NOCLDWAIT	0x00000002 /* not supported yet */
-#define SA_SIGINFO	0x00000004
-#define SA_ONSTACK	0x08000000
-#define SA_RESTART	0x10000000
-#define SA_NODEFER	0x40000000
-#define SA_RESETHAND	0x80000000
-
-#define SA_NOMASK	SA_NODEFER
-#define SA_ONESHOT	SA_RESETHAND
-
 #define SA_RESTORER	0x04000000
 
 #define MINSIGSTKSZ	2048
diff --git a/include/uapi/asm-generic/signal-defs.h b/include/uapi/asm-generic/signal-defs.h
index e9304c95ceea..493953fe319b 100644
--- a/include/uapi/asm-generic/signal-defs.h
+++ b/include/uapi/asm-generic/signal-defs.h
@@ -4,6 +4,53 @@
 
 #include <linux/compiler.h>
 
+/*
+ * SA_FLAGS values:
+ *
+ * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
+ * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
+ * SA_SIGINFO delivers the signal with SIGINFO structs.
+ * SA_ONSTACK indicates that a registered stack_t will be used.
+ * SA_RESTART flag to get restarting signals (which were the default long ago)
+ * SA_NODEFER prevents the current signal from being masked in the handler.
+ * SA_RESETHAND clears the handler when the signal is delivered.
+ *
+ * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
+ * Unix names RESETHAND and NODEFER respectively.
+ *
+ * The following bits are used in architecture-specific SA_* definitions and
+ * should be avoided for new generic flags: 3, 4, 5, 6, 7, 8, 9, 16, 24, 25, 26.
+ */
+#ifndef SA_NOCLDSTOP
+#define SA_NOCLDSTOP	0x00000001
+#endif
+#ifndef SA_NOCLDWAIT
+#define SA_NOCLDWAIT	0x00000002
+#endif
+#ifndef SA_SIGINFO
+#define SA_SIGINFO	0x00000004
+#endif
+#ifndef SA_ONSTACK
+#define SA_ONSTACK	0x08000000
+#endif
+#ifndef SA_RESTART
+#define SA_RESTART	0x10000000
+#endif
+#ifndef SA_NODEFER
+#define SA_NODEFER	0x40000000
+#endif
+#ifndef SA_RESETHAND
+#define SA_RESETHAND	0x80000000
+#endif
+
+#define SA_NOMASK	SA_NODEFER
+#define SA_ONESHOT	SA_RESETHAND
+
+/*
+ * New architectures should not define the obsolete
+ *	SA_RESTORER	0x04000000
+ */
+
 #ifndef SIG_BLOCK
 #define SIG_BLOCK          0	/* for blocking signals */
 #endif
diff --git a/include/uapi/asm-generic/signal.h b/include/uapi/asm-generic/signal.h
index 5c716a952cbe..f634822906e4 100644
--- a/include/uapi/asm-generic/signal.h
+++ b/include/uapi/asm-generic/signal.h
@@ -52,35 +52,6 @@
 #define SIGRTMAX	_NSIG
 #endif
 
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP	0x00000001
-#define SA_NOCLDWAIT	0x00000002
-#define SA_SIGINFO	0x00000004
-#define SA_ONSTACK	0x08000000
-#define SA_RESTART	0x10000000
-#define SA_NODEFER	0x40000000
-#define SA_RESETHAND	0x80000000
-
-#define SA_NOMASK	SA_NODEFER
-#define SA_ONESHOT	SA_RESETHAND
-
-/*
- * New architectures should not define the obsolete
- *	SA_RESTORER	0x04000000
- */
-
 #if !defined MINSIGSTKSZ || !defined SIGSTKSZ
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
-- 
cgit v1.2.3


From 7da5082a2f9a1d16eded00c204fdb52a855c2bb2 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Mon, 16 Nov 2020 19:17:24 -0800
Subject: arch: provide better documentation for the arch-specific SA_* flags

Instead of documenting the arch-specific flag values in a comment at
the top where they may be easily overlooked, document them in comments
inline with the definitions in numerical order so that it is clear
why specific values must be chosen for new generic flags and to reduce
the likelihood of conflicts between generic and arch-specific flags.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Link: https://linux-review.googlesource.com/id/I40a129cf7c3a71ba1bfd6d936c544072ee3b7ce6
Link: https://lkml.kernel.org/r/198c8b68c76bf3ed73117d817c7cdf9bc0eb174f.1605582887.git.pcc@google.com
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/uapi/asm-generic/signal-defs.h | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/asm-generic/signal-defs.h b/include/uapi/asm-generic/signal-defs.h
index 493953fe319b..44f070982752 100644
--- a/include/uapi/asm-generic/signal-defs.h
+++ b/include/uapi/asm-generic/signal-defs.h
@@ -17,9 +17,6 @@
  *
  * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
  * Unix names RESETHAND and NODEFER respectively.
- *
- * The following bits are used in architecture-specific SA_* definitions and
- * should be avoided for new generic flags: 3, 4, 5, 6, 7, 8, 9, 16, 24, 25, 26.
  */
 #ifndef SA_NOCLDSTOP
 #define SA_NOCLDSTOP	0x00000001
@@ -30,6 +27,20 @@
 #ifndef SA_SIGINFO
 #define SA_SIGINFO	0x00000004
 #endif
+/* 0x00000008 used on alpha, mips, parisc */
+/* 0x00000010 used on alpha, parisc */
+/* 0x00000020 used on alpha, parisc, sparc */
+/* 0x00000040 used on alpha, parisc */
+/* 0x00000080 used on parisc */
+/* 0x00000100 used on sparc */
+/* 0x00000200 used on sparc */
+/* 0x00010000 used on mips */
+/* 0x01000000 used on x86 */
+/* 0x02000000 used on x86 */
+/*
+ * New architectures should not define the obsolete
+ *	SA_RESTORER	0x04000000
+ */
 #ifndef SA_ONSTACK
 #define SA_ONSTACK	0x08000000
 #endif
@@ -46,11 +57,6 @@
 #define SA_NOMASK	SA_NODEFER
 #define SA_ONESHOT	SA_RESETHAND
 
-/*
- * New architectures should not define the obsolete
- *	SA_RESTORER	0x04000000
- */
-
 #ifndef SIG_BLOCK
 #define SIG_BLOCK          0	/* for blocking signals */
 #endif
-- 
cgit v1.2.3


From a54f0dfda754c5cecc89a14dab68a3edc1e497b5 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Mon, 16 Nov 2020 19:17:25 -0800
Subject: signal: define the SA_UNSUPPORTED bit in sa_flags

Define a sa_flags bit, SA_UNSUPPORTED, which will never be supported
in the uapi. The purpose of this flag bit is to allow userspace to
distinguish an old kernel that does not clear unknown sa_flags bits
from a kernel that supports every flag bit.

In other words, if userspace does something like:

  act.sa_flags |= SA_UNSUPPORTED;
  sigaction(SIGSEGV, &act, 0);
  sigaction(SIGSEGV, 0, &oldact);

and finds that SA_UNSUPPORTED remains set in oldact.sa_flags, it means
that the kernel cannot be trusted to have cleared unknown flag bits
from sa_flags, so no assumptions about flag bit support can be made.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Link: https://linux-review.googlesource.com/id/Ic2501ad150a3a79c1cf27fb8c99be342e9dffbcb
Link: https://lkml.kernel.org/r/bda7ddff8895a9bc4ffc5f3cf3d4d37a32118077.1605582887.git.pcc@google.com
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/uapi/asm-generic/signal-defs.h | 7 +++++++
 kernel/signal.c                        | 6 ++++++
 2 files changed, 13 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/asm-generic/signal-defs.h b/include/uapi/asm-generic/signal-defs.h
index 44f070982752..c790f67304ba 100644
--- a/include/uapi/asm-generic/signal-defs.h
+++ b/include/uapi/asm-generic/signal-defs.h
@@ -14,6 +14,12 @@
  * SA_RESTART flag to get restarting signals (which were the default long ago)
  * SA_NODEFER prevents the current signal from being masked in the handler.
  * SA_RESETHAND clears the handler when the signal is delivered.
+ * SA_UNSUPPORTED is a flag bit that will never be supported. Kernels from
+ * before the introduction of SA_UNSUPPORTED did not clear unknown bits from
+ * sa_flags when read using the oldact argument to sigaction and rt_sigaction,
+ * so this bit allows flag bit support to be detected from userspace while
+ * allowing an old kernel to be distinguished from a kernel that supports every
+ * flag bit.
  *
  * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
  * Unix names RESETHAND and NODEFER respectively.
@@ -34,6 +40,7 @@
 /* 0x00000080 used on parisc */
 /* 0x00000100 used on sparc */
 /* 0x00000200 used on sparc */
+#define SA_UNSUPPORTED	0x00000400
 /* 0x00010000 used on mips */
 /* 0x01000000 used on x86 */
 /* 0x02000000 used on x86 */
diff --git a/kernel/signal.c b/kernel/signal.c
index 8f5bd12ee41b..8f34819e80de 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3985,6 +3985,12 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
 	if (oact)
 		*oact = *k;
 
+	/*
+	 * Make sure that we never accidentally claim to support SA_UNSUPPORTED,
+	 * e.g. by having an architecture use the bit in their uapi.
+	 */
+	BUILD_BUG_ON(UAPI_SA_FLAGS & SA_UNSUPPORTED);
+
 	/*
 	 * Clear unknown flag bits in order to allow userspace to detect missing
 	 * support for flag bits and to allow the kernel to use non-uapi bits
-- 
cgit v1.2.3


From 6ac05e832a9e96f9b1c42a8917cdd317d7b6c8fa Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Fri, 20 Nov 2020 12:33:45 -0800
Subject: signal: define the SA_EXPOSE_TAGBITS bit in sa_flags

Architectures that support address tagging, such as arm64, may want to
expose fault address tag bits to the signal handler to help diagnose
memory errors. However, these bits have not been previously set,
and their presence may confuse unaware user applications. Therefore,
introduce a SA_EXPOSE_TAGBITS flag bit in sa_flags that a signal
handler may use to explicitly request that the bits are set.

The generic signal handler APIs expect to receive tagged addresses.
Architectures may specify how to untag addresses in the case where
SA_EXPOSE_TAGBITS is clear by defining the arch_untagged_si_addr
function.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Link: https://linux-review.googlesource.com/id/I16dd0ed2081f091fce97be0190cb8caa874c26cb
Link: https://lkml.kernel.org/r/13cf24d00ebdd8e1f55caf1821c7c29d54100191.1605904350.git.pcc@google.com
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/signal.h                 | 14 ++++++++++++++
 include/linux/signal_types.h           |  2 +-
 include/uapi/asm-generic/signal-defs.h |  3 +++
 kernel/signal.c                        | 24 ++++++++++++++++++++++++
 4 files changed, 42 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index b256f9c65661..205526c4003a 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -469,4 +469,18 @@ struct seq_file;
 extern void render_sigset_t(struct seq_file *, const char *, sigset_t *);
 #endif
 
+#ifndef arch_untagged_si_addr
+/*
+ * Given a fault address and a signal and si_code which correspond to the
+ * _sigfault union member, returns the address that must appear in si_addr if
+ * the signal handler does not have SA_EXPOSE_TAGBITS enabled in sa_flags.
+ */
+static inline void __user *arch_untagged_si_addr(void __user *addr,
+						 unsigned long sig,
+						 unsigned long si_code)
+{
+	return addr;
+}
+#endif
+
 #endif /* _LINUX_SIGNAL_H */
diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h
index a7887ad84d36..68e06c75c5b2 100644
--- a/include/linux/signal_types.h
+++ b/include/linux/signal_types.h
@@ -78,6 +78,6 @@ struct ksignal {
 
 #define UAPI_SA_FLAGS                                                          \
 	(SA_NOCLDSTOP | SA_NOCLDWAIT | SA_SIGINFO | SA_ONSTACK | SA_RESTART |  \
-	 SA_NODEFER | SA_RESETHAND | __ARCH_UAPI_SA_FLAGS)
+	 SA_NODEFER | SA_RESETHAND | SA_EXPOSE_TAGBITS | __ARCH_UAPI_SA_FLAGS)
 
 #endif /* _LINUX_SIGNAL_TYPES_H */
diff --git a/include/uapi/asm-generic/signal-defs.h b/include/uapi/asm-generic/signal-defs.h
index c790f67304ba..fe929e7b77ca 100644
--- a/include/uapi/asm-generic/signal-defs.h
+++ b/include/uapi/asm-generic/signal-defs.h
@@ -20,6 +20,8 @@
  * so this bit allows flag bit support to be detected from userspace while
  * allowing an old kernel to be distinguished from a kernel that supports every
  * flag bit.
+ * SA_EXPOSE_TAGBITS exposes an architecture-defined set of tag bits in
+ * siginfo.si_addr.
  *
  * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
  * Unix names RESETHAND and NODEFER respectively.
@@ -41,6 +43,7 @@
 /* 0x00000100 used on sparc */
 /* 0x00000200 used on sparc */
 #define SA_UNSUPPORTED	0x00000400
+#define SA_EXPOSE_TAGBITS	0x00000800
 /* 0x00010000 used on mips */
 /* 0x01000000 used on x86 */
 /* 0x02000000 used on x86 */
diff --git a/kernel/signal.c b/kernel/signal.c
index 8f34819e80de..26018c59821d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2524,6 +2524,26 @@ static int ptrace_signal(int signr, kernel_siginfo_t *info)
 	return signr;
 }
 
+static void hide_si_addr_tag_bits(struct ksignal *ksig)
+{
+	switch (siginfo_layout(ksig->sig, ksig->info.si_code)) {
+	case SIL_FAULT:
+	case SIL_FAULT_MCEERR:
+	case SIL_FAULT_BNDERR:
+	case SIL_FAULT_PKUERR:
+		ksig->info.si_addr = arch_untagged_si_addr(
+			ksig->info.si_addr, ksig->sig, ksig->info.si_code);
+		break;
+	case SIL_KILL:
+	case SIL_TIMER:
+	case SIL_POLL:
+	case SIL_CHLD:
+	case SIL_RT:
+	case SIL_SYS:
+		break;
+	}
+}
+
 bool get_signal(struct ksignal *ksig)
 {
 	struct sighand_struct *sighand = current->sighand;
@@ -2761,6 +2781,10 @@ relock:
 	spin_unlock_irq(&sighand->siglock);
 
 	ksig->sig = signr;
+
+	if (!(ksig->ka.sa.sa_flags & SA_EXPOSE_TAGBITS))
+		hide_si_addr_tag_bits(ksig);
+
 	return ksig->sig > 0;
 }
 
-- 
cgit v1.2.3


From bde493349025ca0559e2fff88592935af3b8df19 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 13 Nov 2020 13:19:18 -0800
Subject: fs-verity: move structs needed for file signing to UAPI header

Although it isn't used directly by the ioctls,
"struct fsverity_descriptor" is required by userspace programs that need
to compute fs-verity file digests in a standalone way.  Therefore
it's also needed to sign files in a standalone way.

Similarly, "struct fsverity_formatted_digest" (previously called
"struct fsverity_signed_digest" which was misleading) is also needed to
sign files if the built-in signature verification is being used.

Therefore, move these structs to the UAPI header.

While doing this, try to make it clear that the signature-related fields
in fsverity_descriptor aren't used in the file digest computation.

Acked-by: Luca Boccassi <luca.boccassi@microsoft.com>
Link: https://lore.kernel.org/r/20201113211918.71883-5-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 Documentation/filesystems/fsverity.rst |  6 +----
 fs/verity/fsverity_private.h           | 37 -------------------------
 include/uapi/linux/fsverity.h          | 49 ++++++++++++++++++++++++++++++++++
 3 files changed, 50 insertions(+), 42 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/filesystems/fsverity.rst b/Documentation/filesystems/fsverity.rst
index 2eee558b7f5f..e0204a23e997 100644
--- a/Documentation/filesystems/fsverity.rst
+++ b/Documentation/filesystems/fsverity.rst
@@ -334,17 +334,13 @@ root hash as well as other fields such as the file size::
             __u8 hash_algorithm;    /* Merkle tree hash algorithm */
             __u8 log_blocksize;     /* log2 of size of data and tree blocks */
             __u8 salt_size;         /* size of salt in bytes; 0 if none */
-            __le32 sig_size;        /* must be 0 */
+            __le32 __reserved_0x04; /* must be 0 */
             __le64 data_size;       /* size of file the Merkle tree is built over */
             __u8 root_hash[64];     /* Merkle tree root hash */
             __u8 salt[32];          /* salt prepended to each hashed block */
             __u8 __reserved[144];   /* must be 0's */
     };
 
-Note that the ``sig_size`` field must be set to 0 for the purpose of
-computing the file measurement, even if a signature was provided (or
-will be provided) to `FS_IOC_ENABLE_VERITY`_.
-
 Built-in signature verification
 ===============================
 
diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h
index 21e9930d65fb..96f7b332f54f 100644
--- a/fs/verity/fsverity_private.h
+++ b/fs/verity/fsverity_private.h
@@ -77,49 +77,12 @@ struct fsverity_info {
 	const struct inode *inode;
 };
 
-/*
- * Merkle tree properties.  The fs-verity file digest is the hash of this
- * structure excluding the signature and with the sig_size field set to 0.
- */
-struct fsverity_descriptor {
-	__u8 version;		/* must be 1 */
-	__u8 hash_algorithm;	/* Merkle tree hash algorithm */
-	__u8 log_blocksize;	/* log2 of size of data and tree blocks */
-	__u8 salt_size;		/* size of salt in bytes; 0 if none */
-	__le32 sig_size;	/* size of signature in bytes; 0 if none */
-	__le64 data_size;	/* size of file the Merkle tree is built over */
-	__u8 root_hash[64];	/* Merkle tree root hash */
-	__u8 salt[32];		/* salt prepended to each hashed block */
-	__u8 __reserved[144];	/* must be 0's */
-	__u8 signature[];	/* optional PKCS#7 signature */
-};
-
 /* Arbitrary limit to bound the kmalloc() size.  Can be changed. */
 #define FS_VERITY_MAX_DESCRIPTOR_SIZE	16384
 
 #define FS_VERITY_MAX_SIGNATURE_SIZE	(FS_VERITY_MAX_DESCRIPTOR_SIZE - \
 					 sizeof(struct fsverity_descriptor))
 
-/*
- * Format in which fs-verity file digests are signed in built-in signatures.
- * This is the same as 'struct fsverity_digest', except here some magic bytes
- * are prepended to provide some context about what is being signed in case the
- * same key is used for non-fsverity purposes, and here the fields have fixed
- * endianness.
- *
- * This struct is specific to the built-in signature verification support, which
- * is optional.  fs-verity users may also verify signatures in userspace, in
- * which case userspace is responsible for deciding on what bytes are signed.
- * This struct may still be used, but it doesn't have to be.  For example,
- * userspace could instead use a string like "sha256:$digest_as_hex_string".
- */
-struct fsverity_formatted_digest {
-	char magic[8];			/* must be "FSVerity" */
-	__le16 digest_algorithm;
-	__le16 digest_size;
-	__u8 digest[];
-};
-
 /* hash_algs.c */
 
 extern struct fsverity_hash_alg fsverity_hash_algs[];
diff --git a/include/uapi/linux/fsverity.h b/include/uapi/linux/fsverity.h
index da0daf6c193b..33f44156f8ea 100644
--- a/include/uapi/linux/fsverity.h
+++ b/include/uapi/linux/fsverity.h
@@ -34,6 +34,55 @@ struct fsverity_digest {
 	__u8 digest[];
 };
 
+/*
+ * Struct containing a file's Merkle tree properties.  The fs-verity file digest
+ * is the hash of this struct.  A userspace program needs this struct only if it
+ * needs to compute fs-verity file digests itself, e.g. in order to sign files.
+ * It isn't needed just to enable fs-verity on a file.
+ *
+ * Note: when computing the file digest, 'sig_size' and 'signature' must be left
+ * zero and empty, respectively.  These fields are present only because some
+ * filesystems reuse this struct as part of their on-disk format.
+ */
+struct fsverity_descriptor {
+	__u8 version;		/* must be 1 */
+	__u8 hash_algorithm;	/* Merkle tree hash algorithm */
+	__u8 log_blocksize;	/* log2 of size of data and tree blocks */
+	__u8 salt_size;		/* size of salt in bytes; 0 if none */
+#ifdef __KERNEL__
+	__le32 sig_size;
+#else
+	__le32 __reserved_0x04;	/* must be 0 */
+#endif
+	__le64 data_size;	/* size of file the Merkle tree is built over */
+	__u8 root_hash[64];	/* Merkle tree root hash */
+	__u8 salt[32];		/* salt prepended to each hashed block */
+	__u8 __reserved[144];	/* must be 0's */
+#ifdef __KERNEL__
+	__u8 signature[];
+#endif
+};
+
+/*
+ * Format in which fs-verity file digests are signed in built-in signatures.
+ * This is the same as 'struct fsverity_digest', except here some magic bytes
+ * are prepended to provide some context about what is being signed in case the
+ * same key is used for non-fsverity purposes, and here the fields have fixed
+ * endianness.
+ *
+ * This struct is specific to the built-in signature verification support, which
+ * is optional.  fs-verity users may also verify signatures in userspace, in
+ * which case userspace is responsible for deciding on what bytes are signed.
+ * This struct may still be used, but it doesn't have to be.  For example,
+ * userspace could instead use a string like "sha256:$digest_as_hex_string".
+ */
+struct fsverity_formatted_digest {
+	char magic[8];			/* must be "FSVerity" */
+	__le16 digest_algorithm;
+	__le16 digest_size;
+	__u8 digest[];
+};
+
 #define FS_IOC_ENABLE_VERITY	_IOW('f', 133, struct fsverity_enable_arg)
 #define FS_IOC_MEASURE_VERITY	_IOWR('f', 134, struct fsverity_digest)
 
-- 
cgit v1.2.3


From 6bef038011a023db41f1b33f0776224729d52344 Mon Sep 17 00:00:00 2001
From: Mathieu Poirier <mathieu.poirier@linaro.org>
Date: Fri, 20 Nov 2020 14:42:38 -0700
Subject: rpmsg: Introduce __rpmsg{16|32|64} types

Introduce __rpmsg{16|32|64} types along with byte order conversion
functions based on an rpmsg_device operation as a foundation to
make RPMSG modular and transport agnostic.

Tested-by: Guennadi Liakhovetski <guennadi.liakhovetski@linux.intel.com>
Suggested-by: Guennadi Liakhovetski <guennadi.liakhovetski@linux.intel.com>
Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Reviewed-by: Arnaud Pouliquen <arnaud.pouliquen@st.com>
Reviewed-by: Guennadi Liakhovetski <guennadi.liakhovetski@linux.intel.com>
Link: https://lore.kernel.org/r/20201120214245.172963-2-mathieu.poirier@linaro.org
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 include/linux/rpmsg.h            | 51 ++++++++++++++++++++++++++++++
 include/linux/rpmsg/byteorder.h  | 67 ++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/rpmsg_types.h | 11 +++++++
 3 files changed, 129 insertions(+)
 create mode 100644 include/linux/rpmsg/byteorder.h
 create mode 100644 include/uapi/linux/rpmsg_types.h

(limited to 'include/uapi')

diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h
index 9fe156d1c018..faf2daff6238 100644
--- a/include/linux/rpmsg.h
+++ b/include/linux/rpmsg.h
@@ -17,6 +17,7 @@
 #include <linux/kref.h>
 #include <linux/mutex.h>
 #include <linux/poll.h>
+#include <linux/rpmsg/byteorder.h>
 
 #define RPMSG_ADDR_ANY		0xFFFFFFFF
 
@@ -46,6 +47,7 @@ struct rpmsg_channel_info {
  * @dst: destination address
  * @ept: the rpmsg endpoint of this channel
  * @announce: if set, rpmsg will announce the creation/removal of this channel
+ * @little_endian: True if transport is using little endian byte representation
  */
 struct rpmsg_device {
 	struct device dev;
@@ -55,6 +57,7 @@ struct rpmsg_device {
 	u32 dst;
 	struct rpmsg_endpoint *ept;
 	bool announce;
+	bool little_endian;
 
 	const struct rpmsg_device_ops *ops;
 };
@@ -111,6 +114,54 @@ struct rpmsg_driver {
 	int (*callback)(struct rpmsg_device *, void *, int, void *, u32);
 };
 
+static inline u16 rpmsg16_to_cpu(struct rpmsg_device *rpdev, __rpmsg16 val)
+{
+	if (!rpdev)
+		return __rpmsg16_to_cpu(rpmsg_is_little_endian(), val);
+	else
+		return __rpmsg16_to_cpu(rpdev->little_endian, val);
+}
+
+static inline __rpmsg16 cpu_to_rpmsg16(struct rpmsg_device *rpdev, u16 val)
+{
+	if (!rpdev)
+		return __cpu_to_rpmsg16(rpmsg_is_little_endian(), val);
+	else
+		return __cpu_to_rpmsg16(rpdev->little_endian, val);
+}
+
+static inline u32 rpmsg32_to_cpu(struct rpmsg_device *rpdev, __rpmsg32 val)
+{
+	if (!rpdev)
+		return __rpmsg32_to_cpu(rpmsg_is_little_endian(), val);
+	else
+		return __rpmsg32_to_cpu(rpdev->little_endian, val);
+}
+
+static inline __rpmsg32 cpu_to_rpmsg32(struct rpmsg_device *rpdev, u32 val)
+{
+	if (!rpdev)
+		return __cpu_to_rpmsg32(rpmsg_is_little_endian(), val);
+	else
+		return __cpu_to_rpmsg32(rpdev->little_endian, val);
+}
+
+static inline u64 rpmsg64_to_cpu(struct rpmsg_device *rpdev, __rpmsg64 val)
+{
+	if (!rpdev)
+		return __rpmsg64_to_cpu(rpmsg_is_little_endian(), val);
+	else
+		return __rpmsg64_to_cpu(rpdev->little_endian, val);
+}
+
+static inline __rpmsg64 cpu_to_rpmsg64(struct rpmsg_device *rpdev, u64 val)
+{
+	if (!rpdev)
+		return __cpu_to_rpmsg64(rpmsg_is_little_endian(), val);
+	else
+		return __cpu_to_rpmsg64(rpdev->little_endian, val);
+}
+
 #if IS_ENABLED(CONFIG_RPMSG)
 
 int register_rpmsg_device(struct rpmsg_device *dev);
diff --git a/include/linux/rpmsg/byteorder.h b/include/linux/rpmsg/byteorder.h
new file mode 100644
index 000000000000..c0f565dbad6d
--- /dev/null
+++ b/include/linux/rpmsg/byteorder.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Follows implementation found in linux/virtio_byteorder.h
+ */
+#ifndef _LINUX_RPMSG_BYTEORDER_H
+#define _LINUX_RPMSG_BYTEORDER_H
+#include <linux/types.h>
+#include <uapi/linux/rpmsg_types.h>
+
+static inline bool rpmsg_is_little_endian(void)
+{
+#ifdef __LITTLE_ENDIAN
+	return true;
+#else
+	return false;
+#endif
+}
+
+static inline u16 __rpmsg16_to_cpu(bool little_endian, __rpmsg16 val)
+{
+	if (little_endian)
+		return le16_to_cpu((__force __le16)val);
+	else
+		return be16_to_cpu((__force __be16)val);
+}
+
+static inline __rpmsg16 __cpu_to_rpmsg16(bool little_endian, u16 val)
+{
+	if (little_endian)
+		return (__force __rpmsg16)cpu_to_le16(val);
+	else
+		return (__force __rpmsg16)cpu_to_be16(val);
+}
+
+static inline u32 __rpmsg32_to_cpu(bool little_endian, __rpmsg32 val)
+{
+	if (little_endian)
+		return le32_to_cpu((__force __le32)val);
+	else
+		return be32_to_cpu((__force __be32)val);
+}
+
+static inline __rpmsg32 __cpu_to_rpmsg32(bool little_endian, u32 val)
+{
+	if (little_endian)
+		return (__force __rpmsg32)cpu_to_le32(val);
+	else
+		return (__force __rpmsg32)cpu_to_be32(val);
+}
+
+static inline u64 __rpmsg64_to_cpu(bool little_endian, __rpmsg64 val)
+{
+	if (little_endian)
+		return le64_to_cpu((__force __le64)val);
+	else
+		return be64_to_cpu((__force __be64)val);
+}
+
+static inline __rpmsg64 __cpu_to_rpmsg64(bool little_endian, u64 val)
+{
+	if (little_endian)
+		return (__force __rpmsg64)cpu_to_le64(val);
+	else
+		return (__force __rpmsg64)cpu_to_be64(val);
+}
+
+#endif /* _LINUX_RPMSG_BYTEORDER_H */
diff --git a/include/uapi/linux/rpmsg_types.h b/include/uapi/linux/rpmsg_types.h
new file mode 100644
index 000000000000..36e3b9404391
--- /dev/null
+++ b/include/uapi/linux/rpmsg_types.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_RPMSG_TYPES_H
+#define _UAPI_LINUX_RPMSG_TYPES_H
+
+#include <linux/types.h>
+
+typedef __u16 __bitwise __rpmsg16;
+typedef __u32 __bitwise __rpmsg32;
+typedef __u64 __bitwise __rpmsg64;
+
+#endif /* _UAPI_LINUX_RPMSG_TYPES_H */
-- 
cgit v1.2.3


From 5204bb683c1633e550c2124ccc2358dd645a80db Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Mon, 23 Nov 2020 07:36:25 +0200
Subject: devlink: Fix reload stats structure

Fix reload stats structure exposed to the user. Change stats structure
hierarchy to have the reload action as a parent of the stat entry and
then stat entry includes value per limit. This will also help to avoid
string concatenation on iproute2 output.

Reload stats structure before this fix:
"stats": {
    "reload": {
        "driver_reinit": 2,
        "fw_activate": 1,
        "fw_activate_no_reset": 0
     }
}

After this fix:
"stats": {
    "reload": {
        "driver_reinit": {
            "unspecified": 2
        },
        "fw_activate": {
            "unspecified": 1,
            "no_reset": 0
        }
    }
}

Fixes: a254c264267e ("devlink: Add reload stats")
Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Link: https://lore.kernel.org/r/1606109785-25197-1-git-send-email-moshe@mellanox.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/devlink.h |  2 ++
 net/core/devlink.c           | 49 +++++++++++++++++++++++++++++---------------
 2 files changed, 35 insertions(+), 16 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 0113bc4db9f5..5203f54a2be1 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -526,6 +526,8 @@ enum devlink_attr {
 	DEVLINK_ATTR_RELOAD_STATS_LIMIT,	/* u8 */
 	DEVLINK_ATTR_RELOAD_STATS_VALUE,	/* u32 */
 	DEVLINK_ATTR_REMOTE_RELOAD_STATS,	/* nested */
+	DEVLINK_ATTR_RELOAD_ACTION_INFO,        /* nested */
+	DEVLINK_ATTR_RELOAD_ACTION_STATS,       /* nested */
 
 	/* add new attributes above here, update the policy in devlink.c */
 
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 4b0211590aac..c91e15b7a2bd 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -517,7 +517,7 @@ devlink_reload_limit_is_supported(struct devlink *devlink, enum devlink_reload_l
 	return test_bit(limit, &devlink->ops->reload_limits);
 }
 
-static int devlink_reload_stat_put(struct sk_buff *msg, enum devlink_reload_action action,
+static int devlink_reload_stat_put(struct sk_buff *msg,
 				   enum devlink_reload_limit limit, u32 value)
 {
 	struct nlattr *reload_stats_entry;
@@ -526,8 +526,7 @@ static int devlink_reload_stat_put(struct sk_buff *msg, enum devlink_reload_acti
 	if (!reload_stats_entry)
 		return -EMSGSIZE;
 
-	if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, action) ||
-	    nla_put_u8(msg, DEVLINK_ATTR_RELOAD_STATS_LIMIT, limit) ||
+	if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_STATS_LIMIT, limit) ||
 	    nla_put_u32(msg, DEVLINK_ATTR_RELOAD_STATS_VALUE, value))
 		goto nla_put_failure;
 	nla_nest_end(msg, reload_stats_entry);
@@ -540,7 +539,7 @@ nla_put_failure:
 
 static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink, bool is_remote)
 {
-	struct nlattr *reload_stats_attr;
+	struct nlattr *reload_stats_attr, *act_info, *act_stats;
 	int i, j, stat_idx;
 	u32 value;
 
@@ -552,17 +551,29 @@ static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink
 	if (!reload_stats_attr)
 		return -EMSGSIZE;
 
-	for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) {
-		/* Remote stats are shown even if not locally supported. Stats
-		 * of actions with unspecified limit are shown though drivers
-		 * don't need to register unspecified limit.
-		 */
-		if (!is_remote && j != DEVLINK_RELOAD_LIMIT_UNSPEC &&
-		    !devlink_reload_limit_is_supported(devlink, j))
+	for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) {
+		if ((!is_remote &&
+		     !devlink_reload_action_is_supported(devlink, i)) ||
+		    i == DEVLINK_RELOAD_ACTION_UNSPEC)
 			continue;
-		for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) {
-			if ((!is_remote && !devlink_reload_action_is_supported(devlink, i)) ||
-			    i == DEVLINK_RELOAD_ACTION_UNSPEC ||
+		act_info = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_INFO);
+		if (!act_info)
+			goto nla_put_failure;
+
+		if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, i))
+			goto action_info_nest_cancel;
+		act_stats = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_STATS);
+		if (!act_stats)
+			goto action_info_nest_cancel;
+
+		for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) {
+			/* Remote stats are shown even if not locally supported.
+			 * Stats of actions with unspecified limit are shown
+			 * though drivers don't need to register unspecified
+			 * limit.
+			 */
+			if ((!is_remote && j != DEVLINK_RELOAD_LIMIT_UNSPEC &&
+			     !devlink_reload_limit_is_supported(devlink, j)) ||
 			    devlink_reload_combination_is_invalid(i, j))
 				continue;
 
@@ -571,13 +582,19 @@ static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink
 				value = devlink->stats.reload_stats[stat_idx];
 			else
 				value = devlink->stats.remote_reload_stats[stat_idx];
-			if (devlink_reload_stat_put(msg, i, j, value))
-				goto nla_put_failure;
+			if (devlink_reload_stat_put(msg, j, value))
+				goto action_stats_nest_cancel;
 		}
+		nla_nest_end(msg, act_stats);
+		nla_nest_end(msg, act_info);
 	}
 	nla_nest_end(msg, reload_stats_attr);
 	return 0;
 
+action_stats_nest_cancel:
+	nla_nest_cancel(msg, act_stats);
+action_info_nest_cancel:
+	nla_nest_cancel(msg, act_info);
 nla_put_failure:
 	nla_nest_cancel(msg, reload_stats_attr);
 	return -EMSGSIZE;
-- 
cgit v1.2.3


From 5b10b62989219aa527ee4fa555d1995a3b70981b Mon Sep 17 00:00:00 2001
From: Karol Trzcinski <karolx.trzcinski@linux.intel.com>
Date: Tue, 24 Nov 2020 20:00:17 +0200
Subject: ASoC: SOF: Add `memory_info` file to debugfs

This file content describes memory allocation status
at run-time, typically to detect memory leaks.

Signed-off-by: Karol Trzcinski <karolx.trzcinski@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
Link: https://lore.kernel.org/r/20201124180017.2232128-5-kai.vehmanen@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/sof/debug.h        |  41 ++++++++++++++
 include/sound/sof/ext_manifest.h |   1 +
 include/sound/sof/header.h       |   4 ++
 include/uapi/sound/sof/abi.h     |   2 +-
 sound/soc/sof/debug.c            | 117 +++++++++++++++++++++++++++++++++++++++
 sound/soc/sof/ipc.c              |   9 +++
 sound/soc/sof/loader.c           |  10 ++++
 sound/soc/sof/sof-priv.h         |   2 +
 8 files changed, 185 insertions(+), 1 deletion(-)
 create mode 100644 include/sound/sof/debug.h

(limited to 'include/uapi')

diff --git a/include/sound/sof/debug.h b/include/sound/sof/debug.h
new file mode 100644
index 000000000000..3ecb5793789d
--- /dev/null
+++ b/include/sound/sof/debug.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * Copyright(c) 2020 Intel Corporation. All rights reserved.
+ *
+ * Author: Karol Trzcinski <karolx.trzcinski@linux.intel.com>
+ */
+
+#ifndef __INCLUDE_SOUND_SOF_DEBUG_H__
+#define __INCLUDE_SOUND_SOF_DEBUG_H__
+
+#include <sound/sof/header.h>
+
+/** ABI3.18 */
+enum sof_ipc_dbg_mem_zone {
+	SOF_IPC_MEM_ZONE_SYS		= 0,	/**< System zone */
+	SOF_IPC_MEM_ZONE_SYS_RUNTIME	= 1,	/**< System-runtime zone */
+	SOF_IPC_MEM_ZONE_RUNTIME	= 2,	/**< Runtime zone */
+	SOF_IPC_MEM_ZONE_BUFFER		= 3,	/**< Buffer zone */
+};
+
+/** ABI3.18 */
+struct sof_ipc_dbg_mem_usage_elem {
+	uint32_t zone;		/**< see sof_ipc_dbg_mem_zone */
+	uint32_t id;		/**< heap index within zone */
+	uint32_t used;		/**< number of bytes used in zone */
+	uint32_t free;		/**< number of bytes free to use within zone */
+	uint32_t reserved;	/**< for future use */
+} __packed;
+
+/** ABI3.18 */
+struct sof_ipc_dbg_mem_usage {
+	struct sof_ipc_reply rhdr;			/**< generic IPC reply header */
+	uint32_t reserved[4];				/**< reserved for future use */
+	uint32_t num_elems;				/**< elems[] counter */
+	struct sof_ipc_dbg_mem_usage_elem elems[];	/**< memory usage information */
+} __packed;
+
+#endif
diff --git a/include/sound/sof/ext_manifest.h b/include/sound/sof/ext_manifest.h
index 31da6e611c6e..e05cb21023e5 100644
--- a/include/sound/sof/ext_manifest.h
+++ b/include/sound/sof/ext_manifest.h
@@ -104,6 +104,7 @@ struct ext_man_dbg_abi {
 enum config_elem_type {
 	SOF_EXT_MAN_CONFIG_EMPTY		= 0,
 	SOF_EXT_MAN_CONFIG_IPC_MSG_SIZE		= 1,
+	SOF_EXT_MAN_CONFIG_MEMORY_USAGE_SCAN	= 2, /**< ABI 3.18 */
 };
 
 struct sof_config_elem {
diff --git a/include/sound/sof/header.h b/include/sound/sof/header.h
index 13256d4fb0dd..c93f08334bbe 100644
--- a/include/sound/sof/header.h
+++ b/include/sound/sof/header.h
@@ -52,6 +52,7 @@
 #define SOF_IPC_GLB_GDB_DEBUG			SOF_GLB_TYPE(0xAU)
 #define SOF_IPC_GLB_TEST_MSG			SOF_GLB_TYPE(0xBU)
 #define SOF_IPC_GLB_PROBE			SOF_GLB_TYPE(0xCU)
+#define SOF_IPC_GLB_DEBUG			SOF_GLB_TYPE(0xDU)
 
 /*
  * DSP Command Message Types
@@ -118,6 +119,9 @@
 #define SOF_IPC_TRACE_DMA_POSITION		SOF_CMD_TYPE(0x002)
 #define SOF_IPC_TRACE_DMA_PARAMS_EXT		SOF_CMD_TYPE(0x003)
 
+/* debug */
+#define SOF_IPC_DEBUG_MEM_USAGE			SOF_CMD_TYPE(0x001)
+
 /* test */
 #define SOF_IPC_TEST_IPC_FLOOD			SOF_CMD_TYPE(0x001)
 
diff --git a/include/uapi/sound/sof/abi.h b/include/uapi/sound/sof/abi.h
index 6af32f82fb99..fe2cfae94b45 100644
--- a/include/uapi/sound/sof/abi.h
+++ b/include/uapi/sound/sof/abi.h
@@ -26,7 +26,7 @@
 
 /* SOF ABI version major, minor and patch numbers */
 #define SOF_ABI_MAJOR 3
-#define SOF_ABI_MINOR 17
+#define SOF_ABI_MINOR 18
 #define SOF_ABI_PATCH 0
 
 /* SOF ABI version number. Format within 32bit word is MMmmmppp */
diff --git a/sound/soc/sof/debug.c b/sound/soc/sof/debug.c
index 9419a99bab53..143117334ae5 100644
--- a/sound/soc/sof/debug.c
+++ b/sound/soc/sof/debug.c
@@ -14,6 +14,8 @@
 #include <linux/debugfs.h>
 #include <linux/io.h>
 #include <linux/pm_runtime.h>
+#include <sound/sof/ext_manifest.h>
+#include <sound/sof/debug.h>
 #include "sof-priv.h"
 #include "ops.h"
 
@@ -626,6 +628,121 @@ int snd_sof_debugfs_buf_item(struct snd_sof_dev *sdev,
 }
 EXPORT_SYMBOL_GPL(snd_sof_debugfs_buf_item);
 
+static int memory_info_update(struct snd_sof_dev *sdev, char *buf, size_t buff_size)
+{
+	struct sof_ipc_cmd_hdr msg = {
+		.size = sizeof(struct sof_ipc_cmd_hdr),
+		.cmd = SOF_IPC_GLB_DEBUG | SOF_IPC_DEBUG_MEM_USAGE,
+	};
+	struct sof_ipc_dbg_mem_usage *reply;
+	int len;
+	int ret;
+	int i;
+
+	reply = kmalloc(SOF_IPC_MSG_MAX_SIZE, GFP_KERNEL);
+	if (!reply)
+		return -ENOMEM;
+
+	ret = pm_runtime_get_sync(sdev->dev);
+	if (ret < 0 && ret != -EACCES) {
+		pm_runtime_put_noidle(sdev->dev);
+		dev_err(sdev->dev, "error: enabling device failed: %d\n", ret);
+		goto error;
+	}
+
+	ret = sof_ipc_tx_message(sdev->ipc, msg.cmd, &msg, msg.size, reply, SOF_IPC_MSG_MAX_SIZE);
+	pm_runtime_mark_last_busy(sdev->dev);
+	pm_runtime_put_autosuspend(sdev->dev);
+	if (ret < 0 || reply->rhdr.error < 0) {
+		ret = min(ret, reply->rhdr.error);
+		dev_err(sdev->dev, "error: reading memory info failed, %d\n", ret);
+		goto error;
+	}
+
+	if (struct_size(reply, elems, reply->num_elems) != reply->rhdr.hdr.size) {
+		dev_err(sdev->dev, "error: invalid memory info ipc struct size, %d\n",
+			reply->rhdr.hdr.size);
+		ret = -EINVAL;
+		goto error;
+	}
+
+	for (i = 0, len = 0; i < reply->num_elems; i++) {
+		ret = snprintf(buf + len, buff_size - len, "zone %d.%d used %#8x free %#8x\n",
+			       reply->elems[i].zone, reply->elems[i].id,
+			       reply->elems[i].used, reply->elems[i].free);
+		if (ret < 0)
+			goto error;
+		len += ret;
+	}
+
+	ret = len;
+error:
+	kfree(reply);
+	return ret;
+}
+
+static ssize_t memory_info_read(struct file *file, char __user *to, size_t count, loff_t *ppos)
+{
+	struct snd_sof_dfsentry *dfse = file->private_data;
+	struct snd_sof_dev *sdev = dfse->sdev;
+	int data_length;
+
+	/* read memory info from FW only once for each file read */
+	if (!*ppos) {
+		dfse->buf_data_size = 0;
+		data_length = memory_info_update(sdev, dfse->buf, dfse->size);
+		if (data_length < 0)
+			return data_length;
+		dfse->buf_data_size = data_length;
+	}
+
+	return simple_read_from_buffer(to, count, ppos, dfse->buf, dfse->buf_data_size);
+}
+
+static int memory_info_open(struct inode *inode, struct file *file)
+{
+	struct snd_sof_dfsentry *dfse = inode->i_private;
+	struct snd_sof_dev *sdev = dfse->sdev;
+
+	file->private_data = dfse;
+
+	/* allocate buffer memory only in first open run, to save memory when unused */
+	if (!dfse->buf) {
+		dfse->buf = devm_kmalloc(sdev->dev, PAGE_SIZE, GFP_KERNEL);
+		if (!dfse->buf)
+			return -ENOMEM;
+		dfse->size = PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+static const struct file_operations memory_info_fops = {
+	.open = memory_info_open,
+	.read = memory_info_read,
+	.llseek = default_llseek,
+};
+
+int snd_sof_dbg_memory_info_init(struct snd_sof_dev *sdev)
+{
+	struct snd_sof_dfsentry *dfse;
+
+	dfse = devm_kzalloc(sdev->dev, sizeof(*dfse), GFP_KERNEL);
+	if (!dfse)
+		return -ENOMEM;
+
+	/* don't allocate buffer before first usage, to save memory when unused */
+	dfse->type = SOF_DFSENTRY_TYPE_BUF;
+	dfse->sdev = sdev;
+
+	debugfs_create_file("memory_info", 0444, sdev->debugfs_root, dfse, &memory_info_fops);
+
+	/* add to dfsentry list */
+	list_add(&dfse->list, &sdev->dfsentry_list);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(snd_sof_dbg_memory_info_init);
+
 int snd_sof_dbg_init(struct snd_sof_dev *sdev)
 {
 	const struct snd_sof_dsp_ops *ops = sof_ops(sdev);
diff --git a/sound/soc/sof/ipc.c b/sound/soc/sof/ipc.c
index fd2b96ae4943..fc13bb06dbf3 100644
--- a/sound/soc/sof/ipc.c
+++ b/sound/soc/sof/ipc.c
@@ -181,6 +181,15 @@ static void ipc_log_header(struct device *dev, u8 *text, u32 cmd)
 			str2 = "unknown type"; break;
 		}
 		break;
+	case SOF_IPC_GLB_DEBUG:
+		str = "GLB_DEBUG";
+		switch (type) {
+		case SOF_IPC_DEBUG_MEM_USAGE:
+			str2 = "MEM_USAGE"; break;
+		default:
+			str2 = "unknown type"; break;
+		}
+		break;
 	default:
 		str = "unknown GLB command"; break;
 	}
diff --git a/sound/soc/sof/loader.c b/sound/soc/sof/loader.c
index 33d3be774380..2a8c9bff9963 100644
--- a/sound/soc/sof/loader.c
+++ b/sound/soc/sof/loader.c
@@ -205,6 +205,7 @@ static int ext_man_get_config_data(struct snd_sof_dev *sdev,
 	const struct sof_config_elem *elem;
 	int elems_counter;
 	int elems_size;
+	int ret = 0;
 	int i;
 
 	/* calculate elements counter */
@@ -225,11 +226,20 @@ static int ext_man_get_config_data(struct snd_sof_dev *sdev,
 		case SOF_EXT_MAN_CONFIG_IPC_MSG_SIZE:
 			/* TODO: use ipc msg size from config data */
 			break;
+		case SOF_EXT_MAN_CONFIG_MEMORY_USAGE_SCAN:
+			if (sdev->first_boot && elem->value)
+				ret = snd_sof_dbg_memory_info_init(sdev);
+			break;
 		default:
 			dev_info(sdev->dev, "Unknown firmware configuration token %d value %d",
 				 elem->token, elem->value);
 			break;
 		}
+		if (ret < 0) {
+			dev_err(sdev->dev, "error: processing sof_ext_man_config_data failed for token %d value 0x%x, %d\n",
+				elem->token, elem->value, ret);
+			return ret;
+		}
 	}
 
 	return 0;
diff --git a/sound/soc/sof/sof-priv.h b/sound/soc/sof/sof-priv.h
index 0aed2a7ab858..d8bc0178dc89 100644
--- a/sound/soc/sof/sof-priv.h
+++ b/sound/soc/sof/sof-priv.h
@@ -290,6 +290,7 @@ enum sof_debugfs_access_type {
 /* FS entry for debug files that can expose DSP memories, registers */
 struct snd_sof_dfsentry {
 	size_t size;
+	size_t buf_data_size;  /* length of buffered data for file read operation */
 	enum sof_dfsentry_type type;
 	/*
 	 * access_type specifies if the
@@ -523,6 +524,7 @@ void snd_sof_get_status(struct snd_sof_dev *sdev, u32 panic_code,
 			void *stack, size_t stack_words);
 int snd_sof_init_trace_ipc(struct snd_sof_dev *sdev);
 void snd_sof_handle_fw_exception(struct snd_sof_dev *sdev);
+int snd_sof_dbg_memory_info_init(struct snd_sof_dev *sdev);
 
 /*
  * Platform specific ops.
-- 
cgit v1.2.3


From cea357bc25713f909bd9d6f0d71eae5e3a5b4e85 Mon Sep 17 00:00:00 2001
From: Sean Young <sean@mess.org>
Date: Wed, 18 Nov 2020 11:43:40 +0100
Subject: media: lirc: ensure RC_PROTO_MAX has documentation

The enum rc_proto value RC_PROTO_MAX has no documentation, this is causing
a warning while building the documentation.

Fixes: 72e637fec558 ("media: rc: validate that "rc_proto" is reasonable")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/uapi/linux/lirc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/lirc.h b/include/uapi/linux/lirc.h
index c1eb960adde3..bf0672a6d341 100644
--- a/include/uapi/linux/lirc.h
+++ b/include/uapi/linux/lirc.h
@@ -196,6 +196,7 @@ struct lirc_scancode {
  * @RC_PROTO_RCMM24: RC-MM protocol 24 bits
  * @RC_PROTO_RCMM32: RC-MM protocol 32 bits
  * @RC_PROTO_XBOX_DVD: Xbox DVD Movie Playback Kit protocol
+ * @RC_PROTO_MAX: Maximum value of enum rc_proto
  */
 enum rc_proto {
 	RC_PROTO_UNKNOWN	= 0,
-- 
cgit v1.2.3


From f460019b4c9e0389b932e1ca2c01b598c7ae769e Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vlad@buslov.dev>
Date: Tue, 24 Nov 2020 18:40:54 +0200
Subject: net: sched: alias action flags with TCA_ACT_ prefix

Currently both filter and action flags use the same "TCA_" prefix, which makes
them hard to distinguish in code and confusing for users. Create aliases
for existing action flags constants with "TCA_ACT_" prefix.

Signed-off-by: Vlad Buslov <vlad@buslov.dev>
Link: https://lore.kernel.org/r/20201124164054.893168-1-vlad@buslov.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/rtnetlink.h | 12 +++++++-----
 net/sched/act_api.c            | 10 +++++-----
 2 files changed, 12 insertions(+), 10 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 2ffbef5da6c1..b841caa4657e 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -768,16 +768,18 @@ enum {
 #define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
 /* tcamsg flags stored in attribute TCA_ROOT_FLAGS
  *
- * TCA_FLAG_LARGE_DUMP_ON user->kernel to request for larger than TCA_ACT_MAX_PRIO
- * actions in a dump. All dump responses will contain the number of actions
- * being dumped stored in for user app's consumption in TCA_ROOT_COUNT
+ * TCA_ACT_FLAG_LARGE_DUMP_ON user->kernel to request for larger than
+ * TCA_ACT_MAX_PRIO actions in a dump. All dump responses will contain the
+ * number of actions being dumped stored in for user app's consumption in
+ * TCA_ROOT_COUNT
  *
- * TCA_FLAG_TERSE_DUMP user->kernel to request terse (brief) dump that only
+ * TCA_ACT_FLAG_TERSE_DUMP user->kernel to request terse (brief) dump that only
  * includes essential action info (kind, index, etc.)
  *
  */
 #define TCA_FLAG_LARGE_DUMP_ON		(1 << 0)
-#define TCA_FLAG_TERSE_DUMP		(1 << 1)
+#define TCA_ACT_FLAG_LARGE_DUMP_ON	TCA_FLAG_LARGE_DUMP_ON
+#define TCA_ACT_FLAG_TERSE_DUMP		(1 << 1)
 
 /* New extended info filters for IFLA_EXT_MASK */
 #define RTEXT_FILTER_VF		(1 << 0)
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index fc23f46a315c..99db1c77426b 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -278,7 +278,7 @@ static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
 			index--;
 			goto nla_put_failure;
 		}
-		err = (act_flags & TCA_FLAG_TERSE_DUMP) ?
+		err = (act_flags & TCA_ACT_FLAG_TERSE_DUMP) ?
 			tcf_action_dump_terse(skb, p, true) :
 			tcf_action_dump_1(skb, p, 0, 0);
 		if (err < 0) {
@@ -288,7 +288,7 @@ static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
 		}
 		nla_nest_end(skb, nest);
 		n_i++;
-		if (!(act_flags & TCA_FLAG_LARGE_DUMP_ON) &&
+		if (!(act_flags & TCA_ACT_FLAG_LARGE_DUMP_ON) &&
 		    n_i >= TCA_ACT_MAX_PRIO)
 			goto done;
 	}
@@ -298,7 +298,7 @@ done:
 
 	mutex_unlock(&idrinfo->lock);
 	if (n_i) {
-		if (act_flags & TCA_FLAG_LARGE_DUMP_ON)
+		if (act_flags & TCA_ACT_FLAG_LARGE_DUMP_ON)
 			cb->args[1] = n_i;
 	}
 	return n_i;
@@ -1473,8 +1473,8 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
 }
 
 static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = {
-	[TCA_ROOT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_FLAG_LARGE_DUMP_ON |
-						 TCA_FLAG_TERSE_DUMP),
+	[TCA_ROOT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_ACT_FLAG_LARGE_DUMP_ON |
+						 TCA_ACT_FLAG_TERSE_DUMP),
 	[TCA_ROOT_TIME_DELTA]      = { .type = NLA_U32 },
 };
 
-- 
cgit v1.2.3


From bfd042321a7afa769c855c37f2bbe2703dc72ef2 Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Tue, 24 Nov 2020 09:25:25 +0100
Subject: bridge: mrp: Implement LC mode for MRP

Extend MRP to support LC mode(link check) for the interconnect port.
This applies only to the interconnect ring.

Opposite to RC mode(ring check) the LC mode is using CFM frames to
detect when the link goes up or down and based on that the userspace
will need to react.
One advantage of the LC mode over RC mode is that there will be fewer
frames in the normal rings. Because RC mode generates InTest on all
ports while LC mode sends CFM frame only on the interconnect port.

All 4 nodes that are part of the interconnect ring need to have the same
mode, and it is not possible to run LC and RC mode at the same time
on a node.

Whenever the MIM starts, it needs to detect the status of the other 3
nodes in the interconnect ring, so it sends a frame called
InLinkStatus, to which the clients need to reply with their link
status.

This patch adds InLinkStatus frame type and extends existing rules on
how to forward this frame.

Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Link: https://lore.kernel.org/r/20201124082525.273820-1-horatiu.vultur@microchip.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/mrp_bridge.h |  1 +
 net/bridge/br_mrp.c             | 18 +++++++++++++++---
 2 files changed, 16 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/mrp_bridge.h b/include/uapi/linux/mrp_bridge.h
index 6aeb13ef0b1e..9744773de5ff 100644
--- a/include/uapi/linux/mrp_bridge.h
+++ b/include/uapi/linux/mrp_bridge.h
@@ -61,6 +61,7 @@ enum br_mrp_tlv_header_type {
 	BR_MRP_TLV_HEADER_IN_TOPO = 0x7,
 	BR_MRP_TLV_HEADER_IN_LINK_DOWN = 0x8,
 	BR_MRP_TLV_HEADER_IN_LINK_UP = 0x9,
+	BR_MRP_TLV_HEADER_IN_LINK_STATUS = 0xa,
 	BR_MRP_TLV_HEADER_OPTION = 0x7f,
 };
 
diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c
index bb12fbf9aaf2..cec2c4e4561d 100644
--- a/net/bridge/br_mrp.c
+++ b/net/bridge/br_mrp.c
@@ -858,7 +858,8 @@ static bool br_mrp_in_frame(struct sk_buff *skb)
 	if (hdr->type == BR_MRP_TLV_HEADER_IN_TEST ||
 	    hdr->type == BR_MRP_TLV_HEADER_IN_TOPO ||
 	    hdr->type == BR_MRP_TLV_HEADER_IN_LINK_DOWN ||
-	    hdr->type == BR_MRP_TLV_HEADER_IN_LINK_UP)
+	    hdr->type == BR_MRP_TLV_HEADER_IN_LINK_UP ||
+	    hdr->type == BR_MRP_TLV_HEADER_IN_LINK_STATUS)
 		return true;
 
 	return false;
@@ -1126,9 +1127,9 @@ static int br_mrp_rcv(struct net_bridge_port *p,
 						goto no_forward;
 				}
 			} else {
-				/* MIM should forward IntLinkChange and
+				/* MIM should forward IntLinkChange/Status and
 				 * IntTopoChange between ring ports but MIM
-				 * should not forward IntLinkChange and
+				 * should not forward IntLinkChange/Status and
 				 * IntTopoChange if the frame was received at
 				 * the interconnect port
 				 */
@@ -1155,6 +1156,17 @@ static int br_mrp_rcv(struct net_bridge_port *p,
 			     in_type == BR_MRP_TLV_HEADER_IN_LINK_DOWN))
 				goto forward;
 
+			/* MIC should forward IntLinkStatus frames only to
+			 * interconnect port if it was received on a ring port.
+			 * If it is received on interconnect port then, it
+			 * should be forward on both ring ports
+			 */
+			if (br_mrp_is_ring_port(p_port, s_port, p) &&
+			    in_type == BR_MRP_TLV_HEADER_IN_LINK_STATUS) {
+				p_dst = NULL;
+				s_dst = NULL;
+			}
+
 			/* Should forward the InTopo frames only between the
 			 * ring ports
 			 */
-- 
cgit v1.2.3


From 27672f0d280a3f286a410a8db2004f46ace72a17 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Tue, 24 Nov 2020 15:12:09 +0000
Subject: bpf: Add a BPF helper for getting the IMA hash of an inode

Provide a wrapper function to get the IMA hash of an inode. This helper
is useful in fingerprinting files (e.g executables on execution) and
using these fingerprints in detections like an executable unlinking
itself.

Since the ima_inode_hash can sleep, it's only allowed for sleepable
LSM hooks.

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20201124151210.1081188-3-kpsingh@chromium.org
---
 include/uapi/linux/bpf.h       | 11 +++++++++++
 kernel/bpf/bpf_lsm.c           | 26 ++++++++++++++++++++++++++
 scripts/bpf_helpers_doc.py     |  2 ++
 tools/include/uapi/linux/bpf.h | 11 +++++++++++
 4 files changed, 50 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3ca6146f001a..c3458ec1f30a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3807,6 +3807,16 @@ union bpf_attr {
  * 		See: **clock_gettime**\ (**CLOCK_MONOTONIC_COARSE**)
  * 	Return
  * 		Current *ktime*.
+ *
+ * long bpf_ima_inode_hash(struct inode *inode, void *dst, u32 size)
+ *	Description
+ *		Returns the stored IMA hash of the *inode* (if it's available).
+ *		If the hash is larger than *size*, then only *size*
+ *		bytes will be copied to *dst*
+ *	Return
+ *		The **hash_algo** is returned on success,
+ *		**-EOPNOTSUPP** if IMA is disabled or **-EINVAL** if
+ *		invalid arguments are passed.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3970,6 +3980,7 @@ union bpf_attr {
 	FN(get_current_task_btf),	\
 	FN(bprm_opts_set),		\
 	FN(ktime_get_coarse_ns),	\
+	FN(ima_inode_hash),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index b4f27a874092..70e5e0b6d69d 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -15,6 +15,7 @@
 #include <net/bpf_sk_storage.h>
 #include <linux/bpf_local_storage.h>
 #include <linux/btf_ids.h>
+#include <linux/ima.h>
 
 /* For every LSM hook that allows attachment of BPF programs, declare a nop
  * function where a BPF program can be attached.
@@ -75,6 +76,29 @@ const static struct bpf_func_proto bpf_bprm_opts_set_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_3(bpf_ima_inode_hash, struct inode *, inode, void *, dst, u32, size)
+{
+	return ima_inode_hash(inode, dst, size);
+}
+
+static bool bpf_ima_inode_hash_allowed(const struct bpf_prog *prog)
+{
+	return bpf_lsm_is_sleepable_hook(prog->aux->attach_btf_id);
+}
+
+BTF_ID_LIST_SINGLE(bpf_ima_inode_hash_btf_ids, struct, inode)
+
+const static struct bpf_func_proto bpf_ima_inode_hash_proto = {
+	.func		= bpf_ima_inode_hash,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_BTF_ID,
+	.arg1_btf_id	= &bpf_ima_inode_hash_btf_ids[0],
+	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+	.allowed	= bpf_ima_inode_hash_allowed,
+};
+
 static const struct bpf_func_proto *
 bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -97,6 +121,8 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_task_storage_delete_proto;
 	case BPF_FUNC_bprm_opts_set:
 		return &bpf_bprm_opts_set_proto;
+	case BPF_FUNC_ima_inode_hash:
+		return prog->aux->sleepable ? &bpf_ima_inode_hash_proto : NULL;
 	default:
 		return tracing_prog_func_proto(func_id, prog);
 	}
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
index c5bc947a70ad..8b829748d488 100755
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
@@ -436,6 +436,7 @@ class PrinterHelpers(Printer):
             'struct xdp_md',
             'struct path',
             'struct btf_ptr',
+            'struct inode',
     ]
     known_types = {
             '...',
@@ -480,6 +481,7 @@ class PrinterHelpers(Printer):
             'struct task_struct',
             'struct path',
             'struct btf_ptr',
+            'struct inode',
     }
     mapped_types = {
             'u8': '__u8',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 3ca6146f001a..c3458ec1f30a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3807,6 +3807,16 @@ union bpf_attr {
  * 		See: **clock_gettime**\ (**CLOCK_MONOTONIC_COARSE**)
  * 	Return
  * 		Current *ktime*.
+ *
+ * long bpf_ima_inode_hash(struct inode *inode, void *dst, u32 size)
+ *	Description
+ *		Returns the stored IMA hash of the *inode* (if it's available).
+ *		If the hash is larger than *size*, then only *size*
+ *		bytes will be copied to *dst*
+ *	Return
+ *		The **hash_algo** is returned on success,
+ *		**-EOPNOTSUPP** if IMA is disabled or **-EINVAL** if
+ *		invalid arguments are passed.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3970,6 +3980,7 @@ union bpf_attr {
 	FN(get_current_task_btf),	\
 	FN(bprm_opts_set),		\
 	FN(ktime_get_coarse_ns),	\
+	FN(ima_inode_hash),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 4fe21dec6c2830dfcad107ff1ba050c7328f122b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 26 Nov 2020 13:55:39 +0100
Subject: media: rc: improve LIRC documentation

Add documentation for enum rc_proto and struct lirc_scancode
at the generated docs.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Sean Young <sean@mess.org>
---
 Documentation/userspace-api/media/rc/lirc-dev-intro.rst | 11 +++++++++--
 include/uapi/linux/lirc.h                               |  2 +-
 2 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/userspace-api/media/rc/lirc-dev-intro.rst b/Documentation/userspace-api/media/rc/lirc-dev-intro.rst
index 266b646d584e..c88973732282 100644
--- a/Documentation/userspace-api/media/rc/lirc-dev-intro.rst
+++ b/Documentation/userspace-api/media/rc/lirc-dev-intro.rst
@@ -57,12 +57,12 @@ on the following table.
 
     This mode is for both sending and receiving IR.
 
-    For transmitting (aka sending), create a ``struct lirc_scancode`` with
+    For transmitting (aka sending), create a struct lirc_scancode with
     the desired scancode set in the ``scancode`` member, :c:type:`rc_proto`
     set to the :ref:`IR protocol <Remote_controllers_Protocols>`, and all other
     members set to 0. Write this struct to the lirc device.
 
-    For receiving, you read ``struct lirc_scancode`` from the LIRC device.
+    For receiving, you read struct lirc_scancode from the LIRC device.
     The ``scancode`` field is set to the received scancode and the
     :ref:`IR protocol <Remote_controllers_Protocols>` is set in
     :c:type:`rc_proto`. If the scancode maps to a valid key code, this is set
@@ -136,6 +136,13 @@ on the following table.
 
     This mode is used only for IR send.
 
+*************************************
+Data types used by LIRC_MODE_SCANCODE
+*************************************
+
+.. kernel-doc:: include/uapi/linux/lirc.h
+    :identifiers: lirc_scancode rc_proto
+
 ********************
 BPF based IR decoder
 ********************
diff --git a/include/uapi/linux/lirc.h b/include/uapi/linux/lirc.h
index bf0672a6d341..c45a4eaea667 100644
--- a/include/uapi/linux/lirc.h
+++ b/include/uapi/linux/lirc.h
@@ -139,7 +139,7 @@
  */
 #define LIRC_GET_REC_TIMEOUT	       _IOR('i', 0x00000024, __u32)
 
-/*
+/**
  * struct lirc_scancode - decoded scancode with protocol for use with
  *	LIRC_MODE_SCANCODE
  *
-- 
cgit v1.2.3


From 7da3ad6c26f41f403fe6823c3de242551db09c37 Mon Sep 17 00:00:00 2001
From: Muhammad Sammar <muhammads@nvidia.com>
Date: Fri, 20 Nov 2020 15:03:27 -0800
Subject: net/mlx5: Add misc4 to mlx5_ifc_fte_match_param_bits

Add misc4 match params to enable matching on prog_sample_fields.

Signed-off-by: Muhammad Sammar <muhammads@nvidia.com>
Reviewed-by: Alex Vesker <valex@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.h |  2 +-
 include/linux/mlx5/device.h                       |  1 +
 include/linux/mlx5/mlx5_ifc.h                     | 25 ++++++++++++++++++++++-
 include/uapi/rdma/mlx5_user_ioctl_cmds.h          |  2 +-
 4 files changed, 27 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index afe7f0bffb93..b24a9849c45e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -194,7 +194,7 @@ struct mlx5_ft_underlay_qp {
 	u32 qpn;
 };
 
-#define MLX5_FTE_MATCH_PARAM_RESERVED	reserved_at_a00
+#define MLX5_FTE_MATCH_PARAM_RESERVED	reserved_at_c00
 /* Calculate the fte_match_param length and without the reserved length.
  * Make sure the reserved field is the last.
  */
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index cf824366a7d1..e9639c4cf2ed 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1076,6 +1076,7 @@ enum {
 	MLX5_MATCH_INNER_HEADERS	= 1 << 2,
 	MLX5_MATCH_MISC_PARAMETERS_2	= 1 << 3,
 	MLX5_MATCH_MISC_PARAMETERS_3	= 1 << 4,
+	MLX5_MATCH_MISC_PARAMETERS_4	= 1 << 5,
 };
 
 enum {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 2f2add4bd5e1..11c24fafd7f2 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -623,6 +623,26 @@ struct mlx5_ifc_fte_match_set_misc3_bits {
 	u8         reserved_at_140[0xc0];
 };
 
+struct mlx5_ifc_fte_match_set_misc4_bits {
+	u8         prog_sample_field_value_0[0x20];
+
+	u8         prog_sample_field_id_0[0x20];
+
+	u8         prog_sample_field_value_1[0x20];
+
+	u8         prog_sample_field_id_1[0x20];
+
+	u8         prog_sample_field_value_2[0x20];
+
+	u8         prog_sample_field_id_2[0x20];
+
+	u8         prog_sample_field_value_3[0x20];
+
+	u8         prog_sample_field_id_3[0x20];
+
+	u8         reserved_at_100[0x100];
+};
+
 struct mlx5_ifc_cmd_pas_bits {
 	u8         pa_h[0x20];
 
@@ -1669,7 +1689,9 @@ struct mlx5_ifc_fte_match_param_bits {
 
 	struct mlx5_ifc_fte_match_set_misc3_bits misc_parameters_3;
 
-	u8         reserved_at_a00[0x600];
+	struct mlx5_ifc_fte_match_set_misc4_bits misc_parameters_4;
+
+	u8         reserved_at_c00[0x400];
 };
 
 enum {
@@ -5462,6 +5484,7 @@ enum {
 	MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_INNER_HEADERS    = 0x2,
 	MLX5_QUERY_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_2 = 0x3,
 	MLX5_QUERY_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_3 = 0x4,
+	MLX5_QUERY_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_4 = 0x5,
 };
 
 struct mlx5_ifc_query_flow_group_out_bits {
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index e24d66d278cf..3fd9b380a091 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -232,7 +232,7 @@ enum mlx5_ib_device_query_context_attrs {
 	MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX = (1U << UVERBS_ID_NS_SHIFT),
 };
 
-#define MLX5_IB_DW_MATCH_PARAM 0x80
+#define MLX5_IB_DW_MATCH_PARAM 0x90
 
 struct mlx5_ib_match_params {
 	__u32	match_params[MLX5_IB_DW_MATCH_PARAM];
-- 
cgit v1.2.3


From f43d3870cafa2a0f3854c1819c8385733db8f9ae Mon Sep 17 00:00:00 2001
From: Dean Camera <dean@fourwalledcubicle.com>
Date: Thu, 26 Nov 2020 09:39:57 +1100
Subject: HID: hidraw: Add additional hidraw input/output report ioctls.

Currently the hidraw module can only read and write feature HID reports on
demand, via dedicated ioctls. Input reports are read from the device through
the read() interface, while output reports are written through the write
interface().

This is insufficient; it is desirable in many situations to be able to read and
write input and output reports through the control interface to cover
additional scenarios:

  - Reading an input report by its report ID, to get initial state
  - Writing an input report, to set initial input state in the device
  - Reading an output report by its report ID, to obtain current state
  - Writing an output report by its report ID, out of band

This patch adds these missing ioctl requests to read and write the remaining
HID report types. Note that not all HID backends will necessarily support this
(e.g. while the USB link layer supports setting Input reports, others may not).

Also included are documentation and example updates. The current hidraw
documentation states that feature reports read from the device do *not*
include the report ID; however, this is not the case and the returned report
will have its report ID prepended by conforming HID devices, as the report data
sent from the device over the control endpoint must be identical in format to
those sent over the regular transport.

Signed-off-by: Dean Camera <dean@fourwalledcubicle.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 Documentation/hid/hidraw.rst | 45 ++++++++++++++++++++++++++++++++++++++++++--
 drivers/hid/hidraw.c         | 24 ++++++++++++++++++++++-
 include/uapi/linux/hidraw.h  |  6 ++++++
 samples/hidraw/hid-example.c |  2 +-
 4 files changed, 73 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/hid/hidraw.rst b/Documentation/hid/hidraw.rst
index 4a4a0ba1f362..f41c1f0f6252 100644
--- a/Documentation/hid/hidraw.rst
+++ b/Documentation/hid/hidraw.rst
@@ -123,8 +123,49 @@ HIDIOCGFEATURE(len):
 This ioctl will request a feature report from the device using the control
 endpoint.  The first byte of the supplied buffer should be set to the report
 number of the requested report.  For devices which do not use numbered
-reports, set the first byte to 0.  The report will be returned starting at
-the first byte of the buffer (ie: the report number is not returned).
+reports, set the first byte to 0.  The returned report buffer will contain the
+report number in the first byte, followed by the report data read from the
+device.  For devices which do not use numbered reports, the report data will
+begin at the first byte of the returned buffer.
+
+HIDIOCSINPUT(len):
+	Send an Input Report
+
+This ioctl will send an input report to the device, using the control endpoint.
+In most cases, setting an input HID report on a device is meaningless and has
+no effect, but some devices may choose to use this to set or reset an initial
+state of a report.  The format of the buffer issued with this report is identical
+to that of HIDIOCSFEATURE.
+
+HIDIOCGINPUT(len):
+	Get an Input Report
+
+This ioctl will request an input report from the device using the control
+endpoint.  This is slower on most devices where a dedicated In endpoint exists
+for regular input reports, but allows the host to request the value of a
+specific report number.  Typically, this is used to request the initial states of
+an input report of a device, before an application listens for normal reports via
+the regular device read() interface.  The format of the buffer issued with this report
+is identical to that of HIDIOCGFEATURE.
+
+HIDIOCSOUTPUT(len):
+	Send an Output Report
+
+This ioctl will send an output report to the device, using the control endpoint.
+This is slower on most devices where a dedicated Out endpoint exists for regular
+output reports, but is added for completeness.  Typically, this is used to set
+the initial states of an output report of a device, before an application sends
+updates via the regular device write() interface. The format of the buffer issued
+with this report is identical to that of HIDIOCSFEATURE.
+
+HIDIOCGOUTPUT(len):
+	Get an Output Report
+
+This ioctl will request an output report from the device using the control
+endpoint.  Typically, this is used to retrive the initial state of
+an output report of a device, before an application updates it as necessary either
+via a HIDIOCSOUTPUT request, or the regular device write() interface.  The format
+of the buffer issued with this report is identical to that of HIDIOCGFEATURE.
 
 Example
 -------
diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c
index 2eee5e31c2b7..79faac87a06f 100644
--- a/drivers/hid/hidraw.c
+++ b/drivers/hid/hidraw.c
@@ -170,7 +170,7 @@ static ssize_t hidraw_write(struct file *file, const char __user *buffer, size_t
 /*
  * This function performs a Get_Report transfer over the control endpoint
  * per section 7.2.1 of the HID specification, version 1.1.  The first byte
- * of buffer is the report number to request, or 0x0 if the defice does not
+ * of buffer is the report number to request, or 0x0 if the device does not
  * use numbered reports. The report_type parameter can be HID_FEATURE_REPORT
  * or HID_INPUT_REPORT.
  */
@@ -428,6 +428,28 @@ static long hidraw_ioctl(struct file *file, unsigned int cmd,
 					break;
 				}
 
+				if (_IOC_NR(cmd) == _IOC_NR(HIDIOCSINPUT(0))) {
+					int len = _IOC_SIZE(cmd);
+					ret = hidraw_send_report(file, user_arg, len, HID_INPUT_REPORT);
+					break;
+				}
+				if (_IOC_NR(cmd) == _IOC_NR(HIDIOCGINPUT(0))) {
+					int len = _IOC_SIZE(cmd);
+					ret = hidraw_get_report(file, user_arg, len, HID_INPUT_REPORT);
+					break;
+				}
+
+				if (_IOC_NR(cmd) == _IOC_NR(HIDIOCSOUTPUT(0))) {
+					int len = _IOC_SIZE(cmd);
+					ret = hidraw_send_report(file, user_arg, len, HID_OUTPUT_REPORT);
+					break;
+				}
+				if (_IOC_NR(cmd) == _IOC_NR(HIDIOCGOUTPUT(0))) {
+					int len = _IOC_SIZE(cmd);
+					ret = hidraw_get_report(file, user_arg, len, HID_OUTPUT_REPORT);
+					break;
+				}
+
 				/* Begin Read-only ioctls. */
 				if (_IOC_DIR(cmd) != _IOC_READ) {
 					ret = -EINVAL;
diff --git a/include/uapi/linux/hidraw.h b/include/uapi/linux/hidraw.h
index 4913539e5bcc..33ebad81720a 100644
--- a/include/uapi/linux/hidraw.h
+++ b/include/uapi/linux/hidraw.h
@@ -40,6 +40,12 @@ struct hidraw_devinfo {
 #define HIDIOCSFEATURE(len)    _IOC(_IOC_WRITE|_IOC_READ, 'H', 0x06, len)
 #define HIDIOCGFEATURE(len)    _IOC(_IOC_WRITE|_IOC_READ, 'H', 0x07, len)
 #define HIDIOCGRAWUNIQ(len)     _IOC(_IOC_READ, 'H', 0x08, len)
+/* The first byte of SINPUT and GINPUT is the report number */
+#define HIDIOCSINPUT(len)    _IOC(_IOC_WRITE|_IOC_READ, 'H', 0x09, len)
+#define HIDIOCGINPUT(len)    _IOC(_IOC_WRITE|_IOC_READ, 'H', 0x0A, len)
+/* The first byte of SOUTPUT and GOUTPUT is the report number */
+#define HIDIOCSOUTPUT(len)    _IOC(_IOC_WRITE|_IOC_READ, 'H', 0x0B, len)
+#define HIDIOCGOUTPUT(len)    _IOC(_IOC_WRITE|_IOC_READ, 'H', 0x0C, len)
 
 #define HIDRAW_FIRST_MINOR 0
 #define HIDRAW_MAX_DEVICES 64
diff --git a/samples/hidraw/hid-example.c b/samples/hidraw/hid-example.c
index 37a0ffcb4d63..0f73ace3c6c3 100644
--- a/samples/hidraw/hid-example.c
+++ b/samples/hidraw/hid-example.c
@@ -128,7 +128,7 @@ int main(int argc, char **argv)
 		perror("HIDIOCGFEATURE");
 	} else {
 		printf("ioctl HIDIOCGFEATURE returned: %d\n", res);
-		printf("Report data (not containing the report number):\n\t");
+		printf("Report data:\n\t");
 		for (i = 0; i < res; i++)
 			printf("%hhx ", buf[i]);
 		puts("\n");
-- 
cgit v1.2.3


From 69929d4c49e182f8526d42c43b37b460d562d3a0 Mon Sep 17 00:00:00 2001
From: Eelco Chaudron <echaudro@redhat.com>
Date: Tue, 24 Nov 2020 07:34:44 -0500
Subject: net: openvswitch: fix TTL decrement action netlink message format

Currently, the openvswitch module is not accepting the correctly formatted
netlink message for the TTL decrement action. For both setting and getting
the dec_ttl action, the actions should be nested in the
OVS_DEC_TTL_ATTR_ACTION attribute as mentioned in the openvswitch.h uapi.

When the original patch was sent, it was tested with a private OVS userspace
implementation. This implementation was unfortunately not upstreamed and
reviewed, hence an erroneous version of this patch was sent out.

Leaving the patch as-is would cause problems as the kernel module could
interpret additional attributes as actions and vice-versa, due to the
actions not being encapsulated/nested within the actual attribute, but
being concatenated after it.

Fixes: 744676e77720 ("openvswitch: add TTL decrement action")
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Link: https://lore.kernel.org/r/160622121495.27296.888010441924340582.stgit@wsfd-netdev64.ntdv.lab.eng.bos.redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/openvswitch.h |  2 ++
 net/openvswitch/actions.c        |  7 ++--
 net/openvswitch/flow_netlink.c   | 74 +++++++++++++++++++++++++++++-----------
 3 files changed, 60 insertions(+), 23 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 8300cc29dec8..8d16744edc31 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -1058,4 +1058,6 @@ enum ovs_dec_ttl_attr {
 	__OVS_DEC_TTL_ATTR_MAX
 };
 
+#define OVS_DEC_TTL_ATTR_MAX (__OVS_DEC_TTL_ATTR_MAX - 1)
+
 #endif /* _LINUX_OPENVSWITCH_H */
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index b87bfc82f44f..5829a020b81c 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -958,14 +958,13 @@ static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb,
 {
 	/* The first action is always 'OVS_DEC_TTL_ATTR_ARG'. */
 	struct nlattr *dec_ttl_arg = nla_data(attr);
-	int rem = nla_len(attr);
 
 	if (nla_len(dec_ttl_arg)) {
-		struct nlattr *actions = nla_next(dec_ttl_arg, &rem);
+		struct nlattr *actions = nla_data(dec_ttl_arg);
 
 		if (actions)
-			return clone_execute(dp, skb, key, 0, actions, rem,
-					     last, false);
+			return clone_execute(dp, skb, key, 0, nla_data(actions),
+					     nla_len(actions), last, false);
 	}
 	consume_skb(skb);
 	return 0;
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 9d3e50c4d29f..ec0689ddc635 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2503,28 +2503,42 @@ static int validate_and_copy_dec_ttl(struct net *net,
 				     __be16 eth_type, __be16 vlan_tci,
 				     u32 mpls_label_count, bool log)
 {
-	int start, err;
-	u32 nested = true;
+	const struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1];
+	int start, action_start, err, rem;
+	const struct nlattr *a, *actions;
+
+	memset(attrs, 0, sizeof(attrs));
+	nla_for_each_nested(a, attr, rem) {
+		int type = nla_type(a);
 
-	if (!nla_len(attr))
-		return ovs_nla_add_action(sfa, OVS_ACTION_ATTR_DEC_TTL,
-					  NULL, 0, log);
+		/* Ignore unknown attributes to be future proof. */
+		if (type > OVS_DEC_TTL_ATTR_MAX)
+			continue;
+
+		if (!type || attrs[type])
+			return -EINVAL;
+
+		attrs[type] = a;
+	}
+
+	actions = attrs[OVS_DEC_TTL_ATTR_ACTION];
+	if (rem || !actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
+		return -EINVAL;
 
 	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_DEC_TTL, log);
 	if (start < 0)
 		return start;
 
-	err = ovs_nla_add_action(sfa, OVS_DEC_TTL_ATTR_ACTION, &nested,
-				 sizeof(nested), log);
-
-	if (err)
-		return err;
+	action_start = add_nested_action_start(sfa, OVS_DEC_TTL_ATTR_ACTION, log);
+	if (action_start < 0)
+		return start;
 
-	err = __ovs_nla_copy_actions(net, attr, key, sfa, eth_type,
+	err = __ovs_nla_copy_actions(net, actions, key, sfa, eth_type,
 				     vlan_tci, mpls_label_count, log);
 	if (err)
 		return err;
 
+	add_nested_action_end(*sfa, action_start);
 	add_nested_action_end(*sfa, start);
 	return 0;
 }
@@ -3487,20 +3501,42 @@ out:
 static int dec_ttl_action_to_attr(const struct nlattr *attr,
 				  struct sk_buff *skb)
 {
-	int err = 0, rem = nla_len(attr);
-	struct nlattr *start;
+	struct nlattr *start, *action_start;
+	const struct nlattr *a;
+	int err = 0, rem;
 
 	start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_DEC_TTL);
-
 	if (!start)
 		return -EMSGSIZE;
 
-	err = ovs_nla_put_actions(nla_data(attr), rem, skb);
-	if (err)
-		nla_nest_cancel(skb, start);
-	else
-		nla_nest_end(skb, start);
+	nla_for_each_attr(a, nla_data(attr), nla_len(attr), rem) {
+		switch (nla_type(a)) {
+		case OVS_DEC_TTL_ATTR_ACTION:
+
+			action_start = nla_nest_start_noflag(skb, OVS_DEC_TTL_ATTR_ACTION);
+			if (!action_start) {
+				err = -EMSGSIZE;
+				goto out;
+			}
+
+			err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
+			if (err)
+				goto out;
+
+			nla_nest_end(skb, action_start);
+			break;
 
+		default:
+			/* Ignore all other option to be future compatible */
+			break;
+		}
+	}
+
+	nla_nest_end(skb, start);
+	return 0;
+
+out:
+	nla_nest_cancel(skb, start);
 	return err;
 }
 
-- 
cgit v1.2.3


From 923c40c4651ed8b30cbd9fbac0f0ab612216cccc Mon Sep 17 00:00:00 2001
From: Vadim Fedorenko <vfedorenko@novek.ru>
Date: Tue, 24 Nov 2020 18:24:47 +0300
Subject: net/tls: add CHACHA20-POLY1305 specific defines and structures

To provide support for ChaCha-Poly cipher we need to define
specific constants and structures.

Signed-off-by: Vadim Fedorenko <vfedorenko@novek.ru>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/tls.h        |  1 +
 include/uapi/linux/tls.h | 15 +++++++++++++++
 2 files changed, 16 insertions(+)

(limited to 'include/uapi')

diff --git a/include/net/tls.h b/include/net/tls.h
index d04ce73e54c9..e4e9c2ae689e 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -211,6 +211,7 @@ union tls_crypto_context {
 	union {
 		struct tls12_crypto_info_aes_gcm_128 aes_gcm_128;
 		struct tls12_crypto_info_aes_gcm_256 aes_gcm_256;
+		struct tls12_crypto_info_chacha20_poly1305 chacha20_poly1305;
 	};
 };
 
diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h
index bcd2869ed472..0d54baea1d8d 100644
--- a/include/uapi/linux/tls.h
+++ b/include/uapi/linux/tls.h
@@ -77,6 +77,13 @@
 #define TLS_CIPHER_AES_CCM_128_TAG_SIZE		16
 #define TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE		8
 
+#define TLS_CIPHER_CHACHA20_POLY1305			54
+#define TLS_CIPHER_CHACHA20_POLY1305_IV_SIZE		12
+#define TLS_CIPHER_CHACHA20_POLY1305_KEY_SIZE	32
+#define TLS_CIPHER_CHACHA20_POLY1305_SALT_SIZE		0
+#define TLS_CIPHER_CHACHA20_POLY1305_TAG_SIZE	16
+#define TLS_CIPHER_CHACHA20_POLY1305_REC_SEQ_SIZE	8
+
 #define TLS_SET_RECORD_TYPE	1
 #define TLS_GET_RECORD_TYPE	2
 
@@ -109,6 +116,14 @@ struct tls12_crypto_info_aes_ccm_128 {
 	unsigned char rec_seq[TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE];
 };
 
+struct tls12_crypto_info_chacha20_poly1305 {
+	struct tls_crypto_info info;
+	unsigned char iv[TLS_CIPHER_CHACHA20_POLY1305_IV_SIZE];
+	unsigned char key[TLS_CIPHER_CHACHA20_POLY1305_KEY_SIZE];
+	unsigned char salt[TLS_CIPHER_CHACHA20_POLY1305_SALT_SIZE];
+	unsigned char rec_seq[TLS_CIPHER_CHACHA20_POLY1305_REC_SEQ_SIZE];
+};
+
 enum {
 	TLS_INFO_UNSPEC,
 	TLS_INFO_VERSION,
-- 
cgit v1.2.3


From 4bb1f2f3fb31ed60a23064a8fc4d5ecde5d1002d Mon Sep 17 00:00:00 2001
From: Tal Cohen <talcohen@habana.ai>
Date: Wed, 3 Jun 2020 09:25:27 +0300
Subject: habanalabs: use enum for CB allocation options

In the future there will be situations where queues can accept either
kernel allocated CBs or user allocated CBs, depending on different
states.

Therefore, instead of using a boolean variable of kernel/user allocated
CB, we need to use a bitmask to indicate that, which will allow to
combine the two options.

Add a flag to the uapi so the user will be able to indicate whether
the CB was allocated by kernel or by user. Of course the driver
validates that.

Signed-off-by: Tal Cohen <talcohen@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../misc/habanalabs/common/command_submission.c    | 31 ++++++++++++++++++++--
 drivers/misc/habanalabs/common/habanalabs.h        | 19 ++++++++++---
 drivers/misc/habanalabs/gaudi/gaudi.c              | 13 ++++++---
 drivers/misc/habanalabs/goya/goya.c                |  6 ++---
 include/uapi/misc/habanalabs.h                     | 16 +++++++++++
 5 files changed, 73 insertions(+), 12 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 0e37aad85930..cd3422bfe6f8 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -568,9 +568,36 @@ static int validate_queue_index(struct hl_device *hdev,
 		return -EINVAL;
 	}
 
-	*queue_type = hw_queue_prop->type;
-	*is_kernel_allocated_cb = !!hw_queue_prop->requires_kernel_cb;
+	/* When hw queue type isn't QUEUE_TYPE_HW,
+	 * USER_ALLOC_CB flag shall be referred as "don't care".
+	 */
+	if (hw_queue_prop->type == QUEUE_TYPE_HW) {
+		if (chunk->cs_chunk_flags & HL_CS_CHUNK_FLAGS_USER_ALLOC_CB) {
+			if (!(hw_queue_prop->cb_alloc_flags & CB_ALLOC_USER)) {
+				dev_err(hdev->dev,
+					"Queue index %d doesn't support user CB\n",
+					chunk->queue_index);
+				return -EINVAL;
+			}
 
+			*is_kernel_allocated_cb = false;
+		} else {
+			if (!(hw_queue_prop->cb_alloc_flags &
+					CB_ALLOC_KERNEL)) {
+				dev_err(hdev->dev,
+					"Queue index %d doesn't support kernel CB\n",
+					chunk->queue_index);
+				return -EINVAL;
+			}
+
+			*is_kernel_allocated_cb = true;
+		}
+	} else {
+		*is_kernel_allocated_cb = !!(hw_queue_prop->cb_alloc_flags
+						& CB_ALLOC_KERNEL);
+	}
+
+	*queue_type = hw_queue_prop->type;
 	return 0;
 }
 
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index b5a34936e22d..0823798f292e 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -206,6 +206,17 @@ struct hl_outbound_pci_region {
 	u64	size;
 };
 
+/*
+ * enum queue_cb_alloc_flags - Indicates queue support for CBs that
+ * allocated by Kernel or by User
+ * @CB_ALLOC_KERNEL: support only CBs that allocated by Kernel
+ * @CB_ALLOC_USER: support only CBs that allocated by User
+ */
+enum queue_cb_alloc_flags {
+	CB_ALLOC_KERNEL = 0x1,
+	CB_ALLOC_USER   = 0x2
+};
+
 /*
  * struct hl_hw_sob - H/W SOB info.
  * @hdev: habanalabs device structure.
@@ -223,16 +234,18 @@ struct hl_hw_sob {
 /**
  * struct hw_queue_properties - queue information.
  * @type: queue type.
+ * @queue_cb_alloc_flags: bitmap which indicates if the hw queue supports CB
+ *                        that allocated by the Kernel driver and therefore,
+ *                        a CB handle can be provided for jobs on this queue.
+ *                        Otherwise, a CB address must be provided.
  * @driver_only: true if only the driver is allowed to send a job to this queue,
  *               false otherwise.
- * @requires_kernel_cb: true if a CB handle must be provided for jobs on this
- *                      queue, false otherwise (a CB address must be provided).
  * @supports_sync_stream: True if queue supports sync stream
  */
 struct hw_queue_properties {
 	enum hl_queue_type	type;
+	enum queue_cb_alloc_flags cb_alloc_flags;
 	u8			driver_only;
-	u8			requires_kernel_cb;
 	u8			supports_sync_stream;
 };
 
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 2dd9b732299a..9393e34b9719 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -381,23 +381,28 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
 		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
 			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
 			prop->hw_queues_props[i].driver_only = 0;
-			prop->hw_queues_props[i].requires_kernel_cb = 1;
 			prop->hw_queues_props[i].supports_sync_stream = 1;
+			prop->hw_queues_props[i].cb_alloc_flags =
+				CB_ALLOC_KERNEL;
 			num_sync_stream_queues++;
 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
 			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
 			prop->hw_queues_props[i].driver_only = 1;
-			prop->hw_queues_props[i].requires_kernel_cb = 0;
 			prop->hw_queues_props[i].supports_sync_stream = 0;
+			prop->hw_queues_props[i].cb_alloc_flags =
+				CB_ALLOC_KERNEL;
 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
 			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
 			prop->hw_queues_props[i].driver_only = 0;
-			prop->hw_queues_props[i].requires_kernel_cb = 0;
+			prop->hw_queues_props[i].supports_sync_stream = 0;
+			prop->hw_queues_props[i].cb_alloc_flags =
+				CB_ALLOC_USER;
 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_NA) {
 			prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
 			prop->hw_queues_props[i].driver_only = 0;
-			prop->hw_queues_props[i].requires_kernel_cb = 0;
 			prop->hw_queues_props[i].supports_sync_stream = 0;
+			prop->hw_queues_props[i].cb_alloc_flags =
+				CB_ALLOC_USER;
 		}
 	}
 
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index d873f613acb0..74c44278166b 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -373,20 +373,20 @@ int goya_get_fixed_properties(struct hl_device *hdev)
 	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
 		prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
 		prop->hw_queues_props[i].driver_only = 0;
-		prop->hw_queues_props[i].requires_kernel_cb = 1;
+		prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_KERNEL;
 	}
 
 	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) {
 		prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
 		prop->hw_queues_props[i].driver_only = 1;
-		prop->hw_queues_props[i].requires_kernel_cb = 0;
+		prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_KERNEL;
 	}
 
 	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES +
 			NUMBER_OF_INT_HW_QUEUES; i++) {
 		prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
 		prop->hw_queues_props[i].driver_only = 0;
-		prop->hw_queues_props[i].requires_kernel_cb = 0;
+		prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_USER;
 	}
 
 	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 9705b8adb60c..5753157e71b3 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -490,6 +490,22 @@ union hl_cb_args {
 	struct hl_cb_out out;
 };
 
+/* HL_CS_CHUNK_FLAGS_ values
+ *
+ * HL_CS_CHUNK_FLAGS_USER_ALLOC_CB:
+ *      Indicates if the CB was allocated and mapped by userspace.
+ *      User allocated CB is a command buffer allocated by the user, via malloc
+ *      (or similar). After allocating the CB, the user invokes “memory ioctl”
+ *      to map the user memory into a device virtual address. The user provides
+ *      this address via the cb_handle field. The interface provides the
+ *      ability to create a large CBs, Which aren’t limited to
+ *      “HL_MAX_CB_SIZE”. Therefore, it increases the PCI-DMA queues
+ *      throughput. This CB allocation method also reduces the use of Linux
+ *      DMA-able memory pool. Which are limited and used by other Linux
+ *      sub-systems.
+ */
+#define HL_CS_CHUNK_FLAGS_USER_ALLOC_CB 0x1
+
 /*
  * This structure size must always be fixed to 64-bytes for backward
  * compatibility
-- 
cgit v1.2.3


From 5fe1c17ddf2e5e5d05e983b56ebbc0d1c702b16a Mon Sep 17 00:00:00 2001
From: Ofir Bitton <obitton@habana.ai>
Date: Thu, 10 Sep 2020 10:10:55 +0300
Subject: habanalabs: sync stream collective infrastructure

Define new API for collective wait support and modify sync stream
common flow. In addition add kernel CB allocation support for
internal queues.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../misc/habanalabs/common/command_submission.c    | 38 ++++++++++++---
 drivers/misc/habanalabs/common/habanalabs.h        | 55 +++++++++++++++++++---
 drivers/misc/habanalabs/common/hw_queue.c          | 44 +++++++++++++++--
 drivers/misc/habanalabs/gaudi/gaudi.c              | 22 ++++++++-
 drivers/misc/habanalabs/goya/goya.c                | 22 ++++++++-
 include/uapi/misc/habanalabs.h                     | 17 +++++--
 6 files changed, 176 insertions(+), 22 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index cd3422bfe6f8..2dbd42b6ad0c 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -85,7 +85,8 @@ static void hl_fence_release(struct kref *kref)
 		goto free;
 
 	if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
-			(hl_cs_cmpl->type == CS_TYPE_WAIT)) {
+		(hl_cs_cmpl->type == CS_TYPE_WAIT) ||
+		(hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)) {
 
 		dev_dbg(hdev->dev,
 			"CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n",
@@ -112,6 +113,10 @@ static void hl_fence_release(struct kref *kref)
 		 * hence the above scenario is avoided.
 		 */
 		kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset);
+
+		if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
+			hdev->asic_funcs->reset_sob_group(hdev,
+					hl_cs_cmpl->sob_group);
 	}
 
 free:
@@ -247,9 +252,11 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
 	/* For H/W queue jobs, if a user CB was allocated by driver and MMU is
 	 * enabled, the user CB isn't released in cs_parser() and thus should be
 	 * released here.
+	 * This is also true for INT queues jobs which were allocated by driver
 	 */
-	if (job->queue_type == QUEUE_TYPE_HW &&
-			job->is_kernel_allocated_cb && hdev->mmu_enable) {
+	if (job->is_kernel_allocated_cb &&
+		((job->queue_type == QUEUE_TYPE_HW && hdev->mmu_enable) ||
+				job->queue_type == QUEUE_TYPE_INT)) {
 		spin_lock(&job->user_cb->lock);
 		job->user_cb->cs_cnt--;
 		spin_unlock(&job->user_cb->lock);
@@ -931,7 +938,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 	struct hl_cs_compl *sig_waitcs_cmpl;
 	struct hl_cs *cs;
 	enum hl_queue_type q_type;
-	u32 size_to_copy, q_idx;
+	u32 size_to_copy, q_idx, collective_engine_id;
 	u64 signal_seq;
 	int rc;
 
@@ -981,7 +988,18 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 		goto free_cs_chunk_array;
 	}
 
-	if (cs_type == CS_TYPE_WAIT) {
+	if (cs_type == CS_TYPE_COLLECTIVE_WAIT) {
+		if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
+			dev_err(hdev->dev,
+				"Queue index %d is invalid\n", q_idx);
+			rc = -EINVAL;
+			goto free_cs_chunk_array;
+		}
+
+		collective_engine_id = chunk->collective_engine_id;
+	}
+
+	if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT) {
 		rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq);
 		if (rc)
 			goto free_cs_chunk_array;
@@ -1026,7 +1044,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 
 	rc = allocate_cs(hdev, ctx, cs_type, &cs);
 	if (rc) {
-		if (cs_type == CS_TYPE_WAIT)
+		if (cs_type == CS_TYPE_WAIT ||
+			cs_type == CS_TYPE_COLLECTIVE_WAIT)
 			hl_fence_put(sig_fence);
 		hl_ctx_put(ctx);
 		goto free_cs_chunk_array;
@@ -1036,7 +1055,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 	 * Save the signal CS fence for later initialization right before
 	 * hanging the wait CS on the queue.
 	 */
-	if (cs_type == CS_TYPE_WAIT)
+	if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT)
 		cs->signal_fence = sig_fence;
 
 	hl_debugfs_add_cs(cs);
@@ -1046,6 +1065,9 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 	if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_SIGNAL)
 		rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type,
 				q_idx);
+	else
+		rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
+				cs, q_idx, collective_engine_id);
 
 	if (rc)
 		goto put_cs;
@@ -1120,6 +1142,8 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
 		cs_type = CS_TYPE_SIGNAL;
 	else if (args->in.cs_flags & HL_CS_FLAGS_WAIT)
 		cs_type = CS_TYPE_WAIT;
+	else if (args->in.cs_flags & HL_CS_FLAGS_COLLECTIVE_WAIT)
+		cs_type = CS_TYPE_COLLECTIVE_WAIT;
 	else
 		cs_type = CS_TYPE_DEFAULT;
 
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 0823798f292e..98249a2c97e7 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -68,6 +68,11 @@
 #define HL_RSVD_SOBS			4
 #define HL_RSVD_MONS			2
 
+/*
+ * HL_COLLECTIVE_RSVD_MSTR_MONS 'collective' reserved monitors per QMAN stream
+ */
+#define HL_COLLECTIVE_RSVD_MSTR_MONS	2
+
 #define HL_MAX_SOB_VAL			(1 << 15)
 
 #define IS_POWER_OF_2(n)		(n != 0 && ((n & (n - 1)) == 0))
@@ -177,7 +182,8 @@ enum hl_queue_type {
 enum hl_cs_type {
 	CS_TYPE_DEFAULT,
 	CS_TYPE_SIGNAL,
-	CS_TYPE_WAIT
+	CS_TYPE_WAIT,
+	CS_TYPE_COLLECTIVE_WAIT
 };
 
 /*
@@ -231,6 +237,12 @@ struct hl_hw_sob {
 	u32			q_idx;
 };
 
+enum hl_collective_mode {
+	HL_COLLECTIVE_NOT_SUPPORTED = 0x0,
+	HL_COLLECTIVE_MASTER = 0x1,
+	HL_COLLECTIVE_SLAVE = 0x2
+};
+
 /**
  * struct hw_queue_properties - queue information.
  * @type: queue type.
@@ -238,6 +250,7 @@ struct hl_hw_sob {
  *                        that allocated by the Kernel driver and therefore,
  *                        a CB handle can be provided for jobs on this queue.
  *                        Otherwise, a CB address must be provided.
+ * @collective_mode: collective mode of current queue
  * @driver_only: true if only the driver is allowed to send a job to this queue,
  *               false otherwise.
  * @supports_sync_stream: True if queue supports sync stream
@@ -245,6 +258,7 @@ struct hl_hw_sob {
 struct hw_queue_properties {
 	enum hl_queue_type	type;
 	enum queue_cb_alloc_flags cb_alloc_flags;
+	enum hl_collective_mode	collective_mode;
 	u8			driver_only;
 	u8			supports_sync_stream;
 };
@@ -358,6 +372,8 @@ struct hl_mmu_properties {
  * @cb_pool_cb_size: size of each CB in the CB pool.
  * @max_pending_cs: maximum of concurrent pending command submissions
  * @max_queues: maximum amount of queues in the system
+ * @collective_first_sob: first sync object available for collective use
+ * @collective_first_mon: first monitor available for collective use
  * @sync_stream_first_sob: first sync object available for sync stream use
  * @sync_stream_first_mon: first monitor available for sync stream use
  * @first_available_user_sob: first sob available for the user
@@ -410,6 +426,8 @@ struct asic_fixed_properties {
 	u32				cb_pool_cb_size;
 	u32				max_pending_cs;
 	u32				max_queues;
+	u16				collective_first_sob;
+	u16				collective_first_mon;
 	u16				sync_stream_first_sob;
 	u16				sync_stream_first_mon;
 	u16				first_available_user_sob[HL_MAX_DCORES];
@@ -441,6 +459,7 @@ struct hl_fence {
  * @cs_seq: command submission sequence number.
  * @type: type of the CS - signal/wait.
  * @sob_val: the SOB value that is used in this signal/wait CS.
+ * @sob_group: the SOB group that is used in this collective wait CS.
  */
 struct hl_cs_compl {
 	struct hl_fence		base_fence;
@@ -450,6 +469,7 @@ struct hl_cs_compl {
 	u64			cs_seq;
 	enum hl_cs_type		type;
 	u16			sob_val;
+	u16			sob_group;
 };
 
 /*
@@ -512,6 +532,7 @@ struct hl_cb {
  * QUEUES
  */
 
+struct hl_cs;
 struct hl_cs_job;
 
 /* Queue length of external and HW queues */
@@ -540,15 +561,24 @@ struct hl_cs_job;
  * @next_sob_val: the next value to use for the currently used SOB.
  * @base_sob_id: the base SOB id of the SOBs used by this queue.
  * @base_mon_id: the base MON id of the MONs used by this queue.
+ * @collective_mstr_mon_id: the MON ids of the MONs used by this master queue
+ *                          in order to sync with all slave queues.
+ * @collective_slave_mon_id: the MON id used by this slave queue in order to
+ *                           sync with its master queue.
+ * @collective_sob_id: current SOB id used by this collective slave queue
+ *                     to signal its collective master queue upon completion.
  * @curr_sob_offset: the id offset to the currently used SOB from the
  *                   HL_RSVD_SOBS that are being used by this queue.
  */
 struct hl_sync_stream_properties {
-	struct hl_hw_sob	hw_sob[HL_RSVD_SOBS];
-	u16			next_sob_val;
-	u16			base_sob_id;
-	u16			base_mon_id;
-	u8			curr_sob_offset;
+	struct hl_hw_sob hw_sob[HL_RSVD_SOBS];
+	u16		next_sob_val;
+	u16		base_sob_id;
+	u16		base_mon_id;
+	u16		collective_mstr_mon_id[HL_COLLECTIVE_RSVD_MSTR_MONS];
+	u16		collective_slave_mon_id;
+	u16		collective_sob_id;
+	u8		curr_sob_offset;
 };
 
 /**
@@ -556,6 +586,7 @@ struct hl_sync_stream_properties {
  * @shadow_queue: pointer to a shadow queue that holds pointers to jobs.
  * @sync_stream_prop: sync stream queue properties
  * @queue_type: type of queue.
+ * @collective_mode: collective mode of current queue
  * @kernel_address: holds the queue's kernel virtual address.
  * @bus_address: holds the queue's DMA address.
  * @pi: holds the queue's pi value.
@@ -572,6 +603,7 @@ struct hl_hw_queue {
 	struct hl_cs_job			**shadow_queue;
 	struct hl_sync_stream_properties	sync_stream_prop;
 	enum hl_queue_type			queue_type;
+	enum hl_collective_mode			collective_mode;
 	void					*kernel_address;
 	dma_addr_t				bus_address;
 	u32					pi;
@@ -764,9 +796,13 @@ enum div_select_defs {
  * @gen_signal_cb: Generate a signal CB.
  * @gen_wait_cb: Generate a wait CB.
  * @reset_sob: Reset a SOB.
+ * @reset_sob_group: Reset SOB group
  * @set_dma_mask_from_fw: set the DMA mask in the driver according to the
  *                        firmware configuration
  * @get_device_time: Get the device time.
+ * @collective_wait_init_cs: Generate collective master/slave packets
+ *                           and place them in the relevant cs jobs
+ * @collective_wait_create_jobs: allocate collective wait cs jobs
  */
 struct hl_asic_funcs {
 	int (*early_init)(struct hl_device *hdev);
@@ -868,8 +904,13 @@ struct hl_asic_funcs {
 	u32 (*gen_wait_cb)(struct hl_device *hdev,
 			struct hl_gen_wait_properties *prop);
 	void (*reset_sob)(struct hl_device *hdev, void *data);
+	void (*reset_sob_group)(struct hl_device *hdev, u16 sob_group);
 	void (*set_dma_mask_from_fw)(struct hl_device *hdev);
 	u64 (*get_device_time)(struct hl_device *hdev);
+	void (*collective_wait_init_cs)(struct hl_cs *cs);
+	int (*collective_wait_create_jobs)(struct hl_device *hdev,
+			struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
+			u32 collective_engine_id);
 };
 
 
@@ -1656,6 +1697,7 @@ struct hl_mmu_funcs {
  * @stop_on_err: true if engines should stop on error.
  * @supports_sync_stream: is sync stream supported.
  * @sync_stream_queue_idx: helper index for sync stream queues initialization.
+ * @collective_mon_idx: helper index for collective initialization
  * @supports_coresight: is CoreSight supported.
  * @supports_soft_reset: is soft reset supported.
  * @supports_cb_mapping: is mapping a CB to the device's MMU supported.
@@ -1756,6 +1798,7 @@ struct hl_device {
 	u8				stop_on_err;
 	u8				supports_sync_stream;
 	u8				sync_stream_queue_idx;
+	u8				collective_mon_idx;
 	u8				supports_coresight;
 	u8				supports_soft_reset;
 	u8				supports_cb_mapping;
diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c
index be1d0e2c99d8..d9448375beac 100644
--- a/drivers/misc/habanalabs/common/hw_queue.c
+++ b/drivers/misc/habanalabs/common/hw_queue.c
@@ -333,7 +333,14 @@ static void int_queue_schedule_job(struct hl_cs_job *job)
 
 	bd.ctl = 0;
 	bd.len = cpu_to_le32(job->job_cb_size);
-	bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb);
+
+	if (job->is_kernel_allocated_cb)
+		/* bus_address is actually a mmu mapped address
+		 * allocated from an internal pool
+		 */
+		bd.ptr = cpu_to_le64(job->user_cb->bus_address);
+	else
+		bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb);
 
 	pi = q->kernel_address + (q->pi & (q->int_queue_len - 1)) * sizeof(bd);
 
@@ -562,6 +569,8 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 
 	if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT))
 		init_signal_wait_cs(cs);
+	else if (cs->type == CS_TYPE_COLLECTIVE_WAIT)
+		hdev->asic_funcs->collective_wait_init_cs(cs);
 
 	spin_lock(&hdev->hw_queues_mirror_lock);
 	list_add_tail(&cs->mirror_node, &hdev->hw_queues_mirror_list);
@@ -741,12 +750,40 @@ static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx)
 	struct hl_sync_stream_properties *sync_stream_prop;
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct hl_hw_sob *hw_sob;
-	int sob, queue_idx;
+	int sob, reserved_mon_idx, queue_idx;
+
+	sync_stream_prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
+
+	/* We use 'collective_mon_idx' as a running index in order to reserve
+	 * monitors for collective master/slave queues.
+	 * collective master queue gets 2 reserved monitors
+	 * collective slave queue gets 1 reserved monitor
+	 */
+	if (hdev->kernel_queues[q_idx].collective_mode ==
+			HL_COLLECTIVE_MASTER) {
+		reserved_mon_idx = hdev->collective_mon_idx;
+
+		/* reserve the first monitor for collective master queue */
+		sync_stream_prop->collective_mstr_mon_id[0] =
+			prop->collective_first_mon + reserved_mon_idx;
+
+		/* reserve the second monitor for collective master queue */
+		sync_stream_prop->collective_mstr_mon_id[1] =
+			prop->collective_first_mon + reserved_mon_idx + 1;
+
+		hdev->collective_mon_idx += HL_COLLECTIVE_RSVD_MSTR_MONS;
+	} else if (hdev->kernel_queues[q_idx].collective_mode ==
+			HL_COLLECTIVE_SLAVE) {
+		reserved_mon_idx = hdev->collective_mon_idx++;
+
+		/* reserve a monitor for collective slave queue */
+		sync_stream_prop->collective_slave_mon_id =
+			prop->collective_first_mon + reserved_mon_idx;
+	}
 
 	if (!hdev->kernel_queues[q_idx].supports_sync_stream)
 		return;
 
-	sync_stream_prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
 	queue_idx = hdev->sync_stream_queue_idx++;
 
 	sync_stream_prop->base_sob_id = prop->sync_stream_first_sob +
@@ -897,6 +934,7 @@ int hl_hw_queues_create(struct hl_device *hdev)
 		q->queue_type = asic->hw_queues_props[i].type;
 		q->supports_sync_stream =
 				asic->hw_queues_props[i].supports_sync_stream;
+		q->collective_mode = asic->hw_queues_props[i].collective_mode;
 		rc = queue_init(hdev, q, i);
 		if (rc) {
 			dev_err(hdev->dev,
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 9393e34b9719..d4218c4b4887 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -793,6 +793,23 @@ out:
 	return rc;
 }
 
+static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_groupt)
+{
+
+}
+
+static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
+{
+
+}
+
+static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
+		struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
+		u32 collective_engine_id)
+{
+	return -EINVAL;
+}
+
 static int gaudi_late_init(struct hl_device *hdev)
 {
 	struct gaudi_device *gaudi = hdev->asic_specific;
@@ -7358,8 +7375,11 @@ static const struct hl_asic_funcs gaudi_funcs = {
 	.gen_signal_cb = gaudi_gen_signal_cb,
 	.gen_wait_cb = gaudi_gen_wait_cb,
 	.reset_sob = gaudi_reset_sob,
+	.reset_sob_group = gaudi_reset_sob_group,
 	.set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
-	.get_device_time = gaudi_get_device_time
+	.get_device_time = gaudi_get_device_time,
+	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
+	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs
 };
 
 /**
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 74c44278166b..9332580b038d 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -5305,6 +5305,11 @@ static void goya_reset_sob(struct hl_device *hdev, void *data)
 
 }
 
+void goya_reset_sob_group(struct hl_device *hdev, u16 sob_group)
+{
+
+}
+
 static void goya_set_dma_mask_from_fw(struct hl_device *hdev)
 {
 	if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
@@ -5326,6 +5331,18 @@ u64 goya_get_device_time(struct hl_device *hdev)
 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
 }
 
+void goya_collective_wait_init_cs(struct hl_cs *cs)
+{
+
+}
+
+int goya_collective_wait_create_jobs(struct hl_device *hdev,
+		struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
+		u32 collective_engine_id)
+{
+	return -EINVAL;
+}
+
 static const struct hl_asic_funcs goya_funcs = {
 	.early_init = goya_early_init,
 	.early_fini = goya_early_fini,
@@ -5397,8 +5414,11 @@ static const struct hl_asic_funcs goya_funcs = {
 	.gen_signal_cb = goya_gen_signal_cb,
 	.gen_wait_cb = goya_gen_wait_cb,
 	.reset_sob = goya_reset_sob,
+	.reset_sob_group = goya_reset_sob_group,
 	.set_dma_mask_from_fw = goya_set_dma_mask_from_fw,
-	.get_device_time = goya_get_device_time
+	.get_device_time = goya_get_device_time,
+	.collective_wait_init_cs = goya_collective_wait_init_cs,
+	.collective_wait_create_jobs = goya_collective_wait_create_jobs
 };
 
 /*
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 5753157e71b3..2b244d0bdc26 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -523,7 +523,8 @@ struct hl_cs_chunk {
 		 */
 		__u64 cb_handle;
 
-		/* Relevant only when HL_CS_FLAGS_WAIT is set.
+		/* Relevant only when HL_CS_FLAGS_WAIT or
+		 * HL_CS_FLAGS_COLLECTIVE_WAIT is set.
 		 * This holds address of array of u64 values that contain
 		 * signal CS sequence numbers. The wait described by this job
 		 * will listen on all those signals (wait event per signal)
@@ -541,7 +542,8 @@ struct hl_cs_chunk {
 		 */
 		__u32 cb_size;
 
-		/* Relevant only when HL_CS_FLAGS_WAIT is set.
+		/* Relevant only when HL_CS_FLAGS_WAIT or
+		 * HL_CS_FLAGS_COLLECTIVE_WAIT is set.
 		 * Number of entries in signal_seq_arr
 		 */
 		__u32 num_signal_seq_arr;
@@ -550,14 +552,21 @@ struct hl_cs_chunk {
 	/* HL_CS_CHUNK_FLAGS_* */
 	__u32 cs_chunk_flags;
 
+	/* Relevant only when HL_CS_FLAGS_COLLECTIVE_WAIT is set.
+	 * This holds the collective engine ID. The wait described by this job
+	 * will sync with this engine and with all NICs before completion.
+	 */
+	__u32 collective_engine_id;
+
 	/* Align structure to 64 bytes */
-	__u32 pad[11];
+	__u32 pad[10];
 };
 
-/* SIGNAL and WAIT flags are mutually exclusive */
+/* SIGNAL and WAIT/COLLECTIVE_WAIT flags are mutually exclusive */
 #define HL_CS_FLAGS_FORCE_RESTORE	0x1
 #define HL_CS_FLAGS_SIGNAL		0x2
 #define HL_CS_FLAGS_WAIT		0x4
+#define HL_CS_FLAGS_COLLECTIVE_WAIT	0x8
 
 #define HL_CS_STATUS_SUCCESS		0
 
-- 
cgit v1.2.3


From 0940cabafde98466bec8ba32dd567f0ec060478d Mon Sep 17 00:00:00 2001
From: Ofir Bitton <obitton@habana.ai>
Date: Mon, 31 Aug 2020 08:52:56 +0300
Subject: habanalabs/gaudi: Set DMA5 QMAN internal

DMA5 QMAN is designated to be used for the reduction process, hence it will
no longer be configured as an external queue.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi/gaudi.c  | 31 +++++++++++++------------------
 drivers/misc/habanalabs/gaudi/gaudiP.h |  8 ++++----
 include/uapi/misc/habanalabs.h         | 12 ++++++------
 3 files changed, 23 insertions(+), 28 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index d4218c4b4887..7e1557f8a73f 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -38,7 +38,7 @@
  *
  * MMU is always enabled.
  *
- * QMAN DMA channels 0,1,5 (PCI DMAN):
+ * QMAN DMA channels 0,1 (PCI DMAN):
  *     - DMA is not secured.
  *     - PQ and CQ are secured.
  *     - CP is secured: The driver needs to parse CB but WREG should be allowed
@@ -55,7 +55,7 @@
  *       idle)
  *     - MMU page tables area clear (happens on init)
  *
- * QMAN DMA 2-4,6,7, TPC, MME, NIC:
+ * QMAN DMA 2-7, TPC, MME, NIC:
  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
  * CQ, CP and the engine are not secured
  *
@@ -113,12 +113,12 @@ static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
 	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
 	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
-	[GAUDI_PCI_DMA_3] = GAUDI_ENGINE_ID_DMA_5,
 	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
 	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
 	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
-	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_6,
-	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_7
+	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
+	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
+	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
 };
 
 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
@@ -130,10 +130,6 @@ static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
 	[5] = GAUDI_QUEUE_ID_DMA_1_1,
 	[6] = GAUDI_QUEUE_ID_DMA_1_2,
 	[7] = GAUDI_QUEUE_ID_DMA_1_3,
-	[8] = GAUDI_QUEUE_ID_DMA_5_0,
-	[9] = GAUDI_QUEUE_ID_DMA_5_1,
-	[10] = GAUDI_QUEUE_ID_DMA_5_2,
-	[11] = GAUDI_QUEUE_ID_DMA_5_3
 };
 
 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
@@ -249,10 +245,10 @@ static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
-	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_0 */
-	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_1 */
-	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_2 */
-	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_3 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
@@ -978,8 +974,7 @@ static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
 		q = &gaudi->internal_qmans[i];
 
 		switch (i) {
-		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_4_3:
-		case GAUDI_QUEUE_ID_DMA_6_0 ... GAUDI_QUEUE_ID_DMA_7_3:
+		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
 			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
 			break;
 		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
@@ -3424,21 +3419,21 @@ static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
 		break;
 
 	case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
-		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_3];
+		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
 		break;
 
 	case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
-		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
+		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
 		break;
 
 	case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
-		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
+		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h
index 277c391272ac..16871d9ff88b 100644
--- a/drivers/misc/habanalabs/gaudi/gaudiP.h
+++ b/drivers/misc/habanalabs/gaudi/gaudiP.h
@@ -15,7 +15,7 @@
 #include "../include/gaudi/gaudi.h"
 #include "../include/gaudi/gaudi_async_events.h"
 
-#define NUMBER_OF_EXT_HW_QUEUES		12
+#define NUMBER_OF_EXT_HW_QUEUES		8
 #define NUMBER_OF_CMPLT_QUEUES		NUMBER_OF_EXT_HW_QUEUES
 #define NUMBER_OF_CPU_HW_QUEUES		1
 #define NUMBER_OF_INT_HW_QUEUES		100
@@ -62,8 +62,8 @@
 #error "GAUDI_MAX_PENDING_CS must be power of 2 and greater than 1"
 #endif
 
-#define PCI_DMA_NUMBER_OF_CHNLS		3
-#define HBM_DMA_NUMBER_OF_CHNLS		5
+#define PCI_DMA_NUMBER_OF_CHNLS		2
+#define HBM_DMA_NUMBER_OF_CHNLS		6
 #define DMA_NUMBER_OF_CHNLS		(PCI_DMA_NUMBER_OF_CHNLS + \
 						HBM_DMA_NUMBER_OF_CHNLS)
 
@@ -205,12 +205,12 @@
 enum gaudi_dma_channels {
 	GAUDI_PCI_DMA_1,
 	GAUDI_PCI_DMA_2,
-	GAUDI_PCI_DMA_3,
 	GAUDI_HBM_DMA_1,
 	GAUDI_HBM_DMA_2,
 	GAUDI_HBM_DMA_3,
 	GAUDI_HBM_DMA_4,
 	GAUDI_HBM_DMA_5,
+	GAUDI_HBM_DMA_6,
 	GAUDI_DMA_MAX
 };
 
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 2b244d0bdc26..4661a74f0425 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -18,8 +18,8 @@
 #define GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START		0x8000	/* 32KB */
 #define GAUDI_DRIVER_SRAM_RESERVED_SIZE_FROM_START	0x80	/* 128 bytes */
 
-#define GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT		48
-#define GAUDI_FIRST_AVAILABLE_W_S_MONITOR		24
+#define GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT		32
+#define GAUDI_FIRST_AVAILABLE_W_S_MONITOR		16
 /*
  * Goya queue Numbering
  *
@@ -76,10 +76,10 @@ enum gaudi_queue_id {
 	GAUDI_QUEUE_ID_DMA_4_1 = 18,	/* internal */
 	GAUDI_QUEUE_ID_DMA_4_2 = 19,	/* internal */
 	GAUDI_QUEUE_ID_DMA_4_3 = 20,	/* internal */
-	GAUDI_QUEUE_ID_DMA_5_0 = 21,	/* external */
-	GAUDI_QUEUE_ID_DMA_5_1 = 22,	/* external */
-	GAUDI_QUEUE_ID_DMA_5_2 = 23,	/* external */
-	GAUDI_QUEUE_ID_DMA_5_3 = 24,	/* external */
+	GAUDI_QUEUE_ID_DMA_5_0 = 21,	/* internal */
+	GAUDI_QUEUE_ID_DMA_5_1 = 22,	/* internal */
+	GAUDI_QUEUE_ID_DMA_5_2 = 23,	/* internal */
+	GAUDI_QUEUE_ID_DMA_5_3 = 24,	/* internal */
 	GAUDI_QUEUE_ID_DMA_6_0 = 25,	/* internal */
 	GAUDI_QUEUE_ID_DMA_6_1 = 26,	/* internal */
 	GAUDI_QUEUE_ID_DMA_6_2 = 27,	/* internal */
-- 
cgit v1.2.3


From 5de406c0b5c747c40277861ecf204ebfa095caa5 Mon Sep 17 00:00:00 2001
From: Ofir Bitton <obitton@habana.ai>
Date: Thu, 10 Sep 2020 10:56:26 +0300
Subject: habanalabs: sync stream collective support

Implement sync stream collective for GAUDI. This requires allocating
additional resources and adding a ctx_fini() callback to clean them up.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../misc/habanalabs/common/command_submission.c    |  11 +-
 drivers/misc/habanalabs/common/context.c           |   1 +
 drivers/misc/habanalabs/common/habanalabs.h        |   7 +-
 drivers/misc/habanalabs/gaudi/gaudi.c              | 846 +++++++++++++++++++--
 drivers/misc/habanalabs/gaudi/gaudiP.h             |  40 +-
 drivers/misc/habanalabs/goya/goya.c                |   6 +
 include/uapi/misc/habanalabs.h                     |  14 +-
 7 files changed, 862 insertions(+), 63 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 2dbd42b6ad0c..5ece52588ec6 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -142,7 +142,7 @@ static void hl_fence_init(struct hl_fence *fence)
 	init_completion(&fence->completion);
 }
 
-static void cs_get(struct hl_cs *cs)
+void cs_get(struct hl_cs *cs)
 {
 	kref_get(&cs->refcount);
 }
@@ -917,6 +917,9 @@ static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
 	job->job_cb_size = job->user_cb_size;
 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
 
+	/* increment refcount as for external queues we get completion */
+	cs_get(cs);
+
 	cs->jobs_in_queue_cnt[job->hw_queue_id]++;
 
 	list_add_tail(&job->cs_node, &cs->job_list);
@@ -1070,11 +1073,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 				cs, q_idx, collective_engine_id);
 
 	if (rc)
-		goto put_cs;
-
-
-	/* increment refcount as for external queues we get completion */
-	cs_get(cs);
+		goto free_cs_object;
 
 	rc = hl_hw_queue_schedule_cs(cs);
 	if (rc) {
diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c
index 7a59dd7c6450..2077bbe3606a 100644
--- a/drivers/misc/habanalabs/common/context.c
+++ b/drivers/misc/habanalabs/common/context.c
@@ -40,6 +40,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
 		if ((hdev->in_debug) && (hdev->compute_ctx == ctx))
 			hl_device_set_debug_mode(hdev, false);
 
+		hdev->asic_funcs->ctx_fini(ctx);
 		hl_cb_va_pool_fini(ctx);
 		hl_vm_ctx_fini(ctx);
 		hl_asid_free(hdev, ctx->asid);
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 98249a2c97e7..d6eb5c6a2873 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -65,8 +65,8 @@
  * HL_RSVD_SOBS 'sync stream' reserved sync objects per QMAN stream
  * HL_RSVD_MONS 'sync stream' reserved monitors per QMAN stream
  */
-#define HL_RSVD_SOBS			4
-#define HL_RSVD_MONS			2
+#define HL_RSVD_SOBS			2
+#define HL_RSVD_MONS			1
 
 /*
  * HL_COLLECTIVE_RSVD_MSTR_MONS 'collective' reserved monitors per QMAN stream
@@ -785,6 +785,7 @@ enum div_select_defs {
  * @wreg: Write a register. Needed for simulator support.
  * @halt_coresight: stop the ETF and ETR traces.
  * @ctx_init: context dependent initialization.
+ * @ctx_fini: context dependent cleanup.
  * @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz
  * @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index.
  * @read_device_fw_version: read the device's firmware versions that are
@@ -891,6 +892,7 @@ struct hl_asic_funcs {
 	void (*wreg)(struct hl_device *hdev, u32 reg, u32 val);
 	void (*halt_coresight)(struct hl_device *hdev);
 	int (*ctx_init)(struct hl_ctx *ctx);
+	void (*ctx_fini)(struct hl_ctx *ctx);
 	int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
 	u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx);
 	void (*read_device_fw_version)(struct hl_device *hdev,
@@ -1992,6 +1994,7 @@ void hl_sob_reset_error(struct kref *ref);
 int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask);
 void hl_fence_put(struct hl_fence *fence);
 void hl_fence_get(struct hl_fence *fence);
+void cs_get(struct hl_cs *cs);
 
 void goya_set_asic_funcs(struct hl_device *hdev);
 void gaudi_set_asic_funcs(struct hl_device *hdev);
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 7e1557f8a73f..c8e59a8338fb 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -358,6 +358,31 @@ static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
 static int gaudi_cpucp_info_get(struct hl_device *hdev);
 static void gaudi_disable_clock_gating(struct hl_device *hdev);
 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
+static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
+				u32 size);
+static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
+				struct hl_gen_wait_properties *prop);
+
+static inline enum hl_collective_mode
+get_collective_mode(struct hl_device *hdev, u32 queue_id)
+{
+	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
+		return HL_COLLECTIVE_MASTER;
+
+	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
+			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
+		return HL_COLLECTIVE_SLAVE;
+
+	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
+			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
+		return HL_COLLECTIVE_SLAVE;
+
+	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
+			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
+		return HL_COLLECTIVE_SLAVE;
+
+	return HL_COLLECTIVE_NOT_SUPPORTED;
+}
 
 static int gaudi_get_fixed_properties(struct hl_device *hdev)
 {
@@ -393,18 +418,28 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
 			prop->hw_queues_props[i].supports_sync_stream = 0;
 			prop->hw_queues_props[i].cb_alloc_flags =
 				CB_ALLOC_USER;
-		} else if (gaudi_queue_type[i] == QUEUE_TYPE_NA) {
-			prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
-			prop->hw_queues_props[i].driver_only = 0;
-			prop->hw_queues_props[i].supports_sync_stream = 0;
-			prop->hw_queues_props[i].cb_alloc_flags =
-				CB_ALLOC_USER;
+
 		}
+		prop->hw_queues_props[i].collective_mode =
+						get_collective_mode(hdev, i);
 	}
 
 	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
-	prop->sync_stream_first_sob = 0;
-	prop->sync_stream_first_mon = 0;
+	prop->collective_first_sob = 0;
+	prop->collective_first_mon = 0;
+
+	/* 2 SOBs per internal queue stream are reserved for collective */
+	prop->sync_stream_first_sob =
+			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
+			* QMAN_STREAMS * HL_RSVD_SOBS;
+
+	/* 1 monitor per internal queue stream are reserved for collective
+	 * 2 monitors per external queue stream are reserved for collective
+	 */
+	prop->sync_stream_first_mon =
+			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
+			(NUMBER_OF_EXT_HW_QUEUES * 2);
+
 	prop->dram_base_address = DRAM_PHYS_BASE;
 	prop->dram_size = GAUDI_HBM_SIZE_32GB;
 	prop->dram_end_address = prop->dram_base_address +
@@ -789,21 +824,451 @@ out:
 	return rc;
 }
 
-static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_groupt)
+static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
+{
+	struct gaudi_device *gaudi = hdev->asic_specific;
+	struct gaudi_collective_properties *prop = &gaudi->collective_props;
+	struct hl_hw_queue *q;
+	u32 i, sob_id, sob_group_id, queue_id;
+
+	/* Iterate through SOB groups and assign a SOB for each slave queue */
+	sob_group_id =
+		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
+	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
+
+	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
+	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
+		q = &hdev->kernel_queues[queue_id + (4 * i)];
+		q->sync_stream_prop.collective_sob_id = sob_id + i;
+	}
+
+	/* Both DMA5 and TPC7 use the same resources since only a single
+	 * engine need to participate in the reduction process
+	 */
+	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
+	q = &hdev->kernel_queues[queue_id];
+	q->sync_stream_prop.collective_sob_id =
+			sob_id + NIC_NUMBER_OF_ENGINES;
+
+	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
+	q = &hdev->kernel_queues[queue_id];
+	q->sync_stream_prop.collective_sob_id =
+			sob_id + NIC_NUMBER_OF_ENGINES;
+}
+
+static void gaudi_sob_group_hw_reset(struct kref *ref)
+{
+	struct gaudi_hw_sob_group *hw_sob_group =
+		container_of(ref, struct gaudi_hw_sob_group, kref);
+	struct hl_device *hdev = hw_sob_group->hdev;
+	int i;
+
+	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
+		WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
+				(hw_sob_group->base_sob_id + i) * 4, 0);
+
+	kref_init(&hw_sob_group->kref);
+}
+
+static void gaudi_sob_group_reset_error(struct kref *ref)
+{
+	struct gaudi_hw_sob_group *hw_sob_group =
+		container_of(ref, struct gaudi_hw_sob_group, kref);
+	struct hl_device *hdev = hw_sob_group->hdev;
+
+	dev_crit(hdev->dev,
+		"SOB release shouldn't be called here, base_sob_id: %d\n",
+		hw_sob_group->base_sob_id);
+}
+
+static int gaudi_collective_init(struct hl_device *hdev)
+{
+	u32 i, master_monitor_sobs, sob_id, reserved_sobs_per_group;
+	struct gaudi_collective_properties *prop;
+	struct gaudi_device *gaudi;
+
+	gaudi = hdev->asic_specific;
+	prop = &gaudi->collective_props;
+	sob_id = hdev->asic_prop.collective_first_sob;
+
+	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
+	reserved_sobs_per_group =
+		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
+
+	/* Init SOB groups */
+	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
+		prop->hw_sob_group[i].hdev = hdev;
+		prop->hw_sob_group[i].base_sob_id = sob_id;
+		sob_id += reserved_sobs_per_group;
+		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
+	}
+
+	for (i = 0 ; i < QMAN_STREAMS; i++) {
+		prop->next_sob_group_val[i] = 1;
+		prop->curr_sob_group_idx[i] = 0;
+		gaudi_collective_map_sobs(hdev, i);
+	}
+
+	prop->mstr_sob_mask[0] = 0;
+	master_monitor_sobs = HL_MAX_SOBS_PER_MONITOR;
+	for (i = 0 ; i < master_monitor_sobs ; i++)
+		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
+			prop->mstr_sob_mask[0] |= BIT(i);
+
+	prop->mstr_sob_mask[1] = 0;
+	master_monitor_sobs =
+		NIC_NUMBER_OF_ENGINES - HL_MAX_SOBS_PER_MONITOR;
+	for (i = 0 ; i < master_monitor_sobs; i++) {
+		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
+			prop->mstr_sob_mask[1] |= BIT(i);
+	}
+
+	/* Set collective engine bit */
+	prop->mstr_sob_mask[1] |= BIT(i);
+
+	return 0;
+}
+
+static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
+{
+	struct gaudi_device *gaudi = hdev->asic_specific;
+	struct gaudi_collective_properties *cprop = &gaudi->collective_props;
+
+	kref_put(&cprop->hw_sob_group[sob_group].kref,
+					gaudi_sob_group_hw_reset);
+}
+
+static void gaudi_collective_master_init_job(struct hl_device *hdev,
+		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
+{
+	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
+	struct gaudi_collective_properties *cprop;
+	struct hl_gen_wait_properties wait_prop;
+	struct hl_sync_stream_properties *prop;
+	struct gaudi_device *gaudi;
+
+	gaudi = hdev->asic_specific;
+	cprop = &gaudi->collective_props;
+	queue_id = job->hw_queue_id;
+	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
+
+	master_sob_base =
+		cprop->hw_sob_group[sob_group_offset].base_sob_id;
+	master_monitor = prop->collective_mstr_mon_id[0];
+
+	dev_dbg(hdev->dev,
+		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
+		master_sob_base, cprop->mstr_sob_mask[0],
+		cprop->next_sob_group_val[stream],
+		master_monitor, queue_id);
+
+	wait_prop.data = (void *) job->patched_cb;
+	wait_prop.sob_base = master_sob_base;
+	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
+	wait_prop.sob_val = cprop->next_sob_group_val[stream];
+	wait_prop.mon_id = master_monitor;
+	wait_prop.q_idx = queue_id;
+	wait_prop.size = cb_size;
+	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
+
+	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
+	master_monitor = prop->collective_mstr_mon_id[1];
+
+	dev_dbg(hdev->dev,
+		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
+		master_sob_base, cprop->mstr_sob_mask[1],
+		cprop->next_sob_group_val[stream],
+		master_monitor, queue_id);
+
+	wait_prop.sob_base = master_sob_base;
+	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
+	wait_prop.mon_id = master_monitor;
+	wait_prop.size = cb_size;
+	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
+}
+
+static void gaudi_collective_slave_init_job(struct hl_device *hdev,
+		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
 {
+	struct hl_gen_wait_properties wait_prop;
+	struct hl_sync_stream_properties *prop;
+	u32 queue_id, cb_size = 0;
+
+	queue_id = job->hw_queue_id;
+	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
+
+	/* Add to wait CBs using slave monitor */
+	wait_prop.data = (void *) job->user_cb;
+	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
+	wait_prop.sob_mask = 0x1;
+	wait_prop.sob_val = cs_cmpl->sob_val;
+	wait_prop.mon_id = prop->collective_slave_mon_id;
+	wait_prop.q_idx = queue_id;
+	wait_prop.size = cb_size;
+
+	dev_dbg(hdev->dev,
+		"Generate slave wait CB, sob %d, val:0x%x, mon %d, q %d\n",
+		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
+		prop->collective_slave_mon_id, queue_id);
+
+	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
+
+	dev_dbg(hdev->dev,
+		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
+		prop->collective_sob_id, queue_id);
 
+	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
+			prop->collective_sob_id, cb_size);
 }
 
 static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
 {
+	struct hl_cs_compl *signal_cs_cmpl =
+		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
+	struct hl_cs_compl *cs_cmpl =
+		container_of(cs->fence, struct hl_cs_compl, base_fence);
+	struct gaudi_collective_properties *cprop;
+	u32 stream, queue_id, sob_group_offset;
+	struct gaudi_device *gaudi;
+	struct hl_device *hdev;
+	struct hl_cs_job *job;
+	struct hl_ctx *ctx;
+
+	ctx = cs->ctx;
+	hdev = ctx->hdev;
+	gaudi = hdev->asic_specific;
+	cprop = &gaudi->collective_props;
+
+	/* copy the SOB id and value of the signal CS */
+	cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
+	cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
+
+	/* Calculate the stream from collective master queue (1st job) */
+	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
+	stream = job->hw_queue_id % 4;
+	sob_group_offset =
+		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
+
+	list_for_each_entry(job, &cs->job_list, cs_node) {
+		queue_id = job->hw_queue_id;
+
+		if (hdev->kernel_queues[queue_id].collective_mode ==
+				HL_COLLECTIVE_MASTER)
+			gaudi_collective_master_init_job(hdev, job, stream,
+						sob_group_offset);
+		else
+			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
+	}
+
+	cs_cmpl->sob_group = sob_group_offset;
+
+	/* Handle sob group kref and wraparound */
+	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
+	cprop->next_sob_group_val[stream]++;
 
+	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
+		/*
+		 * Decrement as we reached the max value.
+		 * The release function won't be called here as we've
+		 * just incremented the refcount.
+		 */
+		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
+				gaudi_sob_group_reset_error);
+		cprop->next_sob_group_val[stream] = 1;
+		/* only two SOBs are currently in use */
+		cprop->curr_sob_group_idx[stream] =
+			(cprop->curr_sob_group_idx[stream] + 1) &
+							(HL_RSVD_SOBS - 1);
+
+		gaudi_collective_map_sobs(hdev, stream);
+
+		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
+				cprop->curr_sob_group_idx[stream], stream);
+	}
+
+	/* Increment kref since all slave queues are now waiting on it */
+	kref_get(&cs_cmpl->hw_sob->kref);
+	/*
+	 * Must put the signal fence after the SOB refcnt increment so
+	 * the SOB refcnt won't turn 0 and reset the SOB before the
+	 * wait CS was submitted.
+	 */
+	mb();
+	hl_fence_put(cs->signal_fence);
+	cs->signal_fence = NULL;
+}
+
+static int gaudi_collective_wait_create_job(struct hl_device *hdev,
+		struct hl_ctx *ctx, struct hl_cs *cs,
+		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id)
+{
+	struct hw_queue_properties *hw_queue_prop;
+	struct hl_cs_counters_atomic *cntr;
+	struct hl_cs_job *job;
+	struct hl_cb *cb;
+	u32 cb_size;
+	bool patched_cb;
+
+	cntr = &hdev->aggregated_cs_counters;
+
+	if (mode == HL_COLLECTIVE_MASTER) {
+		/* CB size of collective master queue contains
+		 * 4 msg short packets for monitor 1 configuration
+		 * 1 fence packet
+		 * 4 msg short packets for monitor 2 configuration
+		 * 1 fence packet
+		 * 2 msg prot packets for completion and MSI-X
+		 */
+		cb_size = sizeof(struct packet_msg_short) * 8 +
+				sizeof(struct packet_fence) * 2 +
+				sizeof(struct packet_msg_prot) * 2;
+		patched_cb = true;
+	} else {
+		/* CB size of collective slave queues contains
+		 * 4 msg short packets for monitor configuration
+		 * 1 fence packet
+		 * 1 additional msg short packet for sob signal
+		 */
+		cb_size = sizeof(struct packet_msg_short) * 5 +
+				sizeof(struct packet_fence);
+		patched_cb = false;
+	}
+
+	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
+	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
+	if (!job) {
+		ctx->cs_counters.out_of_mem_drop_cnt++;
+		atomic64_inc(&cntr->out_of_mem_drop_cnt);
+		dev_err(hdev->dev, "Failed to allocate a new job\n");
+		return -ENOMEM;
+	}
+
+	/* Allocate internal mapped CB for non patched CBs */
+	cb = hl_cb_kernel_create(hdev, cb_size,
+			hdev->mmu_enable && !patched_cb);
+	if (!cb) {
+		ctx->cs_counters.out_of_mem_drop_cnt++;
+		atomic64_inc(&cntr->out_of_mem_drop_cnt);
+		kfree(job);
+		return -EFAULT;
+	}
+
+	job->id = 0;
+	job->cs = cs;
+	job->user_cb = cb;
+	job->user_cb->cs_cnt++;
+	job->user_cb_size = cb_size;
+	job->hw_queue_id = queue_id;
+
+	/*
+	 * No need for parsing, user CB is the patched CB.
+	 * We call hl_cb_destroy() for two reasons - we don't need
+	 * the CB in the CB idr anymore and to decrement its refcount as
+	 * it was incremented inside hl_cb_kernel_create().
+	 */
+	if (patched_cb)
+		job->patched_cb = job->user_cb;
+	else
+		job->patched_cb = NULL;
+
+	job->job_cb_size = job->user_cb_size;
+	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
+
+	/* increment refcount as for external queues we get completion */
+	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
+		cs_get(cs);
+
+	cs->jobs_in_queue_cnt[job->hw_queue_id]++;
+
+	list_add_tail(&job->cs_node, &cs->job_list);
+
+	hl_debugfs_add_job(hdev, job);
+
+	return 0;
 }
 
 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
 		struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
 		u32 collective_engine_id)
 {
-	return -EINVAL;
+	struct gaudi_device *gaudi = hdev->asic_specific;
+	struct hw_queue_properties *hw_queue_prop;
+	u32 queue_id, collective_queue, num_jobs;
+	u32 stream, nic_queue, nic_idx = 0;
+	bool skip;
+	int i, rc;
+
+	/* Verify wait queue id is configured as master */
+	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
+	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
+		dev_err(hdev->dev,
+			"Queue %d is not configured as collective master\n",
+			wait_queue_id);
+		return -EINVAL;
+	}
+
+	/* Verify engine id is supported */
+	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
+			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
+		dev_err(hdev->dev,
+			"Collective wait does not support engine %u\n",
+			collective_engine_id);
+		return -EINVAL;
+	}
+
+	stream = wait_queue_id % 4;
+
+	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
+		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
+	else if (collective_engine_id == GAUDI_ENGINE_ID_TPC_7)
+		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
+	else
+		return -EINVAL;
+
+	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
+	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
+
+	/* First job goes to the collective master queue, it will wait for
+	 * the collective slave queues to finish execution.
+	 * The synchronization is done using two monitors:
+	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
+	 * reduction engine (DMA5/TPC7).
+	 *
+	 * The rest of the jobs go to the collective slave queues, which will
+	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
+	 */
+	for (i = 0 ; i < num_jobs ; i++) {
+		if (i == 0) {
+			queue_id = wait_queue_id;
+			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
+				HL_COLLECTIVE_MASTER, queue_id, wait_queue_id);
+		} else {
+			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
+				if (gaudi->hw_cap_initialized &
+					BIT(HW_CAP_NIC_SHIFT + nic_idx))
+					skip = false;
+				else
+					skip = true;
+
+				queue_id = nic_queue;
+				nic_queue += 4;
+				nic_idx++;
+
+				if (skip)
+					continue;
+			} else {
+				queue_id = collective_queue;
+			}
+
+			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
+				HL_COLLECTIVE_SLAVE, queue_id, wait_queue_id);
+		}
+
+		if (rc)
+			return rc;
+	}
+
+	return rc;
 }
 
 static int gaudi_late_init(struct hl_device *hdev)
@@ -860,6 +1325,12 @@ static int gaudi_late_init(struct hl_device *hdev)
 		goto disable_pci_access;
 	}
 
+	rc = gaudi_collective_init(hdev);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to init collective\n");
+		goto disable_pci_access;
+	}
+
 	return 0;
 
 disable_pci_access:
@@ -2041,21 +2512,29 @@ static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
 					int qman_id, u64 qman_base_addr)
 {
-	u32 mtr_base_lo, mtr_base_hi;
-	u32 so_base_lo, so_base_hi;
+	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
+	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
 	u32 q_off, dma_qm_offset;
 	u32 dma_qm_err_cfg;
 
 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
 
-	mtr_base_lo = lower_32_bits(CFG_BASE +
-				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
-	mtr_base_hi = upper_32_bits(CFG_BASE +
+	mtr_base_en_lo = lower_32_bits(CFG_BASE +
+			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
+	mtr_base_en_hi = upper_32_bits(CFG_BASE +
 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
-	so_base_lo = lower_32_bits(CFG_BASE +
+	so_base_en_lo = lower_32_bits(CFG_BASE +
 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
-	so_base_hi = upper_32_bits(CFG_BASE +
+	so_base_en_hi = upper_32_bits(CFG_BASE +
 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
+	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
+				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
+	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
+				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
+	so_base_ws_lo = lower_32_bits(CFG_BASE +
+				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
+	so_base_ws_hi = upper_32_bits(CFG_BASE +
+				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
 
 	q_off = dma_qm_offset + qman_id * 4;
 
@@ -2113,10 +2592,22 @@ static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
 				QMAN_INTERNAL_MAKE_TRUSTED);
 	}
 
-	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
-	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
-	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
-	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
+	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
+	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
+	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
+	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
+
+	/* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
+	if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
+		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
+				mtr_base_ws_lo);
+		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
+				mtr_base_ws_hi);
+		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
+				so_base_ws_lo);
+		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
+				so_base_ws_hi);
+	}
 }
 
 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
@@ -2279,22 +2770,33 @@ static void gaudi_init_mme_qmans(struct hl_device *hdev)
 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
 				int qman_id, u64 qman_base_addr)
 {
-	u32 mtr_base_lo, mtr_base_hi;
-	u32 so_base_lo, so_base_hi;
+	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
+	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
 	u32 q_off, tpc_id;
 	u32 tpc_qm_err_cfg;
 
-	mtr_base_lo = lower_32_bits(CFG_BASE +
-				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
-	mtr_base_hi = upper_32_bits(CFG_BASE +
+	mtr_base_en_lo = lower_32_bits(CFG_BASE +
+			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
+	mtr_base_en_hi = upper_32_bits(CFG_BASE +
 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
-	so_base_lo = lower_32_bits(CFG_BASE +
+	so_base_en_lo = lower_32_bits(CFG_BASE +
 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
-	so_base_hi = upper_32_bits(CFG_BASE +
+	so_base_en_hi = upper_32_bits(CFG_BASE +
 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
+	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
+				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
+	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
+				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
+	so_base_ws_lo = lower_32_bits(CFG_BASE +
+				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
+	so_base_ws_hi = upper_32_bits(CFG_BASE +
+				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
 
 	q_off = tpc_offset + qman_id * 4;
 
+	tpc_id = tpc_offset /
+			(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
+
 	if (qman_id < 4) {
 		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
 					lower_32_bits(qman_base_addr));
@@ -2320,9 +2822,6 @@ static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
 							QMAN_LDMA_DST_OFFSET);
 
 		/* Configure RAZWI IRQ */
-		tpc_id = tpc_offset /
-				(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
-
 		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
 		if (hdev->stop_on_err) {
 			tpc_qm_err_cfg |=
@@ -2352,10 +2851,22 @@ static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
 				QMAN_INTERNAL_MAKE_TRUSTED);
 	}
 
-	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
-	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
-	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
-	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
+	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
+	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
+	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
+	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
+
+	/* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
+	if (tpc_id == 6) {
+		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
+				mtr_base_ws_lo);
+		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
+				mtr_base_ws_hi);
+		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
+				so_base_ws_lo);
+		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
+				so_base_ws_hi);
+	}
 }
 
 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
@@ -2406,19 +2917,27 @@ static void gaudi_init_tpc_qmans(struct hl_device *hdev)
 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
 				int qman_id, u64 qman_base_addr, int nic_id)
 {
-	u32 mtr_base_lo, mtr_base_hi;
-	u32 so_base_lo, so_base_hi;
+	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
+	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
 	u32 q_off;
 	u32 nic_qm_err_cfg;
 
-	mtr_base_lo = lower_32_bits(CFG_BASE +
-				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
-	mtr_base_hi = upper_32_bits(CFG_BASE +
+	mtr_base_en_lo = lower_32_bits(CFG_BASE +
+			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
+	mtr_base_en_hi = upper_32_bits(CFG_BASE +
 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
-	so_base_lo = lower_32_bits(CFG_BASE +
+	so_base_en_lo = lower_32_bits(CFG_BASE +
 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
-	so_base_hi = upper_32_bits(CFG_BASE +
+	so_base_en_hi = upper_32_bits(CFG_BASE +
 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
+	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
+				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
+	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
+				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
+	so_base_ws_lo = lower_32_bits(CFG_BASE +
+				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
+	so_base_ws_hi = upper_32_bits(CFG_BASE +
+				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
 
 	q_off = nic_offset + qman_id * 4;
 
@@ -2429,14 +2948,23 @@ static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
 	WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
 	WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
 
-	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
-	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
-	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
+	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
+							QMAN_LDMA_SIZE_OFFSET);
+	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
+							QMAN_LDMA_SRC_OFFSET);
+	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
+							QMAN_LDMA_DST_OFFSET);
+
+	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
+	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
+	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
+	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
 
-	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
-	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
-	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
-	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
+	/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
+	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
+	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
+	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
+	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
 
 	if (qman_id == 0) {
 		/* Configure RAZWI IRQ */
@@ -7011,11 +7539,152 @@ static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
 	return RREG32(mmHW_STATE);
 }
 
+static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
+		struct hl_ctx *ctx)
+{
+	struct gaudi_device *gaudi = hdev->asic_specific;
+	bool flush_pte;
+	u64 va, pa;
+	s64 off;
+	int min_alloc_order, rc, collective_cb_size;
+
+	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
+		return 0;
+
+	hdev->internal_cb_pool_virt_addr =
+			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
+					HOST_SPACE_INTERNAL_CB_SZ,
+					&hdev->internal_cb_pool_dma_addr,
+					GFP_KERNEL | __GFP_ZERO);
+
+	if (!hdev->internal_cb_pool_virt_addr)
+		return -ENOMEM;
+
+	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
+			sizeof(struct packet_fence);
+	min_alloc_order = ilog2(collective_cb_size);
+
+	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
+	if (!hdev->internal_cb_pool) {
+		dev_err(hdev->dev,
+			"Failed to create internal CB pool\n");
+		rc = -ENOMEM;
+		goto free_internal_cb_pool;
+	}
+
+	rc = gen_pool_add(hdev->internal_cb_pool,
+				(uintptr_t) hdev->internal_cb_pool_virt_addr,
+				HOST_SPACE_INTERNAL_CB_SZ, -1);
+	if (rc) {
+		dev_err(hdev->dev,
+			"Failed to add memory to internal CB pool\n");
+		rc = -EFAULT;
+		goto destroy_internal_cb_pool;
+	}
+
+	hdev->internal_cb_va_base = VA_HOST_SPACE_INTERNAL_CB_START;
+
+	mutex_lock(&ctx->mmu_lock);
+
+	/* The mapping is done page by page since we can't ensure the allocated
+	 * pointer is aligned to HOST_SPACE_INTERNAL_CB_SZ
+	 */
+	for (off = 0 ; off < HOST_SPACE_INTERNAL_CB_SZ ; off += PAGE_SIZE_4KB) {
+		va = VA_HOST_SPACE_INTERNAL_CB_START + off;
+		pa = hdev->internal_cb_pool_dma_addr + off;
+		flush_pte = (off + PAGE_SIZE_4KB) >= HOST_SPACE_INTERNAL_CB_SZ;
+		rc = hl_mmu_map(ctx, va, pa, PAGE_SIZE_4KB, flush_pte);
+		if (rc) {
+			dev_err(hdev->dev,
+				"Map failed for va 0x%llx to pa 0x%llx\n",
+				va, pa);
+			goto unmap;
+		}
+	}
+
+	hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
+
+	mutex_unlock(&ctx->mmu_lock);
+
+	return 0;
+
+unmap:
+	for (; off >= 0 ; off -= PAGE_SIZE_4KB) {
+		va = VA_HOST_SPACE_INTERNAL_CB_START + off;
+		flush_pte = (off - (s32) PAGE_SIZE_4KB) < 0;
+		if (hl_mmu_unmap(ctx, va, PAGE_SIZE_4KB, flush_pte))
+			dev_warn_ratelimited(hdev->dev,
+					"failed to unmap va 0x%llx\n", va);
+	}
+
+	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
+
+	mutex_unlock(&ctx->mmu_lock);
+
+destroy_internal_cb_pool:
+	gen_pool_destroy(hdev->internal_cb_pool);
+
+free_internal_cb_pool:
+	hdev->asic_funcs->asic_dma_free_coherent(hdev,
+			HOST_SPACE_INTERNAL_CB_SZ,
+			hdev->internal_cb_pool_virt_addr,
+			hdev->internal_cb_pool_dma_addr);
+
+	return rc;
+}
+
+static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
+		struct hl_ctx *ctx)
+{
+	struct gaudi_device *gaudi = hdev->asic_specific;
+	bool flush_pte = false;
+	u64 va, off;
+
+	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
+		return;
+
+	mutex_lock(&ctx->mmu_lock);
+
+	for (off = 0 ; off < HOST_SPACE_INTERNAL_CB_SZ ; off += PAGE_SIZE_4KB) {
+		va = VA_HOST_SPACE_INTERNAL_CB_START + off;
+
+		if (off + PAGE_SIZE_4KB >= HOST_SPACE_INTERNAL_CB_SZ)
+			flush_pte = true;
+
+		if (hl_mmu_unmap(ctx, va, PAGE_SIZE_4KB, flush_pte))
+			dev_warn_ratelimited(hdev->dev,
+					"failed to unmap va 0x%llx\n", va);
+	}
+
+	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
+
+	mutex_unlock(&ctx->mmu_lock);
+
+	gen_pool_destroy(hdev->internal_cb_pool);
+
+	hdev->asic_funcs->asic_dma_free_coherent(hdev,
+			HOST_SPACE_INTERNAL_CB_SZ,
+			hdev->internal_cb_pool_virt_addr,
+			hdev->internal_cb_pool_dma_addr);
+}
+
 static int gaudi_ctx_init(struct hl_ctx *ctx)
 {
 	gaudi_mmu_prepare(ctx->hdev, ctx->asid);
+	return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
+}
 
-	return 0;
+void gaudi_ctx_fini(struct hl_ctx *ctx)
+{
+	struct hl_device *hdev = ctx->hdev;
+
+	/* Gaudi will NEVER support more than a single compute context.
+	 * Therefore, don't clear anything unless it is the compute context
+	 */
+	if (hdev->compute_ctx != ctx)
+		return;
+
+	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
 }
 
 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
@@ -7037,7 +7706,7 @@ static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
 }
 
 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
-		u32 size)
+				u32 size)
 {
 	struct hl_cb *cb = (struct hl_cb *) data;
 	struct packet_msg_short *pkt;
@@ -7156,7 +7825,7 @@ static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
 
 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
 {
-	u32 offset;
+	u32 offset, nic_index;
 
 	switch (queue_id) {
 	case GAUDI_QUEUE_ID_DMA_0_0:
@@ -7195,6 +7864,78 @@ static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
 	case GAUDI_QUEUE_ID_DMA_5_3:
 		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
 		break;
+	case GAUDI_QUEUE_ID_TPC_7_0:
+		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
+		break;
+	case GAUDI_QUEUE_ID_TPC_7_1:
+		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
+		break;
+	case GAUDI_QUEUE_ID_TPC_7_2:
+		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
+		break;
+	case GAUDI_QUEUE_ID_TPC_7_3:
+		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
+		break;
+	case GAUDI_QUEUE_ID_NIC_0_0:
+	case GAUDI_QUEUE_ID_NIC_1_0:
+	case GAUDI_QUEUE_ID_NIC_2_0:
+	case GAUDI_QUEUE_ID_NIC_3_0:
+	case GAUDI_QUEUE_ID_NIC_4_0:
+	case GAUDI_QUEUE_ID_NIC_5_0:
+	case GAUDI_QUEUE_ID_NIC_6_0:
+	case GAUDI_QUEUE_ID_NIC_7_0:
+	case GAUDI_QUEUE_ID_NIC_8_0:
+	case GAUDI_QUEUE_ID_NIC_9_0:
+		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
+		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
+				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
+				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
+		break;
+	case GAUDI_QUEUE_ID_NIC_0_1:
+	case GAUDI_QUEUE_ID_NIC_1_1:
+	case GAUDI_QUEUE_ID_NIC_2_1:
+	case GAUDI_QUEUE_ID_NIC_3_1:
+	case GAUDI_QUEUE_ID_NIC_4_1:
+	case GAUDI_QUEUE_ID_NIC_5_1:
+	case GAUDI_QUEUE_ID_NIC_6_1:
+	case GAUDI_QUEUE_ID_NIC_7_1:
+	case GAUDI_QUEUE_ID_NIC_8_1:
+	case GAUDI_QUEUE_ID_NIC_9_1:
+		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
+		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
+				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
+				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
+		break;
+	case GAUDI_QUEUE_ID_NIC_0_2:
+	case GAUDI_QUEUE_ID_NIC_1_2:
+	case GAUDI_QUEUE_ID_NIC_2_2:
+	case GAUDI_QUEUE_ID_NIC_3_2:
+	case GAUDI_QUEUE_ID_NIC_4_2:
+	case GAUDI_QUEUE_ID_NIC_5_2:
+	case GAUDI_QUEUE_ID_NIC_6_2:
+	case GAUDI_QUEUE_ID_NIC_7_2:
+	case GAUDI_QUEUE_ID_NIC_8_2:
+	case GAUDI_QUEUE_ID_NIC_9_2:
+		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
+		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
+				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
+				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
+		break;
+	case GAUDI_QUEUE_ID_NIC_0_3:
+	case GAUDI_QUEUE_ID_NIC_1_3:
+	case GAUDI_QUEUE_ID_NIC_2_3:
+	case GAUDI_QUEUE_ID_NIC_3_3:
+	case GAUDI_QUEUE_ID_NIC_4_3:
+	case GAUDI_QUEUE_ID_NIC_5_3:
+	case GAUDI_QUEUE_ID_NIC_6_3:
+	case GAUDI_QUEUE_ID_NIC_7_3:
+	case GAUDI_QUEUE_ID_NIC_8_3:
+	case GAUDI_QUEUE_ID_NIC_9_3:
+		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
+		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
+				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
+				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -7360,6 +8101,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
 	.wreg = hl_wreg,
 	.halt_coresight = gaudi_halt_coresight,
 	.ctx_init = gaudi_ctx_init,
+	.ctx_fini = gaudi_ctx_fini,
 	.get_clk_rate = gaudi_get_clk_rate,
 	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
 	.read_device_fw_version = gaudi_read_device_fw_version,
diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h
index 16871d9ff88b..297a96dbf4e5 100644
--- a/drivers/misc/habanalabs/gaudi/gaudiP.h
+++ b/drivers/misc/habanalabs/gaudi/gaudiP.h
@@ -23,6 +23,10 @@
 					NUMBER_OF_CPU_HW_QUEUES + \
 					NUMBER_OF_INT_HW_QUEUES)
 
+/* 10 NIC QMANs, DMA5 QMAN, TPC7 QMAN */
+#define NUMBER_OF_COLLECTIVE_QUEUES	12
+#define NUMBER_OF_SOBS_IN_GRP		11
+
 /*
  * Number of MSI interrupts IDS:
  * Each completion queue has 1 ID
@@ -149,10 +153,14 @@
 
 /* Virtual address space */
 #define VA_HOST_SPACE_START	0x1000000000000ull	/* 256TB */
-#define VA_HOST_SPACE_END	0x3FF8000000000ull	/* 1PB - 1TB */
+#define VA_HOST_SPACE_END	0x3FF7FFFE00000ull	/* 1PB - 512GB - 2MB */
 #define VA_HOST_SPACE_SIZE	(VA_HOST_SPACE_END - \
 					VA_HOST_SPACE_START) /* 767TB */
 
+#define VA_HOST_SPACE_INTERNAL_CB_START	0x3FF7FFFE00000ull /* 1PB - 512GB - 2MB */
+#define VA_HOST_SPACE_INTERNAL_CB_END	0x3FF8000000000ull /* 1PB - 512GB */
+#define HOST_SPACE_INTERNAL_CB_SZ	SZ_2M
+
 #define HW_CAP_PLL		BIT(0)
 #define HW_CAP_HBM		BIT(1)
 #define HW_CAP_MMU		BIT(2)
@@ -240,6 +248,34 @@ enum gaudi_nic_mask {
 	GAUDI_NIC_MASK_ALL = 0x3FF
 };
 
+/*
+ * struct gaudi_hw_sob_group - H/W SOB group info.
+ * @hdev: habanalabs device structure.
+ * @kref: refcount of this SOB group. group will reset once refcount is zero.
+ * @base_sob_id: base sob id of this SOB group.
+ */
+struct gaudi_hw_sob_group {
+	struct hl_device	*hdev;
+	struct kref		kref;
+	u32			base_sob_id;
+};
+
+#define NUM_SOB_GROUPS (HL_RSVD_SOBS * QMAN_STREAMS)
+/**
+ * struct gaudi_collective_properties -
+ *     holds all SOB groups and queues info reserved for the collective
+ * @hw_sob_group: H/W SOB groups.
+ * @next_sob_group_val: the next value to use for the currently used SOB group.
+ * @curr_sob_group_idx: the index of the currently used SOB group.
+ * @mstr_sob_mask: pre-defined masks for collective master monitors
+ */
+struct gaudi_collective_properties {
+	struct gaudi_hw_sob_group hw_sob_group[NUM_SOB_GROUPS];
+	u16			next_sob_group_val[QMAN_STREAMS];
+	u8			curr_sob_group_idx[QMAN_STREAMS];
+	u8			mstr_sob_mask[HL_COLLECTIVE_RSVD_MSTR_MONS];
+};
+
 /**
  * struct gaudi_internal_qman_info - Internal QMAN information.
  * @pq_kernel_addr: Kernel address of the PQ memory area in the host.
@@ -285,6 +321,8 @@ struct gaudi_device {
 
 	struct gaudi_internal_qman_info	internal_qmans[GAUDI_QUEUE_ID_SIZE];
 
+	struct gaudi_collective_properties collective_props;
+
 	u64				hbm_bar_cur_addr;
 	u64				max_freq_value;
 
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 9332580b038d..374881ff551d 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -5343,6 +5343,11 @@ int goya_collective_wait_create_jobs(struct hl_device *hdev,
 	return -EINVAL;
 }
 
+static void goya_ctx_fini(struct hl_ctx *ctx)
+{
+
+}
+
 static const struct hl_asic_funcs goya_funcs = {
 	.early_init = goya_early_init,
 	.early_fini = goya_early_fini,
@@ -5404,6 +5409,7 @@ static const struct hl_asic_funcs goya_funcs = {
 	.wreg = hl_wreg,
 	.halt_coresight = goya_halt_coresight,
 	.ctx_init = goya_ctx_init,
+	.ctx_fini = goya_ctx_fini,
 	.get_clk_rate = goya_get_clk_rate,
 	.get_queue_id_for_cq = goya_get_queue_id_for_cq,
 	.read_device_fw_version = goya_read_device_fw_version,
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 4661a74f0425..0185311b679b 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -18,8 +18,18 @@
 #define GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START		0x8000	/* 32KB */
 #define GAUDI_DRIVER_SRAM_RESERVED_SIZE_FROM_START	0x80	/* 128 bytes */
 
-#define GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT		32
-#define GAUDI_FIRST_AVAILABLE_W_S_MONITOR		16
+/*
+ * 128 SOBs reserved for collective wait
+ * 16 SOBs reserved for sync stream
+ */
+#define GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT		144
+
+/*
+ * 64 monitors reserved for collective wait
+ * 8 monitors reserved for sync stream
+ */
+#define GAUDI_FIRST_AVAILABLE_W_S_MONITOR		72
+
 /*
  * Goya queue Numbering
  *
-- 
cgit v1.2.3


From e753643d516c7c38f69f3d73169bb00cd70a60b9 Mon Sep 17 00:00:00 2001
From: farah kassabri <fkassabri@habana.ai>
Date: Mon, 12 Oct 2020 14:30:26 +0300
Subject: habanalabs: fix cs counters structure

Fix cs counters structure in uapi to be one flat structure instead
of two instances of the same other structure.
Use atomic read/increment for context counters so we can use
one structure for both aggregated and context counters.

Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../misc/habanalabs/common/command_submission.c    | 18 +++---
 drivers/misc/habanalabs/common/habanalabs.h        | 73 +++++++++++-----------
 drivers/misc/habanalabs/common/habanalabs_ioctl.c  | 35 +++++++----
 drivers/misc/habanalabs/common/hw_queue.c          |  5 +-
 drivers/misc/habanalabs/gaudi/gaudi.c              |  4 +-
 include/uapi/misc/habanalabs.h                     | 35 ++++++-----
 6 files changed, 95 insertions(+), 75 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 26822cfd1491..e123101b74d6 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -462,7 +462,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 	if (other && !completion_done(&other->completion)) {
 		dev_dbg_ratelimited(hdev->dev,
 			"Rejecting CS because of too many in-flights CS\n");
-		ctx->cs_counters.max_cs_in_flight_drop_cnt++;
+		atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt);
 		atomic64_inc(&cntr->max_cs_in_flight_drop_cnt);
 		rc = -EAGAIN;
 		goto free_fence;
@@ -720,7 +720,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 		rc = validate_queue_index(hdev, chunk, &queue_type,
 						&is_kernel_allocated_cb);
 		if (rc) {
-			hpriv->ctx->cs_counters.parsing_drop_cnt++;
+			atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
 			atomic64_inc(&cntr->parsing_drop_cnt);
 			goto free_cs_object;
 		}
@@ -728,7 +728,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 		if (is_kernel_allocated_cb) {
 			cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
 			if (!cb) {
-				hpriv->ctx->cs_counters.parsing_drop_cnt++;
+				atomic64_inc(
+				&hpriv->ctx->cs_counters.parsing_drop_cnt);
 				atomic64_inc(&cntr->parsing_drop_cnt);
 				rc = -EINVAL;
 				goto free_cs_object;
@@ -743,7 +744,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 		job = hl_cs_allocate_job(hdev, queue_type,
 						is_kernel_allocated_cb);
 		if (!job) {
-			hpriv->ctx->cs_counters.out_of_mem_drop_cnt++;
+			atomic64_inc(
+			&hpriv->ctx->cs_counters.out_of_mem_drop_cnt);
 			atomic64_inc(&cntr->out_of_mem_drop_cnt);
 			dev_err(hdev->dev, "Failed to allocate a new job\n");
 			rc = -ENOMEM;
@@ -777,7 +779,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 
 		rc = cs_parser(hpriv, job);
 		if (rc) {
-			hpriv->ctx->cs_counters.parsing_drop_cnt++;
+			atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
 			atomic64_inc(&cntr->parsing_drop_cnt);
 			dev_err(hdev->dev,
 				"Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
@@ -787,7 +789,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 	}
 
 	if (int_queues_only) {
-		hpriv->ctx->cs_counters.parsing_drop_cnt++;
+		atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
 		atomic64_inc(&cntr->parsing_drop_cnt);
 		dev_err(hdev->dev,
 			"Reject CS %d.%llu because only internal queues jobs are present\n",
@@ -880,7 +882,7 @@ static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
 
 	job = hl_cs_allocate_job(hdev, q_type, true);
 	if (!job) {
-		ctx->cs_counters.out_of_mem_drop_cnt++;
+		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
 		dev_err(hdev->dev, "Failed to allocate a new job\n");
 		return -ENOMEM;
@@ -894,7 +896,7 @@ static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
 	cb = hl_cb_kernel_create(hdev, cb_size,
 				q_type == QUEUE_TYPE_HW && hdev->mmu_enable);
 	if (!cb) {
-		ctx->cs_counters.out_of_mem_drop_cnt++;
+		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
 		kfree(job);
 		return -EFAULT;
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 7f1522b101b4..b1f20f225ff9 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -936,6 +936,22 @@ struct hl_va_range {
 	u64			end_addr;
 };
 
+/**
+ * struct hl_cs_counters_atomic - command submission counters
+ * @out_of_mem_drop_cnt: dropped due to memory allocation issue
+ * @parsing_drop_cnt: dropped due to error in packet parsing
+ * @queue_full_drop_cnt: dropped due to queue full
+ * @device_in_reset_drop_cnt: dropped due to device in reset
+ * @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight
+ */
+struct hl_cs_counters_atomic {
+	atomic64_t out_of_mem_drop_cnt;
+	atomic64_t parsing_drop_cnt;
+	atomic64_t queue_full_drop_cnt;
+	atomic64_t device_in_reset_drop_cnt;
+	atomic64_t max_cs_in_flight_drop_cnt;
+};
+
 /**
  * struct hl_ctx - user/kernel context.
  * @mem_hash: holds mapping from virtual address to virtual memory area
@@ -954,6 +970,7 @@ struct hl_va_range {
  * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the
  *            MMU hash or walking the PGT requires talking this lock.
  * @debugfs_list: node in debugfs list of contexts.
+ * @cs_counters: context command submission counters.
  * @cb_va_pool: device VA pool for command buffers which are mapped to the
  *              device's MMU.
  * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed
@@ -976,26 +993,26 @@ struct hl_va_range {
 struct hl_ctx {
 	DECLARE_HASHTABLE(mem_hash, MEM_HASH_TABLE_BITS);
 	DECLARE_HASHTABLE(mmu_shadow_hash, MMU_HASH_TABLE_BITS);
-	struct hl_fpriv		*hpriv;
-	struct hl_device	*hdev;
-	struct kref		refcount;
-	struct hl_fence		**cs_pending;
-	struct hl_va_range	*host_va_range;
-	struct hl_va_range	*host_huge_va_range;
-	struct hl_va_range	*dram_va_range;
-	struct mutex		mem_hash_lock;
-	struct mutex		mmu_lock;
-	struct list_head	debugfs_list;
-	struct hl_cs_counters	cs_counters;
-	struct gen_pool		*cb_va_pool;
-	u64			cs_sequence;
-	u64			*dram_default_hops;
-	spinlock_t		cs_lock;
-	atomic64_t		dram_phys_mem;
-	atomic_t		thread_ctx_switch_token;
-	u32			thread_ctx_switch_wait_token;
-	u32			asid;
-	u32			handle;
+	struct hl_fpriv			*hpriv;
+	struct hl_device		*hdev;
+	struct kref			refcount;
+	struct hl_fence			**cs_pending;
+	struct hl_va_range		*host_va_range;
+	struct hl_va_range		*host_huge_va_range;
+	struct hl_va_range		*dram_va_range;
+	struct mutex			mem_hash_lock;
+	struct mutex			mmu_lock;
+	struct list_head		debugfs_list;
+	struct hl_cs_counters_atomic	cs_counters;
+	struct gen_pool			*cb_va_pool;
+	u64				cs_sequence;
+	u64				*dram_default_hops;
+	spinlock_t			cs_lock;
+	atomic64_t			dram_phys_mem;
+	atomic_t			thread_ctx_switch_token;
+	u32				thread_ctx_switch_wait_token;
+	u32				asid;
+	u32				handle;
 };
 
 /**
@@ -1164,22 +1181,6 @@ struct hl_cs_parser {
 	u8			contains_dma_pkt;
 };
 
-/**
- * struct hl_info_cs_counters - command submission counters
- * @out_of_mem_drop_cnt: dropped due to memory allocation issue
- * @parsing_drop_cnt: dropped due to error in packet parsing
- * @queue_full_drop_cnt: dropped due to queue full
- * @device_in_reset_drop_cnt: dropped due to device in reset
- * @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight
- */
-struct hl_cs_counters_atomic {
-	atomic64_t out_of_mem_drop_cnt;
-	atomic64_t parsing_drop_cnt;
-	atomic64_t queue_full_drop_cnt;
-	atomic64_t device_in_reset_drop_cnt;
-	atomic64_t max_cs_in_flight_drop_cnt;
-};
-
 /*
  * MEMORY STRUCTURE
  */
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index 350a768309bd..1d8bea626e78 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -315,7 +315,7 @@ static int clk_throttle_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 {
 	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
-	struct hl_info_cs_counters cs_counters = { {0} };
+	struct hl_info_cs_counters cs_counters = {0};
 	struct hl_device *hdev = hpriv->hdev;
 	struct hl_cs_counters_atomic *cntr;
 	u32 max_size = args->return_size;
@@ -325,23 +325,34 @@ static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 	if ((!max_size) || (!out))
 		return -EINVAL;
 
-	memcpy(&cs_counters.cs_counters, &hdev->aggregated_cs_counters,
-			sizeof(struct hl_cs_counters));
-
-	cs_counters.cs_counters.out_of_mem_drop_cnt =
+	cs_counters.total_out_of_mem_drop_cnt =
 			atomic64_read(&cntr->out_of_mem_drop_cnt);
-	cs_counters.cs_counters.parsing_drop_cnt =
+	cs_counters.total_parsing_drop_cnt =
 			atomic64_read(&cntr->parsing_drop_cnt);
-	cs_counters.cs_counters.queue_full_drop_cnt =
+	cs_counters.total_queue_full_drop_cnt =
 			atomic64_read(&cntr->queue_full_drop_cnt);
-	cs_counters.cs_counters.device_in_reset_drop_cnt =
+	cs_counters.total_device_in_reset_drop_cnt =
 			atomic64_read(&cntr->device_in_reset_drop_cnt);
-	cs_counters.cs_counters.max_cs_in_flight_drop_cnt =
+	cs_counters.total_max_cs_in_flight_drop_cnt =
 			atomic64_read(&cntr->max_cs_in_flight_drop_cnt);
 
-	if (hpriv->ctx)
-		memcpy(&cs_counters.ctx_cs_counters, &hpriv->ctx->cs_counters,
-				sizeof(struct hl_cs_counters));
+	if (hpriv->ctx) {
+		cs_counters.ctx_out_of_mem_drop_cnt =
+				atomic64_read(
+				&hpriv->ctx->cs_counters.out_of_mem_drop_cnt);
+		cs_counters.ctx_parsing_drop_cnt =
+				atomic64_read(
+				&hpriv->ctx->cs_counters.parsing_drop_cnt);
+		cs_counters.ctx_queue_full_drop_cnt =
+				atomic64_read(
+				&hpriv->ctx->cs_counters.queue_full_drop_cnt);
+		cs_counters.ctx_device_in_reset_drop_cnt =
+				atomic64_read(
+			&hpriv->ctx->cs_counters.device_in_reset_drop_cnt);
+		cs_counters.ctx_max_cs_in_flight_drop_cnt =
+				atomic64_read(
+			&hpriv->ctx->cs_counters.max_cs_in_flight_drop_cnt);
+	}
 
 	return copy_to_user(out, &cs_counters,
 		min((size_t) max_size, sizeof(cs_counters))) ? -EFAULT : 0;
diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c
index d9448375beac..44155a6e557f 100644
--- a/drivers/misc/habanalabs/common/hw_queue.c
+++ b/drivers/misc/habanalabs/common/hw_queue.c
@@ -523,7 +523,7 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 	hdev->asic_funcs->hw_queues_lock(hdev);
 
 	if (hl_device_disabled_or_in_reset(hdev)) {
-		ctx->cs_counters.device_in_reset_drop_cnt++;
+		atomic64_inc(&ctx->cs_counters.device_in_reset_drop_cnt);
 		atomic64_inc(&cntr->device_in_reset_drop_cnt);
 		dev_err(hdev->dev,
 			"device is disabled or in reset, CS rejected!\n");
@@ -557,7 +557,8 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 			}
 
 			if (rc) {
-				ctx->cs_counters.queue_full_drop_cnt++;
+				atomic64_inc(
+					&ctx->cs_counters.queue_full_drop_cnt);
 				atomic64_inc(&cntr->queue_full_drop_cnt);
 				goto unroll_cq_resv;
 			}
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index ab8c9463932f..e640c9fcc932 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -1137,7 +1137,7 @@ static int gaudi_collective_wait_create_job(struct hl_device *hdev,
 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
 	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
 	if (!job) {
-		ctx->cs_counters.out_of_mem_drop_cnt++;
+		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
 		dev_err(hdev->dev, "Failed to allocate a new job\n");
 		return -ENOMEM;
@@ -1147,7 +1147,7 @@ static int gaudi_collective_wait_create_job(struct hl_device *hdev,
 	cb = hl_cb_kernel_create(hdev, cb_size,
 			hdev->mmu_enable && !patched_cb);
 	if (!cb) {
-		ctx->cs_counters.out_of_mem_drop_cnt++;
+		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
 		kfree(job);
 		return -EFAULT;
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 0185311b679b..61f8f9144b54 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -407,23 +407,28 @@ struct hl_info_sync_manager {
 
 /**
  * struct hl_info_cs_counters - command submission counters
- * @out_of_mem_drop_cnt: dropped due to memory allocation issue
- * @parsing_drop_cnt: dropped due to error in packet parsing
- * @queue_full_drop_cnt: dropped due to queue full
- * @device_in_reset_drop_cnt: dropped due to device in reset
- * @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight
+ * @total_out_of_mem_drop_cnt: total dropped due to memory allocation issue
+ * @ctx_out_of_mem_drop_cnt: context dropped due to memory allocation issue
+ * @total_parsing_drop_cnt: total dropped due to error in packet parsing
+ * @ctx_parsing_drop_cnt: context dropped due to error in packet parsing
+ * @total_queue_full_drop_cnt: total dropped due to queue full
+ * @ctx_queue_full_drop_cnt: context dropped due to queue full
+ * @total_device_in_reset_drop_cnt: total dropped due to device in reset
+ * @ctx_device_in_reset_drop_cnt: context dropped due to device in reset
+ * @total_max_cs_in_flight_drop_cnt: total dropped due to maximum CS in-flight
+ * @ctx_max_cs_in_flight_drop_cnt: context dropped due to maximum CS in-flight
  */
-struct hl_cs_counters {
-	__u64 out_of_mem_drop_cnt;
-	__u64 parsing_drop_cnt;
-	__u64 queue_full_drop_cnt;
-	__u64 device_in_reset_drop_cnt;
-	__u64 max_cs_in_flight_drop_cnt;
-};
-
 struct hl_info_cs_counters {
-	struct hl_cs_counters cs_counters;
-	struct hl_cs_counters ctx_cs_counters;
+	__u64 total_out_of_mem_drop_cnt;
+	__u64 ctx_out_of_mem_drop_cnt;
+	__u64 total_parsing_drop_cnt;
+	__u64 ctx_parsing_drop_cnt;
+	__u64 total_queue_full_drop_cnt;
+	__u64 ctx_queue_full_drop_cnt;
+	__u64 total_device_in_reset_drop_cnt;
+	__u64 ctx_device_in_reset_drop_cnt;
+	__u64 total_max_cs_in_flight_drop_cnt;
+	__u64 ctx_max_cs_in_flight_drop_cnt;
 };
 
 enum gaudi_dcores {
-- 
cgit v1.2.3


From 66a76401c50b2638fd95dd31f365fd64be307d6a Mon Sep 17 00:00:00 2001
From: Ofir Bitton <obitton@habana.ai>
Date: Mon, 5 Oct 2020 14:40:10 +0300
Subject: habanalabs: add 'needs reset' state in driver

The new state indicates that device should be reset in order
to regain functionality.
This unique state can occur if reset_on_lockup is disabled
and an actual lockup has occurred.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/command_buffer.c    |  5 ++--
 .../misc/habanalabs/common/command_submission.c    |  7 +++--
 drivers/misc/habanalabs/common/debugfs.c           |  6 ++--
 drivers/misc/habanalabs/common/device.c            | 33 ++++++++++++++++------
 drivers/misc/habanalabs/common/habanalabs.h        | 14 +++++++--
 drivers/misc/habanalabs/common/habanalabs_drv.c    | 17 ++++++++---
 drivers/misc/habanalabs/common/habanalabs_ioctl.c  | 12 +++++---
 drivers/misc/habanalabs/common/hw_queue.c          |  6 ++--
 drivers/misc/habanalabs/common/hwmon.c             |  4 +--
 drivers/misc/habanalabs/common/memory.c            |  5 ++--
 drivers/misc/habanalabs/common/sysfs.c             |  8 ++++--
 drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c        |  8 +++---
 drivers/misc/habanalabs/goya/goya_hwmgr.c          | 28 +++++++++---------
 include/uapi/misc/habanalabs.h                     |  3 +-
 14 files changed, 101 insertions(+), 55 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c
index 075679626c7b..03ffcead1855 100644
--- a/drivers/misc/habanalabs/common/command_buffer.c
+++ b/drivers/misc/habanalabs/common/command_buffer.c
@@ -379,13 +379,14 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
 {
 	union hl_cb_args *args = data;
 	struct hl_device *hdev = hpriv->hdev;
+	enum hl_device_status status;
 	u64 handle = 0;
 	int rc;
 
-	if (hl_device_disabled_or_in_reset(hdev)) {
+	if (!hl_device_operational(hdev, &status)) {
 		dev_warn_ratelimited(hdev->dev,
 			"Device is %s. Can't execute CB IOCTL\n",
-			atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
+			hdev->status[status]);
 		return -EBUSY;
 	}
 
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 91bdd6d8b020..20b34fb054ee 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -427,6 +427,8 @@ static void cs_timedout(struct work_struct *work)
 
 	if (hdev->reset_on_lockup)
 		hl_device_reset(hdev, false, false);
+	else
+		hdev->needs_reset = true;
 }
 
 static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
@@ -689,12 +691,13 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
 	struct hl_device *hdev = hpriv->hdev;
 	struct hl_ctx *ctx = hpriv->ctx;
 	u32 cs_type_flags, num_chunks;
+	enum hl_device_status status;
 	enum hl_cs_type cs_type;
 
-	if (hl_device_disabled_or_in_reset(hdev)) {
+	if (!hl_device_operational(hdev, &status)) {
 		dev_warn_ratelimited(hdev->dev,
 			"Device is %s. Can't submit new CS\n",
-			atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
+			hdev->status[status]);
 		return -EBUSY;
 	}
 
diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
index b44193ec3d12..104b9686e57b 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -24,7 +24,7 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
 	struct cpucp_packet pkt;
 	int rc;
 
-	if (hl_device_disabled_or_in_reset(hdev))
+	if (!hl_device_operational(hdev, NULL))
 		return -EBUSY;
 
 	memset(&pkt, 0, sizeof(pkt));
@@ -50,7 +50,7 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
 	struct cpucp_packet pkt;
 	int rc;
 
-	if (hl_device_disabled_or_in_reset(hdev))
+	if (!hl_device_operational(hdev, NULL))
 		return -EBUSY;
 
 	memset(&pkt, 0, sizeof(pkt));
@@ -76,7 +76,7 @@ static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state)
 	struct cpucp_packet pkt;
 	int rc;
 
-	if (hl_device_disabled_or_in_reset(hdev))
+	if (!hl_device_operational(hdev, NULL))
 		return;
 
 	memset(&pkt, 0, sizeof(pkt));
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 3b82020648c7..59308a612b36 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -15,14 +15,6 @@
 
 #define HL_PLDM_PENDING_RESET_PER_SEC	(HL_PENDING_RESET_PER_SEC * 10)
 
-bool hl_device_disabled_or_in_reset(struct hl_device *hdev)
-{
-	if ((hdev->disabled) || (atomic_read(&hdev->in_reset)))
-		return true;
-	else
-		return false;
-}
-
 enum hl_device_status hl_device_status(struct hl_device *hdev)
 {
 	enum hl_device_status status;
@@ -31,12 +23,34 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
 		status = HL_DEVICE_STATUS_MALFUNCTION;
 	else if (atomic_read(&hdev->in_reset))
 		status = HL_DEVICE_STATUS_IN_RESET;
+	else if (hdev->needs_reset)
+		status = HL_DEVICE_STATUS_NEEDS_RESET;
 	else
 		status = HL_DEVICE_STATUS_OPERATIONAL;
 
 	return status;
 }
 
+bool hl_device_operational(struct hl_device *hdev,
+		enum hl_device_status *status)
+{
+	enum hl_device_status current_status;
+
+	current_status = hl_device_status(hdev);
+	if (status)
+		*status = current_status;
+
+	switch (current_status) {
+	case HL_DEVICE_STATUS_IN_RESET:
+	case HL_DEVICE_STATUS_MALFUNCTION:
+	case HL_DEVICE_STATUS_NEEDS_RESET:
+		return false;
+	case HL_DEVICE_STATUS_OPERATIONAL:
+	default:
+		return true;
+	}
+}
+
 static void hpriv_release(struct kref *ref)
 {
 	struct hl_fpriv *hpriv;
@@ -411,7 +425,7 @@ static void hl_device_heartbeat(struct work_struct *work)
 	struct hl_device *hdev = container_of(work, struct hl_device,
 						work_heartbeat.work);
 
-	if (hl_device_disabled_or_in_reset(hdev))
+	if (!hl_device_operational(hdev, NULL))
 		goto reschedule;
 
 	if (!hdev->asic_funcs->send_heartbeat(hdev))
@@ -1091,6 +1105,7 @@ again:
 	}
 
 	atomic_set(&hdev->in_reset, 0);
+	hdev->needs_reset = false;
 
 	if (hard_reset)
 		hdev->hard_reset_cnt++;
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 81ff340b6a5b..9c7594d0ca07 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1432,6 +1432,10 @@ struct hl_dbg_device_entry {
  * DEVICES
  */
 
+#define HL_STR_MAX	32
+
+#define HL_DEV_STS_MAX (HL_DEVICE_STATUS_NEEDS_RESET + 1)
+
 /* Theoretical limit only. A single host can only contain up to 4 or 8 PCIe
  * x16 cards. In extreme cases, there are hosts that can accommodate 16 cards.
  */
@@ -1706,6 +1710,7 @@ struct hl_mmu_funcs {
  * @hwmon_dev: H/W monitor device.
  * @pm_mng_profile: current power management profile.
  * @hl_chip_info: ASIC's sensors information.
+ * @device_status_description: device status description.
  * @hl_debugfs: device's debugfs manager.
  * @cb_pool: list of preallocated CBs.
  * @cb_pool_lock: protects the CB pool.
@@ -1774,6 +1779,8 @@ struct hl_mmu_funcs {
  * @supports_coresight: is CoreSight supported.
  * @supports_soft_reset: is soft reset supported.
  * @supports_cb_mapping: is mapping a CB to the device's MMU supported.
+ * @needs_reset: true if reset_on_lockup is false and device should be reset
+ *               due to lockup.
  */
 struct hl_device {
 	struct pci_dev			*pdev;
@@ -1786,7 +1793,8 @@ struct hl_device {
 	struct device			*dev_ctrl;
 	struct delayed_work		work_freq;
 	struct delayed_work		work_heartbeat;
-	char				asic_name[32];
+	char				asic_name[HL_STR_MAX];
+	char				status[HL_DEV_STS_MAX][HL_STR_MAX];
 	enum hl_asic_type		asic_type;
 	struct hl_cq			*completion_queue;
 	struct workqueue_struct		**cq_wq;
@@ -1876,6 +1884,7 @@ struct hl_device {
 	u8				supports_coresight;
 	u8				supports_soft_reset;
 	u8				supports_cb_mapping;
+	u8				needs_reset;
 
 	/* Parameters for bring-up */
 	u64				nic_ports_mask;
@@ -1978,7 +1987,8 @@ static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
 
 int hl_device_open(struct inode *inode, struct file *filp);
 int hl_device_open_ctrl(struct inode *inode, struct file *filp);
-bool hl_device_disabled_or_in_reset(struct hl_device *hdev);
+bool hl_device_operational(struct hl_device *hdev,
+		enum hl_device_status *status);
 enum hl_device_status hl_device_status(struct hl_device *hdev);
 int hl_device_set_debug_mode(struct hl_device *hdev, bool enable);
 int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index 20458bd82c5a..aac798f3296e 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -92,6 +92,7 @@ static enum hl_asic_type get_asic_type(u16 device)
  */
 int hl_device_open(struct inode *inode, struct file *filp)
 {
+	enum hl_device_status status;
 	struct hl_device *hdev;
 	struct hl_fpriv *hpriv;
 	int rc;
@@ -124,10 +125,10 @@ int hl_device_open(struct inode *inode, struct file *filp)
 
 	mutex_lock(&hdev->fpriv_list_lock);
 
-	if (hl_device_disabled_or_in_reset(hdev)) {
+	if (!hl_device_operational(hdev, &status)) {
 		dev_err_ratelimited(hdev->dev,
-			"Can't open %s because it is disabled or in reset\n",
-			dev_name(hdev->dev));
+			"Can't open %s because it is %s\n",
+			dev_name(hdev->dev), hdev->status[status]);
 		rc = -EPERM;
 		goto out_err;
 	}
@@ -204,7 +205,7 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
 
 	mutex_lock(&hdev->fpriv_list_lock);
 
-	if (hl_device_disabled_or_in_reset(hdev)) {
+	if (!hl_device_operational(hdev, NULL)) {
 		dev_err_ratelimited(hdev->dev_ctrl,
 			"Can't open %s because it is disabled or in reset\n",
 			dev_name(hdev->dev_ctrl));
@@ -287,6 +288,14 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
 		hdev->asic_type = asic_type;
 	}
 
+	/* Assign status description string */
+	strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION],
+					"disabled", HL_STR_MAX);
+	strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET],
+					"in reset", HL_STR_MAX);
+	strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET],
+					"needs reset", HL_STR_MAX);
+
 	hdev->major = hl_major;
 	hdev->reset_on_lockup = reset_on_lockup;
 	hdev->memory_scrub = memory_scrub;
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index 1d8bea626e78..0729cd43f297 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -406,8 +406,10 @@ static int total_energy_consumption_info(struct hl_fpriv *hpriv,
 static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 				struct device *dev)
 {
+	enum hl_device_status status;
 	struct hl_info_args *args = data;
 	struct hl_device *hdev = hpriv->hdev;
+
 	int rc;
 
 	/*
@@ -428,10 +430,10 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 		break;
 	}
 
-	if (hl_device_disabled_or_in_reset(hdev)) {
+	if (!hl_device_operational(hdev, &status)) {
 		dev_warn_ratelimited(dev,
 			"Device is %s. Can't execute INFO IOCTL\n",
-			atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
+			hdev->status[status]);
 		return -EBUSY;
 	}
 
@@ -501,12 +503,14 @@ static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data)
 {
 	struct hl_debug_args *args = data;
 	struct hl_device *hdev = hpriv->hdev;
+	enum hl_device_status status;
+
 	int rc = 0;
 
-	if (hl_device_disabled_or_in_reset(hdev)) {
+	if (!hl_device_operational(hdev, &status)) {
 		dev_warn_ratelimited(hdev->dev,
 			"Device is %s. Can't execute DEBUG IOCTL\n",
-			atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
+			hdev->status[status]);
 		return -EBUSY;
 	}
 
diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c
index e808e668a007..f9550fcf5500 100644
--- a/drivers/misc/habanalabs/common/hw_queue.c
+++ b/drivers/misc/habanalabs/common/hw_queue.c
@@ -515,6 +515,7 @@ static void init_signal_wait_cs(struct hl_cs *cs)
  */
 int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 {
+	enum hl_device_status status;
 	struct hl_cs_counters_atomic *cntr;
 	struct hl_ctx *ctx = cs->ctx;
 	struct hl_device *hdev = ctx->hdev;
@@ -527,11 +528,10 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 
 	hdev->asic_funcs->hw_queues_lock(hdev);
 
-	if (hl_device_disabled_or_in_reset(hdev)) {
+	if (!hl_device_operational(hdev, &status)) {
 		atomic64_inc(&ctx->cs_counters.device_in_reset_drop_cnt);
-		atomic64_inc(&cntr->device_in_reset_drop_cnt);
 		dev_err(hdev->dev,
-			"device is disabled or in reset, CS rejected!\n");
+			"device is %s, CS rejected!\n", hdev->status[status]);
 		rc = -EPERM;
 		goto out;
 	}
diff --git a/drivers/misc/habanalabs/common/hwmon.c b/drivers/misc/habanalabs/common/hwmon.c
index 892a5e2b0b9d..ab96401c3752 100644
--- a/drivers/misc/habanalabs/common/hwmon.c
+++ b/drivers/misc/habanalabs/common/hwmon.c
@@ -114,7 +114,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
 	struct hl_device *hdev = dev_get_drvdata(dev);
 	int rc;
 
-	if (hl_device_disabled_or_in_reset(hdev))
+	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
 	switch (type) {
@@ -192,7 +192,7 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type,
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);
 
-	if (hl_device_disabled_or_in_reset(hdev))
+	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
 	switch (type) {
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index 5c1dae6aaf4d..e00ad11dc5f7 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -1237,6 +1237,7 @@ out:
 
 int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
 {
+	enum hl_device_status status;
 	union hl_mem_args *args = data;
 	struct hl_device *hdev = hpriv->hdev;
 	struct hl_ctx *ctx = hpriv->ctx;
@@ -1244,10 +1245,10 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
 	u32 handle = 0;
 	int rc;
 
-	if (hl_device_disabled_or_in_reset(hdev)) {
+	if (!hl_device_operational(hdev, &status)) {
 		dev_warn_ratelimited(hdev->dev,
 			"Device is %s. Can't execute MEMORY IOCTL\n",
-			atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
+			hdev->status[status]);
 		return -EBUSY;
 	}
 
diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c
index 3ceae87016b1..94ca68e62000 100644
--- a/drivers/misc/habanalabs/common/sysfs.c
+++ b/drivers/misc/habanalabs/common/sysfs.c
@@ -276,6 +276,8 @@ static ssize_t status_show(struct device *dev, struct device_attribute *attr,
 		str = "In reset";
 	else if (hdev->disabled)
 		str = "Malfunction";
+	else if (hdev->needs_reset)
+		str = "Needs Reset";
 	else
 		str = "Operational";
 
@@ -304,7 +306,7 @@ static ssize_t max_power_show(struct device *dev, struct device_attribute *attr,
 	struct hl_device *hdev = dev_get_drvdata(dev);
 	long val;
 
-	if (hl_device_disabled_or_in_reset(hdev))
+	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
 	val = hl_get_max_power(hdev);
@@ -319,7 +321,7 @@ static ssize_t max_power_store(struct device *dev,
 	unsigned long value;
 	int rc;
 
-	if (hl_device_disabled_or_in_reset(hdev)) {
+	if (!hl_device_operational(hdev, NULL)) {
 		count = -ENODEV;
 		goto out;
 	}
@@ -347,7 +349,7 @@ static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj,
 	char *data;
 	int rc;
 
-	if (hl_device_disabled_or_in_reset(hdev))
+	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
 	if (!max_size)
diff --git a/drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c b/drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c
index 1076b4932ce2..8c49da4bcbd5 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c
@@ -20,7 +20,7 @@ int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
 {
 	long value;
 
-	if (hl_device_disabled_or_in_reset(hdev))
+	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
 	value = hl_get_frequency(hdev, MME_PLL, false);
@@ -54,7 +54,7 @@ static ssize_t clk_max_freq_mhz_show(struct device *dev,
 	struct gaudi_device *gaudi = hdev->asic_specific;
 	long value;
 
-	if (hl_device_disabled_or_in_reset(hdev))
+	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
 	value = hl_get_frequency(hdev, MME_PLL, false);
@@ -72,7 +72,7 @@ static ssize_t clk_max_freq_mhz_store(struct device *dev,
 	int rc;
 	u64 value;
 
-	if (hl_device_disabled_or_in_reset(hdev)) {
+	if (!hl_device_operational(hdev, NULL)) {
 		count = -ENODEV;
 		goto fail;
 	}
@@ -97,7 +97,7 @@ static ssize_t clk_cur_freq_mhz_show(struct device *dev,
 	struct hl_device *hdev = dev_get_drvdata(dev);
 	long value;
 
-	if (hl_device_disabled_or_in_reset(hdev))
+	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
 	value = hl_get_frequency(hdev, MME_PLL, true);
diff --git a/drivers/misc/habanalabs/goya/goya_hwmgr.c b/drivers/misc/habanalabs/goya/goya_hwmgr.c
index cdd4903e48fa..3acb36a1a902 100644
--- a/drivers/misc/habanalabs/goya/goya_hwmgr.c
+++ b/drivers/misc/habanalabs/goya/goya_hwmgr.c
@@ -36,7 +36,7 @@ int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
 {
 	long value;
 
-	if (hl_device_disabled_or_in_reset(hdev))
+	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
 	value = hl_get_frequency(hdev, MME_PLL, false);
@@ -69,7 +69,7 @@ static ssize_t mme_clk_show(struct device *dev, struct device_attribute *attr,
 	struct hl_device *hdev = dev_get_drvdata(dev);
 	long value;
 
-	if (hl_device_disabled_or_in_reset(hdev))
+	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
 	value = hl_get_frequency(hdev, MME_PLL, false);
@@ -88,7 +88,7 @@ static ssize_t mme_clk_store(struct device *dev, struct device_attribute *attr,
 	int rc;
 	long value;
 
-	if (hl_device_disabled_or_in_reset(hdev)) {
+	if (!hl_device_operational(hdev, NULL)) {
 		count = -ENODEV;
 		goto fail;
 	}
@@ -118,7 +118,7 @@ static ssize_t tpc_clk_show(struct device *dev, struct device_attribute *attr,
 	struct hl_device *hdev = dev_get_drvdata(dev);
 	long value;
 
-	if (hl_device_disabled_or_in_reset(hdev))
+	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
 	value = hl_get_frequency(hdev, TPC_PLL, false);
@@ -137,7 +137,7 @@ static ssize_t tpc_clk_store(struct device *dev, struct device_attribute *attr,
 	int rc;
 	long value;
 
-	if (hl_device_disabled_or_in_reset(hdev)) {
+	if (!hl_device_operational(hdev, NULL)) {
 		count = -ENODEV;
 		goto fail;
 	}
@@ -167,7 +167,7 @@ static ssize_t ic_clk_show(struct device *dev, struct device_attribute *attr,
 	struct hl_device *hdev = dev_get_drvdata(dev);
 	long value;
 
-	if (hl_device_disabled_or_in_reset(hdev))
+	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
 	value = hl_get_frequency(hdev, IC_PLL, false);
@@ -186,7 +186,7 @@ static ssize_t ic_clk_store(struct device *dev, struct device_attribute *attr,
 	int rc;
 	long value;
 
-	if (hl_device_disabled_or_in_reset(hdev)) {
+	if (!hl_device_operational(hdev, NULL)) {
 		count = -ENODEV;
 		goto fail;
 	}
@@ -216,7 +216,7 @@ static ssize_t mme_clk_curr_show(struct device *dev,
 	struct hl_device *hdev = dev_get_drvdata(dev);
 	long value;
 
-	if (hl_device_disabled_or_in_reset(hdev))
+	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
 	value = hl_get_frequency(hdev, MME_PLL, true);
@@ -233,7 +233,7 @@ static ssize_t tpc_clk_curr_show(struct device *dev,
 	struct hl_device *hdev = dev_get_drvdata(dev);
 	long value;
 
-	if (hl_device_disabled_or_in_reset(hdev))
+	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
 	value = hl_get_frequency(hdev, TPC_PLL, true);
@@ -250,7 +250,7 @@ static ssize_t ic_clk_curr_show(struct device *dev,
 	struct hl_device *hdev = dev_get_drvdata(dev);
 	long value;
 
-	if (hl_device_disabled_or_in_reset(hdev))
+	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
 	value = hl_get_frequency(hdev, IC_PLL, true);
@@ -266,7 +266,7 @@ static ssize_t pm_mng_profile_show(struct device *dev,
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);
 
-	if (hl_device_disabled_or_in_reset(hdev))
+	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
 	return sprintf(buf, "%s\n",
@@ -280,7 +280,7 @@ static ssize_t pm_mng_profile_store(struct device *dev,
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);
 
-	if (hl_device_disabled_or_in_reset(hdev)) {
+	if (!hl_device_operational(hdev, NULL)) {
 		count = -ENODEV;
 		goto out;
 	}
@@ -335,7 +335,7 @@ static ssize_t high_pll_show(struct device *dev, struct device_attribute *attr,
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);
 
-	if (hl_device_disabled_or_in_reset(hdev))
+	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
 	return sprintf(buf, "%u\n", hdev->high_pll);
@@ -348,7 +348,7 @@ static ssize_t high_pll_store(struct device *dev, struct device_attribute *attr,
 	long value;
 	int rc;
 
-	if (hl_device_disabled_or_in_reset(hdev)) {
+	if (!hl_device_operational(hdev, NULL)) {
 		count = -ENODEV;
 		goto out;
 	}
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 61f8f9144b54..d9cc782aba21 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -242,7 +242,8 @@ enum gaudi_engine_id {
 enum hl_device_status {
 	HL_DEVICE_STATUS_OPERATIONAL,
 	HL_DEVICE_STATUS_IN_RESET,
-	HL_DEVICE_STATUS_MALFUNCTION
+	HL_DEVICE_STATUS_MALFUNCTION,
+	HL_DEVICE_STATUS_NEEDS_RESET
 };
 
 /* Opcode for management ioctl
-- 
cgit v1.2.3


From 4147864e8d65a0d57dd8573cf306382653616ac2 Mon Sep 17 00:00:00 2001
From: Alon Mizrahi <amizrahi@habana.ai>
Date: Tue, 17 Nov 2020 14:25:14 +0200
Subject: habanalabs: fetch pll frequency from firmware

Once firmware security is enabled, driver must fetch pll frequencies
through the firmware message interface instead of reading the registers
directly.

Signed-off-by: Alon Mizrahi <amizrahi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c       |  24 ++--
 drivers/misc/habanalabs/common/habanalabs.h        |   9 +-
 drivers/misc/habanalabs/common/habanalabs_ioctl.c  |  22 ++++
 drivers/misc/habanalabs/gaudi/gaudi.c              | 135 ++++++++++++++-------
 drivers/misc/habanalabs/gaudi/gaudiP.h             |   8 ++
 drivers/misc/habanalabs/goya/goya.c                |   2 +-
 drivers/misc/habanalabs/include/common/cpucp_if.h  |  40 +++++-
 .../misc/habanalabs/include/common/hl_boot_if.h    |   4 +
 .../habanalabs/include/gaudi/asic_reg/gaudi_regs.h |  14 ++-
 .../include/gaudi/asic_reg/psoc_hbm_pll_regs.h     | 114 -----------------
 .../include/gaudi/asic_reg/psoc_pci_pll_regs.h     | 114 -----------------
 include/uapi/misc/habanalabs.h                     |   9 ++
 12 files changed, 199 insertions(+), 296 deletions(-)
 delete mode 100644 drivers/misc/habanalabs/include/gaudi/asic_reg/psoc_hbm_pll_regs.h
 delete mode 100644 drivers/misc/habanalabs/include/gaudi/asic_reg/psoc_pci_pll_regs.h

(limited to 'include/uapi')

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 8f70d0bbe5e1..c4a8d6ca34bb 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -279,7 +279,8 @@ int hl_fw_send_heartbeat(struct hl_device *hdev)
 	return rc;
 }
 
-int hl_fw_cpucp_info_get(struct hl_device *hdev)
+int hl_fw_cpucp_info_get(struct hl_device *hdev,
+			u32 cpu_security_boot_status_reg)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct cpucp_packet pkt = {};
@@ -324,6 +325,11 @@ int hl_fw_cpucp_info_get(struct hl_device *hdev)
 		goto out;
 	}
 
+	/* Read FW application security bits again */
+	if (hdev->asic_prop.fw_security_status_valid)
+		hdev->asic_prop.fw_app_security_map =
+				RREG32(cpu_security_boot_status_reg);
+
 out:
 	hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
 			sizeof(struct cpucp_info), cpucp_info_cpu_addr);
@@ -446,10 +452,8 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy)
 	return rc;
 }
 
-int hl_fw_cpucp_pll_info_get(struct hl_device *hdev,
-		enum cpucp_pll_type_attributes pll_type,
-		enum cpucp_pll_reg_attributes pll_reg,
-		u32 *pll_info)
+int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index,
+		u16 *pll_freq_arr)
 {
 	struct cpucp_packet pkt;
 	u64 result;
@@ -457,17 +461,19 @@ int hl_fw_cpucp_pll_info_get(struct hl_device *hdev,
 
 	memset(&pkt, 0, sizeof(pkt));
 
-	pkt.ctl = cpu_to_le32(CPUCP_PACKET_PLL_REG_GET <<
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_PLL_INFO_GET <<
 				CPUCP_PKT_CTL_OPCODE_SHIFT);
-	pkt.pll_type = __cpu_to_le16(pll_type);
-	pkt.pll_reg = __cpu_to_le16(pll_reg);
+	pkt.pll_type = __cpu_to_le16(pll_index);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
 			HL_CPUCP_INFO_TIMEOUT_USEC, &result);
 	if (rc)
 		dev_err(hdev->dev, "Failed to read PLL info, error %d\n", rc);
 
-	*pll_info = (u32) result;
+	pll_freq_arr[0] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT0_MASK, result);
+	pll_freq_arr[1] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT1_MASK, result);
+	pll_freq_arr[2] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT2_MASK, result);
+	pll_freq_arr[3] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT3_MASK, result);
 
 	return rc;
 }
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index e1db8301ecbd..9c9c8b24c47a 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2191,16 +2191,15 @@ void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
 void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
 					void *vaddr);
 int hl_fw_send_heartbeat(struct hl_device *hdev);
-int hl_fw_cpucp_info_get(struct hl_device *hdev);
+int hl_fw_cpucp_info_get(struct hl_device *hdev,
+			u32 cpu_security_boot_status_reg);
 int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
 int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
 		struct hl_info_pci_counters *counters);
 int hl_fw_cpucp_total_energy_get(struct hl_device *hdev,
 			u64 *total_energy);
-int hl_fw_cpucp_pll_info_get(struct hl_device *hdev,
-		enum cpucp_pll_type_attributes pll_type,
-		enum cpucp_pll_reg_attributes pll_reg,
-		u32 *pll_info);
+int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index,
+		u16 *pll_freq_arr);
 int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
 			u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
 			u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index ba8217fc9425..32e6af1db4e3 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -403,6 +403,25 @@ static int total_energy_consumption_info(struct hl_fpriv *hpriv,
 		min((size_t) max_size, sizeof(total_energy))) ? -EFAULT : 0;
 }
 
+static int pll_frequency_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+	struct hl_device *hdev = hpriv->hdev;
+	struct hl_pll_frequency_info freq_info = {0};
+	u32 max_size = args->return_size;
+	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+	int rc;
+
+	if ((!max_size) || (!out))
+		return -EINVAL;
+
+	rc = hl_fw_cpucp_pll_info_get(hdev, args->pll_index, freq_info.output);
+	if (rc)
+		return rc;
+
+	return copy_to_user(out, &freq_info,
+		min((size_t) max_size, sizeof(freq_info))) ? -EFAULT : 0;
+}
+
 static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 				struct device *dev)
 {
@@ -480,6 +499,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 	case HL_INFO_TOTAL_ENERGY:
 		return total_energy_consumption_info(hpriv, args);
 
+	case HL_INFO_PLL_FREQUENCY:
+		return pll_frequency_info(hpriv, args);
+
 	default:
 		dev_err(dev, "Invalid request %d\n", args->op);
 		rc = -ENOTTY;
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 49d4b5dda115..732559053133 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -103,6 +103,8 @@
 
 #define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */
 
+#define GAUDI_PLL_MAX 10
+
 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
 		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
 		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
@@ -149,6 +151,19 @@ static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
 	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
 };
 
+static const u32 gaudi_pll_base_addresses[GAUDI_PLL_MAX] = {
+	[CPU_PLL] = mmPSOC_CPU_PLL_NR,
+	[PCI_PLL] = mmPSOC_PCI_PLL_NR,
+	[SRAM_PLL] = mmSRAM_W_PLL_NR,
+	[HBM_PLL] = mmPSOC_HBM_PLL_NR,
+	[NIC_PLL] = mmNIC0_PLL_NR,
+	[DMA_PLL] = mmDMA_W_PLL_NR,
+	[MESH_PLL] = mmMESH_W_PLL_NR,
+	[MME_PLL] = mmPSOC_MME_PLL_NR,
+	[TPC_PLL] = mmPSOC_TPC_PLL_NR,
+	[IF_PLL] = mmIF_W_PLL_NR
+};
+
 static inline bool validate_packet_id(enum packet_id id)
 {
 	switch (id) {
@@ -688,61 +703,93 @@ static int gaudi_early_fini(struct hl_device *hdev)
 }
 
 /**
- * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
+ * gaudi_fetch_pll_frequency - Fetch PLL frequency values
  *
  * @hdev: pointer to hl_device structure
+ * @pll_index: index of the pll to fetch frequency from
+ * @pll_freq_arr: array to store the frequency in MHz of each available
+ *                output. If an output is not available, 0 will be set
  *
  */
-static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
+static int gaudi_fetch_pll_frequency(struct hl_device *hdev,
+				enum gaudi_pll_index pll_index,
+				u16 *pll_freq_arr)
 {
-	struct asic_fixed_properties *prop = &hdev->asic_prop;
-	u32 trace_freq = 0, pll_clk = 0;
-	u32 div_fctr, div_sel, nr, nf, od;
-	int rc;
+	u32 nr = 0, nf = 0, od = 0, pll_clk = 0, div_fctr, div_sel,
+			pll_base_addr = gaudi_pll_base_addresses[pll_index];
+	u16 freq = 0;
+	int i, rc;
+
+	if (hdev->asic_prop.fw_security_status_valid &&
+			(hdev->asic_prop.fw_app_security_map &
+					CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
+		rc = hl_fw_cpucp_pll_info_get(hdev, pll_index, pll_freq_arr);
 
-	if (hdev->asic_prop.fw_security_disabled) {
-		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
-		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
-		nr = RREG32(mmPSOC_CPU_PLL_NR);
-		nf = RREG32(mmPSOC_CPU_PLL_NF);
-		od = RREG32(mmPSOC_CPU_PLL_OD);
-	} else {
-		rc = hl_fw_cpucp_pll_info_get(hdev, cpucp_pll_cpu,
-				cpucp_pll_div_factor_reg, &div_fctr);
-		rc |= hl_fw_cpucp_pll_info_get(hdev, cpucp_pll_cpu,
-				cpucp_pll_div_sel_reg, &div_sel);
-		rc |= hl_fw_cpucp_pll_info_get(hdev, cpucp_pll_cpu,
-				cpucp_pll_nr_reg, &nr);
-		rc |= hl_fw_cpucp_pll_info_get(hdev, cpucp_pll_cpu,
-				cpucp_pll_nf_reg, &nf);
-		rc |= hl_fw_cpucp_pll_info_get(hdev, cpucp_pll_cpu,
-				cpucp_pll_od_reg, &od);
 		if (rc)
 			return rc;
-	}
-
-	if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) {
-		if (div_sel == DIV_SEL_REF_CLK)
-			trace_freq = PLL_REF_CLK;
-		else
-			trace_freq = PLL_REF_CLK / (div_fctr + 1);
-	} else if (div_sel == DIV_SEL_PLL_CLK ||
+	} else if (hdev->asic_prop.fw_security_disabled) {
+		/* Backward compatibility */
+		nr = RREG32(pll_base_addr + PLL_NR_OFFSET);
+		nf = RREG32(pll_base_addr + PLL_NF_OFFSET);
+		od = RREG32(pll_base_addr + PLL_OD_OFFSET);
+
+		for (i = 0; i < HL_PLL_NUM_OUTPUTS; i++) {
+			div_fctr = RREG32(pll_base_addr +
+					PLL_DIV_FACTOR_0_OFFSET + i * 4);
+			div_sel = RREG32(pll_base_addr +
+					PLL_DIV_SEL_0_OFFSET + i * 4);
+
+			if (div_sel == DIV_SEL_REF_CLK ||
+				div_sel == DIV_SEL_DIVIDED_REF) {
+				if (div_sel == DIV_SEL_REF_CLK)
+					freq = PLL_REF_CLK;
+				else
+					freq = PLL_REF_CLK / (div_fctr + 1);
+			} else if (div_sel == DIV_SEL_PLL_CLK ||
 					div_sel == DIV_SEL_DIVIDED_PLL) {
-		pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
-		if (div_sel == DIV_SEL_PLL_CLK)
-			trace_freq = pll_clk;
-		else
-			trace_freq = pll_clk / (div_fctr + 1);
+				pll_clk = PLL_REF_CLK * (nf + 1) /
+						((nr + 1) * (od + 1));
+				if (div_sel == DIV_SEL_PLL_CLK)
+					freq = pll_clk;
+				else
+					freq = pll_clk / (div_fctr + 1);
+			} else {
+				dev_warn(hdev->dev,
+					"Received invalid div select value: %d",
+					div_sel);
+			}
+
+			pll_freq_arr[i] = freq;
+		}
 	} else {
-		dev_warn(hdev->dev,
-			"Received invalid div select value: %d", div_sel);
+		dev_err(hdev->dev, "Failed to fetch PLL frequency values\n");
+		return -EIO;
 	}
 
-	prop->psoc_timestamp_frequency = trace_freq;
-	prop->psoc_pci_pll_nr = nr;
-	prop->psoc_pci_pll_nf = nf;
-	prop->psoc_pci_pll_od = od;
-	prop->psoc_pci_pll_div_factor = div_fctr;
+	return 0;
+}
+
+/**
+ * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
+ *
+ * @hdev: pointer to hl_device structure
+ *
+ */
+static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
+{
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	u16 pll_freq[HL_PLL_NUM_OUTPUTS];
+	int rc;
+
+	rc = gaudi_fetch_pll_frequency(hdev, CPU_PLL, pll_freq);
+	if (rc)
+		return rc;
+
+	prop->psoc_timestamp_frequency = pll_freq[2];
+	prop->psoc_pci_pll_nr = 0;
+	prop->psoc_pci_pll_nf = 0;
+	prop->psoc_pci_pll_od = 0;
+	prop->psoc_pci_pll_div_factor = 0;
 
 	return 0;
 }
@@ -7438,7 +7485,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev)
 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
 		return 0;
 
-	rc = hl_fw_cpucp_info_get(hdev);
+	rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0);
 	if (rc)
 		return rc;
 
diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h
index 1a5e681c720d..f2d91f4fcffe 100644
--- a/drivers/misc/habanalabs/gaudi/gaudiP.h
+++ b/drivers/misc/habanalabs/gaudi/gaudiP.h
@@ -14,6 +14,7 @@
 #include "../include/gaudi/gaudi_packets.h"
 #include "../include/gaudi/gaudi.h"
 #include "../include/gaudi/gaudi_async_events.h"
+#include "../include/gaudi/gaudi_fw_if.h"
 
 #define NUMBER_OF_EXT_HW_QUEUES		8
 #define NUMBER_OF_CMPLT_QUEUES		NUMBER_OF_EXT_HW_QUEUES
@@ -104,6 +105,13 @@
 #define MME_ACC_OFFSET		(mmMME1_ACC_BASE - mmMME0_ACC_BASE)
 #define SRAM_BANK_OFFSET	(mmSRAM_Y0_X1_RTR_BASE - mmSRAM_Y0_X0_RTR_BASE)
 
+#define PLL_NR_OFFSET		0
+#define PLL_NF_OFFSET		(mmPSOC_CPU_PLL_NF - mmPSOC_CPU_PLL_NR)
+#define PLL_OD_OFFSET		(mmPSOC_CPU_PLL_OD - mmPSOC_CPU_PLL_NR)
+#define PLL_DIV_FACTOR_0_OFFSET	(mmPSOC_CPU_PLL_DIV_FACTOR_0 - \
+				mmPSOC_CPU_PLL_NR)
+#define PLL_DIV_SEL_0_OFFSET	(mmPSOC_CPU_PLL_DIV_SEL_0 - mmPSOC_CPU_PLL_NR)
+
 #define NUM_OF_SOB_IN_BLOCK		\
 	(((mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_2047 - \
 	mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0) + 4) >> 2)
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 342227b93778..d91f553b8595 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -5156,7 +5156,7 @@ int goya_cpucp_info_get(struct hl_device *hdev)
 	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
 		return 0;
 
-	rc = hl_fw_cpucp_info_get(hdev);
+	rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0);
 	if (rc)
 		return rc;
 
diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h
index 759c068b2b7a..554f82271d5f 100644
--- a/drivers/misc/habanalabs/include/common/cpucp_if.h
+++ b/drivers/misc/habanalabs/include/common/cpucp_if.h
@@ -252,10 +252,26 @@ enum pq_init_status {
  *       The packet's arguments specify the desired sensor and the field to
  *       set.
  *
- * CPUCP_PACKET_PLL_REG_GET
- *       Fetch register of PLL from the required PLL IP.
- *       The packet's arguments specify the PLL IP and the register to get.
- *       Each register is 32-bit value which is returned in result field.
+ * CPUCP_PACKET_PCIE_THROUGHPUT_GET
+ *       Get throughput of PCIe.
+ *       The packet's arguments specify the transaction direction (TX/RX).
+ *       The window measurement is 10[msec], and the return value is in KB/sec.
+ *
+ * CPUCP_PACKET_PCIE_REPLAY_CNT_GET
+ *       Replay count measures number of "replay" events, which is basically
+ *       number of retries done by PCIe.
+ *
+ * CPUCP_PACKET_TOTAL_ENERGY_GET
+ *       Total Energy is measurement of energy from the time FW Linux
+ *       is loaded. It is calculated by multiplying the average power
+ *       by time (passed from armcp start). The units are in MilliJoules.
+ *
+ * CPUCP_PACKET_PLL_INFO_GET
+ *       Fetch frequencies of PLL from the required PLL IP.
+ *       The packet's arguments specify the device PLL type.
+ *       PLL type is the PLL from device pll_index enum.
+ *       The result is composed of 4 outputs, each is 16-bit
+ *       frequency in MHz.
  *
  */
 
@@ -289,7 +305,7 @@ enum cpucp_packet_id {
 	CPUCP_PACKET_PCIE_THROUGHPUT_GET,	/* internal */
 	CPUCP_PACKET_PCIE_REPLAY_CNT_GET,	/* internal */
 	CPUCP_PACKET_TOTAL_ENERGY_GET,		/* internal */
-	CPUCP_PACKET_PLL_REG_GET,		/* internal */
+	CPUCP_PACKET_PLL_INFO_GET,		/* internal */
 };
 
 #define CPUCP_PACKET_FENCE_VAL	0xFE8CE7A5
@@ -300,6 +316,15 @@ enum cpucp_packet_id {
 #define CPUCP_PKT_CTL_OPCODE_SHIFT	16
 #define CPUCP_PKT_CTL_OPCODE_MASK	0x1FFF0000
 
+#define CPUCP_PKT_RES_PLL_OUT0_SHIFT	0
+#define CPUCP_PKT_RES_PLL_OUT0_MASK	0x000000000000FFFF
+#define CPUCP_PKT_RES_PLL_OUT1_SHIFT	16
+#define CPUCP_PKT_RES_PLL_OUT1_MASK	0x00000000FFFF0000
+#define CPUCP_PKT_RES_PLL_OUT2_SHIFT	32
+#define CPUCP_PKT_RES_PLL_OUT2_MASK	0x0000FFFF00000000
+#define CPUCP_PKT_RES_PLL_OUT3_SHIFT	48
+#define CPUCP_PKT_RES_PLL_OUT3_MASK	0xFFFF000000000000
+
 struct cpucp_packet {
 	union {
 		__le64 value;	/* For SET packets */
@@ -324,8 +349,9 @@ struct cpucp_packet {
 			__u8 pad; /* unused */
 		};
 
-		struct {/* For PLL register fetch */
+		struct {/* For PLL info fetch */
 			__le16 pll_type;
+			/* TODO pll_reg is kept temporarily before removal */
 			__le16 pll_reg;
 		};
 
@@ -404,6 +430,7 @@ enum cpucp_pcie_throughput_attributes {
 	cpucp_pcie_throughput_rx
 };
 
+/* TODO temporarily kept before removal */
 enum cpucp_pll_reg_attributes {
 	cpucp_pll_nr_reg,
 	cpucp_pll_nf_reg,
@@ -412,6 +439,7 @@ enum cpucp_pll_reg_attributes {
 	cpucp_pll_div_sel_reg
 };
 
+/* TODO temporarily kept before removal */
 enum cpucp_pll_type_attributes {
 	cpucp_pll_cpu,
 	cpucp_pll_pci,
diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h
index 60916780df35..68ac15c53f37 100644
--- a/drivers/misc/habanalabs/include/common/hl_boot_if.h
+++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h
@@ -131,6 +131,9 @@
  *					receiving the halt-machine event.
  *					Initialized in: linux
  *
+ * CPU_BOOT_DEV_STS0_PLL_INFO_EN	FW retrieval of PLL info is enabled.
+ *					Initialized in: linux
+ *
  * CPU_BOOT_DEV_STS0_ENABLED		Device status register enabled.
  *					This is a main indication that the
  *					running FW populates the device status
@@ -150,6 +153,7 @@
 #define CPU_BOOT_DEV_STS0_SRAM_SCR_EN			(1 << 8)
 #define CPU_BOOT_DEV_STS0_DRAM_SCR_EN			(1 << 9)
 #define CPU_BOOT_DEV_STS0_FW_HARD_RST_EN		(1 << 10)
+#define CPU_BOOT_DEV_STS0_PLL_INFO_EN			(1 << 11)
 #define CPU_BOOT_DEV_STS0_ENABLED			(1 << 31)
 
 enum cpu_boot_status {
diff --git a/drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h b/drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h
index df21a40691e5..5bb54b34a8ae 100644
--- a/drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h
+++ b/drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h
@@ -81,6 +81,7 @@
 #include "sif_rtr_ctrl_6_regs.h"
 #include "sif_rtr_ctrl_7_regs.h"
 #include "psoc_etr_regs.h"
+#include "psoc_cpu_pll_regs.h"
 
 #include "dma0_qm_masks.h"
 #include "mme0_qm_masks.h"
@@ -102,9 +103,6 @@
 
 #include "nic0_qm0_masks.h"
 
-#include "psoc_hbm_pll_regs.h"
-#include "psoc_cpu_pll_regs.h"
-
 #define GAUDI_ECC_MEM_SEL_OFFSET		0xF18
 #define GAUDI_ECC_ADDRESS_OFFSET		0xF1C
 #define GAUDI_ECC_SYNDROME_OFFSET		0xF20
@@ -307,4 +305,14 @@
 #define mmPCIE_AUX_FLR_CTRL                                          0xC07394
 #define mmPCIE_AUX_DBI                                               0xC07490
 
+#define mmPSOC_PCI_PLL_NR                                            0xC72100
+#define mmSRAM_W_PLL_NR                                              0x4C8100
+#define mmPSOC_HBM_PLL_NR                                            0xC74100
+#define mmNIC0_PLL_NR                                                0xCF9100
+#define mmDMA_W_PLL_NR                                               0x487100
+#define mmMESH_W_PLL_NR                                              0x4C7100
+#define mmPSOC_MME_PLL_NR                                            0xC71100
+#define mmPSOC_TPC_PLL_NR                                            0xC73100
+#define mmIF_W_PLL_NR                                                0x488100
+
 #endif /* ASIC_REG_GAUDI_REGS_H_ */
diff --git a/drivers/misc/habanalabs/include/gaudi/asic_reg/psoc_hbm_pll_regs.h b/drivers/misc/habanalabs/include/gaudi/asic_reg/psoc_hbm_pll_regs.h
deleted file mode 100644
index 687e2255cb19..000000000000
--- a/drivers/misc/habanalabs/include/gaudi/asic_reg/psoc_hbm_pll_regs.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- *
- * Copyright 2016-2018 HabanaLabs, Ltd.
- * All Rights Reserved.
- *
- */
-
-/************************************
- ** This is an auto-generated file **
- **       DO NOT EDIT BELOW        **
- ************************************/
-
-#ifndef ASIC_REG_PSOC_HBM_PLL_REGS_H_
-#define ASIC_REG_PSOC_HBM_PLL_REGS_H_
-
-/*
- *****************************************
- *   PSOC_HBM_PLL (Prototype: PLL)
- *****************************************
- */
-
-#define mmPSOC_HBM_PLL_NR                                            0xC74100
-
-#define mmPSOC_HBM_PLL_NF                                            0xC74104
-
-#define mmPSOC_HBM_PLL_OD                                            0xC74108
-
-#define mmPSOC_HBM_PLL_NB                                            0xC7410C
-
-#define mmPSOC_HBM_PLL_CFG                                           0xC74110
-
-#define mmPSOC_HBM_PLL_LOSE_MASK                                     0xC74120
-
-#define mmPSOC_HBM_PLL_LOCK_INTR                                     0xC74128
-
-#define mmPSOC_HBM_PLL_LOCK_BYPASS                                   0xC7412C
-
-#define mmPSOC_HBM_PLL_DATA_CHNG                                     0xC74130
-
-#define mmPSOC_HBM_PLL_RST                                           0xC74134
-
-#define mmPSOC_HBM_PLL_SLIP_WD_CNTR                                  0xC74150
-
-#define mmPSOC_HBM_PLL_DIV_FACTOR_0                                  0xC74200
-
-#define mmPSOC_HBM_PLL_DIV_FACTOR_1                                  0xC74204
-
-#define mmPSOC_HBM_PLL_DIV_FACTOR_2                                  0xC74208
-
-#define mmPSOC_HBM_PLL_DIV_FACTOR_3                                  0xC7420C
-
-#define mmPSOC_HBM_PLL_DIV_FACTOR_CMD_0                              0xC74220
-
-#define mmPSOC_HBM_PLL_DIV_FACTOR_CMD_1                              0xC74224
-
-#define mmPSOC_HBM_PLL_DIV_FACTOR_CMD_2                              0xC74228
-
-#define mmPSOC_HBM_PLL_DIV_FACTOR_CMD_3                              0xC7422C
-
-#define mmPSOC_HBM_PLL_DIV_SEL_0                                     0xC74280
-
-#define mmPSOC_HBM_PLL_DIV_SEL_1                                     0xC74284
-
-#define mmPSOC_HBM_PLL_DIV_SEL_2                                     0xC74288
-
-#define mmPSOC_HBM_PLL_DIV_SEL_3                                     0xC7428C
-
-#define mmPSOC_HBM_PLL_DIV_EN_0                                      0xC742A0
-
-#define mmPSOC_HBM_PLL_DIV_EN_1                                      0xC742A4
-
-#define mmPSOC_HBM_PLL_DIV_EN_2                                      0xC742A8
-
-#define mmPSOC_HBM_PLL_DIV_EN_3                                      0xC742AC
-
-#define mmPSOC_HBM_PLL_DIV_FACTOR_BUSY_0                             0xC742C0
-
-#define mmPSOC_HBM_PLL_DIV_FACTOR_BUSY_1                             0xC742C4
-
-#define mmPSOC_HBM_PLL_DIV_FACTOR_BUSY_2                             0xC742C8
-
-#define mmPSOC_HBM_PLL_DIV_FACTOR_BUSY_3                             0xC742CC
-
-#define mmPSOC_HBM_PLL_CLK_GATER                                     0xC74300
-
-#define mmPSOC_HBM_PLL_CLK_RLX_0                                     0xC74310
-
-#define mmPSOC_HBM_PLL_CLK_RLX_1                                     0xC74314
-
-#define mmPSOC_HBM_PLL_CLK_RLX_2                                     0xC74318
-
-#define mmPSOC_HBM_PLL_CLK_RLX_3                                     0xC7431C
-
-#define mmPSOC_HBM_PLL_REF_CNTR_PERIOD                               0xC74400
-
-#define mmPSOC_HBM_PLL_REF_LOW_THRESHOLD                             0xC74410
-
-#define mmPSOC_HBM_PLL_REF_HIGH_THRESHOLD                            0xC74420
-
-#define mmPSOC_HBM_PLL_PLL_NOT_STABLE                                0xC74430
-
-#define mmPSOC_HBM_PLL_FREQ_CALC_EN                                  0xC74440
-
-#define mmPSOC_HBM_PLL_RLX_BITMAP_CFG                                0xC74500
-
-#define mmPSOC_HBM_PLL_RLX_BITMAP_0                                  0xC74510
-
-#define mmPSOC_HBM_PLL_RLX_BITMAP_1                                  0xC74514
-
-#define mmPSOC_HBM_PLL_RLX_BITMAP_2                                  0xC74518
-
-#define mmPSOC_HBM_PLL_RLX_BITMAP_3                                  0xC7451C
-
-#endif /* ASIC_REG_PSOC_HBM_PLL_REGS_H_ */
diff --git a/drivers/misc/habanalabs/include/gaudi/asic_reg/psoc_pci_pll_regs.h b/drivers/misc/habanalabs/include/gaudi/asic_reg/psoc_pci_pll_regs.h
deleted file mode 100644
index 3dc9bb4542dd..000000000000
--- a/drivers/misc/habanalabs/include/gaudi/asic_reg/psoc_pci_pll_regs.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- *
- * Copyright 2016-2018 HabanaLabs, Ltd.
- * All Rights Reserved.
- *
- */
-
-/************************************
- ** This is an auto-generated file **
- **       DO NOT EDIT BELOW        **
- ************************************/
-
-#ifndef ASIC_REG_PSOC_PCI_PLL_REGS_H_
-#define ASIC_REG_PSOC_PCI_PLL_REGS_H_
-
-/*
- *****************************************
- *   PSOC_PCI_PLL (Prototype: PLL)
- *****************************************
- */
-
-#define mmPSOC_PCI_PLL_NR                                            0xC72100
-
-#define mmPSOC_PCI_PLL_NF                                            0xC72104
-
-#define mmPSOC_PCI_PLL_OD                                            0xC72108
-
-#define mmPSOC_PCI_PLL_NB                                            0xC7210C
-
-#define mmPSOC_PCI_PLL_CFG                                           0xC72110
-
-#define mmPSOC_PCI_PLL_LOSE_MASK                                     0xC72120
-
-#define mmPSOC_PCI_PLL_LOCK_INTR                                     0xC72128
-
-#define mmPSOC_PCI_PLL_LOCK_BYPASS                                   0xC7212C
-
-#define mmPSOC_PCI_PLL_DATA_CHNG                                     0xC72130
-
-#define mmPSOC_PCI_PLL_RST                                           0xC72134
-
-#define mmPSOC_PCI_PLL_SLIP_WD_CNTR                                  0xC72150
-
-#define mmPSOC_PCI_PLL_DIV_FACTOR_0                                  0xC72200
-
-#define mmPSOC_PCI_PLL_DIV_FACTOR_1                                  0xC72204
-
-#define mmPSOC_PCI_PLL_DIV_FACTOR_2                                  0xC72208
-
-#define mmPSOC_PCI_PLL_DIV_FACTOR_3                                  0xC7220C
-
-#define mmPSOC_PCI_PLL_DIV_FACTOR_CMD_0                              0xC72220
-
-#define mmPSOC_PCI_PLL_DIV_FACTOR_CMD_1                              0xC72224
-
-#define mmPSOC_PCI_PLL_DIV_FACTOR_CMD_2                              0xC72228
-
-#define mmPSOC_PCI_PLL_DIV_FACTOR_CMD_3                              0xC7222C
-
-#define mmPSOC_PCI_PLL_DIV_SEL_0                                     0xC72280
-
-#define mmPSOC_PCI_PLL_DIV_SEL_1                                     0xC72284
-
-#define mmPSOC_PCI_PLL_DIV_SEL_2                                     0xC72288
-
-#define mmPSOC_PCI_PLL_DIV_SEL_3                                     0xC7228C
-
-#define mmPSOC_PCI_PLL_DIV_EN_0                                      0xC722A0
-
-#define mmPSOC_PCI_PLL_DIV_EN_1                                      0xC722A4
-
-#define mmPSOC_PCI_PLL_DIV_EN_2                                      0xC722A8
-
-#define mmPSOC_PCI_PLL_DIV_EN_3                                      0xC722AC
-
-#define mmPSOC_PCI_PLL_DIV_FACTOR_BUSY_0                             0xC722C0
-
-#define mmPSOC_PCI_PLL_DIV_FACTOR_BUSY_1                             0xC722C4
-
-#define mmPSOC_PCI_PLL_DIV_FACTOR_BUSY_2                             0xC722C8
-
-#define mmPSOC_PCI_PLL_DIV_FACTOR_BUSY_3                             0xC722CC
-
-#define mmPSOC_PCI_PLL_CLK_GATER                                     0xC72300
-
-#define mmPSOC_PCI_PLL_CLK_RLX_0                                     0xC72310
-
-#define mmPSOC_PCI_PLL_CLK_RLX_1                                     0xC72314
-
-#define mmPSOC_PCI_PLL_CLK_RLX_2                                     0xC72318
-
-#define mmPSOC_PCI_PLL_CLK_RLX_3                                     0xC7231C
-
-#define mmPSOC_PCI_PLL_REF_CNTR_PERIOD                               0xC72400
-
-#define mmPSOC_PCI_PLL_REF_LOW_THRESHOLD                             0xC72410
-
-#define mmPSOC_PCI_PLL_REF_HIGH_THRESHOLD                            0xC72420
-
-#define mmPSOC_PCI_PLL_PLL_NOT_STABLE                                0xC72430
-
-#define mmPSOC_PCI_PLL_FREQ_CALC_EN                                  0xC72440
-
-#define mmPSOC_PCI_PLL_RLX_BITMAP_CFG                                0xC72500
-
-#define mmPSOC_PCI_PLL_RLX_BITMAP_0                                  0xC72510
-
-#define mmPSOC_PCI_PLL_RLX_BITMAP_1                                  0xC72514
-
-#define mmPSOC_PCI_PLL_RLX_BITMAP_2                                  0xC72518
-
-#define mmPSOC_PCI_PLL_RLX_BITMAP_3                                  0xC7251C
-
-#endif /* ASIC_REG_PSOC_PCI_PLL_REGS_H_ */
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index d9cc782aba21..96eea49f48bc 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -295,6 +295,7 @@ enum hl_device_status {
 #define HL_INFO_CLK_THROTTLE_REASON	13
 #define HL_INFO_SYNC_MANAGER		14
 #define HL_INFO_TOTAL_ENERGY		15
+#define HL_INFO_PLL_FREQUENCY		16
 
 #define HL_INFO_VERSION_MAX_LEN	128
 #define HL_INFO_CARD_NAME_MAX_LEN	16
@@ -396,6 +397,12 @@ struct hl_info_energy {
 	__u64 total_energy_consumption;
 };
 
+#define HL_PLL_NUM_OUTPUTS 4
+
+struct hl_pll_frequency_info {
+	__u16 output[HL_PLL_NUM_OUTPUTS];
+};
+
 /**
  * struct hl_info_sync_manager - sync manager information
  * @first_available_sync_object: first available sob
@@ -465,6 +472,8 @@ struct hl_info_args {
 		 * resolution.
 		 */
 		__u32 period_ms;
+		/* PLL frequency retrieval */
+		__u32 pll_index;
 	};
 
 	__u32 pad;
-- 
cgit v1.2.3


From 9d127ad5719a865bac668a506dfe924ac11cd9bb Mon Sep 17 00:00:00 2001
From: Ofir Bitton <obitton@habana.ai>
Date: Tue, 10 Nov 2020 16:30:53 +0200
Subject: habanalabs: indicate to user that a cs is gone

We want to indicate to the user that a certain command submission
finished a long time ago and is no longer in the database.
This means no further information regarding this cs can be obtained.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../misc/habanalabs/common/command_submission.c    | 68 ++++++++++++++++------
 include/uapi/misc/habanalabs.h                     |  5 +-
 2 files changed, 54 insertions(+), 19 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 7309dd2b88a9..f91b17480588 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -11,9 +11,22 @@
 #include <linux/uaccess.h>
 #include <linux/slab.h>
 
+/**
+ * enum hl_cs_wait_status - cs wait status
+ * @CS_WAIT_STATUS_BUSY: cs was not completed yet
+ * @CS_WAIT_STATUS_COMPLETED: cs completed
+ * @CS_WAIT_STATUS_GONE: cs completed but fence is already gone
+ */
+enum hl_cs_wait_status {
+	CS_WAIT_STATUS_BUSY,
+	CS_WAIT_STATUS_COMPLETED,
+	CS_WAIT_STATUS_GONE
+};
+
 static void job_wq_completion(struct work_struct *work);
-static long _hl_cs_wait_ioctl(struct hl_device *hdev,
-		struct hl_ctx *ctx, u64 timeout_us, u64 seq);
+static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
+				u64 timeout_us, u64 seq,
+				enum hl_cs_wait_status *status);
 static void cs_do_release(struct kref *ref);
 
 static void hl_sob_reset(struct kref *ref)
@@ -942,7 +955,7 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
 	int rc = 0, do_ctx_switch;
 	void __user *chunks;
 	u32 num_chunks, tmp;
-	long ret;
+	int ret;
 
 	do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
 
@@ -996,18 +1009,19 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
 
 		/* Need to wait for restore completion before execution phase */
 		if (num_chunks) {
+			enum hl_cs_wait_status status;
 wait_again:
 			ret = _hl_cs_wait_ioctl(hdev, ctx,
 					jiffies_to_usecs(hdev->timeout_jiffies),
-					*cs_seq);
-			if (ret <= 0) {
+					*cs_seq, &status);
+			if (ret) {
 				if (ret == -ERESTARTSYS) {
 					usleep_range(100, 200);
 					goto wait_again;
 				}
 
 				dev_err(hdev->dev,
-					"Restore CS for context %d failed to complete %ld\n",
+					"Restore CS for context %d failed to complete %d\n",
 					ctx->asid, ret);
 				rc = -ENOEXEC;
 				goto out;
@@ -1337,12 +1351,14 @@ out:
 	return rc;
 }
 
-static long _hl_cs_wait_ioctl(struct hl_device *hdev,
-		struct hl_ctx *ctx, u64 timeout_us, u64 seq)
+static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
+				u64 timeout_us, u64 seq,
+				enum hl_cs_wait_status *status)
 {
 	struct hl_fence *fence;
 	unsigned long timeout;
-	long rc;
+	int rc = 0;
+	long completion_rc;
 
 	if (timeout_us == MAX_SCHEDULE_TIMEOUT)
 		timeout = timeout_us;
@@ -1360,11 +1376,17 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev,
 				seq, ctx->cs_sequence);
 	} else if (fence) {
 		if (!timeout_us)
-			rc = completion_done(&fence->completion);
+			completion_rc = completion_done(&fence->completion);
 		else
-			rc = wait_for_completion_interruptible_timeout(
+			completion_rc =
+				wait_for_completion_interruptible_timeout(
 					&fence->completion, timeout);
 
+		if (completion_rc > 0)
+			*status = CS_WAIT_STATUS_COMPLETED;
+		else
+			*status = CS_WAIT_STATUS_BUSY;
+
 		if (fence->error == -ETIMEDOUT)
 			rc = -ETIMEDOUT;
 		else if (fence->error == -EIO)
@@ -1375,7 +1397,7 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev,
 		dev_dbg(hdev->dev,
 			"Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
 			seq, ctx->cs_sequence);
-		rc = 1;
+		*status = CS_WAIT_STATUS_GONE;
 	}
 
 	hl_ctx_put(ctx);
@@ -1387,14 +1409,16 @@ int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 {
 	struct hl_device *hdev = hpriv->hdev;
 	union hl_wait_cs_args *args = data;
+	enum hl_cs_wait_status status;
 	u64 seq = args->in.seq;
-	long rc;
+	int rc;
 
-	rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq);
+	rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq,
+				&status);
 
 	memset(args, 0, sizeof(*args));
 
-	if (rc < 0) {
+	if (rc) {
 		if (rc == -ERESTARTSYS) {
 			dev_err_ratelimited(hdev->dev,
 				"user process got signal while waiting for CS handle %llu\n",
@@ -1415,10 +1439,18 @@ int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 		return rc;
 	}
 
-	if (rc == 0)
-		args->out.status = HL_WAIT_CS_STATUS_BUSY;
-	else
+	switch (status) {
+	case CS_WAIT_STATUS_GONE:
+		args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
+		fallthrough;
+	case CS_WAIT_STATUS_COMPLETED:
 		args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
+		break;
+	case CS_WAIT_STATUS_BUSY:
+	default:
+		args->out.status = HL_WAIT_CS_STATUS_BUSY;
+		break;
+	}
 
 	return 0;
 }
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 96eea49f48bc..808d20da024a 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -662,10 +662,13 @@ struct hl_wait_cs_in {
 #define HL_WAIT_CS_STATUS_ABORTED	3
 #define HL_WAIT_CS_STATUS_INTERRUPTED	4
 
+#define HL_WAIT_CS_STATUS_FLAG_GONE	0x1
+
 struct hl_wait_cs_out {
 	/* HL_WAIT_CS_STATUS_* */
 	__u32 status;
-	__u32 pad;
+	/* HL_WAIT_CS_STATUS_FLAG* */
+	__u32 flags;
 };
 
 union hl_wait_cs_args {
-- 
cgit v1.2.3


From bd2f477f2037d2638464dc105f16994994308c20 Mon Sep 17 00:00:00 2001
From: Ofir Bitton <obitton@habana.ai>
Date: Tue, 10 Nov 2020 17:26:22 +0200
Subject: habanalabs: add support for cs with timestamp

Add support for the user to request a timestamp upon
CS completion.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../misc/habanalabs/common/command_submission.c    | 44 ++++++++++++++++------
 drivers/misc/habanalabs/common/habanalabs.h        |  4 ++
 include/uapi/misc/habanalabs.h                     |  6 ++-
 3 files changed, 41 insertions(+), 13 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index f91b17480588..bd2f54399020 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -26,7 +26,7 @@ enum hl_cs_wait_status {
 static void job_wq_completion(struct work_struct *work);
 static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 				u64 timeout_us, u64 seq,
-				enum hl_cs_wait_status *status);
+				enum hl_cs_wait_status *status, s64 *timestamp);
 static void cs_do_release(struct kref *ref);
 
 static void hl_sob_reset(struct kref *ref)
@@ -150,6 +150,7 @@ static void hl_fence_init(struct hl_fence *fence)
 {
 	kref_init(&fence->refcount);
 	fence->error = 0;
+	fence->timestamp = ktime_set(0, 0);
 	init_completion(&fence->completion);
 }
 
@@ -404,6 +405,8 @@ out:
 	else if (!cs->submitted)
 		cs->fence->error = -EBUSY;
 
+	if (cs->timestamp)
+		cs->fence->timestamp = ktime_get();
 	complete_all(&cs->fence->completion);
 	hl_fence_put(cs->fence);
 
@@ -734,7 +737,8 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
 		return -EBUSY;
 	}
 
-	cs_type_flags = args->in.cs_flags & ~HL_CS_FLAGS_FORCE_RESTORE;
+	cs_type_flags = args->in.cs_flags &
+			~(HL_CS_FLAGS_FORCE_RESTORE | HL_CS_FLAGS_TIMESTAMP);
 
 	if (unlikely(cs_type_flags && !is_power_of_2(cs_type_flags))) {
 		dev_err(hdev->dev,
@@ -798,7 +802,7 @@ static int hl_cs_copy_chunk_array(struct hl_device *hdev,
 }
 
 static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
-				u32 num_chunks, u64 *cs_seq)
+				u32 num_chunks, u64 *cs_seq, bool timestamp)
 {
 	bool int_queues_only = true;
 	struct hl_device *hdev = hpriv->hdev;
@@ -825,6 +829,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 		goto free_cs_chunk_array;
 	}
 
+	cs->timestamp = !!timestamp;
 	*cs_seq = cs->sequence;
 
 	hl_debugfs_add_cs(cs);
@@ -995,7 +1000,7 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
 			rc = 0;
 		} else {
 			rc = cs_ioctl_default(hpriv, chunks, num_chunks,
-						cs_seq);
+						cs_seq, false);
 		}
 
 		mutex_unlock(&hpriv->restore_phase_mutex);
@@ -1013,7 +1018,7 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
 wait_again:
 			ret = _hl_cs_wait_ioctl(hdev, ctx,
 					jiffies_to_usecs(hdev->timeout_jiffies),
-					*cs_seq, &status);
+					*cs_seq, &status, NULL);
 			if (ret) {
 				if (ret == -ERESTARTSYS) {
 					usleep_range(100, 200);
@@ -1154,7 +1159,7 @@ static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
 
 static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 				void __user *chunks, u32 num_chunks,
-				u64 *cs_seq)
+				u64 *cs_seq, bool timestamp)
 {
 	struct hl_cs_chunk *cs_chunk_array, *chunk;
 	struct hw_queue_properties *hw_queue_prop;
@@ -1259,6 +1264,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 		goto free_cs_chunk_array;
 	}
 
+	cs->timestamp = !!timestamp;
+
 	/*
 	 * Save the signal CS fence for later initialization right before
 	 * hanging the wait CS on the queue.
@@ -1334,10 +1341,11 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
 	case CS_TYPE_WAIT:
 	case CS_TYPE_COLLECTIVE_WAIT:
 		rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks,
-						&cs_seq);
+			&cs_seq, args->in.cs_flags & HL_CS_FLAGS_TIMESTAMP);
 		break;
 	default:
-		rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq);
+		rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
+				args->in.cs_flags & HL_CS_FLAGS_TIMESTAMP);
 		break;
 	}
 
@@ -1353,13 +1361,16 @@ out:
 
 static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 				u64 timeout_us, u64 seq,
-				enum hl_cs_wait_status *status)
+				enum hl_cs_wait_status *status, s64 *timestamp)
 {
 	struct hl_fence *fence;
 	unsigned long timeout;
 	int rc = 0;
 	long completion_rc;
 
+	if (timestamp)
+		*timestamp = 0;
+
 	if (timeout_us == MAX_SCHEDULE_TIMEOUT)
 		timeout = timeout_us;
 	else
@@ -1382,10 +1393,13 @@ static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 				wait_for_completion_interruptible_timeout(
 					&fence->completion, timeout);
 
-		if (completion_rc > 0)
+		if (completion_rc > 0) {
 			*status = CS_WAIT_STATUS_COMPLETED;
-		else
+			if (timestamp)
+				*timestamp = ktime_to_ns(fence->timestamp);
+		} else {
 			*status = CS_WAIT_STATUS_BUSY;
+		}
 
 		if (fence->error == -ETIMEDOUT)
 			rc = -ETIMEDOUT;
@@ -1411,10 +1425,11 @@ int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 	union hl_wait_cs_args *args = data;
 	enum hl_cs_wait_status status;
 	u64 seq = args->in.seq;
+	s64 timestamp;
 	int rc;
 
 	rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq,
-				&status);
+				&status, &timestamp);
 
 	memset(args, 0, sizeof(*args));
 
@@ -1439,6 +1454,11 @@ int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 		return rc;
 	}
 
+	if (timestamp) {
+		args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
+		args->out.timestamp_nsec = timestamp;
+	}
+
 	switch (status) {
 	case CS_WAIT_STATUS_GONE:
 		args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 9c9c8b24c47a..8e2d164d97e8 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -481,12 +481,14 @@ struct asic_fixed_properties {
  * @completion: fence is implemented using completion
  * @refcount: refcount for this fence
  * @error: mark this fence with error
+ * @timestamp: timestamp upon completion
  *
  */
 struct hl_fence {
 	struct completion	completion;
 	struct kref		refcount;
 	int			error;
+	ktime_t			timestamp;
 };
 
 /**
@@ -1127,6 +1129,7 @@ struct hl_userptr {
  * @tdr_active: true if TDR was activated for this CS (to prevent
  *		double TDR activation).
  * @aborted: true if CS was aborted due to some device error.
+ * @timestamp: true if a timestmap must be captured upon completion
  */
 struct hl_cs {
 	u16			*jobs_in_queue_cnt;
@@ -1147,6 +1150,7 @@ struct hl_cs {
 	u8			timedout;
 	u8			tdr_active;
 	u8			aborted;
+	u8			timestamp;
 };
 
 /**
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 808d20da024a..6eff4e05eccb 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -592,6 +592,7 @@ struct hl_cs_chunk {
 #define HL_CS_FLAGS_SIGNAL		0x2
 #define HL_CS_FLAGS_WAIT		0x4
 #define HL_CS_FLAGS_COLLECTIVE_WAIT	0x8
+#define HL_CS_FLAGS_TIMESTAMP		0x20
 
 #define HL_CS_STATUS_SUCCESS		0
 
@@ -662,13 +663,16 @@ struct hl_wait_cs_in {
 #define HL_WAIT_CS_STATUS_ABORTED	3
 #define HL_WAIT_CS_STATUS_INTERRUPTED	4
 
-#define HL_WAIT_CS_STATUS_FLAG_GONE	0x1
+#define HL_WAIT_CS_STATUS_FLAG_GONE		0x1
+#define HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD	0x2
 
 struct hl_wait_cs_out {
 	/* HL_WAIT_CS_STATUS_* */
 	__u32 status;
 	/* HL_WAIT_CS_STATUS_FLAG* */
 	__u32 flags;
+	/* valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set */
+	__s64 timestamp_nsec;
 };
 
 union hl_wait_cs_args {
-- 
cgit v1.2.3


From f44afb5b5a5d04448da843b2fe872e01669bc317 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Wed, 2 Sep 2020 13:43:32 +0300
Subject: habanalabs: Add CB IOCTL opcode to retrieve CB information

Add a new CB IOCTL opcode that enables a user to query about a CB and
get its usage count.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/command_buffer.c | 38 +++++++++++++++++++++++++
 include/uapi/misc/habanalabs.h                  | 15 ++++++++--
 2 files changed, 51 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c
index 2856bb3423ee..6f6a904ab6ca 100644
--- a/drivers/misc/habanalabs/common/command_buffer.c
+++ b/drivers/misc/habanalabs/common/command_buffer.c
@@ -375,12 +375,43 @@ int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle)
 	return rc;
 }
 
+static int hl_cb_info(struct hl_device *hdev, struct hl_cb_mgr *mgr,
+			u64 cb_handle, u32 *usage_cnt)
+{
+	struct hl_cb *cb;
+	u32 handle;
+	int rc = 0;
+
+	/* The CB handle was given to user to do mmap, so need to shift it back
+	 * to the value which was allocated by the IDR module.
+	 */
+	cb_handle >>= PAGE_SHIFT;
+	handle = (u32) cb_handle;
+
+	spin_lock(&mgr->cb_lock);
+
+	cb = idr_find(&mgr->cb_handles, handle);
+	if (!cb) {
+		dev_err(hdev->dev,
+			"CB info failed, no match to handle 0x%x\n", handle);
+		rc = -EINVAL;
+		goto out;
+	}
+
+	*usage_cnt = atomic_read(&cb->cs_cnt);
+
+out:
+	spin_unlock(&mgr->cb_lock);
+	return rc;
+}
+
 int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
 {
 	union hl_cb_args *args = data;
 	struct hl_device *hdev = hpriv->hdev;
 	enum hl_device_status status;
 	u64 handle = 0;
+	u32 usage_cnt = 0;
 	int rc;
 
 	if (!hl_device_operational(hdev, &status)) {
@@ -413,6 +444,13 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
 					args->in.cb_handle);
 		break;
 
+	case HL_CB_OP_INFO:
+		rc = hl_cb_info(hdev, &hpriv->cb_mgr, args->in.cb_handle,
+				&usage_cnt);
+		memset(args, 0, sizeof(*args));
+		args->out.usage_cnt = usage_cnt;
+		break;
+
 	default:
 		rc = -ENOTTY;
 		break;
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 6eff4e05eccb..8c15a7d336a0 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -483,6 +483,8 @@ struct hl_info_args {
 #define HL_CB_OP_CREATE		0
 /* Opcode to destroy previously created command buffer */
 #define HL_CB_OP_DESTROY	1
+/* Opcode to retrieve information about a command buffer */
+#define HL_CB_OP_INFO		2
 
 /* 2MB minus 32 bytes for 2xMSG_PROT */
 #define HL_MAX_CB_SIZE		(0x200000 - 32)
@@ -506,8 +508,17 @@ struct hl_cb_in {
 };
 
 struct hl_cb_out {
-	/* Handle of CB */
-	__u64 cb_handle;
+	union {
+		/* Handle of CB */
+		__u64 cb_handle;
+
+		/* Information about CB */
+		struct {
+			/* Usage count of CB */
+			__u32 usage_cnt;
+			__u32 pad;
+		};
+	};
 };
 
 union hl_cb_args {
-- 
cgit v1.2.3


From 7fd3253a7de6a317a0683f83739479fb880bffc8 Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Mon, 30 Nov 2020 19:51:56 +0100
Subject: net: Introduce preferred busy-polling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The existing busy-polling mode, enabled by the SO_BUSY_POLL socket
option or system-wide using the /proc/sys/net/core/busy_read knob, is
opportunistic. That means that if the NAPI context is not
scheduled, it will poll it. If, after busy-polling, the budget is
exceeded, the busy-polling logic will schedule the NAPI onto the
regular softirq handling.

One implication of the behavior above is that a busy/heavily loaded
NAPI context will never enter/allow for busy-polling. Some applications
prefer that most NAPI processing would be done by busy-polling.

This series adds a new socket option, SO_PREFER_BUSY_POLL, that works
in concert with the napi_defer_hard_irqs and gro_flush_timeout
knobs. The napi_defer_hard_irqs and gro_flush_timeout knobs were
introduced in commit 6f8b12d661d0 ("net: napi: add hard irqs deferral
feature"), and allow a user to defer enabling interrupts and
instead schedule the NAPI context from a watchdog timer. When a user
enables SO_PREFER_BUSY_POLL, again with the other knobs enabled,
and the NAPI context is being processed by a softirq, the softirq NAPI
processing will exit early to allow the busy-polling to be performed.

If the application stops performing busy-polling via a system call,
the watchdog timer defined by gro_flush_timeout will time out, and
regular softirq handling will resume.

In summary: heavy-traffic applications that prefer busy-polling over
softirq processing should use this option.

Example usage:

  $ echo 2 | sudo tee /sys/class/net/ens785f1/napi_defer_hard_irqs
  $ echo 200000 | sudo tee /sys/class/net/ens785f1/gro_flush_timeout

Note that the timeout should be larger than the userspace processing
window, otherwise the watchdog will time out and fall back to regular
softirq processing.

Enable the SO_BUSY_POLL/SO_PREFER_BUSY_POLL options on your socket.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/bpf/20201130185205.196029-2-bjorn.topel@gmail.com
---
 arch/alpha/include/uapi/asm/socket.h  |  2 +
 arch/mips/include/uapi/asm/socket.h   |  2 +
 arch/parisc/include/uapi/asm/socket.h |  2 +
 arch/sparc/include/uapi/asm/socket.h  |  2 +
 fs/eventpoll.c                        |  2 +-
 include/linux/netdevice.h             | 35 +++++++++-------
 include/net/busy_poll.h               |  5 ++-
 include/net/sock.h                    |  4 ++
 include/uapi/asm-generic/socket.h     |  2 +
 net/core/dev.c                        | 78 ++++++++++++++++++++++++++++-------
 net/core/sock.c                       |  9 ++++
 11 files changed, 111 insertions(+), 32 deletions(-)

(limited to 'include/uapi')

diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index de6c4df61082..538359642554 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -124,6 +124,8 @@
 
 #define SO_DETACH_REUSEPORT_BPF 68
 
+#define SO_PREFER_BUSY_POLL	69
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index d0a9ed2ca2d6..e406e73b5e6e 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -135,6 +135,8 @@
 
 #define SO_DETACH_REUSEPORT_BPF 68
 
+#define SO_PREFER_BUSY_POLL	69
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index 10173c32195e..1bc46200889d 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -116,6 +116,8 @@
 
 #define SO_DETACH_REUSEPORT_BPF 0x4042
 
+#define SO_PREFER_BUSY_POLL	0x4043
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 8029b681fc7c..99688cf673a4 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -117,6 +117,8 @@
 
 #define SO_DETACH_REUSEPORT_BPF  0x0047
 
+#define SO_PREFER_BUSY_POLL	 0x0048
+
 #if !defined(__KERNEL__)
 
 
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 4df61129566d..e11fab3a0b9e 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -397,7 +397,7 @@ static void ep_busy_loop(struct eventpoll *ep, int nonblock)
 	unsigned int napi_id = READ_ONCE(ep->napi_id);
 
 	if ((napi_id >= MIN_NAPI_ID) && net_busy_loop_on())
-		napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end, ep);
+		napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end, ep, false);
 }
 
 static inline void ep_reset_busy_poll_napi_id(struct eventpoll *ep)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7ce648a564f7..52d1cc2bd8a7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -350,23 +350,25 @@ struct napi_struct {
 };
 
 enum {
-	NAPI_STATE_SCHED,	/* Poll is scheduled */
-	NAPI_STATE_MISSED,	/* reschedule a napi */
-	NAPI_STATE_DISABLE,	/* Disable pending */
-	NAPI_STATE_NPSVC,	/* Netpoll - don't dequeue from poll_list */
-	NAPI_STATE_LISTED,	/* NAPI added to system lists */
-	NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
-	NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
+	NAPI_STATE_SCHED,		/* Poll is scheduled */
+	NAPI_STATE_MISSED,		/* reschedule a napi */
+	NAPI_STATE_DISABLE,		/* Disable pending */
+	NAPI_STATE_NPSVC,		/* Netpoll - don't dequeue from poll_list */
+	NAPI_STATE_LISTED,		/* NAPI added to system lists */
+	NAPI_STATE_NO_BUSY_POLL,	/* Do not add in napi_hash, no busy polling */
+	NAPI_STATE_IN_BUSY_POLL,	/* sk_busy_loop() owns this NAPI */
+	NAPI_STATE_PREFER_BUSY_POLL,	/* prefer busy-polling over softirq processing*/
 };
 
 enum {
-	NAPIF_STATE_SCHED	 = BIT(NAPI_STATE_SCHED),
-	NAPIF_STATE_MISSED	 = BIT(NAPI_STATE_MISSED),
-	NAPIF_STATE_DISABLE	 = BIT(NAPI_STATE_DISABLE),
-	NAPIF_STATE_NPSVC	 = BIT(NAPI_STATE_NPSVC),
-	NAPIF_STATE_LISTED	 = BIT(NAPI_STATE_LISTED),
-	NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
-	NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
+	NAPIF_STATE_SCHED		= BIT(NAPI_STATE_SCHED),
+	NAPIF_STATE_MISSED		= BIT(NAPI_STATE_MISSED),
+	NAPIF_STATE_DISABLE		= BIT(NAPI_STATE_DISABLE),
+	NAPIF_STATE_NPSVC		= BIT(NAPI_STATE_NPSVC),
+	NAPIF_STATE_LISTED		= BIT(NAPI_STATE_LISTED),
+	NAPIF_STATE_NO_BUSY_POLL	= BIT(NAPI_STATE_NO_BUSY_POLL),
+	NAPIF_STATE_IN_BUSY_POLL	= BIT(NAPI_STATE_IN_BUSY_POLL),
+	NAPIF_STATE_PREFER_BUSY_POLL	= BIT(NAPI_STATE_PREFER_BUSY_POLL),
 };
 
 enum gro_result {
@@ -437,6 +439,11 @@ static inline bool napi_disable_pending(struct napi_struct *n)
 	return test_bit(NAPI_STATE_DISABLE, &n->state);
 }
 
+static inline bool napi_prefer_busy_poll(struct napi_struct *n)
+{
+	return test_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state);
+}
+
 bool napi_schedule_prep(struct napi_struct *n);
 
 /**
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index b001fa91c14e..0292b8353d7e 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -43,7 +43,7 @@ bool sk_busy_loop_end(void *p, unsigned long start_time);
 
 void napi_busy_loop(unsigned int napi_id,
 		    bool (*loop_end)(void *, unsigned long),
-		    void *loop_end_arg);
+		    void *loop_end_arg, bool prefer_busy_poll);
 
 #else /* CONFIG_NET_RX_BUSY_POLL */
 static inline unsigned long net_busy_loop_on(void)
@@ -105,7 +105,8 @@ static inline void sk_busy_loop(struct sock *sk, int nonblock)
 	unsigned int napi_id = READ_ONCE(sk->sk_napi_id);
 
 	if (napi_id >= MIN_NAPI_ID)
-		napi_busy_loop(napi_id, nonblock ? NULL : sk_busy_loop_end, sk);
+		napi_busy_loop(napi_id, nonblock ? NULL : sk_busy_loop_end, sk,
+			       READ_ONCE(sk->sk_prefer_busy_poll));
 #endif
 }
 
diff --git a/include/net/sock.h b/include/net/sock.h
index a5c6ae78df77..d49b89b071b6 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -301,6 +301,7 @@ struct bpf_local_storage;
   *	@sk_ack_backlog: current listen backlog
   *	@sk_max_ack_backlog: listen backlog set in listen()
   *	@sk_uid: user id of owner
+  *	@sk_prefer_busy_poll: prefer busypolling over softirq processing
   *	@sk_priority: %SO_PRIORITY setting
   *	@sk_type: socket type (%SOCK_STREAM, etc)
   *	@sk_protocol: which protocol this socket belongs in this network family
@@ -479,6 +480,9 @@ struct sock {
 	u32			sk_ack_backlog;
 	u32			sk_max_ack_backlog;
 	kuid_t			sk_uid;
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	u8			sk_prefer_busy_poll;
+#endif
 	struct pid		*sk_peer_pid;
 	const struct cred	*sk_peer_cred;
 	long			sk_rcvtimeo;
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 77f7c1638eb1..7dd02408b7ce 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -119,6 +119,8 @@
 
 #define SO_DETACH_REUSEPORT_BPF 68
 
+#define SO_PREFER_BUSY_POLL	69
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
diff --git a/net/core/dev.c b/net/core/dev.c
index 60d325bda0d7..6f8d2cffb7c5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6458,7 +6458,8 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
 
 		WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));
 
-		new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED);
+		new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED |
+			      NAPIF_STATE_PREFER_BUSY_POLL);
 
 		/* If STATE_MISSED was set, leave STATE_SCHED set,
 		 * because we will call napi->poll() one more time.
@@ -6497,8 +6498,29 @@ static struct napi_struct *napi_by_id(unsigned int napi_id)
 
 #define BUSY_POLL_BUDGET 8
 
-static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
+static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule)
 {
+	if (!skip_schedule) {
+		gro_normal_list(napi);
+		__napi_schedule(napi);
+		return;
+	}
+
+	if (napi->gro_bitmask) {
+		/* flush too old packets
+		 * If HZ < 1000, flush all packets.
+		 */
+		napi_gro_flush(napi, HZ >= 1000);
+	}
+
+	gro_normal_list(napi);
+	clear_bit(NAPI_STATE_SCHED, &napi->state);
+}
+
+static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool prefer_busy_poll)
+{
+	bool skip_schedule = false;
+	unsigned long timeout;
 	int rc;
 
 	/* Busy polling means there is a high chance device driver hard irq
@@ -6515,6 +6537,15 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
 
 	local_bh_disable();
 
+	if (prefer_busy_poll) {
+		napi->defer_hard_irqs_count = READ_ONCE(napi->dev->napi_defer_hard_irqs);
+		timeout = READ_ONCE(napi->dev->gro_flush_timeout);
+		if (napi->defer_hard_irqs_count && timeout) {
+			hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED);
+			skip_schedule = true;
+		}
+	}
+
 	/* All we really want here is to re-enable device interrupts.
 	 * Ideally, a new ndo_busy_poll_stop() could avoid another round.
 	 */
@@ -6525,19 +6556,14 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
 	 */
 	trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
 	netpoll_poll_unlock(have_poll_lock);
-	if (rc == BUSY_POLL_BUDGET) {
-		/* As the whole budget was spent, we still own the napi so can
-		 * safely handle the rx_list.
-		 */
-		gro_normal_list(napi);
-		__napi_schedule(napi);
-	}
+	if (rc == BUSY_POLL_BUDGET)
+		__busy_poll_stop(napi, skip_schedule);
 	local_bh_enable();
 }
 
 void napi_busy_loop(unsigned int napi_id,
 		    bool (*loop_end)(void *, unsigned long),
-		    void *loop_end_arg)
+		    void *loop_end_arg, bool prefer_busy_poll)
 {
 	unsigned long start_time = loop_end ? busy_loop_current_time() : 0;
 	int (*napi_poll)(struct napi_struct *napi, int budget);
@@ -6565,12 +6591,18 @@ restart:
 			 * we avoid dirtying napi->state as much as we can.
 			 */
 			if (val & (NAPIF_STATE_DISABLE | NAPIF_STATE_SCHED |
-				   NAPIF_STATE_IN_BUSY_POLL))
+				   NAPIF_STATE_IN_BUSY_POLL)) {
+				if (prefer_busy_poll)
+					set_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state);
 				goto count;
+			}
 			if (cmpxchg(&napi->state, val,
 				    val | NAPIF_STATE_IN_BUSY_POLL |
-					  NAPIF_STATE_SCHED) != val)
+					  NAPIF_STATE_SCHED) != val) {
+				if (prefer_busy_poll)
+					set_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state);
 				goto count;
+			}
 			have_poll_lock = netpoll_poll_lock(napi);
 			napi_poll = napi->poll;
 		}
@@ -6588,7 +6620,7 @@ count:
 
 		if (unlikely(need_resched())) {
 			if (napi_poll)
-				busy_poll_stop(napi, have_poll_lock);
+				busy_poll_stop(napi, have_poll_lock, prefer_busy_poll);
 			preempt_enable();
 			rcu_read_unlock();
 			cond_resched();
@@ -6599,7 +6631,7 @@ count:
 		cpu_relax();
 	}
 	if (napi_poll)
-		busy_poll_stop(napi, have_poll_lock);
+		busy_poll_stop(napi, have_poll_lock, prefer_busy_poll);
 	preempt_enable();
 out:
 	rcu_read_unlock();
@@ -6650,8 +6682,10 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
 	 * NAPI_STATE_MISSED, since we do not react to a device IRQ.
 	 */
 	if (!napi_disable_pending(napi) &&
-	    !test_and_set_bit(NAPI_STATE_SCHED, &napi->state))
+	    !test_and_set_bit(NAPI_STATE_SCHED, &napi->state)) {
+		clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state);
 		__napi_schedule_irqoff(napi);
+	}
 
 	return HRTIMER_NORESTART;
 }
@@ -6709,6 +6743,7 @@ void napi_disable(struct napi_struct *n)
 
 	hrtimer_cancel(&n->timer);
 
+	clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state);
 	clear_bit(NAPI_STATE_DISABLE, &n->state);
 }
 EXPORT_SYMBOL(napi_disable);
@@ -6781,6 +6816,19 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
 		goto out_unlock;
 	}
 
+	/* The NAPI context has more processing work, but busy-polling
+	 * is preferred. Exit early.
+	 */
+	if (napi_prefer_busy_poll(n)) {
+		if (napi_complete_done(n, work)) {
+			/* If timeout is not set, we need to make sure
+			 * that the NAPI is re-scheduled.
+			 */
+			napi_schedule(n);
+		}
+		goto out_unlock;
+	}
+
 	if (n->gro_bitmask) {
 		/* flush too old packets
 		 * If HZ < 1000, flush all packets.
diff --git a/net/core/sock.c b/net/core/sock.c
index 727ea1cc633c..e05f2e52b5a8 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1159,6 +1159,12 @@ set_sndbuf:
 				sk->sk_ll_usec = val;
 		}
 		break;
+	case SO_PREFER_BUSY_POLL:
+		if (valbool && !capable(CAP_NET_ADMIN))
+			ret = -EPERM;
+		else
+			WRITE_ONCE(sk->sk_prefer_busy_poll, valbool);
+		break;
 #endif
 
 	case SO_MAX_PACING_RATE:
@@ -1523,6 +1529,9 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 	case SO_BUSY_POLL:
 		v.val = sk->sk_ll_usec;
 		break;
+	case SO_PREFER_BUSY_POLL:
+		v.val = READ_ONCE(sk->sk_prefer_busy_poll);
+		break;
 #endif
 
 	case SO_MAX_PACING_RATE:
-- 
cgit v1.2.3


From 7c951cafc0cb2e575f1d58677b95ac387ac0a5bd Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Mon, 30 Nov 2020 19:51:57 +0100
Subject: net: Add SO_BUSY_POLL_BUDGET socket option
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This option lets a user set a per-socket NAPI budget for
busy-polling. If the option is not set, it will use the default of 8.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/bpf/20201130185205.196029-3-bjorn.topel@gmail.com
---
 arch/alpha/include/uapi/asm/socket.h  |  1 +
 arch/mips/include/uapi/asm/socket.h   |  1 +
 arch/parisc/include/uapi/asm/socket.h |  1 +
 arch/sparc/include/uapi/asm/socket.h  |  1 +
 fs/eventpoll.c                        |  3 ++-
 include/net/busy_poll.h               |  7 +++++--
 include/net/sock.h                    |  2 ++
 include/uapi/asm-generic/socket.h     |  1 +
 net/core/dev.c                        | 21 ++++++++++-----------
 net/core/sock.c                       | 10 ++++++++++
 10 files changed, 34 insertions(+), 14 deletions(-)

(limited to 'include/uapi')

diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 538359642554..57420356ce4c 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -125,6 +125,7 @@
 #define SO_DETACH_REUSEPORT_BPF 68
 
 #define SO_PREFER_BUSY_POLL	69
+#define SO_BUSY_POLL_BUDGET	70
 
 #if !defined(__KERNEL__)
 
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index e406e73b5e6e..2d949969313b 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -136,6 +136,7 @@
 #define SO_DETACH_REUSEPORT_BPF 68
 
 #define SO_PREFER_BUSY_POLL	69
+#define SO_BUSY_POLL_BUDGET	70
 
 #if !defined(__KERNEL__)
 
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index 1bc46200889d..f60904329bbc 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -117,6 +117,7 @@
 #define SO_DETACH_REUSEPORT_BPF 0x4042
 
 #define SO_PREFER_BUSY_POLL	0x4043
+#define SO_BUSY_POLL_BUDGET	0x4044
 
 #if !defined(__KERNEL__)
 
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 99688cf673a4..848a22fbac20 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -118,6 +118,7 @@
 #define SO_DETACH_REUSEPORT_BPF  0x0047
 
 #define SO_PREFER_BUSY_POLL	 0x0048
+#define SO_BUSY_POLL_BUDGET	 0x0049
 
 #if !defined(__KERNEL__)
 
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index e11fab3a0b9e..73c346e503d7 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -397,7 +397,8 @@ static void ep_busy_loop(struct eventpoll *ep, int nonblock)
 	unsigned int napi_id = READ_ONCE(ep->napi_id);
 
 	if ((napi_id >= MIN_NAPI_ID) && net_busy_loop_on())
-		napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end, ep, false);
+		napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end, ep, false,
+			       BUSY_POLL_BUDGET);
 }
 
 static inline void ep_reset_busy_poll_napi_id(struct eventpoll *ep)
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index 0292b8353d7e..2f8f51807b83 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -23,6 +23,8 @@
  */
 #define MIN_NAPI_ID ((unsigned int)(NR_CPUS + 1))
 
+#define BUSY_POLL_BUDGET 8
+
 #ifdef CONFIG_NET_RX_BUSY_POLL
 
 struct napi_struct;
@@ -43,7 +45,7 @@ bool sk_busy_loop_end(void *p, unsigned long start_time);
 
 void napi_busy_loop(unsigned int napi_id,
 		    bool (*loop_end)(void *, unsigned long),
-		    void *loop_end_arg, bool prefer_busy_poll);
+		    void *loop_end_arg, bool prefer_busy_poll, u16 budget);
 
 #else /* CONFIG_NET_RX_BUSY_POLL */
 static inline unsigned long net_busy_loop_on(void)
@@ -106,7 +108,8 @@ static inline void sk_busy_loop(struct sock *sk, int nonblock)
 
 	if (napi_id >= MIN_NAPI_ID)
 		napi_busy_loop(napi_id, nonblock ? NULL : sk_busy_loop_end, sk,
-			       READ_ONCE(sk->sk_prefer_busy_poll));
+			       READ_ONCE(sk->sk_prefer_busy_poll),
+			       READ_ONCE(sk->sk_busy_poll_budget) ?: BUSY_POLL_BUDGET);
 #endif
 }
 
diff --git a/include/net/sock.h b/include/net/sock.h
index d49b89b071b6..77ba2c2737db 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -302,6 +302,7 @@ struct bpf_local_storage;
   *	@sk_max_ack_backlog: listen backlog set in listen()
   *	@sk_uid: user id of owner
   *	@sk_prefer_busy_poll: prefer busypolling over softirq processing
+  *	@sk_busy_poll_budget: napi processing budget when busypolling
   *	@sk_priority: %SO_PRIORITY setting
   *	@sk_type: socket type (%SOCK_STREAM, etc)
   *	@sk_protocol: which protocol this socket belongs in this network family
@@ -482,6 +483,7 @@ struct sock {
 	kuid_t			sk_uid;
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	u8			sk_prefer_busy_poll;
+	u16			sk_busy_poll_budget;
 #endif
 	struct pid		*sk_peer_pid;
 	const struct cred	*sk_peer_cred;
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 7dd02408b7ce..4dcd13d097a9 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -120,6 +120,7 @@
 #define SO_DETACH_REUSEPORT_BPF 68
 
 #define SO_PREFER_BUSY_POLL	69
+#define SO_BUSY_POLL_BUDGET	70
 
 #if !defined(__KERNEL__)
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 6f8d2cffb7c5..7a1e5936c67f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6496,8 +6496,6 @@ static struct napi_struct *napi_by_id(unsigned int napi_id)
 
 #if defined(CONFIG_NET_RX_BUSY_POLL)
 
-#define BUSY_POLL_BUDGET 8
-
 static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule)
 {
 	if (!skip_schedule) {
@@ -6517,7 +6515,8 @@ static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule)
 	clear_bit(NAPI_STATE_SCHED, &napi->state);
 }
 
-static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool prefer_busy_poll)
+static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool prefer_busy_poll,
+			   u16 budget)
 {
 	bool skip_schedule = false;
 	unsigned long timeout;
@@ -6549,21 +6548,21 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool
 	/* All we really want here is to re-enable device interrupts.
 	 * Ideally, a new ndo_busy_poll_stop() could avoid another round.
 	 */
-	rc = napi->poll(napi, BUSY_POLL_BUDGET);
+	rc = napi->poll(napi, budget);
 	/* We can't gro_normal_list() here, because napi->poll() might have
 	 * rearmed the napi (napi_complete_done()) in which case it could
 	 * already be running on another CPU.
 	 */
-	trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
+	trace_napi_poll(napi, rc, budget);
 	netpoll_poll_unlock(have_poll_lock);
-	if (rc == BUSY_POLL_BUDGET)
+	if (rc == budget)
 		__busy_poll_stop(napi, skip_schedule);
 	local_bh_enable();
 }
 
 void napi_busy_loop(unsigned int napi_id,
 		    bool (*loop_end)(void *, unsigned long),
-		    void *loop_end_arg, bool prefer_busy_poll)
+		    void *loop_end_arg, bool prefer_busy_poll, u16 budget)
 {
 	unsigned long start_time = loop_end ? busy_loop_current_time() : 0;
 	int (*napi_poll)(struct napi_struct *napi, int budget);
@@ -6606,8 +6605,8 @@ restart:
 			have_poll_lock = netpoll_poll_lock(napi);
 			napi_poll = napi->poll;
 		}
-		work = napi_poll(napi, BUSY_POLL_BUDGET);
-		trace_napi_poll(napi, work, BUSY_POLL_BUDGET);
+		work = napi_poll(napi, budget);
+		trace_napi_poll(napi, work, budget);
 		gro_normal_list(napi);
 count:
 		if (work > 0)
@@ -6620,7 +6619,7 @@ count:
 
 		if (unlikely(need_resched())) {
 			if (napi_poll)
-				busy_poll_stop(napi, have_poll_lock, prefer_busy_poll);
+				busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget);
 			preempt_enable();
 			rcu_read_unlock();
 			cond_resched();
@@ -6631,7 +6630,7 @@ count:
 		cpu_relax();
 	}
 	if (napi_poll)
-		busy_poll_stop(napi, have_poll_lock, prefer_busy_poll);
+		busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget);
 	preempt_enable();
 out:
 	rcu_read_unlock();
diff --git a/net/core/sock.c b/net/core/sock.c
index e05f2e52b5a8..d422a6808405 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1165,6 +1165,16 @@ set_sndbuf:
 		else
 			WRITE_ONCE(sk->sk_prefer_busy_poll, valbool);
 		break;
+	case SO_BUSY_POLL_BUDGET:
+		if (val > READ_ONCE(sk->sk_busy_poll_budget) && !capable(CAP_NET_ADMIN)) {
+			ret = -EPERM;
+		} else {
+			if (val < 0 || val > U16_MAX)
+				ret = -EINVAL;
+			else
+				WRITE_ONCE(sk->sk_busy_poll_budget, val);
+		}
+		break;
 #endif
 
 	case SO_MAX_PACING_RATE:
-- 
cgit v1.2.3


From e8372d9d21451a2f2947c2b63b5184f3d4d0bff9 Mon Sep 17 00:00:00 2001
From: Guvenc Gulce <guvenc@linux.ibm.com>
Date: Tue, 1 Dec 2020 20:20:43 +0100
Subject: net/smc: Introduce generic netlink interface for diagnostic purposes

Introduce the generic netlink interface infrastructure used to expose
diagnostic information about SMC linkgroups, links and devices.

Signed-off-by: Guvenc Gulce <guvenc@linux.ibm.com>
Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/smc.h | 11 ++++++++++
 net/smc/Makefile         |  2 +-
 net/smc/af_smc.c         | 10 ++++++++-
 net/smc/smc_netlink.c    | 53 ++++++++++++++++++++++++++++++++++++++++++++++++
 net/smc/smc_netlink.h    | 23 +++++++++++++++++++++
 5 files changed, 97 insertions(+), 2 deletions(-)
 create mode 100644 net/smc/smc_netlink.c
 create mode 100644 net/smc/smc_netlink.h

(limited to 'include/uapi')

diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h
index 0e11ca421ca4..b604d64542e8 100644
--- a/include/uapi/linux/smc.h
+++ b/include/uapi/linux/smc.h
@@ -33,4 +33,15 @@ enum {				/* SMC PNET Table commands */
 #define SMCR_GENL_FAMILY_NAME		"SMC_PNETID"
 #define SMCR_GENL_FAMILY_VERSION	1
 
+/* gennetlink interface to access non-socket information from SMC module */
+#define SMC_GENL_FAMILY_NAME		"SMC_GEN_NETLINK"
+#define SMC_GENL_FAMILY_VERSION		1
+
+/* SMC_GENL_FAMILY top level attributes */
+enum {
+	SMC_GEN_UNSPEC,
+	__SMC_GEN_MAX,
+	SMC_GEN_MAX = __SMC_GEN_MAX - 1
+};
+
 #endif /* _UAPI_LINUX_SMC_H */
diff --git a/net/smc/Makefile b/net/smc/Makefile
index cb1254541f37..77e54fe42b1c 100644
--- a/net/smc/Makefile
+++ b/net/smc/Makefile
@@ -2,4 +2,4 @@
 obj-$(CONFIG_SMC)	+= smc.o
 obj-$(CONFIG_SMC_DIAG)	+= smc_diag.o
 smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
-smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o
+smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index f79b59a972f0..47340b3b514f 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -45,6 +45,7 @@
 #include "smc_ib.h"
 #include "smc_ism.h"
 #include "smc_pnet.h"
+#include "smc_netlink.h"
 #include "smc_tx.h"
 #include "smc_rx.h"
 #include "smc_close.h"
@@ -2495,10 +2496,14 @@ static int __init smc_init(void)
 	smc_ism_init();
 	smc_clc_init();
 
-	rc = smc_pnet_init();
+	rc = smc_nl_init();
 	if (rc)
 		goto out_pernet_subsys;
 
+	rc = smc_pnet_init();
+	if (rc)
+		goto out_nl;
+
 	rc = -ENOMEM;
 	smc_hs_wq = alloc_workqueue("smc_hs_wq", 0, 0);
 	if (!smc_hs_wq)
@@ -2569,6 +2574,8 @@ out_alloc_hs_wq:
 	destroy_workqueue(smc_hs_wq);
 out_pnet:
 	smc_pnet_exit();
+out_nl:
+	smc_nl_exit();
 out_pernet_subsys:
 	unregister_pernet_subsys(&smc_net_ops);
 
@@ -2586,6 +2593,7 @@ static void __exit smc_exit(void)
 	proto_unregister(&smc_proto6);
 	proto_unregister(&smc_proto);
 	smc_pnet_exit();
+	smc_nl_exit();
 	unregister_pernet_subsys(&smc_net_ops);
 	rcu_barrier();
 }
diff --git a/net/smc/smc_netlink.c b/net/smc/smc_netlink.c
new file mode 100644
index 000000000000..4f964d03b372
--- /dev/null
+++ b/net/smc/smc_netlink.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ *  Generic netlink support functions to interact with SMC module
+ *
+ *  Copyright IBM Corp. 2020
+ *
+ *  Author(s):	Guvenc Gulce <guvenc@linux.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/ctype.h>
+#include <linux/mutex.h>
+#include <linux/if.h>
+#include <linux/smc.h>
+
+#include "smc_core.h"
+#include "smc_netlink.h"
+
+#define SMC_CMD_MAX_ATTR 1
+
+/* SMC_GENL generic netlink operation definition */
+static const struct genl_ops smc_gen_nl_ops[] = {
+};
+
+static const struct nla_policy smc_gen_nl_policy[2] = {
+	[SMC_CMD_MAX_ATTR]	= { .type = NLA_REJECT, },
+};
+
+/* SMC_GENL family definition */
+struct genl_family smc_gen_nl_family __ro_after_init = {
+	.hdrsize =	0,
+	.name =		SMC_GENL_FAMILY_NAME,
+	.version =	SMC_GENL_FAMILY_VERSION,
+	.maxattr =	SMC_CMD_MAX_ATTR,
+	.policy =	smc_gen_nl_policy,
+	.netnsok =	true,
+	.module =	THIS_MODULE,
+	.ops =		smc_gen_nl_ops,
+	.n_ops =	ARRAY_SIZE(smc_gen_nl_ops)
+};
+
+int __init smc_nl_init(void)
+{
+	return genl_register_family(&smc_gen_nl_family);
+}
+
+void smc_nl_exit(void)
+{
+	genl_unregister_family(&smc_gen_nl_family);
+}
diff --git a/net/smc/smc_netlink.h b/net/smc/smc_netlink.h
new file mode 100644
index 000000000000..0c757232c0d0
--- /dev/null
+++ b/net/smc/smc_netlink.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ *  SMC Generic netlink operations
+ *
+ *  Copyright IBM Corp. 2020
+ *
+ *  Author(s):	Guvenc Gulce <guvenc@linux.ibm.com>
+ */
+
+#ifndef _SMC_NETLINK_H
+#define _SMC_NETLINK_H
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+extern struct genl_family smc_gen_nl_family;
+
+int smc_nl_init(void) __init;
+void smc_nl_exit(void);
+
+#endif
-- 
cgit v1.2.3


From 099b990bd11a3a96b5d59973f482018e5cbde6c3 Mon Sep 17 00:00:00 2001
From: Guvenc Gulce <guvenc@linux.ibm.com>
Date: Tue, 1 Dec 2020 20:20:44 +0100
Subject: net/smc: Add support for obtaining system information

Add a new netlink command to obtain system information
from the SMC module.

Signed-off-by: Guvenc Gulce <guvenc@linux.ibm.com>
Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/smc.h | 18 +++++++++++++++
 net/smc/smc_clc.c        |  5 ++++
 net/smc/smc_clc.h        |  1 +
 net/smc/smc_core.c       | 60 ++++++++++++++++++++++++++++++++++++++++++++++++
 net/smc/smc_core.h       |  2 ++
 net/smc/smc_netlink.c    |  5 ++++
 net/smc/smc_netlink.h    |  9 ++++++++
 7 files changed, 100 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h
index b604d64542e8..1b8d4e770be9 100644
--- a/include/uapi/linux/smc.h
+++ b/include/uapi/linux/smc.h
@@ -37,11 +37,29 @@ enum {				/* SMC PNET Table commands */
 #define SMC_GENL_FAMILY_NAME		"SMC_GEN_NETLINK"
 #define SMC_GENL_FAMILY_VERSION		1
 
+/* SMC_GENL_FAMILY commands */
+enum {
+	SMC_NETLINK_GET_SYS_INFO = 1,
+};
+
 /* SMC_GENL_FAMILY top level attributes */
 enum {
 	SMC_GEN_UNSPEC,
+	SMC_GEN_SYS_INFO,		/* nest */
 	__SMC_GEN_MAX,
 	SMC_GEN_MAX = __SMC_GEN_MAX - 1
 };
 
+/* SMC_GEN_SYS_INFO attributes */
+enum {
+	SMC_NLA_SYS_UNSPEC,
+	SMC_NLA_SYS_VER,		/* u8 */
+	SMC_NLA_SYS_REL,		/* u8 */
+	SMC_NLA_SYS_IS_ISM_V2,		/* u8 */
+	SMC_NLA_SYS_LOCAL_HOST,		/* string */
+	SMC_NLA_SYS_SEID,		/* string */
+	__SMC_NLA_SYS_MAX,
+	SMC_NLA_SYS_MAX = __SMC_NLA_SYS_MAX - 1
+};
+
 #endif /* _UAPI_LINUX_SMC_H */
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 696d89c2dce4..e286dafd6e88 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -772,6 +772,11 @@ int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact,
 	return len > 0 ? 0 : len;
 }
 
+void smc_clc_get_hostname(u8 **host)
+{
+	*host = &smc_hostname[0];
+}
+
 void __init smc_clc_init(void)
 {
 	struct new_utsname *u;
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 49752c997c51..32d37f7b70f2 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -334,5 +334,6 @@ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
 int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact,
 			u8 version);
 void smc_clc_init(void) __init;
+void smc_clc_get_hostname(u8 **host);
 
 #endif
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 0088511e30bf..c7b1c62c2f2e 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -16,6 +16,8 @@
 #include <linux/wait.h>
 #include <linux/reboot.h>
 #include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/smc.h>
 #include <net/tcp.h>
 #include <net/sock.h>
 #include <rdma/ib_verbs.h>
@@ -30,6 +32,7 @@
 #include "smc_cdc.h"
 #include "smc_close.h"
 #include "smc_ism.h"
+#include "smc_netlink.h"
 
 #define SMC_LGR_NUM_INCR		256
 #define SMC_LGR_FREE_DELAY_SERV		(600 * HZ)
@@ -214,6 +217,63 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
 	conn->lgr = NULL;
 }
 
+int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
+	char hostname[SMC_MAX_HOSTNAME_LEN + 1];
+	char smc_seid[SMC_MAX_EID_LEN + 1];
+	struct smcd_dev *smcd_dev;
+	struct nlattr *attrs;
+	u8 *seid = NULL;
+	u8 *host = NULL;
+	void *nlh;
+
+	nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+			  &smc_gen_nl_family, NLM_F_MULTI,
+			  SMC_NETLINK_GET_SYS_INFO);
+	if (!nlh)
+		goto errmsg;
+	if (cb_ctx->pos[0])
+		goto errout;
+	attrs = nla_nest_start(skb, SMC_GEN_SYS_INFO);
+	if (!attrs)
+		goto errout;
+	if (nla_put_u8(skb, SMC_NLA_SYS_VER, SMC_V2))
+		goto errattr;
+	if (nla_put_u8(skb, SMC_NLA_SYS_REL, SMC_RELEASE))
+		goto errattr;
+	if (nla_put_u8(skb, SMC_NLA_SYS_IS_ISM_V2, smc_ism_is_v2_capable()))
+		goto errattr;
+	smc_clc_get_hostname(&host);
+	if (host) {
+		snprintf(hostname, sizeof(hostname), "%s", host);
+		if (nla_put_string(skb, SMC_NLA_SYS_LOCAL_HOST, hostname))
+			goto errattr;
+	}
+	mutex_lock(&smcd_dev_list.mutex);
+	smcd_dev = list_first_entry_or_null(&smcd_dev_list.list,
+					    struct smcd_dev, list);
+	if (smcd_dev)
+		smc_ism_get_system_eid(smcd_dev, &seid);
+	mutex_unlock(&smcd_dev_list.mutex);
+	if (seid && smc_ism_is_v2_capable()) {
+		snprintf(smc_seid, sizeof(smc_seid), "%s", seid);
+		if (nla_put_string(skb, SMC_NLA_SYS_SEID, smc_seid))
+			goto errattr;
+	}
+	nla_nest_end(skb, attrs);
+	genlmsg_end(skb, nlh);
+	cb_ctx->pos[0] = 1;
+	return skb->len;
+
+errattr:
+	nla_nest_cancel(skb, attrs);
+errout:
+	genlmsg_cancel(skb, nlh);
+errmsg:
+	return skb->len;
+}
+
 void smc_lgr_cleanup_early(struct smc_connection *conn)
 {
 	struct smc_link_group *lgr = conn->lgr;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 3a1bb8e4b81f..eaed25d4e76b 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -14,6 +14,7 @@
 
 #include <linux/atomic.h>
 #include <rdma/ib_verbs.h>
+#include <net/genetlink.h>
 
 #include "smc.h"
 #include "smc_ib.h"
@@ -413,6 +414,7 @@ struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
 				  struct smc_link *from_lnk, bool is_dev_err);
 void smcr_link_down_cond(struct smc_link *lnk);
 void smcr_link_down_cond_sched(struct smc_link *lnk);
+int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb);
 
 static inline struct smc_link_group *smc_get_lgr(struct smc_link *link)
 {
diff --git a/net/smc/smc_netlink.c b/net/smc/smc_netlink.c
index 4f964d03b372..ce06d269a54b 100644
--- a/net/smc/smc_netlink.c
+++ b/net/smc/smc_netlink.c
@@ -23,6 +23,11 @@
 
 /* SMC_GENL generic netlink operation definition */
 static const struct genl_ops smc_gen_nl_ops[] = {
+	{
+		.cmd = SMC_NETLINK_GET_SYS_INFO,
+		/* can be retrieved by unprivileged users */
+		.dumpit = smc_nl_get_sys_info,
+	},
 };
 
 static const struct nla_policy smc_gen_nl_policy[2] = {
diff --git a/net/smc/smc_netlink.h b/net/smc/smc_netlink.h
index 0c757232c0d0..3477265cba6c 100644
--- a/net/smc/smc_netlink.h
+++ b/net/smc/smc_netlink.h
@@ -17,6 +17,15 @@
 
 extern struct genl_family smc_gen_nl_family;
 
+struct smc_nl_dmp_ctx {
+	int pos[2];
+};
+
+static inline struct smc_nl_dmp_ctx *smc_nl_dmp_ctx(struct netlink_callback *c)
+{
+	return (struct smc_nl_dmp_ctx *)c->ctx;
+}
+
 int smc_nl_init(void) __init;
 void smc_nl_exit(void);
 
-- 
cgit v1.2.3


From e9b8c845cb342a3ab3d92235a54d0d1ad06d7204 Mon Sep 17 00:00:00 2001
From: Guvenc Gulce <guvenc@linux.ibm.com>
Date: Tue, 1 Dec 2020 20:20:45 +0100
Subject: net/smc: Introduce SMCR get linkgroup command

Introduce the get linkgroup command, which loops through
all available SMC-R linkgroups. It uses the SMC-R linkgroup
list as its entry point rather than the socket list, which makes
linkgroup diagnosis possible even when a linkgroup no longer
contains any active connections.

Signed-off-by: Guvenc Gulce <guvenc@linux.ibm.com>
Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/smc.h | 15 +++++++++
 net/smc/smc_core.c       | 85 ++++++++++++++++++++++++++++++++++++++++++++++++
 net/smc/smc_core.h       |  1 +
 net/smc/smc_netlink.c    |  5 +++
 4 files changed, 106 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h
index 1b8d4e770be9..3ae8ca4e5256 100644
--- a/include/uapi/linux/smc.h
+++ b/include/uapi/linux/smc.h
@@ -40,12 +40,14 @@ enum {				/* SMC PNET Table commands */
 /* SMC_GENL_FAMILY commands */
 enum {
 	SMC_NETLINK_GET_SYS_INFO = 1,
+	SMC_NETLINK_GET_LGR_SMCR,
 };
 
 /* SMC_GENL_FAMILY top level attributes */
 enum {
 	SMC_GEN_UNSPEC,
 	SMC_GEN_SYS_INFO,		/* nest */
+	SMC_GEN_LGR_SMCR,		/* nest */
 	__SMC_GEN_MAX,
 	SMC_GEN_MAX = __SMC_GEN_MAX - 1
 };
@@ -62,4 +64,17 @@ enum {
 	SMC_NLA_SYS_MAX = __SMC_NLA_SYS_MAX - 1
 };
 
+/* SMC_GEN_LGR_SMCR attributes */
+enum {
+	SMC_NLA_LGR_R_UNSPEC,
+	SMC_NLA_LGR_R_ID,		/* u32 */
+	SMC_NLA_LGR_R_ROLE,		/* u8 */
+	SMC_NLA_LGR_R_TYPE,		/* u8 */
+	SMC_NLA_LGR_R_PNETID,		/* string */
+	SMC_NLA_LGR_R_VLAN_ID,		/* u8 */
+	SMC_NLA_LGR_R_CONNS_NUM,	/* u32 */
+	__SMC_NLA_LGR_R_MAX,
+	SMC_NLA_LGR_R_MAX = __SMC_NLA_LGR_R_MAX - 1
+};
+
 #endif /* _UAPI_LINUX_SMC_H */
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index c7b1c62c2f2e..e21d068191ad 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -274,6 +274,91 @@ errmsg:
 	return skb->len;
 }
 
+static int smc_nl_fill_lgr(struct smc_link_group *lgr,
+			   struct sk_buff *skb,
+			   struct netlink_callback *cb)
+{
+	char smc_target[SMC_MAX_PNETID_LEN + 1];
+	struct nlattr *attrs;
+
+	attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCR);
+	if (!attrs)
+		goto errout;
+
+	if (nla_put_u32(skb, SMC_NLA_LGR_R_ID, *((u32 *)&lgr->id)))
+		goto errattr;
+	if (nla_put_u32(skb, SMC_NLA_LGR_R_CONNS_NUM, lgr->conns_num))
+		goto errattr;
+	if (nla_put_u8(skb, SMC_NLA_LGR_R_ROLE, lgr->role))
+		goto errattr;
+	if (nla_put_u8(skb, SMC_NLA_LGR_R_TYPE, lgr->type))
+		goto errattr;
+	if (nla_put_u8(skb, SMC_NLA_LGR_R_VLAN_ID, lgr->vlan_id))
+		goto errattr;
+	snprintf(smc_target, sizeof(smc_target), "%s", lgr->pnet_id);
+	if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target))
+		goto errattr;
+
+	nla_nest_end(skb, attrs);
+	return 0;
+errattr:
+	nla_nest_cancel(skb, attrs);
+errout:
+	return -EMSGSIZE;
+}
+
+static int smc_nl_handle_lgr(struct smc_link_group *lgr,
+			     struct sk_buff *skb,
+			     struct netlink_callback *cb)
+{
+	void *nlh;
+
+	nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+			  &smc_gen_nl_family, NLM_F_MULTI,
+			  SMC_NETLINK_GET_LGR_SMCR);
+	if (!nlh)
+		goto errmsg;
+	if (smc_nl_fill_lgr(lgr, skb, cb))
+		goto errout;
+
+	genlmsg_end(skb, nlh);
+	return 0;
+
+errout:
+	genlmsg_cancel(skb, nlh);
+errmsg:
+	return -EMSGSIZE;
+}
+
+static void smc_nl_fill_lgr_list(struct smc_lgr_list *smc_lgr,
+				 struct sk_buff *skb,
+				 struct netlink_callback *cb)
+{
+	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
+	struct smc_link_group *lgr;
+	int snum = cb_ctx->pos[0];
+	int num = 0;
+
+	spin_lock_bh(&smc_lgr->lock);
+	list_for_each_entry(lgr, &smc_lgr->list, list) {
+		if (num < snum)
+			goto next;
+		if (smc_nl_handle_lgr(lgr, skb, cb))
+			goto errout;
+next:
+		num++;
+	}
+errout:
+	spin_unlock_bh(&smc_lgr->lock);
+	cb_ctx->pos[0] = num;
+}
+
+int smcr_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	smc_nl_fill_lgr_list(&smc_lgr_list, skb, cb);
+	return skb->len;
+}
+
 void smc_lgr_cleanup_early(struct smc_connection *conn)
 {
 	struct smc_link_group *lgr = conn->lgr;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index eaed25d4e76b..662315beb605 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -415,6 +415,7 @@ struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
 void smcr_link_down_cond(struct smc_link *lnk);
 void smcr_link_down_cond_sched(struct smc_link *lnk);
 int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb);
+int smcr_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb);
 
 static inline struct smc_link_group *smc_get_lgr(struct smc_link *link)
 {
diff --git a/net/smc/smc_netlink.c b/net/smc/smc_netlink.c
index ce06d269a54b..490da56c8d3c 100644
--- a/net/smc/smc_netlink.c
+++ b/net/smc/smc_netlink.c
@@ -28,6 +28,11 @@ static const struct genl_ops smc_gen_nl_ops[] = {
 		/* can be retrieved by unprivileged users */
 		.dumpit = smc_nl_get_sys_info,
 	},
+	{
+		.cmd = SMC_NETLINK_GET_LGR_SMCR,
+		/* can be retrieved by unprivileged users */
+		.dumpit = smcr_nl_get_lgr,
+	},
 };
 
 static const struct nla_policy smc_gen_nl_policy[2] = {
-- 
cgit v1.2.3


From 5a7e09d58f3fe2f0d5e8f0da4b1f686491245eb5 Mon Sep 17 00:00:00 2001
From: Guvenc Gulce <guvenc@linux.ibm.com>
Date: Tue, 1 Dec 2020 20:20:46 +0100
Subject: net/smc: Introduce SMCR get link command

Introduce the get link command, which loops through
all available links of all available linkgroups. It
uses the SMC-R linkgroup list as its entry point rather than
the socket list, which makes linkgroup diagnosis
possible even when a linkgroup no longer contains any
active connections.

Signed-off-by: Guvenc Gulce <guvenc@linux.ibm.com>
Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/smc.h | 18 ++++++++++
 net/smc/smc_core.c       | 91 +++++++++++++++++++++++++++++++++++++++++++++---
 net/smc/smc_core.h       | 14 ++++++++
 net/smc/smc_diag.c       | 13 -------
 net/smc/smc_netlink.c    |  5 +++
 5 files changed, 124 insertions(+), 17 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h
index 3ae8ca4e5256..ed638dbfff08 100644
--- a/include/uapi/linux/smc.h
+++ b/include/uapi/linux/smc.h
@@ -41,6 +41,7 @@ enum {				/* SMC PNET Table commands */
 enum {
 	SMC_NETLINK_GET_SYS_INFO = 1,
 	SMC_NETLINK_GET_LGR_SMCR,
+	SMC_NETLINK_GET_LINK_SMCR,
 };
 
 /* SMC_GENL_FAMILY top level attributes */
@@ -48,6 +49,7 @@ enum {
 	SMC_GEN_UNSPEC,
 	SMC_GEN_SYS_INFO,		/* nest */
 	SMC_GEN_LGR_SMCR,		/* nest */
+	SMC_GEN_LINK_SMCR,		/* nest */
 	__SMC_GEN_MAX,
 	SMC_GEN_MAX = __SMC_GEN_MAX - 1
 };
@@ -77,4 +79,20 @@ enum {
 	SMC_NLA_LGR_R_MAX = __SMC_NLA_LGR_R_MAX - 1
 };
 
+/* SMC_GEN_LINK_SMCR attributes */
+enum {
+	SMC_NLA_LINK_UNSPEC,
+	SMC_NLA_LINK_ID,		/* u8 */
+	SMC_NLA_LINK_IB_DEV,		/* string */
+	SMC_NLA_LINK_IB_PORT,		/* u8 */
+	SMC_NLA_LINK_GID,		/* string */
+	SMC_NLA_LINK_PEER_GID,		/* string */
+	SMC_NLA_LINK_CONN_CNT,		/* u32 */
+	SMC_NLA_LINK_NET_DEV,		/* u32 */
+	SMC_NLA_LINK_UID,		/* u32 */
+	SMC_NLA_LINK_PEER_UID,		/* u32 */
+	SMC_NLA_LINK_STATE,		/* u32 */
+	__SMC_NLA_LINK_MAX,
+	SMC_NLA_LINK_MAX = __SMC_NLA_LINK_MAX - 1
+};
 #endif /* _UAPI_LINUX_SMC_H */
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index e21d068191ad..5ad4b742dcc1 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -307,11 +307,74 @@ errout:
 	return -EMSGSIZE;
 }
 
+static int smc_nl_fill_lgr_link(struct smc_link_group *lgr,
+				struct smc_link *link,
+				struct sk_buff *skb,
+				struct netlink_callback *cb)
+{
+	char smc_ibname[IB_DEVICE_NAME_MAX + 1];
+	u8 smc_gid_target[41];
+	struct nlattr *attrs;
+	u32 link_uid = 0;
+	void *nlh;
+
+	nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+			  &smc_gen_nl_family, NLM_F_MULTI,
+			  SMC_NETLINK_GET_LINK_SMCR);
+	if (!nlh)
+		goto errmsg;
+
+	attrs = nla_nest_start(skb, SMC_GEN_LINK_SMCR);
+	if (!attrs)
+		goto errout;
+
+	if (nla_put_u8(skb, SMC_NLA_LINK_ID, link->link_id))
+		goto errattr;
+	if (nla_put_u32(skb, SMC_NLA_LINK_STATE, link->state))
+		goto errattr;
+	if (nla_put_u32(skb, SMC_NLA_LINK_CONN_CNT,
+			atomic_read(&link->conn_cnt)))
+		goto errattr;
+	if (nla_put_u8(skb, SMC_NLA_LINK_IB_PORT, link->ibport))
+		goto errattr;
+	if (nla_put_u32(skb, SMC_NLA_LINK_NET_DEV, link->ndev_ifidx))
+		goto errattr;
+	snprintf(smc_ibname, sizeof(smc_ibname), "%s", link->ibname);
+	if (nla_put_string(skb, SMC_NLA_LINK_IB_DEV, smc_ibname))
+		goto errattr;
+	memcpy(&link_uid, link->link_uid, sizeof(link_uid));
+	if (nla_put_u32(skb, SMC_NLA_LINK_UID, link_uid))
+		goto errattr;
+	memcpy(&link_uid, link->peer_link_uid, sizeof(link_uid));
+	if (nla_put_u32(skb, SMC_NLA_LINK_PEER_UID, link_uid))
+		goto errattr;
+	memset(smc_gid_target, 0, sizeof(smc_gid_target));
+	smc_gid_be16_convert(smc_gid_target, link->gid);
+	if (nla_put_string(skb, SMC_NLA_LINK_GID, smc_gid_target))
+		goto errattr;
+	memset(smc_gid_target, 0, sizeof(smc_gid_target));
+	smc_gid_be16_convert(smc_gid_target, link->peer_gid);
+	if (nla_put_string(skb, SMC_NLA_LINK_PEER_GID, smc_gid_target))
+		goto errattr;
+
+	nla_nest_end(skb, attrs);
+	genlmsg_end(skb, nlh);
+	return 0;
+errattr:
+	nla_nest_cancel(skb, attrs);
+errout:
+	genlmsg_cancel(skb, nlh);
+errmsg:
+	return -EMSGSIZE;
+}
+
 static int smc_nl_handle_lgr(struct smc_link_group *lgr,
 			     struct sk_buff *skb,
-			     struct netlink_callback *cb)
+			     struct netlink_callback *cb,
+			     bool list_links)
 {
 	void *nlh;
+	int i;
 
 	nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 			  &smc_gen_nl_family, NLM_F_MULTI,
@@ -322,6 +385,15 @@ static int smc_nl_handle_lgr(struct smc_link_group *lgr,
 		goto errout;
 
 	genlmsg_end(skb, nlh);
+	if (!list_links)
+		goto out;
+	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+		if (!smc_link_usable(&lgr->lnk[i]))
+			continue;
+		if (smc_nl_fill_lgr_link(lgr, &lgr->lnk[i], skb, cb))
+			goto errout;
+	}
+out:
 	return 0;
 
 errout:
@@ -332,7 +404,8 @@ errmsg:
 
 static void smc_nl_fill_lgr_list(struct smc_lgr_list *smc_lgr,
 				 struct sk_buff *skb,
-				 struct netlink_callback *cb)
+				 struct netlink_callback *cb,
+				 bool list_links)
 {
 	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
 	struct smc_link_group *lgr;
@@ -343,7 +416,7 @@ static void smc_nl_fill_lgr_list(struct smc_lgr_list *smc_lgr,
 	list_for_each_entry(lgr, &smc_lgr->list, list) {
 		if (num < snum)
 			goto next;
-		if (smc_nl_handle_lgr(lgr, skb, cb))
+		if (smc_nl_handle_lgr(lgr, skb, cb, list_links))
 			goto errout;
 next:
 		num++;
@@ -355,7 +428,17 @@ errout:
 
 int smcr_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	smc_nl_fill_lgr_list(&smc_lgr_list, skb, cb);
+	bool list_links = false;
+
+	smc_nl_fill_lgr_list(&smc_lgr_list, skb, cb, list_links);
+	return skb->len;
+}
+
+int smcr_nl_get_link(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	bool list_links = true;
+
+	smc_nl_fill_lgr_list(&smc_lgr_list, skb, cb, list_links);
 	return skb->len;
 }
 
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 662315beb605..7995621f318d 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -367,6 +367,19 @@ static inline bool smc_link_active(struct smc_link *lnk)
 	return lnk->state == SMC_LNK_ACTIVE;
 }
 
+static inline void smc_gid_be16_convert(__u8 *buf, u8 *gid_raw)
+{
+	sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x",
+		be16_to_cpu(((__be16 *)gid_raw)[0]),
+		be16_to_cpu(((__be16 *)gid_raw)[1]),
+		be16_to_cpu(((__be16 *)gid_raw)[2]),
+		be16_to_cpu(((__be16 *)gid_raw)[3]),
+		be16_to_cpu(((__be16 *)gid_raw)[4]),
+		be16_to_cpu(((__be16 *)gid_raw)[5]),
+		be16_to_cpu(((__be16 *)gid_raw)[6]),
+		be16_to_cpu(((__be16 *)gid_raw)[7]));
+}
+
 struct smc_sock;
 struct smc_clc_msg_accept_confirm;
 struct smc_clc_msg_local;
@@ -416,6 +429,7 @@ void smcr_link_down_cond(struct smc_link *lnk);
 void smcr_link_down_cond_sched(struct smc_link *lnk);
 int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb);
 int smcr_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb);
+int smcr_nl_get_link(struct sk_buff *skb, struct netlink_callback *cb);
 
 static inline struct smc_link_group *smc_get_lgr(struct smc_link *link)
 {
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index c2225231f679..c952986a6aca 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -31,19 +31,6 @@ static struct smc_diag_dump_ctx *smc_dump_context(struct netlink_callback *cb)
 	return (struct smc_diag_dump_ctx *)cb->ctx;
 }
 
-static void smc_gid_be16_convert(__u8 *buf, u8 *gid_raw)
-{
-	sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x",
-		be16_to_cpu(((__be16 *)gid_raw)[0]),
-		be16_to_cpu(((__be16 *)gid_raw)[1]),
-		be16_to_cpu(((__be16 *)gid_raw)[2]),
-		be16_to_cpu(((__be16 *)gid_raw)[3]),
-		be16_to_cpu(((__be16 *)gid_raw)[4]),
-		be16_to_cpu(((__be16 *)gid_raw)[5]),
-		be16_to_cpu(((__be16 *)gid_raw)[6]),
-		be16_to_cpu(((__be16 *)gid_raw)[7]));
-}
-
 static void smc_diag_msg_common_fill(struct smc_diag_msg *r, struct sock *sk)
 {
 	struct smc_sock *smc = smc_sk(sk);
diff --git a/net/smc/smc_netlink.c b/net/smc/smc_netlink.c
index 490da56c8d3c..a41f78f488a2 100644
--- a/net/smc/smc_netlink.c
+++ b/net/smc/smc_netlink.c
@@ -33,6 +33,11 @@ static const struct genl_ops smc_gen_nl_ops[] = {
 		/* can be retrieved by unprivileged users */
 		.dumpit = smcr_nl_get_lgr,
 	},
+	{
+		.cmd = SMC_NETLINK_GET_LINK_SMCR,
+		/* can be retrieved by unprivileged users */
+		.dumpit = smcr_nl_get_link,
+	},
 };
 
 static const struct nla_policy smc_gen_nl_policy[2] = {
-- 
cgit v1.2.3


From 8f9dde4bf230f5c54a24c42a989dd9d88ec95695 Mon Sep 17 00:00:00 2001
From: Guvenc Gulce <guvenc@linux.ibm.com>
Date: Tue, 1 Dec 2020 20:20:47 +0100
Subject: net/smc: Add SMC-D Linkgroup diagnostic support

Deliver SMC-D linkgroup information via the netlink-based
diagnostic interface.

Signed-off-by: Guvenc Gulce <guvenc@linux.ibm.com>
Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/smc.h |  27 ++++++++++
 net/smc/smc_core.c       | 130 +++++++++++++++++++++++++++++++++++++++++++++++
 net/smc/smc_core.h       |   1 +
 net/smc/smc_netlink.c    |   5 ++
 4 files changed, 163 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h
index ed638dbfff08..707e8af4f0c8 100644
--- a/include/uapi/linux/smc.h
+++ b/include/uapi/linux/smc.h
@@ -42,6 +42,7 @@ enum {
 	SMC_NETLINK_GET_SYS_INFO = 1,
 	SMC_NETLINK_GET_LGR_SMCR,
 	SMC_NETLINK_GET_LINK_SMCR,
+	SMC_NETLINK_GET_LGR_SMCD,
 };
 
 /* SMC_GENL_FAMILY top level attributes */
@@ -50,6 +51,7 @@ enum {
 	SMC_GEN_SYS_INFO,		/* nest */
 	SMC_GEN_LGR_SMCR,		/* nest */
 	SMC_GEN_LINK_SMCR,		/* nest */
+	SMC_GEN_LGR_SMCD,		/* nest */
 	__SMC_GEN_MAX,
 	SMC_GEN_MAX = __SMC_GEN_MAX - 1
 };
@@ -66,6 +68,15 @@ enum {
 	SMC_NLA_SYS_MAX = __SMC_NLA_SYS_MAX - 1
 };
 
+/* SMC_NLA_LGR_V2 nested attributes */
+enum {
+	SMC_NLA_LGR_V2_VER,		/* u8 */
+	SMC_NLA_LGR_V2_REL,		/* u8 */
+	SMC_NLA_LGR_V2_OS,		/* u8 */
+	SMC_NLA_LGR_V2_NEG_EID,		/* string */
+	SMC_NLA_LGR_V2_PEER_HOST,	/* string */
+};
+
 /* SMC_GEN_LGR_SMCR attributes */
 enum {
 	SMC_NLA_LGR_R_UNSPEC,
@@ -95,4 +106,20 @@ enum {
 	__SMC_NLA_LINK_MAX,
 	SMC_NLA_LINK_MAX = __SMC_NLA_LINK_MAX - 1
 };
+
+/* SMC_GEN_LGR_SMCD attributes */
+enum {
+	SMC_NLA_LGR_D_UNSPEC,
+	SMC_NLA_LGR_D_ID,		/* u32 */
+	SMC_NLA_LGR_D_GID,		/* u64 */
+	SMC_NLA_LGR_D_PEER_GID,		/* u64 */
+	SMC_NLA_LGR_D_VLAN_ID,		/* u8 */
+	SMC_NLA_LGR_D_CONNS_NUM,	/* u32 */
+	SMC_NLA_LGR_D_PNETID,		/* string */
+	SMC_NLA_LGR_D_CHID,		/* u16 */
+	SMC_NLA_LGR_D_PAD,		/* flag */
+	SMC_NLA_LGR_V2,			/* nest */
+	__SMC_NLA_LGR_D_MAX,
+	SMC_NLA_LGR_D_MAX = __SMC_NLA_LGR_D_MAX - 1
+};
 #endif /* _UAPI_LINUX_SMC_H */
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 5ad4b742dcc1..ac2cc593f25f 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -426,6 +426,130 @@ errout:
 	cb_ctx->pos[0] = num;
 }
 
+static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
+				struct sk_buff *skb,
+				struct netlink_callback *cb)
+{
+	char smc_host[SMC_MAX_HOSTNAME_LEN + 1];
+	char smc_pnet[SMC_MAX_PNETID_LEN + 1];
+	char smc_eid[SMC_MAX_EID_LEN + 1];
+	struct nlattr *v2_attrs;
+	struct nlattr *attrs;
+	void *nlh;
+
+	nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+			  &smc_gen_nl_family, NLM_F_MULTI,
+			  SMC_NETLINK_GET_LGR_SMCD);
+	if (!nlh)
+		goto errmsg;
+
+	attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCD);
+	if (!attrs)
+		goto errout;
+
+	if (nla_put_u32(skb, SMC_NLA_LGR_D_ID, *((u32 *)&lgr->id)))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_GID, lgr->smcd->local_gid,
+			      SMC_NLA_LGR_D_PAD))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_GID, lgr->peer_gid,
+			      SMC_NLA_LGR_D_PAD))
+		goto errattr;
+	if (nla_put_u8(skb, SMC_NLA_LGR_D_VLAN_ID, lgr->vlan_id))
+		goto errattr;
+	if (nla_put_u32(skb, SMC_NLA_LGR_D_CONNS_NUM, lgr->conns_num))
+		goto errattr;
+	if (nla_put_u32(skb, SMC_NLA_LGR_D_CHID, smc_ism_get_chid(lgr->smcd)))
+		goto errattr;
+	snprintf(smc_pnet, sizeof(smc_pnet), "%s", lgr->smcd->pnetid);
+	if (nla_put_string(skb, SMC_NLA_LGR_D_PNETID, smc_pnet))
+		goto errattr;
+
+	v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_V2);
+	if (!v2_attrs)
+		goto errattr;
+	if (nla_put_u8(skb, SMC_NLA_LGR_V2_VER, lgr->smc_version))
+		goto errv2attr;
+	if (nla_put_u8(skb, SMC_NLA_LGR_V2_REL, lgr->peer_smc_release))
+		goto errv2attr;
+	if (nla_put_u8(skb, SMC_NLA_LGR_V2_OS, lgr->peer_os))
+		goto errv2attr;
+	snprintf(smc_host, sizeof(smc_host), "%s", lgr->peer_hostname);
+	if (nla_put_string(skb, SMC_NLA_LGR_V2_PEER_HOST, smc_host))
+		goto errv2attr;
+	snprintf(smc_eid, sizeof(smc_eid), "%s", lgr->negotiated_eid);
+	if (nla_put_string(skb, SMC_NLA_LGR_V2_NEG_EID, smc_eid))
+		goto errv2attr;
+
+	nla_nest_end(skb, v2_attrs);
+	nla_nest_end(skb, attrs);
+	genlmsg_end(skb, nlh);
+	return 0;
+
+errv2attr:
+	nla_nest_cancel(skb, v2_attrs);
+errattr:
+	nla_nest_cancel(skb, attrs);
+errout:
+	genlmsg_cancel(skb, nlh);
+errmsg:
+	return -EMSGSIZE;
+}
+
+static int smc_nl_handle_smcd_lgr(struct smcd_dev *dev,
+				  struct sk_buff *skb,
+				  struct netlink_callback *cb)
+{
+	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
+	struct smc_link_group *lgr;
+	int snum = cb_ctx->pos[1];
+	int rc = 0, num = 0;
+
+	spin_lock_bh(&dev->lgr_lock);
+	list_for_each_entry(lgr, &dev->lgr_list, list) {
+		if (!lgr->is_smcd)
+			continue;
+		if (num < snum)
+			goto next;
+		rc = smc_nl_fill_smcd_lgr(lgr, skb, cb);
+		if (rc)
+			goto errout;
+next:
+		num++;
+	}
+errout:
+	spin_unlock_bh(&dev->lgr_lock);
+	cb_ctx->pos[1] = num;
+	return rc;
+}
+
+static int smc_nl_fill_smcd_dev(struct smcd_dev_list *dev_list,
+				struct sk_buff *skb,
+				struct netlink_callback *cb)
+{
+	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
+	struct smcd_dev *smcd_dev;
+	int snum = cb_ctx->pos[0];
+	int rc = 0, num = 0;
+
+	mutex_lock(&dev_list->mutex);
+	list_for_each_entry(smcd_dev, &dev_list->list, list) {
+		if (list_empty(&smcd_dev->lgr_list))
+			continue;
+		if (num < snum)
+			goto next;
+		rc = smc_nl_handle_smcd_lgr(smcd_dev, skb, cb);
+		if (rc)
+			goto errout;
+next:
+		num++;
+	}
+errout:
+	mutex_unlock(&dev_list->mutex);
+	cb_ctx->pos[0] = num;
+	return rc;
+}
+
 int smcr_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	bool list_links = false;
@@ -442,6 +566,12 @@ int smcr_nl_get_link(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
+int smcd_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	smc_nl_fill_smcd_dev(&smcd_dev_list, skb, cb);
+	return skb->len;
+}
+
 void smc_lgr_cleanup_early(struct smc_connection *conn)
 {
 	struct smc_link_group *lgr = conn->lgr;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 7995621f318d..0b6899a7f634 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -430,6 +430,7 @@ void smcr_link_down_cond_sched(struct smc_link *lnk);
 int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb);
 int smcr_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb);
 int smcr_nl_get_link(struct sk_buff *skb, struct netlink_callback *cb);
+int smcd_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb);
 
 static inline struct smc_link_group *smc_get_lgr(struct smc_link *link)
 {
diff --git a/net/smc/smc_netlink.c b/net/smc/smc_netlink.c
index a41f78f488a2..95bce936534f 100644
--- a/net/smc/smc_netlink.c
+++ b/net/smc/smc_netlink.c
@@ -38,6 +38,11 @@ static const struct genl_ops smc_gen_nl_ops[] = {
 		/* can be retrieved by unprivileged users */
 		.dumpit = smcr_nl_get_link,
 	},
+	{
+		.cmd = SMC_NETLINK_GET_LGR_SMCD,
+		/* can be retrieved by unprivileged users */
+		.dumpit = smcd_nl_get_lgr,
+	},
 };
 
 static const struct nla_policy smc_gen_nl_policy[2] = {
-- 
cgit v1.2.3


From aaf95523d5824ebc2c8c185a2de51063a750c446 Mon Sep 17 00:00:00 2001
From: Guvenc Gulce <guvenc@linux.ibm.com>
Date: Tue, 1 Dec 2020 20:20:48 +0100
Subject: net/smc: Add support for obtaining SMCD device list

Deliver SMCD device information via the netlink-based
diagnostic interface.

Signed-off-by: Guvenc Gulce <guvenc@linux.ibm.com>
Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/smc.h | 28 +++++++++++++++
 net/smc/smc_core.h       | 28 +++++++++++++++
 net/smc/smc_ism.c        | 91 ++++++++++++++++++++++++++++++++++++++++++++++++
 net/smc/smc_ism.h        |  1 +
 net/smc/smc_netlink.c    |  6 ++++
 5 files changed, 154 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h
index 707e8af4f0c8..3cb40ab049d9 100644
--- a/include/uapi/linux/smc.h
+++ b/include/uapi/linux/smc.h
@@ -37,12 +37,15 @@ enum {				/* SMC PNET Table commands */
 #define SMC_GENL_FAMILY_NAME		"SMC_GEN_NETLINK"
 #define SMC_GENL_FAMILY_VERSION		1
 
+#define SMC_PCI_ID_STR_LEN		16 /* Max length of pci id string */
+
 /* SMC_GENL_FAMILY commands */
 enum {
 	SMC_NETLINK_GET_SYS_INFO = 1,
 	SMC_NETLINK_GET_LGR_SMCR,
 	SMC_NETLINK_GET_LINK_SMCR,
 	SMC_NETLINK_GET_LGR_SMCD,
+	SMC_NETLINK_GET_DEV_SMCD,
 };
 
 /* SMC_GENL_FAMILY top level attributes */
@@ -52,6 +55,7 @@ enum {
 	SMC_GEN_LGR_SMCR,		/* nest */
 	SMC_GEN_LINK_SMCR,		/* nest */
 	SMC_GEN_LGR_SMCD,		/* nest */
+	SMC_GEN_DEV_SMCD,		/* nest */
 	__SMC_GEN_MAX,
 	SMC_GEN_MAX = __SMC_GEN_MAX - 1
 };
@@ -122,4 +126,28 @@ enum {
 	__SMC_NLA_LGR_D_MAX,
 	SMC_NLA_LGR_D_MAX = __SMC_NLA_LGR_D_MAX - 1
 };
+
+/* SMC_NLA_DEV_PORT attributes */
+enum {
+	SMC_NLA_DEV_PORT_UNSPEC,
+	SMC_NLA_DEV_PORT_PNET_USR,	/* u8 */
+	SMC_NLA_DEV_PORT_PNETID,	/* string */
+	__SMC_NLA_DEV_PORT_MAX,
+	SMC_NLA_DEV_PORT_MAX = __SMC_NLA_DEV_PORT_MAX - 1
+};
+
+/* SMC_GEN_DEV_SMCD attributes */
+enum {
+	SMC_NLA_DEV_UNSPEC,
+	SMC_NLA_DEV_USE_CNT,		/* u32 */
+	SMC_NLA_DEV_IS_CRIT,		/* u8 */
+	SMC_NLA_DEV_PCI_FID,		/* u32 */
+	SMC_NLA_DEV_PCI_CHID,		/* u16 */
+	SMC_NLA_DEV_PCI_VENDOR,		/* u16 */
+	SMC_NLA_DEV_PCI_DEVICE,		/* u16 */
+	SMC_NLA_DEV_PCI_ID,		/* string */
+	SMC_NLA_DEV_PORT,		/* nest */
+	__SMC_NLA_DEV_MAX,
+	SMC_NLA_DEV_MAX = __SMC_NLA_DEV_MAX - 1
+};
 #endif /* _UAPI_LINUX_SMC_H */
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 0b6899a7f634..e8e448771f85 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -13,6 +13,8 @@
 #define _SMC_CORE_H
 
 #include <linux/atomic.h>
+#include <linux/smc.h>
+#include <linux/pci.h>
 #include <rdma/ib_verbs.h>
 #include <net/genetlink.h>
 
@@ -380,6 +382,32 @@ static inline void smc_gid_be16_convert(__u8 *buf, u8 *gid_raw)
 		be16_to_cpu(((__be16 *)gid_raw)[7]));
 }
 
+struct smc_pci_dev {
+	__u32		pci_fid;
+	__u16		pci_pchid;
+	__u16		pci_vendor;
+	__u16		pci_device;
+	__u8		pci_id[SMC_PCI_ID_STR_LEN];
+};
+
+static inline void smc_set_pci_values(struct pci_dev *pci_dev,
+				      struct smc_pci_dev *smc_dev)
+{
+	smc_dev->pci_vendor = pci_dev->vendor;
+	smc_dev->pci_device = pci_dev->device;
+	snprintf(smc_dev->pci_id, sizeof(smc_dev->pci_id), "%s",
+		 pci_name(pci_dev));
+#if IS_ENABLED(CONFIG_S390)
+	{ /* Set s390 specific PCI information */
+	struct zpci_dev *zdev;
+
+	zdev = to_zpci(pci_dev);
+	smc_dev->pci_fid = zdev->fid;
+	smc_dev->pci_pchid = zdev->pchid;
+	}
+#endif
+}
+
 struct smc_sock;
 struct smc_clc_msg_accept_confirm;
 struct smc_clc_msg_local;
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index 2456ee8228cd..524ef64a191a 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -15,6 +15,7 @@
 #include "smc_core.h"
 #include "smc_ism.h"
 #include "smc_pnet.h"
+#include "smc_netlink.h"
 
 struct smcd_dev_list smcd_dev_list = {
 	.list = LIST_HEAD_INIT(smcd_dev_list.list),
@@ -207,6 +208,96 @@ int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len,
 	return rc;
 }
 
+static int smc_nl_handle_smcd_dev(struct smcd_dev *smcd,
+				  struct sk_buff *skb,
+				  struct netlink_callback *cb)
+{
+	char smc_pnet[SMC_MAX_PNETID_LEN + 1];
+	struct smc_pci_dev smc_pci_dev;
+	struct nlattr *port_attrs;
+	struct nlattr *attrs;
+	int use_cnt = 0;
+	void *nlh;
+
+	nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+			  &smc_gen_nl_family, NLM_F_MULTI,
+			  SMC_NETLINK_GET_DEV_SMCD);
+	if (!nlh)
+		goto errmsg;
+	attrs = nla_nest_start(skb, SMC_GEN_DEV_SMCD);
+	if (!attrs)
+		goto errout;
+	use_cnt = atomic_read(&smcd->lgr_cnt);
+	if (nla_put_u32(skb, SMC_NLA_DEV_USE_CNT, use_cnt))
+		goto errattr;
+	if (nla_put_u8(skb, SMC_NLA_DEV_IS_CRIT, use_cnt > 0))
+		goto errattr;
+	memset(&smc_pci_dev, 0, sizeof(smc_pci_dev));
+	smc_set_pci_values(to_pci_dev(smcd->dev.parent), &smc_pci_dev);
+	if (nla_put_u32(skb, SMC_NLA_DEV_PCI_FID, smc_pci_dev.pci_fid))
+		goto errattr;
+	if (nla_put_u16(skb, SMC_NLA_DEV_PCI_CHID, smc_pci_dev.pci_pchid))
+		goto errattr;
+	if (nla_put_u16(skb, SMC_NLA_DEV_PCI_VENDOR, smc_pci_dev.pci_vendor))
+		goto errattr;
+	if (nla_put_u16(skb, SMC_NLA_DEV_PCI_DEVICE, smc_pci_dev.pci_device))
+		goto errattr;
+	if (nla_put_string(skb, SMC_NLA_DEV_PCI_ID, smc_pci_dev.pci_id))
+		goto errattr;
+
+	port_attrs = nla_nest_start(skb, SMC_NLA_DEV_PORT);
+	if (!port_attrs)
+		goto errattr;
+	if (nla_put_u8(skb, SMC_NLA_DEV_PORT_PNET_USR, smcd->pnetid_by_user))
+		goto errportattr;
+	snprintf(smc_pnet, sizeof(smc_pnet), "%s", smcd->pnetid);
+	if (nla_put_string(skb, SMC_NLA_DEV_PORT_PNETID, smc_pnet))
+		goto errportattr;
+
+	nla_nest_end(skb, port_attrs);
+	nla_nest_end(skb, attrs);
+	genlmsg_end(skb, nlh);
+	return 0;
+
+errportattr:
+	nla_nest_cancel(skb, port_attrs);
+errattr:
+	nla_nest_cancel(skb, attrs);
+errout:
+	nlmsg_cancel(skb, nlh);
+errmsg:
+	return -EMSGSIZE;
+}
+
+static void smc_nl_prep_smcd_dev(struct smcd_dev_list *dev_list,
+				 struct sk_buff *skb,
+				 struct netlink_callback *cb)
+{
+	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
+	int snum = cb_ctx->pos[0];
+	struct smcd_dev *smcd;
+	int num = 0;
+
+	mutex_lock(&dev_list->mutex);
+	list_for_each_entry(smcd, &dev_list->list, list) {
+		if (num < snum)
+			goto next;
+		if (smc_nl_handle_smcd_dev(smcd, skb, cb))
+			goto errout;
+next:
+		num++;
+	}
+errout:
+	mutex_unlock(&dev_list->mutex);
+	cb_ctx->pos[0] = num;
+}
+
+int smcd_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	smc_nl_prep_smcd_dev(&smcd_dev_list, skb, cb);
+	return skb->len;
+}
+
 struct smc_ism_event_work {
 	struct work_struct work;
 	struct smcd_dev *smcd;
diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h
index 481a4b7df30b..113efc7352ed 100644
--- a/net/smc/smc_ism.h
+++ b/net/smc/smc_ism.h
@@ -52,4 +52,5 @@ void smc_ism_get_system_eid(struct smcd_dev *dev, u8 **eid);
 u16 smc_ism_get_chid(struct smcd_dev *dev);
 bool smc_ism_is_v2_capable(void);
 void smc_ism_init(void);
+int smcd_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb);
 #endif
diff --git a/net/smc/smc_netlink.c b/net/smc/smc_netlink.c
index 95bce936534f..debdeec53728 100644
--- a/net/smc/smc_netlink.c
+++ b/net/smc/smc_netlink.c
@@ -17,6 +17,7 @@
 #include <linux/smc.h>
 
 #include "smc_core.h"
+#include "smc_ism.h"
 #include "smc_netlink.h"
 
 #define SMC_CMD_MAX_ATTR 1
@@ -43,6 +44,11 @@ static const struct genl_ops smc_gen_nl_ops[] = {
 		/* can be retrieved by unprivileged users */
 		.dumpit = smcd_nl_get_lgr,
 	},
+	{
+		.cmd = SMC_NETLINK_GET_DEV_SMCD,
+		/* can be retrieved by unprivileged users */
+		.dumpit = smcd_nl_get_device,
+	},
 };
 
 static const struct nla_policy smc_gen_nl_policy[2] = {
-- 
cgit v1.2.3


From a3db10efcc4cc9c03a6375920179ade75ea2df7a Mon Sep 17 00:00:00 2001
From: Guvenc Gulce <guvenc@linux.ibm.com>
Date: Tue, 1 Dec 2020 20:20:49 +0100
Subject: net/smc: Add support for obtaining SMCR device list

Deliver SMCR device information via the netlink-based
diagnostic interface.

Signed-off-by: Guvenc Gulce <guvenc@linux.ibm.com>
Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/smc.h |  13 +++-
 net/smc/smc_core.c       |   2 +-
 net/smc/smc_ib.c         | 156 +++++++++++++++++++++++++++++++++++++++++++++++
 net/smc/smc_ib.h         |   2 +
 net/smc/smc_netlink.c    |   6 ++
 5 files changed, 176 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h
index 3cb40ab049d9..3e68da07fba2 100644
--- a/include/uapi/linux/smc.h
+++ b/include/uapi/linux/smc.h
@@ -46,6 +46,7 @@ enum {
 	SMC_NETLINK_GET_LINK_SMCR,
 	SMC_NETLINK_GET_LGR_SMCD,
 	SMC_NETLINK_GET_DEV_SMCD,
+	SMC_NETLINK_GET_DEV_SMCR,
 };
 
 /* SMC_GENL_FAMILY top level attributes */
@@ -56,6 +57,7 @@ enum {
 	SMC_GEN_LINK_SMCR,		/* nest */
 	SMC_GEN_LGR_SMCD,		/* nest */
 	SMC_GEN_DEV_SMCD,		/* nest */
+	SMC_GEN_DEV_SMCR,		/* nest */
 	__SMC_GEN_MAX,
 	SMC_GEN_MAX = __SMC_GEN_MAX - 1
 };
@@ -127,16 +129,20 @@ enum {
 	SMC_NLA_LGR_D_MAX = __SMC_NLA_LGR_D_MAX - 1
 };
 
-/* SMC_NLA_DEV_PORT attributes */
+/* SMC_NLA_DEV_PORT nested attributes */
 enum {
 	SMC_NLA_DEV_PORT_UNSPEC,
 	SMC_NLA_DEV_PORT_PNET_USR,	/* u8 */
 	SMC_NLA_DEV_PORT_PNETID,	/* string */
+	SMC_NLA_DEV_PORT_NETDEV,	/* u32 */
+	SMC_NLA_DEV_PORT_STATE,		/* u8 */
+	SMC_NLA_DEV_PORT_VALID,		/* u8 */
+	SMC_NLA_DEV_PORT_LNK_CNT,	/* u32 */
 	__SMC_NLA_DEV_PORT_MAX,
 	SMC_NLA_DEV_PORT_MAX = __SMC_NLA_DEV_PORT_MAX - 1
 };
 
-/* SMC_GEN_DEV_SMCD attributes */
+/* SMC_GEN_DEV_SMCD and SMC_GEN_DEV_SMCR attributes */
 enum {
 	SMC_NLA_DEV_UNSPEC,
 	SMC_NLA_DEV_USE_CNT,		/* u32 */
@@ -147,7 +153,10 @@ enum {
 	SMC_NLA_DEV_PCI_DEVICE,		/* u16 */
 	SMC_NLA_DEV_PCI_ID,		/* string */
 	SMC_NLA_DEV_PORT,		/* nest */
+	SMC_NLA_DEV_PORT2,		/* nest */
+	SMC_NLA_DEV_IB_NAME,		/* string */
 	__SMC_NLA_DEV_MAX,
 	SMC_NLA_DEV_MAX = __SMC_NLA_DEV_MAX - 1
 };
+
 #endif /* _UAPI_LINUX_SMC_H */
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index ac2cc593f25f..59342b519e34 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -38,7 +38,7 @@
 #define SMC_LGR_FREE_DELAY_SERV		(600 * HZ)
 #define SMC_LGR_FREE_DELAY_CLNT		(SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
 
-static struct smc_lgr_list smc_lgr_list = {	/* established link groups */
+struct smc_lgr_list smc_lgr_list = {	/* established link groups */
 	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
 	.list = LIST_HEAD_INIT(smc_lgr_list.list),
 	.num = 0,
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 61b025c912a9..89ea10675a7d 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -25,6 +25,7 @@
 #include "smc_core.h"
 #include "smc_wr.h"
 #include "smc.h"
+#include "smc_netlink.h"
 
 #define SMC_MAX_CQE 32766	/* max. # of completion queue elements */
 
@@ -326,6 +327,161 @@ int smc_ib_create_protection_domain(struct smc_link *lnk)
 	return rc;
 }
 
+static bool smcr_diag_is_dev_critical(struct smc_lgr_list *smc_lgr,
+				      struct smc_ib_device *smcibdev)
+{
+	struct smc_link_group *lgr;
+	bool rc = false;
+	int i;
+
+	spin_lock_bh(&smc_lgr->lock);
+	list_for_each_entry(lgr, &smc_lgr->list, list) {
+		if (lgr->is_smcd)
+			continue;
+		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+			if (lgr->lnk[i].state == SMC_LNK_UNUSED ||
+			    lgr->lnk[i].smcibdev != smcibdev)
+				continue;
+			if (lgr->type == SMC_LGR_SINGLE ||
+			    lgr->type == SMC_LGR_ASYMMETRIC_LOCAL) {
+				rc = true;
+				goto out;
+			}
+		}
+	}
+out:
+	spin_unlock_bh(&smc_lgr->lock);
+	return rc;
+}
+
+static int smc_nl_handle_dev_port(struct sk_buff *skb,
+				  struct ib_device *ibdev,
+				  struct smc_ib_device *smcibdev,
+				  int port)
+{
+	char smc_pnet[SMC_MAX_PNETID_LEN + 1];
+	struct nlattr *port_attrs;
+	unsigned char port_state;
+	int lnk_count = 0;
+
+	port_attrs = nla_nest_start(skb, SMC_NLA_DEV_PORT + port);
+	if (!port_attrs)
+		goto errout;
+
+	if (nla_put_u8(skb, SMC_NLA_DEV_PORT_PNET_USR,
+		       smcibdev->pnetid_by_user[port]))
+		goto errattr;
+	snprintf(smc_pnet, sizeof(smc_pnet), "%s",
+		 (char *)&smcibdev->pnetid[port]);
+	if (nla_put_string(skb, SMC_NLA_DEV_PORT_PNETID, smc_pnet))
+		goto errattr;
+	if (nla_put_u32(skb, SMC_NLA_DEV_PORT_NETDEV,
+			smcibdev->ndev_ifidx[port]))
+		goto errattr;
+	if (nla_put_u8(skb, SMC_NLA_DEV_PORT_VALID, 1))
+		goto errattr;
+	port_state = smc_ib_port_active(smcibdev, port + 1);
+	if (nla_put_u8(skb, SMC_NLA_DEV_PORT_STATE, port_state))
+		goto errattr;
+	lnk_count = atomic_read(&smcibdev->lnk_cnt_by_port[port]);
+	if (nla_put_u32(skb, SMC_NLA_DEV_PORT_LNK_CNT, lnk_count))
+		goto errattr;
+	nla_nest_end(skb, port_attrs);
+	return 0;
+errattr:
+	nla_nest_cancel(skb, port_attrs);
+errout:
+	return -EMSGSIZE;
+}
+
+static int smc_nl_handle_smcr_dev(struct smc_ib_device *smcibdev,
+				  struct sk_buff *skb,
+				  struct netlink_callback *cb)
+{
+	char smc_ibname[IB_DEVICE_NAME_MAX + 1];
+	struct smc_pci_dev smc_pci_dev;
+	struct pci_dev *pci_dev;
+	unsigned char is_crit;
+	struct nlattr *attrs;
+	void *nlh;
+	int i;
+
+	nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+			  &smc_gen_nl_family, NLM_F_MULTI,
+			  SMC_NETLINK_GET_DEV_SMCR);
+	if (!nlh)
+		goto errmsg;
+	attrs = nla_nest_start(skb, SMC_GEN_DEV_SMCR);
+	if (!attrs)
+		goto errout;
+	is_crit = smcr_diag_is_dev_critical(&smc_lgr_list, smcibdev);
+	if (nla_put_u8(skb, SMC_NLA_DEV_IS_CRIT, is_crit))
+		goto errattr;
+	memset(&smc_pci_dev, 0, sizeof(smc_pci_dev));
+	pci_dev = to_pci_dev(smcibdev->ibdev->dev.parent);
+	smc_set_pci_values(pci_dev, &smc_pci_dev);
+	if (nla_put_u32(skb, SMC_NLA_DEV_PCI_FID, smc_pci_dev.pci_fid))
+		goto errattr;
+	if (nla_put_u16(skb, SMC_NLA_DEV_PCI_CHID, smc_pci_dev.pci_pchid))
+		goto errattr;
+	if (nla_put_u16(skb, SMC_NLA_DEV_PCI_VENDOR, smc_pci_dev.pci_vendor))
+		goto errattr;
+	if (nla_put_u16(skb, SMC_NLA_DEV_PCI_DEVICE, smc_pci_dev.pci_device))
+		goto errattr;
+	if (nla_put_string(skb, SMC_NLA_DEV_PCI_ID, smc_pci_dev.pci_id))
+		goto errattr;
+	snprintf(smc_ibname, sizeof(smc_ibname), "%s", smcibdev->ibdev->name);
+	if (nla_put_string(skb, SMC_NLA_DEV_IB_NAME, smc_ibname))
+		goto errattr;
+	for (i = 1; i <= SMC_MAX_PORTS; i++) {
+		if (!rdma_is_port_valid(smcibdev->ibdev, i))
+			continue;
+		if (smc_nl_handle_dev_port(skb, smcibdev->ibdev,
+					   smcibdev, i - 1))
+			goto errattr;
+	}
+
+	nla_nest_end(skb, attrs);
+	genlmsg_end(skb, nlh);
+	return 0;
+
+errattr:
+	nla_nest_cancel(skb, attrs);
+errout:
+	genlmsg_cancel(skb, nlh);
+errmsg:
+	return -EMSGSIZE;
+}
+
+static void smc_nl_prep_smcr_dev(struct smc_ib_devices *dev_list,
+				 struct sk_buff *skb,
+				 struct netlink_callback *cb)
+{
+	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
+	struct smc_ib_device *smcibdev;
+	int snum = cb_ctx->pos[0];
+	int num = 0;
+
+	mutex_lock(&dev_list->mutex);
+	list_for_each_entry(smcibdev, &dev_list->list, list) {
+		if (num < snum)
+			goto next;
+		if (smc_nl_handle_smcr_dev(smcibdev, skb, cb))
+			goto errout;
+next:
+		num++;
+	}
+errout:
+	mutex_unlock(&dev_list->mutex);
+	cb_ctx->pos[0] = num;
+}
+
+int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	smc_nl_prep_smcr_dev(&smc_ib_devices, skb, cb);
+	return skb->len;
+}
+
 static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv)
 {
 	struct smc_link *lnk = (struct smc_link *)priv;
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index ab37da341fa8..3085f5180da7 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -30,6 +30,7 @@ struct smc_ib_devices {			/* list of smc ib devices definition */
 };
 
 extern struct smc_ib_devices	smc_ib_devices; /* list of smc ib devices */
+extern struct smc_lgr_list smc_lgr_list; /* list of linkgroups */
 
 struct smc_ib_device {				/* ib-device infos for smc */
 	struct list_head	list;
@@ -91,4 +92,5 @@ void smc_ib_sync_sg_for_device(struct smc_link *lnk,
 int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
 			 unsigned short vlan_id, u8 gid[], u8 *sgid_index);
 bool smc_ib_is_valid_local_systemid(void);
+int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb);
 #endif
diff --git a/net/smc/smc_netlink.c b/net/smc/smc_netlink.c
index debdeec53728..140419a19dbf 100644
--- a/net/smc/smc_netlink.c
+++ b/net/smc/smc_netlink.c
@@ -18,6 +18,7 @@
 
 #include "smc_core.h"
 #include "smc_ism.h"
+#include "smc_ib.h"
 #include "smc_netlink.h"
 
 #define SMC_CMD_MAX_ATTR 1
@@ -49,6 +50,11 @@ static const struct genl_ops smc_gen_nl_ops[] = {
 		/* can be retrieved by unprivileged users */
 		.dumpit = smcd_nl_get_device,
 	},
+	{
+		.cmd = SMC_NETLINK_GET_DEV_SMCR,
+		/* can be retrieved by unprivileged users */
+		.dumpit = smcr_nl_get_device,
+	},
 };
 
 static const struct nla_policy smc_gen_nl_policy[2] = {
-- 
cgit v1.2.3


From 1d7637d89cfce54a4f4a41c2325288c2f47470e8 Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@collabora.com>
Date: Fri, 27 Nov 2020 14:32:33 -0500
Subject: signal: Expose SYS_USER_DISPATCH si_code type

SYS_USER_DISPATCH will be triggered when a syscall is sent to userspace
by the Syscall User Dispatch mechanism.  Adding the new si_code raises
NSIGSYS, so this also adjusts the BUILD_BUG_ON checks in the tree that
depend on it.

Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20201127193238.821364-3-krisman@collabora.com
---
 arch/x86/kernel/signal_compat.c    | 2 +-
 include/uapi/asm-generic/siginfo.h | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
index a7f3e12cfbdb..d7b51870f16b 100644
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -31,7 +31,7 @@ static inline void signal_compat_build_tests(void)
 	BUILD_BUG_ON(NSIGBUS  != 5);
 	BUILD_BUG_ON(NSIGTRAP != 5);
 	BUILD_BUG_ON(NSIGCHLD != 6);
-	BUILD_BUG_ON(NSIGSYS  != 1);
+	BUILD_BUG_ON(NSIGSYS  != 2);
 
 	/* This is part of the ABI and can never change in size: */
 	BUILD_BUG_ON(sizeof(compat_siginfo_t) != 128);
diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h
index 7aacf9389010..d2597000407a 100644
--- a/include/uapi/asm-generic/siginfo.h
+++ b/include/uapi/asm-generic/siginfo.h
@@ -286,7 +286,8 @@ typedef struct siginfo {
  * SIGSYS si_codes
  */
 #define SYS_SECCOMP	1	/* seccomp triggered */
-#define NSIGSYS		1
+#define SYS_USER_DISPATCH 2	/* syscall user dispatch triggered */
+#define NSIGSYS		2
 
 /*
  * SIGEMT si_codes
-- 
cgit v1.2.3


From 1446e1df9eb183fdf81c3f0715402f1d7595d4cb Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@collabora.com>
Date: Fri, 27 Nov 2020 14:32:34 -0500
Subject: kernel: Implement selective syscall userspace redirection

Introduce a mechanism to quickly disable/enable syscall handling for a
specific process and redirect to userspace via SIGSYS.  This is useful
for processes with parts that require syscall redirection and parts that
don't, but that need to perform this boundary crossing really fast,
without paying the cost of a system call to reconfigure syscall handling
on each boundary transition.  This is particularly important for Windows
games running over Wine.

The proposed interface looks like this:

  prctl(PR_SET_SYSCALL_USER_DISPATCH, <op>, <offset>, <length>, [selector])

The range [<offset>,<offset>+<length>) is a part of the process memory
map that is allowed to by-pass the redirection code and dispatch
syscalls directly, such that in fast paths the process doesn't need to
disable the trap and the kernel doesn't have to check the selector.
This is essential to return from SIGSYS to a blocked area without
triggering another SIGSYS from rt_sigreturn.

selector is an optional pointer to a char-sized userspace memory region
that has a key switch for the mechanism. This key switch is set to
either PR_SYS_DISPATCH_ON or PR_SYS_DISPATCH_OFF to enable or disable
the redirection without calling the kernel.

The feature is meant to be set per-thread and it is disabled on
fork/clone/execv.

Internally, this doesn't add overhead to the syscall hot path, and it
requires very little per-architecture support.  I avoided using seccomp,
even though it duplicates some functionality, due to previous feedback
that maybe it shouldn't mix with seccomp since it is not a security
mechanism.  And obviously, this should never be considered a security
mechanism, since any part of the program can by-pass it by using the
syscall dispatcher.

For the sysinfo benchmark, which measures the overhead added to
executing a native syscall that doesn't require interception, the
overhead using only the direct dispatcher region to issue syscalls is
pretty much negligible.  The overhead of using the selector is around
40ns for a native (unredirected) syscall on my system, and it is (as
expected) dominated by the supervisor-mode user-address access.  In
fact, with SMAP off, the overhead is consistently less than 5ns on my
test box.

Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20201127193238.821364-4-krisman@collabora.com
---
 fs/exec.c                             |   3 +
 include/linux/sched.h                 |   2 +
 include/linux/syscall_user_dispatch.h |  40 +++++++++++++
 include/linux/thread_info.h           |   2 +
 include/uapi/linux/prctl.h            |   5 ++
 kernel/entry/Makefile                 |   2 +-
 kernel/entry/common.h                 |   7 +++
 kernel/entry/syscall_user_dispatch.c  | 104 ++++++++++++++++++++++++++++++++++
 kernel/fork.c                         |   1 +
 kernel/sys.c                          |   5 ++
 10 files changed, 170 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/syscall_user_dispatch.h
 create mode 100644 kernel/entry/common.h
 create mode 100644 kernel/entry/syscall_user_dispatch.c

(limited to 'include/uapi')

diff --git a/fs/exec.c b/fs/exec.c
index 547a2390baf5..aee36e5733ce 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -64,6 +64,7 @@
 #include <linux/compat.h>
 #include <linux/vmalloc.h>
 #include <linux/io_uring.h>
+#include <linux/syscall_user_dispatch.h>
 
 #include <linux/uaccess.h>
 #include <asm/mmu_context.h>
@@ -1302,6 +1303,8 @@ int begin_new_exec(struct linux_binprm * bprm)
 	flush_thread();
 	me->personality &= ~bprm->per_clear;
 
+	clear_syscall_work_syscall_user_dispatch(me);
+
 	/*
 	 * We have to apply CLOEXEC before we change whether the process is
 	 * dumpable (in setup_new_exec) to avoid a race with a process in userspace
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 063cd120b459..5a24a033b3f8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -28,6 +28,7 @@
 #include <linux/sched/prio.h>
 #include <linux/sched/types.h>
 #include <linux/signal_types.h>
+#include <linux/syscall_user_dispatch.h>
 #include <linux/mm_types_task.h>
 #include <linux/task_io_accounting.h>
 #include <linux/posix-timers.h>
@@ -965,6 +966,7 @@ struct task_struct {
 	unsigned int			sessionid;
 #endif
 	struct seccomp			seccomp;
+	struct syscall_user_dispatch	syscall_dispatch;
 
 	/* Thread group tracking: */
 	u64				parent_exec_id;
diff --git a/include/linux/syscall_user_dispatch.h b/include/linux/syscall_user_dispatch.h
new file mode 100644
index 000000000000..a0ae443fb7df
--- /dev/null
+++ b/include/linux/syscall_user_dispatch.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Collabora Ltd.
+ */
+#ifndef _SYSCALL_USER_DISPATCH_H
+#define _SYSCALL_USER_DISPATCH_H
+
+#include <linux/thread_info.h>
+
+#ifdef CONFIG_GENERIC_ENTRY
+
+struct syscall_user_dispatch {
+	char __user	*selector;
+	unsigned long	offset;
+	unsigned long	len;
+	bool		on_dispatch;
+};
+
+int set_syscall_user_dispatch(unsigned long mode, unsigned long offset,
+			      unsigned long len, char __user *selector);
+
+#define clear_syscall_work_syscall_user_dispatch(tsk) \
+	clear_task_syscall_work(tsk, SYSCALL_USER_DISPATCH)
+
+#else
+struct syscall_user_dispatch {};
+
+static inline int set_syscall_user_dispatch(unsigned long mode, unsigned long offset,
+					    unsigned long len, char __user *selector)
+{
+	return -EINVAL;
+}
+
+static inline void clear_syscall_work_syscall_user_dispatch(struct task_struct *tsk)
+{
+}
+
+#endif /* CONFIG_GENERIC_ENTRY */
+
+#endif /* _SYSCALL_USER_DISPATCH_H */
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index ca80a214df09..c8a974cead73 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -42,6 +42,7 @@ enum syscall_work_bit {
 	SYSCALL_WORK_BIT_SYSCALL_TRACE,
 	SYSCALL_WORK_BIT_SYSCALL_EMU,
 	SYSCALL_WORK_BIT_SYSCALL_AUDIT,
+	SYSCALL_WORK_BIT_SYSCALL_USER_DISPATCH,
 };
 
 #define SYSCALL_WORK_SECCOMP		BIT(SYSCALL_WORK_BIT_SECCOMP)
@@ -49,6 +50,7 @@ enum syscall_work_bit {
 #define SYSCALL_WORK_SYSCALL_TRACE	BIT(SYSCALL_WORK_BIT_SYSCALL_TRACE)
 #define SYSCALL_WORK_SYSCALL_EMU	BIT(SYSCALL_WORK_BIT_SYSCALL_EMU)
 #define SYSCALL_WORK_SYSCALL_AUDIT	BIT(SYSCALL_WORK_BIT_SYSCALL_AUDIT)
+#define SYSCALL_WORK_SYSCALL_USER_DISPATCH BIT(SYSCALL_WORK_BIT_SYSCALL_USER_DISPATCH)
 #endif
 
 #include <asm/thread_info.h>
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 7f0827705c9a..90deb41c8a34 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -247,4 +247,9 @@ struct prctl_mm_map {
 #define PR_SET_IO_FLUSHER		57
 #define PR_GET_IO_FLUSHER		58
 
+/* Dispatch syscalls to a userspace handler */
+#define PR_SET_SYSCALL_USER_DISPATCH	59
+# define PR_SYS_DISPATCH_OFF		0
+# define PR_SYS_DISPATCH_ON		1
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/entry/Makefile b/kernel/entry/Makefile
index 34c8a3f1c735..095c775e001e 100644
--- a/kernel/entry/Makefile
+++ b/kernel/entry/Makefile
@@ -9,5 +9,5 @@ KCOV_INSTRUMENT := n
 CFLAGS_REMOVE_common.o	 = -fstack-protector -fstack-protector-strong
 CFLAGS_common.o		+= -fno-stack-protector
 
-obj-$(CONFIG_GENERIC_ENTRY) 		+= common.o
+obj-$(CONFIG_GENERIC_ENTRY) 		+= common.o syscall_user_dispatch.o
 obj-$(CONFIG_KVM_XFER_TO_GUEST_WORK)	+= kvm.o
diff --git a/kernel/entry/common.h b/kernel/entry/common.h
new file mode 100644
index 000000000000..f6e6d02f07fe
--- /dev/null
+++ b/kernel/entry/common.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _COMMON_H
+#define _COMMON_H
+
+bool syscall_user_dispatch(struct pt_regs *regs);	/* defined in syscall_user_dispatch.c */
+
+#endif /* _COMMON_H */
diff --git a/kernel/entry/syscall_user_dispatch.c b/kernel/entry/syscall_user_dispatch.c
new file mode 100644
index 000000000000..b0338a5625d9
--- /dev/null
+++ b/kernel/entry/syscall_user_dispatch.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Collabora Ltd.
+ */
+#include <linux/sched.h>
+#include <linux/prctl.h>
+#include <linux/syscall_user_dispatch.h>
+#include <linux/uaccess.h>
+#include <linux/signal.h>
+#include <linux/elf.h>
+
+#include <linux/sched/signal.h>
+#include <linux/sched/task_stack.h>
+
+#include <asm/syscall.h>
+
+#include "common.h"
+
+static void trigger_sigsys(struct pt_regs *regs)
+{	/* Fill a SIGSYS siginfo for the current task and pass it to force_sig_info(). */
+	struct kernel_siginfo info;
+
+	clear_siginfo(&info);
+	info.si_signo = SIGSYS;
+	info.si_code = SYS_USER_DISPATCH;	/* tags the SIGSYS as a user-dispatch event */
+	info.si_call_addr = (void __user *)KSTK_EIP(current);	/* from KSTK_EIP(current) */
+	info.si_errno = 0;
+	info.si_arch = syscall_get_arch(current);
+	info.si_syscall = syscall_get_nr(current, regs);	/* syscall number taken from @regs */
+
+	force_sig_info(&info);
+}
+
+bool syscall_user_dispatch(struct pt_regs *regs)
+{	/* Returns true after rolling the syscall back and raising SIGSYS, false otherwise. */
+	struct syscall_user_dispatch *sd = &current->syscall_dispatch;
+	char state;
+
+	if (likely(instruction_pointer(regs) - sd->offset < sd->len))
+		return false;	/* IP is inside the [offset, offset + len) region */
+
+	if (unlikely(arch_syscall_is_vdso_sigreturn(regs)))
+		return false;	/* arch hook reports a vDSO sigreturn */
+
+	if (likely(sd->selector)) {
+		/*
+		 * access_ok() is performed once, at prctl time, when
+		 * the selector is loaded by userspace.
+		 */
+		if (unlikely(__get_user(state, sd->selector)))
+			do_exit(SIGSEGV);	/* the selector byte could not be read */
+
+		if (likely(state == PR_SYS_DISPATCH_OFF))
+			return false;	/* selector currently says OFF */
+
+		if (state != PR_SYS_DISPATCH_ON)
+			do_exit(SIGSYS);	/* selector holds neither OFF nor ON */
+	}
+
+	sd->on_dispatch = true;
+	syscall_rollback(current, regs);
+	trigger_sigsys(regs);
+
+	return true;
+}
+
+int set_syscall_user_dispatch(unsigned long mode, unsigned long offset,
+			      unsigned long len, char __user *selector)
+{	/* Returns 0 on success, -EINVAL for bad arguments, -EFAULT if access_ok() rejects @selector. */
+	switch (mode) {
+	case PR_SYS_DISPATCH_OFF:
+		if (offset || len || selector)
+			return -EINVAL;	/* OFF accepts no region and no selector */
+		break;
+	case PR_SYS_DISPATCH_ON:
+		/*
+		 * Validate the direct dispatcher region just for basic
+		 * sanity against overflow and a 0-sized dispatcher
+		 * region.  If the user is able to submit a syscall from
+		 * an address, that address is obviously valid.
+		 */
+		if (offset && offset + len <= offset)
+			return -EINVAL;	/* len is 0 or offset + len wrapped around */
+
+		if (selector && !access_ok(selector, sizeof(*selector)))
+			return -EFAULT;
+
+		break;
+	default:
+		return -EINVAL;	/* unknown mode */
+	}
+
+	current->syscall_dispatch.selector = selector;
+	current->syscall_dispatch.offset = offset;
+	current->syscall_dispatch.len = len;
+	current->syscall_dispatch.on_dispatch = false;
+
+	if (mode == PR_SYS_DISPATCH_ON)
+		set_syscall_work(SYSCALL_USER_DISPATCH);	/* flag set only for mode ON */
+	else
+		clear_syscall_work(SYSCALL_USER_DISPATCH);
+
+	return 0;
+}
diff --git a/kernel/fork.c b/kernel/fork.c
index 02b689a23457..4a5ecb41f440 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -906,6 +906,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	clear_user_return_notifier(tsk);
 	clear_tsk_need_resched(tsk);
 	set_task_stack_end_magic(tsk);
+	clear_syscall_work_syscall_user_dispatch(tsk);
 
 #ifdef CONFIG_STACKPROTECTOR
 	tsk->stack_canary = get_random_canary();
diff --git a/kernel/sys.c b/kernel/sys.c
index a730c03ee607..51f00fe20e4d 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -42,6 +42,7 @@
 #include <linux/syscore_ops.h>
 #include <linux/version.h>
 #include <linux/ctype.h>
+#include <linux/syscall_user_dispatch.h>
 
 #include <linux/compat.h>
 #include <linux/syscalls.h>
@@ -2530,6 +2531,10 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 
 		error = (current->flags & PR_IO_FLUSHER) == PR_IO_FLUSHER;
 		break;
+	case PR_SET_SYSCALL_USER_DISPATCH:
+		error = set_syscall_user_dispatch(arg2, arg3, arg4,
+						  (char __user *) arg5);
+		break;
 	default:
 		error = -EINVAL;
 		break;
-- 
cgit v1.2.3


From e1e8debec6564d0c355aab84c93de8e357f397bd Mon Sep 17 00:00:00 2001
From: Daeho Jeong <daehojeong@google.com>
Date: Fri, 30 Oct 2020 13:10:35 +0900
Subject: f2fs: add F2FS_IOC_SET_COMPRESS_OPTION ioctl

Added a new F2FS_IOC_SET_COMPRESS_OPTION ioctl to change the
compression option of a file.

struct f2fs_comp_option {
    u8 algorithm;         => compression algorithm
                          => 0:lzo, 1:lz4, 2:zstd, 3:lzorle
    u8 log_cluster_size;  => log scale cluster size
                          => 2 ~ 8
};

struct f2fs_comp_option option;

option.algorithm = 1;
option.log_cluster_size = 7;

ioctl(fd, F2FS_IOC_SET_COMPRESS_OPTION, &option);

Signed-off-by: Daeho Jeong <daehojeong@google.com>
[Chao Yu: remove f2fs_is_compress_algorithm_valid()]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/file.c            | 54 +++++++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/f2fs.h |  2 ++
 2 files changed, 56 insertions(+)

(limited to 'include/uapi')

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index c747f5dd595c..ea2f1ba408c4 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -3972,6 +3972,57 @@ static int f2fs_ioc_get_compress_option(struct file *filp, unsigned long arg)
 	return 0;
 }
 
+static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg)
+{	/* Copies a struct f2fs_comp_option from user pointer @arg and stores it in the inode. */
+	struct inode *inode = file_inode(filp);
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	struct f2fs_comp_option option;
+	int ret = 0;
+
+	if (!f2fs_sb_has_compression(sbi))
+		return -EOPNOTSUPP;
+
+	if (!(filp->f_mode & FMODE_WRITE))
+		return -EBADF;	/* file was not opened for writing */
+
+	if (copy_from_user(&option, (struct f2fs_comp_option __user *)arg,
+				sizeof(option)))
+		return -EFAULT;
+
+	if (!f2fs_compressed_file(inode) ||
+			option.log_cluster_size < MIN_COMPRESS_LOG_SIZE ||
+			option.log_cluster_size > MAX_COMPRESS_LOG_SIZE ||
+			option.algorithm >= COMPRESS_MAX)
+		return -EINVAL;	/* not a compressed file, or option out of range */
+
+	file_start_write(filp);
+	inode_lock(inode);
+
+	if (f2fs_is_mmap_file(inode) || get_dirty_pages(inode)) {
+		ret = -EBUSY;	/* file is mmapped or still has dirty pages */
+		goto out;
+	}
+
+	if (inode->i_size != 0) {
+		ret = -EFBIG;	/* options are only changed while the file is empty */
+		goto out;
+	}
+
+	F2FS_I(inode)->i_compress_algorithm = option.algorithm;
+	F2FS_I(inode)->i_log_cluster_size = option.log_cluster_size;
+	F2FS_I(inode)->i_cluster_size = 1 << option.log_cluster_size;
+	f2fs_mark_inode_dirty_sync(inode, true);
+
+	if (!f2fs_is_compress_backend_ready(inode))	/* warn only; ret stays 0 */
+		f2fs_warn(sbi, "compression algorithm is successfully set, "
+			"but current kernel doesn't support this algorithm.");
+out:
+	inode_unlock(inode);
+	file_end_write(filp);
+
+	return ret;
+}
+
 long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
@@ -4062,6 +4113,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		return f2fs_sec_trim_file(filp, arg);
 	case F2FS_IOC_GET_COMPRESS_OPTION:
 		return f2fs_ioc_get_compress_option(filp, arg);
+	case F2FS_IOC_SET_COMPRESS_OPTION:
+		return f2fs_ioc_set_compress_option(filp, arg);
 	default:
 		return -ENOTTY;
 	}
@@ -4233,6 +4286,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
 	case F2FS_IOC_SEC_TRIM_FILE:
 	case F2FS_IOC_GET_COMPRESS_OPTION:
+	case F2FS_IOC_SET_COMPRESS_OPTION:
 		break;
 	default:
 		return -ENOIOCTLCMD;
diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h
index 872e61d78f29..f00199a2e38b 100644
--- a/include/uapi/linux/f2fs.h
+++ b/include/uapi/linux/f2fs.h
@@ -38,6 +38,8 @@
 						struct f2fs_sectrim_range)
 #define F2FS_IOC_GET_COMPRESS_OPTION	_IOR(F2FS_IOCTL_MAGIC, 21,	\
 						struct f2fs_comp_option)
+#define F2FS_IOC_SET_COMPRESS_OPTION	_IOW(F2FS_IOCTL_MAGIC, 22,	\
+						struct f2fs_comp_option)
 
 /*
  * should be same as XFS_IOC_GOINGDOWN.
-- 
cgit v1.2.3


From 5fdb322ff2c2b4ad519f490dcb7ebb96c5439af7 Mon Sep 17 00:00:00 2001
From: Daeho Jeong <daehojeong@google.com>
Date: Thu, 3 Dec 2020 15:56:15 +0900
Subject: f2fs: add F2FS_IOC_DECOMPRESS_FILE and F2FS_IOC_COMPRESS_FILE

Added two ioctls to explicitly decompress/compress a compression-enabled
file when the "compress_mode=user" mount option is in use.

With these two ioctls, users can control the compression and
decompression of their files.

Signed-off-by: Daeho Jeong <daehojeong@google.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/file.c            | 185 ++++++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/f2fs.h |   2 +
 2 files changed, 187 insertions(+)

(limited to 'include/uapi')

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 8241b8bdb33c..16ea10f2bcf5 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -4026,6 +4026,185 @@ out:
 	return ret;
 }
 
+static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len)
+{
+	DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, page_idx);
+	struct address_space *mapping = inode->i_mapping;
+	struct page *page;
+	pgoff_t redirty_idx = page_idx;
+	int i, page_len = 0, ret = 0;	/* page_len: pages read by the first loop */
+
+	page_cache_ra_unbounded(&ractl, len, 0);
+	for (i = 0; i < len; i++, page_idx++) {	/* pass 1: read the pages in */
+		page = read_cache_page(mapping, page_idx, NULL, NULL);
+		if (IS_ERR(page)) {
+			ret = PTR_ERR(page);
+			break;
+		}
+		page_len++;
+	}
+	/* Pass 2: dirty each page read above, then put it twice (presumably one put per lookup). */
+	for (i = 0; i < page_len; i++, redirty_idx++) {
+		page = find_lock_page(mapping, redirty_idx);
+		if (!page) {	/* lookup failed: report it, never touch the NULL page */
+			ret = -ENOENT;
+			continue;
+		}
+		set_page_dirty(page);
+		f2fs_put_page(page, 1);
+		f2fs_put_page(page, 0);
+	}
+	return ret;
+}
+
+static int f2fs_ioc_decompress_file(struct file *filp, unsigned long arg)
+{	/* Re-dirties the whole file one cluster at a time and writes it back; @arg is unused. */
+	struct inode *inode = file_inode(filp);
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	struct f2fs_inode_info *fi = F2FS_I(inode);
+	pgoff_t page_idx = 0, last_idx;
+	unsigned int blk_per_seg = sbi->blocks_per_seg;
+	int cluster_size = F2FS_I(inode)->i_cluster_size;
+	int count, ret;
+
+	if (!f2fs_sb_has_compression(sbi) ||
+			F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER)
+		return -EOPNOTSUPP;
+
+	if (!(filp->f_mode & FMODE_WRITE))
+		return -EBADF;	/* file was not opened for writing */
+
+	if (!f2fs_compressed_file(inode))
+		return -EINVAL;
+
+	f2fs_balance_fs(F2FS_I_SB(inode), true);
+
+	file_start_write(filp);
+	inode_lock(inode);
+
+	if (!f2fs_is_compress_backend_ready(inode)) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (f2fs_is_mmap_file(inode)) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
+	if (ret)
+		goto out;
+
+	if (!atomic_read(&fi->i_compr_blocks))
+		goto out;	/* i_compr_blocks is zero; ret is 0 at this point */
+
+	last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);	/* pages covering i_size */
+
+	count = last_idx - page_idx;
+	while (count) {
+		int len = min(cluster_size, count);	/* at most one cluster per pass */
+
+		ret = redirty_blocks(inode, page_idx, len);
+		if (ret < 0)
+			break;
+
+		if (get_dirty_pages(inode) >= blk_per_seg)
+			filemap_fdatawrite(inode->i_mapping);	/* return value not checked here */
+
+		count -= len;
+		page_idx += len;
+	}
+
+	if (!ret)
+		ret = filemap_write_and_wait_range(inode->i_mapping, 0,
+							LLONG_MAX);
+
+	if (ret)
+		f2fs_warn(sbi, "%s: The file might be partially decompressed "
+				"(errno=%d). Please delete the file.\n",
+				__func__, ret);
+out:
+	inode_unlock(inode);
+	file_end_write(filp);
+
+	return ret;
+}
+
+static int f2fs_ioc_compress_file(struct file *filp, unsigned long arg)
+{	/* Re-dirties the whole file with FI_ENABLE_COMPRESS set and writes it back; @arg is unused. */
+	struct inode *inode = file_inode(filp);
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	pgoff_t page_idx = 0, last_idx;
+	unsigned int blk_per_seg = sbi->blocks_per_seg;
+	int cluster_size = F2FS_I(inode)->i_cluster_size;
+	int count, ret;
+
+	if (!f2fs_sb_has_compression(sbi) ||
+			F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER)
+		return -EOPNOTSUPP;
+
+	if (!(filp->f_mode & FMODE_WRITE))
+		return -EBADF;	/* file was not opened for writing */
+
+	if (!f2fs_compressed_file(inode))
+		return -EINVAL;
+
+	f2fs_balance_fs(F2FS_I_SB(inode), true);
+
+	file_start_write(filp);
+	inode_lock(inode);
+
+	if (!f2fs_is_compress_backend_ready(inode)) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (f2fs_is_mmap_file(inode)) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
+	if (ret)
+		goto out;
+
+	set_inode_flag(inode, FI_ENABLE_COMPRESS);	/* cleared again after the final writeback */
+
+	last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);	/* pages covering i_size */
+
+	count = last_idx - page_idx;
+	while (count) {
+		int len = min(cluster_size, count);	/* at most one cluster per pass */
+
+		ret = redirty_blocks(inode, page_idx, len);
+		if (ret < 0)
+			break;
+
+		if (get_dirty_pages(inode) >= blk_per_seg)
+			filemap_fdatawrite(inode->i_mapping);	/* return value not checked here */
+
+		count -= len;
+		page_idx += len;
+	}
+
+	if (!ret)
+		ret = filemap_write_and_wait_range(inode->i_mapping, 0,
+							LLONG_MAX);
+
+	clear_inode_flag(inode, FI_ENABLE_COMPRESS);
+
+	if (ret)
+		f2fs_warn(sbi, "%s: The file might be partially compressed "
+				"(errno=%d). Please delete the file.\n",
+				__func__, ret);
+out:
+	inode_unlock(inode);
+	file_end_write(filp);
+
+	return ret;
+}
+
 static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	switch (cmd) {
@@ -4113,6 +4292,10 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		return f2fs_ioc_get_compress_option(filp, arg);
 	case F2FS_IOC_SET_COMPRESS_OPTION:
 		return f2fs_ioc_set_compress_option(filp, arg);
+	case F2FS_IOC_DECOMPRESS_FILE:
+		return f2fs_ioc_decompress_file(filp, arg);
+	case F2FS_IOC_COMPRESS_FILE:
+		return f2fs_ioc_compress_file(filp, arg);
 	default:
 		return -ENOTTY;
 	}
@@ -4352,6 +4535,8 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case F2FS_IOC_SEC_TRIM_FILE:
 	case F2FS_IOC_GET_COMPRESS_OPTION:
 	case F2FS_IOC_SET_COMPRESS_OPTION:
+	case F2FS_IOC_DECOMPRESS_FILE:
+	case F2FS_IOC_COMPRESS_FILE:
 		break;
 	default:
 		return -ENOIOCTLCMD;
diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h
index f00199a2e38b..352a822d4370 100644
--- a/include/uapi/linux/f2fs.h
+++ b/include/uapi/linux/f2fs.h
@@ -40,6 +40,8 @@
 						struct f2fs_comp_option)
 #define F2FS_IOC_SET_COMPRESS_OPTION	_IOW(F2FS_IOCTL_MAGIC, 22,	\
 						struct f2fs_comp_option)
+#define F2FS_IOC_DECOMPRESS_FILE	_IO(F2FS_IOCTL_MAGIC, 23)
+#define F2FS_IOC_COMPRESS_FILE		_IO(F2FS_IOCTL_MAGIC, 24)
 
 /*
  * should be same as XFS_IOC_GOINGDOWN.
-- 
cgit v1.2.3


From 35aaa6e650c24212316781b63005f52c1988cd4e Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Thu, 26 Nov 2020 10:36:09 +0100
Subject: media: Rename stateful codec control macros

For historical reasons, stateful codec controls are named
as {}_MPEG_{}. While we can't at this point sanely
change all control IDs (such as V4L2_CID_MPEG_VIDEO_VP8_FRAME_HEADER),
we can at least change the more meaningful macros, such as the
class macros.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Tested-by: Jernej Skrabec <jernej.skrabec@siol.net>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../userspace-api/media/v4l/dev-mem2mem.rst        |   2 +-
 .../userspace-api/media/v4l/ext-ctrls-codec.rst    |   4 +-
 .../userspace-api/media/v4l/extended-controls.rst  |   8 +-
 .../userspace-api/media/v4l/vidioc-g-ext-ctrls.rst |   6 +-
 drivers/media/common/cx2341x.c                     |   4 +-
 drivers/media/platform/s5p-mfc/s5p_mfc_dec.c       |   2 +-
 drivers/media/platform/s5p-mfc/s5p_mfc_enc.c       |   2 +-
 drivers/media/v4l2-core/v4l2-ctrls.c               |   4 +-
 include/media/fwht-ctrls.h                         |   2 +-
 include/media/h264-ctrls.h                         |  16 +-
 include/media/hevc-ctrls.h                         |  10 +-
 include/media/mpeg2-ctrls.h                        |   4 +-
 include/media/vp8-ctrls.h                          |   2 +-
 include/uapi/linux/v4l2-controls.h                 | 409 +++++++++++----------
 14 files changed, 242 insertions(+), 233 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/userspace-api/media/v4l/dev-mem2mem.rst b/Documentation/userspace-api/media/v4l/dev-mem2mem.rst
index d8db46886555..7041bb3d5b8d 100644
--- a/Documentation/userspace-api/media/v4l/dev-mem2mem.rst
+++ b/Documentation/userspace-api/media/v4l/dev-mem2mem.rst
@@ -32,7 +32,7 @@ file handle is visible through another file handle).
 One of the most common memory-to-memory device is the codec. Codecs
 are more complicated than most and require additional setup for
 their codec parameters. This is done through codec controls.
-See :ref:`mpeg-controls`. More details on how to use codec memory-to-memory
+See :ref:`codec-controls`. More details on how to use codec memory-to-memory
 devices are given in the following sections.
 
 .. toctree::
diff --git a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
index ce728c757eaf..b31c52d9b1da 100644
--- a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
+++ b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
@@ -1,6 +1,6 @@
 .. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later
 
-.. _mpeg-controls:
+.. _codec-controls:
 
 ***********************
 Codec Control Reference
@@ -26,7 +26,7 @@ Generic Codec Controls
 Codec Control IDs
 -----------------
 
-``V4L2_CID_MPEG_CLASS (class)``
+``V4L2_CID_CODEC_CLASS (class)``
     The Codec class descriptor. Calling
     :ref:`VIDIOC_QUERYCTRL` for this control will
     return a description of this control class. This description can be
diff --git a/Documentation/userspace-api/media/v4l/extended-controls.rst b/Documentation/userspace-api/media/v4l/extended-controls.rst
index 70301538d222..44fcd67f20bf 100644
--- a/Documentation/userspace-api/media/v4l/extended-controls.rst
+++ b/Documentation/userspace-api/media/v4l/extended-controls.rst
@@ -55,8 +55,8 @@ controls in that array and a control class. Control classes are used to
 group similar controls into a single class. For example, control class
 ``V4L2_CTRL_CLASS_USER`` contains all user controls (i. e. all controls
 that can also be set using the old :ref:`VIDIOC_S_CTRL <VIDIOC_G_CTRL>`
-ioctl). Control class ``V4L2_CTRL_CLASS_MPEG`` contains all controls
-relating to MPEG encoding, etc.
+ioctl). Control class ``V4L2_CTRL_CLASS_CODEC`` contains controls
+relating to codecs.
 
 All controls in the control array must belong to the specified control
 class. An error is returned if this is not the case.
@@ -130,9 +130,9 @@ control class is found:
 
 .. code-block:: c
 
-    qctrl.id = V4L2_CTRL_CLASS_MPEG | V4L2_CTRL_FLAG_NEXT_CTRL;
+    qctrl.id = V4L2_CTRL_CLASS_CODEC | V4L2_CTRL_FLAG_NEXT_CTRL;
     while (0 == ioctl(fd, VIDIOC_QUERYCTRL, &qctrl)) {
-	if (V4L2_CTRL_ID2CLASS(qctrl.id) != V4L2_CTRL_CLASS_MPEG)
+	if (V4L2_CTRL_ID2CLASS(qctrl.id) != V4L2_CTRL_CLASS_CODEC)
 	    break;
 	/* ... */
 	qctrl.id |= V4L2_CTRL_FLAG_NEXT_CTRL;
diff --git a/Documentation/userspace-api/media/v4l/vidioc-g-ext-ctrls.rst b/Documentation/userspace-api/media/v4l/vidioc-g-ext-ctrls.rst
index 9d28ac3fc237..2196cdf61ca0 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-g-ext-ctrls.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-g-ext-ctrls.rst
@@ -322,10 +322,10 @@ still cause this situation.
 	:ref:`VIDIOC_S_CTRL <VIDIOC_G_CTRL>` and
 	:ref:`VIDIOC_G_CTRL <VIDIOC_G_CTRL>` ioctl belong to this
 	class.
-    * - ``V4L2_CTRL_CLASS_MPEG``
+    * - ``V4L2_CTRL_CLASS_CODEC``
       - 0x990000
-      - The class containing MPEG compression controls. These controls are
-	described in :ref:`mpeg-controls`.
+      - The class containing stateful codec controls. These controls are
+	described in :ref:`codec-controls`.
     * - ``V4L2_CTRL_CLASS_CAMERA``
       - 0x9a0000
       - The class containing camera controls. These controls are described
diff --git a/drivers/media/common/cx2341x.c b/drivers/media/common/cx2341x.c
index 1f67e021138f..1392bd6b0026 100644
--- a/drivers/media/common/cx2341x.c
+++ b/drivers/media/common/cx2341x.c
@@ -166,7 +166,7 @@ static void cx2341x_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *ty
 
 /* Must be sorted from low to high control ID! */
 const u32 cx2341x_mpeg_ctrls[] = {
-	V4L2_CID_MPEG_CLASS,
+	V4L2_CID_CODEC_CLASS,
 	V4L2_CID_MPEG_STREAM_TYPE,
 	V4L2_CID_MPEG_STREAM_VBI_FMT,
 	V4L2_CID_MPEG_AUDIO_SAMPLING_FREQ,
@@ -574,7 +574,7 @@ int cx2341x_ctrl_query(const struct cx2341x_mpeg_params *params,
 	int err;
 
 	switch (qctrl->id) {
-	case V4L2_CID_MPEG_CLASS:
+	case V4L2_CID_CODEC_CLASS:
 		return v4l2_ctrl_query_fill(qctrl, 0, 0, 0, 0);
 	case V4L2_CID_MPEG_STREAM_TYPE:
 		return v4l2_ctrl_query_fill(qctrl,
diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc_dec.c b/drivers/media/platform/s5p-mfc/s5p_mfc_dec.c
index 61e144a35201..a71753d459ba 100644
--- a/drivers/media/platform/s5p-mfc/s5p_mfc_dec.c
+++ b/drivers/media/platform/s5p-mfc/s5p_mfc_dec.c
@@ -1109,7 +1109,7 @@ const struct v4l2_ioctl_ops *get_dec_v4l2_ioctl_ops(void)
 	return &s5p_mfc_dec_ioctl_ops;
 }
 
-#define IS_MFC51_PRIV(x) ((V4L2_CTRL_ID2WHICH(x) == V4L2_CTRL_CLASS_MPEG) \
+#define IS_MFC51_PRIV(x) ((V4L2_CTRL_ID2WHICH(x) == V4L2_CTRL_CLASS_CODEC) \
 						&& V4L2_CTRL_DRIVER_PRIV(x))
 
 int s5p_mfc_dec_ctrls_setup(struct s5p_mfc_ctx *ctx)
diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c b/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c
index acc2217dd7e9..1fad99edb091 100644
--- a/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c
+++ b/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c
@@ -2614,7 +2614,7 @@ const struct v4l2_ioctl_ops *get_enc_v4l2_ioctl_ops(void)
 	return &s5p_mfc_enc_ioctl_ops;
 }
 
-#define IS_MFC51_PRIV(x) ((V4L2_CTRL_ID2WHICH(x) == V4L2_CTRL_CLASS_MPEG) \
+#define IS_MFC51_PRIV(x) ((V4L2_CTRL_ID2WHICH(x) == V4L2_CTRL_CLASS_CODEC) \
 						&& V4L2_CTRL_DRIVER_PRIV(x))
 
 int s5p_mfc_enc_ctrls_setup(struct s5p_mfc_ctx *ctx)
diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index 3979e7924007..88ad475bd716 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -830,7 +830,7 @@ const char *v4l2_ctrl_get_name(u32 id)
 	/* The MPEG controls are applicable to all codec controls
 	 * and the 'MPEG' part of the define is historical */
 	/* Keep the order of the 'case's the same as in videodev2.h! */
-	case V4L2_CID_MPEG_CLASS:		return "Codec Controls";
+	case V4L2_CID_CODEC_CLASS:		return "Codec Controls";
 	case V4L2_CID_MPEG_STREAM_TYPE:		return "Stream Type";
 	case V4L2_CID_MPEG_STREAM_PID_PMT:	return "Stream PMT Program ID";
 	case V4L2_CID_MPEG_STREAM_PID_AUDIO:	return "Stream Audio Program ID";
@@ -1358,7 +1358,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 		break;
 	case V4L2_CID_USER_CLASS:
 	case V4L2_CID_CAMERA_CLASS:
-	case V4L2_CID_MPEG_CLASS:
+	case V4L2_CID_CODEC_CLASS:
 	case V4L2_CID_FM_TX_CLASS:
 	case V4L2_CID_FLASH_CLASS:
 	case V4L2_CID_JPEG_CLASS:
diff --git a/include/media/fwht-ctrls.h b/include/media/fwht-ctrls.h
index 615027410e47..a918b49609e1 100644
--- a/include/media/fwht-ctrls.h
+++ b/include/media/fwht-ctrls.h
@@ -13,7 +13,7 @@
 
 #define V4L2_CTRL_TYPE_FWHT_PARAMS 0x0105
 
-#define V4L2_CID_MPEG_VIDEO_FWHT_PARAMS	(V4L2_CID_MPEG_BASE + 292)
+#define V4L2_CID_MPEG_VIDEO_FWHT_PARAMS	(V4L2_CID_CODEC_BASE + 292)
 
 struct v4l2_ctrl_fwht_params {
 	__u64 backward_ref_ts;
diff --git a/include/media/h264-ctrls.h b/include/media/h264-ctrls.h
index 893e21354a9b..f86345b8efd1 100644
--- a/include/media/h264-ctrls.h
+++ b/include/media/h264-ctrls.h
@@ -29,14 +29,14 @@
  * would be added during the phase where those controls are not
  * stable. It should be fixed eventually.
  */
-#define V4L2_CID_MPEG_VIDEO_H264_SPS		(V4L2_CID_MPEG_BASE+1000)
-#define V4L2_CID_MPEG_VIDEO_H264_PPS		(V4L2_CID_MPEG_BASE+1001)
-#define V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX	(V4L2_CID_MPEG_BASE+1002)
-#define V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAMS	(V4L2_CID_MPEG_BASE+1003)
-#define V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS	(V4L2_CID_MPEG_BASE+1004)
-#define V4L2_CID_MPEG_VIDEO_H264_DECODE_MODE	(V4L2_CID_MPEG_BASE+1005)
-#define V4L2_CID_MPEG_VIDEO_H264_START_CODE	(V4L2_CID_MPEG_BASE+1006)
-#define V4L2_CID_MPEG_VIDEO_H264_PRED_WEIGHTS	(V4L2_CID_MPEG_BASE+1007)
+#define V4L2_CID_MPEG_VIDEO_H264_SPS		(V4L2_CID_CODEC_BASE+1000)
+#define V4L2_CID_MPEG_VIDEO_H264_PPS		(V4L2_CID_CODEC_BASE+1001)
+#define V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX	(V4L2_CID_CODEC_BASE+1002)
+#define V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAMS	(V4L2_CID_CODEC_BASE+1003)
+#define V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS	(V4L2_CID_CODEC_BASE+1004)
+#define V4L2_CID_MPEG_VIDEO_H264_DECODE_MODE	(V4L2_CID_CODEC_BASE+1005)
+#define V4L2_CID_MPEG_VIDEO_H264_START_CODE	(V4L2_CID_CODEC_BASE+1006)
+#define V4L2_CID_MPEG_VIDEO_H264_PRED_WEIGHTS	(V4L2_CID_CODEC_BASE+1007)
 
 /* enum v4l2_ctrl_type type values */
 #define V4L2_CTRL_TYPE_H264_SPS			0x0110
diff --git a/include/media/hevc-ctrls.h b/include/media/hevc-ctrls.h
index 1009cf0891cc..b4cb2ef02f17 100644
--- a/include/media/hevc-ctrls.h
+++ b/include/media/hevc-ctrls.h
@@ -16,11 +16,11 @@
 /* The pixel format isn't stable at the moment and will likely be renamed. */
 #define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */
 
-#define V4L2_CID_MPEG_VIDEO_HEVC_SPS		(V4L2_CID_MPEG_BASE + 1008)
-#define V4L2_CID_MPEG_VIDEO_HEVC_PPS		(V4L2_CID_MPEG_BASE + 1009)
-#define V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS	(V4L2_CID_MPEG_BASE + 1010)
-#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE	(V4L2_CID_MPEG_BASE + 1015)
-#define V4L2_CID_MPEG_VIDEO_HEVC_START_CODE	(V4L2_CID_MPEG_BASE + 1016)
+#define V4L2_CID_MPEG_VIDEO_HEVC_SPS		(V4L2_CID_CODEC_BASE + 1008)
+#define V4L2_CID_MPEG_VIDEO_HEVC_PPS		(V4L2_CID_CODEC_BASE + 1009)
+#define V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS	(V4L2_CID_CODEC_BASE + 1010)
+#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE	(V4L2_CID_CODEC_BASE + 1015)
+#define V4L2_CID_MPEG_VIDEO_HEVC_START_CODE	(V4L2_CID_CODEC_BASE + 1016)
 
 /* enum v4l2_ctrl_type type values */
 #define V4L2_CTRL_TYPE_HEVC_SPS 0x0120
diff --git a/include/media/mpeg2-ctrls.h b/include/media/mpeg2-ctrls.h
index 6601455b3d5e..2a4ae6701166 100644
--- a/include/media/mpeg2-ctrls.h
+++ b/include/media/mpeg2-ctrls.h
@@ -11,8 +11,8 @@
 #ifndef _MPEG2_CTRLS_H_
 #define _MPEG2_CTRLS_H_
 
-#define V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS		(V4L2_CID_MPEG_BASE+250)
-#define V4L2_CID_MPEG_VIDEO_MPEG2_QUANTIZATION		(V4L2_CID_MPEG_BASE+251)
+#define V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS		(V4L2_CID_CODEC_BASE+250)
+#define V4L2_CID_MPEG_VIDEO_MPEG2_QUANTIZATION		(V4L2_CID_CODEC_BASE+251)
 
 /* enum v4l2_ctrl_type type values */
 #define V4L2_CTRL_TYPE_MPEG2_SLICE_PARAMS 0x0103
diff --git a/include/media/vp8-ctrls.h b/include/media/vp8-ctrls.h
index a920493361dc..3969550df148 100644
--- a/include/media/vp8-ctrls.h
+++ b/include/media/vp8-ctrls.h
@@ -15,7 +15,7 @@
 
 #define V4L2_PIX_FMT_VP8_FRAME v4l2_fourcc('V', 'P', '8', 'F')
 
-#define V4L2_CID_MPEG_VIDEO_VP8_FRAME_HEADER (V4L2_CID_MPEG_BASE + 2000)
+#define V4L2_CID_MPEG_VIDEO_VP8_FRAME_HEADER (V4L2_CID_CODEC_BASE + 2000)
 #define V4L2_CTRL_TYPE_VP8_FRAME_HEADER 0x301
 
 #define V4L2_VP8_SEGMENT_HEADER_FLAG_ENABLED              0x01
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 7035f4fb182c..75cf87b9a377 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -54,7 +54,7 @@
 
 /* Control classes */
 #define V4L2_CTRL_CLASS_USER		0x00980000	/* Old-style 'user' controls */
-#define V4L2_CTRL_CLASS_MPEG		0x00990000	/* MPEG-compression controls */
+#define V4L2_CTRL_CLASS_CODEC		0x00990000	/* Stateful codec controls */
 #define V4L2_CTRL_CLASS_CAMERA		0x009a0000	/* Camera class controls */
 #define V4L2_CTRL_CLASS_FM_TX		0x009b0000	/* FM Modulator controls */
 #define V4L2_CTRL_CLASS_FLASH		0x009c0000	/* Camera flash controls */
@@ -208,11 +208,11 @@ enum v4l2_colorfx {
 /* The MPEG controls are applicable to all codec controls
  * and the 'MPEG' part of the define is historical */
 
-#define V4L2_CID_MPEG_BASE			(V4L2_CTRL_CLASS_MPEG | 0x900)
-#define V4L2_CID_MPEG_CLASS			(V4L2_CTRL_CLASS_MPEG | 1)
+#define V4L2_CID_CODEC_BASE			(V4L2_CTRL_CLASS_CODEC | 0x900)
+#define V4L2_CID_CODEC_CLASS			(V4L2_CTRL_CLASS_CODEC | 1)
 
 /*  MPEG streams, specific to multiplexed streams */
-#define V4L2_CID_MPEG_STREAM_TYPE		(V4L2_CID_MPEG_BASE+0)
+#define V4L2_CID_MPEG_STREAM_TYPE		(V4L2_CID_CODEC_BASE+0)
 enum v4l2_mpeg_stream_type {
 	V4L2_MPEG_STREAM_TYPE_MPEG2_PS   = 0, /* MPEG-2 program stream */
 	V4L2_MPEG_STREAM_TYPE_MPEG2_TS   = 1, /* MPEG-2 transport stream */
@@ -221,26 +221,26 @@ enum v4l2_mpeg_stream_type {
 	V4L2_MPEG_STREAM_TYPE_MPEG1_VCD  = 4, /* MPEG-1 VCD-compatible stream */
 	V4L2_MPEG_STREAM_TYPE_MPEG2_SVCD = 5, /* MPEG-2 SVCD-compatible stream */
 };
-#define V4L2_CID_MPEG_STREAM_PID_PMT		(V4L2_CID_MPEG_BASE+1)
-#define V4L2_CID_MPEG_STREAM_PID_AUDIO		(V4L2_CID_MPEG_BASE+2)
-#define V4L2_CID_MPEG_STREAM_PID_VIDEO		(V4L2_CID_MPEG_BASE+3)
-#define V4L2_CID_MPEG_STREAM_PID_PCR		(V4L2_CID_MPEG_BASE+4)
-#define V4L2_CID_MPEG_STREAM_PES_ID_AUDIO	(V4L2_CID_MPEG_BASE+5)
-#define V4L2_CID_MPEG_STREAM_PES_ID_VIDEO	(V4L2_CID_MPEG_BASE+6)
-#define V4L2_CID_MPEG_STREAM_VBI_FMT		(V4L2_CID_MPEG_BASE+7)
+#define V4L2_CID_MPEG_STREAM_PID_PMT		(V4L2_CID_CODEC_BASE+1)
+#define V4L2_CID_MPEG_STREAM_PID_AUDIO		(V4L2_CID_CODEC_BASE+2)
+#define V4L2_CID_MPEG_STREAM_PID_VIDEO		(V4L2_CID_CODEC_BASE+3)
+#define V4L2_CID_MPEG_STREAM_PID_PCR		(V4L2_CID_CODEC_BASE+4)
+#define V4L2_CID_MPEG_STREAM_PES_ID_AUDIO	(V4L2_CID_CODEC_BASE+5)
+#define V4L2_CID_MPEG_STREAM_PES_ID_VIDEO	(V4L2_CID_CODEC_BASE+6)
+#define V4L2_CID_MPEG_STREAM_VBI_FMT		(V4L2_CID_CODEC_BASE+7)
 enum v4l2_mpeg_stream_vbi_fmt {
 	V4L2_MPEG_STREAM_VBI_FMT_NONE = 0,  /* No VBI in the MPEG stream */
 	V4L2_MPEG_STREAM_VBI_FMT_IVTV = 1,  /* VBI in private packets, IVTV format */
 };
 
 /*  MPEG audio controls specific to multiplexed streams  */
-#define V4L2_CID_MPEG_AUDIO_SAMPLING_FREQ	(V4L2_CID_MPEG_BASE+100)
+#define V4L2_CID_MPEG_AUDIO_SAMPLING_FREQ	(V4L2_CID_CODEC_BASE+100)
 enum v4l2_mpeg_audio_sampling_freq {
 	V4L2_MPEG_AUDIO_SAMPLING_FREQ_44100 = 0,
 	V4L2_MPEG_AUDIO_SAMPLING_FREQ_48000 = 1,
 	V4L2_MPEG_AUDIO_SAMPLING_FREQ_32000 = 2,
 };
-#define V4L2_CID_MPEG_AUDIO_ENCODING		(V4L2_CID_MPEG_BASE+101)
+#define V4L2_CID_MPEG_AUDIO_ENCODING		(V4L2_CID_CODEC_BASE+101)
 enum v4l2_mpeg_audio_encoding {
 	V4L2_MPEG_AUDIO_ENCODING_LAYER_1 = 0,
 	V4L2_MPEG_AUDIO_ENCODING_LAYER_2 = 1,
@@ -248,7 +248,7 @@ enum v4l2_mpeg_audio_encoding {
 	V4L2_MPEG_AUDIO_ENCODING_AAC     = 3,
 	V4L2_MPEG_AUDIO_ENCODING_AC3     = 4,
 };
-#define V4L2_CID_MPEG_AUDIO_L1_BITRATE		(V4L2_CID_MPEG_BASE+102)
+#define V4L2_CID_MPEG_AUDIO_L1_BITRATE		(V4L2_CID_CODEC_BASE+102)
 enum v4l2_mpeg_audio_l1_bitrate {
 	V4L2_MPEG_AUDIO_L1_BITRATE_32K  = 0,
 	V4L2_MPEG_AUDIO_L1_BITRATE_64K  = 1,
@@ -265,7 +265,7 @@ enum v4l2_mpeg_audio_l1_bitrate {
 	V4L2_MPEG_AUDIO_L1_BITRATE_416K = 12,
 	V4L2_MPEG_AUDIO_L1_BITRATE_448K = 13,
 };
-#define V4L2_CID_MPEG_AUDIO_L2_BITRATE		(V4L2_CID_MPEG_BASE+103)
+#define V4L2_CID_MPEG_AUDIO_L2_BITRATE		(V4L2_CID_CODEC_BASE+103)
 enum v4l2_mpeg_audio_l2_bitrate {
 	V4L2_MPEG_AUDIO_L2_BITRATE_32K  = 0,
 	V4L2_MPEG_AUDIO_L2_BITRATE_48K  = 1,
@@ -282,7 +282,7 @@ enum v4l2_mpeg_audio_l2_bitrate {
 	V4L2_MPEG_AUDIO_L2_BITRATE_320K = 12,
 	V4L2_MPEG_AUDIO_L2_BITRATE_384K = 13,
 };
-#define V4L2_CID_MPEG_AUDIO_L3_BITRATE		(V4L2_CID_MPEG_BASE+104)
+#define V4L2_CID_MPEG_AUDIO_L3_BITRATE		(V4L2_CID_CODEC_BASE+104)
 enum v4l2_mpeg_audio_l3_bitrate {
 	V4L2_MPEG_AUDIO_L3_BITRATE_32K  = 0,
 	V4L2_MPEG_AUDIO_L3_BITRATE_40K  = 1,
@@ -299,34 +299,34 @@ enum v4l2_mpeg_audio_l3_bitrate {
 	V4L2_MPEG_AUDIO_L3_BITRATE_256K = 12,
 	V4L2_MPEG_AUDIO_L3_BITRATE_320K = 13,
 };
-#define V4L2_CID_MPEG_AUDIO_MODE		(V4L2_CID_MPEG_BASE+105)
+#define V4L2_CID_MPEG_AUDIO_MODE		(V4L2_CID_CODEC_BASE+105)
 enum v4l2_mpeg_audio_mode {
 	V4L2_MPEG_AUDIO_MODE_STEREO       = 0,
 	V4L2_MPEG_AUDIO_MODE_JOINT_STEREO = 1,
 	V4L2_MPEG_AUDIO_MODE_DUAL         = 2,
 	V4L2_MPEG_AUDIO_MODE_MONO         = 3,
 };
-#define V4L2_CID_MPEG_AUDIO_MODE_EXTENSION	(V4L2_CID_MPEG_BASE+106)
+#define V4L2_CID_MPEG_AUDIO_MODE_EXTENSION	(V4L2_CID_CODEC_BASE+106)
 enum v4l2_mpeg_audio_mode_extension {
 	V4L2_MPEG_AUDIO_MODE_EXTENSION_BOUND_4  = 0,
 	V4L2_MPEG_AUDIO_MODE_EXTENSION_BOUND_8  = 1,
 	V4L2_MPEG_AUDIO_MODE_EXTENSION_BOUND_12 = 2,
 	V4L2_MPEG_AUDIO_MODE_EXTENSION_BOUND_16 = 3,
 };
-#define V4L2_CID_MPEG_AUDIO_EMPHASIS		(V4L2_CID_MPEG_BASE+107)
+#define V4L2_CID_MPEG_AUDIO_EMPHASIS		(V4L2_CID_CODEC_BASE+107)
 enum v4l2_mpeg_audio_emphasis {
 	V4L2_MPEG_AUDIO_EMPHASIS_NONE         = 0,
 	V4L2_MPEG_AUDIO_EMPHASIS_50_DIV_15_uS = 1,
 	V4L2_MPEG_AUDIO_EMPHASIS_CCITT_J17    = 2,
 };
-#define V4L2_CID_MPEG_AUDIO_CRC			(V4L2_CID_MPEG_BASE+108)
+#define V4L2_CID_MPEG_AUDIO_CRC			(V4L2_CID_CODEC_BASE+108)
 enum v4l2_mpeg_audio_crc {
 	V4L2_MPEG_AUDIO_CRC_NONE  = 0,
 	V4L2_MPEG_AUDIO_CRC_CRC16 = 1,
 };
-#define V4L2_CID_MPEG_AUDIO_MUTE		(V4L2_CID_MPEG_BASE+109)
-#define V4L2_CID_MPEG_AUDIO_AAC_BITRATE		(V4L2_CID_MPEG_BASE+110)
-#define V4L2_CID_MPEG_AUDIO_AC3_BITRATE		(V4L2_CID_MPEG_BASE+111)
+#define V4L2_CID_MPEG_AUDIO_MUTE		(V4L2_CID_CODEC_BASE+109)
+#define V4L2_CID_MPEG_AUDIO_AAC_BITRATE		(V4L2_CID_CODEC_BASE+110)
+#define V4L2_CID_MPEG_AUDIO_AC3_BITRATE		(V4L2_CID_CODEC_BASE+111)
 enum v4l2_mpeg_audio_ac3_bitrate {
 	V4L2_MPEG_AUDIO_AC3_BITRATE_32K  = 0,
 	V4L2_MPEG_AUDIO_AC3_BITRATE_40K  = 1,
@@ -348,7 +348,7 @@ enum v4l2_mpeg_audio_ac3_bitrate {
 	V4L2_MPEG_AUDIO_AC3_BITRATE_576K = 17,
 	V4L2_MPEG_AUDIO_AC3_BITRATE_640K = 18,
 };
-#define V4L2_CID_MPEG_AUDIO_DEC_PLAYBACK	(V4L2_CID_MPEG_BASE+112)
+#define V4L2_CID_MPEG_AUDIO_DEC_PLAYBACK	(V4L2_CID_CODEC_BASE+112)
 enum v4l2_mpeg_audio_dec_playback {
 	V4L2_MPEG_AUDIO_DEC_PLAYBACK_AUTO	    = 0,
 	V4L2_MPEG_AUDIO_DEC_PLAYBACK_STEREO	    = 1,
@@ -357,52 +357,52 @@ enum v4l2_mpeg_audio_dec_playback {
 	V4L2_MPEG_AUDIO_DEC_PLAYBACK_MONO	    = 4,
 	V4L2_MPEG_AUDIO_DEC_PLAYBACK_SWAPPED_STEREO = 5,
 };
-#define V4L2_CID_MPEG_AUDIO_DEC_MULTILINGUAL_PLAYBACK (V4L2_CID_MPEG_BASE+113)
+#define V4L2_CID_MPEG_AUDIO_DEC_MULTILINGUAL_PLAYBACK (V4L2_CID_CODEC_BASE+113)
 
 /*  MPEG video controls specific to multiplexed streams */
-#define V4L2_CID_MPEG_VIDEO_ENCODING		(V4L2_CID_MPEG_BASE+200)
+#define V4L2_CID_MPEG_VIDEO_ENCODING		(V4L2_CID_CODEC_BASE+200)
 enum v4l2_mpeg_video_encoding {
 	V4L2_MPEG_VIDEO_ENCODING_MPEG_1     = 0,
 	V4L2_MPEG_VIDEO_ENCODING_MPEG_2     = 1,
 	V4L2_MPEG_VIDEO_ENCODING_MPEG_4_AVC = 2,
 };
-#define V4L2_CID_MPEG_VIDEO_ASPECT		(V4L2_CID_MPEG_BASE+201)
+#define V4L2_CID_MPEG_VIDEO_ASPECT		(V4L2_CID_CODEC_BASE+201)
 enum v4l2_mpeg_video_aspect {
 	V4L2_MPEG_VIDEO_ASPECT_1x1     = 0,
 	V4L2_MPEG_VIDEO_ASPECT_4x3     = 1,
 	V4L2_MPEG_VIDEO_ASPECT_16x9    = 2,
 	V4L2_MPEG_VIDEO_ASPECT_221x100 = 3,
 };
-#define V4L2_CID_MPEG_VIDEO_B_FRAMES		(V4L2_CID_MPEG_BASE+202)
-#define V4L2_CID_MPEG_VIDEO_GOP_SIZE		(V4L2_CID_MPEG_BASE+203)
-#define V4L2_CID_MPEG_VIDEO_GOP_CLOSURE		(V4L2_CID_MPEG_BASE+204)
-#define V4L2_CID_MPEG_VIDEO_PULLDOWN		(V4L2_CID_MPEG_BASE+205)
-#define V4L2_CID_MPEG_VIDEO_BITRATE_MODE	(V4L2_CID_MPEG_BASE+206)
+#define V4L2_CID_MPEG_VIDEO_B_FRAMES		(V4L2_CID_CODEC_BASE+202)
+#define V4L2_CID_MPEG_VIDEO_GOP_SIZE		(V4L2_CID_CODEC_BASE+203)
+#define V4L2_CID_MPEG_VIDEO_GOP_CLOSURE		(V4L2_CID_CODEC_BASE+204)
+#define V4L2_CID_MPEG_VIDEO_PULLDOWN		(V4L2_CID_CODEC_BASE+205)
+#define V4L2_CID_MPEG_VIDEO_BITRATE_MODE	(V4L2_CID_CODEC_BASE+206)
 enum v4l2_mpeg_video_bitrate_mode {
 	V4L2_MPEG_VIDEO_BITRATE_MODE_VBR = 0,
 	V4L2_MPEG_VIDEO_BITRATE_MODE_CBR = 1,
 	V4L2_MPEG_VIDEO_BITRATE_MODE_CQ  = 2,
 };
-#define V4L2_CID_MPEG_VIDEO_BITRATE		(V4L2_CID_MPEG_BASE+207)
-#define V4L2_CID_MPEG_VIDEO_BITRATE_PEAK	(V4L2_CID_MPEG_BASE+208)
-#define V4L2_CID_MPEG_VIDEO_TEMPORAL_DECIMATION (V4L2_CID_MPEG_BASE+209)
-#define V4L2_CID_MPEG_VIDEO_MUTE		(V4L2_CID_MPEG_BASE+210)
-#define V4L2_CID_MPEG_VIDEO_MUTE_YUV		(V4L2_CID_MPEG_BASE+211)
-#define V4L2_CID_MPEG_VIDEO_DECODER_SLICE_INTERFACE		(V4L2_CID_MPEG_BASE+212)
-#define V4L2_CID_MPEG_VIDEO_DECODER_MPEG4_DEBLOCK_FILTER	(V4L2_CID_MPEG_BASE+213)
-#define V4L2_CID_MPEG_VIDEO_CYCLIC_INTRA_REFRESH_MB		(V4L2_CID_MPEG_BASE+214)
-#define V4L2_CID_MPEG_VIDEO_FRAME_RC_ENABLE			(V4L2_CID_MPEG_BASE+215)
-#define V4L2_CID_MPEG_VIDEO_HEADER_MODE				(V4L2_CID_MPEG_BASE+216)
+#define V4L2_CID_MPEG_VIDEO_BITRATE		(V4L2_CID_CODEC_BASE+207)
+#define V4L2_CID_MPEG_VIDEO_BITRATE_PEAK	(V4L2_CID_CODEC_BASE+208)
+#define V4L2_CID_MPEG_VIDEO_TEMPORAL_DECIMATION (V4L2_CID_CODEC_BASE+209)
+#define V4L2_CID_MPEG_VIDEO_MUTE		(V4L2_CID_CODEC_BASE+210)
+#define V4L2_CID_MPEG_VIDEO_MUTE_YUV		(V4L2_CID_CODEC_BASE+211)
+#define V4L2_CID_MPEG_VIDEO_DECODER_SLICE_INTERFACE		(V4L2_CID_CODEC_BASE+212)
+#define V4L2_CID_MPEG_VIDEO_DECODER_MPEG4_DEBLOCK_FILTER	(V4L2_CID_CODEC_BASE+213)
+#define V4L2_CID_MPEG_VIDEO_CYCLIC_INTRA_REFRESH_MB		(V4L2_CID_CODEC_BASE+214)
+#define V4L2_CID_MPEG_VIDEO_FRAME_RC_ENABLE			(V4L2_CID_CODEC_BASE+215)
+#define V4L2_CID_MPEG_VIDEO_HEADER_MODE				(V4L2_CID_CODEC_BASE+216)
 enum v4l2_mpeg_video_header_mode {
 	V4L2_MPEG_VIDEO_HEADER_MODE_SEPARATE			= 0,
 	V4L2_MPEG_VIDEO_HEADER_MODE_JOINED_WITH_1ST_FRAME	= 1,
 
 };
-#define V4L2_CID_MPEG_VIDEO_MAX_REF_PIC			(V4L2_CID_MPEG_BASE+217)
-#define V4L2_CID_MPEG_VIDEO_MB_RC_ENABLE		(V4L2_CID_MPEG_BASE+218)
-#define V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MAX_BYTES	(V4L2_CID_MPEG_BASE+219)
-#define V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MAX_MB		(V4L2_CID_MPEG_BASE+220)
-#define V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MODE		(V4L2_CID_MPEG_BASE+221)
+#define V4L2_CID_MPEG_VIDEO_MAX_REF_PIC			(V4L2_CID_CODEC_BASE+217)
+#define V4L2_CID_MPEG_VIDEO_MB_RC_ENABLE		(V4L2_CID_CODEC_BASE+218)
+#define V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MAX_BYTES	(V4L2_CID_CODEC_BASE+219)
+#define V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MAX_MB		(V4L2_CID_CODEC_BASE+220)
+#define V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MODE		(V4L2_CID_CODEC_BASE+221)
 enum v4l2_mpeg_video_multi_slice_mode {
 	V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_SINGLE		= 0,
 	V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_MAX_MB		= 1,
@@ -413,24 +413,24 @@ enum v4l2_mpeg_video_multi_slice_mode {
 	V4L2_MPEG_VIDEO_MULTI_SICE_MODE_MAX_BYTES	= 2,
 #endif
 };
-#define V4L2_CID_MPEG_VIDEO_VBV_SIZE			(V4L2_CID_MPEG_BASE+222)
-#define V4L2_CID_MPEG_VIDEO_DEC_PTS			(V4L2_CID_MPEG_BASE+223)
-#define V4L2_CID_MPEG_VIDEO_DEC_FRAME			(V4L2_CID_MPEG_BASE+224)
-#define V4L2_CID_MPEG_VIDEO_VBV_DELAY			(V4L2_CID_MPEG_BASE+225)
-#define V4L2_CID_MPEG_VIDEO_REPEAT_SEQ_HEADER		(V4L2_CID_MPEG_BASE+226)
-#define V4L2_CID_MPEG_VIDEO_MV_H_SEARCH_RANGE		(V4L2_CID_MPEG_BASE+227)
-#define V4L2_CID_MPEG_VIDEO_MV_V_SEARCH_RANGE		(V4L2_CID_MPEG_BASE+228)
-#define V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME		(V4L2_CID_MPEG_BASE+229)
+#define V4L2_CID_MPEG_VIDEO_VBV_SIZE			(V4L2_CID_CODEC_BASE+222)
+#define V4L2_CID_MPEG_VIDEO_DEC_PTS			(V4L2_CID_CODEC_BASE+223)
+#define V4L2_CID_MPEG_VIDEO_DEC_FRAME			(V4L2_CID_CODEC_BASE+224)
+#define V4L2_CID_MPEG_VIDEO_VBV_DELAY			(V4L2_CID_CODEC_BASE+225)
+#define V4L2_CID_MPEG_VIDEO_REPEAT_SEQ_HEADER		(V4L2_CID_CODEC_BASE+226)
+#define V4L2_CID_MPEG_VIDEO_MV_H_SEARCH_RANGE		(V4L2_CID_CODEC_BASE+227)
+#define V4L2_CID_MPEG_VIDEO_MV_V_SEARCH_RANGE		(V4L2_CID_CODEC_BASE+228)
+#define V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME		(V4L2_CID_CODEC_BASE+229)
 
 /* CIDs for the MPEG-2 Part 2 (H.262) codec */
-#define V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL			(V4L2_CID_MPEG_BASE+270)
+#define V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL			(V4L2_CID_CODEC_BASE+270)
 enum v4l2_mpeg_video_mpeg2_level {
 	V4L2_MPEG_VIDEO_MPEG2_LEVEL_LOW		= 0,
 	V4L2_MPEG_VIDEO_MPEG2_LEVEL_MAIN	= 1,
 	V4L2_MPEG_VIDEO_MPEG2_LEVEL_HIGH_1440	= 2,
 	V4L2_MPEG_VIDEO_MPEG2_LEVEL_HIGH	= 3,
 };
-#define V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE		(V4L2_CID_MPEG_BASE+271)
+#define V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE		(V4L2_CID_CODEC_BASE+271)
 enum v4l2_mpeg_video_mpeg2_profile {
 	V4L2_MPEG_VIDEO_MPEG2_PROFILE_SIMPLE				= 0,
 	V4L2_MPEG_VIDEO_MPEG2_PROFILE_MAIN				= 1,
@@ -441,28 +441,28 @@ enum v4l2_mpeg_video_mpeg2_profile {
 };
 
 /* CIDs for the FWHT codec as used by the vicodec driver. */
-#define V4L2_CID_FWHT_I_FRAME_QP             (V4L2_CID_MPEG_BASE + 290)
-#define V4L2_CID_FWHT_P_FRAME_QP             (V4L2_CID_MPEG_BASE + 291)
-
-#define V4L2_CID_MPEG_VIDEO_H263_I_FRAME_QP		(V4L2_CID_MPEG_BASE+300)
-#define V4L2_CID_MPEG_VIDEO_H263_P_FRAME_QP		(V4L2_CID_MPEG_BASE+301)
-#define V4L2_CID_MPEG_VIDEO_H263_B_FRAME_QP		(V4L2_CID_MPEG_BASE+302)
-#define V4L2_CID_MPEG_VIDEO_H263_MIN_QP			(V4L2_CID_MPEG_BASE+303)
-#define V4L2_CID_MPEG_VIDEO_H263_MAX_QP			(V4L2_CID_MPEG_BASE+304)
-#define V4L2_CID_MPEG_VIDEO_H264_I_FRAME_QP		(V4L2_CID_MPEG_BASE+350)
-#define V4L2_CID_MPEG_VIDEO_H264_P_FRAME_QP		(V4L2_CID_MPEG_BASE+351)
-#define V4L2_CID_MPEG_VIDEO_H264_B_FRAME_QP		(V4L2_CID_MPEG_BASE+352)
-#define V4L2_CID_MPEG_VIDEO_H264_MIN_QP			(V4L2_CID_MPEG_BASE+353)
-#define V4L2_CID_MPEG_VIDEO_H264_MAX_QP			(V4L2_CID_MPEG_BASE+354)
-#define V4L2_CID_MPEG_VIDEO_H264_8X8_TRANSFORM		(V4L2_CID_MPEG_BASE+355)
-#define V4L2_CID_MPEG_VIDEO_H264_CPB_SIZE		(V4L2_CID_MPEG_BASE+356)
-#define V4L2_CID_MPEG_VIDEO_H264_ENTROPY_MODE		(V4L2_CID_MPEG_BASE+357)
+#define V4L2_CID_FWHT_I_FRAME_QP             (V4L2_CID_CODEC_BASE + 290)
+#define V4L2_CID_FWHT_P_FRAME_QP             (V4L2_CID_CODEC_BASE + 291)
+
+#define V4L2_CID_MPEG_VIDEO_H263_I_FRAME_QP		(V4L2_CID_CODEC_BASE+300)
+#define V4L2_CID_MPEG_VIDEO_H263_P_FRAME_QP		(V4L2_CID_CODEC_BASE+301)
+#define V4L2_CID_MPEG_VIDEO_H263_B_FRAME_QP		(V4L2_CID_CODEC_BASE+302)
+#define V4L2_CID_MPEG_VIDEO_H263_MIN_QP			(V4L2_CID_CODEC_BASE+303)
+#define V4L2_CID_MPEG_VIDEO_H263_MAX_QP			(V4L2_CID_CODEC_BASE+304)
+#define V4L2_CID_MPEG_VIDEO_H264_I_FRAME_QP		(V4L2_CID_CODEC_BASE+350)
+#define V4L2_CID_MPEG_VIDEO_H264_P_FRAME_QP		(V4L2_CID_CODEC_BASE+351)
+#define V4L2_CID_MPEG_VIDEO_H264_B_FRAME_QP		(V4L2_CID_CODEC_BASE+352)
+#define V4L2_CID_MPEG_VIDEO_H264_MIN_QP			(V4L2_CID_CODEC_BASE+353)
+#define V4L2_CID_MPEG_VIDEO_H264_MAX_QP			(V4L2_CID_CODEC_BASE+354)
+#define V4L2_CID_MPEG_VIDEO_H264_8X8_TRANSFORM		(V4L2_CID_CODEC_BASE+355)
+#define V4L2_CID_MPEG_VIDEO_H264_CPB_SIZE		(V4L2_CID_CODEC_BASE+356)
+#define V4L2_CID_MPEG_VIDEO_H264_ENTROPY_MODE		(V4L2_CID_CODEC_BASE+357)
 enum v4l2_mpeg_video_h264_entropy_mode {
 	V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CAVLC	= 0,
 	V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CABAC	= 1,
 };
-#define V4L2_CID_MPEG_VIDEO_H264_I_PERIOD		(V4L2_CID_MPEG_BASE+358)
-#define V4L2_CID_MPEG_VIDEO_H264_LEVEL			(V4L2_CID_MPEG_BASE+359)
+#define V4L2_CID_MPEG_VIDEO_H264_I_PERIOD		(V4L2_CID_CODEC_BASE+358)
+#define V4L2_CID_MPEG_VIDEO_H264_LEVEL			(V4L2_CID_CODEC_BASE+359)
 enum v4l2_mpeg_video_h264_level {
 	V4L2_MPEG_VIDEO_H264_LEVEL_1_0	= 0,
 	V4L2_MPEG_VIDEO_H264_LEVEL_1B	= 1,
@@ -485,15 +485,15 @@ enum v4l2_mpeg_video_h264_level {
 	V4L2_MPEG_VIDEO_H264_LEVEL_6_1	= 18,
 	V4L2_MPEG_VIDEO_H264_LEVEL_6_2	= 19,
 };
-#define V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_ALPHA	(V4L2_CID_MPEG_BASE+360)
-#define V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_BETA	(V4L2_CID_MPEG_BASE+361)
-#define V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_MODE	(V4L2_CID_MPEG_BASE+362)
+#define V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_ALPHA	(V4L2_CID_CODEC_BASE+360)
+#define V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_BETA	(V4L2_CID_CODEC_BASE+361)
+#define V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_MODE	(V4L2_CID_CODEC_BASE+362)
 enum v4l2_mpeg_video_h264_loop_filter_mode {
 	V4L2_MPEG_VIDEO_H264_LOOP_FILTER_MODE_ENABLED				= 0,
 	V4L2_MPEG_VIDEO_H264_LOOP_FILTER_MODE_DISABLED				= 1,
 	V4L2_MPEG_VIDEO_H264_LOOP_FILTER_MODE_DISABLED_AT_SLICE_BOUNDARY	= 2,
 };
-#define V4L2_CID_MPEG_VIDEO_H264_PROFILE		(V4L2_CID_MPEG_BASE+363)
+#define V4L2_CID_MPEG_VIDEO_H264_PROFILE		(V4L2_CID_CODEC_BASE+363)
 enum v4l2_mpeg_video_h264_profile {
 	V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE			= 0,
 	V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_BASELINE	= 1,
@@ -514,10 +514,10 @@ enum v4l2_mpeg_video_h264_profile {
 	V4L2_MPEG_VIDEO_H264_PROFILE_MULTIVIEW_HIGH		= 16,
 	V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_HIGH		= 17,
 };
-#define V4L2_CID_MPEG_VIDEO_H264_VUI_EXT_SAR_HEIGHT	(V4L2_CID_MPEG_BASE+364)
-#define V4L2_CID_MPEG_VIDEO_H264_VUI_EXT_SAR_WIDTH	(V4L2_CID_MPEG_BASE+365)
-#define V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_ENABLE		(V4L2_CID_MPEG_BASE+366)
-#define V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_IDC		(V4L2_CID_MPEG_BASE+367)
+#define V4L2_CID_MPEG_VIDEO_H264_VUI_EXT_SAR_HEIGHT	(V4L2_CID_CODEC_BASE+364)
+#define V4L2_CID_MPEG_VIDEO_H264_VUI_EXT_SAR_WIDTH	(V4L2_CID_CODEC_BASE+365)
+#define V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_ENABLE		(V4L2_CID_CODEC_BASE+366)
+#define V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_IDC		(V4L2_CID_CODEC_BASE+367)
 enum v4l2_mpeg_video_h264_vui_sar_idc {
 	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_UNSPECIFIED	= 0,
 	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_1x1		= 1,
@@ -538,9 +538,9 @@ enum v4l2_mpeg_video_h264_vui_sar_idc {
 	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_2x1		= 16,
 	V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_EXTENDED	= 17,
 };
-#define V4L2_CID_MPEG_VIDEO_H264_SEI_FRAME_PACKING		(V4L2_CID_MPEG_BASE+368)
-#define V4L2_CID_MPEG_VIDEO_H264_SEI_FP_CURRENT_FRAME_0		(V4L2_CID_MPEG_BASE+369)
-#define V4L2_CID_MPEG_VIDEO_H264_SEI_FP_ARRANGEMENT_TYPE	(V4L2_CID_MPEG_BASE+370)
+#define V4L2_CID_MPEG_VIDEO_H264_SEI_FRAME_PACKING		(V4L2_CID_CODEC_BASE+368)
+#define V4L2_CID_MPEG_VIDEO_H264_SEI_FP_CURRENT_FRAME_0		(V4L2_CID_CODEC_BASE+369)
+#define V4L2_CID_MPEG_VIDEO_H264_SEI_FP_ARRANGEMENT_TYPE	(V4L2_CID_CODEC_BASE+370)
 enum v4l2_mpeg_video_h264_sei_fp_arrangement_type {
 	V4L2_MPEG_VIDEO_H264_SEI_FP_ARRANGEMENT_TYPE_CHECKERBOARD	= 0,
 	V4L2_MPEG_VIDEO_H264_SEI_FP_ARRANGEMENT_TYPE_COLUMN		= 1,
@@ -549,8 +549,8 @@ enum v4l2_mpeg_video_h264_sei_fp_arrangement_type {
 	V4L2_MPEG_VIDEO_H264_SEI_FP_ARRANGEMENT_TYPE_TOP_BOTTOM		= 4,
 	V4L2_MPEG_VIDEO_H264_SEI_FP_ARRANGEMENT_TYPE_TEMPORAL		= 5,
 };
-#define V4L2_CID_MPEG_VIDEO_H264_FMO			(V4L2_CID_MPEG_BASE+371)
-#define V4L2_CID_MPEG_VIDEO_H264_FMO_MAP_TYPE		(V4L2_CID_MPEG_BASE+372)
+#define V4L2_CID_MPEG_VIDEO_H264_FMO			(V4L2_CID_CODEC_BASE+371)
+#define V4L2_CID_MPEG_VIDEO_H264_FMO_MAP_TYPE		(V4L2_CID_CODEC_BASE+372)
 enum v4l2_mpeg_video_h264_fmo_map_type {
 	V4L2_MPEG_VIDEO_H264_FMO_MAP_TYPE_INTERLEAVED_SLICES		= 0,
 	V4L2_MPEG_VIDEO_H264_FMO_MAP_TYPE_SCATTERED_SLICES		= 1,
@@ -560,36 +560,36 @@ enum v4l2_mpeg_video_h264_fmo_map_type {
 	V4L2_MPEG_VIDEO_H264_FMO_MAP_TYPE_WIPE_SCAN			= 5,
 	V4L2_MPEG_VIDEO_H264_FMO_MAP_TYPE_EXPLICIT			= 6,
 };
-#define V4L2_CID_MPEG_VIDEO_H264_FMO_SLICE_GROUP	(V4L2_CID_MPEG_BASE+373)
-#define V4L2_CID_MPEG_VIDEO_H264_FMO_CHANGE_DIRECTION	(V4L2_CID_MPEG_BASE+374)
+#define V4L2_CID_MPEG_VIDEO_H264_FMO_SLICE_GROUP	(V4L2_CID_CODEC_BASE+373)
+#define V4L2_CID_MPEG_VIDEO_H264_FMO_CHANGE_DIRECTION	(V4L2_CID_CODEC_BASE+374)
 enum v4l2_mpeg_video_h264_fmo_change_dir {
 	V4L2_MPEG_VIDEO_H264_FMO_CHANGE_DIR_RIGHT	= 0,
 	V4L2_MPEG_VIDEO_H264_FMO_CHANGE_DIR_LEFT	= 1,
 };
-#define V4L2_CID_MPEG_VIDEO_H264_FMO_CHANGE_RATE	(V4L2_CID_MPEG_BASE+375)
-#define V4L2_CID_MPEG_VIDEO_H264_FMO_RUN_LENGTH		(V4L2_CID_MPEG_BASE+376)
-#define V4L2_CID_MPEG_VIDEO_H264_ASO			(V4L2_CID_MPEG_BASE+377)
-#define V4L2_CID_MPEG_VIDEO_H264_ASO_SLICE_ORDER	(V4L2_CID_MPEG_BASE+378)
-#define V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING		(V4L2_CID_MPEG_BASE+379)
-#define V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_TYPE	(V4L2_CID_MPEG_BASE+380)
+#define V4L2_CID_MPEG_VIDEO_H264_FMO_CHANGE_RATE	(V4L2_CID_CODEC_BASE+375)
+#define V4L2_CID_MPEG_VIDEO_H264_FMO_RUN_LENGTH		(V4L2_CID_CODEC_BASE+376)
+#define V4L2_CID_MPEG_VIDEO_H264_ASO			(V4L2_CID_CODEC_BASE+377)
+#define V4L2_CID_MPEG_VIDEO_H264_ASO_SLICE_ORDER	(V4L2_CID_CODEC_BASE+378)
+#define V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING		(V4L2_CID_CODEC_BASE+379)
+#define V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_TYPE	(V4L2_CID_CODEC_BASE+380)
 enum v4l2_mpeg_video_h264_hierarchical_coding_type {
 	V4L2_MPEG_VIDEO_H264_HIERARCHICAL_CODING_B	= 0,
 	V4L2_MPEG_VIDEO_H264_HIERARCHICAL_CODING_P	= 1,
 };
-#define V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_LAYER	(V4L2_CID_MPEG_BASE+381)
-#define V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_LAYER_QP	(V4L2_CID_MPEG_BASE+382)
-#define V4L2_CID_MPEG_VIDEO_H264_CONSTRAINED_INTRA_PREDICTION	(V4L2_CID_MPEG_BASE+383)
-#define V4L2_CID_MPEG_VIDEO_H264_CHROMA_QP_INDEX_OFFSET		(V4L2_CID_MPEG_BASE+384)
-#define V4L2_CID_MPEG_VIDEO_H264_I_FRAME_MIN_QP	(V4L2_CID_MPEG_BASE+385)
-#define V4L2_CID_MPEG_VIDEO_H264_I_FRAME_MAX_QP	(V4L2_CID_MPEG_BASE+386)
-#define V4L2_CID_MPEG_VIDEO_H264_P_FRAME_MIN_QP	(V4L2_CID_MPEG_BASE+387)
-#define V4L2_CID_MPEG_VIDEO_H264_P_FRAME_MAX_QP	(V4L2_CID_MPEG_BASE+388)
-#define V4L2_CID_MPEG_VIDEO_MPEG4_I_FRAME_QP	(V4L2_CID_MPEG_BASE+400)
-#define V4L2_CID_MPEG_VIDEO_MPEG4_P_FRAME_QP	(V4L2_CID_MPEG_BASE+401)
-#define V4L2_CID_MPEG_VIDEO_MPEG4_B_FRAME_QP	(V4L2_CID_MPEG_BASE+402)
-#define V4L2_CID_MPEG_VIDEO_MPEG4_MIN_QP	(V4L2_CID_MPEG_BASE+403)
-#define V4L2_CID_MPEG_VIDEO_MPEG4_MAX_QP	(V4L2_CID_MPEG_BASE+404)
-#define V4L2_CID_MPEG_VIDEO_MPEG4_LEVEL		(V4L2_CID_MPEG_BASE+405)
+#define V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_LAYER	(V4L2_CID_CODEC_BASE+381)
+#define V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_LAYER_QP	(V4L2_CID_CODEC_BASE+382)
+#define V4L2_CID_MPEG_VIDEO_H264_CONSTRAINED_INTRA_PREDICTION	(V4L2_CID_CODEC_BASE+383)
+#define V4L2_CID_MPEG_VIDEO_H264_CHROMA_QP_INDEX_OFFSET		(V4L2_CID_CODEC_BASE+384)
+#define V4L2_CID_MPEG_VIDEO_H264_I_FRAME_MIN_QP	(V4L2_CID_CODEC_BASE+385)
+#define V4L2_CID_MPEG_VIDEO_H264_I_FRAME_MAX_QP	(V4L2_CID_CODEC_BASE+386)
+#define V4L2_CID_MPEG_VIDEO_H264_P_FRAME_MIN_QP	(V4L2_CID_CODEC_BASE+387)
+#define V4L2_CID_MPEG_VIDEO_H264_P_FRAME_MAX_QP	(V4L2_CID_CODEC_BASE+388)
+#define V4L2_CID_MPEG_VIDEO_MPEG4_I_FRAME_QP	(V4L2_CID_CODEC_BASE+400)
+#define V4L2_CID_MPEG_VIDEO_MPEG4_P_FRAME_QP	(V4L2_CID_CODEC_BASE+401)
+#define V4L2_CID_MPEG_VIDEO_MPEG4_B_FRAME_QP	(V4L2_CID_CODEC_BASE+402)
+#define V4L2_CID_MPEG_VIDEO_MPEG4_MIN_QP	(V4L2_CID_CODEC_BASE+403)
+#define V4L2_CID_MPEG_VIDEO_MPEG4_MAX_QP	(V4L2_CID_CODEC_BASE+404)
+#define V4L2_CID_MPEG_VIDEO_MPEG4_LEVEL		(V4L2_CID_CODEC_BASE+405)
 enum v4l2_mpeg_video_mpeg4_level {
 	V4L2_MPEG_VIDEO_MPEG4_LEVEL_0	= 0,
 	V4L2_MPEG_VIDEO_MPEG4_LEVEL_0B	= 1,
@@ -600,7 +600,7 @@ enum v4l2_mpeg_video_mpeg4_level {
 	V4L2_MPEG_VIDEO_MPEG4_LEVEL_4	= 6,
 	V4L2_MPEG_VIDEO_MPEG4_LEVEL_5	= 7,
 };
-#define V4L2_CID_MPEG_VIDEO_MPEG4_PROFILE	(V4L2_CID_MPEG_BASE+406)
+#define V4L2_CID_MPEG_VIDEO_MPEG4_PROFILE	(V4L2_CID_CODEC_BASE+406)
 enum v4l2_mpeg_video_mpeg4_profile {
 	V4L2_MPEG_VIDEO_MPEG4_PROFILE_SIMPLE				= 0,
 	V4L2_MPEG_VIDEO_MPEG4_PROFILE_ADVANCED_SIMPLE			= 1,
@@ -608,40 +608,40 @@ enum v4l2_mpeg_video_mpeg4_profile {
 	V4L2_MPEG_VIDEO_MPEG4_PROFILE_SIMPLE_SCALABLE			= 3,
 	V4L2_MPEG_VIDEO_MPEG4_PROFILE_ADVANCED_CODING_EFFICIENCY	= 4,
 };
-#define V4L2_CID_MPEG_VIDEO_MPEG4_QPEL		(V4L2_CID_MPEG_BASE+407)
+#define V4L2_CID_MPEG_VIDEO_MPEG4_QPEL		(V4L2_CID_CODEC_BASE+407)
 
 /*  Control IDs for VP8 streams
  *  Although VP8 is not part of MPEG we add these controls to the MPEG class
  *  as that class is already handling other video compression standards
  */
-#define V4L2_CID_MPEG_VIDEO_VPX_NUM_PARTITIONS		(V4L2_CID_MPEG_BASE+500)
+#define V4L2_CID_MPEG_VIDEO_VPX_NUM_PARTITIONS		(V4L2_CID_CODEC_BASE+500)
 enum v4l2_vp8_num_partitions {
 	V4L2_CID_MPEG_VIDEO_VPX_1_PARTITION	= 0,
 	V4L2_CID_MPEG_VIDEO_VPX_2_PARTITIONS	= 1,
 	V4L2_CID_MPEG_VIDEO_VPX_4_PARTITIONS	= 2,
 	V4L2_CID_MPEG_VIDEO_VPX_8_PARTITIONS	= 3,
 };
-#define V4L2_CID_MPEG_VIDEO_VPX_IMD_DISABLE_4X4		(V4L2_CID_MPEG_BASE+501)
-#define V4L2_CID_MPEG_VIDEO_VPX_NUM_REF_FRAMES		(V4L2_CID_MPEG_BASE+502)
+#define V4L2_CID_MPEG_VIDEO_VPX_IMD_DISABLE_4X4		(V4L2_CID_CODEC_BASE+501)
+#define V4L2_CID_MPEG_VIDEO_VPX_NUM_REF_FRAMES		(V4L2_CID_CODEC_BASE+502)
 enum v4l2_vp8_num_ref_frames {
 	V4L2_CID_MPEG_VIDEO_VPX_1_REF_FRAME	= 0,
 	V4L2_CID_MPEG_VIDEO_VPX_2_REF_FRAME	= 1,
 	V4L2_CID_MPEG_VIDEO_VPX_3_REF_FRAME	= 2,
 };
-#define V4L2_CID_MPEG_VIDEO_VPX_FILTER_LEVEL		(V4L2_CID_MPEG_BASE+503)
-#define V4L2_CID_MPEG_VIDEO_VPX_FILTER_SHARPNESS	(V4L2_CID_MPEG_BASE+504)
-#define V4L2_CID_MPEG_VIDEO_VPX_GOLDEN_FRAME_REF_PERIOD	(V4L2_CID_MPEG_BASE+505)
-#define V4L2_CID_MPEG_VIDEO_VPX_GOLDEN_FRAME_SEL	(V4L2_CID_MPEG_BASE+506)
+#define V4L2_CID_MPEG_VIDEO_VPX_FILTER_LEVEL		(V4L2_CID_CODEC_BASE+503)
+#define V4L2_CID_MPEG_VIDEO_VPX_FILTER_SHARPNESS	(V4L2_CID_CODEC_BASE+504)
+#define V4L2_CID_MPEG_VIDEO_VPX_GOLDEN_FRAME_REF_PERIOD	(V4L2_CID_CODEC_BASE+505)
+#define V4L2_CID_MPEG_VIDEO_VPX_GOLDEN_FRAME_SEL	(V4L2_CID_CODEC_BASE+506)
 enum v4l2_vp8_golden_frame_sel {
 	V4L2_CID_MPEG_VIDEO_VPX_GOLDEN_FRAME_USE_PREV		= 0,
 	V4L2_CID_MPEG_VIDEO_VPX_GOLDEN_FRAME_USE_REF_PERIOD	= 1,
 };
-#define V4L2_CID_MPEG_VIDEO_VPX_MIN_QP			(V4L2_CID_MPEG_BASE+507)
-#define V4L2_CID_MPEG_VIDEO_VPX_MAX_QP			(V4L2_CID_MPEG_BASE+508)
-#define V4L2_CID_MPEG_VIDEO_VPX_I_FRAME_QP		(V4L2_CID_MPEG_BASE+509)
-#define V4L2_CID_MPEG_VIDEO_VPX_P_FRAME_QP		(V4L2_CID_MPEG_BASE+510)
+#define V4L2_CID_MPEG_VIDEO_VPX_MIN_QP			(V4L2_CID_CODEC_BASE+507)
+#define V4L2_CID_MPEG_VIDEO_VPX_MAX_QP			(V4L2_CID_CODEC_BASE+508)
+#define V4L2_CID_MPEG_VIDEO_VPX_I_FRAME_QP		(V4L2_CID_CODEC_BASE+509)
+#define V4L2_CID_MPEG_VIDEO_VPX_P_FRAME_QP		(V4L2_CID_CODEC_BASE+510)
 
-#define V4L2_CID_MPEG_VIDEO_VP8_PROFILE			(V4L2_CID_MPEG_BASE+511)
+#define V4L2_CID_MPEG_VIDEO_VP8_PROFILE			(V4L2_CID_CODEC_BASE+511)
 enum v4l2_mpeg_video_vp8_profile {
 	V4L2_MPEG_VIDEO_VP8_PROFILE_0				= 0,
 	V4L2_MPEG_VIDEO_VP8_PROFILE_1				= 1,
@@ -650,14 +650,14 @@ enum v4l2_mpeg_video_vp8_profile {
 };
 /* Deprecated alias for compatibility reasons. */
 #define V4L2_CID_MPEG_VIDEO_VPX_PROFILE	V4L2_CID_MPEG_VIDEO_VP8_PROFILE
-#define V4L2_CID_MPEG_VIDEO_VP9_PROFILE			(V4L2_CID_MPEG_BASE+512)
+#define V4L2_CID_MPEG_VIDEO_VP9_PROFILE			(V4L2_CID_CODEC_BASE+512)
 enum v4l2_mpeg_video_vp9_profile {
 	V4L2_MPEG_VIDEO_VP9_PROFILE_0				= 0,
 	V4L2_MPEG_VIDEO_VP9_PROFILE_1				= 1,
 	V4L2_MPEG_VIDEO_VP9_PROFILE_2				= 2,
 	V4L2_MPEG_VIDEO_VP9_PROFILE_3				= 3,
 };
-#define V4L2_CID_MPEG_VIDEO_VP9_LEVEL			(V4L2_CID_MPEG_BASE+513)
+#define V4L2_CID_MPEG_VIDEO_VP9_LEVEL			(V4L2_CID_CODEC_BASE+513)
 enum v4l2_mpeg_video_vp9_level {
 	V4L2_MPEG_VIDEO_VP9_LEVEL_1_0	= 0,
 	V4L2_MPEG_VIDEO_VP9_LEVEL_1_1	= 1,
@@ -677,32 +677,32 @@ enum v4l2_mpeg_video_vp9_level {
 
 /* CIDs for HEVC encoding. */
 
-#define V4L2_CID_MPEG_VIDEO_HEVC_MIN_QP		(V4L2_CID_MPEG_BASE + 600)
-#define V4L2_CID_MPEG_VIDEO_HEVC_MAX_QP		(V4L2_CID_MPEG_BASE + 601)
-#define V4L2_CID_MPEG_VIDEO_HEVC_I_FRAME_QP	(V4L2_CID_MPEG_BASE + 602)
-#define V4L2_CID_MPEG_VIDEO_HEVC_P_FRAME_QP	(V4L2_CID_MPEG_BASE + 603)
-#define V4L2_CID_MPEG_VIDEO_HEVC_B_FRAME_QP	(V4L2_CID_MPEG_BASE + 604)
-#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_QP	(V4L2_CID_MPEG_BASE + 605)
-#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_TYPE (V4L2_CID_MPEG_BASE + 606)
+#define V4L2_CID_MPEG_VIDEO_HEVC_MIN_QP		(V4L2_CID_CODEC_BASE + 600)
+#define V4L2_CID_MPEG_VIDEO_HEVC_MAX_QP		(V4L2_CID_CODEC_BASE + 601)
+#define V4L2_CID_MPEG_VIDEO_HEVC_I_FRAME_QP	(V4L2_CID_CODEC_BASE + 602)
+#define V4L2_CID_MPEG_VIDEO_HEVC_P_FRAME_QP	(V4L2_CID_CODEC_BASE + 603)
+#define V4L2_CID_MPEG_VIDEO_HEVC_B_FRAME_QP	(V4L2_CID_CODEC_BASE + 604)
+#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_QP	(V4L2_CID_CODEC_BASE + 605)
+#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_TYPE (V4L2_CID_CODEC_BASE + 606)
 enum v4l2_mpeg_video_hevc_hier_coding_type {
 	V4L2_MPEG_VIDEO_HEVC_HIERARCHICAL_CODING_B	= 0,
 	V4L2_MPEG_VIDEO_HEVC_HIERARCHICAL_CODING_P	= 1,
 };
-#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_LAYER	(V4L2_CID_MPEG_BASE + 607)
-#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L0_QP	(V4L2_CID_MPEG_BASE + 608)
-#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L1_QP	(V4L2_CID_MPEG_BASE + 609)
-#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L2_QP	(V4L2_CID_MPEG_BASE + 610)
-#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L3_QP	(V4L2_CID_MPEG_BASE + 611)
-#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L4_QP	(V4L2_CID_MPEG_BASE + 612)
-#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L5_QP	(V4L2_CID_MPEG_BASE + 613)
-#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L6_QP	(V4L2_CID_MPEG_BASE + 614)
-#define V4L2_CID_MPEG_VIDEO_HEVC_PROFILE	(V4L2_CID_MPEG_BASE + 615)
+#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_LAYER	(V4L2_CID_CODEC_BASE + 607)
+#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L0_QP	(V4L2_CID_CODEC_BASE + 608)
+#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L1_QP	(V4L2_CID_CODEC_BASE + 609)
+#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L2_QP	(V4L2_CID_CODEC_BASE + 610)
+#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L3_QP	(V4L2_CID_CODEC_BASE + 611)
+#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L4_QP	(V4L2_CID_CODEC_BASE + 612)
+#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L5_QP	(V4L2_CID_CODEC_BASE + 613)
+#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L6_QP	(V4L2_CID_CODEC_BASE + 614)
+#define V4L2_CID_MPEG_VIDEO_HEVC_PROFILE	(V4L2_CID_CODEC_BASE + 615)
 enum v4l2_mpeg_video_hevc_profile {
 	V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN = 0,
 	V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN_STILL_PICTURE = 1,
 	V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN_10 = 2,
 };
-#define V4L2_CID_MPEG_VIDEO_HEVC_LEVEL		(V4L2_CID_MPEG_BASE + 616)
+#define V4L2_CID_MPEG_VIDEO_HEVC_LEVEL		(V4L2_CID_CODEC_BASE + 616)
 enum v4l2_mpeg_video_hevc_level {
 	V4L2_MPEG_VIDEO_HEVC_LEVEL_1	= 0,
 	V4L2_MPEG_VIDEO_HEVC_LEVEL_2	= 1,
@@ -718,56 +718,56 @@ enum v4l2_mpeg_video_hevc_level {
 	V4L2_MPEG_VIDEO_HEVC_LEVEL_6_1	= 11,
 	V4L2_MPEG_VIDEO_HEVC_LEVEL_6_2	= 12,
 };
-#define V4L2_CID_MPEG_VIDEO_HEVC_FRAME_RATE_RESOLUTION	(V4L2_CID_MPEG_BASE + 617)
-#define V4L2_CID_MPEG_VIDEO_HEVC_TIER			(V4L2_CID_MPEG_BASE + 618)
+#define V4L2_CID_MPEG_VIDEO_HEVC_FRAME_RATE_RESOLUTION	(V4L2_CID_CODEC_BASE + 617)
+#define V4L2_CID_MPEG_VIDEO_HEVC_TIER			(V4L2_CID_CODEC_BASE + 618)
 enum v4l2_mpeg_video_hevc_tier {
 	V4L2_MPEG_VIDEO_HEVC_TIER_MAIN = 0,
 	V4L2_MPEG_VIDEO_HEVC_TIER_HIGH = 1,
 };
-#define V4L2_CID_MPEG_VIDEO_HEVC_MAX_PARTITION_DEPTH	(V4L2_CID_MPEG_BASE + 619)
-#define V4L2_CID_MPEG_VIDEO_HEVC_LOOP_FILTER_MODE	(V4L2_CID_MPEG_BASE + 620)
+#define V4L2_CID_MPEG_VIDEO_HEVC_MAX_PARTITION_DEPTH	(V4L2_CID_CODEC_BASE + 619)
+#define V4L2_CID_MPEG_VIDEO_HEVC_LOOP_FILTER_MODE	(V4L2_CID_CODEC_BASE + 620)
 enum v4l2_cid_mpeg_video_hevc_loop_filter_mode {
 	V4L2_MPEG_VIDEO_HEVC_LOOP_FILTER_MODE_DISABLED			 = 0,
 	V4L2_MPEG_VIDEO_HEVC_LOOP_FILTER_MODE_ENABLED			 = 1,
 	V4L2_MPEG_VIDEO_HEVC_LOOP_FILTER_MODE_DISABLED_AT_SLICE_BOUNDARY = 2,
 };
-#define V4L2_CID_MPEG_VIDEO_HEVC_LF_BETA_OFFSET_DIV2	(V4L2_CID_MPEG_BASE + 621)
-#define V4L2_CID_MPEG_VIDEO_HEVC_LF_TC_OFFSET_DIV2	(V4L2_CID_MPEG_BASE + 622)
-#define V4L2_CID_MPEG_VIDEO_HEVC_REFRESH_TYPE		(V4L2_CID_MPEG_BASE + 623)
+#define V4L2_CID_MPEG_VIDEO_HEVC_LF_BETA_OFFSET_DIV2	(V4L2_CID_CODEC_BASE + 621)
+#define V4L2_CID_MPEG_VIDEO_HEVC_LF_TC_OFFSET_DIV2	(V4L2_CID_CODEC_BASE + 622)
+#define V4L2_CID_MPEG_VIDEO_HEVC_REFRESH_TYPE		(V4L2_CID_CODEC_BASE + 623)
 enum v4l2_cid_mpeg_video_hevc_refresh_type {
 	V4L2_MPEG_VIDEO_HEVC_REFRESH_NONE		= 0,
 	V4L2_MPEG_VIDEO_HEVC_REFRESH_CRA		= 1,
 	V4L2_MPEG_VIDEO_HEVC_REFRESH_IDR		= 2,
 };
-#define V4L2_CID_MPEG_VIDEO_HEVC_REFRESH_PERIOD		(V4L2_CID_MPEG_BASE + 624)
-#define V4L2_CID_MPEG_VIDEO_HEVC_LOSSLESS_CU		(V4L2_CID_MPEG_BASE + 625)
-#define V4L2_CID_MPEG_VIDEO_HEVC_CONST_INTRA_PRED	(V4L2_CID_MPEG_BASE + 626)
-#define V4L2_CID_MPEG_VIDEO_HEVC_WAVEFRONT		(V4L2_CID_MPEG_BASE + 627)
-#define V4L2_CID_MPEG_VIDEO_HEVC_GENERAL_PB		(V4L2_CID_MPEG_BASE + 628)
-#define V4L2_CID_MPEG_VIDEO_HEVC_TEMPORAL_ID		(V4L2_CID_MPEG_BASE + 629)
-#define V4L2_CID_MPEG_VIDEO_HEVC_STRONG_SMOOTHING	(V4L2_CID_MPEG_BASE + 630)
-#define V4L2_CID_MPEG_VIDEO_HEVC_MAX_NUM_MERGE_MV_MINUS1	(V4L2_CID_MPEG_BASE + 631)
-#define V4L2_CID_MPEG_VIDEO_HEVC_INTRA_PU_SPLIT		(V4L2_CID_MPEG_BASE + 632)
-#define V4L2_CID_MPEG_VIDEO_HEVC_TMV_PREDICTION		(V4L2_CID_MPEG_BASE + 633)
-#define V4L2_CID_MPEG_VIDEO_HEVC_WITHOUT_STARTCODE	(V4L2_CID_MPEG_BASE + 634)
-#define V4L2_CID_MPEG_VIDEO_HEVC_SIZE_OF_LENGTH_FIELD	(V4L2_CID_MPEG_BASE + 635)
+#define V4L2_CID_MPEG_VIDEO_HEVC_REFRESH_PERIOD		(V4L2_CID_CODEC_BASE + 624)
+#define V4L2_CID_MPEG_VIDEO_HEVC_LOSSLESS_CU		(V4L2_CID_CODEC_BASE + 625)
+#define V4L2_CID_MPEG_VIDEO_HEVC_CONST_INTRA_PRED	(V4L2_CID_CODEC_BASE + 626)
+#define V4L2_CID_MPEG_VIDEO_HEVC_WAVEFRONT		(V4L2_CID_CODEC_BASE + 627)
+#define V4L2_CID_MPEG_VIDEO_HEVC_GENERAL_PB		(V4L2_CID_CODEC_BASE + 628)
+#define V4L2_CID_MPEG_VIDEO_HEVC_TEMPORAL_ID		(V4L2_CID_CODEC_BASE + 629)
+#define V4L2_CID_MPEG_VIDEO_HEVC_STRONG_SMOOTHING	(V4L2_CID_CODEC_BASE + 630)
+#define V4L2_CID_MPEG_VIDEO_HEVC_MAX_NUM_MERGE_MV_MINUS1	(V4L2_CID_CODEC_BASE + 631)
+#define V4L2_CID_MPEG_VIDEO_HEVC_INTRA_PU_SPLIT		(V4L2_CID_CODEC_BASE + 632)
+#define V4L2_CID_MPEG_VIDEO_HEVC_TMV_PREDICTION		(V4L2_CID_CODEC_BASE + 633)
+#define V4L2_CID_MPEG_VIDEO_HEVC_WITHOUT_STARTCODE	(V4L2_CID_CODEC_BASE + 634)
+#define V4L2_CID_MPEG_VIDEO_HEVC_SIZE_OF_LENGTH_FIELD	(V4L2_CID_CODEC_BASE + 635)
 enum v4l2_cid_mpeg_video_hevc_size_of_length_field {
 	V4L2_MPEG_VIDEO_HEVC_SIZE_0		= 0,
 	V4L2_MPEG_VIDEO_HEVC_SIZE_1		= 1,
 	V4L2_MPEG_VIDEO_HEVC_SIZE_2		= 2,
 	V4L2_MPEG_VIDEO_HEVC_SIZE_4		= 3,
 };
-#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L0_BR	(V4L2_CID_MPEG_BASE + 636)
-#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L1_BR	(V4L2_CID_MPEG_BASE + 637)
-#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L2_BR	(V4L2_CID_MPEG_BASE + 638)
-#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L3_BR	(V4L2_CID_MPEG_BASE + 639)
-#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L4_BR	(V4L2_CID_MPEG_BASE + 640)
-#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L5_BR	(V4L2_CID_MPEG_BASE + 641)
-#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L6_BR	(V4L2_CID_MPEG_BASE + 642)
-#define V4L2_CID_MPEG_VIDEO_REF_NUMBER_FOR_PFRAMES	(V4L2_CID_MPEG_BASE + 643)
-#define V4L2_CID_MPEG_VIDEO_PREPEND_SPSPPS_TO_IDR	(V4L2_CID_MPEG_BASE + 644)
-#define V4L2_CID_MPEG_VIDEO_CONSTANT_QUALITY		(V4L2_CID_MPEG_BASE + 645)
-#define V4L2_CID_MPEG_VIDEO_FRAME_SKIP_MODE		(V4L2_CID_MPEG_BASE + 646)
+#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L0_BR	(V4L2_CID_CODEC_BASE + 636)
+#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L1_BR	(V4L2_CID_CODEC_BASE + 637)
+#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L2_BR	(V4L2_CID_CODEC_BASE + 638)
+#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L3_BR	(V4L2_CID_CODEC_BASE + 639)
+#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L4_BR	(V4L2_CID_CODEC_BASE + 640)
+#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L5_BR	(V4L2_CID_CODEC_BASE + 641)
+#define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L6_BR	(V4L2_CID_CODEC_BASE + 642)
+#define V4L2_CID_MPEG_VIDEO_REF_NUMBER_FOR_PFRAMES	(V4L2_CID_CODEC_BASE + 643)
+#define V4L2_CID_MPEG_VIDEO_PREPEND_SPSPPS_TO_IDR	(V4L2_CID_CODEC_BASE + 644)
+#define V4L2_CID_MPEG_VIDEO_CONSTANT_QUALITY		(V4L2_CID_CODEC_BASE + 645)
+#define V4L2_CID_MPEG_VIDEO_FRAME_SKIP_MODE		(V4L2_CID_CODEC_BASE + 646)
 enum v4l2_mpeg_video_frame_skip_mode {
 	V4L2_MPEG_VIDEO_FRAME_SKIP_MODE_DISABLED	= 0,
 	V4L2_MPEG_VIDEO_FRAME_SKIP_MODE_LEVEL_LIMIT	= 1,
@@ -775,14 +775,14 @@ enum v4l2_mpeg_video_frame_skip_mode {
 };
 
 /*  MPEG-class control IDs specific to the CX2341x driver as defined by V4L2 */
-#define V4L2_CID_MPEG_CX2341X_BASE				(V4L2_CTRL_CLASS_MPEG | 0x1000)
-#define V4L2_CID_MPEG_CX2341X_VIDEO_SPATIAL_FILTER_MODE		(V4L2_CID_MPEG_CX2341X_BASE+0)
+#define V4L2_CID_CODEC_CX2341X_BASE				(V4L2_CTRL_CLASS_CODEC | 0x1000)
+#define V4L2_CID_MPEG_CX2341X_VIDEO_SPATIAL_FILTER_MODE		(V4L2_CID_CODEC_CX2341X_BASE+0)
 enum v4l2_mpeg_cx2341x_video_spatial_filter_mode {
 	V4L2_MPEG_CX2341X_VIDEO_SPATIAL_FILTER_MODE_MANUAL = 0,
 	V4L2_MPEG_CX2341X_VIDEO_SPATIAL_FILTER_MODE_AUTO   = 1,
 };
-#define V4L2_CID_MPEG_CX2341X_VIDEO_SPATIAL_FILTER		(V4L2_CID_MPEG_CX2341X_BASE+1)
-#define V4L2_CID_MPEG_CX2341X_VIDEO_LUMA_SPATIAL_FILTER_TYPE	(V4L2_CID_MPEG_CX2341X_BASE+2)
+#define V4L2_CID_MPEG_CX2341X_VIDEO_SPATIAL_FILTER		(V4L2_CID_CODEC_CX2341X_BASE+1)
+#define V4L2_CID_MPEG_CX2341X_VIDEO_LUMA_SPATIAL_FILTER_TYPE	(V4L2_CID_CODEC_CX2341X_BASE+2)
 enum v4l2_mpeg_cx2341x_video_luma_spatial_filter_type {
 	V4L2_MPEG_CX2341X_VIDEO_LUMA_SPATIAL_FILTER_TYPE_OFF                  = 0,
 	V4L2_MPEG_CX2341X_VIDEO_LUMA_SPATIAL_FILTER_TYPE_1D_HOR               = 1,
@@ -790,18 +790,18 @@ enum v4l2_mpeg_cx2341x_video_luma_spatial_filter_type {
 	V4L2_MPEG_CX2341X_VIDEO_LUMA_SPATIAL_FILTER_TYPE_2D_HV_SEPARABLE      = 3,
 	V4L2_MPEG_CX2341X_VIDEO_LUMA_SPATIAL_FILTER_TYPE_2D_SYM_NON_SEPARABLE = 4,
 };
-#define V4L2_CID_MPEG_CX2341X_VIDEO_CHROMA_SPATIAL_FILTER_TYPE	(V4L2_CID_MPEG_CX2341X_BASE+3)
+#define V4L2_CID_MPEG_CX2341X_VIDEO_CHROMA_SPATIAL_FILTER_TYPE	(V4L2_CID_CODEC_CX2341X_BASE+3)
 enum v4l2_mpeg_cx2341x_video_chroma_spatial_filter_type {
 	V4L2_MPEG_CX2341X_VIDEO_CHROMA_SPATIAL_FILTER_TYPE_OFF    = 0,
 	V4L2_MPEG_CX2341X_VIDEO_CHROMA_SPATIAL_FILTER_TYPE_1D_HOR = 1,
 };
-#define V4L2_CID_MPEG_CX2341X_VIDEO_TEMPORAL_FILTER_MODE	(V4L2_CID_MPEG_CX2341X_BASE+4)
+#define V4L2_CID_MPEG_CX2341X_VIDEO_TEMPORAL_FILTER_MODE	(V4L2_CID_CODEC_CX2341X_BASE+4)
 enum v4l2_mpeg_cx2341x_video_temporal_filter_mode {
 	V4L2_MPEG_CX2341X_VIDEO_TEMPORAL_FILTER_MODE_MANUAL = 0,
 	V4L2_MPEG_CX2341X_VIDEO_TEMPORAL_FILTER_MODE_AUTO   = 1,
 };
-#define V4L2_CID_MPEG_CX2341X_VIDEO_TEMPORAL_FILTER		(V4L2_CID_MPEG_CX2341X_BASE+5)
-#define V4L2_CID_MPEG_CX2341X_VIDEO_MEDIAN_FILTER_TYPE		(V4L2_CID_MPEG_CX2341X_BASE+6)
+#define V4L2_CID_MPEG_CX2341X_VIDEO_TEMPORAL_FILTER		(V4L2_CID_CODEC_CX2341X_BASE+5)
+#define V4L2_CID_MPEG_CX2341X_VIDEO_MEDIAN_FILTER_TYPE		(V4L2_CID_CODEC_CX2341X_BASE+6)
 enum v4l2_mpeg_cx2341x_video_median_filter_type {
 	V4L2_MPEG_CX2341X_VIDEO_MEDIAN_FILTER_TYPE_OFF      = 0,
 	V4L2_MPEG_CX2341X_VIDEO_MEDIAN_FILTER_TYPE_HOR      = 1,
@@ -809,38 +809,38 @@ enum v4l2_mpeg_cx2341x_video_median_filter_type {
 	V4L2_MPEG_CX2341X_VIDEO_MEDIAN_FILTER_TYPE_HOR_VERT = 3,
 	V4L2_MPEG_CX2341X_VIDEO_MEDIAN_FILTER_TYPE_DIAG     = 4,
 };
-#define V4L2_CID_MPEG_CX2341X_VIDEO_LUMA_MEDIAN_FILTER_BOTTOM	(V4L2_CID_MPEG_CX2341X_BASE+7)
-#define V4L2_CID_MPEG_CX2341X_VIDEO_LUMA_MEDIAN_FILTER_TOP	(V4L2_CID_MPEG_CX2341X_BASE+8)
-#define V4L2_CID_MPEG_CX2341X_VIDEO_CHROMA_MEDIAN_FILTER_BOTTOM	(V4L2_CID_MPEG_CX2341X_BASE+9)
-#define V4L2_CID_MPEG_CX2341X_VIDEO_CHROMA_MEDIAN_FILTER_TOP	(V4L2_CID_MPEG_CX2341X_BASE+10)
-#define V4L2_CID_MPEG_CX2341X_STREAM_INSERT_NAV_PACKETS		(V4L2_CID_MPEG_CX2341X_BASE+11)
+#define V4L2_CID_MPEG_CX2341X_VIDEO_LUMA_MEDIAN_FILTER_BOTTOM	(V4L2_CID_CODEC_CX2341X_BASE+7)
+#define V4L2_CID_MPEG_CX2341X_VIDEO_LUMA_MEDIAN_FILTER_TOP	(V4L2_CID_CODEC_CX2341X_BASE+8)
+#define V4L2_CID_MPEG_CX2341X_VIDEO_CHROMA_MEDIAN_FILTER_BOTTOM	(V4L2_CID_CODEC_CX2341X_BASE+9)
+#define V4L2_CID_MPEG_CX2341X_VIDEO_CHROMA_MEDIAN_FILTER_TOP	(V4L2_CID_CODEC_CX2341X_BASE+10)
+#define V4L2_CID_MPEG_CX2341X_STREAM_INSERT_NAV_PACKETS		(V4L2_CID_CODEC_CX2341X_BASE+11)
 
 /*  MPEG-class control IDs specific to the Samsung MFC 5.1 driver as defined by V4L2 */
-#define V4L2_CID_MPEG_MFC51_BASE				(V4L2_CTRL_CLASS_MPEG | 0x1100)
+#define V4L2_CID_CODEC_MFC51_BASE				(V4L2_CTRL_CLASS_CODEC | 0x1100)
 
-#define V4L2_CID_MPEG_MFC51_VIDEO_DECODER_H264_DISPLAY_DELAY		(V4L2_CID_MPEG_MFC51_BASE+0)
-#define V4L2_CID_MPEG_MFC51_VIDEO_DECODER_H264_DISPLAY_DELAY_ENABLE	(V4L2_CID_MPEG_MFC51_BASE+1)
-#define V4L2_CID_MPEG_MFC51_VIDEO_FRAME_SKIP_MODE			(V4L2_CID_MPEG_MFC51_BASE+2)
+#define V4L2_CID_MPEG_MFC51_VIDEO_DECODER_H264_DISPLAY_DELAY		(V4L2_CID_CODEC_MFC51_BASE+0)
+#define V4L2_CID_MPEG_MFC51_VIDEO_DECODER_H264_DISPLAY_DELAY_ENABLE	(V4L2_CID_CODEC_MFC51_BASE+1)
+#define V4L2_CID_MPEG_MFC51_VIDEO_FRAME_SKIP_MODE			(V4L2_CID_CODEC_MFC51_BASE+2)
 enum v4l2_mpeg_mfc51_video_frame_skip_mode {
 	V4L2_MPEG_MFC51_VIDEO_FRAME_SKIP_MODE_DISABLED		= 0,
 	V4L2_MPEG_MFC51_VIDEO_FRAME_SKIP_MODE_LEVEL_LIMIT	= 1,
 	V4L2_MPEG_MFC51_VIDEO_FRAME_SKIP_MODE_BUF_LIMIT		= 2,
 };
-#define V4L2_CID_MPEG_MFC51_VIDEO_FORCE_FRAME_TYPE			(V4L2_CID_MPEG_MFC51_BASE+3)
+#define V4L2_CID_MPEG_MFC51_VIDEO_FORCE_FRAME_TYPE			(V4L2_CID_CODEC_MFC51_BASE+3)
 enum v4l2_mpeg_mfc51_video_force_frame_type {
 	V4L2_MPEG_MFC51_VIDEO_FORCE_FRAME_TYPE_DISABLED		= 0,
 	V4L2_MPEG_MFC51_VIDEO_FORCE_FRAME_TYPE_I_FRAME		= 1,
 	V4L2_MPEG_MFC51_VIDEO_FORCE_FRAME_TYPE_NOT_CODED	= 2,
 };
-#define V4L2_CID_MPEG_MFC51_VIDEO_PADDING				(V4L2_CID_MPEG_MFC51_BASE+4)
-#define V4L2_CID_MPEG_MFC51_VIDEO_PADDING_YUV				(V4L2_CID_MPEG_MFC51_BASE+5)
-#define V4L2_CID_MPEG_MFC51_VIDEO_RC_FIXED_TARGET_BIT			(V4L2_CID_MPEG_MFC51_BASE+6)
-#define V4L2_CID_MPEG_MFC51_VIDEO_RC_REACTION_COEFF			(V4L2_CID_MPEG_MFC51_BASE+7)
-#define V4L2_CID_MPEG_MFC51_VIDEO_H264_ADAPTIVE_RC_ACTIVITY		(V4L2_CID_MPEG_MFC51_BASE+50)
-#define V4L2_CID_MPEG_MFC51_VIDEO_H264_ADAPTIVE_RC_DARK			(V4L2_CID_MPEG_MFC51_BASE+51)
-#define V4L2_CID_MPEG_MFC51_VIDEO_H264_ADAPTIVE_RC_SMOOTH		(V4L2_CID_MPEG_MFC51_BASE+52)
-#define V4L2_CID_MPEG_MFC51_VIDEO_H264_ADAPTIVE_RC_STATIC		(V4L2_CID_MPEG_MFC51_BASE+53)
-#define V4L2_CID_MPEG_MFC51_VIDEO_H264_NUM_REF_PIC_FOR_P		(V4L2_CID_MPEG_MFC51_BASE+54)
+#define V4L2_CID_MPEG_MFC51_VIDEO_PADDING				(V4L2_CID_CODEC_MFC51_BASE+4)
+#define V4L2_CID_MPEG_MFC51_VIDEO_PADDING_YUV				(V4L2_CID_CODEC_MFC51_BASE+5)
+#define V4L2_CID_MPEG_MFC51_VIDEO_RC_FIXED_TARGET_BIT			(V4L2_CID_CODEC_MFC51_BASE+6)
+#define V4L2_CID_MPEG_MFC51_VIDEO_RC_REACTION_COEFF			(V4L2_CID_CODEC_MFC51_BASE+7)
+#define V4L2_CID_MPEG_MFC51_VIDEO_H264_ADAPTIVE_RC_ACTIVITY		(V4L2_CID_CODEC_MFC51_BASE+50)
+#define V4L2_CID_MPEG_MFC51_VIDEO_H264_ADAPTIVE_RC_DARK			(V4L2_CID_CODEC_MFC51_BASE+51)
+#define V4L2_CID_MPEG_MFC51_VIDEO_H264_ADAPTIVE_RC_SMOOTH		(V4L2_CID_CODEC_MFC51_BASE+52)
+#define V4L2_CID_MPEG_MFC51_VIDEO_H264_ADAPTIVE_RC_STATIC		(V4L2_CID_CODEC_MFC51_BASE+53)
+#define V4L2_CID_MPEG_MFC51_VIDEO_H264_NUM_REF_PIC_FOR_P		(V4L2_CID_CODEC_MFC51_BASE+54)
 
 /*  Camera class control IDs */
 
@@ -1177,4 +1177,13 @@ enum v4l2_detect_md_mode {
 #define V4L2_CID_DETECT_MD_THRESHOLD_GRID	(V4L2_CID_DETECT_CLASS_BASE + 3)
 #define V4L2_CID_DETECT_MD_REGION_GRID		(V4L2_CID_DETECT_CLASS_BASE + 4)
 
+/* MPEG-compression definitions kept for backwards compatibility */
+#ifndef __KERNEL__
+#define V4L2_CTRL_CLASS_MPEG            V4L2_CTRL_CLASS_CODEC
+#define V4L2_CID_MPEG_CLASS             V4L2_CID_CODEC_CLASS
+#define V4L2_CID_MPEG_BASE              V4L2_CID_CODEC_BASE
+#define V4L2_CID_MPEG_CX2341X_BASE      V4L2_CID_CODEC_CX2341X_BASE
+#define V4L2_CID_MPEG_MFC51_BASE        V4L2_CID_CODEC_MFC51_BASE
+#endif
+
 #endif
-- 
cgit v1.2.3


From 008d2bd6e96b5a7e80dacfb02cd182c3839ce819 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Thu, 26 Nov 2020 10:36:12 +0100
Subject: media: controls: Add the stateless codec control class

Add a new control class to hold the stateless codecs controls
that are ready to be moved out of staging.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Tested-by: Jernej Skrabec <jernej.skrabec@siol.net>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 Documentation/userspace-api/media/v4l/vidioc-g-ext-ctrls.rst | 4 ++++
 drivers/media/v4l2-core/v4l2-ctrls.c                         | 5 +++++
 include/uapi/linux/v4l2-controls.h                           | 7 +++++++
 3 files changed, 16 insertions(+)

(limited to 'include/uapi')

diff --git a/Documentation/userspace-api/media/v4l/vidioc-g-ext-ctrls.rst b/Documentation/userspace-api/media/v4l/vidioc-g-ext-ctrls.rst
index 2196cdf61ca0..06a5b175a75d 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-g-ext-ctrls.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-g-ext-ctrls.rst
@@ -362,6 +362,10 @@ still cause this situation.
       - 0xa30000
       - The class containing motion or object detection controls. These controls
         are described in :ref:`detect-controls`.
+    * - ``V4L2_CTRL_CLASS_CODEC_STATELESS``
+      - 0xa40000
+      - The class containing stateless codec controls. These controls are
+	described in :ref:`codec-stateless-controls`.
 
 Return Value
 ============
diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index 6a17a7487459..fa96e6aa0a86 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -1181,6 +1181,10 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_DETECT_MD_GLOBAL_THRESHOLD: return "MD Global Threshold";
 	case V4L2_CID_DETECT_MD_THRESHOLD_GRID:	return "MD Threshold Grid";
 	case V4L2_CID_DETECT_MD_REGION_GRID:	return "MD Region Grid";
+
+	/* Stateless Codec controls */
+	/* Keep the order of the 'case's the same as in v4l2-controls.h! */
+	case V4L2_CID_CODEC_STATELESS_CLASS:	return "Stateless Codec Controls";
 	default:
 		return NULL;
 	}
@@ -1368,6 +1372,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_FM_RX_CLASS:
 	case V4L2_CID_RF_TUNER_CLASS:
 	case V4L2_CID_DETECT_CLASS:
+	case V4L2_CID_CODEC_STATELESS_CLASS:
 		*type = V4L2_CTRL_TYPE_CTRL_CLASS;
 		/* You can neither read not write these */
 		*flags |= V4L2_CTRL_FLAG_READ_ONLY | V4L2_CTRL_FLAG_WRITE_ONLY;
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 75cf87b9a377..90478ecc2f81 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -65,6 +65,7 @@
 #define V4L2_CTRL_CLASS_FM_RX		0x00a10000	/* FM Receiver controls */
 #define V4L2_CTRL_CLASS_RF_TUNER	0x00a20000	/* RF tuner controls */
 #define V4L2_CTRL_CLASS_DETECT		0x00a30000	/* Detection controls */
+#define V4L2_CTRL_CLASS_CODEC_STATELESS 0x00a40000	/* Stateless codecs controls */
 
 /* User-class control IDs */
 
@@ -1177,6 +1178,12 @@ enum v4l2_detect_md_mode {
 #define V4L2_CID_DETECT_MD_THRESHOLD_GRID	(V4L2_CID_DETECT_CLASS_BASE + 3)
 #define V4L2_CID_DETECT_MD_REGION_GRID		(V4L2_CID_DETECT_CLASS_BASE + 4)
 
+
+/*  Stateless CODECs controls */
+#define V4L2_CID_CODEC_STATELESS_BASE          (V4L2_CTRL_CLASS_CODEC_STATELESS | 0x900)
+#define V4L2_CID_CODEC_STATELESS_CLASS         (V4L2_CTRL_CLASS_CODEC_STATELESS | 1)
+
+
 /* MPEG-compression definitions kept for backwards compatibility */
 #ifndef __KERNEL__
 #define V4L2_CTRL_CLASS_MPEG            V4L2_CTRL_CLASS_CODEC
-- 
cgit v1.2.3


From 95e95ebe9119dcdf04e8aa9e1d9e8de4f1150c67 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Thu, 26 Nov 2020 10:36:13 +0100
Subject: media: uapi: Move parsed H264 pixel format out of staging

Since we are ready to stabilize the H264 stateless API,
start by first moving the parsed H264 pixel format.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Tested-by: Jernej Skrabec <jernej.skrabec@siol.net>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/media/h264-ctrls.h     | 3 ---
 include/uapi/linux/videodev2.h | 1 +
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/media/h264-ctrls.h b/include/media/h264-ctrls.h
index 083899c3ea19..d98c5b885379 100644
--- a/include/media/h264-ctrls.h
+++ b/include/media/h264-ctrls.h
@@ -21,9 +21,6 @@
 
 #define V4L2_H264_REF_LIST_LEN (2 * V4L2_H264_NUM_DPB_ENTRIES)
 
-/* Our pixel format isn't stable at the moment */
-#define V4L2_PIX_FMT_H264_SLICE v4l2_fourcc('S', '2', '6', '4') /* H264 parsed slices */
-
 /*
  * This is put insanely high to avoid conflicting with controls that
  * would be added during the phase where those controls are not
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 761ac9da3ffd..ed6a85f1442e 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -703,6 +703,7 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_HEVC     v4l2_fourcc('H', 'E', 'V', 'C') /* HEVC aka H.265 */
 #define V4L2_PIX_FMT_FWHT     v4l2_fourcc('F', 'W', 'H', 'T') /* Fast Walsh Hadamard Transform (vicodec) */
 #define V4L2_PIX_FMT_FWHT_STATELESS     v4l2_fourcc('S', 'F', 'W', 'H') /* Stateless FWHT (vicodec) */
+#define V4L2_PIX_FMT_H264_SLICE v4l2_fourcc('S', '2', '6', '4') /* H264 parsed slices */
 
 /*  Vendor-specific formats   */
 #define V4L2_PIX_FMT_CPIA1    v4l2_fourcc('C', 'P', 'I', 'A') /* cpia1 YUV */
-- 
cgit v1.2.3


From a7ead39700e10df7aaadc13c72e3a0cefcbb7f4e Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Thu, 26 Nov 2020 10:36:15 +0100
Subject: media: uapi: Move the H264 stateless control types out of staging

Move the H264 stateless control types out of staging,
and re-number them to avoid any confusion.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Tested-by: Jernej Skrabec <jernej.skrabec@siol.net>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/media/h264-ctrls.h     | 8 --------
 include/uapi/linux/videodev2.h | 7 +++++++
 2 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'include/uapi')

diff --git a/include/media/h264-ctrls.h b/include/media/h264-ctrls.h
index d98c5b885379..c15fed956bf5 100644
--- a/include/media/h264-ctrls.h
+++ b/include/media/h264-ctrls.h
@@ -35,14 +35,6 @@
 #define V4L2_CID_MPEG_VIDEO_H264_START_CODE	(V4L2_CID_CODEC_BASE+1006)
 #define V4L2_CID_MPEG_VIDEO_H264_PRED_WEIGHTS	(V4L2_CID_CODEC_BASE+1007)
 
-/* enum v4l2_ctrl_type type values */
-#define V4L2_CTRL_TYPE_H264_SPS			0x0110
-#define V4L2_CTRL_TYPE_H264_PPS			0x0111
-#define V4L2_CTRL_TYPE_H264_SCALING_MATRIX	0x0112
-#define V4L2_CTRL_TYPE_H264_SLICE_PARAMS	0x0113
-#define V4L2_CTRL_TYPE_H264_DECODE_PARAMS	0x0114
-#define V4L2_CTRL_TYPE_H264_PRED_WEIGHTS	0x0115
-
 /**
  * enum v4l2_mpeg_video_h264_decode_mode - Decoding mode
  *
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index ed6a85f1442e..485654fd2a7e 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -1780,6 +1780,13 @@ enum v4l2_ctrl_type {
 	V4L2_CTRL_TYPE_U16	     = 0x0101,
 	V4L2_CTRL_TYPE_U32	     = 0x0102,
 	V4L2_CTRL_TYPE_AREA          = 0x0106,
+
+	V4L2_CTRL_TYPE_H264_SPS             = 0x0200,
+	V4L2_CTRL_TYPE_H264_PPS		    = 0x0201,
+	V4L2_CTRL_TYPE_H264_SCALING_MATRIX  = 0x0202,
+	V4L2_CTRL_TYPE_H264_SLICE_PARAMS    = 0x0203,
+	V4L2_CTRL_TYPE_H264_DECODE_PARAMS   = 0x0204,
+	V4L2_CTRL_TYPE_H264_PRED_WEIGHTS    = 0x0205,
 };
 
 /*  Used in the VIDIOC_QUERYCTRL ioctl for querying controls */
-- 
cgit v1.2.3


From 46a309d27517873b7cd5cd6b81da3a84b48162bc Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Thu, 26 Nov 2020 10:36:17 +0100
Subject: media: uapi: move H264 stateless controls out of staging

The H.264 stateless 'uAPI' was in staging and was explicitly marked in
the V4L2 specification as unstable and subject to change.

Note that these control IDs were never exported as a public API,
they were only defined in kernel-local headers (h264-ctrls.h).

Now, the H264 stateless controls are ready to be part
of the stable uAPI.

While it is not too late, let's rename them and re-number their
control IDs, moving them to the newly created stateless
control class, and updating all the drivers accordingly.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Tested-by: Jernej Skrabec <jernej.skrabec@siol.net>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/v4l2-core/v4l2-ctrls.c            |  36 +--
 drivers/staging/media/hantro/hantro_drv.c       |  26 +-
 drivers/staging/media/hantro/hantro_h264.c      |   8 +-
 drivers/staging/media/rkvdec/rkvdec-h264.c      |   8 +-
 drivers/staging/media/rkvdec/rkvdec.c           |  26 +-
 drivers/staging/media/sunxi/cedrus/cedrus.c     |  24 +-
 drivers/staging/media/sunxi/cedrus/cedrus_dec.c |  12 +-
 include/media/h264-ctrls.h                      | 404 ------------------------
 include/media/v4l2-ctrls.h                      |   1 -
 include/uapi/linux/v4l2-controls.h              | 381 ++++++++++++++++++++++
 include/uapi/linux/videodev2.h                  |   6 +
 11 files changed, 457 insertions(+), 475 deletions(-)
 delete mode 100644 include/media/h264-ctrls.h

(limited to 'include/uapi')

diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index 2a5e3d3cc756..88231ba7b0fa 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -693,9 +693,9 @@ const char * const *v4l2_ctrl_get_menu(u32 id)
 		return h264_fp_arrangement_type;
 	case V4L2_CID_MPEG_VIDEO_H264_FMO_MAP_TYPE:
 		return h264_fmo_map_type;
-	case V4L2_CID_MPEG_VIDEO_H264_DECODE_MODE:
+	case V4L2_CID_STATELESS_H264_DECODE_MODE:
 		return h264_decode_mode;
-	case V4L2_CID_MPEG_VIDEO_H264_START_CODE:
+	case V4L2_CID_STATELESS_H264_START_CODE:
 		return h264_start_code;
 	case V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL:
 		return mpeg_mpeg2_level;
@@ -920,14 +920,6 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_MPEG_VIDEO_H264_I_FRAME_MAX_QP:		return "H264 I-Frame Maximum QP Value";
 	case V4L2_CID_MPEG_VIDEO_H264_P_FRAME_MIN_QP:		return "H264 P-Frame Minimum QP Value";
 	case V4L2_CID_MPEG_VIDEO_H264_P_FRAME_MAX_QP:		return "H264 P-Frame Maximum QP Value";
-	case V4L2_CID_MPEG_VIDEO_H264_SPS:			return "H264 Sequence Parameter Set";
-	case V4L2_CID_MPEG_VIDEO_H264_PPS:			return "H264 Picture Parameter Set";
-	case V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX:		return "H264 Scaling Matrix";
-	case V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAMS:		return "H264 Slice Parameters";
-	case V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS:		return "H264 Decode Parameters";
-	case V4L2_CID_MPEG_VIDEO_H264_DECODE_MODE:		return "H264 Decode Mode";
-	case V4L2_CID_MPEG_VIDEO_H264_START_CODE:		return "H264 Start Code";
-	case V4L2_CID_MPEG_VIDEO_H264_PRED_WEIGHTS:		return "H264 Prediction Weight Table";
 	case V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL:			return "MPEG2 Level";
 	case V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE:			return "MPEG2 Profile";
 	case V4L2_CID_MPEG_VIDEO_MPEG4_I_FRAME_QP:		return "MPEG4 I-Frame QP Value";
@@ -1185,6 +1177,14 @@ const char *v4l2_ctrl_get_name(u32 id)
 	/* Stateless Codec controls */
 	/* Keep the order of the 'case's the same as in v4l2-controls.h! */
 	case V4L2_CID_CODEC_STATELESS_CLASS:	return "Stateless Codec Controls";
+	case V4L2_CID_STATELESS_H264_DECODE_MODE:		return "H264 Decode Mode";
+	case V4L2_CID_STATELESS_H264_START_CODE:		return "H264 Start Code";
+	case V4L2_CID_STATELESS_H264_SPS:			return "H264 Sequence Parameter Set";
+	case V4L2_CID_STATELESS_H264_PPS:			return "H264 Picture Parameter Set";
+	case V4L2_CID_STATELESS_H264_SCALING_MATRIX:		return "H264 Scaling Matrix";
+	case V4L2_CID_STATELESS_H264_PRED_WEIGHTS:		return "H264 Prediction Weight Table";
+	case V4L2_CID_STATELESS_H264_SLICE_PARAMS:		return "H264 Slice Parameters";
+	case V4L2_CID_STATELESS_H264_DECODE_PARAMS:		return "H264 Decode Parameters";
 	default:
 		return NULL;
 	}
@@ -1310,8 +1310,6 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_IDC:
 	case V4L2_CID_MPEG_VIDEO_H264_SEI_FP_ARRANGEMENT_TYPE:
 	case V4L2_CID_MPEG_VIDEO_H264_FMO_MAP_TYPE:
-	case V4L2_CID_MPEG_VIDEO_H264_DECODE_MODE:
-	case V4L2_CID_MPEG_VIDEO_H264_START_CODE:
 	case V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL:
 	case V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE:
 	case V4L2_CID_MPEG_VIDEO_MPEG4_LEVEL:
@@ -1342,6 +1340,8 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_MPEG_VIDEO_HEVC_LOOP_FILTER_MODE:
 	case V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE:
 	case V4L2_CID_MPEG_VIDEO_HEVC_START_CODE:
+	case V4L2_CID_STATELESS_H264_DECODE_MODE:
+	case V4L2_CID_STATELESS_H264_START_CODE:
 	case V4L2_CID_CAMERA_ORIENTATION:
 		*type = V4L2_CTRL_TYPE_MENU;
 		break;
@@ -1436,22 +1436,22 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_MPEG_VIDEO_FWHT_PARAMS:
 		*type = V4L2_CTRL_TYPE_FWHT_PARAMS;
 		break;
-	case V4L2_CID_MPEG_VIDEO_H264_SPS:
+	case V4L2_CID_STATELESS_H264_SPS:
 		*type = V4L2_CTRL_TYPE_H264_SPS;
 		break;
-	case V4L2_CID_MPEG_VIDEO_H264_PPS:
+	case V4L2_CID_STATELESS_H264_PPS:
 		*type = V4L2_CTRL_TYPE_H264_PPS;
 		break;
-	case V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX:
+	case V4L2_CID_STATELESS_H264_SCALING_MATRIX:
 		*type = V4L2_CTRL_TYPE_H264_SCALING_MATRIX;
 		break;
-	case V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAMS:
+	case V4L2_CID_STATELESS_H264_SLICE_PARAMS:
 		*type = V4L2_CTRL_TYPE_H264_SLICE_PARAMS;
 		break;
-	case V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS:
+	case V4L2_CID_STATELESS_H264_DECODE_PARAMS:
 		*type = V4L2_CTRL_TYPE_H264_DECODE_PARAMS;
 		break;
-	case V4L2_CID_MPEG_VIDEO_H264_PRED_WEIGHTS:
+	case V4L2_CID_STATELESS_H264_PRED_WEIGHTS:
 		*type = V4L2_CTRL_TYPE_H264_PRED_WEIGHTS;
 		break;
 	case V4L2_CID_MPEG_VIDEO_VP8_FRAME_HEADER:
diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c
index 3cd00cc0a364..e5f200e64993 100644
--- a/drivers/staging/media/hantro/hantro_drv.c
+++ b/drivers/staging/media/hantro/hantro_drv.c
@@ -231,7 +231,7 @@ queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq)
 
 static int hantro_try_ctrl(struct v4l2_ctrl *ctrl)
 {
-	if (ctrl->id == V4L2_CID_MPEG_VIDEO_H264_SPS) {
+	if (ctrl->id == V4L2_CID_STATELESS_H264_SPS) {
 		const struct v4l2_ctrl_h264_sps *sps = ctrl->p_new.p_h264_sps;
 
 		if (sps->chroma_format_idc > 1)
@@ -304,39 +304,39 @@ static const struct hantro_ctrl controls[] = {
 	}, {
 		.codec = HANTRO_H264_DECODER,
 		.cfg = {
-			.id = V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS,
+			.id = V4L2_CID_STATELESS_H264_DECODE_PARAMS,
 		},
 	}, {
 		.codec = HANTRO_H264_DECODER,
 		.cfg = {
-			.id = V4L2_CID_MPEG_VIDEO_H264_SPS,
+			.id = V4L2_CID_STATELESS_H264_SPS,
 			.ops = &hantro_ctrl_ops,
 		},
 	}, {
 		.codec = HANTRO_H264_DECODER,
 		.cfg = {
-			.id = V4L2_CID_MPEG_VIDEO_H264_PPS,
+			.id = V4L2_CID_STATELESS_H264_PPS,
 		},
 	}, {
 		.codec = HANTRO_H264_DECODER,
 		.cfg = {
-			.id = V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX,
+			.id = V4L2_CID_STATELESS_H264_SCALING_MATRIX,
 		},
 	}, {
 		.codec = HANTRO_H264_DECODER,
 		.cfg = {
-			.id = V4L2_CID_MPEG_VIDEO_H264_DECODE_MODE,
-			.min = V4L2_MPEG_VIDEO_H264_DECODE_MODE_FRAME_BASED,
-			.def = V4L2_MPEG_VIDEO_H264_DECODE_MODE_FRAME_BASED,
-			.max = V4L2_MPEG_VIDEO_H264_DECODE_MODE_FRAME_BASED,
+			.id = V4L2_CID_STATELESS_H264_DECODE_MODE,
+			.min = V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED,
+			.def = V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED,
+			.max = V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED,
 		},
 	}, {
 		.codec = HANTRO_H264_DECODER,
 		.cfg = {
-			.id = V4L2_CID_MPEG_VIDEO_H264_START_CODE,
-			.min = V4L2_MPEG_VIDEO_H264_START_CODE_ANNEX_B,
-			.def = V4L2_MPEG_VIDEO_H264_START_CODE_ANNEX_B,
-			.max = V4L2_MPEG_VIDEO_H264_START_CODE_ANNEX_B,
+			.id = V4L2_CID_STATELESS_H264_START_CODE,
+			.min = V4L2_STATELESS_H264_START_CODE_ANNEX_B,
+			.def = V4L2_STATELESS_H264_START_CODE_ANNEX_B,
+			.max = V4L2_STATELESS_H264_START_CODE_ANNEX_B,
 		},
 	}, {
 		.codec = HANTRO_H264_DECODER,
diff --git a/drivers/staging/media/hantro/hantro_h264.c b/drivers/staging/media/hantro/hantro_h264.c
index b1bdc00ac262..ed6eaf11d96f 100644
--- a/drivers/staging/media/hantro/hantro_h264.c
+++ b/drivers/staging/media/hantro/hantro_h264.c
@@ -344,22 +344,22 @@ int hantro_h264_dec_prepare_run(struct hantro_ctx *ctx)
 	hantro_start_prepare_run(ctx);
 
 	ctrls->scaling =
-		hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX);
+		hantro_get_ctrl(ctx, V4L2_CID_STATELESS_H264_SCALING_MATRIX);
 	if (WARN_ON(!ctrls->scaling))
 		return -EINVAL;
 
 	ctrls->decode =
-		hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS);
+		hantro_get_ctrl(ctx, V4L2_CID_STATELESS_H264_DECODE_PARAMS);
 	if (WARN_ON(!ctrls->decode))
 		return -EINVAL;
 
 	ctrls->sps =
-		hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_H264_SPS);
+		hantro_get_ctrl(ctx, V4L2_CID_STATELESS_H264_SPS);
 	if (WARN_ON(!ctrls->sps))
 		return -EINVAL;
 
 	ctrls->pps =
-		hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_H264_PPS);
+		hantro_get_ctrl(ctx, V4L2_CID_STATELESS_H264_PPS);
 	if (WARN_ON(!ctrls->pps))
 		return -EINVAL;
 
diff --git a/drivers/staging/media/rkvdec/rkvdec-h264.c b/drivers/staging/media/rkvdec/rkvdec-h264.c
index 7cc3b478a5f4..76e97cbe2512 100644
--- a/drivers/staging/media/rkvdec/rkvdec-h264.c
+++ b/drivers/staging/media/rkvdec/rkvdec-h264.c
@@ -1067,16 +1067,16 @@ static void rkvdec_h264_run_preamble(struct rkvdec_ctx *ctx,
 	struct v4l2_ctrl *ctrl;
 
 	ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl,
-			      V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS);
+			      V4L2_CID_STATELESS_H264_DECODE_PARAMS);
 	run->decode_params = ctrl ? ctrl->p_cur.p : NULL;
 	ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl,
-			      V4L2_CID_MPEG_VIDEO_H264_SPS);
+			      V4L2_CID_STATELESS_H264_SPS);
 	run->sps = ctrl ? ctrl->p_cur.p : NULL;
 	ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl,
-			      V4L2_CID_MPEG_VIDEO_H264_PPS);
+			      V4L2_CID_STATELESS_H264_PPS);
 	run->pps = ctrl ? ctrl->p_cur.p : NULL;
 	ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl,
-			      V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX);
+			      V4L2_CID_STATELESS_H264_SCALING_MATRIX);
 	run->scaling_matrix = ctrl ? ctrl->p_cur.p : NULL;
 
 	rkvdec_run_preamble(ctx, &run->base);
diff --git a/drivers/staging/media/rkvdec/rkvdec.c b/drivers/staging/media/rkvdec/rkvdec.c
index fe65f55d3b49..aa4f8c287618 100644
--- a/drivers/staging/media/rkvdec/rkvdec.c
+++ b/drivers/staging/media/rkvdec/rkvdec.c
@@ -29,7 +29,7 @@
 
 static int rkvdec_try_ctrl(struct v4l2_ctrl *ctrl)
 {
-	if (ctrl->id == V4L2_CID_MPEG_VIDEO_H264_SPS) {
+	if (ctrl->id == V4L2_CID_STATELESS_H264_SPS) {
 		const struct v4l2_ctrl_h264_sps *sps = ctrl->p_new.p_h264_sps;
 		/*
 		 * TODO: The hardware supports 10-bit and 4:2:2 profiles,
@@ -56,31 +56,31 @@ static const struct v4l2_ctrl_ops rkvdec_ctrl_ops = {
 static const struct rkvdec_ctrl_desc rkvdec_h264_ctrl_descs[] = {
 	{
 		.mandatory = true,
-		.cfg.id = V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS,
+		.cfg.id = V4L2_CID_STATELESS_H264_DECODE_PARAMS,
 	},
 	{
 		.mandatory = true,
-		.cfg.id = V4L2_CID_MPEG_VIDEO_H264_SPS,
+		.cfg.id = V4L2_CID_STATELESS_H264_SPS,
 		.cfg.ops = &rkvdec_ctrl_ops,
 	},
 	{
 		.mandatory = true,
-		.cfg.id = V4L2_CID_MPEG_VIDEO_H264_PPS,
+		.cfg.id = V4L2_CID_STATELESS_H264_PPS,
 	},
 	{
-		.cfg.id = V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX,
+		.cfg.id = V4L2_CID_STATELESS_H264_SCALING_MATRIX,
 	},
 	{
-		.cfg.id = V4L2_CID_MPEG_VIDEO_H264_DECODE_MODE,
-		.cfg.min = V4L2_MPEG_VIDEO_H264_DECODE_MODE_FRAME_BASED,
-		.cfg.max = V4L2_MPEG_VIDEO_H264_DECODE_MODE_FRAME_BASED,
-		.cfg.def = V4L2_MPEG_VIDEO_H264_DECODE_MODE_FRAME_BASED,
+		.cfg.id = V4L2_CID_STATELESS_H264_DECODE_MODE,
+		.cfg.min = V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED,
+		.cfg.max = V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED,
+		.cfg.def = V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED,
 	},
 	{
-		.cfg.id = V4L2_CID_MPEG_VIDEO_H264_START_CODE,
-		.cfg.min = V4L2_MPEG_VIDEO_H264_START_CODE_ANNEX_B,
-		.cfg.def = V4L2_MPEG_VIDEO_H264_START_CODE_ANNEX_B,
-		.cfg.max = V4L2_MPEG_VIDEO_H264_START_CODE_ANNEX_B,
+		.cfg.id = V4L2_CID_STATELESS_H264_START_CODE,
+		.cfg.min = V4L2_STATELESS_H264_START_CODE_ANNEX_B,
+		.cfg.def = V4L2_STATELESS_H264_START_CODE_ANNEX_B,
+		.cfg.max = V4L2_STATELESS_H264_START_CODE_ANNEX_B,
 	},
 	{
 		.cfg.id = V4L2_CID_MPEG_VIDEO_H264_PROFILE,
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.c b/drivers/staging/media/sunxi/cedrus/cedrus.c
index 68548424985d..b1fc6b672a9b 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus.c
@@ -45,60 +45,60 @@ static const struct cedrus_control cedrus_controls[] = {
 	},
 	{
 		.cfg = {
-			.id	= V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS,
+			.id	= V4L2_CID_STATELESS_H264_DECODE_PARAMS,
 		},
 		.codec		= CEDRUS_CODEC_H264,
 		.required	= true,
 	},
 	{
 		.cfg = {
-			.id	= V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAMS,
+			.id	= V4L2_CID_STATELESS_H264_SLICE_PARAMS,
 		},
 		.codec		= CEDRUS_CODEC_H264,
 		.required	= true,
 	},
 	{
 		.cfg = {
-			.id	= V4L2_CID_MPEG_VIDEO_H264_SPS,
+			.id	= V4L2_CID_STATELESS_H264_SPS,
 		},
 		.codec		= CEDRUS_CODEC_H264,
 		.required	= true,
 	},
 	{
 		.cfg = {
-			.id	= V4L2_CID_MPEG_VIDEO_H264_PPS,
+			.id	= V4L2_CID_STATELESS_H264_PPS,
 		},
 		.codec		= CEDRUS_CODEC_H264,
 		.required	= true,
 	},
 	{
 		.cfg = {
-			.id	= V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX,
+			.id	= V4L2_CID_STATELESS_H264_SCALING_MATRIX,
 		},
 		.codec		= CEDRUS_CODEC_H264,
 		.required	= false,
 	},
 	{
 		.cfg = {
-			.id	= V4L2_CID_MPEG_VIDEO_H264_PRED_WEIGHTS,
+			.id	= V4L2_CID_STATELESS_H264_PRED_WEIGHTS,
 		},
 		.codec		= CEDRUS_CODEC_H264,
 		.required	= false,
 	},
 	{
 		.cfg = {
-			.id	= V4L2_CID_MPEG_VIDEO_H264_DECODE_MODE,
-			.max	= V4L2_MPEG_VIDEO_H264_DECODE_MODE_SLICE_BASED,
-			.def	= V4L2_MPEG_VIDEO_H264_DECODE_MODE_SLICE_BASED,
+			.id	= V4L2_CID_STATELESS_H264_DECODE_MODE,
+			.max	= V4L2_STATELESS_H264_DECODE_MODE_SLICE_BASED,
+			.def	= V4L2_STATELESS_H264_DECODE_MODE_SLICE_BASED,
 		},
 		.codec		= CEDRUS_CODEC_H264,
 		.required	= false,
 	},
 	{
 		.cfg = {
-			.id	= V4L2_CID_MPEG_VIDEO_H264_START_CODE,
-			.max	= V4L2_MPEG_VIDEO_H264_START_CODE_NONE,
-			.def	= V4L2_MPEG_VIDEO_H264_START_CODE_NONE,
+			.id	= V4L2_CID_STATELESS_H264_START_CODE,
+			.max	= V4L2_STATELESS_H264_START_CODE_NONE,
+			.def	= V4L2_STATELESS_H264_START_CODE_NONE,
 		},
 		.codec		= CEDRUS_CODEC_H264,
 		.required	= false,
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
index 1810fb6cc8da..a9090daf626a 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
@@ -48,17 +48,17 @@ void cedrus_device_run(void *priv)
 
 	case V4L2_PIX_FMT_H264_SLICE:
 		run.h264.decode_params = cedrus_find_control_data(ctx,
-			V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS);
+			V4L2_CID_STATELESS_H264_DECODE_PARAMS);
 		run.h264.pps = cedrus_find_control_data(ctx,
-			V4L2_CID_MPEG_VIDEO_H264_PPS);
+			V4L2_CID_STATELESS_H264_PPS);
 		run.h264.scaling_matrix = cedrus_find_control_data(ctx,
-			V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX);
+			V4L2_CID_STATELESS_H264_SCALING_MATRIX);
 		run.h264.slice_params = cedrus_find_control_data(ctx,
-			V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAMS);
+			V4L2_CID_STATELESS_H264_SLICE_PARAMS);
 		run.h264.sps = cedrus_find_control_data(ctx,
-			V4L2_CID_MPEG_VIDEO_H264_SPS);
+			V4L2_CID_STATELESS_H264_SPS);
 		run.h264.pred_weights = cedrus_find_control_data(ctx,
-			V4L2_CID_MPEG_VIDEO_H264_PRED_WEIGHTS);
+			V4L2_CID_STATELESS_H264_PRED_WEIGHTS);
 		break;
 
 	case V4L2_PIX_FMT_HEVC_SLICE:
diff --git a/include/media/h264-ctrls.h b/include/media/h264-ctrls.h
deleted file mode 100644
index c15fed956bf5..000000000000
--- a/include/media/h264-ctrls.h
+++ /dev/null
@@ -1,404 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * These are the H.264 state controls for use with stateless H.264
- * codec drivers.
- *
- * It turns out that these structs are not stable yet and will undergo
- * more changes. So keep them private until they are stable and ready to
- * become part of the official public API.
- */
-
-#ifndef _H264_CTRLS_H_
-#define _H264_CTRLS_H_
-
-#include <linux/videodev2.h>
-
-/*
- * Maximum DPB size, as specified by section 'A.3.1 Level limits
- * common to the Baseline, Main, and Extended profiles'.
- */
-#define V4L2_H264_NUM_DPB_ENTRIES 16
-
-#define V4L2_H264_REF_LIST_LEN (2 * V4L2_H264_NUM_DPB_ENTRIES)
-
-/*
- * This is put insanely high to avoid conflicting with controls that
- * would be added during the phase where those controls are not
- * stable. It should be fixed eventually.
- */
-#define V4L2_CID_MPEG_VIDEO_H264_SPS		(V4L2_CID_CODEC_BASE+1000)
-#define V4L2_CID_MPEG_VIDEO_H264_PPS		(V4L2_CID_CODEC_BASE+1001)
-#define V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX	(V4L2_CID_CODEC_BASE+1002)
-#define V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAMS	(V4L2_CID_CODEC_BASE+1003)
-#define V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS	(V4L2_CID_CODEC_BASE+1004)
-#define V4L2_CID_MPEG_VIDEO_H264_DECODE_MODE	(V4L2_CID_CODEC_BASE+1005)
-#define V4L2_CID_MPEG_VIDEO_H264_START_CODE	(V4L2_CID_CODEC_BASE+1006)
-#define V4L2_CID_MPEG_VIDEO_H264_PRED_WEIGHTS	(V4L2_CID_CODEC_BASE+1007)
-
-/**
- * enum v4l2_mpeg_video_h264_decode_mode - Decoding mode
- *
- * @V4L2_MPEG_VIDEO_H264_DECODE_MODE_SLICE_BASED: indicates that decoding
- * is performed one slice at a time. In this mode,
- * V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAMS must contain the parsed slice
- * parameters and the OUTPUT buffer must contain a single slice.
- * V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF feature is used
- * in order to support multislice frames.
- * @V4L2_MPEG_VIDEO_H264_DECODE_MODE_FRAME_BASED: indicates that
- * decoding is performed per frame. The OUTPUT buffer must contain
- * all slices and also both fields. This mode is typically supported
- * by device drivers that are able to parse the slice(s) header(s)
- * in hardware. When this mode is selected,
- * V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAMS is not used.
- */
-enum v4l2_mpeg_video_h264_decode_mode {
-	V4L2_MPEG_VIDEO_H264_DECODE_MODE_SLICE_BASED,
-	V4L2_MPEG_VIDEO_H264_DECODE_MODE_FRAME_BASED,
-};
-
-/**
- * enum v4l2_mpeg_video_h264_start_code - Start code
- *
- * @V4L2_MPEG_VIDEO_H264_START_CODE_NONE: slices are passed
- * to the driver without any start code.
- * @V4L2_MPEG_VIDEO_H264_START_CODE_ANNEX_B: slices are passed
- * to the driver with an Annex B start code prefix
- * (legal start codes can be 3-bytes 0x000001 or 4-bytes 0x00000001).
- * This mode is typically supported by device drivers that parse
- * the start code in hardware.
- */
-enum v4l2_mpeg_video_h264_start_code {
-	V4L2_MPEG_VIDEO_H264_START_CODE_NONE,
-	V4L2_MPEG_VIDEO_H264_START_CODE_ANNEX_B,
-};
-
-#define V4L2_H264_SPS_CONSTRAINT_SET0_FLAG			0x01
-#define V4L2_H264_SPS_CONSTRAINT_SET1_FLAG			0x02
-#define V4L2_H264_SPS_CONSTRAINT_SET2_FLAG			0x04
-#define V4L2_H264_SPS_CONSTRAINT_SET3_FLAG			0x08
-#define V4L2_H264_SPS_CONSTRAINT_SET4_FLAG			0x10
-#define V4L2_H264_SPS_CONSTRAINT_SET5_FLAG			0x20
-
-#define V4L2_H264_SPS_FLAG_SEPARATE_COLOUR_PLANE		0x01
-#define V4L2_H264_SPS_FLAG_QPPRIME_Y_ZERO_TRANSFORM_BYPASS	0x02
-#define V4L2_H264_SPS_FLAG_DELTA_PIC_ORDER_ALWAYS_ZERO		0x04
-#define V4L2_H264_SPS_FLAG_GAPS_IN_FRAME_NUM_VALUE_ALLOWED	0x08
-#define V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY			0x10
-#define V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD		0x20
-#define V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE			0x40
-
-#define V4L2_H264_SPS_HAS_CHROMA_FORMAT(sps) \
-	((sps)->profile_idc == 100 || (sps)->profile_idc == 110 || \
-	 (sps)->profile_idc == 122 || (sps)->profile_idc == 244 || \
-	 (sps)->profile_idc == 44  || (sps)->profile_idc == 83  || \
-	 (sps)->profile_idc == 86  || (sps)->profile_idc == 118 || \
-	 (sps)->profile_idc == 128 || (sps)->profile_idc == 138 || \
-	 (sps)->profile_idc == 139 || (sps)->profile_idc == 134 || \
-	 (sps)->profile_idc == 135)
-
-/**
- * struct v4l2_ctrl_h264_sps - H264 sequence parameter set
- *
- * All the members on this sequence parameter set structure match the
- * sequence parameter set syntax as specified by the H264 specification.
- *
- * @profile_idc: see H264 specification.
- * @constraint_set_flags: see H264 specification.
- * @level_idc: see H264 specification.
- * @seq_parameter_set_id: see H264 specification.
- * @chroma_format_idc: see H264 specification.
- * @bit_depth_luma_minus8: see H264 specification.
- * @bit_depth_chroma_minus8: see H264 specification.
- * @log2_max_frame_num_minus4: see H264 specification.
- * @pic_order_cnt_type: see H264 specification.
- * @log2_max_pic_order_cnt_lsb_minus4: see H264 specification.
- * @max_num_ref_frames: see H264 specification.
- * @num_ref_frames_in_pic_order_cnt_cycle: see H264 specification.
- * @offset_for_ref_frame: see H264 specification.
- * @offset_for_non_ref_pic: see H264 specification.
- * @offset_for_top_to_bottom_field: see H264 specification.
- * @pic_width_in_mbs_minus1: see H264 specification.
- * @pic_height_in_map_units_minus1: see H264 specification.
- * @flags: see V4L2_H264_SPS_FLAG_{}.
- */
-struct v4l2_ctrl_h264_sps {
-	__u8 profile_idc;
-	__u8 constraint_set_flags;
-	__u8 level_idc;
-	__u8 seq_parameter_set_id;
-	__u8 chroma_format_idc;
-	__u8 bit_depth_luma_minus8;
-	__u8 bit_depth_chroma_minus8;
-	__u8 log2_max_frame_num_minus4;
-	__u8 pic_order_cnt_type;
-	__u8 log2_max_pic_order_cnt_lsb_minus4;
-	__u8 max_num_ref_frames;
-	__u8 num_ref_frames_in_pic_order_cnt_cycle;
-	__s32 offset_for_ref_frame[255];
-	__s32 offset_for_non_ref_pic;
-	__s32 offset_for_top_to_bottom_field;
-	__u16 pic_width_in_mbs_minus1;
-	__u16 pic_height_in_map_units_minus1;
-	__u32 flags;
-};
-
-#define V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE				0x0001
-#define V4L2_H264_PPS_FLAG_BOTTOM_FIELD_PIC_ORDER_IN_FRAME_PRESENT	0x0002
-#define V4L2_H264_PPS_FLAG_WEIGHTED_PRED				0x0004
-#define V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT		0x0008
-#define V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED			0x0010
-#define V4L2_H264_PPS_FLAG_REDUNDANT_PIC_CNT_PRESENT			0x0020
-#define V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE				0x0040
-#define V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT			0x0080
-
-/**
- * struct v4l2_ctrl_h264_pps - H264 picture parameter set
- *
- * Except where noted, all the members on this picture parameter set
- * structure match the sequence parameter set syntax as specified
- * by the H264 specification.
- *
- * In particular, V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT flag
- * has a specific meaning. This flag should be set if a non-flat
- * scaling matrix applies to the picture. In this case, applications
- * are expected to use V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX,
- * to pass the values of the non-flat matrices.
- *
- * @pic_parameter_set_id: see H264 specification.
- * @seq_parameter_set_id: see H264 specification.
- * @num_slice_groups_minus1: see H264 specification.
- * @num_ref_idx_l0_default_active_minus1: see H264 specification.
- * @num_ref_idx_l1_default_active_minus1: see H264 specification.
- * @weighted_bipred_idc: see H264 specification.
- * @pic_init_qp_minus26: see H264 specification.
- * @pic_init_qs_minus26: see H264 specification.
- * @chroma_qp_index_offset: see H264 specification.
- * @second_chroma_qp_index_offset: see H264 specification.
- * @flags: see V4L2_H264_PPS_FLAG_{}.
- */
-struct v4l2_ctrl_h264_pps {
-	__u8 pic_parameter_set_id;
-	__u8 seq_parameter_set_id;
-	__u8 num_slice_groups_minus1;
-	__u8 num_ref_idx_l0_default_active_minus1;
-	__u8 num_ref_idx_l1_default_active_minus1;
-	__u8 weighted_bipred_idc;
-	__s8 pic_init_qp_minus26;
-	__s8 pic_init_qs_minus26;
-	__s8 chroma_qp_index_offset;
-	__s8 second_chroma_qp_index_offset;
-	__u16 flags;
-};
-
-/**
- * struct v4l2_ctrl_h264_scaling_matrix - H264 scaling matrices
- *
- * @scaling_list_4x4: scaling matrix after applying the inverse
- * scanning process. Expected list order is Intra Y, Intra Cb,
- * Intra Cr, Inter Y, Inter Cb, Inter Cr. The values on each
- * scaling list are expected in raster scan order.
- * @scaling_list_8x8: scaling matrix after applying the inverse
- * scanning process. Expected list order is Intra Y, Inter Y,
- * Intra Cb, Inter Cb, Intra Cr, Inter Cr. The values on each
- * scaling list are expected in raster scan order.
- *
- * Note that the list order is different for the 4x4 and 8x8
- * matrices as per the H264 specification, see table 7-2 "Assignment
- * of mnemonic names to scaling list indices and specification of
- * fall-back rule".
- */
-struct v4l2_ctrl_h264_scaling_matrix {
-	__u8 scaling_list_4x4[6][16];
-	__u8 scaling_list_8x8[6][64];
-};
-
-struct v4l2_h264_weight_factors {
-	__s16 luma_weight[32];
-	__s16 luma_offset[32];
-	__s16 chroma_weight[32][2];
-	__s16 chroma_offset[32][2];
-};
-
-#define V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED(pps, slice) \
-	((((pps)->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED) && \
-	 ((slice)->slice_type == V4L2_H264_SLICE_TYPE_P || \
-	  (slice)->slice_type == V4L2_H264_SLICE_TYPE_SP)) || \
-	 ((pps)->weighted_bipred_idc == 1 && \
-	  (slice)->slice_type == V4L2_H264_SLICE_TYPE_B))
-
-/**
- * struct v4l2_ctrl_h264_pred_weights - Prediction weight table
- *
- * Prediction weight table, which matches the syntax specified
- * by the H264 specification.
- *
- * @luma_log2_weight_denom: see H264 specification.
- * @chroma_log2_weight_denom: see H264 specification.
- * @weight_factors: luma and chroma weight factors.
- */
-struct v4l2_ctrl_h264_pred_weights {
-	__u16 luma_log2_weight_denom;
-	__u16 chroma_log2_weight_denom;
-	struct v4l2_h264_weight_factors weight_factors[2];
-};
-
-#define V4L2_H264_SLICE_TYPE_P				0
-#define V4L2_H264_SLICE_TYPE_B				1
-#define V4L2_H264_SLICE_TYPE_I				2
-#define V4L2_H264_SLICE_TYPE_SP				3
-#define V4L2_H264_SLICE_TYPE_SI				4
-
-#define V4L2_H264_SLICE_FLAG_DIRECT_SPATIAL_MV_PRED	0x01
-#define V4L2_H264_SLICE_FLAG_SP_FOR_SWITCH		0x02
-
-#define V4L2_H264_TOP_FIELD_REF				0x1
-#define V4L2_H264_BOTTOM_FIELD_REF			0x2
-#define V4L2_H264_FRAME_REF				0x3
-
-/**
- * struct v4l2_h264_reference - H264 picture reference
- *
- * @fields: indicates how the picture is referenced.
- * Valid values are V4L2_H264_{}_REF.
- * @index: index into v4l2_ctrl_h264_decode_params.dpb[].
- */
-struct v4l2_h264_reference {
-	__u8 fields;
-	__u8 index;
-};
-
-/**
- * struct v4l2_ctrl_h264_slice_params - H264 slice parameters
- *
- * This structure holds the H264 syntax elements that are specified
- * as non-invariant for the slices in a given frame.
- *
- * Slice invariant syntax elements are contained in struct
- * v4l2_ctrl_h264_decode_params. This is done to reduce the API surface
- * on frame-based decoders, where slice header parsing is done by the
- * hardware.
- *
- * Slice invariant syntax elements are specified in specification section
- * "7.4.3 Slice header semantics".
- *
- * Except where noted, the members on this struct match the slice header syntax.
- *
- * @header_bit_size: offset in bits to slice_data() from the beginning of this slice.
- * @first_mb_in_slice: see H264 specification.
- * @slice_type: see H264 specification.
- * @colour_plane_id: see H264 specification.
- * @redundant_pic_cnt: see H264 specification.
- * @cabac_init_idc: see H264 specification.
- * @slice_qp_delta: see H264 specification.
- * @slice_qs_delta: see H264 specification.
- * @disable_deblocking_filter_idc: see H264 specification.
- * @slice_alpha_c0_offset_div2: see H264 specification.
- * @slice_beta_offset_div2: see H264 specification.
- * @num_ref_idx_l0_active_minus1: see H264 specification.
- * @num_ref_idx_l1_active_minus1: see H264 specification.
- * @reserved: padding field. Should be zeroed by applications.
- * @ref_pic_list0: reference picture list 0 after applying the per-slice modifications.
- * @ref_pic_list1: reference picture list 1 after applying the per-slice modifications.
- * @flags: see V4L2_H264_SLICE_FLAG_{}.
- */
-struct v4l2_ctrl_h264_slice_params {
-	__u32 header_bit_size;
-	__u32 first_mb_in_slice;
-	__u8 slice_type;
-	__u8 colour_plane_id;
-	__u8 redundant_pic_cnt;
-	__u8 cabac_init_idc;
-	__s8 slice_qp_delta;
-	__s8 slice_qs_delta;
-	__u8 disable_deblocking_filter_idc;
-	__s8 slice_alpha_c0_offset_div2;
-	__s8 slice_beta_offset_div2;
-	__u8 num_ref_idx_l0_active_minus1;
-	__u8 num_ref_idx_l1_active_minus1;
-
-	__u8 reserved;
-
-	struct v4l2_h264_reference ref_pic_list0[V4L2_H264_REF_LIST_LEN];
-	struct v4l2_h264_reference ref_pic_list1[V4L2_H264_REF_LIST_LEN];
-
-	__u32 flags;
-};
-
-#define V4L2_H264_DPB_ENTRY_FLAG_VALID		0x01
-#define V4L2_H264_DPB_ENTRY_FLAG_ACTIVE		0x02
-#define V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM	0x04
-#define V4L2_H264_DPB_ENTRY_FLAG_FIELD		0x08
-
-/**
- * struct v4l2_h264_dpb_entry - H264 decoded picture buffer entry
- *
- * @reference_ts: timestamp of the V4L2 capture buffer to use as reference.
- * The timestamp refers to the timestamp field in struct v4l2_buffer.
- * Use v4l2_timeval_to_ns() to convert the struct timeval to a __u64.
- * @pic_num: matches PicNum variable assigned during the reference
- * picture lists construction process.
- * @frame_num: frame identifier which matches frame_num syntax element.
- * @fields: indicates how the DPB entry is referenced. Valid values are
- * V4L2_H264_{}_REF.
- * @reserved: padding field. Should be zeroed by applications.
- * @top_field_order_cnt: matches TopFieldOrderCnt picture value.
- * @bottom_field_order_cnt: matches BottomFieldOrderCnt picture value.
- * Note that picture field is indicated by v4l2_buffer.field.
- * @flags: see V4L2_H264_DPB_ENTRY_FLAG_{}.
- */
-struct v4l2_h264_dpb_entry {
-	__u64 reference_ts;
-	__u32 pic_num;
-	__u16 frame_num;
-	__u8 fields;
-	__u8 reserved[5];
-	__s32 top_field_order_cnt;
-	__s32 bottom_field_order_cnt;
-	__u32 flags;
-};
-
-#define V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC		0x01
-#define V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC		0x02
-#define V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD	0x04
-
-/**
- * struct v4l2_ctrl_h264_decode_params - H264 decoding parameters
- *
- * @dpb: decoded picture buffer.
- * @nal_ref_idc: slice header syntax element.
- * @frame_num: slice header syntax element.
- * @top_field_order_cnt: matches TopFieldOrderCnt picture value.
- * @bottom_field_order_cnt: matches BottomFieldOrderCnt picture value.
- * Note that picture field is indicated by v4l2_buffer.field.
- * @idr_pic_id: slice header syntax element.
- * @pic_order_cnt_lsb: slice header syntax element.
- * @delta_pic_order_cnt_bottom: slice header syntax element.
- * @delta_pic_order_cnt0: slice header syntax element.
- * @delta_pic_order_cnt1: slice header syntax element.
- * @dec_ref_pic_marking_bit_size: size in bits of dec_ref_pic_marking()
- * syntax element.
- * @pic_order_cnt_bit_size: size in bits of pic order count syntax.
- * @slice_group_change_cycle: slice header syntax element.
- * @reserved: padding field. Should be zeroed by applications.
- * @flags: see V4L2_H264_DECODE_PARAM_FLAG_{}.
- */
-struct v4l2_ctrl_h264_decode_params {
-	struct v4l2_h264_dpb_entry dpb[V4L2_H264_NUM_DPB_ENTRIES];
-	__u16 nal_ref_idc;
-	__u16 frame_num;
-	__s32 top_field_order_cnt;
-	__s32 bottom_field_order_cnt;
-	__u16 idr_pic_id;
-	__u16 pic_order_cnt_lsb;
-	__s32 delta_pic_order_cnt_bottom;
-	__s32 delta_pic_order_cnt0;
-	__s32 delta_pic_order_cnt1;
-	__u32 dec_ref_pic_marking_bit_size;
-	__u32 pic_order_cnt_bit_size;
-	__u32 slice_group_change_cycle;
-
-	__u32 reserved;
-	__u32 flags;
-};
-
-#endif
diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h
index 4fbace0fc7e5..d25b38f78229 100644
--- a/include/media/v4l2-ctrls.h
+++ b/include/media/v4l2-ctrls.h
@@ -19,7 +19,6 @@
  */
 #include <media/mpeg2-ctrls.h>
 #include <media/fwht-ctrls.h>
-#include <media/h264-ctrls.h>
 #include <media/vp8-ctrls.h>
 #include <media/hevc-ctrls.h>
 
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 90478ecc2f81..3f24663fba4a 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -1183,6 +1183,387 @@ enum v4l2_detect_md_mode {
 #define V4L2_CID_CODEC_STATELESS_BASE          (V4L2_CTRL_CLASS_CODEC_STATELESS | 0x900)
 #define V4L2_CID_CODEC_STATELESS_CLASS         (V4L2_CTRL_CLASS_CODEC_STATELESS | 1)
 
+#define V4L2_CID_STATELESS_H264_DECODE_MODE	(V4L2_CID_CODEC_STATELESS_BASE + 0)
+/**
+ * enum v4l2_stateless_h264_decode_mode - Decoding mode
+ *
+ * @V4L2_STATELESS_H264_DECODE_MODE_SLICE_BASED: indicates that decoding
+ * is performed one slice at a time. In this mode,
+ * V4L2_CID_STATELESS_H264_SLICE_PARAMS must contain the parsed slice
+ * parameters and the OUTPUT buffer must contain a single slice.
+ * V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF feature is used
+ * in order to support multislice frames.
+ * @V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED: indicates that
+ * decoding is performed per frame. The OUTPUT buffer must contain
+ * all slices and also both fields. This mode is typically supported
+ * by device drivers that are able to parse the slice(s) header(s)
+ * in hardware. When this mode is selected,
+ * V4L2_CID_STATELESS_H264_SLICE_PARAMS is not used.
+ */
+enum v4l2_stateless_h264_decode_mode {
+	V4L2_STATELESS_H264_DECODE_MODE_SLICE_BASED,
+	V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED,
+};
+
+#define V4L2_CID_STATELESS_H264_START_CODE	(V4L2_CID_CODEC_STATELESS_BASE + 1)
+/**
+ * enum v4l2_stateless_h264_start_code - Start code
+ *
+ * @V4L2_STATELESS_H264_START_CODE_NONE: slices are passed
+ * to the driver without any start code.
+ * @V4L2_STATELESS_H264_START_CODE_ANNEX_B: slices are passed
+ * to the driver with an Annex B start code prefix
+ * (legal start codes can be 3-bytes 0x000001 or 4-bytes 0x00000001).
+ * This mode is typically supported by device drivers that parse
+ * the start code in hardware.
+ */
+enum v4l2_stateless_h264_start_code {
+	V4L2_STATELESS_H264_START_CODE_NONE,
+	V4L2_STATELESS_H264_START_CODE_ANNEX_B,
+};
+
+#define V4L2_H264_SPS_CONSTRAINT_SET0_FLAG			0x01
+#define V4L2_H264_SPS_CONSTRAINT_SET1_FLAG			0x02
+#define V4L2_H264_SPS_CONSTRAINT_SET2_FLAG			0x04
+#define V4L2_H264_SPS_CONSTRAINT_SET3_FLAG			0x08
+#define V4L2_H264_SPS_CONSTRAINT_SET4_FLAG			0x10
+#define V4L2_H264_SPS_CONSTRAINT_SET5_FLAG			0x20
+
+#define V4L2_H264_SPS_FLAG_SEPARATE_COLOUR_PLANE		0x01
+#define V4L2_H264_SPS_FLAG_QPPRIME_Y_ZERO_TRANSFORM_BYPASS	0x02
+#define V4L2_H264_SPS_FLAG_DELTA_PIC_ORDER_ALWAYS_ZERO		0x04
+#define V4L2_H264_SPS_FLAG_GAPS_IN_FRAME_NUM_VALUE_ALLOWED	0x08
+#define V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY			0x10
+#define V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD		0x20
+#define V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE			0x40
+
+#define V4L2_H264_SPS_HAS_CHROMA_FORMAT(sps) \
+	((sps)->profile_idc == 100 || (sps)->profile_idc == 110 || \
+	 (sps)->profile_idc == 122 || (sps)->profile_idc == 244 || \
+	 (sps)->profile_idc == 44  || (sps)->profile_idc == 83  || \
+	 (sps)->profile_idc == 86  || (sps)->profile_idc == 118 || \
+	 (sps)->profile_idc == 128 || (sps)->profile_idc == 138 || \
+	 (sps)->profile_idc == 139 || (sps)->profile_idc == 134 || \
+	 (sps)->profile_idc == 135)
+
+#define V4L2_CID_STATELESS_H264_SPS		(V4L2_CID_CODEC_STATELESS_BASE + 2)
+/**
+ * struct v4l2_ctrl_h264_sps - H264 sequence parameter set
+ *
+ * All the members on this sequence parameter set structure match the
+ * sequence parameter set syntax as specified by the H264 specification.
+ *
+ * @profile_idc: see H264 specification.
+ * @constraint_set_flags: see H264 specification.
+ * @level_idc: see H264 specification.
+ * @seq_parameter_set_id: see H264 specification.
+ * @chroma_format_idc: see H264 specification.
+ * @bit_depth_luma_minus8: see H264 specification.
+ * @bit_depth_chroma_minus8: see H264 specification.
+ * @log2_max_frame_num_minus4: see H264 specification.
+ * @pic_order_cnt_type: see H264 specification.
+ * @log2_max_pic_order_cnt_lsb_minus4: see H264 specification.
+ * @max_num_ref_frames: see H264 specification.
+ * @num_ref_frames_in_pic_order_cnt_cycle: see H264 specification.
+ * @offset_for_ref_frame: see H264 specification.
+ * @offset_for_non_ref_pic: see H264 specification.
+ * @offset_for_top_to_bottom_field: see H264 specification.
+ * @pic_width_in_mbs_minus1: see H264 specification.
+ * @pic_height_in_map_units_minus1: see H264 specification.
+ * @flags: see V4L2_H264_SPS_FLAG_{}.
+ */
+struct v4l2_ctrl_h264_sps {
+	__u8 profile_idc;
+	__u8 constraint_set_flags;
+	__u8 level_idc;
+	__u8 seq_parameter_set_id;
+	__u8 chroma_format_idc;
+	__u8 bit_depth_luma_minus8;
+	__u8 bit_depth_chroma_minus8;
+	__u8 log2_max_frame_num_minus4;
+	__u8 pic_order_cnt_type;
+	__u8 log2_max_pic_order_cnt_lsb_minus4;
+	__u8 max_num_ref_frames;
+	__u8 num_ref_frames_in_pic_order_cnt_cycle;
+	__s32 offset_for_ref_frame[255];
+	__s32 offset_for_non_ref_pic;
+	__s32 offset_for_top_to_bottom_field;
+	__u16 pic_width_in_mbs_minus1;
+	__u16 pic_height_in_map_units_minus1;
+	__u32 flags;
+};
+
+#define V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE				0x0001
+#define V4L2_H264_PPS_FLAG_BOTTOM_FIELD_PIC_ORDER_IN_FRAME_PRESENT	0x0002
+#define V4L2_H264_PPS_FLAG_WEIGHTED_PRED				0x0004
+#define V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT		0x0008
+#define V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED			0x0010
+#define V4L2_H264_PPS_FLAG_REDUNDANT_PIC_CNT_PRESENT			0x0020
+#define V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE				0x0040
+#define V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT			0x0080
+
+#define V4L2_CID_STATELESS_H264_PPS		(V4L2_CID_CODEC_STATELESS_BASE + 3)
+/**
+ * struct v4l2_ctrl_h264_pps - H264 picture parameter set
+ *
+ * Except where noted, all the members on this picture parameter set
+ * structure match the picture parameter set syntax as specified
+ * by the H264 specification.
+ *
+ * In particular, V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT flag
+ * has a specific meaning. This flag should be set if a non-flat
+ * scaling matrix applies to the picture. In this case, applications
+ * are expected to use V4L2_CID_STATELESS_H264_SCALING_MATRIX,
+ * to pass the values of the non-flat matrices.
+ *
+ * @pic_parameter_set_id: see H264 specification.
+ * @seq_parameter_set_id: see H264 specification.
+ * @num_slice_groups_minus1: see H264 specification.
+ * @num_ref_idx_l0_default_active_minus1: see H264 specification.
+ * @num_ref_idx_l1_default_active_minus1: see H264 specification.
+ * @weighted_bipred_idc: see H264 specification.
+ * @pic_init_qp_minus26: see H264 specification.
+ * @pic_init_qs_minus26: see H264 specification.
+ * @chroma_qp_index_offset: see H264 specification.
+ * @second_chroma_qp_index_offset: see H264 specification.
+ * @flags: see V4L2_H264_PPS_FLAG_{}.
+ */
+struct v4l2_ctrl_h264_pps {
+	__u8 pic_parameter_set_id;
+	__u8 seq_parameter_set_id;
+	__u8 num_slice_groups_minus1;
+	__u8 num_ref_idx_l0_default_active_minus1;
+	__u8 num_ref_idx_l1_default_active_minus1;
+	__u8 weighted_bipred_idc;
+	__s8 pic_init_qp_minus26;
+	__s8 pic_init_qs_minus26;
+	__s8 chroma_qp_index_offset;
+	__s8 second_chroma_qp_index_offset;
+	__u16 flags;
+};
+
+#define V4L2_CID_STATELESS_H264_SCALING_MATRIX	(V4L2_CID_CODEC_STATELESS_BASE + 4)
+/**
+ * struct v4l2_ctrl_h264_scaling_matrix - H264 scaling matrices
+ *
+ * @scaling_list_4x4: scaling matrix after applying the inverse
+ * scanning process. Expected list order is Intra Y, Intra Cb,
+ * Intra Cr, Inter Y, Inter Cb, Inter Cr. The values on each
+ * scaling list are expected in raster scan order.
+ * @scaling_list_8x8: scaling matrix after applying the inverse
+ * scanning process. Expected list order is Intra Y, Inter Y,
+ * Intra Cb, Inter Cb, Intra Cr, Inter Cr. The values on each
+ * scaling list are expected in raster scan order.
+ *
+ * Note that the list order is different for the 4x4 and 8x8
+ * matrices as per the H264 specification, see table 7-2 "Assignment
+ * of mnemonic names to scaling list indices and specification of
+ * fall-back rule".
+ */
+struct v4l2_ctrl_h264_scaling_matrix {
+	__u8 scaling_list_4x4[6][16];
+	__u8 scaling_list_8x8[6][64];
+};
+
+struct v4l2_h264_weight_factors {
+	__s16 luma_weight[32];
+	__s16 luma_offset[32];
+	__s16 chroma_weight[32][2];
+	__s16 chroma_offset[32][2];
+};
+
+#define V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED(pps, slice) \
+	((((pps)->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED) && \
+	 ((slice)->slice_type == V4L2_H264_SLICE_TYPE_P || \
+	  (slice)->slice_type == V4L2_H264_SLICE_TYPE_SP)) || \
+	 ((pps)->weighted_bipred_idc == 1 && \
+	  (slice)->slice_type == V4L2_H264_SLICE_TYPE_B))
+
+#define V4L2_CID_STATELESS_H264_PRED_WEIGHTS	(V4L2_CID_CODEC_STATELESS_BASE + 5)
+/**
+ * struct v4l2_ctrl_h264_pred_weights - Prediction weight table
+ *
+ * Prediction weight table, which matches the syntax specified
+ * by the H264 specification.
+ *
+ * @luma_log2_weight_denom: see H264 specification.
+ * @chroma_log2_weight_denom: see H264 specification.
+ * @weight_factors: luma and chroma weight factors.
+ */
+struct v4l2_ctrl_h264_pred_weights {
+	__u16 luma_log2_weight_denom;
+	__u16 chroma_log2_weight_denom;
+	struct v4l2_h264_weight_factors weight_factors[2];
+};
+
+#define V4L2_H264_SLICE_TYPE_P				0
+#define V4L2_H264_SLICE_TYPE_B				1
+#define V4L2_H264_SLICE_TYPE_I				2
+#define V4L2_H264_SLICE_TYPE_SP				3
+#define V4L2_H264_SLICE_TYPE_SI				4
+
+#define V4L2_H264_SLICE_FLAG_DIRECT_SPATIAL_MV_PRED	0x01
+#define V4L2_H264_SLICE_FLAG_SP_FOR_SWITCH		0x02
+
+#define V4L2_H264_TOP_FIELD_REF				0x1
+#define V4L2_H264_BOTTOM_FIELD_REF			0x2
+#define V4L2_H264_FRAME_REF				0x3
+
+/**
+ * struct v4l2_h264_reference - H264 picture reference
+ *
+ * @fields: indicates how the picture is referenced.
+ * Valid values are V4L2_H264_{}_REF.
+ * @index: index into v4l2_ctrl_h264_decode_params.dpb[].
+ */
+struct v4l2_h264_reference {
+	__u8 fields;
+	__u8 index;
+};
+
+/*
+ * Maximum DPB size, as specified by section 'A.3.1 Level limits
+ * common to the Baseline, Main, and Extended profiles'.
+ */
+#define V4L2_H264_NUM_DPB_ENTRIES 16
+#define V4L2_H264_REF_LIST_LEN (2 * V4L2_H264_NUM_DPB_ENTRIES)
+
+#define V4L2_CID_STATELESS_H264_SLICE_PARAMS	(V4L2_CID_CODEC_STATELESS_BASE + 6)
+/**
+ * struct v4l2_ctrl_h264_slice_params - H264 slice parameters
+ *
+ * This structure holds the H264 syntax elements that are specified
+ * as non-invariant for the slices in a given frame.
+ *
+ * Slice invariant syntax elements are contained in struct
+ * v4l2_ctrl_h264_decode_params. This is done to reduce the API surface
+ * on frame-based decoders, where slice header parsing is done by the
+ * hardware.
+ *
+ * Slice invariant syntax elements are specified in specification section
+ * "7.4.3 Slice header semantics".
+ *
+ * Except where noted, the members on this struct match the slice header syntax.
+ *
+ * @header_bit_size: offset in bits to slice_data() from the beginning of this slice.
+ * @first_mb_in_slice: see H264 specification.
+ * @slice_type: see H264 specification.
+ * @colour_plane_id: see H264 specification.
+ * @redundant_pic_cnt: see H264 specification.
+ * @cabac_init_idc: see H264 specification.
+ * @slice_qp_delta: see H264 specification.
+ * @slice_qs_delta: see H264 specification.
+ * @disable_deblocking_filter_idc: see H264 specification.
+ * @slice_alpha_c0_offset_div2: see H264 specification.
+ * @slice_beta_offset_div2: see H264 specification.
+ * @num_ref_idx_l0_active_minus1: see H264 specification.
+ * @num_ref_idx_l1_active_minus1: see H264 specification.
+ * @reserved: padding field. Should be zeroed by applications.
+ * @ref_pic_list0: reference picture list 0 after applying the per-slice modifications.
+ * @ref_pic_list1: reference picture list 1 after applying the per-slice modifications.
+ * @flags: see V4L2_H264_SLICE_FLAG_{}.
+ */
+struct v4l2_ctrl_h264_slice_params {
+	__u32 header_bit_size;
+	__u32 first_mb_in_slice;
+	__u8 slice_type;
+	__u8 colour_plane_id;
+	__u8 redundant_pic_cnt;
+	__u8 cabac_init_idc;
+	__s8 slice_qp_delta;
+	__s8 slice_qs_delta;
+	__u8 disable_deblocking_filter_idc;
+	__s8 slice_alpha_c0_offset_div2;
+	__s8 slice_beta_offset_div2;
+	__u8 num_ref_idx_l0_active_minus1;
+	__u8 num_ref_idx_l1_active_minus1;
+
+	__u8 reserved;
+
+	struct v4l2_h264_reference ref_pic_list0[V4L2_H264_REF_LIST_LEN];
+	struct v4l2_h264_reference ref_pic_list1[V4L2_H264_REF_LIST_LEN];
+
+	__u32 flags;
+};
+
+#define V4L2_H264_DPB_ENTRY_FLAG_VALID		0x01
+#define V4L2_H264_DPB_ENTRY_FLAG_ACTIVE		0x02
+#define V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM	0x04
+#define V4L2_H264_DPB_ENTRY_FLAG_FIELD		0x08
+
+/**
+ * struct v4l2_h264_dpb_entry - H264 decoded picture buffer entry
+ *
+ * @reference_ts: timestamp of the V4L2 capture buffer to use as reference.
+ * The timestamp refers to the timestamp field in struct v4l2_buffer.
+ * Use v4l2_timeval_to_ns() to convert the struct timeval to a __u64.
+ * @pic_num: matches PicNum variable assigned during the reference
+ * picture lists construction process.
+ * @frame_num: frame identifier which matches frame_num syntax element.
+ * @fields: indicates how the DPB entry is referenced. Valid values are
+ * V4L2_H264_{}_REF.
+ * @reserved: padding field. Should be zeroed by applications.
+ * @top_field_order_cnt: matches TopFieldOrderCnt picture value.
+ * @bottom_field_order_cnt: matches BottomFieldOrderCnt picture value.
+ * Note that picture field is indicated by v4l2_buffer.field.
+ * @flags: see V4L2_H264_DPB_ENTRY_FLAG_{}.
+ */
+struct v4l2_h264_dpb_entry {
+	__u64 reference_ts;
+	__u32 pic_num;
+	__u16 frame_num;
+	__u8 fields;
+	__u8 reserved[5];
+	__s32 top_field_order_cnt;
+	__s32 bottom_field_order_cnt;
+	__u32 flags;
+};
+
+#define V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC		0x01
+#define V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC		0x02
+#define V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD	0x04
+
+#define V4L2_CID_STATELESS_H264_DECODE_PARAMS	(V4L2_CID_CODEC_STATELESS_BASE + 7)
+/**
+ * struct v4l2_ctrl_h264_decode_params - H264 decoding parameters
+ *
+ * @dpb: decoded picture buffer.
+ * @nal_ref_idc: slice header syntax element.
+ * @frame_num: slice header syntax element.
+ * @top_field_order_cnt: matches TopFieldOrderCnt picture value.
+ * @bottom_field_order_cnt: matches BottomFieldOrderCnt picture value.
+ * Note that picture field is indicated by v4l2_buffer.field.
+ * @idr_pic_id: slice header syntax element.
+ * @pic_order_cnt_lsb: slice header syntax element.
+ * @delta_pic_order_cnt_bottom: slice header syntax element.
+ * @delta_pic_order_cnt0: slice header syntax element.
+ * @delta_pic_order_cnt1: slice header syntax element.
+ * @dec_ref_pic_marking_bit_size: size in bits of dec_ref_pic_marking()
+ * syntax element.
+ * @pic_order_cnt_bit_size: size in bits of pic order count syntax.
+ * @slice_group_change_cycle: slice header syntax element.
+ * @reserved: padding field. Should be zeroed by applications.
+ * @flags: see V4L2_H264_DECODE_PARAM_FLAG_{}.
+ */
+struct v4l2_ctrl_h264_decode_params {
+	struct v4l2_h264_dpb_entry dpb[V4L2_H264_NUM_DPB_ENTRIES];
+	__u16 nal_ref_idc;
+	__u16 frame_num;
+	__s32 top_field_order_cnt;
+	__s32 bottom_field_order_cnt;
+	__u16 idr_pic_id;
+	__u16 pic_order_cnt_lsb;
+	__s32 delta_pic_order_cnt_bottom;
+	__s32 delta_pic_order_cnt0;
+	__s32 delta_pic_order_cnt1;
+	__u32 dec_ref_pic_marking_bit_size;
+	__u32 pic_order_cnt_bit_size;
+	__u32 slice_group_change_cycle;
+
+	__u32 reserved;
+	__u32 flags;
+};
+
 
 /* MPEG-compression definitions kept for backwards compatibility */
 #ifndef __KERNEL__
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 485654fd2a7e..76231ca6e101 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -1734,6 +1734,12 @@ struct v4l2_ext_control {
 		__u16 __user *p_u16;
 		__u32 __user *p_u32;
 		struct v4l2_area __user *p_area;
+		struct v4l2_ctrl_h264_sps __user *p_h264_sps;
+		struct v4l2_ctrl_h264_pps __user *p_h264_pps;
+		struct v4l2_ctrl_h264_scaling_matrix __user *p_h264_scaling_matrix;
+		struct v4l2_ctrl_h264_pred_weights __user *p_h264_pred_weights;
+		struct v4l2_ctrl_h264_slice_params __user *p_h264_slice_params;
+		struct v4l2_ctrl_h264_decode_params __user *p_h264_decode_params;
 		void __user *ptr;
 	};
 } __attribute__ ((packed));
-- 
cgit v1.2.3


From 206bc0f6fb945e90cfea677339ef8adfaedc4b4f Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Thu, 26 Nov 2020 14:02:18 +0100
Subject: media: vicodec: mark the stateless FWHT API as stable

The FWHT stateless 'uAPI' was in staging and was explicitly marked in the
V4L2 specification as unstable and subject to change.
Note that these control IDs were never exported as a public API,
they were only defined in kernel-local headers (fwht-ctrls.h).

Now, the FWHT stateless controls are ready to be part
of the stable uAPI.

While not too late:

- Rename V4L2_CID_MPEG_VIDEO_FWHT_PARAMS to V4L2_CID_STATELESS_FWHT_PARAMS.

- Move the contents of fwht-ctrls.h to v4l2-controls.h.

- Move the public parts of drivers/media/test-drivers/vicodec/codec-fwht.h
  to v4l2-controls.h.

- Add V4L2_CTRL_TYPE_FWHT_PARAMS control initialization and validation.

- Add p_fwht_params to struct v4l2_ext_control.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/test-drivers/vicodec/codec-fwht.c   |  1 +
 drivers/media/test-drivers/vicodec/codec-fwht.h   | 32 -----------
 drivers/media/test-drivers/vicodec/vicodec-core.c |  6 +-
 drivers/media/v4l2-core/v4l2-ctrls.c              | 20 ++++++-
 include/media/fwht-ctrls.h                        | 31 ----------
 include/media/v4l2-ctrls.h                        |  1 -
 include/uapi/linux/v4l2-controls.h                | 70 +++++++++++++++++++++++
 include/uapi/linux/videodev2.h                    |  3 +
 8 files changed, 95 insertions(+), 69 deletions(-)
 delete mode 100644 include/media/fwht-ctrls.h

(limited to 'include/uapi')

diff --git a/drivers/media/test-drivers/vicodec/codec-fwht.c b/drivers/media/test-drivers/vicodec/codec-fwht.c
index 5dbf76fd8185..1ce682e1b85c 100644
--- a/drivers/media/test-drivers/vicodec/codec-fwht.c
+++ b/drivers/media/test-drivers/vicodec/codec-fwht.c
@@ -11,6 +11,7 @@
 
 #include <linux/string.h>
 #include <linux/kernel.h>
+#include <linux/videodev2.h>
 #include "codec-fwht.h"
 
 #define OVERFLOW_BIT BIT(14)
diff --git a/drivers/media/test-drivers/vicodec/codec-fwht.h b/drivers/media/test-drivers/vicodec/codec-fwht.h
index 78e2841762fb..0eab24020e9e 100644
--- a/drivers/media/test-drivers/vicodec/codec-fwht.h
+++ b/drivers/media/test-drivers/vicodec/codec-fwht.h
@@ -56,38 +56,6 @@
 #define FWHT_MAGIC1 0x4f4f4f4f
 #define FWHT_MAGIC2 0xffffffff
 
-#define V4L2_FWHT_VERSION 3
-
-/* Set if this is an interlaced format */
-#define V4L2_FWHT_FL_IS_INTERLACED		BIT(0)
-/* Set if this is a bottom-first (NTSC) interlaced format */
-#define V4L2_FWHT_FL_IS_BOTTOM_FIRST		BIT(1)
-/* Set if each 'frame' contains just one field */
-#define V4L2_FWHT_FL_IS_ALTERNATE		BIT(2)
-/*
- * If V4L2_FWHT_FL_IS_ALTERNATE was set, then this is set if this
- * 'frame' is the bottom field, else it is the top field.
- */
-#define V4L2_FWHT_FL_IS_BOTTOM_FIELD		BIT(3)
-/* Set if this frame is uncompressed */
-#define V4L2_FWHT_FL_LUMA_IS_UNCOMPRESSED	BIT(4)
-#define V4L2_FWHT_FL_CB_IS_UNCOMPRESSED	BIT(5)
-#define V4L2_FWHT_FL_CR_IS_UNCOMPRESSED	BIT(6)
-#define V4L2_FWHT_FL_CHROMA_FULL_HEIGHT	BIT(7)
-#define V4L2_FWHT_FL_CHROMA_FULL_WIDTH	BIT(8)
-#define V4L2_FWHT_FL_ALPHA_IS_UNCOMPRESSED	BIT(9)
-#define V4L2_FWHT_FL_I_FRAME			BIT(10)
-
-/* A 4-values flag - the number of components - 1 */
-#define V4L2_FWHT_FL_COMPONENTS_NUM_MSK	GENMASK(18, 16)
-#define V4L2_FWHT_FL_COMPONENTS_NUM_OFFSET	16
-
-#define V4L2_FWHT_FL_PIXENC_MSK	GENMASK(20, 19)
-#define V4L2_FWHT_FL_PIXENC_OFFSET	19
-#define V4L2_FWHT_FL_PIXENC_YUV	(1 << V4L2_FWHT_FL_PIXENC_OFFSET)
-#define V4L2_FWHT_FL_PIXENC_RGB	(2 << V4L2_FWHT_FL_PIXENC_OFFSET)
-#define V4L2_FWHT_FL_PIXENC_HSV	(3 << V4L2_FWHT_FL_PIXENC_OFFSET)
-
 /*
  * A macro to calculate the needed padding in order to make sure
  * both luma and chroma components resolutions are rounded up to
diff --git a/drivers/media/test-drivers/vicodec/vicodec-core.c b/drivers/media/test-drivers/vicodec/vicodec-core.c
index 084b75c226c5..025f3ff77302 100644
--- a/drivers/media/test-drivers/vicodec/vicodec-core.c
+++ b/drivers/media/test-drivers/vicodec/vicodec-core.c
@@ -1746,7 +1746,7 @@ static int vicodec_try_ctrl(struct v4l2_ctrl *ctrl)
 			V4L2_BUF_TYPE_VIDEO_CAPTURE);
 
 	switch (ctrl->id) {
-	case V4L2_CID_MPEG_VIDEO_FWHT_PARAMS:
+	case V4L2_CID_STATELESS_FWHT_PARAMS:
 		if (!q_dst->info)
 			return -EINVAL;
 		params = ctrl->p_new.p_fwht_params;
@@ -1799,7 +1799,7 @@ static int vicodec_s_ctrl(struct v4l2_ctrl *ctrl)
 	case V4L2_CID_FWHT_P_FRAME_QP:
 		ctx->state.p_frame_qp = ctrl->val;
 		return 0;
-	case V4L2_CID_MPEG_VIDEO_FWHT_PARAMS:
+	case V4L2_CID_STATELESS_FWHT_PARAMS:
 		params = ctrl->p_new.p_fwht_params;
 		update_header_from_stateless_params(ctx, params);
 		ctx->state.ref_frame_ts = params->backward_ref_ts;
@@ -1815,7 +1815,7 @@ static const struct v4l2_ctrl_ops vicodec_ctrl_ops = {
 
 static const struct v4l2_ctrl_config vicodec_ctrl_stateless_state = {
 	.ops		= &vicodec_ctrl_ops,
-	.id		= V4L2_CID_MPEG_VIDEO_FWHT_PARAMS,
+	.id		= V4L2_CID_STATELESS_FWHT_PARAMS,
 	.elem_size      = sizeof(struct v4l2_ctrl_fwht_params),
 };
 
diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index 88231ba7b0fa..5cbe0ffbf501 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -943,7 +943,6 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME:		return "Force Key Frame";
 	case V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS:		return "MPEG-2 Slice Parameters";
 	case V4L2_CID_MPEG_VIDEO_MPEG2_QUANTIZATION:		return "MPEG-2 Quantization Matrices";
-	case V4L2_CID_MPEG_VIDEO_FWHT_PARAMS:			return "FWHT Stateless Parameters";
 	case V4L2_CID_FWHT_I_FRAME_QP:				return "FWHT I-Frame QP Value";
 	case V4L2_CID_FWHT_P_FRAME_QP:				return "FWHT P-Frame QP Value";
 
@@ -1185,6 +1184,7 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_STATELESS_H264_PRED_WEIGHTS:		return "H264 Prediction Weight Table";
 	case V4L2_CID_STATELESS_H264_SLICE_PARAMS:		return "H264 Slice Parameters";
 	case V4L2_CID_STATELESS_H264_DECODE_PARAMS:		return "H264 Decode Parameters";
+	case V4L2_CID_STATELESS_FWHT_PARAMS:			return "FWHT Stateless Parameters";
 	default:
 		return NULL;
 	}
@@ -1433,7 +1433,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_MPEG_VIDEO_MPEG2_QUANTIZATION:
 		*type = V4L2_CTRL_TYPE_MPEG2_QUANTIZATION;
 		break;
-	case V4L2_CID_MPEG_VIDEO_FWHT_PARAMS:
+	case V4L2_CID_STATELESS_FWHT_PARAMS:
 		*type = V4L2_CTRL_TYPE_FWHT_PARAMS;
 		break;
 	case V4L2_CID_STATELESS_H264_SPS:
@@ -1627,6 +1627,7 @@ static void std_init_compound(const struct v4l2_ctrl *ctrl, u32 idx,
 {
 	struct v4l2_ctrl_mpeg2_slice_params *p_mpeg2_slice_params;
 	struct v4l2_ctrl_vp8_frame_header *p_vp8_frame_header;
+	struct v4l2_ctrl_fwht_params *p_fwht_params;
 	void *p = ptr.p + idx * ctrl->elem_size;
 
 	if (ctrl->p_def.p_const)
@@ -1653,6 +1654,12 @@ static void std_init_compound(const struct v4l2_ctrl *ctrl, u32 idx,
 		p_vp8_frame_header = p;
 		p_vp8_frame_header->num_dct_parts = 1;
 		break;
+	case V4L2_CTRL_TYPE_FWHT_PARAMS:
+		p_fwht_params = p;
+		p_fwht_params->version = V4L2_FWHT_VERSION;
+		p_fwht_params->width = 1280;
+		p_fwht_params->height = 720;
+		break;
 	}
 }
 
@@ -1755,6 +1762,9 @@ static void std_log(const struct v4l2_ctrl *ctrl)
 	case V4L2_CTRL_TYPE_H264_PRED_WEIGHTS:
 		pr_cont("H264_PRED_WEIGHTS");
 		break;
+	case V4L2_CTRL_TYPE_FWHT_PARAMS:
+		pr_cont("FWHT_PARAMS");
+		break;
 	default:
 		pr_cont("unknown type %d", ctrl->type);
 		break;
@@ -1798,6 +1808,7 @@ static int std_validate_compound(const struct v4l2_ctrl *ctrl, u32 idx,
 {
 	struct v4l2_ctrl_mpeg2_slice_params *p_mpeg2_slice_params;
 	struct v4l2_ctrl_vp8_frame_header *p_vp8_frame_header;
+	struct v4l2_ctrl_fwht_params *p_fwht_params;
 	struct v4l2_ctrl_h264_sps *p_h264_sps;
 	struct v4l2_ctrl_h264_pps *p_h264_pps;
 	struct v4l2_ctrl_h264_pred_weights *p_h264_pred_weights;
@@ -1857,6 +1868,11 @@ static int std_validate_compound(const struct v4l2_ctrl *ctrl, u32 idx,
 		break;
 
 	case V4L2_CTRL_TYPE_FWHT_PARAMS:
+		p_fwht_params = p;
+		if (p_fwht_params->version < V4L2_FWHT_VERSION)
+			return -EINVAL;
+		if (!p_fwht_params->width || !p_fwht_params->height)
+			return -EINVAL;
 		break;
 
 	case V4L2_CTRL_TYPE_H264_SPS:
diff --git a/include/media/fwht-ctrls.h b/include/media/fwht-ctrls.h
deleted file mode 100644
index a918b49609e1..000000000000
--- a/include/media/fwht-ctrls.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * These are the FWHT state controls for use with stateless FWHT
- * codec drivers.
- *
- * It turns out that these structs are not stable yet and will undergo
- * more changes. So keep them private until they are stable and ready to
- * become part of the official public API.
- */
-
-#ifndef _FWHT_CTRLS_H_
-#define _FWHT_CTRLS_H_
-
-#define V4L2_CTRL_TYPE_FWHT_PARAMS 0x0105
-
-#define V4L2_CID_MPEG_VIDEO_FWHT_PARAMS	(V4L2_CID_CODEC_BASE + 292)
-
-struct v4l2_ctrl_fwht_params {
-	__u64 backward_ref_ts;
-	__u32 version;
-	__u32 width;
-	__u32 height;
-	__u32 flags;
-	__u32 colorspace;
-	__u32 xfer_func;
-	__u32 ycbcr_enc;
-	__u32 quantization;
-};
-
-
-#endif
diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h
index d25b38f78229..167ca8c8424f 100644
--- a/include/media/v4l2-ctrls.h
+++ b/include/media/v4l2-ctrls.h
@@ -18,7 +18,6 @@
  * This will move to the public headers once this API is fully stable.
  */
 #include <media/mpeg2-ctrls.h>
-#include <media/fwht-ctrls.h>
 #include <media/vp8-ctrls.h>
 #include <media/hevc-ctrls.h>
 
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 3f24663fba4a..823b214aac0c 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -1565,6 +1565,76 @@ struct v4l2_ctrl_h264_decode_params {
 };
 
 
+/* Stateless FWHT control, used by the vicodec driver */
+
+/* Current FWHT version */
+#define V4L2_FWHT_VERSION			3
+
+/* Set if this is an interlaced format */
+#define V4L2_FWHT_FL_IS_INTERLACED		BIT(0)
+/* Set if this is a bottom-first (NTSC) interlaced format */
+#define V4L2_FWHT_FL_IS_BOTTOM_FIRST		BIT(1)
+/* Set if each 'frame' contains just one field */
+#define V4L2_FWHT_FL_IS_ALTERNATE		BIT(2)
+/*
+ * If V4L2_FWHT_FL_IS_ALTERNATE was set, then this is set if this
+ * 'frame' is the bottom field, else it is the top field.
+ */
+#define V4L2_FWHT_FL_IS_BOTTOM_FIELD		BIT(3)
+/* Set if the Y' plane is uncompressed */
+#define V4L2_FWHT_FL_LUMA_IS_UNCOMPRESSED	BIT(4)
+/* Set if the Cb plane is uncompressed */
+#define V4L2_FWHT_FL_CB_IS_UNCOMPRESSED		BIT(5)
+/* Set if the Cr plane is uncompressed */
+#define V4L2_FWHT_FL_CR_IS_UNCOMPRESSED		BIT(6)
+/* Set if the chroma plane is full height, if cleared it is half height */
+#define V4L2_FWHT_FL_CHROMA_FULL_HEIGHT		BIT(7)
+/* Set if the chroma plane is full width, if cleared it is half width */
+#define V4L2_FWHT_FL_CHROMA_FULL_WIDTH		BIT(8)
+/* Set if the alpha plane is uncompressed */
+#define V4L2_FWHT_FL_ALPHA_IS_UNCOMPRESSED	BIT(9)
+/* Set if this is an I Frame */
+#define V4L2_FWHT_FL_I_FRAME			BIT(10)
+
+/* A 4-values flag - the number of components - 1 */
+#define V4L2_FWHT_FL_COMPONENTS_NUM_MSK		GENMASK(18, 16)
+#define V4L2_FWHT_FL_COMPONENTS_NUM_OFFSET	16
+
+/* A 4-values flag - the pixel encoding type */
+#define V4L2_FWHT_FL_PIXENC_MSK			GENMASK(20, 19)
+#define V4L2_FWHT_FL_PIXENC_OFFSET		19
+#define V4L2_FWHT_FL_PIXENC_YUV			(1 << V4L2_FWHT_FL_PIXENC_OFFSET)
+#define V4L2_FWHT_FL_PIXENC_RGB			(2 << V4L2_FWHT_FL_PIXENC_OFFSET)
+#define V4L2_FWHT_FL_PIXENC_HSV			(3 << V4L2_FWHT_FL_PIXENC_OFFSET)
+
+#define V4L2_CID_STATELESS_FWHT_PARAMS		(V4L2_CID_CODEC_STATELESS_BASE + 100)
+/**
+ * struct v4l2_ctrl_fwht_params - FWHT parameters
+ *
+ * @backward_ref_ts: timestamp of the V4L2 capture buffer to use as reference.
+ * The timestamp refers to the timestamp field in struct v4l2_buffer.
+ * Use v4l2_timeval_to_ns() to convert the struct timeval to a __u64.
+ * @version: must be V4L2_FWHT_VERSION.
+ * @width: width of frame.
+ * @height: height of frame.
+ * @flags: FWHT flags (see V4L2_FWHT_FL_*).
+ * @colorspace: the colorspace (enum v4l2_colorspace).
+ * @xfer_func: the transfer function (enum v4l2_xfer_func).
+ * @ycbcr_enc: the Y'CbCr encoding (enum v4l2_ycbcr_encoding).
+ * @quantization: the quantization (enum v4l2_quantization).
+ */
+struct v4l2_ctrl_fwht_params {
+	__u64 backward_ref_ts;
+	__u32 version;
+	__u32 width;
+	__u32 height;
+	__u32 flags;
+	__u32 colorspace;
+	__u32 xfer_func;
+	__u32 ycbcr_enc;
+	__u32 quantization;
+};
+
 /* MPEG-compression definitions kept for backwards compatibility */
 #ifndef __KERNEL__
 #define V4L2_CTRL_CLASS_MPEG            V4L2_CTRL_CLASS_CODEC
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 76231ca6e101..e40e95be79f9 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -1740,6 +1740,7 @@ struct v4l2_ext_control {
 		struct v4l2_ctrl_h264_pred_weights __user *p_h264_pred_weights;
 		struct v4l2_ctrl_h264_slice_params __user *p_h264_slice_params;
 		struct v4l2_ctrl_h264_decode_params __user *p_h264_decode_params;
+		struct v4l2_ctrl_fwht_params __user *p_fwht_params;
 		void __user *ptr;
 	};
 } __attribute__ ((packed));
@@ -1793,6 +1794,8 @@ enum v4l2_ctrl_type {
 	V4L2_CTRL_TYPE_H264_SLICE_PARAMS    = 0x0203,
 	V4L2_CTRL_TYPE_H264_DECODE_PARAMS   = 0x0204,
 	V4L2_CTRL_TYPE_H264_PRED_WEIGHTS    = 0x0205,
+
+	V4L2_CTRL_TYPE_FWHT_PARAMS	    = 0x0220,
 };
 
 /*  Used in the VIDIOC_QUERYCTRL ioctl for querying controls */
-- 
cgit v1.2.3


From d4bff72c8401e6f56194ecf455db70ebc22929e2 Mon Sep 17 00:00:00 2001
From: Thomas Karlsson <thomas.karlsson@paneda.se>
Date: Wed, 2 Dec 2020 19:49:58 +0100
Subject: macvlan: Support for high multicast packet rate

Background:
Broadcast and multicast packets are enqueued for later processing.
This queue was previously hardcoded to 1000.

This proved insufficient for handling very high packet rates.
This resulted in packet drops for multicast.
While at the same time unicast worked fine.

The change:
This patch makes the queue length adjustable to accommodate
environments with very high multicast packet rates.
But still keeps the default value of 1000 unless specified.

The queue length is specified as a request per macvlan
using the IFLA_MACVLAN_BC_QUEUE_LEN parameter.

The actual used queue length will then be the maximum of
any macvlan connected to the same port. The actual used
queue length for the port can be retrieved (read only)
by the IFLA_MACVLAN_BC_QUEUE_LEN_USED parameter for verification.

This will be followed up by a patch to iproute2
in order to adjust the parameter from userspace.

Signed-off-by: Thomas Karlsson <thomas.karlsson@paneda.se>
Link: https://lore.kernel.org/r/dd4673b2-7eab-edda-6815-85c67ce87f63@paneda.se
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/macvlan.c              | 40 ++++++++++++++++++++++++++++++++++++--
 include/linux/if_macvlan.h         |  1 +
 include/uapi/linux/if_link.h       |  2 ++
 tools/include/uapi/linux/if_link.h |  2 ++
 4 files changed, 43 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index d9b6c44a5911..fb51329f8964 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -35,7 +35,7 @@
 
 #define MACVLAN_HASH_BITS	8
 #define MACVLAN_HASH_SIZE	(1<<MACVLAN_HASH_BITS)
-#define MACVLAN_BC_QUEUE_LEN	1000
+#define MACVLAN_DEFAULT_BC_QUEUE_LEN	1000
 
 #define MACVLAN_F_PASSTHRU	1
 #define MACVLAN_F_ADDRCHANGE	2
@@ -46,6 +46,7 @@ struct macvlan_port {
 	struct list_head	vlans;
 	struct sk_buff_head	bc_queue;
 	struct work_struct	bc_work;
+	u32			bc_queue_len_used;
 	u32			flags;
 	int			count;
 	struct hlist_head	vlan_source_hash[MACVLAN_HASH_SIZE];
@@ -67,6 +68,7 @@ struct macvlan_skb_cb {
 #define MACVLAN_SKB_CB(__skb) ((struct macvlan_skb_cb *)&((__skb)->cb[0]))
 
 static void macvlan_port_destroy(struct net_device *dev);
+static void update_port_bc_queue_len(struct macvlan_port *port);
 
 static inline bool macvlan_passthru(const struct macvlan_port *port)
 {
@@ -354,7 +356,7 @@ static void macvlan_broadcast_enqueue(struct macvlan_port *port,
 	MACVLAN_SKB_CB(nskb)->src = src;
 
 	spin_lock(&port->bc_queue.lock);
-	if (skb_queue_len(&port->bc_queue) < MACVLAN_BC_QUEUE_LEN) {
+	if (skb_queue_len(&port->bc_queue) < port->bc_queue_len_used) {
 		if (src)
 			dev_hold(src->dev);
 		__skb_queue_tail(&port->bc_queue, nskb);
@@ -1218,6 +1220,7 @@ static int macvlan_port_create(struct net_device *dev)
 	for (i = 0; i < MACVLAN_HASH_SIZE; i++)
 		INIT_HLIST_HEAD(&port->vlan_source_hash[i]);
 
+	port->bc_queue_len_used = 0;
 	skb_queue_head_init(&port->bc_queue);
 	INIT_WORK(&port->bc_work, macvlan_process_broadcast);
 
@@ -1486,6 +1489,10 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 			goto destroy_macvlan_port;
 	}
 
+	vlan->bc_queue_len_req = MACVLAN_DEFAULT_BC_QUEUE_LEN;
+	if (data && data[IFLA_MACVLAN_BC_QUEUE_LEN])
+		vlan->bc_queue_len_req = nla_get_u32(data[IFLA_MACVLAN_BC_QUEUE_LEN]);
+
 	err = register_netdevice(dev);
 	if (err < 0)
 		goto destroy_macvlan_port;
@@ -1496,6 +1503,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 		goto unregister_netdev;
 
 	list_add_tail_rcu(&vlan->list, &port->vlans);
+	update_port_bc_queue_len(vlan->port);
 	netif_stacked_transfer_operstate(lowerdev, dev);
 	linkwatch_fire_event(dev);
 
@@ -1529,6 +1537,7 @@ void macvlan_dellink(struct net_device *dev, struct list_head *head)
 	if (vlan->mode == MACVLAN_MODE_SOURCE)
 		macvlan_flush_sources(vlan->port, vlan);
 	list_del_rcu(&vlan->list);
+	update_port_bc_queue_len(vlan->port);
 	unregister_netdevice_queue(dev, head);
 	netdev_upper_dev_unlink(vlan->lowerdev, dev);
 }
@@ -1572,6 +1581,12 @@ static int macvlan_changelink(struct net_device *dev,
 		}
 		vlan->flags = flags;
 	}
+
+	if (data && data[IFLA_MACVLAN_BC_QUEUE_LEN]) {
+		vlan->bc_queue_len_req = nla_get_u32(data[IFLA_MACVLAN_BC_QUEUE_LEN]);
+		update_port_bc_queue_len(vlan->port);
+	}
+
 	if (set_mode)
 		vlan->mode = mode;
 	if (data && data[IFLA_MACVLAN_MACADDR_MODE]) {
@@ -1602,6 +1617,8 @@ static size_t macvlan_get_size(const struct net_device *dev)
 		+ nla_total_size(2) /* IFLA_MACVLAN_FLAGS */
 		+ nla_total_size(4) /* IFLA_MACVLAN_MACADDR_COUNT */
 		+ macvlan_get_size_mac(vlan) /* IFLA_MACVLAN_MACADDR */
+		+ nla_total_size(4) /* IFLA_MACVLAN_BC_QUEUE_LEN */
+		+ nla_total_size(4) /* IFLA_MACVLAN_BC_QUEUE_LEN_USED */
 		);
 }
 
@@ -1625,6 +1642,7 @@ static int macvlan_fill_info(struct sk_buff *skb,
 				const struct net_device *dev)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
+	struct macvlan_port *port = vlan->port;
 	int i;
 	struct nlattr *nest;
 
@@ -1645,6 +1663,10 @@ static int macvlan_fill_info(struct sk_buff *skb,
 		}
 		nla_nest_end(skb, nest);
 	}
+	if (nla_put_u32(skb, IFLA_MACVLAN_BC_QUEUE_LEN, vlan->bc_queue_len_req))
+		goto nla_put_failure;
+	if (nla_put_u32(skb, IFLA_MACVLAN_BC_QUEUE_LEN_USED, port->bc_queue_len_used))
+		goto nla_put_failure;
 	return 0;
 
 nla_put_failure:
@@ -1658,6 +1680,8 @@ static const struct nla_policy macvlan_policy[IFLA_MACVLAN_MAX + 1] = {
 	[IFLA_MACVLAN_MACADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
 	[IFLA_MACVLAN_MACADDR_DATA] = { .type = NLA_NESTED },
 	[IFLA_MACVLAN_MACADDR_COUNT] = { .type = NLA_U32 },
+	[IFLA_MACVLAN_BC_QUEUE_LEN] = { .type = NLA_U32 },
+	[IFLA_MACVLAN_BC_QUEUE_LEN_USED] = { .type = NLA_REJECT },
 };
 
 int macvlan_link_register(struct rtnl_link_ops *ops)
@@ -1688,6 +1712,18 @@ static struct rtnl_link_ops macvlan_link_ops = {
 	.priv_size      = sizeof(struct macvlan_dev),
 };
 
+static void update_port_bc_queue_len(struct macvlan_port *port)
+{
+	u32 max_bc_queue_len_req = 0;
+	struct macvlan_dev *vlan;
+
+	list_for_each_entry(vlan, &port->vlans, list) {
+		if (vlan->bc_queue_len_req > max_bc_queue_len_req)
+			max_bc_queue_len_req = vlan->bc_queue_len_req;
+	}
+	port->bc_queue_len_used = max_bc_queue_len_req;
+}
+
 static int macvlan_device_event(struct notifier_block *unused,
 				unsigned long event, void *ptr)
 {
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index a367ead4bf4b..96556c64c95d 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -30,6 +30,7 @@ struct macvlan_dev {
 	enum macvlan_mode	mode;
 	u16			flags;
 	unsigned int		macaddr_count;
+	u32			bc_queue_len_req;
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	struct netpoll		*netpoll;
 #endif
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index c4b23f06f69e..874cc12a34d9 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -588,6 +588,8 @@ enum {
 	IFLA_MACVLAN_MACADDR,
 	IFLA_MACVLAN_MACADDR_DATA,
 	IFLA_MACVLAN_MACADDR_COUNT,
+	IFLA_MACVLAN_BC_QUEUE_LEN,
+	IFLA_MACVLAN_BC_QUEUE_LEN_USED,
 	__IFLA_MACVLAN_MAX,
 };
 
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index 781e482dc499..d208b2af697f 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -409,6 +409,8 @@ enum {
 	IFLA_MACVLAN_MACADDR,
 	IFLA_MACVLAN_MACADDR_DATA,
 	IFLA_MACVLAN_MACADDR_COUNT,
+	IFLA_MACVLAN_BC_QUEUE_LEN,
+	IFLA_MACVLAN_BC_QUEUE_LEN_USED,
 	__IFLA_MACVLAN_MAX,
 };
 
-- 
cgit v1.2.3


From 72d1249e2ffdbc344e465031ec5335fa3489d62e Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Tue, 1 Dec 2020 17:21:40 -0600
Subject: uapi: fix statx attribute value overlap for DAX & MOUNT_ROOT

STATX_ATTR_MOUNT_ROOT and STATX_ATTR_DAX got merged with the same value,
so one of them needs fixing.  Move STATX_ATTR_DAX.

While we're in here, clarify the value-matching scheme for some of the
attributes, and explain why the value for DAX does not match.

Fixes: 80340fe3605c ("statx: add mount_root")
Fixes: 712b2698e4c0 ("fs/stat: Define DAX statx attribute")
Link: https://lore.kernel.org/linux-fsdevel/7027520f-7c79-087e-1d00-743bdefa1a1e@redhat.com/
Link: https://lore.kernel.org/lkml/20201202214629.1563760-1-ira.weiny@intel.com/
Reported-by: David Howells <dhowells@redhat.com>
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Reviewed-by: David Howells <dhowells@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Cc: <stable@vger.kernel.org> # 5.8
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/stat.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
index 82cc58fe9368..1500a0f58041 100644
--- a/include/uapi/linux/stat.h
+++ b/include/uapi/linux/stat.h
@@ -171,9 +171,12 @@ struct statx {
  * be of use to ordinary userspace programs such as GUIs or ls rather than
  * specialised tools.
  *
- * Note that the flags marked [I] correspond to generic FS_IOC_FLAGS
+ * Note that the flags marked [I] correspond to the FS_IOC_SETFLAGS flags
  * semantically.  Where possible, the numerical value is picked to correspond
- * also.
+ * also.  Note that the DAX attribute indicates that the file is in the CPU
+ * direct access state.  It does not correspond to the per-inode flag that
+ * some filesystems support.
+ *
  */
 #define STATX_ATTR_COMPRESSED		0x00000004 /* [I] File is compressed by the fs */
 #define STATX_ATTR_IMMUTABLE		0x00000010 /* [I] File is marked immutable */
@@ -183,7 +186,7 @@ struct statx {
 #define STATX_ATTR_AUTOMOUNT		0x00001000 /* Dir: Automount trigger */
 #define STATX_ATTR_MOUNT_ROOT		0x00002000 /* Root of a mount */
 #define STATX_ATTR_VERITY		0x00100000 /* [I] Verity protected file */
-#define STATX_ATTR_DAX			0x00002000 /* [I] File is DAX */
+#define STATX_ATTR_DAX			0x00200000 /* File is currently in DAX state */
 
 
 #endif /* _UAPI_LINUX_STAT_H */
-- 
cgit v1.2.3


From bccce80bbd44ab50bbec761a51c6293c1ce47e34 Mon Sep 17 00:00:00 2001
From: Eric Farman <farman@linux.ibm.com>
Date: Thu, 3 Dec 2020 22:35:12 +0100
Subject: vfio-ccw: Wire in the request callback

The device is being unplugged, so pass the request to userspace to
ask for a graceful cleanup. This should free up the thread that
would otherwise loop waiting for the device to be fully released.

Signed-off-by: Eric Farman <farman@linux.ibm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/s390/cio/vfio_ccw_ops.c     | 26 ++++++++++++++++++++++++++
 drivers/s390/cio/vfio_ccw_private.h |  4 ++++
 include/uapi/linux/vfio.h           |  1 +
 3 files changed, 31 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
index 8b3ed5b45277..68106be4ba7a 100644
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -394,6 +394,7 @@ static int vfio_ccw_mdev_get_irq_info(struct vfio_irq_info *info)
 	switch (info->index) {
 	case VFIO_CCW_IO_IRQ_INDEX:
 	case VFIO_CCW_CRW_IRQ_INDEX:
+	case VFIO_CCW_REQ_IRQ_INDEX:
 		info->count = 1;
 		info->flags = VFIO_IRQ_INFO_EVENTFD;
 		break;
@@ -424,6 +425,9 @@ static int vfio_ccw_mdev_set_irqs(struct mdev_device *mdev,
 	case VFIO_CCW_CRW_IRQ_INDEX:
 		ctx = &private->crw_trigger;
 		break;
+	case VFIO_CCW_REQ_IRQ_INDEX:
+		ctx = &private->req_trigger;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -607,6 +611,27 @@ static ssize_t vfio_ccw_mdev_ioctl(struct mdev_device *mdev,
 	}
 }
 
+/* Request removal of the device */
+static void vfio_ccw_mdev_request(struct mdev_device *mdev, unsigned int count)
+{
+	struct vfio_ccw_private *private = dev_get_drvdata(mdev_parent_dev(mdev));
+
+	if (!private)
+		return;
+
+	if (private->req_trigger) {
+		if (!(count % 10))
+			dev_notice_ratelimited(mdev_dev(private->mdev),
+					       "Relaying device request to user (#%u)\n",
+					       count);
+
+		eventfd_signal(private->req_trigger, 1);
+	} else if (count == 0) {
+		dev_notice(mdev_dev(private->mdev),
+			   "No device request channel registered, blocked until released by user\n");
+	}
+}
+
 static const struct mdev_parent_ops vfio_ccw_mdev_ops = {
 	.owner			= THIS_MODULE,
 	.supported_type_groups  = mdev_type_groups,
@@ -617,6 +642,7 @@ static const struct mdev_parent_ops vfio_ccw_mdev_ops = {
 	.read			= vfio_ccw_mdev_read,
 	.write			= vfio_ccw_mdev_write,
 	.ioctl			= vfio_ccw_mdev_ioctl,
+	.request		= vfio_ccw_mdev_request,
 };
 
 int vfio_ccw_mdev_reg(struct subchannel *sch)
diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
index 8723156b29ea..b2c762eb42b9 100644
--- a/drivers/s390/cio/vfio_ccw_private.h
+++ b/drivers/s390/cio/vfio_ccw_private.h
@@ -84,7 +84,10 @@ struct vfio_ccw_crw {
  * @irb: irb info received from interrupt
  * @scsw: scsw info
  * @io_trigger: eventfd ctx for signaling userspace I/O results
+ * @crw_trigger: eventfd ctx for signaling userspace CRW information
+ * @req_trigger: eventfd ctx for signaling userspace to return device
  * @io_work: work for deferral process of I/O handling
+ * @crw_work: work for deferral process of CRW handling
  */
 struct vfio_ccw_private {
 	struct subchannel	*sch;
@@ -108,6 +111,7 @@ struct vfio_ccw_private {
 
 	struct eventfd_ctx	*io_trigger;
 	struct eventfd_ctx	*crw_trigger;
+	struct eventfd_ctx	*req_trigger;
 	struct work_struct	io_work;
 	struct work_struct	crw_work;
 } __aligned(8);
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 2f313a238a8f..d1812777139f 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -820,6 +820,7 @@ enum {
 enum {
 	VFIO_CCW_IO_IRQ_INDEX,
 	VFIO_CCW_CRW_IRQ_INDEX,
+	VFIO_CCW_REQ_IRQ_INDEX,
 	VFIO_CCW_NUM_IRQS
 };
 
-- 
cgit v1.2.3


From 290248a5b7d829871b3ea3c62578613a580a1744 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 3 Dec 2020 12:46:30 -0800
Subject: bpf: Allow to specify kernel module BTFs when attaching BPF programs

Add ability for user-space programs to specify non-vmlinux BTF when attaching
BTF-powered BPF programs: raw_tp, fentry/fexit/fmod_ret, LSM, etc. For this,
attach_prog_fd (now with the alias name attach_btf_obj_fd) should specify FD
of a module or vmlinux BTF object. For backwards compatibility reasons,
0 denotes vmlinux BTF. Only kernel BTF (vmlinux or module) can be specified.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-11-andrii@kernel.org
---
 include/linux/btf.h            |  1 +
 include/uapi/linux/bpf.h       |  7 +++-
 kernel/bpf/btf.c               |  5 +++
 kernel/bpf/syscall.c           | 82 ++++++++++++++++++++++++++----------------
 tools/include/uapi/linux/bpf.h |  7 +++-
 5 files changed, 69 insertions(+), 33 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/btf.h b/include/linux/btf.h
index fb608e4de076..4c200f5d242b 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -90,6 +90,7 @@ int btf_type_snprintf_show(const struct btf *btf, u32 type_id, void *obj,
 
 int btf_get_fd_by_id(u32 id);
 u32 btf_obj_id(const struct btf *btf);
+bool btf_is_kernel(const struct btf *btf);
 bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
 			   const struct btf_member *m,
 			   u32 expected_offset, u32 expected_size);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c3458ec1f30a..1233f14f659f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -557,7 +557,12 @@ union bpf_attr {
 		__aligned_u64	line_info;	/* line info */
 		__u32		line_info_cnt;	/* number of bpf_line_info records */
 		__u32		attach_btf_id;	/* in-kernel BTF type id to attach to */
-		__u32		attach_prog_fd; /* 0 to attach to vmlinux */
+		union {
+			/* valid prog_fd to attach to bpf prog */
+			__u32		attach_prog_fd;
+			/* or valid module BTF object fd or 0 to attach to vmlinux */
+			__u32		attach_btf_obj_fd;
+		};
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 7a19bf5bfe97..8d6bdb4f4d61 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -5738,6 +5738,11 @@ u32 btf_obj_id(const struct btf *btf)
 	return btf->id;
 }
 
+bool btf_is_kernel(const struct btf *btf)
+{
+	return btf->kernel_btf;
+}
+
 static int btf_id_cmp_func(const void *a, const void *b)
 {
 	const int *pa = a, *pb = b;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 184204169949..0cd3cc2af9c1 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1926,12 +1926,16 @@ static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr)
 static int
 bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
 			   enum bpf_attach_type expected_attach_type,
-			   u32 btf_id, u32 prog_fd)
+			   struct btf *attach_btf, u32 btf_id,
+			   struct bpf_prog *dst_prog)
 {
 	if (btf_id) {
 		if (btf_id > BTF_MAX_TYPE)
 			return -EINVAL;
 
+		if (!attach_btf && !dst_prog)
+			return -EINVAL;
+
 		switch (prog_type) {
 		case BPF_PROG_TYPE_TRACING:
 		case BPF_PROG_TYPE_LSM:
@@ -1943,7 +1947,10 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
 		}
 	}
 
-	if (prog_fd && prog_type != BPF_PROG_TYPE_TRACING &&
+	if (attach_btf && (!btf_id || dst_prog))
+		return -EINVAL;
+
+	if (dst_prog && prog_type != BPF_PROG_TYPE_TRACING &&
 	    prog_type != BPF_PROG_TYPE_EXT)
 		return -EINVAL;
 
@@ -2060,7 +2067,8 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
 static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
 {
 	enum bpf_prog_type type = attr->prog_type;
-	struct bpf_prog *prog;
+	struct bpf_prog *prog, *dst_prog = NULL;
+	struct btf *attach_btf = NULL;
 	int err;
 	char license[128];
 	bool is_gpl;
@@ -2102,44 +2110,56 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
 	if (is_perfmon_prog_type(type) && !perfmon_capable())
 		return -EPERM;
 
+	/* attach_prog_fd/attach_btf_obj_fd can specify fd of either bpf_prog
+	 * or btf, we need to check which one it is
+	 */
+	if (attr->attach_prog_fd) {
+		dst_prog = bpf_prog_get(attr->attach_prog_fd);
+		if (IS_ERR(dst_prog)) {
+			dst_prog = NULL;
+			attach_btf = btf_get_by_fd(attr->attach_btf_obj_fd);
+			if (IS_ERR(attach_btf))
+				return -EINVAL;
+			if (!btf_is_kernel(attach_btf)) {
+				btf_put(attach_btf);
+				return -EINVAL;
+			}
+		}
+	} else if (attr->attach_btf_id) {
+		/* fall back to vmlinux BTF, if BTF type ID is specified */
+		attach_btf = bpf_get_btf_vmlinux();
+		if (IS_ERR(attach_btf))
+			return PTR_ERR(attach_btf);
+		if (!attach_btf)
+			return -EINVAL;
+		btf_get(attach_btf);
+	}
+
 	bpf_prog_load_fixup_attach_type(attr);
 	if (bpf_prog_load_check_attach(type, attr->expected_attach_type,
-				       attr->attach_btf_id,
-				       attr->attach_prog_fd))
+				       attach_btf, attr->attach_btf_id,
+				       dst_prog)) {
+		if (dst_prog)
+			bpf_prog_put(dst_prog);
+		if (attach_btf)
+			btf_put(attach_btf);
 		return -EINVAL;
+	}
 
 	/* plain bpf_prog allocation */
 	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
-	if (!prog)
+	if (!prog) {
+		if (dst_prog)
+			bpf_prog_put(dst_prog);
+		if (attach_btf)
+			btf_put(attach_btf);
 		return -ENOMEM;
+	}
 
 	prog->expected_attach_type = attr->expected_attach_type;
+	prog->aux->attach_btf = attach_btf;
 	prog->aux->attach_btf_id = attr->attach_btf_id;
-
-	if (attr->attach_btf_id && !attr->attach_prog_fd) {
-		struct btf *btf;
-
-		btf = bpf_get_btf_vmlinux();
-		if (IS_ERR(btf))
-			return PTR_ERR(btf);
-		if (!btf)
-			return -EINVAL;
-
-		btf_get(btf);
-		prog->aux->attach_btf = btf;
-	}
-
-	if (attr->attach_prog_fd) {
-		struct bpf_prog *dst_prog;
-
-		dst_prog = bpf_prog_get(attr->attach_prog_fd);
-		if (IS_ERR(dst_prog)) {
-			err = PTR_ERR(dst_prog);
-			goto free_prog;
-		}
-		prog->aux->dst_prog = dst_prog;
-	}
-
+	prog->aux->dst_prog = dst_prog;
 	prog->aux->offload_requested = !!attr->prog_ifindex;
 	prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE;
 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c3458ec1f30a..1233f14f659f 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -557,7 +557,12 @@ union bpf_attr {
 		__aligned_u64	line_info;	/* line info */
 		__u32		line_info_cnt;	/* number of bpf_line_info records */
 		__u32		attach_btf_id;	/* in-kernel BTF type id to attach to */
-		__u32		attach_prog_fd; /* 0 to attach to vmlinux */
+		union {
+			/* valid prog_fd to attach to bpf prog */
+			__u32		attach_prog_fd;
+			/* or valid module BTF object fd or 0 to attach to vmlinux */
+			__u32		attach_btf_obj_fd;
+		};
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
-- 
cgit v1.2.3


From 128254ceea6ffe59300d3cca6fc83b842048f4c4 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Sun, 11 Oct 2020 12:25:23 +0200
Subject: batman-adv: Prepare infrastructure for newlink settings

The batadv generic netlink family can be used to retrieve the current state
and set various configuration settings. But there are also settings which
must be set before the actual interface is created.

The rtnetlink already uses IFLA_INFO_DATA to allow net_device families to
transfer such configurations. The minimal required functionality for this
is now available for the batadv rtnl_link_ops. Also a new IFLA class of
attributes will be attached to it because rtnetlink only allows 51
different attributes but batadv_nl_attrs already contains 62 attributes.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batman_adv.h | 20 ++++++++++++++++++++
 net/batman-adv/soft-interface.c | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index bb0ae945b36a..b05399d8a127 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -675,4 +675,24 @@ enum batadv_tp_meter_reason {
 	BATADV_TP_REASON_TOO_MANY		= 133,
 };
 
+/**
+ * enum batadv_ifla_attrs - batman-adv ifla nested attributes
+ */
+enum batadv_ifla_attrs {
+	/**
+	 * @IFLA_BATADV_UNSPEC: unspecified attribute which is not parsed by
+	 *  rtnetlink
+	 */
+	IFLA_BATADV_UNSPEC,
+
+	/* add attributes above here, update the policy in soft-interface.c */
+
+	/**
+	 * @__IFLA_BATADV_MAX: internal use
+	 */
+	__IFLA_BATADV_MAX,
+};
+
+#define IFLA_BATADV_MAX (__IFLA_BATADV_MAX - 1)
+
 #endif /* _UAPI_LINUX_BATMAN_ADV_H_ */
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 82e7ca886605..9c7b89689c97 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -38,6 +38,7 @@
 #include <linux/stddef.h>
 #include <linux/string.h>
 #include <linux/types.h>
+#include <net/netlink.h>
 #include <uapi/linux/batadv_packet.h>
 #include <uapi/linux/batman_adv.h>
 
@@ -1073,6 +1074,37 @@ static void batadv_softif_init_early(struct net_device *dev)
 	dev->ethtool_ops = &batadv_ethtool_ops;
 }
 
+/**
+ * batadv_softif_validate() - validate configuration of new batadv link
+ * @tb: IFLA_INFO_DATA netlink attributes
+ * @data: enum batadv_ifla_attrs attributes
+ * @extack: extended ACK report struct
+ *
+ * Return: 0 if successful or error otherwise.
+ */
+static int batadv_softif_validate(struct nlattr *tb[], struct nlattr *data[],
+				  struct netlink_ext_ack *extack)
+{
+	return 0;
+}
+
+/**
+ * batadv_softif_newlink() - pre-initialize and register new batadv link
+ * @src_net: the applicable net namespace
+ * @dev: network device to register
+ * @tb: IFLA_INFO_DATA netlink attributes
+ * @data: enum batadv_ifla_attrs attributes
+ * @extack: extended ACK report struct
+ *
+ * Return: 0 if successful or error otherwise.
+ */
+static int batadv_softif_newlink(struct net *src_net, struct net_device *dev,
+				 struct nlattr *tb[], struct nlattr *data[],
+				 struct netlink_ext_ack *extack)
+{
+	return register_netdevice(dev);
+}
+
 /**
  * batadv_softif_create() - Create and register soft interface
  * @net: the applicable net namespace
@@ -1171,9 +1203,16 @@ bool batadv_softif_is_valid(const struct net_device *net_dev)
 	return false;
 }
 
+static const struct nla_policy batadv_ifla_policy[IFLA_BATADV_MAX + 1] = {
+};
+
 struct rtnl_link_ops batadv_link_ops __read_mostly = {
 	.kind		= "batadv",
 	.priv_size	= sizeof(struct batadv_priv),
 	.setup		= batadv_softif_init_early,
+	.maxtype	= IFLA_BATADV_MAX,
+	.policy		= batadv_ifla_policy,
+	.validate	= batadv_softif_validate,
+	.newlink	= batadv_softif_newlink,
 	.dellink	= batadv_softif_destroy_netlink,
 };
-- 
cgit v1.2.3


From a5ad457eea41ef7209f3a1765f853a2c7f191131 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Sun, 11 Oct 2020 12:25:24 +0200
Subject: batman-adv: Allow selection of routing algorithm over rtnetlink

A batadv net_device is associated to a B.A.T.M.A.N. routing algorithm. This
algorithm has to be selected before the interface is initialized and cannot
be changed after that. The only way to select this algorithm was a module
parameter which specifies the default algorithm used during the creation of
the net_device.

This module parameter is writeable over
/sys/module/batman_adv/parameters/routing_algo and thus allows switching of
the routing algorithm:

1. change routing_algo parameter
2. create new batadv net_device

But this is not race free because another process can be scheduled between
1 + 2 and in that time frame change the routing_algo parameter again.

It is much cleaner to directly provide this information inside the
rtnetlink's RTM_NEWLINK message. The two processes would then (with regard
to the creation parameters of their batadv interfaces) be isolated. This also
eases the integration of batadv devices inside tools like network-manager
or systemd-networkd which are not expecting to operate on /sys before a new
net_device is created.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batman_adv.h |  6 ++++++
 net/batman-adv/bat_algo.c       | 10 ++++++++--
 net/batman-adv/bat_algo.h       |  3 ++-
 net/batman-adv/soft-interface.c | 31 ++++++++++++++++++++++++++++---
 4 files changed, 44 insertions(+), 6 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index b05399d8a127..bdb317faa1dc 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -685,6 +685,12 @@ enum batadv_ifla_attrs {
 	 */
 	IFLA_BATADV_UNSPEC,
 
+	/**
+	 * @IFLA_BATADV_ALGO_NAME: routing algorithm (name) which should be
+	 *  used by the newly registered batadv net_device.
+	 */
+	IFLA_BATADV_ALGO_NAME,
+
 	/* add attributes above here, update the policy in soft-interface.c */
 
 	/**
diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c
index 382fbe51fd34..500db94a6b50 100644
--- a/net/batman-adv/bat_algo.c
+++ b/net/batman-adv/bat_algo.c
@@ -34,7 +34,13 @@ void batadv_algo_init(void)
 	INIT_HLIST_HEAD(&batadv_algo_list);
 }
 
-static struct batadv_algo_ops *batadv_algo_get(char *name)
+/**
+ * batadv_algo_get() - Search for algorithm with specific name
+ * @name: algorithm name to find
+ *
+ * Return: Pointer to batadv_algo_ops on success, NULL otherwise
+ */
+struct batadv_algo_ops *batadv_algo_get(const char *name)
 {
 	struct batadv_algo_ops *bat_algo_ops = NULL, *bat_algo_ops_tmp;
 
@@ -97,7 +103,7 @@ int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops)
  *
  * Return: 0 on success or negative error number in case of failure
  */
-int batadv_algo_select(struct batadv_priv *bat_priv, char *name)
+int batadv_algo_select(struct batadv_priv *bat_priv, const char *name)
 {
 	struct batadv_algo_ops *bat_algo_ops;
 
diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h
index 686a60bc9492..2ae140eac45d 100644
--- a/net/batman-adv/bat_algo.h
+++ b/net/batman-adv/bat_algo.h
@@ -18,8 +18,9 @@ extern char batadv_routing_algo[];
 extern struct list_head batadv_hardif_list;
 
 void batadv_algo_init(void);
+struct batadv_algo_ops *batadv_algo_get(const char *name);
 int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops);
-int batadv_algo_select(struct batadv_priv *bat_priv, char *name);
+int batadv_algo_select(struct batadv_priv *bat_priv, const char *name);
 int batadv_algo_seq_print_text(struct seq_file *seq, void *offset);
 int batadv_algo_dump(struct sk_buff *msg, struct netlink_callback *cb);
 
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 9c7b89689c97..8116631c11c5 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -846,9 +846,11 @@ static int batadv_softif_init_late(struct net_device *dev)
 
 	batadv_nc_init_bat_priv(bat_priv);
 
-	ret = batadv_algo_select(bat_priv, batadv_routing_algo);
-	if (ret < 0)
-		goto free_bat_counters;
+	if (!bat_priv->algo_ops) {
+		ret = batadv_algo_select(bat_priv, batadv_routing_algo);
+		if (ret < 0)
+			goto free_bat_counters;
+	}
 
 	ret = batadv_debugfs_add_meshif(dev);
 	if (ret < 0)
@@ -1085,6 +1087,17 @@ static void batadv_softif_init_early(struct net_device *dev)
 static int batadv_softif_validate(struct nlattr *tb[], struct nlattr *data[],
 				  struct netlink_ext_ack *extack)
 {
+	struct batadv_algo_ops *algo_ops;
+
+	if (!data)
+		return 0;
+
+	if (data[IFLA_BATADV_ALGO_NAME]) {
+		algo_ops = batadv_algo_get(nla_data(data[IFLA_BATADV_ALGO_NAME]));
+		if (!algo_ops)
+			return -EINVAL;
+	}
+
 	return 0;
 }
 
@@ -1102,6 +1115,17 @@ static int batadv_softif_newlink(struct net *src_net, struct net_device *dev,
 				 struct nlattr *tb[], struct nlattr *data[],
 				 struct netlink_ext_ack *extack)
 {
+	struct batadv_priv *bat_priv = netdev_priv(dev);
+	const char *algo_name;
+	int err;
+
+	if (data && data[IFLA_BATADV_ALGO_NAME]) {
+		algo_name = nla_data(data[IFLA_BATADV_ALGO_NAME]);
+		err = batadv_algo_select(bat_priv, algo_name);
+		if (err)
+			return -EINVAL;
+	}
+
 	return register_netdevice(dev);
 }
 
@@ -1204,6 +1228,7 @@ bool batadv_softif_is_valid(const struct net_device *net_dev)
 }
 
 static const struct nla_policy batadv_ifla_policy[IFLA_BATADV_MAX + 1] = {
+	[IFLA_BATADV_ALGO_NAME]	= { .type = NLA_NUL_STRING },
 };
 
 struct rtnl_link_ops batadv_link_ops __read_mostly = {
-- 
cgit v1.2.3


From 582f1fb6b721facf04848d2ca57f34468da1813e Mon Sep 17 00:00:00 2001
From: Giuseppe Scrivano <gscrivan@redhat.com>
Date: Wed, 18 Nov 2020 11:47:45 +0100
Subject: fs, close_range: add flag CLOSE_RANGE_CLOEXEC

When the flag CLOSE_RANGE_CLOEXEC is set, close_range doesn't
immediately close the files but it sets the close-on-exec bit.

It is useful for e.g. container runtimes that usually install a
seccomp profile "as late as possible" before execv'ing the container
process itself.  The container runtime could either do:
  1                                  2
- install_seccomp_profile();       - close_range(MIN_FD, MAX_INT, 0);
- close_range(MIN_FD, MAX_INT, 0); - install_seccomp_profile();
- execve(...);                     - execve(...);

Both alternatives have some disadvantages.

In the first variant the seccomp_profile cannot block the close_range
syscall, as well as opendir/read/close/... for the fallback on older
kernels.
In the second variant, close_range() can be used only on the fds
that are not going to be needed by the runtime anymore, and it must be
potentially called multiple times to account for the different ranges
that must be closed.

Using close_range(..., ..., CLOSE_RANGE_CLOEXEC) solves these issues.
The runtime is able to use the existing open fds, the seccomp profile
can block close_range() and the syscalls used for its fallback.

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
Link: https://lore.kernel.org/r/20201118104746.873084-2-gscrivan@redhat.com
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 fs/file.c                        | 44 +++++++++++++++++++++++++++++++---------
 include/uapi/linux/close_range.h |  3 +++
 2 files changed, 37 insertions(+), 10 deletions(-)

(limited to 'include/uapi')

diff --git a/fs/file.c b/fs/file.c
index 4559b5fec3bd..e08e4daccac3 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -674,6 +674,35 @@ int __close_fd(struct files_struct *files, unsigned fd)
 }
 EXPORT_SYMBOL(__close_fd); /* for ksys_close() */
 
+static inline void __range_cloexec(struct files_struct *cur_fds,
+				   unsigned int fd, unsigned int max_fd)
+{
+	struct fdtable *fdt;
+
+	if (fd > max_fd)
+		return;
+
+	spin_lock(&cur_fds->file_lock);
+	fdt = files_fdtable(cur_fds);
+	bitmap_set(fdt->close_on_exec, fd, max_fd - fd + 1);
+	spin_unlock(&cur_fds->file_lock);
+}
+
+static inline void __range_close(struct files_struct *cur_fds, unsigned int fd,
+				 unsigned int max_fd)
+{
+	while (fd <= max_fd) {
+		struct file *file;
+
+		file = pick_file(cur_fds, fd++);
+		if (!file)
+			continue;
+
+		filp_close(file, cur_fds);
+		cond_resched();
+	}
+}
+
 /**
  * __close_range() - Close all file descriptors in a given range.
  *
@@ -689,7 +718,7 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
 	struct task_struct *me = current;
 	struct files_struct *cur_fds = me->files, *fds = NULL;
 
-	if (flags & ~CLOSE_RANGE_UNSHARE)
+	if (flags & ~(CLOSE_RANGE_UNSHARE | CLOSE_RANGE_CLOEXEC))
 		return -EINVAL;
 
 	if (fd > max_fd)
@@ -727,16 +756,11 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
 	}
 
 	max_fd = min(max_fd, cur_max);
-	while (fd <= max_fd) {
-		struct file *file;
 
-		file = pick_file(cur_fds, fd++);
-		if (!file)
-			continue;
-
-		filp_close(file, cur_fds);
-		cond_resched();
-	}
+	if (flags & CLOSE_RANGE_CLOEXEC)
+		__range_cloexec(cur_fds, fd, max_fd);
+	else
+		__range_close(cur_fds, fd, max_fd);
 
 	if (fds) {
 		/*
diff --git a/include/uapi/linux/close_range.h b/include/uapi/linux/close_range.h
index 6928a9fdee3c..2d804281554c 100644
--- a/include/uapi/linux/close_range.h
+++ b/include/uapi/linux/close_range.h
@@ -5,5 +5,8 @@
 /* Unshare the file descriptor table before closing file descriptors. */
 #define CLOSE_RANGE_UNSHARE	(1U << 1)
 
+/* Set the FD_CLOEXEC bit instead of closing the file descriptor. */
+#define CLOSE_RANGE_CLOEXEC	(1U << 2)
+
 #endif /* _UAPI_LINUX_CLOSE_RANGE_H */
 
-- 
cgit v1.2.3


From c9d659b60770db94b898f94947192a94bbf95c5c Mon Sep 17 00:00:00 2001
From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Date: Fri, 20 Nov 2020 16:10:23 -0800
Subject: PCI/ERR: Bind RCEC devices to the Root Port driver

If a Root Complex Integrated Endpoint (RCiEP) is implemented, it may signal
errors through a Root Complex Event Collector (RCEC).  Each RCiEP must be
associated with no more than one RCEC.

For an RCEC (which is technically not a Bridge), error messages "received"
from associated RCiEPs must be enabled for "transmission" in order to cause
a System Error via the Root Control register or (when the Advanced Error
Reporting Capability is present) reporting via the Root Error Command
register and logging in the Root Error Status register and Error Source
Identification register.

Given the commonality with Root Ports and the need to also support AER and
PME services for RCECs, extend the Root Port driver to support RCEC devices
by adding the RCEC Class ID to the driver structure.

Co-developed-by: Sean V Kelley <sean.v.kelley@intel.com>
Link: https://lore.kernel.org/r/20201121001036.8560-3-sean.v.kelley@intel.com
Tested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> # non-native/no RCEC
Signed-off-by: Sean V Kelley <sean.v.kelley@intel.com>
Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
---
 drivers/pci/pcie/portdrv_pci.c | 5 ++++-
 include/linux/pci_ids.h        | 1 +
 include/uapi/linux/pci_regs.h  | 7 +++++++
 3 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c
index 3a3ce40ae1ab..4d880679b9b1 100644
--- a/drivers/pci/pcie/portdrv_pci.c
+++ b/drivers/pci/pcie/portdrv_pci.c
@@ -106,7 +106,8 @@ static int pcie_portdrv_probe(struct pci_dev *dev,
 	if (!pci_is_pcie(dev) ||
 	    ((pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT) &&
 	     (pci_pcie_type(dev) != PCI_EXP_TYPE_UPSTREAM) &&
-	     (pci_pcie_type(dev) != PCI_EXP_TYPE_DOWNSTREAM)))
+	     (pci_pcie_type(dev) != PCI_EXP_TYPE_DOWNSTREAM) &&
+	     (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC)))
 		return -ENODEV;
 
 	status = pcie_port_device_register(dev);
@@ -195,6 +196,8 @@ static const struct pci_device_id port_pci_ids[] = {
 	{ PCI_DEVICE_CLASS(((PCI_CLASS_BRIDGE_PCI << 8) | 0x00), ~0) },
 	/* subtractive decode PCI-to-PCI bridge, class type is 060401h */
 	{ PCI_DEVICE_CLASS(((PCI_CLASS_BRIDGE_PCI << 8) | 0x01), ~0) },
+	/* handle any Root Complex Event Collector */
+	{ PCI_DEVICE_CLASS(((PCI_CLASS_SYSTEM_RCEC << 8) | 0x00), ~0) },
 	{ },
 };
 
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 1ab1e24bcbce..d8156a5dbee8 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -81,6 +81,7 @@
 #define PCI_CLASS_SYSTEM_RTC		0x0803
 #define PCI_CLASS_SYSTEM_PCI_HOTPLUG	0x0804
 #define PCI_CLASS_SYSTEM_SDHCI		0x0805
+#define PCI_CLASS_SYSTEM_RCEC		0x0807
 #define PCI_CLASS_SYSTEM_OTHER		0x0880
 
 #define PCI_BASE_CLASS_INPUT		0x09
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index a95d55f9f257..bccd3e35cb65 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -831,6 +831,13 @@
 #define  PCI_PWR_CAP_BUDGET(x)	((x) & 1)	/* Included in system budget */
 #define PCI_EXT_CAP_PWR_SIZEOF	16
 
+/* Root Complex Event Collector Endpoint Association  */
+#define PCI_RCEC_RCIEP_BITMAP	4	/* Associated Bitmap for RCiEPs */
+#define PCI_RCEC_BUSN		8	/* RCEC Associated Bus Numbers */
+#define  PCI_RCEC_BUSN_REG_VER	0x02	/* Least version with BUSN present */
+#define  PCI_RCEC_BUSN_NEXT(x)	(((x) >> 8) & 0xff)
+#define  PCI_RCEC_BUSN_LAST(x)	(((x) >> 16) & 0xff)
+
 /* Vendor-Specific (VSEC, PCI_EXT_CAP_ID_VNDR) */
 #define PCI_VNDR_HEADER		4	/* Vendor-Specific Header */
 #define  PCI_VNDR_HEADER_ID(x)	((x) & 0xffff)
-- 
cgit v1.2.3


From 664d6f86868bacbfdb3926a975dff29ca9ebe0d0 Mon Sep 17 00:00:00 2001
From: Andrea Mayer <andrea.mayer@uniroma2.it>
Date: Wed, 2 Dec 2020 14:05:14 +0100
Subject: seg6: add support for the SRv6 End.DT4 behavior

SRv6 End.DT4 is defined in the SRv6 Network Programming [1].

The SRv6 End.DT4 is used to implement IPv4 L3VPN use-cases in
multi-tenant environments. It decapsulates the received packets and
performs an IPv4 routing lookup in the routing table of the tenant.

The SRv6 End.DT4 Linux implementation leverages a VRF device in order to
force the routing lookup into the associated routing table.

To make the End.DT4 work properly, it must be guaranteed that the routing
table used for routing lookup operations is bound to one and only one
VRF during the tunnel creation. Such a constraint has to be enforced by
enabling the VRF strict_mode sysctl parameter, i.e.:
 $ sysctl -wq net.vrf.strict_mode=1.

At JANOG44, LINE corporation presented their multi-tenant DC architecture
using SRv6 [2]. In the slides, they reported that the Linux kernel
lacks support for the SRv6 End.DT4 behavior.

The SRv6 End.DT4 behavior can be instantiated using a command similar to
the following:

 $ ip route add 2001:db8::1 encap seg6local action End.DT4 vrftable 100 dev eth0

We introduce the "vrftable" extension in iproute2 in a following patch.

[1] https://tools.ietf.org/html/draft-ietf-spring-srv6-network-programming
[2] https://speakerdeck.com/line_developers/line-data-center-networking-with-srv6

Signed-off-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/seg6_local.h |   1 +
 net/ipv6/seg6_local.c           | 287 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 288 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/seg6_local.h b/include/uapi/linux/seg6_local.h
index edc138bdc56d..3b39ef1dbb46 100644
--- a/include/uapi/linux/seg6_local.h
+++ b/include/uapi/linux/seg6_local.h
@@ -26,6 +26,7 @@ enum {
 	SEG6_LOCAL_IIF,
 	SEG6_LOCAL_OIF,
 	SEG6_LOCAL_BPF,
+	SEG6_LOCAL_VRFTABLE,
 	__SEG6_LOCAL_MAX,
 };
 #define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1)
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index da5bf4167a52..24c2616c8c11 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -69,6 +69,28 @@ struct bpf_lwt_prog {
 	char *name;
 };
 
+enum seg6_end_dt_mode {
+	DT_INVALID_MODE	= -EINVAL,
+	DT_LEGACY_MODE	= 0,
+	DT_VRF_MODE	= 1,
+};
+
+struct seg6_end_dt_info {
+	enum seg6_end_dt_mode mode;
+
+	struct net *net;
+	/* VRF device associated to the routing table used by the SRv6
+	 * End.DT4/DT6 behavior for routing IPv4/IPv6 packets.
+	 */
+	int vrf_ifindex;
+	int vrf_table;
+
+	/* tunneled packet proto and family (IPv4 or IPv6) */
+	__be16 proto;
+	u16 family;
+	int hdrlen;
+};
+
 struct seg6_local_lwt {
 	int action;
 	struct ipv6_sr_hdr *srh;
@@ -78,6 +100,9 @@ struct seg6_local_lwt {
 	int iif;
 	int oif;
 	struct bpf_lwt_prog bpf;
+#ifdef CONFIG_NET_L3_MASTER_DEV
+	struct seg6_end_dt_info dt_info;
+#endif
 
 	int headroom;
 	struct seg6_action_desc *desc;
@@ -429,6 +454,203 @@ drop:
 	return -EINVAL;
 }
 
+#ifdef CONFIG_NET_L3_MASTER_DEV
+static struct net *fib6_config_get_net(const struct fib6_config *fib6_cfg)
+{
+	const struct nl_info *nli = &fib6_cfg->fc_nlinfo;
+
+	return nli->nl_net;
+}
+
+static int __seg6_end_dt_vrf_build(struct seg6_local_lwt *slwt, const void *cfg,
+				   u16 family, struct netlink_ext_ack *extack)
+{
+	struct seg6_end_dt_info *info = &slwt->dt_info;
+	int vrf_ifindex;
+	struct net *net;
+
+	net = fib6_config_get_net(cfg);
+
+	/* note that vrf_table was already set by parse_nla_vrftable() */
+	vrf_ifindex = l3mdev_ifindex_lookup_by_table_id(L3MDEV_TYPE_VRF, net,
+							info->vrf_table);
+	if (vrf_ifindex < 0) {
+		if (vrf_ifindex == -EPERM) {
+			NL_SET_ERR_MSG(extack,
+				       "Strict mode for VRF is disabled");
+		} else if (vrf_ifindex == -ENODEV) {
+			NL_SET_ERR_MSG(extack,
+				       "Table has no associated VRF device");
+		} else {
+			pr_debug("seg6local: SRv6 End.DT* creation error=%d\n",
+				 vrf_ifindex);
+		}
+
+		return vrf_ifindex;
+	}
+
+	info->net = net;
+	info->vrf_ifindex = vrf_ifindex;
+
+	switch (family) {
+	case AF_INET:
+		info->proto = htons(ETH_P_IP);
+		info->hdrlen = sizeof(struct iphdr);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	info->family = family;
+	info->mode = DT_VRF_MODE;
+
+	return 0;
+}
+
+/* The SRv6 End.DT4/DT6 behavior extracts the inner (IPv4/IPv6) packet and
+ * routes the IPv4/IPv6 packet by looking at the configured routing table.
+ *
+ * In the SRv6 End.DT4/DT6 use case, we can receive traffic (IPv6+Segment
+ * Routing Header packets) from several interfaces and the outer IPv6
+ * destination address (DA) is used for retrieving the specific instance of the
+ * End.DT4/DT6 behavior that should process the packets.
+ *
+ * However, the inner IPv4/IPv6 packet is not really bound to any receiving
+ * interface and thus the End.DT4/DT6 sets the VRF (associated with the
+ * corresponding routing table) as the *receiving* interface.
+ * In other words, the End.DT4/DT6 processes a packet as if it has been received
+ * directly by the VRF (and not by one of its slave devices, if any).
+ * In this way, the VRF interface is used for routing the IPv4/IPv6 packet
+ * according to the routing table configured by the End.DT4/DT6 instance.
+ *
+ * This design allows you to get some interesting features like:
+ *  1) the statistics on rx packets;
+ *  2) the possibility to install a packet sniffer on the receiving interface
+ *     (the VRF one) for looking at the incoming packets;
+ *  3) the possibility to leverage the netfilter prerouting hook for the inner
+ *     IPv4 packet.
+ *
+ * This function returns:
+ *  - the sk_buff* when the VRF rcv handler has processed the packet correctly;
+ *  - NULL when the skb is consumed by the VRF rcv handler;
+ *  - a pointer which encodes a negative error number in case of error.
+ *    Note that in this case, the function takes care of freeing the skb.
+ */
+static struct sk_buff *end_dt_vrf_rcv(struct sk_buff *skb, u16 family,
+				      struct net_device *dev)
+{
+	/* based on l3mdev_ip_rcv; we are only interested in the master */
+	if (unlikely(!netif_is_l3_master(dev) && !netif_has_l3_rx_handler(dev)))
+		goto drop;
+
+	if (unlikely(!dev->l3mdev_ops->l3mdev_l3_rcv))
+		goto drop;
+
+	/* the decap packet IPv4/IPv6 does not come with any mac header info.
+	 * We must unset the mac header to allow the VRF device to rebuild it,
+	 * just in case there is a sniffer attached on the device.
+	 */
+	skb_unset_mac_header(skb);
+
+	skb = dev->l3mdev_ops->l3mdev_l3_rcv(dev, skb, family);
+	if (!skb)
+		/* the skb buffer was consumed by the handler */
+		return NULL;
+
+	/* when a packet is received by a VRF or by one of its slaves, the
+	 * master device reference is set into the skb.
+	 */
+	if (unlikely(skb->dev != dev || skb->skb_iif != dev->ifindex))
+		goto drop;
+
+	return skb;
+
+drop:
+	kfree_skb(skb);
+	return ERR_PTR(-EINVAL);
+}
+
+static struct net_device *end_dt_get_vrf_rcu(struct sk_buff *skb,
+					     struct seg6_end_dt_info *info)
+{
+	int vrf_ifindex = info->vrf_ifindex;
+	struct net *net = info->net;
+
+	if (unlikely(vrf_ifindex < 0))
+		goto error;
+
+	if (unlikely(!net_eq(dev_net(skb->dev), net)))
+		goto error;
+
+	return dev_get_by_index_rcu(net, vrf_ifindex);
+
+error:
+	return NULL;
+}
+
+static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb,
+				       struct seg6_local_lwt *slwt)
+{
+	struct seg6_end_dt_info *info = &slwt->dt_info;
+	struct net_device *vrf;
+
+	vrf = end_dt_get_vrf_rcu(skb, info);
+	if (unlikely(!vrf))
+		goto drop;
+
+	skb->protocol = info->proto;
+
+	skb_dst_drop(skb);
+
+	skb_set_transport_header(skb, info->hdrlen);
+
+	return end_dt_vrf_rcv(skb, info->family, vrf);
+
+drop:
+	kfree_skb(skb);
+	return ERR_PTR(-EINVAL);
+}
+
+static int input_action_end_dt4(struct sk_buff *skb,
+				struct seg6_local_lwt *slwt)
+{
+	struct iphdr *iph;
+	int err;
+
+	if (!decap_and_validate(skb, IPPROTO_IPIP))
+		goto drop;
+
+	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+		goto drop;
+
+	skb = end_dt_vrf_core(skb, slwt);
+	if (!skb)
+		/* packet has been processed and consumed by the VRF */
+		return 0;
+
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+
+	iph = ip_hdr(skb);
+
+	err = ip_route_input(skb, iph->daddr, iph->saddr, 0, skb->dev);
+	if (unlikely(err))
+		goto drop;
+
+	return dst_input(skb);
+
+drop:
+	kfree_skb(skb);
+	return -EINVAL;
+}
+
+static int seg6_end_dt4_build(struct seg6_local_lwt *slwt, const void *cfg,
+			      struct netlink_ext_ack *extack)
+{
+	return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET, extack);
+}
+#endif
+
 static int input_action_end_dt6(struct sk_buff *skb,
 				struct seg6_local_lwt *slwt)
 {
@@ -617,6 +839,16 @@ static struct seg6_action_desc seg6_action_table[] = {
 		.attrs		= (1 << SEG6_LOCAL_NH4),
 		.input		= input_action_end_dx4,
 	},
+	{
+		.action		= SEG6_LOCAL_ACTION_END_DT4,
+		.attrs		= (1 << SEG6_LOCAL_VRFTABLE),
+#ifdef CONFIG_NET_L3_MASTER_DEV
+		.input		= input_action_end_dt4,
+		.slwt_ops	= {
+					.build_state = seg6_end_dt4_build,
+				  },
+#endif
+	},
 	{
 		.action		= SEG6_LOCAL_ACTION_END_DT6,
 		.attrs		= (1 << SEG6_LOCAL_TABLE),
@@ -677,6 +909,7 @@ static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
 	[SEG6_LOCAL_ACTION]	= { .type = NLA_U32 },
 	[SEG6_LOCAL_SRH]	= { .type = NLA_BINARY },
 	[SEG6_LOCAL_TABLE]	= { .type = NLA_U32 },
+	[SEG6_LOCAL_VRFTABLE]	= { .type = NLA_U32 },
 	[SEG6_LOCAL_NH4]	= { .type = NLA_BINARY,
 				    .len = sizeof(struct in_addr) },
 	[SEG6_LOCAL_NH6]	= { .type = NLA_BINARY,
@@ -766,6 +999,53 @@ static int cmp_nla_table(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
 	return 0;
 }
 
+static struct
+seg6_end_dt_info *seg6_possible_end_dt_info(struct seg6_local_lwt *slwt)
+{
+#ifdef CONFIG_NET_L3_MASTER_DEV
+	return &slwt->dt_info;
+#else
+	return ERR_PTR(-EOPNOTSUPP);
+#endif
+}
+
+static int parse_nla_vrftable(struct nlattr **attrs,
+			      struct seg6_local_lwt *slwt)
+{
+	struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt);
+
+	if (IS_ERR(info))
+		return PTR_ERR(info);
+
+	info->vrf_table = nla_get_u32(attrs[SEG6_LOCAL_VRFTABLE]);
+
+	return 0;
+}
+
+static int put_nla_vrftable(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+	struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt);
+
+	if (IS_ERR(info))
+		return PTR_ERR(info);
+
+	if (nla_put_u32(skb, SEG6_LOCAL_VRFTABLE, info->vrf_table))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+static int cmp_nla_vrftable(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
+{
+	struct seg6_end_dt_info *info_a = seg6_possible_end_dt_info(a);
+	struct seg6_end_dt_info *info_b = seg6_possible_end_dt_info(b);
+
+	if (info_a->vrf_table != info_b->vrf_table)
+		return 1;
+
+	return 0;
+}
+
 static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt)
 {
 	memcpy(&slwt->nh4, nla_data(attrs[SEG6_LOCAL_NH4]),
@@ -984,6 +1264,10 @@ static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
 				    .cmp = cmp_nla_bpf,
 				    .destroy = destroy_attr_bpf },
 
+	[SEG6_LOCAL_VRFTABLE]	= { .parse = parse_nla_vrftable,
+				    .put = put_nla_vrftable,
+				    .cmp = cmp_nla_vrftable },
+
 };
 
 /* call the destroy() callback (if available) for each set attribute in
@@ -1283,6 +1567,9 @@ static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
 		       nla_total_size(MAX_PROG_NAME) +
 		       nla_total_size(4);
 
+	if (attrs & (1 << SEG6_LOCAL_VRFTABLE))
+		nlsize += nla_total_size(4);
+
 	return nlsize;
 }
 
-- 
cgit v1.2.3


From 4f19cab76136e800a3f04d8c9aa4d8e770e3d3d8 Mon Sep 17 00:00:00 2001
From: Florent Revest <revest@chromium.org>
Date: Fri, 4 Dec 2020 12:36:05 +0100
Subject: bpf: Add a bpf_sock_from_file helper

While eBPF programs can check whether a file is a socket by file->f_op
== &socket_file_ops, they cannot convert the void private_data pointer
to a struct socket BTF pointer. In order to do this a new helper
wrapping sock_from_file is added.

This is useful to tracing programs but also other program types
inheriting this set of helpers such as iterators or LSM programs.

Signed-off-by: Florent Revest <revest@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: KP Singh <kpsingh@google.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201204113609.1850150-2-revest@google.com
---
 include/uapi/linux/bpf.h       |  9 +++++++++
 kernel/trace/bpf_trace.c       | 20 ++++++++++++++++++++
 scripts/bpf_helpers_doc.py     |  4 ++++
 tools/include/uapi/linux/bpf.h |  9 +++++++++
 4 files changed, 42 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1233f14f659f..30b477a26482 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3822,6 +3822,14 @@ union bpf_attr {
  *		The **hash_algo** is returned on success,
  *		**-EOPNOTSUP** if IMA is disabled or **-EINVAL** if
  *		invalid arguments are passed.
+ *
+ * struct socket *bpf_sock_from_file(struct file *file)
+ *	Description
+ *		If the given file represents a socket, returns the associated
+ *		socket.
+ *	Return
+ *		A pointer to a struct socket on success or NULL if the file is
+ *		not a socket.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3986,6 +3994,7 @@ union bpf_attr {
 	FN(bprm_opts_set),		\
 	FN(ktime_get_coarse_ns),	\
 	FN(ima_inode_hash),		\
+	FN(sock_from_file),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index cb9d7478ef0c..0cf0a6331482 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1270,6 +1270,24 @@ const struct bpf_func_proto bpf_snprintf_btf_proto = {
 	.arg5_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_1(bpf_sock_from_file, struct file *, file)
+{
+	return (unsigned long) sock_from_file(file);
+}
+
+BTF_ID_LIST(bpf_sock_from_file_btf_ids)
+BTF_ID(struct, socket)
+BTF_ID(struct, file)
+
+static const struct bpf_func_proto bpf_sock_from_file_proto = {
+	.func		= bpf_sock_from_file,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_BTF_ID_OR_NULL,
+	.ret_btf_id	= &bpf_sock_from_file_btf_ids[0],
+	.arg1_type	= ARG_PTR_TO_BTF_ID,
+	.arg1_btf_id	= &bpf_sock_from_file_btf_ids[1],
+};
+
 const struct bpf_func_proto *
 bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -1366,6 +1384,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_per_cpu_ptr_proto;
 	case BPF_FUNC_bpf_this_cpu_ptr:
 		return &bpf_this_cpu_ptr_proto;
+	case BPF_FUNC_sock_from_file:
+		return &bpf_sock_from_file_proto;
 	default:
 		return NULL;
 	}
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
index 8b829748d488..867ada23281c 100755
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
@@ -437,6 +437,8 @@ class PrinterHelpers(Printer):
             'struct path',
             'struct btf_ptr',
             'struct inode',
+            'struct socket',
+            'struct file',
     ]
     known_types = {
             '...',
@@ -482,6 +484,8 @@ class PrinterHelpers(Printer):
             'struct path',
             'struct btf_ptr',
             'struct inode',
+            'struct socket',
+            'struct file',
     }
     mapped_types = {
             'u8': '__u8',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 1233f14f659f..30b477a26482 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3822,6 +3822,14 @@ union bpf_attr {
  *		The **hash_algo** is returned on success,
  *		**-EOPNOTSUP** if IMA is disabled or **-EINVAL** if
  *		invalid arguments are passed.
+ *
+ * struct socket *bpf_sock_from_file(struct file *file)
+ *	Description
+ *		If the given file represents a socket, returns the associated
+ *		socket.
+ *	Return
+ *		A pointer to a struct socket on success or NULL if the file is
+ *		not a socket.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3986,6 +3994,7 @@ union bpf_attr {
 	FN(bprm_opts_set),		\
 	FN(ktime_get_coarse_ns),	\
 	FN(ima_inode_hash),		\
+	FN(sock_from_file),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 18fb76ed53865c1b5d5f0157b1b825704590beb5 Mon Sep 17 00:00:00 2001
From: Arjun Roy <arjunroy@google.com>
Date: Wed, 2 Dec 2020 14:53:42 -0800
Subject: net-zerocopy: Copy straggler unaligned data for TCP Rx. zerocopy.

When TCP receive zerocopy does not successfully map the entire
requested space, it outputs a 'hint' that the caller should recvmsg().

Augment zerocopy to accept a user buffer that it tries to copy this
hint into - if it is possible to copy the entire hint, it will do so.
This elides a recvmsg() call for received traffic that isn't exactly
page-aligned in size.

This was tested with RPC-style traffic of arbitrary sizes. Normally,
each received message required at least one getsockopt() call, and one
recvmsg() call for the remaining unaligned data.

With this change, almost all of the recvmsg() calls are eliminated,
leading to a savings of about 25%-50% in number of system calls
for RPC-style workloads.

Signed-off-by: Arjun Roy <arjunroy@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/tcp.h |  2 ++
 net/ipv4/tcp.c           | 84 +++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 70 insertions(+), 16 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index cfcb10b75483..62db78b9c1a0 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -349,5 +349,7 @@ struct tcp_zerocopy_receive {
 	__u32 recv_skip_hint;	/* out: amount of bytes to skip */
 	__u32 inq; /* out: amount of bytes in read queue */
 	__s32 err; /* out: socket error */
+	__u64 copybuf_address;	/* in: copybuf address (small reads) */
+	__s32 copybuf_len; /* in/out: copybuf bytes avail/used or error */
 };
 #endif /* _UAPI_LINUX_TCP_H */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 75a28b8f4470..0ad70097da59 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1758,6 +1758,52 @@ int tcp_mmap(struct file *file, struct socket *sock,
 }
 EXPORT_SYMBOL(tcp_mmap);
 
+static int tcp_copy_straggler_data(struct tcp_zerocopy_receive *zc,
+				   struct sk_buff *skb, u32 copylen,
+				   u32 *offset, u32 *seq)
+{
+	unsigned long copy_address = (unsigned long)zc->copybuf_address;
+	struct msghdr msg = {};
+	struct iovec iov;
+	int err;
+
+	if (copy_address != zc->copybuf_address)
+		return -EINVAL;
+
+	err = import_single_range(READ, (void __user *)copy_address,
+				  copylen, &iov, &msg.msg_iter);
+	if (err)
+		return err;
+	err = skb_copy_datagram_msg(skb, *offset, &msg, copylen);
+	if (err)
+		return err;
+	zc->recv_skip_hint -= copylen;
+	*offset += copylen;
+	*seq += copylen;
+	return (__s32)copylen;
+}
+
+static int tcp_zerocopy_handle_leftover_data(struct tcp_zerocopy_receive *zc,
+					     struct sock *sk,
+					     struct sk_buff *skb,
+					     u32 *seq,
+					     s32 copybuf_len)
+{
+	u32 offset, copylen = min_t(u32, copybuf_len, zc->recv_skip_hint);
+
+	if (!copylen)
+		return 0;
+	/* skb is null if inq < PAGE_SIZE. */
+	if (skb)
+		offset = *seq - TCP_SKB_CB(skb)->seq;
+	else
+		skb = tcp_recv_skb(sk, *seq, &offset);
+
+	zc->copybuf_len = tcp_copy_straggler_data(zc, skb, copylen, &offset,
+						  seq);
+	return zc->copybuf_len < 0 ? 0 : copylen;
+}
+
 static int tcp_zerocopy_vm_insert_batch(struct vm_area_struct *vma,
 					struct page **pages,
 					unsigned long pages_to_map,
@@ -1791,8 +1837,10 @@ static int tcp_zerocopy_vm_insert_batch(struct vm_area_struct *vma,
 static int tcp_zerocopy_receive(struct sock *sk,
 				struct tcp_zerocopy_receive *zc)
 {
+	u32 length = 0, offset, vma_len, avail_len, aligned_len, copylen = 0;
 	unsigned long address = (unsigned long)zc->address;
-	u32 length = 0, seq, offset, zap_len;
+	s32 copybuf_len = zc->copybuf_len;
+	struct tcp_sock *tp = tcp_sk(sk);
 	#define PAGE_BATCH_SIZE 8
 	struct page *pages[PAGE_BATCH_SIZE];
 	const skb_frag_t *frags = NULL;
@@ -1800,10 +1848,12 @@ static int tcp_zerocopy_receive(struct sock *sk,
 	struct sk_buff *skb = NULL;
 	unsigned long pg_idx = 0;
 	unsigned long curr_addr;
-	struct tcp_sock *tp;
-	int inq;
+	u32 seq = tp->copied_seq;
+	int inq = tcp_inq(sk);
 	int ret;
 
+	zc->copybuf_len = 0;
+
 	if (address & (PAGE_SIZE - 1) || address != zc->address)
 		return -EINVAL;
 
@@ -1812,8 +1862,6 @@ static int tcp_zerocopy_receive(struct sock *sk,
 
 	sock_rps_record_flow(sk);
 
-	tp = tcp_sk(sk);
-
 	mmap_read_lock(current->mm);
 
 	vma = find_vma(current->mm, address);
@@ -1821,17 +1869,16 @@ static int tcp_zerocopy_receive(struct sock *sk,
 		mmap_read_unlock(current->mm);
 		return -EINVAL;
 	}
-	zc->length = min_t(unsigned long, zc->length, vma->vm_end - address);
-
-	seq = tp->copied_seq;
-	inq = tcp_inq(sk);
-	zc->length = min_t(u32, zc->length, inq);
-	zap_len = zc->length & ~(PAGE_SIZE - 1);
-	if (zap_len) {
-		zap_page_range(vma, address, zap_len);
+	vma_len = min_t(unsigned long, zc->length, vma->vm_end - address);
+	avail_len = min_t(u32, vma_len, inq);
+	aligned_len = avail_len & ~(PAGE_SIZE - 1);
+	if (aligned_len) {
+		zap_page_range(vma, address, aligned_len);
+		zc->length = aligned_len;
 		zc->recv_skip_hint = 0;
 	} else {
-		zc->recv_skip_hint = zc->length;
+		zc->length = avail_len;
+		zc->recv_skip_hint = avail_len;
 	}
 	ret = 0;
 	curr_addr = address;
@@ -1900,13 +1947,18 @@ static int tcp_zerocopy_receive(struct sock *sk,
 	}
 out:
 	mmap_read_unlock(current->mm);
-	if (length) {
+	/* Try to copy straggler data. */
+	if (!ret)
+		copylen = tcp_zerocopy_handle_leftover_data(zc, sk, skb, &seq,
+							    copybuf_len);
+
+	if (length + copylen) {
 		WRITE_ONCE(tp->copied_seq, seq);
 		tcp_rcv_space_adjust(sk);
 
 		/* Clean up data we have read: This will do ACK frames. */
 		tcp_recv_skb(sk, seq, &offset);
-		tcp_cleanup_rbuf(sk, length);
+		tcp_cleanup_rbuf(sk, length + copylen);
 		ret = 0;
 		if (length == zc->length)
 			zc->recv_skip_hint = 0;
-- 
cgit v1.2.3


From 94ab9eb9b234ddf23af04a4bc7e8db68e67b8778 Mon Sep 17 00:00:00 2001
From: Arjun Roy <arjunroy@google.com>
Date: Wed, 2 Dec 2020 14:53:49 -0800
Subject: net-zerocopy: Defer vm zap unless actually needed.

Zapping pages is required only if we are calling vm_insert_page into a
region where pages had previously been mapped. Receive zerocopy allows
reusing such regions, and hitherto called zap_page_range() before
calling vm_insert_page() in that range.

zap_page_range() can also be triggered from userspace with
madvise(MADV_DONTNEED). If userspace is configured to call this before
reusing a segment, or if there was nothing mapped at this virtual
address to begin with, we can avoid calling zap_page_range() under the
socket lock. That said, if userspace does not do that, then we are
still responsible for calling zap_page_range().

This patch adds a flag that the user can use to hint to the kernel
that a zap is not required. If the flag is not set, or if an older
user application does not have a flags field at all, then the kernel
calls zap_page_range as before. Also, if the flag is set but a zap is
still required, the kernel performs that zap as necessary. Thus
incorrectly indicating that a zap can be avoided does not change the
correctness of operation. It also increases the batchsize for
vm_insert_pages and prefetches the page struct for the batch since
we're about to bump the refcount.

An alternative mechanism could be to not have a flag, assume by
default a zap is not needed, and fall back to zapping if needed.
However, this would harm performance for older applications for which
a zap is necessary, and thus we implement it with an explicit flag
so newer applications can opt in.

When using RPC-style traffic with medium sized (tens of KB) RPCs, this
change yields an efficiency improvement of about 30% for QPS/CPU usage.

Signed-off-by: Arjun Roy <arjunroy@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/tcp.h |   2 +
 net/ipv4/tcp.c           | 147 +++++++++++++++++++++++++++++++----------------
 2 files changed, 99 insertions(+), 50 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 62db78b9c1a0..13ceeb395eb8 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -343,6 +343,7 @@ struct tcp_diag_md5sig {
 
 /* setsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...) */
 
+#define TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT 0x1
 struct tcp_zerocopy_receive {
 	__u64 address;		/* in: address of mapping */
 	__u32 length;		/* in/out: number of bytes to map/mapped */
@@ -351,5 +352,6 @@ struct tcp_zerocopy_receive {
 	__s32 err; /* out: socket error */
 	__u64 copybuf_address;	/* in: copybuf address (small reads) */
 	__s32 copybuf_len; /* in/out: copybuf bytes avail/used or error */
+	__u32 flags; /* in: flags */
 };
 #endif /* _UAPI_LINUX_TCP_H */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index df6dd15a5988..3c99d48b65d8 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1924,51 +1924,101 @@ static int tcp_zerocopy_handle_leftover_data(struct tcp_zerocopy_receive *zc,
 	return zc->copybuf_len < 0 ? 0 : copylen;
 }
 
+static int tcp_zerocopy_vm_insert_batch_error(struct vm_area_struct *vma,
+					      struct page **pending_pages,
+					      unsigned long pages_remaining,
+					      unsigned long *address,
+					      u32 *length,
+					      u32 *seq,
+					      struct tcp_zerocopy_receive *zc,
+					      u32 total_bytes_to_map,
+					      int err)
+{
+	/* At least one page did not map. Try zapping if we skipped earlier. */
+	if (err == -EBUSY &&
+	    zc->flags & TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT) {
+		u32 maybe_zap_len;
+
+		maybe_zap_len = total_bytes_to_map -  /* All bytes to map */
+				*length + /* Mapped or pending */
+				(pages_remaining * PAGE_SIZE); /* Failed map. */
+		zap_page_range(vma, *address, maybe_zap_len);
+		err = 0;
+	}
+
+	if (!err) {
+		unsigned long leftover_pages = pages_remaining;
+		int bytes_mapped;
+
+		/* We called zap_page_range, try to reinsert. */
+		err = vm_insert_pages(vma, *address,
+				      pending_pages,
+				      &pages_remaining);
+		bytes_mapped = PAGE_SIZE * (leftover_pages - pages_remaining);
+		*seq += bytes_mapped;
+		*address += bytes_mapped;
+	}
+	if (err) {
+		/* Either we were unable to zap, OR we zapped, retried an
+		 * insert, and still had an issue. Either ways, pages_remaining
+		 * is the number of pages we were unable to map, and we unroll
+		 * some state we speculatively touched before.
+		 */
+		const int bytes_not_mapped = PAGE_SIZE * pages_remaining;
+
+		*length -= bytes_not_mapped;
+		zc->recv_skip_hint += bytes_not_mapped;
+	}
+	return err;
+}
+
 static int tcp_zerocopy_vm_insert_batch(struct vm_area_struct *vma,
 					struct page **pages,
-					unsigned long pages_to_map,
-					unsigned long *insert_addr,
-					u32 *length_with_pending,
+					unsigned int pages_to_map,
+					unsigned long *address,
+					u32 *length,
 					u32 *seq,
-					struct tcp_zerocopy_receive *zc)
+					struct tcp_zerocopy_receive *zc,
+					u32 total_bytes_to_map)
 {
 	unsigned long pages_remaining = pages_to_map;
-	int bytes_mapped;
-	int ret;
+	unsigned int pages_mapped;
+	unsigned int bytes_mapped;
+	int err;
 
-	ret = vm_insert_pages(vma, *insert_addr, pages, &pages_remaining);
-	bytes_mapped = PAGE_SIZE * (pages_to_map - pages_remaining);
+	err = vm_insert_pages(vma, *address, pages, &pages_remaining);
+	pages_mapped = pages_to_map - (unsigned int)pages_remaining;
+	bytes_mapped = PAGE_SIZE * pages_mapped;
 	/* Even if vm_insert_pages fails, it may have partially succeeded in
 	 * mapping (some but not all of the pages).
 	 */
 	*seq += bytes_mapped;
-	*insert_addr += bytes_mapped;
-	if (ret) {
-		/* But if vm_insert_pages did fail, we have to unroll some state
-		 * we speculatively touched before.
-		 */
-		const int bytes_not_mapped = PAGE_SIZE * pages_remaining;
-		*length_with_pending -= bytes_not_mapped;
-		zc->recv_skip_hint += bytes_not_mapped;
-	}
-	return ret;
+	*address += bytes_mapped;
+
+	if (likely(!err))
+		return 0;
+
+	/* Error: maybe zap and retry + rollback state for failed inserts. */
+	return tcp_zerocopy_vm_insert_batch_error(vma, pages + pages_mapped,
+		pages_remaining, address, length, seq, zc, total_bytes_to_map,
+		err);
 }
 
+#define TCP_ZEROCOPY_PAGE_BATCH_SIZE 32
 static int tcp_zerocopy_receive(struct sock *sk,
 				struct tcp_zerocopy_receive *zc)
 {
-	u32 length = 0, offset, vma_len, avail_len, aligned_len, copylen = 0;
+	u32 length = 0, offset, vma_len, avail_len, copylen = 0;
 	unsigned long address = (unsigned long)zc->address;
+	struct page *pages[TCP_ZEROCOPY_PAGE_BATCH_SIZE];
 	s32 copybuf_len = zc->copybuf_len;
 	struct tcp_sock *tp = tcp_sk(sk);
-	#define PAGE_BATCH_SIZE 8
-	struct page *pages[PAGE_BATCH_SIZE];
 	const skb_frag_t *frags = NULL;
+	unsigned int pages_to_map = 0;
 	struct vm_area_struct *vma;
 	struct sk_buff *skb = NULL;
-	unsigned long pg_idx = 0;
-	unsigned long curr_addr;
 	u32 seq = tp->copied_seq;
+	u32 total_bytes_to_map;
 	int inq = tcp_inq(sk);
 	int ret;
 
@@ -2002,34 +2052,24 @@ static int tcp_zerocopy_receive(struct sock *sk,
 	}
 	vma_len = min_t(unsigned long, zc->length, vma->vm_end - address);
 	avail_len = min_t(u32, vma_len, inq);
-	aligned_len = avail_len & ~(PAGE_SIZE - 1);
-	if (aligned_len) {
-		zap_page_range(vma, address, aligned_len);
-		zc->length = aligned_len;
+	total_bytes_to_map = avail_len & ~(PAGE_SIZE - 1);
+	if (total_bytes_to_map) {
+		if (!(zc->flags & TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT))
+			zap_page_range(vma, address, total_bytes_to_map);
+		zc->length = total_bytes_to_map;
 		zc->recv_skip_hint = 0;
 	} else {
 		zc->length = avail_len;
 		zc->recv_skip_hint = avail_len;
 	}
 	ret = 0;
-	curr_addr = address;
 	while (length + PAGE_SIZE <= zc->length) {
 		int mappable_offset;
+		struct page *page;
 
 		if (zc->recv_skip_hint < PAGE_SIZE) {
 			u32 offset_frag;
 
-			/* If we're here, finish the current batch. */
-			if (pg_idx) {
-				ret = tcp_zerocopy_vm_insert_batch(vma, pages,
-								   pg_idx,
-								   &curr_addr,
-								   &length,
-								   &seq, zc);
-				if (ret)
-					goto out;
-				pg_idx = 0;
-			}
 			if (skb) {
 				if (zc->recv_skip_hint > 0)
 					break;
@@ -2050,24 +2090,31 @@ static int tcp_zerocopy_receive(struct sock *sk,
 			zc->recv_skip_hint = mappable_offset;
 			break;
 		}
-		pages[pg_idx] = skb_frag_page(frags);
-		pg_idx++;
+		page = skb_frag_page(frags);
+		prefetchw(page);
+		pages[pages_to_map++] = page;
 		length += PAGE_SIZE;
 		zc->recv_skip_hint -= PAGE_SIZE;
 		frags++;
-		if (pg_idx == PAGE_BATCH_SIZE) {
-			ret = tcp_zerocopy_vm_insert_batch(vma, pages, pg_idx,
-							   &curr_addr, &length,
-							   &seq, zc);
+		if (pages_to_map == TCP_ZEROCOPY_PAGE_BATCH_SIZE ||
+		    zc->recv_skip_hint < PAGE_SIZE) {
+			/* Either full batch, or we're about to go to next skb
+			 * (and we cannot unroll failed ops across skbs).
+			 */
+			ret = tcp_zerocopy_vm_insert_batch(vma, pages,
+							   pages_to_map,
+							   &address, &length,
+							   &seq, zc,
+							   total_bytes_to_map);
 			if (ret)
 				goto out;
-			pg_idx = 0;
+			pages_to_map = 0;
 		}
 	}
-	if (pg_idx) {
-		ret = tcp_zerocopy_vm_insert_batch(vma, pages, pg_idx,
-						   &curr_addr, &length, &seq,
-						   zc);
+	if (pages_to_map) {
+		ret = tcp_zerocopy_vm_insert_batch(vma, pages, pages_to_map,
+						   &address, &length, &seq,
+						   zc, total_bytes_to_map);
 	}
 out:
 	mmap_read_unlock(current->mm);
-- 
cgit v1.2.3


From 26d060e47e25f2c715a1b2c48fea391f67907a30 Mon Sep 17 00:00:00 2001
From: Kent Gibson <warthog618@gmail.com>
Date: Thu, 15 Oct 2020 07:11:56 +0800
Subject: gpiolib: cdev: allow edge event timestamps to be configured as
 REALTIME

Using CLOCK_REALTIME as the source for event timestamps is crucial for
some specific applications, particularly those requiring timestamps
relative to a PTP clock, so provide an option to switch the event
timestamp source from the default CLOCK_MONOTONIC to CLOCK_REALTIME.

Note that CLOCK_REALTIME was the default source clock for GPIO until
Linux 5.7 when it was changed to CLOCK_MONOTONIC due to issues with the
shifting of the realtime clock.
Providing this option maintains the CLOCK_MONOTONIC as the default,
while also providing a path forward for those dependent on the pre-5.7
behaviour.

Suggested-by: Jack Winch <sunt.un.morcov@gmail.com>
Signed-off-by: Kent Gibson <warthog618@gmail.com>
Link: https://lore.kernel.org/r/20201014231158.34117-2-warthog618@gmail.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib-cdev.c | 21 ++++++++++++++++++---
 drivers/gpio/gpiolib.h      |  1 +
 include/uapi/linux/gpio.h   | 12 +++++++++---
 3 files changed, 28 insertions(+), 6 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
index 192721f829a3..f64a35767434 100644
--- a/drivers/gpio/gpiolib-cdev.c
+++ b/drivers/gpio/gpiolib-cdev.c
@@ -509,6 +509,7 @@ struct linereq {
 	 GPIO_V2_LINE_DIRECTION_FLAGS | \
 	 GPIO_V2_LINE_DRIVE_FLAGS | \
 	 GPIO_V2_LINE_EDGE_FLAGS | \
+	 GPIO_V2_LINE_FLAG_EVENT_CLOCK_REALTIME | \
 	 GPIO_V2_LINE_BIAS_FLAGS)
 
 static void linereq_put_event(struct linereq *lr,
@@ -529,6 +530,14 @@ static void linereq_put_event(struct linereq *lr,
 		pr_debug_ratelimited("event FIFO is full - event dropped\n");
 }
 
+static u64 line_event_timestamp(struct line *line)
+{
+	if (test_bit(FLAG_EVENT_CLOCK_REALTIME, &line->desc->flags))
+		return ktime_get_real_ns();
+
+	return ktime_get_ns();
+}
+
 static irqreturn_t edge_irq_thread(int irq, void *p)
 {
 	struct line *line = p;
@@ -546,7 +555,7 @@ static irqreturn_t edge_irq_thread(int irq, void *p)
 		 * which case we didn't get the timestamp from
 		 * edge_irq_handler().
 		 */
-		le.timestamp_ns = ktime_get_ns();
+		le.timestamp_ns = line_event_timestamp(line);
 		if (lr->num_lines != 1)
 			line->req_seqno = atomic_inc_return(&lr->seqno);
 	}
@@ -590,7 +599,7 @@ static irqreturn_t edge_irq_handler(int irq, void *p)
 	 * Just store the timestamp in hardirq context so we get it as
 	 * close in time as possible to the actual event.
 	 */
-	line->timestamp_ns = ktime_get_ns();
+	line->timestamp_ns = line_event_timestamp(line);
 
 	if (lr->num_lines != 1)
 		line->req_seqno = atomic_inc_return(&lr->seqno);
@@ -663,7 +672,7 @@ static void debounce_work_func(struct work_struct *work)
 	memset(&le, 0, sizeof(le));
 
 	lr = line->req;
-	le.timestamp_ns = ktime_get_ns();
+	le.timestamp_ns = line_event_timestamp(line);
 	le.offset = gpio_chip_hwgpio(line->desc);
 	line->line_seqno++;
 	le.line_seqno = line->line_seqno;
@@ -967,6 +976,9 @@ static void gpio_v2_line_config_flags_to_desc_flags(u64 flags,
 		   flags & GPIO_V2_LINE_FLAG_BIAS_PULL_DOWN);
 	assign_bit(FLAG_BIAS_DISABLE, flagsp,
 		   flags & GPIO_V2_LINE_FLAG_BIAS_DISABLED);
+
+	assign_bit(FLAG_EVENT_CLOCK_REALTIME, flagsp,
+		   flags & GPIO_V2_LINE_FLAG_EVENT_CLOCK_REALTIME);
 }
 
 static long linereq_get_values(struct linereq *lr, void __user *ip)
@@ -1930,6 +1942,9 @@ static void gpio_desc_to_lineinfo(struct gpio_desc *desc,
 	if (test_bit(FLAG_EDGE_FALLING, &desc->flags))
 		info->flags |= GPIO_V2_LINE_FLAG_EDGE_FALLING;
 
+	if (test_bit(FLAG_EVENT_CLOCK_REALTIME, &desc->flags))
+		info->flags |= GPIO_V2_LINE_FLAG_EVENT_CLOCK_REALTIME;
+
 	debounce_period_us = READ_ONCE(desc->debounce_period_us);
 	if (debounce_period_us) {
 		info->attrs[num_attrs].id = GPIO_V2_LINE_ATTR_ID_DEBOUNCE;
diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
index 42d81454da21..9c32d4ace572 100644
--- a/drivers/gpio/gpiolib.h
+++ b/drivers/gpio/gpiolib.h
@@ -116,6 +116,7 @@ struct gpio_desc {
 #define FLAG_BIAS_DISABLE    15	/* GPIO has pull disabled */
 #define FLAG_EDGE_RISING     16	/* GPIO CDEV detects rising edge events */
 #define FLAG_EDGE_FALLING    17	/* GPIO CDEV detects falling edge events */
+#define FLAG_EVENT_CLOCK_REALTIME	18 /* GPIO CDEV reports REALTIME timestamps in events */
 
 	/* Connection label */
 	const char		*label;
diff --git a/include/uapi/linux/gpio.h b/include/uapi/linux/gpio.h
index 2072c260f5d0..e4eb0b8c5cf9 100644
--- a/include/uapi/linux/gpio.h
+++ b/include/uapi/linux/gpio.h
@@ -65,6 +65,7 @@ struct gpiochip_info {
  * @GPIO_V2_LINE_FLAG_BIAS_PULL_UP: line has pull-up bias enabled
  * @GPIO_V2_LINE_FLAG_BIAS_PULL_DOWN: line has pull-down bias enabled
  * @GPIO_V2_LINE_FLAG_BIAS_DISABLED: line has bias disabled
+ * @GPIO_V2_LINE_FLAG_EVENT_CLOCK_REALTIME: line events contain REALTIME timestamps
  */
 enum gpio_v2_line_flag {
 	GPIO_V2_LINE_FLAG_USED			= _BITULL(0),
@@ -78,6 +79,7 @@ enum gpio_v2_line_flag {
 	GPIO_V2_LINE_FLAG_BIAS_PULL_UP		= _BITULL(8),
 	GPIO_V2_LINE_FLAG_BIAS_PULL_DOWN	= _BITULL(9),
 	GPIO_V2_LINE_FLAG_BIAS_DISABLED		= _BITULL(10),
+	GPIO_V2_LINE_FLAG_EVENT_CLOCK_REALTIME	= _BITULL(11),
 };
 
 /**
@@ -270,9 +272,6 @@ enum gpio_v2_line_event_id {
 /**
  * struct gpio_v2_line_event - The actual event being pushed to userspace
  * @timestamp_ns: best estimate of time of event occurrence, in nanoseconds.
- * The @timestamp_ns is read from %CLOCK_MONOTONIC and is intended to allow
- * the accurate measurement of the time between events. It does not provide
- * the wall-clock time.
  * @id: event identifier with value from &enum gpio_v2_line_event_id
  * @offset: the offset of the line that triggered the event
  * @seqno: the sequence number for this event in the sequence of events for
@@ -280,6 +279,13 @@ enum gpio_v2_line_event_id {
  * @line_seqno: the sequence number for this event in the sequence of
  * events on this particular line
  * @padding: reserved for future use
+ *
+ * By default the @timestamp_ns is read from %CLOCK_MONOTONIC and is
+ * intended to allow the accurate measurement of the time between events.
+ * It does not provide the wall-clock time.
+ *
+ * If the %GPIO_V2_LINE_FLAG_EVENT_CLOCK_REALTIME flag is set then the
+ * @timestamp_ns is read from %CLOCK_REALTIME.
  */
 struct gpio_v2_line_event {
 	__aligned_u64 timestamp_ns;
-- 
cgit v1.2.3


From 3771c031d60f790aba18f16b058ed23a5ac20bd6 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Mon, 7 Dec 2020 00:03:01 +0100
Subject: media: videodev2.h: Remove unneeded comment about 4CC value

The V4L2_PIX_FMT_BGRA444 format has a comment that explains why its 4CC
value is GA12. This explains the development history and isn't of much
interest to readers, it should have been part of a commit message
instead. Drop the comment, anyone interested in history can turn to git.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/uapi/linux/videodev2.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index e40e95be79f9..3f58ab463f6c 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -524,12 +524,6 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_RGBX444 v4l2_fourcc('R', 'X', '1', '2') /* 16  rrrrgggg bbbbxxxx */
 #define V4L2_PIX_FMT_ABGR444 v4l2_fourcc('A', 'B', '1', '2') /* 16  aaaabbbb ggggrrrr */
 #define V4L2_PIX_FMT_XBGR444 v4l2_fourcc('X', 'B', '1', '2') /* 16  xxxxbbbb ggggrrrr */
-
-/*
- * Originally this had 'BA12' as fourcc, but this clashed with the older
- * V4L2_PIX_FMT_SGRBG12 which inexplicably used that same fourcc.
- * So use 'GA12' instead for V4L2_PIX_FMT_BGRA444.
- */
 #define V4L2_PIX_FMT_BGRA444 v4l2_fourcc('G', 'A', '1', '2') /* 16  bbbbgggg rrrraaaa */
 #define V4L2_PIX_FMT_BGRX444 v4l2_fourcc('B', 'X', '1', '2') /* 16  bbbbgggg rrrrxxxx */
 #define V4L2_PIX_FMT_RGB555  v4l2_fourcc('R', 'G', 'B', 'O') /* 16  RGB-5-5-5     */
-- 
cgit v1.2.3


From 0a078e0d8ecb0ca0296755399c3a8f38b60c7b23 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Mon, 7 Dec 2020 00:03:02 +0100
Subject: media: videodev2.h: Move HI240 format to vendor-specific section

V4L2_PIX_FMT_HI240 is a 8-bit dithered RGB format specific to BTTV. Move
it from the packed YUV formats section where it was misplaced to the
vendor-specific formats section.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/uapi/linux/videodev2.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 3f58ab463f6c..01140ac57be2 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -589,7 +589,6 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_XYUV32  v4l2_fourcc('X', 'Y', 'U', 'V') /* 32  XYUV-8-8-8-8  */
 #define V4L2_PIX_FMT_VUYA32  v4l2_fourcc('V', 'U', 'Y', 'A') /* 32  VUYA-8-8-8-8  */
 #define V4L2_PIX_FMT_VUYX32  v4l2_fourcc('V', 'U', 'Y', 'X') /* 32  VUYX-8-8-8-8  */
-#define V4L2_PIX_FMT_HI240   v4l2_fourcc('H', 'I', '2', '4') /*  8  8-bit color   */
 #define V4L2_PIX_FMT_HM12    v4l2_fourcc('H', 'M', '1', '2') /*  8  YUV 4:2:0 16x16 macroblocks */
 #define V4L2_PIX_FMT_M420    v4l2_fourcc('M', '4', '2', '0') /* 12  YUV 4:2:0 2 lines y, 1 line uv interleaved */
 
@@ -733,6 +732,7 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_INZI     v4l2_fourcc('I', 'N', 'Z', 'I') /* Intel Planar Greyscale 10-bit and Depth 16-bit */
 #define V4L2_PIX_FMT_SUNXI_TILED_NV12 v4l2_fourcc('S', 'T', '1', '2') /* Sunxi Tiled NV12 Format */
 #define V4L2_PIX_FMT_CNF4     v4l2_fourcc('C', 'N', 'F', '4') /* Intel 4-bit packed depth confidence information */
+#define V4L2_PIX_FMT_HI240    v4l2_fourcc('H', 'I', '2', '4') /* BTTV 8-bit dithered RGB */
 
 /* 10bit raw bayer packed, 32 bytes for every 25 pixels, last LSB 6 bits unused */
 #define V4L2_PIX_FMT_IPU3_SBGGR10	v4l2_fourcc('i', 'p', '3', 'b') /* IPU3 packed 10-bit BGGR bayer */
-- 
cgit v1.2.3


From 473dbed54fdbac0e8671c2e0d0fa5f3bad281a31 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Mon, 7 Dec 2020 00:03:03 +0100
Subject: media: videodev2.h: Move HM12 format to YUV semi-planar section

V4L2_PIX_FMT_HM12 is a YUV semi-planar macro-block format. Move it from
the packed YUV formats section where it was misplaced to the YUV
semi-planar formats section.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/uapi/linux/videodev2.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 01140ac57be2..be07bb01d936 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -589,7 +589,6 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_XYUV32  v4l2_fourcc('X', 'Y', 'U', 'V') /* 32  XYUV-8-8-8-8  */
 #define V4L2_PIX_FMT_VUYA32  v4l2_fourcc('V', 'U', 'Y', 'A') /* 32  VUYA-8-8-8-8  */
 #define V4L2_PIX_FMT_VUYX32  v4l2_fourcc('V', 'U', 'Y', 'X') /* 32  VUYX-8-8-8-8  */
-#define V4L2_PIX_FMT_HM12    v4l2_fourcc('H', 'M', '1', '2') /*  8  YUV 4:2:0 16x16 macroblocks */
 #define V4L2_PIX_FMT_M420    v4l2_fourcc('M', '4', '2', '0') /* 12  YUV 4:2:0 2 lines y, 1 line uv interleaved */
 
 /* two planes -- one Y, one Cr + Cb interleaved  */
@@ -599,6 +598,7 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_NV61    v4l2_fourcc('N', 'V', '6', '1') /* 16  Y/CrCb 4:2:2  */
 #define V4L2_PIX_FMT_NV24    v4l2_fourcc('N', 'V', '2', '4') /* 24  Y/CbCr 4:4:4  */
 #define V4L2_PIX_FMT_NV42    v4l2_fourcc('N', 'V', '4', '2') /* 24  Y/CrCb 4:4:4  */
+#define V4L2_PIX_FMT_HM12    v4l2_fourcc('H', 'M', '1', '2') /*  8  YUV 4:2:0 16x16 macroblocks */
 
 /* two non contiguous planes - one Y, one Cr + Cb interleaved  */
 #define V4L2_PIX_FMT_NV12M   v4l2_fourcc('N', 'M', '1', '2') /* 12  Y/CbCr 4:2:0  */
-- 
cgit v1.2.3


From e9a66489c383bc11f786db1d79e952e89cb137fe Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Mon, 7 Dec 2020 00:03:06 +0100
Subject: media: doc: pixfmt-rgb: Clarify naming scheme for RGB formats

The naming scheme for the RGB pixel formats has been developed
organically, and isn't consistent between formats using less than 8 bits
per component (mostly stored in 1 or 2 bytes per pixel, except for
RGB666 that uses 4 bytes per pixel) and formats with 8 bits per
component (stored in 3 or 4 bytes). For the latter category, the names
use a component order convention that is the opposite of the first
category, and the opposite of DRM pixel formats. This has led to lots of
confusion in the past, and would really benefit from being explained
more precisely. Do so, which also prepares for the addition of further
RGB pixel formats.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../userspace-api/media/v4l/pixfmt-rgb.rst         | 255 ++++++++++++++-------
 include/uapi/linux/videodev2.h                     |   4 +-
 2 files changed, 180 insertions(+), 79 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/userspace-api/media/v4l/pixfmt-rgb.rst b/Documentation/userspace-api/media/v4l/pixfmt-rgb.rst
index 5045895e85e1..36236e6ba55a 100644
--- a/Documentation/userspace-api/media/v4l/pixfmt-rgb.rst
+++ b/Documentation/userspace-api/media/v4l/pixfmt-rgb.rst
@@ -6,13 +6,62 @@
 RGB Formats
 ***********
 
-Description
-===========
-
-These formats are designed to match the pixel formats of typical PC
-graphics frame buffers. They occupy 8, 16, 24 or 32 bits per pixel.
-These are all packed-pixel formats, meaning all the data for a pixel lie
-next to each other in memory.
+These formats encode each pixel as a triplet of RGB values. They are packed
+formats, meaning that the RGB values for one pixel are stored consecutively in
+memory and each pixel consumes an integer number of bytes. When the number of
+bits required to store a pixel is not aligned to a byte boundary, the data is
+padded with additional bits to fill the remaining byte.
+
+The formats differ by the number of bits per RGB component (typically but not
+always the same for all components), the order of components in memory, and the
+presence of an alpha component or additional padding bits.
+
+The usage and value of the alpha bits in formats that support them (named ARGB
+or a permutation thereof, collectively referred to as alpha formats) depend on
+the device type and hardware operation. :ref:`Capture <capture>` devices
+(including capture queues of mem-to-mem devices) fill the alpha component in
+memory. When the device captures an alpha channel the alpha component will have
+a meaningful value. Otherwise, when the device doesn't capture an alpha channel
+but can set the alpha bit to a user-configurable value, the
+:ref:`V4L2_CID_ALPHA_COMPONENT <v4l2-alpha-component>` control is used to
+specify that alpha value, and the alpha component of all pixels will be set to
+the value specified by that control. Otherwise a corresponding format without
+an alpha component (XRGB or XBGR) must be used instead of an alpha format.
+
+:ref:`Output <output>` devices (including output queues of mem-to-mem devices
+and :ref:`video output overlay <osd>` devices) read the alpha component from
+memory. When the device processes the alpha channel the alpha component must be
+filled with meaningful values by applications. Otherwise a corresponding format
+without an alpha component (XRGB or XBGR) must be used instead of an alpha
+format.
+
+Formats that contain padding bits are named XRGB (or a permutation thereof).
+The padding bits contain undefined values and must be ignored by applications,
+devices and drivers, for both :ref:`capture` and :ref:`output` devices.
+
+.. note::
+
+   - In all the tables that follow, bit 7 is the most significant bit in a byte.
+   - 'r', 'g' and 'b' denote bits of the red, green and blue components
+     respectively. 'a' denotes bits of the alpha component (if supported by the
+     format), and '-' denotes padding bits.
+
+
+Less Than 8 Bits Per Component
+==============================
+
+These formats store an RGB triplet in one, two or four bytes. They are named
+based on the order of the RGB components as seen in a 8-, 16- or 32-bit word,
+which is then stored in memory in little endian byte order (unless otherwise
+noted by the presence of bit 31 in the 4CC value), and on the number of bits
+for each component. For instance, the RGB565 format stores a pixel in a 16-bit
+word [15:0] laid out as [R\ :sub:`4` R\ :sub:`3` R\ :sub:`2` R\ :sub:`1`
+R\ :sub:`0` G\ :sub:`5` G\ :sub:`4` G\ :sub:`3` G\ :sub:`2` G\ :sub:`1`
+G\ :sub:`0` B\ :sub:`4` B\ :sub:`3` B\ :sub:`2` B\ :sub:`1` B\ :sub:`0`], and
+stored in memory in two bytes, [R\ :sub:`4` R\ :sub:`3` R\ :sub:`2` R\ :sub:`1`
+R\ :sub:`0` G\ :sub:`5` G\ :sub:`4` G\ :sub:`3`] followed by [G\ :sub:`2`
+G\ :sub:`1` G\ :sub:`0` B\ :sub:`4` B\ :sub:`3` B\ :sub:`2` B\ :sub:`1`
+B\ :sub:`0`].
 
 .. raw:: latex
 
@@ -23,7 +72,7 @@ next to each other in memory.
 .. tabularcolumns:: |p{2.8cm}|p{2.0cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|
 
 
-.. flat-table:: RGB Image Formats
+.. flat-table:: RGB Formats With Less Than 8 Bits Per Component
     :header-rows:  2
     :stub-columns: 0
 
@@ -544,6 +593,122 @@ next to each other in memory.
       - b\ :sub:`1`
       - b\ :sub:`0`
       -
+    * .. _V4L2-PIX-FMT-BGR666:
+
+      - ``V4L2_PIX_FMT_BGR666``
+      - 'BGRH'
+
+      - b\ :sub:`5`
+      - b\ :sub:`4`
+      - b\ :sub:`3`
+      - b\ :sub:`2`
+      - b\ :sub:`1`
+      - b\ :sub:`0`
+      - g\ :sub:`5`
+      - g\ :sub:`4`
+
+      - g\ :sub:`3`
+      - g\ :sub:`2`
+      - g\ :sub:`1`
+      - g\ :sub:`0`
+      - r\ :sub:`5`
+      - r\ :sub:`4`
+      - r\ :sub:`3`
+      - r\ :sub:`2`
+
+      - r\ :sub:`1`
+      - r\ :sub:`0`
+      - `-`
+      - `-`
+      - `-`
+      - `-`
+      - `-`
+      - `-`
+
+      - `-`
+      - `-`
+      - `-`
+      - `-`
+      - `-`
+      - `-`
+      - `-`
+      - `-`
+
+.. raw:: latex
+
+    \endgroup
+
+
+8 Bits Per Component
+====================
+
+These formats store an RGB triplet in three or four bytes. They are named based
+on the order of the RGB components as stored in memory, and on the total number
+of bits per pixel. For instance, RGB24 format stores a pixel with [R\ :sub:`7`
+R\ :sub:`6` R\ :sub:`5` R\ :sub:`4` R\ :sub:`3` R\ :sub:`2` R\ :sub:`1`
+R\ :sub:`0`] in the first byte, [G\ :sub:`7` G\ :sub:`6` G\ :sub:`5` G\ :sub:`4`
+G\ :sub:`3` G\ :sub:`2` G\ :sub:`1` G\ :sub:`0`] in the second byte and
+[B\ :sub:`7` B\ :sub:`6` B\ :sub:`5` B\ :sub:`4` B\ :sub:`3` B\ :sub:`2`
+B\ :sub:`1` B\ :sub:`0`] in the third byte. This differs from the DRM format
+nomenclature that instead uses the order of components as seen in a 24- or
+32-bit little endian word.
+
+.. raw:: latex
+
+    \begingroup
+    \tiny
+    \setlength{\tabcolsep}{2pt}
+
+.. tabularcolumns:: |p{2.8cm}|p{2.0cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|
+
+
+.. flat-table:: RGB Formats With 8 Bits Per Component
+    :header-rows:  2
+    :stub-columns: 0
+
+    * - Identifier
+      - Code
+      - :cspan:`7` Byte 0 in memory
+      - :cspan:`7` Byte 1
+      - :cspan:`7` Byte 2
+      - :cspan:`7` Byte 3
+    * -
+      -
+      - 7
+      - 6
+      - 5
+      - 4
+      - 3
+      - 2
+      - 1
+      - 0
+
+      - 7
+      - 6
+      - 5
+      - 4
+      - 3
+      - 2
+      - 1
+      - 0
+
+      - 7
+      - 6
+      - 5
+      - 4
+      - 3
+      - 2
+      - 1
+      - 0
+
+      - 7
+      - 6
+      - 5
+      - 4
+      - 3
+      - 2
+      - 1
+      - 0
     * .. _V4L2-PIX-FMT-BGR24:
 
       - ``V4L2_PIX_FMT_BGR24``
@@ -608,46 +773,6 @@ next to each other in memory.
       - b\ :sub:`1`
       - b\ :sub:`0`
       -
-    * .. _V4L2-PIX-FMT-BGR666:
-
-      - ``V4L2_PIX_FMT_BGR666``
-      - 'BGRH'
-
-      - b\ :sub:`5`
-      - b\ :sub:`4`
-      - b\ :sub:`3`
-      - b\ :sub:`2`
-      - b\ :sub:`1`
-      - b\ :sub:`0`
-      - g\ :sub:`5`
-      - g\ :sub:`4`
-
-      - g\ :sub:`3`
-      - g\ :sub:`2`
-      - g\ :sub:`1`
-      - g\ :sub:`0`
-      - r\ :sub:`5`
-      - r\ :sub:`4`
-      - r\ :sub:`3`
-      - r\ :sub:`2`
-
-      - r\ :sub:`1`
-      - r\ :sub:`0`
-      - `-`
-      - `-`
-      - `-`
-      - `-`
-      - `-`
-      - `-`
-
-      - `-`
-      - `-`
-      - `-`
-      - `-`
-      - `-`
-      - `-`
-      - `-`
-      - `-`
     * .. _V4L2-PIX-FMT-ABGR32:
 
       - ``V4L2_PIX_FMT_ABGR32``
@@ -973,40 +1098,14 @@ next to each other in memory.
 
     \endgroup
 
-.. note:: Bit 7 is the most significant bit.
-
-The usage and value of the alpha bits (a) in the ARGB and ABGR formats
-(collectively referred to as alpha formats) depend on the device type
-and hardware operation. :ref:`Capture <capture>` devices (including
-capture queues of mem-to-mem devices) fill the alpha component in
-memory. When the device outputs an alpha channel the alpha component
-will have a meaningful value. Otherwise, when the device doesn't output
-an alpha channel but can set the alpha bit to a user-configurable value,
-the :ref:`V4L2_CID_ALPHA_COMPONENT <v4l2-alpha-component>` control
-is used to specify that alpha value, and the alpha component of all
-pixels will be set to the value specified by that control. Otherwise a
-corresponding format without an alpha component (XRGB or XBGR) must be
-used instead of an alpha format.
-
-:ref:`Output <output>` devices (including output queues of mem-to-mem
-devices and :ref:`video output overlay <osd>` devices) read the alpha
-component from memory. When the device processes the alpha channel the
-alpha component must be filled with meaningful values by applications.
-Otherwise a corresponding format without an alpha component (XRGB or
-XBGR) must be used instead of an alpha format.
-
-The XRGB and XBGR formats contain undefined bits (-). Applications,
-devices and drivers must ignore those bits, for both
-:ref:`capture` and :ref:`output` devices.
-
 
 Deprecated RGB Formats
 ======================
 
-Formats defined in :ref:`pixfmt-rgb-deprecated` are deprecated and
-must not be used by new drivers. They are documented here for reference.
-The meaning of their alpha bits ``(a)`` are ill-defined and interpreted as in
-either the corresponding ARGB or XRGB format, depending on the driver.
+Formats defined in :ref:`pixfmt-rgb-deprecated` are deprecated and must not be
+used by new drivers. They are documented here for reference. The meaning of
+their alpha bits ``(a)`` is ill-defined and they are interpreted as in either
+the corresponding ARGB or XRGB format, depending on the driver.
 
 .. raw:: latex
 
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index be07bb01d936..79dbde3bcf8d 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -515,7 +515,7 @@ struct v4l2_pix_format {
 
 /*      Pixel format         FOURCC                          depth  Description  */
 
-/* RGB formats */
+/* RGB formats (1 or 2 bytes per pixel) */
 #define V4L2_PIX_FMT_RGB332  v4l2_fourcc('R', 'G', 'B', '1') /*  8  RGB-3-3-2     */
 #define V4L2_PIX_FMT_RGB444  v4l2_fourcc('R', '4', '4', '4') /* 16  xxxxrrrr ggggbbbb */
 #define V4L2_PIX_FMT_ARGB444 v4l2_fourcc('A', 'R', '1', '2') /* 16  aaaarrrr ggggbbbb */
@@ -540,6 +540,8 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_ARGB555X v4l2_fourcc_be('A', 'R', '1', '5') /* 16  ARGB-5-5-5 BE */
 #define V4L2_PIX_FMT_XRGB555X v4l2_fourcc_be('X', 'R', '1', '5') /* 16  XRGB-5-5-5 BE */
 #define V4L2_PIX_FMT_RGB565X v4l2_fourcc('R', 'G', 'B', 'R') /* 16  RGB-5-6-5 BE  */
+
+/* RGB formats (3 or 4 bytes per pixel) */
 #define V4L2_PIX_FMT_BGR666  v4l2_fourcc('B', 'G', 'R', 'H') /* 18  BGR-6-6-6	  */
 #define V4L2_PIX_FMT_BGR24   v4l2_fourcc('B', 'G', 'R', '3') /* 24  BGR-8-8-8     */
 #define V4L2_PIX_FMT_RGB24   v4l2_fourcc('R', 'G', 'B', '3') /* 24  RGB-8-8-8     */
-- 
cgit v1.2.3


From 53ef4999f07d9c75cdc8effb0cc8c581dc39b1a1 Mon Sep 17 00:00:00 2001
From: Weihang Li <liweihang@huawei.com>
Date: Wed, 2 Dec 2020 09:29:20 +0800
Subject: RDMA/hns: Move capability flags of QP and CQ to hns-abi.h

These flags will be returned to userspace through the ABI, so they
should be defined in hns-abi.h. Furthermore, there is no need to include
hns-abi.h in every source file; it only needs to be included in the
common header file.

Link: https://lore.kernel.org/r/1606872560-17823-1-git-send-email-liweihang@huawei.com
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/hns/hns_roce_cq.c     |  1 -
 drivers/infiniband/hw/hns/hns_roce_device.h | 11 +----------
 drivers/infiniband/hw/hns/hns_roce_main.c   |  1 -
 drivers/infiniband/hw/hns/hns_roce_pd.c     |  1 -
 drivers/infiniband/hw/hns/hns_roce_qp.c     |  1 -
 drivers/infiniband/hw/hns/hns_roce_srq.c    |  1 -
 include/uapi/rdma/hns-abi.h                 | 10 ++++++++++
 7 files changed, 11 insertions(+), 15 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 68f355fba425..5e6d68830fa5 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -36,7 +36,6 @@
 #include "hns_roce_device.h"
 #include "hns_roce_cmd.h"
 #include "hns_roce_hem.h"
-#include <rdma/hns-abi.h>
 #include "hns_roce_common.h"
 
 static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 70ae37bad77e..60b8349cd2f8 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -34,6 +34,7 @@
 #define _HNS_ROCE_DEVICE_H
 
 #include <rdma/ib_verbs.h>
+#include <rdma/hns-abi.h>
 
 #define DRV_NAME "hns_roce"
 
@@ -131,16 +132,6 @@ enum {
 	SERV_TYPE_UD,
 };
 
-enum hns_roce_qp_caps {
-	HNS_ROCE_QP_CAP_RQ_RECORD_DB = BIT(0),
-	HNS_ROCE_QP_CAP_SQ_RECORD_DB = BIT(1),
-	HNS_ROCE_QP_CAP_OWNER_DB = BIT(2),
-};
-
-enum hns_roce_cq_flags {
-	HNS_ROCE_CQ_FLAG_RECORD_DB = BIT(0),
-};
-
 enum hns_roce_qp_state {
 	HNS_ROCE_QP_STATE_RST,
 	HNS_ROCE_QP_STATE_INIT,
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index f01590d8c3cf..e8aa8075ffcd 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -40,7 +40,6 @@
 #include <rdma/ib_cache.h>
 #include "hns_roce_common.h"
 #include "hns_roce_device.h"
-#include <rdma/hns-abi.h>
 #include "hns_roce_hem.h"
 
 /**
diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c
index 98f69496adb4..45ec91db1553 100644
--- a/drivers/infiniband/hw/hns/hns_roce_pd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_pd.c
@@ -32,7 +32,6 @@
 
 #include <linux/platform_device.h>
 #include <linux/pci.h>
-#include <uapi/rdma/hns-abi.h>
 #include "hns_roce_device.h"
 
 static int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, unsigned long *pdn)
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 34aa086060d3..121d3b4c2edb 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -39,7 +39,6 @@
 #include "hns_roce_common.h"
 #include "hns_roce_device.h"
 #include "hns_roce_hem.h"
-#include <rdma/hns-abi.h>
 
 static void flush_work_handle(struct work_struct *work)
 {
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 27646b9e35df..36c6bcb85269 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -4,7 +4,6 @@
  */
 
 #include <rdma/ib_umem.h>
-#include <rdma/hns-abi.h>
 #include "hns_roce_device.h"
 #include "hns_roce_cmd.h"
 #include "hns_roce_hem.h"
diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h
index 9ec85f76e9ac..90b739d05adf 100644
--- a/include/uapi/rdma/hns-abi.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -43,6 +43,10 @@ struct hns_roce_ib_create_cq {
 	__u32 reserved;
 };
 
+enum hns_roce_cq_cap_flags {
+	HNS_ROCE_CQ_FLAG_RECORD_DB = 1 << 0,
+};
+
 struct hns_roce_ib_create_cq_resp {
 	__aligned_u64 cqn; /* Only 32 bits used, 64 for compat */
 	__aligned_u64 cap_flags;
@@ -69,6 +73,12 @@ struct hns_roce_ib_create_qp {
 	__aligned_u64 sdb_addr;
 };
 
+enum hns_roce_qp_cap_flags {
+	HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0,
+	HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1,
+	HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2,
+};
+
 struct hns_roce_ib_create_qp_resp {
 	__aligned_u64 cap_flags;
 };
-- 
cgit v1.2.3


From 7b3d5a90cbb9bc6a48c82fd7c146d24d6fceb0fa Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota@wdc.com>
Date: Tue, 10 Nov 2020 20:26:06 +0900
Subject: btrfs: introduce ZONED feature flag

This patch introduces the ZONED incompat flag. The flag indicates that
the volume management will satisfy the constraints imposed by
host-managed zoned block devices (aligned chunk allocation, append-only
updates, reset zone after filled).

As the zoned support will happen incrementally due to enhancing some
core infrastructure like super block writes, tree-log, raid support, the
feature will appear in sysfs only on debug builds. It will be enabled
once the support is feature complete and applications can reliably check
whether zoned support is present or not.

Reviewed-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/sysfs.c           | 7 +++++++
 include/uapi/linux/btrfs.h | 1 +
 2 files changed, 8 insertions(+)

(limited to 'include/uapi')

diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 293338153c20..4522a1c4cd08 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -263,6 +263,10 @@ BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
 BTRFS_FEAT_ATTR_INCOMPAT(metadata_uuid, METADATA_UUID);
 BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
 BTRFS_FEAT_ATTR_INCOMPAT(raid1c34, RAID1C34);
+/* Remove once support for zoned allocation is feature complete */
+#ifdef CONFIG_BTRFS_DEBUG
+BTRFS_FEAT_ATTR_INCOMPAT(zoned, ZONED);
+#endif
 
 static struct attribute *btrfs_supported_feature_attrs[] = {
 	BTRFS_FEAT_ATTR_PTR(mixed_backref),
@@ -278,6 +282,9 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
 	BTRFS_FEAT_ATTR_PTR(metadata_uuid),
 	BTRFS_FEAT_ATTR_PTR(free_space_tree),
 	BTRFS_FEAT_ATTR_PTR(raid1c34),
+#ifdef CONFIG_BTRFS_DEBUG
+	BTRFS_FEAT_ATTR_PTR(zoned),
+#endif
 	NULL
 };
 
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 2c39d15a2beb..5df73001aad4 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -307,6 +307,7 @@ struct btrfs_ioctl_fs_info_args {
 #define BTRFS_FEATURE_INCOMPAT_NO_HOLES		(1ULL << 9)
 #define BTRFS_FEATURE_INCOMPAT_METADATA_UUID	(1ULL << 10)
 #define BTRFS_FEATURE_INCOMPAT_RAID1C34		(1ULL << 11)
+#define BTRFS_FEATURE_INCOMPAT_ZONED		(1ULL << 12)
 
 struct btrfs_ioctl_feature_flags {
 	__u64 compat_flags;
-- 
cgit v1.2.3


From 0f966cba95c78029f491b433ea95ff38f414a761 Mon Sep 17 00:00:00 2001
From: Todd Kjos <tkjos@google.com>
Date: Fri, 20 Nov 2020 15:37:43 -0800
Subject: binder: add flag to clear buffer on txn complete

Add a per-transaction flag to indicate that the buffer
must be cleared when the transaction is complete to
prevent copies of sensitive data from being preserved
in memory.

Signed-off-by: Todd Kjos <tkjos@google.com>
Link: https://lore.kernel.org/r/20201120233743.3617529-1-tkjos@google.com
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/android/binder.c            |  1 +
 drivers/android/binder_alloc.c      | 48 +++++++++++++++++++++++++++++++++++++
 drivers/android/binder_alloc.h      |  4 +++-
 include/uapi/linux/android/binder.h |  1 +
 4 files changed, 53 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 20b08f52e788..1338209f9f86 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2756,6 +2756,7 @@ static void binder_transaction(struct binder_proc *proc,
 	t->buffer->debug_id = t->debug_id;
 	t->buffer->transaction = t;
 	t->buffer->target_node = target_node;
+	t->buffer->clear_on_free = !!(t->flags & TF_CLEAR_BUF);
 	trace_binder_transaction_alloc_buf(t->buffer);
 
 	if (binder_alloc_copy_user_to_buffer(
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index 2f846b7ae8b8..7caf74ad2405 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -696,6 +696,8 @@ static void binder_free_buf_locked(struct binder_alloc *alloc,
 	binder_insert_free_buffer(alloc, buffer);
 }
 
+static void binder_alloc_clear_buf(struct binder_alloc *alloc,
+				   struct binder_buffer *buffer);
 /**
  * binder_alloc_free_buf() - free a binder buffer
  * @alloc:	binder_alloc for this proc
@@ -706,6 +708,18 @@ static void binder_free_buf_locked(struct binder_alloc *alloc,
 void binder_alloc_free_buf(struct binder_alloc *alloc,
 			    struct binder_buffer *buffer)
 {
+	/*
+	 * We could eliminate the call to binder_alloc_clear_buf()
+	 * from binder_alloc_deferred_release() by moving this to
+	 * binder_alloc_free_buf_locked(). However, that could
+	 * increase contention for the alloc mutex if clear_on_free
+	 * is used frequently for large buffers. The mutex is not
+	 * needed for correctness here.
+	 */
+	if (buffer->clear_on_free) {
+		binder_alloc_clear_buf(alloc, buffer);
+		buffer->clear_on_free = false;
+	}
 	mutex_lock(&alloc->mutex);
 	binder_free_buf_locked(alloc, buffer);
 	mutex_unlock(&alloc->mutex);
@@ -802,6 +816,10 @@ void binder_alloc_deferred_release(struct binder_alloc *alloc)
 		/* Transaction should already have been freed */
 		BUG_ON(buffer->transaction);
 
+		if (buffer->clear_on_free) {
+			binder_alloc_clear_buf(alloc, buffer);
+			buffer->clear_on_free = false;
+		}
 		binder_free_buf_locked(alloc, buffer);
 		buffers++;
 	}
@@ -1135,6 +1153,36 @@ static struct page *binder_alloc_get_page(struct binder_alloc *alloc,
 	return lru_page->page_ptr;
 }
 
+/**
+ * binder_alloc_clear_buf() - zero out buffer
+ * @alloc: binder_alloc for this proc
+ * @buffer: binder buffer to be cleared
+ *
+ * memset the given buffer to 0
+ */
+static void binder_alloc_clear_buf(struct binder_alloc *alloc,
+				   struct binder_buffer *buffer)
+{
+	size_t bytes = binder_alloc_buffer_size(alloc, buffer);
+	binder_size_t buffer_offset = 0;
+
+	while (bytes) {
+		unsigned long size;
+		struct page *page;
+		pgoff_t pgoff;
+		void *kptr;
+
+		page = binder_alloc_get_page(alloc, buffer,
+					     buffer_offset, &pgoff);
+		size = min_t(size_t, bytes, PAGE_SIZE - pgoff);
+		kptr = kmap(page) + pgoff;
+		memset(kptr, 0, size);
+		kunmap(page);
+		bytes -= size;
+		buffer_offset += size;
+	}
+}
+
 /**
  * binder_alloc_copy_user_to_buffer() - copy src user to tgt user
  * @alloc: binder_alloc for this proc
diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h
index 55d8b4106766..6e8e001381af 100644
--- a/drivers/android/binder_alloc.h
+++ b/drivers/android/binder_alloc.h
@@ -23,6 +23,7 @@ struct binder_transaction;
  * @entry:              entry alloc->buffers
  * @rb_node:            node for allocated_buffers/free_buffers rb trees
  * @free:               %true if buffer is free
+ * @clear_on_free:      %true if buffer must be zeroed after use
  * @allow_user_free:    %true if user is allowed to free buffer
  * @async_transaction:  %true if buffer is in use for an async txn
  * @debug_id:           unique ID for debugging
@@ -41,9 +42,10 @@ struct binder_buffer {
 	struct rb_node rb_node; /* free entry by size or allocated entry */
 				/* by address */
 	unsigned free:1;
+	unsigned clear_on_free:1;
 	unsigned allow_user_free:1;
 	unsigned async_transaction:1;
-	unsigned debug_id:29;
+	unsigned debug_id:28;
 
 	struct binder_transaction *transaction;
 
diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h
index f1ce2c4c077e..ec84ad106568 100644
--- a/include/uapi/linux/android/binder.h
+++ b/include/uapi/linux/android/binder.h
@@ -248,6 +248,7 @@ enum transaction_flags {
 	TF_ROOT_OBJECT	= 0x04,	/* contents are the component's root object */
 	TF_STATUS_CODE	= 0x08,	/* contents are a 32-bit status code */
 	TF_ACCEPT_FDS	= 0x10,	/* allow replies with file descriptors */
+	TF_CLEAR_BUF	= 0x20,	/* clear buffer on txn complete */
 };
 
 struct binder_transaction_data {
-- 
cgit v1.2.3


From deb678955360ea87605b8aea1f69c45bddc3f867 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Wed, 2 Dec 2020 14:48:01 +0800
Subject: btrfs: calculate inline extent buffer page size based on page size

Btrfs only supports 64K as the maximum node size, thus on a system with
4K pages we would have at most 16 pages for one extent buffer.

For a system using 64K page size, we would really have just one page.

While we always use 16 pages for extent_buffer::pages, this means for
systems using 64K pages, we are wasting memory for 15 page pointers
which will never be used.

Calculate the array size based on page size and the node size maximum.

- for systems using 4K page size, it will stay 16 pages
- for systems using 64K page size, it will be 1 page

Move the definition of BTRFS_MAX_METADATA_BLOCKSIZE to btrfs_tree.h, to
avoid circular inclusion of ctree.h.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h                | 6 ------
 fs/btrfs/extent_io.c            | 7 +------
 fs/btrfs/extent_io.h            | 4 ++--
 include/uapi/linux/btrfs_tree.h | 3 ++-
 4 files changed, 5 insertions(+), 15 deletions(-)

(limited to 'include/uapi')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 112c9a2ae47b..c5ef29078954 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -67,12 +67,6 @@ struct btrfs_ref;
 
 #define BTRFS_OLDEST_GENERATION	0ULL
 
-/*
- * the max metadata block size.  This limit is somewhat artificial,
- * but the memmove costs go through the roof for larger blocks.
- */
-#define BTRFS_MAX_METADATA_BLOCKSIZE 65536
-
 /*
  * we can actually store much bigger names, but lets not confuse the rest
  * of linux
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index e70d6944d075..86b2a483c1ab 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5053,12 +5053,7 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
 	atomic_set(&eb->refs, 1);
 	atomic_set(&eb->io_pages, 0);
 
-	/*
-	 * Sanity checks, currently the maximum is 64k covered by 16x 4k pages
-	 */
-	BUILD_BUG_ON(BTRFS_MAX_METADATA_BLOCKSIZE
-		> MAX_INLINE_EXTENT_BUFFER_SIZE);
-	BUG_ON(len > MAX_INLINE_EXTENT_BUFFER_SIZE);
+	ASSERT(len <= BTRFS_MAX_METADATA_BLOCKSIZE);
 
 	return eb;
 }
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 16f2ce5cd8ed..77f2211550e3 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -6,6 +6,7 @@
 #include <linux/rbtree.h>
 #include <linux/refcount.h>
 #include <linux/fiemap.h>
+#include <linux/btrfs_tree.h>
 #include "ulist.h"
 
 /*
@@ -74,8 +75,7 @@ typedef blk_status_t (submit_bio_hook_t)(struct inode *inode, struct bio *bio,
 typedef blk_status_t (extent_submit_bio_start_t)(struct inode *inode,
 		struct bio *bio, u64 dio_file_offset);
 
-#define INLINE_EXTENT_BUFFER_PAGES 16
-#define MAX_INLINE_EXTENT_BUFFER_SIZE (INLINE_EXTENT_BUFFER_PAGES * PAGE_SIZE)
+#define INLINE_EXTENT_BUFFER_PAGES     (BTRFS_MAX_METADATA_BLOCKSIZE / PAGE_SIZE)
 struct extent_buffer {
 	u64 start;
 	unsigned long len;
diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
index 6b885982ece6..58d7cff9afb1 100644
--- a/include/uapi/linux/btrfs_tree.h
+++ b/include/uapi/linux/btrfs_tree.h
@@ -299,7 +299,8 @@
  */
 #define BTRFS_STRING_ITEM_KEY	253
 
-
+/* Maximum metadata block size (nodesize) */
+#define BTRFS_MAX_METADATA_BLOCKSIZE			65536
 
 /* 32 bytes in various csum fields */
 #define BTRFS_CSUM_SIZE 32
-- 
cgit v1.2.3


From 28cea78af44918b920306df150afbd116bd94301 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 14 Sep 2020 10:51:17 -0600
Subject: io_uring: allow non-fixed files with SQPOLL

The restriction of needing fixed files for SQPOLL is problematic, and
prevents/inhibits several valid use cases. With the referenced
files_struct that we have now, it's trivially supportable.

Treat ->files like we do the mm for the SQPOLL thread - grab a reference
to it (and assign it), and drop it when we're done.

This feature is exposed as IORING_FEAT_SQPOLL_NONFIXED.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c                 | 87 ++++++++++++++++++++++++++++++++++---------
 include/uapi/linux/io_uring.h |  1 +
 2 files changed, 70 insertions(+), 18 deletions(-)

(limited to 'include/uapi')

diff --git a/fs/io_uring.c b/fs/io_uring.c
index d17198733f6a..c1f3980945e4 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -999,8 +999,9 @@ static inline void io_clean_op(struct io_kiocb *req)
 		__io_clean_op(req);
 }
 
-static void io_sq_thread_drop_mm(void)
+static void io_sq_thread_drop_mm_files(void)
 {
+	struct files_struct *files = current->files;
 	struct mm_struct *mm = current->mm;
 
 	if (mm) {
@@ -1008,6 +1009,40 @@ static void io_sq_thread_drop_mm(void)
 		mmput(mm);
 		current->mm = NULL;
 	}
+	if (files) {
+		struct nsproxy *nsproxy = current->nsproxy;
+
+		task_lock(current);
+		current->files = NULL;
+		current->nsproxy = NULL;
+		task_unlock(current);
+		put_files_struct(files);
+		put_nsproxy(nsproxy);
+	}
+}
+
+static void __io_sq_thread_acquire_files(struct io_ring_ctx *ctx)
+{
+	if (!current->files) {
+		struct files_struct *files;
+		struct nsproxy *nsproxy;
+
+		task_lock(ctx->sqo_task);
+		files = ctx->sqo_task->files;
+		if (!files) {
+			task_unlock(ctx->sqo_task);
+			return;
+		}
+		atomic_inc(&files->count);
+		get_nsproxy(ctx->sqo_task->nsproxy);
+		nsproxy = ctx->sqo_task->nsproxy;
+		task_unlock(ctx->sqo_task);
+
+		task_lock(current);
+		current->files = files;
+		current->nsproxy = nsproxy;
+		task_unlock(current);
+	}
 }
 
 static int __io_sq_thread_acquire_mm(struct io_ring_ctx *ctx)
@@ -1035,12 +1070,21 @@ static int __io_sq_thread_acquire_mm(struct io_ring_ctx *ctx)
 	return -EFAULT;
 }
 
-static int io_sq_thread_acquire_mm(struct io_ring_ctx *ctx,
-				   struct io_kiocb *req)
+static int io_sq_thread_acquire_mm_files(struct io_ring_ctx *ctx,
+					 struct io_kiocb *req)
 {
-	if (!(io_op_defs[req->opcode].work_flags & IO_WQ_WORK_MM))
-		return 0;
-	return __io_sq_thread_acquire_mm(ctx);
+	const struct io_op_def *def = &io_op_defs[req->opcode];
+
+	if (def->work_flags & IO_WQ_WORK_MM) {
+		int ret = __io_sq_thread_acquire_mm(ctx);
+		if (unlikely(ret))
+			return ret;
+	}
+
+	if (def->needs_file || (def->work_flags & IO_WQ_WORK_FILES))
+		__io_sq_thread_acquire_files(ctx);
+
+	return 0;
 }
 
 static void io_sq_thread_associate_blkcg(struct io_ring_ctx *ctx,
@@ -2061,6 +2105,7 @@ static void __io_req_task_submit(struct io_kiocb *req)
 	struct io_ring_ctx *ctx = req->ctx;
 
 	if (!__io_sq_thread_acquire_mm(ctx)) {
+		__io_sq_thread_acquire_files(ctx);
 		mutex_lock(&ctx->uring_lock);
 		__io_queue_sqe(req, NULL);
 		mutex_unlock(&ctx->uring_lock);
@@ -2603,7 +2648,7 @@ static bool io_rw_reissue(struct io_kiocb *req, long res)
 	if ((res != -EAGAIN && res != -EOPNOTSUPP) || io_wq_current_is_worker())
 		return false;
 
-	ret = io_sq_thread_acquire_mm(req->ctx, req);
+	ret = io_sq_thread_acquire_mm_files(req->ctx, req);
 
 	if (io_resubmit_prep(req, ret)) {
 		refcount_inc(&req->refs);
@@ -6168,13 +6213,7 @@ static struct file *io_file_get(struct io_submit_state *state,
 static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
 			   int fd)
 {
-	bool fixed;
-
-	fixed = (req->flags & REQ_F_FIXED_FILE) != 0;
-	if (unlikely(!fixed && io_async_submit(req->ctx)))
-		return -EBADF;
-
-	req->file = io_file_get(state, req, fd, fixed);
+	req->file = io_file_get(state, req, fd, req->flags & REQ_F_FIXED_FILE);
 	if (req->file || io_op_defs[req->opcode].needs_file_no_error)
 		return 0;
 	return -EBADF;
@@ -6551,7 +6590,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	if (unlikely(req->opcode >= IORING_OP_LAST))
 		return -EINVAL;
 
-	if (unlikely(io_sq_thread_acquire_mm(ctx, req)))
+	if (unlikely(io_sq_thread_acquire_mm_files(ctx, req)))
 		return -EFAULT;
 
 	sqe_flags = READ_ONCE(sqe->flags);
@@ -6739,7 +6778,7 @@ again:
 		 * adding ourselves to the waitqueue, as the unuse/drop
 		 * may sleep.
 		 */
-		io_sq_thread_drop_mm();
+		io_sq_thread_drop_mm_files();
 
 		/*
 		 * We're polling. If we're within the defined idle
@@ -6808,11 +6847,18 @@ static void io_sqd_init_new(struct io_sq_data *sqd)
 static int io_sq_thread(void *data)
 {
 	struct cgroup_subsys_state *cur_css = NULL;
+	struct files_struct *old_files = current->files;
+	struct nsproxy *old_nsproxy = current->nsproxy;
 	const struct cred *old_cred = NULL;
 	struct io_sq_data *sqd = data;
 	struct io_ring_ctx *ctx;
 	unsigned long start_jiffies;
 
+	task_lock(current);
+	current->files = NULL;
+	current->nsproxy = NULL;
+	task_unlock(current);
+
 	start_jiffies = jiffies;
 	while (!kthread_should_stop()) {
 		enum sq_ret ret = 0;
@@ -6845,7 +6891,7 @@ static int io_sq_thread(void *data)
 
 			ret |= __io_sq_thread(ctx, start_jiffies, cap_entries);
 
-			io_sq_thread_drop_mm();
+			io_sq_thread_drop_mm_files();
 		}
 
 		if (ret & SQT_SPIN) {
@@ -6870,6 +6916,11 @@ static int io_sq_thread(void *data)
 	if (old_cred)
 		revert_creds(old_cred);
 
+	task_lock(current);
+	current->files = old_files;
+	current->nsproxy = old_nsproxy;
+	task_unlock(current);
+
 	kthread_parkme();
 
 	return 0;
@@ -9415,7 +9466,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 	p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
 			IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
 			IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL |
-			IORING_FEAT_POLL_32BITS;
+			IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED;
 
 	if (copy_to_user(params, p, sizeof(*p))) {
 		ret = -EFAULT;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index e943bf07c959..2301c37e86cb 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -254,6 +254,7 @@ struct io_uring_params {
 #define IORING_FEAT_CUR_PERSONALITY	(1U << 4)
 #define IORING_FEAT_FAST_POLL		(1U << 5)
 #define IORING_FEAT_POLL_32BITS 	(1U << 6)
+#define IORING_FEAT_SQPOLL_NONFIXED	(1U << 7)
 
 /*
  * io_uring_register(2) opcodes and arguments
-- 
cgit v1.2.3


From 80a261fd00327898e272ddc84ccc9510c036453c Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 28 Sep 2020 14:23:58 -0600
Subject: io_uring: add support for IORING_OP_RENAMEAT

IORING_OP_RENAMEAT behaves like renameat2(), and takes the same flags
etc.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c                 | 70 +++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/io_uring.h |  2 ++
 2 files changed, 72 insertions(+)

(limited to 'include/uapi')

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 1824bd4329ee..94a5e1618368 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -546,6 +546,15 @@ struct io_shutdown {
 	int				how;
 };
 
+struct io_rename {
+	struct file			*file;
+	int				old_dfd;
+	int				new_dfd;
+	struct filename			*oldpath;
+	struct filename			*newpath;
+	int				flags;
+};
+
 struct io_completion {
 	struct file			*file;
 	struct list_head		list;
@@ -673,6 +682,7 @@ struct io_kiocb {
 		struct io_provide_buf	pbuf;
 		struct io_statx		statx;
 		struct io_shutdown	shutdown;
+		struct io_rename	rename;
 		/* use only after cleaning per-op data, see io_clean_op() */
 		struct io_completion	compl;
 	};
@@ -943,6 +953,10 @@ static const struct io_op_def io_op_defs[] = {
 	[IORING_OP_SHUTDOWN] = {
 		.needs_file		= 1,
 	},
+	[IORING_OP_RENAMEAT] = {
+		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_FILES |
+						IO_WQ_WORK_FS | IO_WQ_WORK_BLKCG,
+	},
 };
 
 enum io_mem_account {
@@ -3645,6 +3659,53 @@ out_free:
 	return ret;
 }
 
+static int io_renameat_prep(struct io_kiocb *req,
+			    const struct io_uring_sqe *sqe)
+{
+	struct io_rename *ren = &req->rename;
+	const char __user *oldf, *newf;
+
+	if (unlikely(req->flags & REQ_F_FIXED_FILE))
+		return -EBADF;
+
+	ren->old_dfd = READ_ONCE(sqe->fd);
+	oldf = u64_to_user_ptr(READ_ONCE(sqe->addr));
+	newf = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+	ren->new_dfd = READ_ONCE(sqe->len);
+	ren->flags = READ_ONCE(sqe->rename_flags);
+
+	ren->oldpath = getname(oldf);
+	if (IS_ERR(ren->oldpath))
+		return PTR_ERR(ren->oldpath);
+
+	ren->newpath = getname(newf);
+	if (IS_ERR(ren->newpath)) {
+		putname(ren->oldpath);
+		return PTR_ERR(ren->newpath);
+	}
+
+	req->flags |= REQ_F_NEED_CLEANUP;
+	return 0;
+}
+
+static int io_renameat(struct io_kiocb *req, bool force_nonblock)
+{
+	struct io_rename *ren = &req->rename;
+	int ret;
+
+	if (force_nonblock)
+		return -EAGAIN;
+
+	ret = do_renameat2(ren->old_dfd, ren->oldpath, ren->new_dfd,
+				ren->newpath, ren->flags);
+
+	req->flags &= ~REQ_F_NEED_CLEANUP;
+	if (ret < 0)
+		req_set_fail_links(req);
+	io_req_complete(req, ret);
+	return 0;
+}
+
 static int io_shutdown_prep(struct io_kiocb *req,
 			    const struct io_uring_sqe *sqe)
 {
@@ -5869,6 +5930,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return io_tee_prep(req, sqe);
 	case IORING_OP_SHUTDOWN:
 		return io_shutdown_prep(req, sqe);
+	case IORING_OP_RENAMEAT:
+		return io_renameat_prep(req, sqe);
 	}
 
 	printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
@@ -6006,6 +6069,10 @@ static void __io_clean_op(struct io_kiocb *req)
 			if (req->open.filename)
 				putname(req->open.filename);
 			break;
+		case IORING_OP_RENAMEAT:
+			putname(req->rename.oldpath);
+			putname(req->rename.newpath);
+			break;
 		}
 		req->flags &= ~REQ_F_NEED_CLEANUP;
 	}
@@ -6115,6 +6182,9 @@ static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock,
 	case IORING_OP_SHUTDOWN:
 		ret = io_shutdown(req, force_nonblock);
 		break;
+	case IORING_OP_RENAMEAT:
+		ret = io_renameat(req, force_nonblock);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 2301c37e86cb..c9a58bc7e4be 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -42,6 +42,7 @@ struct io_uring_sqe {
 		__u32		statx_flags;
 		__u32		fadvise_advice;
 		__u32		splice_flags;
+		__u32		rename_flags;
 	};
 	__u64	user_data;	/* data to be passed back at completion time */
 	union {
@@ -133,6 +134,7 @@ enum {
 	IORING_OP_REMOVE_BUFFERS,
 	IORING_OP_TEE,
 	IORING_OP_SHUTDOWN,
+	IORING_OP_RENAMEAT,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
-- 
cgit v1.2.3


From 14a1143b68ee2e4ec4e8d54f71cddb9724f9ec70 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 28 Sep 2020 14:27:37 -0600
Subject: io_uring: add support for IORING_OP_UNLINKAT

IORING_OP_UNLINKAT behaves like unlinkat(2) and takes the same flags
and arguments.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c                 | 64 +++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/io_uring.h |  2 ++
 2 files changed, 66 insertions(+)

(limited to 'include/uapi')

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 94a5e1618368..c8ecbc0bd286 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -555,6 +555,13 @@ struct io_rename {
 	int				flags;
 };
 
+struct io_unlink {
+	struct file			*file;
+	int				dfd;
+	int				flags;
+	struct filename			*filename;
+};
+
 struct io_completion {
 	struct file			*file;
 	struct list_head		list;
@@ -683,6 +690,7 @@ struct io_kiocb {
 		struct io_statx		statx;
 		struct io_shutdown	shutdown;
 		struct io_rename	rename;
+		struct io_unlink	unlink;
 		/* use only after cleaning per-op data, see io_clean_op() */
 		struct io_completion	compl;
 	};
@@ -957,6 +965,10 @@ static const struct io_op_def io_op_defs[] = {
 		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_FILES |
 						IO_WQ_WORK_FS | IO_WQ_WORK_BLKCG,
 	},
+	[IORING_OP_UNLINKAT] = {
+		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_FILES |
+						IO_WQ_WORK_FS | IO_WQ_WORK_BLKCG,
+	},
 };
 
 enum io_mem_account {
@@ -3706,6 +3718,50 @@ static int io_renameat(struct io_kiocb *req, bool force_nonblock)
 	return 0;
 }
 
+static int io_unlinkat_prep(struct io_kiocb *req,
+			    const struct io_uring_sqe *sqe)
+{
+	struct io_unlink *un = &req->unlink;
+	const char __user *fname;
+
+	if (unlikely(req->flags & REQ_F_FIXED_FILE))
+		return -EBADF;
+
+	un->dfd = READ_ONCE(sqe->fd);
+
+	un->flags = READ_ONCE(sqe->unlink_flags);
+	if (un->flags & ~AT_REMOVEDIR)
+		return -EINVAL;
+
+	fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
+	un->filename = getname(fname);
+	if (IS_ERR(un->filename))
+		return PTR_ERR(un->filename);
+
+	req->flags |= REQ_F_NEED_CLEANUP;
+	return 0;
+}
+
+static int io_unlinkat(struct io_kiocb *req, bool force_nonblock)
+{
+	struct io_unlink *un = &req->unlink;
+	int ret;
+
+	if (force_nonblock)
+		return -EAGAIN;
+
+	if (un->flags & AT_REMOVEDIR)
+		ret = do_rmdir(un->dfd, un->filename);
+	else
+		ret = do_unlinkat(un->dfd, un->filename);
+
+	req->flags &= ~REQ_F_NEED_CLEANUP;
+	if (ret < 0)
+		req_set_fail_links(req);
+	io_req_complete(req, ret);
+	return 0;
+}
+
 static int io_shutdown_prep(struct io_kiocb *req,
 			    const struct io_uring_sqe *sqe)
 {
@@ -5932,6 +5988,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return io_shutdown_prep(req, sqe);
 	case IORING_OP_RENAMEAT:
 		return io_renameat_prep(req, sqe);
+	case IORING_OP_UNLINKAT:
+		return io_unlinkat_prep(req, sqe);
 	}
 
 	printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
@@ -6073,6 +6131,9 @@ static void __io_clean_op(struct io_kiocb *req)
 			putname(req->rename.oldpath);
 			putname(req->rename.newpath);
 			break;
+		case IORING_OP_UNLINKAT:
+			putname(req->unlink.filename);
+			break;
 		}
 		req->flags &= ~REQ_F_NEED_CLEANUP;
 	}
@@ -6185,6 +6246,9 @@ static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock,
 	case IORING_OP_RENAMEAT:
 		ret = io_renameat(req, force_nonblock);
 		break;
+	case IORING_OP_UNLINKAT:
+		ret = io_unlinkat(req, force_nonblock);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index c9a58bc7e4be..557e7eae497f 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -43,6 +43,7 @@ struct io_uring_sqe {
 		__u32		fadvise_advice;
 		__u32		splice_flags;
 		__u32		rename_flags;
+		__u32		unlink_flags;
 	};
 	__u64	user_data;	/* data to be passed back at completion time */
 	union {
@@ -135,6 +136,7 @@ enum {
 	IORING_OP_TEE,
 	IORING_OP_SHUTDOWN,
 	IORING_OP_RENAMEAT,
+	IORING_OP_UNLINKAT,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
-- 
cgit v1.2.3


From c73ebb685fb6dfb513d394cbea64fb81ba3d994f Mon Sep 17 00:00:00 2001
From: Hao Xu <haoxu@linux.alibaba.com>
Date: Tue, 3 Nov 2020 10:54:37 +0800
Subject: io_uring: add timeout support for io_uring_enter()

Now users who want to get woken when waiting for events should submit a
timeout command first. It is not safe for applications that split SQ and
CQ handling between two threads, such as mysql. Users should synchronize
the two threads explicitly to protect SQ and that will impact the
performance.

This patch adds support for timeout to existing io_uring_enter(). To
avoid overloading arguments, it introduces a new parameter structure
which contains sigmask and timeout.

I have tested the workloads with one thread submitting nop requests
while the other reaps the cqe with a timeout. It is 1.8~2x faster
when the iodepth is 16.

Signed-off-by: Jiufei Xue <jiufei.xue@linux.alibaba.com>
Signed-off-by: Hao Xu <haoxu@linux.alibaba.com>
[axboe: various cleanups/fixes, and name change to SIG_IS_DATA]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c                 | 69 ++++++++++++++++++++++++++++++++++++++-----
 include/linux/syscalls.h      |  2 +-
 include/uapi/linux/io_uring.h |  9 ++++++
 3 files changed, 72 insertions(+), 8 deletions(-)

(limited to 'include/uapi')

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 11ce97d6259c..ee25c70527aa 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7118,7 +7118,8 @@ static int io_run_task_work_sig(void)
  * application must reap them itself, as they reside on the shared cq ring.
  */
 static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
-			  const sigset_t __user *sig, size_t sigsz)
+			  const sigset_t __user *sig, size_t sigsz,
+			  struct __kernel_timespec __user *uts)
 {
 	struct io_wait_queue iowq = {
 		.wq = {
@@ -7130,6 +7131,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 		.to_wait	= min_events,
 	};
 	struct io_rings *rings = ctx->rings;
+	struct timespec64 ts;
+	signed long timeout = 0;
 	int ret = 0;
 
 	do {
@@ -7152,6 +7155,12 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			return ret;
 	}
 
+	if (uts) {
+		if (get_timespec64(&ts, uts))
+			return -EFAULT;
+		timeout = timespec64_to_jiffies(&ts);
+	}
+
 	iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
 	trace_io_uring_cqring_wait(ctx, min_events);
 	do {
@@ -7165,7 +7174,15 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			break;
 		if (io_should_wake(&iowq, false))
 			break;
-		schedule();
+		if (uts) {
+			timeout = schedule_timeout(timeout);
+			if (timeout == 0) {
+				ret = -ETIME;
+				break;
+			}
+		} else {
+			schedule();
+		}
 	} while (1);
 	finish_wait(&ctx->wait, &iowq.wq);
 
@@ -9167,9 +9184,39 @@ static void io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
 	finish_wait(&ctx->sqo_sq_wait, &wait);
 }
 
+static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz,
+			  struct __kernel_timespec __user **ts,
+			  const sigset_t __user **sig)
+{
+	struct io_uring_getevents_arg arg;
+
+	/*
+	 * If EXT_ARG isn't set, then we have no timespec and the argp pointer
+	 * is just a pointer to the sigset_t.
+	 */
+	if (!(flags & IORING_ENTER_EXT_ARG)) {
+		*sig = (const sigset_t __user *) argp;
+		*ts = NULL;
+		return 0;
+	}
+
+	/*
+	 * EXT_ARG is set - ensure we agree on the size of it and copy in our
+	 * timespec and sigset_t pointers if good.
+	 */
+	if (*argsz != sizeof(arg))
+		return -EINVAL;
+	if (copy_from_user(&arg, argp, sizeof(arg)))
+		return -EFAULT;
+	*sig = u64_to_user_ptr(arg.sigmask);
+	*argsz = arg.sigmask_sz;
+	*ts = u64_to_user_ptr(arg.ts);
+	return 0;
+}
+
 SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
-		u32, min_complete, u32, flags, const sigset_t __user *, sig,
-		size_t, sigsz)
+		u32, min_complete, u32, flags, const void __user *, argp,
+		size_t, argsz)
 {
 	struct io_ring_ctx *ctx;
 	long ret = -EBADF;
@@ -9179,7 +9226,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 	io_run_task_work();
 
 	if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP |
-			IORING_ENTER_SQ_WAIT))
+			IORING_ENTER_SQ_WAIT | IORING_ENTER_EXT_ARG))
 		return -EINVAL;
 
 	f = fdget(fd);
@@ -9225,6 +9272,13 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 			goto out;
 	}
 	if (flags & IORING_ENTER_GETEVENTS) {
+		const sigset_t __user *sig;
+		struct __kernel_timespec __user *ts;
+
+		ret = io_get_ext_arg(flags, argp, &argsz, &ts, &sig);
+		if (unlikely(ret))
+			goto out;
+
 		min_complete = min(min_complete, ctx->cq_entries);
 
 		/*
@@ -9237,7 +9291,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 		    !(ctx->flags & IORING_SETUP_SQPOLL)) {
 			ret = io_iopoll_check(ctx, min_complete);
 		} else {
-			ret = io_cqring_wait(ctx, min_complete, sig, sigsz);
+			ret = io_cqring_wait(ctx, min_complete, sig, argsz, ts);
 		}
 	}
 
@@ -9600,7 +9654,8 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 	p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
 			IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
 			IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL |
-			IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED;
+			IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED |
+			IORING_FEAT_EXT_ARG;
 
 	if (copy_to_user(params, p, sizeof(*p))) {
 		ret = -EFAULT;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 37bea07c12f2..8576e8bf92fe 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -317,7 +317,7 @@ asmlinkage long sys_io_uring_setup(u32 entries,
 				struct io_uring_params __user *p);
 asmlinkage long sys_io_uring_enter(unsigned int fd, u32 to_submit,
 				u32 min_complete, u32 flags,
-				const sigset_t __user *sig, size_t sigsz);
+				const void __user *argp, size_t argsz);
 asmlinkage long sys_io_uring_register(unsigned int fd, unsigned int op,
 				void __user *arg, unsigned int nr_args);
 
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 557e7eae497f..6bb8229de892 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -231,6 +231,7 @@ struct io_cqring_offsets {
 #define IORING_ENTER_GETEVENTS	(1U << 0)
 #define IORING_ENTER_SQ_WAKEUP	(1U << 1)
 #define IORING_ENTER_SQ_WAIT	(1U << 2)
+#define IORING_ENTER_EXT_ARG	(1U << 3)
 
 /*
  * Passed in for io_uring_setup(2). Copied back with updated info on success
@@ -259,6 +260,7 @@ struct io_uring_params {
 #define IORING_FEAT_FAST_POLL		(1U << 5)
 #define IORING_FEAT_POLL_32BITS 	(1U << 6)
 #define IORING_FEAT_SQPOLL_NONFIXED	(1U << 7)
+#define IORING_FEAT_EXT_ARG		(1U << 8)
 
 /*
  * io_uring_register(2) opcodes and arguments
@@ -335,4 +337,11 @@ enum {
 	IORING_RESTRICTION_LAST
 };
 
+struct io_uring_getevents_arg {
+	__u64	sigmask;
+	__u32	sigmask_sz;
+	__u32	pad;
+	__u64	ts;
+};
+
 #endif
-- 
cgit v1.2.3


From 9c8e11b36c9b640a85a4a33a9e9dff418993cc34 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 30 Nov 2020 19:11:16 +0000
Subject: io_uring: add timeout update

Support timeout updates through IORING_OP_TIMEOUT_REMOVE with passed in
IORING_TIMEOUT_UPDATE. Updates don't support offset timeout mode.
The original timeout.off will be ignored as well.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
[axboe: remove now unused 'ret' variable]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c                 | 54 +++++++++++++++++++++++++++++++++++++++----
 include/uapi/linux/io_uring.h |  1 +
 2 files changed, 51 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 3930b11dcd58..b40083cde733 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -453,6 +453,10 @@ struct io_timeout {
 struct io_timeout_rem {
 	struct file			*file;
 	u64				addr;
+
+	/* timeout update */
+	struct timespec64		ts;
+	u32				flags;
 };
 
 struct io_rw {
@@ -867,7 +871,10 @@ static const struct io_op_def io_op_defs[] = {
 		.async_size		= sizeof(struct io_timeout_data),
 		.work_flags		= IO_WQ_WORK_MM,
 	},
-	[IORING_OP_TIMEOUT_REMOVE] = {},
+	[IORING_OP_TIMEOUT_REMOVE] = {
+		/* used by timeout updates' prep() */
+		.work_flags		= IO_WQ_WORK_MM,
+	},
 	[IORING_OP_ACCEPT] = {
 		.needs_file		= 1,
 		.unbound_nonreg_file	= 1,
@@ -5671,17 +5678,48 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
 	return 0;
 }
 
+static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
+			     struct timespec64 *ts, enum hrtimer_mode mode)
+{
+	struct io_kiocb *req = io_timeout_extract(ctx, user_data);
+	struct io_timeout_data *data;
+
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	req->timeout.off = 0; /* noseq */
+	data = req->async_data;
+	list_add_tail(&req->timeout.list, &ctx->timeout_list);
+	hrtimer_init(&data->timer, CLOCK_MONOTONIC, mode);
+	data->timer.function = io_timeout_fn;
+	hrtimer_start(&data->timer, timespec64_to_ktime(*ts), mode);
+	return 0;
+}
+
 static int io_timeout_remove_prep(struct io_kiocb *req,
 				  const struct io_uring_sqe *sqe)
 {
+	struct io_timeout_rem *tr = &req->timeout_rem;
+
 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
 		return -EINVAL;
-	if (sqe->ioprio || sqe->buf_index || sqe->len || sqe->timeout_flags)
+	if (sqe->ioprio || sqe->buf_index || sqe->len)
+		return -EINVAL;
+
+	tr->addr = READ_ONCE(sqe->addr);
+	tr->flags = READ_ONCE(sqe->timeout_flags);
+	if (tr->flags & IORING_TIMEOUT_UPDATE) {
+		if (tr->flags & ~(IORING_TIMEOUT_UPDATE|IORING_TIMEOUT_ABS))
+			return -EINVAL;
+		if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2)))
+			return -EFAULT;
+	} else if (tr->flags) {
+		/* timeout removal doesn't support flags */
 		return -EINVAL;
+	}
 
-	req->timeout_rem.addr = READ_ONCE(sqe->addr);
 	return 0;
 }
 
@@ -5690,11 +5728,19 @@ static int io_timeout_remove_prep(struct io_kiocb *req,
  */
 static int io_timeout_remove(struct io_kiocb *req)
 {
+	struct io_timeout_rem *tr = &req->timeout_rem;
 	struct io_ring_ctx *ctx = req->ctx;
 	int ret;
 
 	spin_lock_irq(&ctx->completion_lock);
-	ret = io_timeout_cancel(ctx, req->timeout_rem.addr);
+	if (req->timeout_rem.flags & IORING_TIMEOUT_UPDATE) {
+		enum hrtimer_mode mode = (tr->flags & IORING_TIMEOUT_ABS)
+					? HRTIMER_MODE_ABS : HRTIMER_MODE_REL;
+
+		ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode);
+	} else {
+		ret = io_timeout_cancel(ctx, tr->addr);
+	}
 
 	io_cqring_fill_event(req, ret);
 	io_commit_cqring(ctx);
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 6bb8229de892..d31a2a1e8ef9 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -151,6 +151,7 @@ enum {
  * sqe->timeout_flags
  */
 #define IORING_TIMEOUT_ABS	(1U << 0)
+#define IORING_TIMEOUT_UPDATE	(1U << 1)
 
 /*
  * sqe->splice_flags
-- 
cgit v1.2.3


From 921ca574cd382142add8b12d0a7117f495510de5 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Sun, 6 Dec 2020 15:47:31 +0100
Subject: can: isotp: add SF_BROADCAST support for functional addressing

When CAN_ISOTP_SF_BROADCAST is set in the CAN_ISOTP_OPTS flags the CAN_ISOTP
socket is switched into functional addressing mode, where only single frame
(SF) protocol data units can be sent on the specified CAN interface and the
given tp.tx_id after bind().

In contrast to normal and extended addressing, this socket does not register a
CAN-ID for reception which would be needed for a 1-to-1 ISOTP connection with a
segmented bi-directional data transfer.

Sending SFs on this socket is therefore a TX-only 'broadcast' operation.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Thomas Wagner <thwa1@web.de>
Link: https://lore.kernel.org/r/20201206144731.4609-1-socketcan@hartkopp.net
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/uapi/linux/can/isotp.h |  2 +-
 net/can/isotp.c                | 42 +++++++++++++++++++++++++++++-------------
 2 files changed, 30 insertions(+), 14 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/can/isotp.h b/include/uapi/linux/can/isotp.h
index 7793b26aa154..c55935b64ccc 100644
--- a/include/uapi/linux/can/isotp.h
+++ b/include/uapi/linux/can/isotp.h
@@ -135,7 +135,7 @@ struct can_isotp_ll_options {
 #define CAN_ISOTP_FORCE_RXSTMIN	0x100	/* ignore CFs depending on rx stmin */
 #define CAN_ISOTP_RX_EXT_ADDR	0x200	/* different rx extended addressing */
 #define CAN_ISOTP_WAIT_TX_DONE	0x400	/* wait for tx completion */
-
+#define CAN_ISOTP_SF_BROADCAST	0x800	/* 1-to-N functional addressing */
 
 /* default values */
 
diff --git a/net/can/isotp.c b/net/can/isotp.c
index d78ab13bd8be..09f781b63d66 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -865,6 +865,14 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 	if (!size || size > MAX_MSG_LENGTH)
 		return -EINVAL;
 
+	/* take care of a potential SF_DL ESC offset for TX_DL > 8 */
+	off = (so->tx.ll_dl > CAN_MAX_DLEN) ? 1 : 0;
+
+	/* does the given data fit into a single frame for SF_BROADCAST? */
+	if ((so->opt.flags & CAN_ISOTP_SF_BROADCAST) &&
+	    (size > so->tx.ll_dl - SF_PCI_SZ4 - ae - off))
+		return -EINVAL;
+
 	err = memcpy_from_msg(so->tx.buf, msg, size);
 	if (err < 0)
 		return err;
@@ -891,9 +899,6 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 	cf = (struct canfd_frame *)skb->data;
 	skb_put(skb, so->ll.mtu);
 
-	/* take care of a potential SF_DL ESC offset for TX_DL > 8 */
-	off = (so->tx.ll_dl > CAN_MAX_DLEN) ? 1 : 0;
-
 	/* check for single frame transmission depending on TX_DL */
 	if (size <= so->tx.ll_dl - SF_PCI_SZ4 - ae - off) {
 		/* The message size generally fits into a SingleFrame - good.
@@ -1016,7 +1021,7 @@ static int isotp_release(struct socket *sock)
 	hrtimer_cancel(&so->rxtimer);
 
 	/* remove current filters & unregister */
-	if (so->bound) {
+	if (so->bound && (!(so->opt.flags & CAN_ISOTP_SF_BROADCAST))) {
 		if (so->ifindex) {
 			struct net_device *dev;
 
@@ -1052,15 +1057,25 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
 	struct net_device *dev;
 	int err = 0;
 	int notify_enetdown = 0;
+	int do_rx_reg = 1;
 
 	if (len < CAN_REQUIRED_SIZE(struct sockaddr_can, can_addr.tp))
 		return -EINVAL;
 
-	if (addr->can_addr.tp.rx_id == addr->can_addr.tp.tx_id)
-		return -EADDRNOTAVAIL;
+	/* do not register frame reception for functional addressing */
+	if (so->opt.flags & CAN_ISOTP_SF_BROADCAST)
+		do_rx_reg = 0;
+
+	/* do not validate rx address for functional addressing */
+	if (do_rx_reg) {
+		if (addr->can_addr.tp.rx_id == addr->can_addr.tp.tx_id)
+			return -EADDRNOTAVAIL;
+
+		if (addr->can_addr.tp.rx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG))
+			return -EADDRNOTAVAIL;
+	}
 
-	if ((addr->can_addr.tp.rx_id | addr->can_addr.tp.tx_id) &
-	    (CAN_ERR_FLAG | CAN_RTR_FLAG))
+	if (addr->can_addr.tp.tx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG))
 		return -EADDRNOTAVAIL;
 
 	if (!addr->can_ifindex)
@@ -1093,13 +1108,14 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
 
 	ifindex = dev->ifindex;
 
-	can_rx_register(net, dev, addr->can_addr.tp.rx_id,
-			SINGLE_MASK(addr->can_addr.tp.rx_id), isotp_rcv, sk,
-			"isotp", sk);
+	if (do_rx_reg)
+		can_rx_register(net, dev, addr->can_addr.tp.rx_id,
+				SINGLE_MASK(addr->can_addr.tp.rx_id),
+				isotp_rcv, sk, "isotp", sk);
 
 	dev_put(dev);
 
-	if (so->bound) {
+	if (so->bound && do_rx_reg) {
 		/* unregister old filter */
 		if (so->ifindex) {
 			dev = dev_get_by_index(net, so->ifindex);
@@ -1299,7 +1315,7 @@ static int isotp_notifier(struct notifier_block *nb, unsigned long msg,
 	case NETDEV_UNREGISTER:
 		lock_sock(sk);
 		/* remove current filters & unregister */
-		if (so->bound)
+		if (so->bound && (!(so->opt.flags & CAN_ISOTP_SF_BROADCAST)))
 			can_rx_unregister(dev_net(dev), dev, so->rxid,
 					  SINGLE_MASK(so->rxid),
 					  isotp_rcv, sk);
-- 
cgit v1.2.3


From 4cf476ced45d7f12df30a68e833b263e7a2202d1 Mon Sep 17 00:00:00 2001
From: Tom Parkin <tparkin@katalix.com>
Date: Thu, 10 Dec 2020 15:50:57 +0000
Subject: ppp: add PPPIOCBRIDGECHAN and PPPIOCUNBRIDGECHAN ioctls

This new ioctl pair allows two ppp channels to be bridged together:
frames arriving in one channel are transmitted in the other channel
and vice versa.

The practical use for this is primarily to support the L2TP Access
Concentrator use-case.  The end-user session is presented as a ppp
channel (typically PPPoE, although it could be e.g. PPPoA, or even PPP
over a serial link) and is switched into a PPPoL2TP session for
transmission to the LNS.  At the LNS the PPP session is terminated in
the ISP's network.

When a PPP channel is bridged to another it takes a reference on the
other's struct ppp_file.  This reference is dropped when the channels
are unbridged, which can occur either explicitly on userspace calling
the PPPIOCUNBRIDGECHAN ioctl, or implicitly when either channel in the
bridge is unregistered.

In order to implement the channel bridge, struct channel is extended
with a new field, 'bridge', which points to the other struct channel
making up the bridge.

This pointer is RCU protected to avoid adding another lock to the data
path.

To guard against concurrent writes to the pointer, the existing struct
channel lock 'upl' coverage is extended rather than adding a new lock.

The 'upl' lock is used to protect the existing unit pointer.  Since the
bridge effectively replaces the unit (they're mutually exclusive for a
channel) it makes coding easier to use the same lock to cover them
both.

Signed-off-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ppp/ppp_generic.c  | 152 ++++++++++++++++++++++++++++++++++++++++-
 include/uapi/linux/ppp-ioctl.h |   2 +
 2 files changed, 151 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 7d005896a0f9..09c27f7773f9 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -174,7 +174,8 @@ struct channel {
 	struct ppp	*ppp;		/* ppp unit we're connected to */
 	struct net	*chan_net;	/* the net channel belongs to */
 	struct list_head clist;		/* link in list of channels per unit */
-	rwlock_t	upl;		/* protects `ppp' */
+	rwlock_t	upl;		/* protects `ppp' and 'bridge' */
+	struct channel __rcu *bridge;	/* "bridged" ppp channel */
 #ifdef CONFIG_PPP_MULTILINK
 	u8		avail;		/* flag used in multilink stuff */
 	u8		had_frag;	/* >= 1 fragments have been sent */
@@ -606,6 +607,83 @@ static struct bpf_prog *compat_ppp_get_filter(struct sock_fprog32 __user *p)
 #endif
 #endif
 
+/* Bridge one PPP channel to another.
+ * When two channels are bridged, ppp_input on one channel is redirected to
+ * the other's ops->start_xmit handler.
+ * In order to safely bridge channels we must reject channels which are already
+ * part of a bridge instance, or which form part of an existing unit.
+ * Once successfully bridged, each channel holds a reference on the other
+ * to prevent it being freed while the bridge is extant.
+ */
+static int ppp_bridge_channels(struct channel *pch, struct channel *pchb)
+{
+	write_lock_bh(&pch->upl);
+	if (pch->ppp ||
+	    rcu_dereference_protected(pch->bridge, lockdep_is_held(&pch->upl))) {
+		write_unlock_bh(&pch->upl);
+		return -EALREADY;
+	}
+	rcu_assign_pointer(pch->bridge, pchb);
+	write_unlock_bh(&pch->upl);
+
+	write_lock_bh(&pchb->upl);
+	if (pchb->ppp ||
+	    rcu_dereference_protected(pchb->bridge, lockdep_is_held(&pchb->upl))) {
+		write_unlock_bh(&pchb->upl);
+		goto err_unset;
+	}
+	rcu_assign_pointer(pchb->bridge, pch);
+	write_unlock_bh(&pchb->upl);
+
+	refcount_inc(&pch->file.refcnt);
+	refcount_inc(&pchb->file.refcnt);
+
+	return 0;
+
+err_unset:
+	write_lock_bh(&pch->upl);
+	RCU_INIT_POINTER(pch->bridge, NULL);
+	write_unlock_bh(&pch->upl);
+	synchronize_rcu();
+	return -EALREADY;
+}
+
+static int ppp_unbridge_channels(struct channel *pch)
+{
+	struct channel *pchb, *pchbb;
+
+	write_lock_bh(&pch->upl);
+	pchb = rcu_dereference_protected(pch->bridge, lockdep_is_held(&pch->upl));
+	if (!pchb) {
+		write_unlock_bh(&pch->upl);
+		return -EINVAL;
+	}
+	RCU_INIT_POINTER(pch->bridge, NULL);
+	write_unlock_bh(&pch->upl);
+
+	/* Only modify pchb if pchb->bridge points back to pch.
+	 * If not, it implies that there has been a race unbridging (and possibly
+	 * even rebridging) pchb.  We should leave pchb alone to avoid either a
+	 * refcount underflow, or breaking another established bridge instance.
+	 */
+	write_lock_bh(&pchb->upl);
+	pchbb = rcu_dereference_protected(pchb->bridge, lockdep_is_held(&pchb->upl));
+	if (pchbb == pch)
+		RCU_INIT_POINTER(pchb->bridge, NULL);
+	write_unlock_bh(&pchb->upl);
+
+	synchronize_rcu();
+
+	if (pchbb == pch)
+		if (refcount_dec_and_test(&pch->file.refcnt))
+			ppp_destroy_channel(pch);
+
+	if (refcount_dec_and_test(&pchb->file.refcnt))
+		ppp_destroy_channel(pchb);
+
+	return 0;
+}
+
 static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	struct ppp_file *pf;
@@ -641,8 +719,9 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	}
 
 	if (pf->kind == CHANNEL) {
-		struct channel *pch;
+		struct channel *pch, *pchb;
 		struct ppp_channel *chan;
+		struct ppp_net *pn;
 
 		pch = PF_TO_CHANNEL(pf);
 
@@ -657,6 +736,31 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 			err = ppp_disconnect_channel(pch);
 			break;
 
+		case PPPIOCBRIDGECHAN:
+			if (get_user(unit, p))
+				break;
+			err = -ENXIO;
+			pn = ppp_pernet(current->nsproxy->net_ns);
+			spin_lock_bh(&pn->all_channels_lock);
+			pchb = ppp_find_channel(pn, unit);
+			/* Hold a reference to prevent pchb being freed while
+			 * we establish the bridge.
+			 */
+			if (pchb)
+				refcount_inc(&pchb->file.refcnt);
+			spin_unlock_bh(&pn->all_channels_lock);
+			if (!pchb)
+				break;
+			err = ppp_bridge_channels(pch, pchb);
+			/* Drop earlier refcount now bridge establishment is complete */
+			if (refcount_dec_and_test(&pchb->file.refcnt))
+				ppp_destroy_channel(pchb);
+			break;
+
+		case PPPIOCUNBRIDGECHAN:
+			err = ppp_unbridge_channels(pch);
+			break;
+
 		default:
 			down_read(&pch->chan_sem);
 			chan = pch->chan;
@@ -2089,6 +2193,40 @@ static bool ppp_decompress_proto(struct sk_buff *skb)
 	return pskb_may_pull(skb, 2);
 }
 
+/* Attempt to handle a frame via a bridged channel, if one exists.
+ * If the channel is bridged, the frame is consumed by the bridge.
+ * If not, the caller must handle the frame by normal recv mechanisms.
+ * Returns true if the frame is consumed, false otherwise.
+ */
+static bool ppp_channel_bridge_input(struct channel *pch, struct sk_buff *skb)
+{
+	struct channel *pchb;
+
+	rcu_read_lock();
+	pchb = rcu_dereference(pch->bridge);
+	if (!pchb)
+		goto out_rcu;
+
+	spin_lock(&pchb->downl);
+	if (!pchb->chan) {
+		/* channel got unregistered */
+		kfree_skb(skb);
+		goto outl;
+	}
+
+	skb_scrub_packet(skb, !net_eq(pch->chan_net, pchb->chan_net));
+	if (!pchb->chan->ops->start_xmit(pchb->chan, skb))
+		kfree_skb(skb);
+
+outl:
+	spin_unlock(&pchb->downl);
+out_rcu:
+	rcu_read_unlock();
+
+	/* If pchb is set then we've consumed the packet */
+	return !!pchb;
+}
+
 void
 ppp_input(struct ppp_channel *chan, struct sk_buff *skb)
 {
@@ -2100,6 +2238,10 @@ ppp_input(struct ppp_channel *chan, struct sk_buff *skb)
 		return;
 	}
 
+	/* If the channel is bridged, transmit via the bridge */
+	if (ppp_channel_bridge_input(pch, skb))
+		return;
+
 	read_lock_bh(&pch->upl);
 	if (!ppp_decompress_proto(skb)) {
 		kfree_skb(skb);
@@ -2796,8 +2938,11 @@ ppp_unregister_channel(struct ppp_channel *chan)
 	list_del(&pch->list);
 	spin_unlock_bh(&pn->all_channels_lock);
 
+	ppp_unbridge_channels(pch);
+
 	pch->file.dead = 1;
 	wake_up_interruptible(&pch->file.rwait);
+
 	if (refcount_dec_and_test(&pch->file.refcnt))
 		ppp_destroy_channel(pch);
 }
@@ -3270,7 +3415,8 @@ ppp_connect_channel(struct channel *pch, int unit)
 		goto out;
 	write_lock_bh(&pch->upl);
 	ret = -EINVAL;
-	if (pch->ppp)
+	if (pch->ppp ||
+	    rcu_dereference_protected(pch->bridge, lockdep_is_held(&pch->upl)))
 		goto outl;
 
 	ppp_lock(ppp);
diff --git a/include/uapi/linux/ppp-ioctl.h b/include/uapi/linux/ppp-ioctl.h
index 7bd2a5a75348..8dbecb3ad036 100644
--- a/include/uapi/linux/ppp-ioctl.h
+++ b/include/uapi/linux/ppp-ioctl.h
@@ -115,6 +115,8 @@ struct pppol2tp_ioc_stats {
 #define PPPIOCATTCHAN	_IOW('t', 56, int)	/* attach to ppp channel */
 #define PPPIOCGCHAN	_IOR('t', 55, int)	/* get ppp channel number */
 #define PPPIOCGL2TPSTATS _IOR('t', 54, struct pppol2tp_ioc_stats)
+#define PPPIOCBRIDGECHAN _IOW('t', 53, int)	/* bridge one channel to another */
+#define PPPIOCUNBRIDGECHAN _IO('t', 54)	/* unbridge channel */
 
 #define SIOCGPPPSTATS   (SIOCDEVPRIVATE + 0)
 #define SIOCGPPPVER     (SIOCDEVPRIVATE + 1)	/* NEVER change this!! */
-- 
cgit v1.2.3


From 14486c82612a177cb910980c70ba900827ca0894 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Wed, 4 Nov 2020 15:46:41 +0200
Subject: rfkill: add a reason to the HW rfkill state

The WLAN device may exist yet not be usable. This can happen
when the WLAN device is controllable by both the host and
some platform internal component.
We need some arbitration that is vendor specific, but when
the device is not available for the host, we need to reflect
this state towards the user space.

Add a reason field to the rfkill object (and event) so that
userspace can know why the device is in rfkill: because some
other platform component currently owns the device, or
because the actual hw rfkill signal is asserted.

Capable userspace can now determine the reason for the rfkill
and possibly do some negotiation on a side band channel using
a proprietary protocol to gain ownership on the device in case
the device is owned by some other component. When the host
gains ownership on the device, the kernel can remove the
RFKILL_HARD_BLOCK_NOT_OWNER reason and the hw rfkill state
will be off. Then, the userspace can bring the device up and
start normal operation.

The rfkill_event structure is enlarged to include the additional
byte; it is now 9 bytes long. Old user space will ask to read
only 8 bytes so that the kernel can know not to feed them with
more data. When the user space writes 8 bytes, new kernels will
just read what is present in the file descriptor. This new byte
is read only from the userspace standpoint anyway.

If a new user space uses an old kernel, it'll ask to read 9 bytes
but will get only 8, and it'll know that it didn't get the new
state. When it writes 9 bytes, the kernel will again ignore
this new byte which is read only from the userspace standpoint.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Link: https://lore.kernel.org/r/20201104134641.28816-1-emmanuel.grumbach@intel.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/rfkill.h      | 24 +++++++++++++++++++++++-
 include/uapi/linux/rfkill.h | 16 +++++++++++++++-
 net/rfkill/core.c           | 41 ++++++++++++++++++++++++++++++++++-------
 3 files changed, 72 insertions(+), 9 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index 8ad2487a86d5..231e06b74b50 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -137,6 +137,17 @@ void rfkill_unregister(struct rfkill *rfkill);
  */
 void rfkill_destroy(struct rfkill *rfkill);
 
+/**
+ * rfkill_set_hw_state_reason - Set the internal rfkill hardware block state
+ *	with a reason
+ * @rfkill: pointer to the rfkill class to modify.
+ * @blocked: the current hardware block state to set
+ * @reason: one of &enum rfkill_hard_block_reasons
+ *
+ * Prefer to use rfkill_set_hw_state if you don't need any special reason.
+ */
+bool rfkill_set_hw_state_reason(struct rfkill *rfkill,
+				bool blocked, unsigned long reason);
 /**
  * rfkill_set_hw_state - Set the internal rfkill hardware block state
  * @rfkill: pointer to the rfkill class to modify.
@@ -156,7 +167,11 @@ void rfkill_destroy(struct rfkill *rfkill);
  * should be blocked) so that drivers need not keep track of the soft
  * block state -- which they might not be able to.
  */
-bool rfkill_set_hw_state(struct rfkill *rfkill, bool blocked);
+static inline bool rfkill_set_hw_state(struct rfkill *rfkill, bool blocked)
+{
+	return rfkill_set_hw_state_reason(rfkill, blocked,
+					  RFKILL_HARD_BLOCK_SIGNAL);
+}
 
 /**
  * rfkill_set_sw_state - Set the internal rfkill software block state
@@ -256,6 +271,13 @@ static inline void rfkill_destroy(struct rfkill *rfkill)
 {
 }
 
+static inline bool rfkill_set_hw_state_reason(struct rfkill *rfkill,
+					      bool blocked,
+					      unsigned long reason)
+{
+	return blocked;
+}
+
 static inline bool rfkill_set_hw_state(struct rfkill *rfkill, bool blocked)
 {
 	return blocked;
diff --git a/include/uapi/linux/rfkill.h b/include/uapi/linux/rfkill.h
index 2e00dcebebd0..03e8af87b364 100644
--- a/include/uapi/linux/rfkill.h
+++ b/include/uapi/linux/rfkill.h
@@ -69,6 +69,16 @@ enum rfkill_operation {
 	RFKILL_OP_CHANGE_ALL,
 };
 
+/**
+ * enum rfkill_hard_block_reasons - hard block reasons
+ * @RFKILL_HARD_BLOCK_SIGNAL: the hardware rfkill signal is active
+ * @RFKILL_HARD_BLOCK_NOT_OWNER: the NIC is not owned by the host
+ */
+enum rfkill_hard_block_reasons {
+	RFKILL_HARD_BLOCK_SIGNAL	= 1 << 0,
+	RFKILL_HARD_BLOCK_NOT_OWNER	= 1 << 1,
+};
+
 /**
  * struct rfkill_event - events for userspace on /dev/rfkill
  * @idx: index of dev rfkill
@@ -76,6 +86,8 @@ enum rfkill_operation {
  * @op: operation code
  * @hard: hard state (0/1)
  * @soft: soft state (0/1)
+ * @hard_block_reasons: valid if hard is set. One or several reasons from
+ *	&enum rfkill_hard_block_reasons.
  *
  * Structure used for userspace communication on /dev/rfkill,
  * used for events from the kernel and control to the kernel.
@@ -84,7 +96,9 @@ struct rfkill_event {
 	__u32 idx;
 	__u8  type;
 	__u8  op;
-	__u8  soft, hard;
+	__u8  soft;
+	__u8  hard;
+	__u8  hard_block_reasons;
 } __attribute__((packed));
 
 /*
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 97101c55763d..68d6ef9e59fc 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -40,6 +40,7 @@ struct rfkill {
 	enum rfkill_type	type;
 
 	unsigned long		state;
+	unsigned long		hard_block_reasons;
 
 	u32			idx;
 
@@ -265,6 +266,7 @@ static void rfkill_fill_event(struct rfkill_event *ev, struct rfkill *rfkill,
 	ev->hard = !!(rfkill->state & RFKILL_BLOCK_HW);
 	ev->soft = !!(rfkill->state & (RFKILL_BLOCK_SW |
 					RFKILL_BLOCK_SW_PREV));
+	ev->hard_block_reasons = rfkill->hard_block_reasons;
 	spin_unlock_irqrestore(&rfkill->lock, flags);
 }
 
@@ -522,19 +524,29 @@ bool rfkill_get_global_sw_state(const enum rfkill_type type)
 }
 #endif
 
-bool rfkill_set_hw_state(struct rfkill *rfkill, bool blocked)
+bool rfkill_set_hw_state_reason(struct rfkill *rfkill,
+				bool blocked, unsigned long reason)
 {
 	unsigned long flags;
 	bool ret, prev;
 
 	BUG_ON(!rfkill);
 
+	if (WARN(reason &
+	    ~(RFKILL_HARD_BLOCK_SIGNAL | RFKILL_HARD_BLOCK_NOT_OWNER),
+	    "hw_state reason not supported: 0x%lx", reason))
+		return blocked;
+
 	spin_lock_irqsave(&rfkill->lock, flags);
-	prev = !!(rfkill->state & RFKILL_BLOCK_HW);
-	if (blocked)
+	prev = !!(rfkill->hard_block_reasons & reason);
+	if (blocked) {
 		rfkill->state |= RFKILL_BLOCK_HW;
-	else
-		rfkill->state &= ~RFKILL_BLOCK_HW;
+		rfkill->hard_block_reasons |= reason;
+	} else {
+		rfkill->hard_block_reasons &= ~reason;
+		if (!rfkill->hard_block_reasons)
+			rfkill->state &= ~RFKILL_BLOCK_HW;
+	}
 	ret = !!(rfkill->state & RFKILL_BLOCK_ANY);
 	spin_unlock_irqrestore(&rfkill->lock, flags);
 
@@ -546,7 +558,7 @@ bool rfkill_set_hw_state(struct rfkill *rfkill, bool blocked)
 
 	return ret;
 }
-EXPORT_SYMBOL(rfkill_set_hw_state);
+EXPORT_SYMBOL(rfkill_set_hw_state_reason);
 
 static void __rfkill_set_sw_state(struct rfkill *rfkill, bool blocked)
 {
@@ -744,6 +756,16 @@ static ssize_t soft_store(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RW(soft);
 
+static ssize_t hard_block_reasons_show(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	struct rfkill *rfkill = to_rfkill(dev);
+
+	return sprintf(buf, "0x%lx\n", rfkill->hard_block_reasons);
+}
+static DEVICE_ATTR_RO(hard_block_reasons);
+
 static u8 user_state_from_blocked(unsigned long state)
 {
 	if (state & RFKILL_BLOCK_HW)
@@ -796,6 +818,7 @@ static struct attribute *rfkill_dev_attrs[] = {
 	&dev_attr_state.attr,
 	&dev_attr_soft.attr,
 	&dev_attr_hard.attr,
+	&dev_attr_hard_block_reasons.attr,
 	NULL,
 };
 ATTRIBUTE_GROUPS(rfkill_dev);
@@ -811,6 +834,7 @@ static int rfkill_dev_uevent(struct device *dev, struct kobj_uevent_env *env)
 {
 	struct rfkill *rfkill = to_rfkill(dev);
 	unsigned long flags;
+	unsigned long reasons;
 	u32 state;
 	int error;
 
@@ -823,10 +847,13 @@ static int rfkill_dev_uevent(struct device *dev, struct kobj_uevent_env *env)
 		return error;
 	spin_lock_irqsave(&rfkill->lock, flags);
 	state = rfkill->state;
+	reasons = rfkill->hard_block_reasons;
 	spin_unlock_irqrestore(&rfkill->lock, flags);
 	error = add_uevent_var(env, "RFKILL_STATE=%d",
 			       user_state_from_blocked(state));
-	return error;
+	if (error)
+		return error;
+	return add_uevent_var(env, "RFKILL_HW_BLOCK_REASON=0x%lx", reasons);
 }
 
 void rfkill_pause_polling(struct rfkill *rfkill)
-- 
cgit v1.2.3


From 669b84134a2be14d333d4f82b65943d467404f87 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sun, 29 Nov 2020 17:30:55 +0200
Subject: cfg80211: include block-tx flag in channel switch started event

In the NL80211_CMD_CH_SWITCH_STARTED_NOTIFY event, include the
NL80211_ATTR_CH_SWITCH_BLOCK_TX flag attribute if block-tx was
requested by the AP.

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20201129172929.8953ef22cc64.Ifee9cab337a4369938545920ba5590559e91327a@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  3 ++-
 include/uapi/linux/nl80211.h |  3 ++-
 net/mac80211/cfg.c           |  2 +-
 net/mac80211/mlme.c          |  2 +-
 net/wireless/nl80211.c       | 17 +++++++++++------
 5 files changed, 17 insertions(+), 10 deletions(-)

(limited to 'include/uapi')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index f3dfb26b50b9..d9b67eed4f75 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -7532,6 +7532,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev,
  * @dev: the device on which the channel switch started
  * @chandef: the future channel definition
  * @count: the number of TBTTs until the channel switch happens
+ * @quiet: whether or not immediate quiet was requested by the AP
  *
  * Inform the userspace about the channel switch that has just
  * started, so that it can take appropriate actions (eg. starting
@@ -7539,7 +7540,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev,
  */
 void cfg80211_ch_switch_started_notify(struct net_device *dev,
 				       struct cfg80211_chan_def *chandef,
-				       u8 count);
+				       u8 count, bool quiet);
 
 /**
  * ieee80211_operating_class_to_band - convert operating class to band
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 3e0d4a038ab6..83c860395dd6 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2079,7 +2079,8 @@ enum nl80211_commands {
  *	until the channel switch event.
  * @NL80211_ATTR_CH_SWITCH_BLOCK_TX: flag attribute specifying that transmission
  *	must be blocked on the current channel (before the channel switch
- *	operation).
+ *	operation). Also included in the channel switch started event if quiet
+ *	was requested by the AP.
  * @NL80211_ATTR_CSA_IES: Nested set of attributes containing the IE information
  *	for the time while performing a channel switch.
  * @NL80211_ATTR_CNTDWN_OFFS_BEACON: An array of offsets (u16) to the channel
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index c0d0b15c10fd..7da343efd090 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -3450,7 +3450,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
 					  IEEE80211_QUEUE_STOP_REASON_CSA);
 
 	cfg80211_ch_switch_started_notify(sdata->dev, &sdata->csa_chandef,
-					  params->count);
+					  params->count, params->block_tx);
 
 	if (changed) {
 		ieee80211_bss_info_change_notify(sdata, changed);
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 67829667d394..d4da9822a111 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1509,7 +1509,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 	mutex_unlock(&local->mtx);
 
 	cfg80211_ch_switch_started_notify(sdata->dev, &csa_ie.chandef,
-					  csa_ie.count);
+					  csa_ie.count, csa_ie.mode);
 
 	if (local->ops->channel_switch) {
 		/* use driver's channel switch callback */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 4a7ef3b584be..c8d31181a660 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -17062,7 +17062,7 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
 				     struct cfg80211_chan_def *chandef,
 				     gfp_t gfp,
 				     enum nl80211_commands notif,
-				     u8 count)
+				     u8 count, bool quiet)
 {
 	struct sk_buff *msg;
 	void *hdr;
@@ -17083,9 +17083,13 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
 	if (nl80211_send_chandef(msg, chandef))
 		goto nla_put_failure;
 
-	if ((notif == NL80211_CMD_CH_SWITCH_STARTED_NOTIFY) &&
-	    (nla_put_u32(msg, NL80211_ATTR_CH_SWITCH_COUNT, count)))
+	if (notif == NL80211_CMD_CH_SWITCH_STARTED_NOTIFY) {
+		if (nla_put_u32(msg, NL80211_ATTR_CH_SWITCH_COUNT, count))
 			goto nla_put_failure;
+		if (quiet &&
+		    nla_put_flag(msg, NL80211_ATTR_CH_SWITCH_BLOCK_TX))
+			goto nla_put_failure;
+	}
 
 	genlmsg_end(msg, hdr);
 
@@ -17118,13 +17122,13 @@ void cfg80211_ch_switch_notify(struct net_device *dev,
 	cfg80211_sched_dfs_chan_update(rdev);
 
 	nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL,
-				 NL80211_CMD_CH_SWITCH_NOTIFY, 0);
+				 NL80211_CMD_CH_SWITCH_NOTIFY, 0, false);
 }
 EXPORT_SYMBOL(cfg80211_ch_switch_notify);
 
 void cfg80211_ch_switch_started_notify(struct net_device *dev,
 				       struct cfg80211_chan_def *chandef,
-				       u8 count)
+				       u8 count, bool quiet)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct wiphy *wiphy = wdev->wiphy;
@@ -17133,7 +17137,8 @@ void cfg80211_ch_switch_started_notify(struct net_device *dev,
 	trace_cfg80211_ch_switch_started_notify(dev, chandef);
 
 	nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL,
-				 NL80211_CMD_CH_SWITCH_STARTED_NOTIFY, count);
+				 NL80211_CMD_CH_SWITCH_STARTED_NOTIFY,
+				 count, quiet);
 }
 EXPORT_SYMBOL(cfg80211_ch_switch_started_notify);
 
-- 
cgit v1.2.3


From 3bb02143ff55fec55558da4ad48425bf368eb8ed Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sun, 6 Dec 2020 14:54:42 +0200
Subject: cfg80211: support immediate reconnect request hint

There are cases where it's necessary to disconnect, but an
immediate reconnection is desired. Support a hint to userspace
that this is the case, by including a new attribute in the
deauth or disassoc event.

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20201206145305.58d33941fb9d.I0e7168c205c7949529c8e3b86f3c9b12c01a7017@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  4 +++-
 include/uapi/linux/nl80211.h |  6 ++++++
 net/mac80211/mlme.c          |  5 +++--
 net/wireless/mlme.c          | 26 +++++++++++++++-----------
 net/wireless/nl80211.c       | 23 +++++++++++++++--------
 net/wireless/nl80211.h       |  8 +++++---
 net/wireless/trace.h         | 12 ++++++++----
 7 files changed, 55 insertions(+), 29 deletions(-)

(limited to 'include/uapi')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 1a79d6baa254..f7470eac2fc8 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -6406,13 +6406,15 @@ void cfg80211_abandon_assoc(struct net_device *dev, struct cfg80211_bss *bss);
  * @dev: network device
  * @buf: 802.11 frame (header + body)
  * @len: length of the frame data
+ * @reconnect: immediate reconnect is desired (include the nl80211 attribute)
  *
  * This function is called whenever deauthentication has been processed in
  * station mode. This includes both received deauthentication frames and
  * locally generated ones. This function may sleep. The caller must hold the
  * corresponding wdev's mutex.
  */
-void cfg80211_tx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len);
+void cfg80211_tx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len,
+			   bool reconnect);
 
 /**
  * cfg80211_rx_unprot_mlme_mgmt - notification of unprotected mlme mgmt frame
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 83c860395dd6..c5b729e91068 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2535,6 +2535,10 @@ enum nl80211_commands {
  *	This is a u8 attribute that encapsulates one of the values from
  *	&enum nl80211_sae_pwe_mechanism.
  *
+ * @NL80211_ATTR_RECONNECT_REQUESTED: flag attribute, used with deauth and
+ *	disassoc events to indicate that an immediate reconnect to the AP
+ *	is desired.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -3026,6 +3030,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_SAE_PWE,
 
+	NL80211_ATTR_RECONNECT_REQUESTED,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index d4da9822a111..5a2828dccfa5 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2741,7 +2741,7 @@ static void ieee80211_report_disconnect(struct ieee80211_sub_if_data *sdata,
 	};
 
 	if (tx)
-		cfg80211_tx_mlme_mgmt(sdata->dev, buf, len);
+		cfg80211_tx_mlme_mgmt(sdata->dev, buf, len, false);
 	else
 		cfg80211_rx_mlme_mgmt(sdata->dev, buf, len);
 
@@ -4721,7 +4721,8 @@ void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata)
 		if (ifmgd->auth_data)
 			ieee80211_destroy_auth_data(sdata, false);
 		cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf,
-				      IEEE80211_DEAUTH_FRAME_LEN);
+				      IEEE80211_DEAUTH_FRAME_LEN,
+				      false);
 	}
 
 	/* This is a bit of a hack - we should find a better and more generic
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 0ac820780437..e1e90761dc00 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -4,7 +4,7 @@
  *
  * Copyright (c) 2009, Jouni Malinen <j@w1.fi>
  * Copyright (c) 2015		Intel Deutschland GmbH
- * Copyright (C) 2019 Intel Corporation
+ * Copyright (C) 2019-2020 Intel Corporation
  */
 
 #include <linux/kernel.h>
@@ -81,7 +81,8 @@ static void cfg80211_process_auth(struct wireless_dev *wdev,
 }
 
 static void cfg80211_process_deauth(struct wireless_dev *wdev,
-				    const u8 *buf, size_t len)
+				    const u8 *buf, size_t len,
+				    bool reconnect)
 {
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf;
@@ -89,7 +90,7 @@ static void cfg80211_process_deauth(struct wireless_dev *wdev,
 	u16 reason_code = le16_to_cpu(mgmt->u.deauth.reason_code);
 	bool from_ap = !ether_addr_equal(mgmt->sa, wdev->netdev->dev_addr);
 
-	nl80211_send_deauth(rdev, wdev->netdev, buf, len, GFP_KERNEL);
+	nl80211_send_deauth(rdev, wdev->netdev, buf, len, reconnect, GFP_KERNEL);
 
 	if (!wdev->current_bss ||
 	    !ether_addr_equal(wdev->current_bss->pub.bssid, bssid))
@@ -100,7 +101,8 @@ static void cfg80211_process_deauth(struct wireless_dev *wdev,
 }
 
 static void cfg80211_process_disassoc(struct wireless_dev *wdev,
-				      const u8 *buf, size_t len)
+				      const u8 *buf, size_t len,
+				      bool reconnect)
 {
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf;
@@ -108,7 +110,8 @@ static void cfg80211_process_disassoc(struct wireless_dev *wdev,
 	u16 reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code);
 	bool from_ap = !ether_addr_equal(mgmt->sa, wdev->netdev->dev_addr);
 
-	nl80211_send_disassoc(rdev, wdev->netdev, buf, len, GFP_KERNEL);
+	nl80211_send_disassoc(rdev, wdev->netdev, buf, len, reconnect,
+			      GFP_KERNEL);
 
 	if (WARN_ON(!wdev->current_bss ||
 		    !ether_addr_equal(wdev->current_bss->pub.bssid, bssid)))
@@ -133,9 +136,9 @@ void cfg80211_rx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len)
 	if (ieee80211_is_auth(mgmt->frame_control))
 		cfg80211_process_auth(wdev, buf, len);
 	else if (ieee80211_is_deauth(mgmt->frame_control))
-		cfg80211_process_deauth(wdev, buf, len);
+		cfg80211_process_deauth(wdev, buf, len, false);
 	else if (ieee80211_is_disassoc(mgmt->frame_control))
-		cfg80211_process_disassoc(wdev, buf, len);
+		cfg80211_process_disassoc(wdev, buf, len, false);
 }
 EXPORT_SYMBOL(cfg80211_rx_mlme_mgmt);
 
@@ -180,22 +183,23 @@ void cfg80211_abandon_assoc(struct net_device *dev, struct cfg80211_bss *bss)
 }
 EXPORT_SYMBOL(cfg80211_abandon_assoc);
 
-void cfg80211_tx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len)
+void cfg80211_tx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len,
+			   bool reconnect)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct ieee80211_mgmt *mgmt = (void *)buf;
 
 	ASSERT_WDEV_LOCK(wdev);
 
-	trace_cfg80211_tx_mlme_mgmt(dev, buf, len);
+	trace_cfg80211_tx_mlme_mgmt(dev, buf, len, reconnect);
 
 	if (WARN_ON(len < 2))
 		return;
 
 	if (ieee80211_is_deauth(mgmt->frame_control))
-		cfg80211_process_deauth(wdev, buf, len);
+		cfg80211_process_deauth(wdev, buf, len, reconnect);
 	else
-		cfg80211_process_disassoc(wdev, buf, len);
+		cfg80211_process_disassoc(wdev, buf, len, reconnect);
 }
 EXPORT_SYMBOL(cfg80211_tx_mlme_mgmt);
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 910872974f2d..390e6e0f23ac 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -718,6 +718,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_SAE_PWE] =
 		NLA_POLICY_RANGE(NLA_U8, NL80211_SAE_PWE_HUNT_AND_PECK,
 				 NL80211_SAE_PWE_BOTH),
+	[NL80211_ATTR_RECONNECT_REQUESTED] = { .type = NLA_REJECT },
 };
 
 /* policy for the key attributes */
@@ -15855,7 +15856,7 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev,
 				    const u8 *buf, size_t len,
 				    enum nl80211_commands cmd, gfp_t gfp,
 				    int uapsd_queues, const u8 *req_ies,
-				    size_t req_ies_len)
+				    size_t req_ies_len, bool reconnect)
 {
 	struct sk_buff *msg;
 	void *hdr;
@@ -15877,6 +15878,9 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev,
 	     nla_put(msg, NL80211_ATTR_REQ_IE, req_ies_len, req_ies)))
 		goto nla_put_failure;
 
+	if (reconnect && nla_put_flag(msg, NL80211_ATTR_RECONNECT_REQUESTED))
+		goto nla_put_failure;
+
 	if (uapsd_queues >= 0) {
 		struct nlattr *nla_wmm =
 			nla_nest_start_noflag(msg, NL80211_ATTR_STA_WME);
@@ -15905,7 +15909,8 @@ void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev,
 			  size_t len, gfp_t gfp)
 {
 	nl80211_send_mlme_event(rdev, netdev, buf, len,
-				NL80211_CMD_AUTHENTICATE, gfp, -1, NULL, 0);
+				NL80211_CMD_AUTHENTICATE, gfp, -1, NULL, 0,
+				false);
 }
 
 void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev,
@@ -15915,23 +15920,25 @@ void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev,
 {
 	nl80211_send_mlme_event(rdev, netdev, buf, len,
 				NL80211_CMD_ASSOCIATE, gfp, uapsd_queues,
-				req_ies, req_ies_len);
+				req_ies, req_ies_len, false);
 }
 
 void nl80211_send_deauth(struct cfg80211_registered_device *rdev,
 			 struct net_device *netdev, const u8 *buf,
-			 size_t len, gfp_t gfp)
+			 size_t len, bool reconnect, gfp_t gfp)
 {
 	nl80211_send_mlme_event(rdev, netdev, buf, len,
-				NL80211_CMD_DEAUTHENTICATE, gfp, -1, NULL, 0);
+				NL80211_CMD_DEAUTHENTICATE, gfp, -1, NULL, 0,
+				reconnect);
 }
 
 void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,
 			   struct net_device *netdev, const u8 *buf,
-			   size_t len, gfp_t gfp)
+			   size_t len, bool reconnect, gfp_t gfp)
 {
 	nl80211_send_mlme_event(rdev, netdev, buf, len,
-				NL80211_CMD_DISASSOCIATE, gfp, -1, NULL, 0);
+				NL80211_CMD_DISASSOCIATE, gfp, -1, NULL, 0,
+				reconnect);
 }
 
 void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev, const u8 *buf,
@@ -15962,7 +15969,7 @@ void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev, const u8 *buf,
 
 	trace_cfg80211_rx_unprot_mlme_mgmt(dev, buf, len);
 	nl80211_send_mlme_event(rdev, dev, buf, len, cmd, GFP_ATOMIC, -1,
-				NULL, 0);
+				NULL, 0, false);
 }
 EXPORT_SYMBOL(cfg80211_rx_unprot_mlme_mgmt);
 
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index d3e8e426c486..a3f387770f1b 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Portions of this file
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018, 2020 Intel Corporation
  */
 #ifndef __NET_WIRELESS_NL80211_H
 #define __NET_WIRELESS_NL80211_H
@@ -69,10 +69,12 @@ void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev,
 			   const u8 *req_ies, size_t req_ies_len);
 void nl80211_send_deauth(struct cfg80211_registered_device *rdev,
 			 struct net_device *netdev,
-			 const u8 *buf, size_t len, gfp_t gfp);
+			 const u8 *buf, size_t len,
+			 bool reconnect, gfp_t gfp);
 void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,
 			   struct net_device *netdev,
-			   const u8 *buf, size_t len, gfp_t gfp);
+			   const u8 *buf, size_t len,
+			   bool reconnect, gfp_t gfp);
 void nl80211_send_auth_timeout(struct cfg80211_registered_device *rdev,
 			       struct net_device *netdev,
 			       const u8 *addr, gfp_t gfp);
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 817c6fef13be..d75cd23aea02 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -2679,19 +2679,23 @@ DEFINE_EVENT(netdev_frame_event, cfg80211_rx_mlme_mgmt,
 );
 
 TRACE_EVENT(cfg80211_tx_mlme_mgmt,
-	TP_PROTO(struct net_device *netdev, const u8 *buf, int len),
-	TP_ARGS(netdev, buf, len),
+	TP_PROTO(struct net_device *netdev, const u8 *buf, int len,
+		 bool reconnect),
+	TP_ARGS(netdev, buf, len, reconnect),
 	TP_STRUCT__entry(
 		NETDEV_ENTRY
 		__dynamic_array(u8, frame, len)
+		__field(int, reconnect)
 	),
 	TP_fast_assign(
 		NETDEV_ASSIGN;
 		memcpy(__get_dynamic_array(frame), buf, len);
+		__entry->reconnect = reconnect;
 	),
-	TP_printk(NETDEV_PR_FMT ", ftype:0x%.2x",
+	TP_printk(NETDEV_PR_FMT ", ftype:0x%.2x reconnect:%d",
 		  NETDEV_PR_ARG,
-		  le16_to_cpup((__le16 *)__get_dynamic_array(frame)))
+		  le16_to_cpup((__le16 *)__get_dynamic_array(frame)),
+		  __entry->reconnect)
 );
 
 DECLARE_EVENT_CLASS(netdev_mac_evt,
-- 
cgit v1.2.3


From 6bdb68cef7bf57cdb3f8d1498623556d6823ff3a Mon Sep 17 00:00:00 2001
From: Carl Huang <cjhuang@codeaurora.org>
Date: Thu, 3 Dec 2020 05:37:26 -0500
Subject: nl80211: add common API to configure SAR power limitations

NL80211_CMD_SET_SAR_SPECS is added to configure SAR from
user space. NL80211_ATTR_SAR_SPEC is used to pass the SAR
power specification when used with NL80211_CMD_SET_SAR_SPECS.

Wireless driver needs to register SAR type, supported frequency
ranges to wiphy, so user space can query it. The index in
frequency range is used to specify which sub band the power
limitation applies to. The SAR type is for compatibility, so later
other SAR mechanism can be implemented without breaking the user
space SAR applications.

Normal process is user space queries the SAR capability, and
gets the index of supported frequency ranges and associates the
power limitation with this index and sends to kernel.

Here is an example of a message sent to the kernel:
8c 00 00 00 08 00 01 00 00 00 00 00 38 00 2b 81
08 00 01 00 00 00 00 00 2c 00 02 80 14 00 00 80
08 00 02 00 00 00 00 00 08 00 01 00 38 00 00 00
14 00 01 80 08 00 02 00 01 00 00 00 08 00 01 00
48 00 00 00

NL80211_CMD_SET_SAR_SPECS:  0x8c
NL80211_ATTR_WIPHY:     0x01(phy idx is 0)
NL80211_ATTR_SAR_SPEC:  0x812b (NLA_NESTED)
NL80211_SAR_ATTR_TYPE:  0x00 (NL80211_SAR_TYPE_POWER)
NL80211_SAR_ATTR_SPECS: 0x8002 (NLA_NESTED)
freq range 0 power: 0x38 in 0.25dbm unit (14dbm)
freq range 1 power: 0x48 in 0.25dbm unit (18dbm)

Signed-off-by: Carl Huang <cjhuang@codeaurora.org>
Reviewed-by: Brian Norris <briannorris@chromium.org>
Reviewed-by: Abhishek Kumar <kuabhs@chromium.org>
Link: https://lore.kernel.org/r/20201203103728.3034-2-cjhuang@codeaurora.org
[minor edits, NLA parse cleanups]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  52 ++++++++++++
 include/uapi/linux/nl80211.h | 105 +++++++++++++++++++++++++
 net/wireless/nl80211.c       | 183 +++++++++++++++++++++++++++++++++++++++++++
 net/wireless/rdev-ops.h      |  12 +++
 net/wireless/trace.h         |  19 +++++
 5 files changed, 371 insertions(+)

(limited to 'include/uapi')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index f7470eac2fc8..9a4bbccddc7f 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1732,6 +1732,54 @@ struct station_info {
 	u8 connected_to_as;
 };
 
+/**
+ * struct cfg80211_sar_sub_specs - sub specs limit
+ * @power: power limitation in 0.25dbm
+ * @freq_range_index: index the power limitation applies to
+ */
+struct cfg80211_sar_sub_specs {
+	s32 power;
+	u32 freq_range_index;
+};
+
+/**
+ * struct cfg80211_sar_specs - sar limit specs
+ * @type: it's set with power in 0.25dbm or other types
+ * @num_sub_specs: number of sar sub specs
+ * @sub_specs: memory to hold the sar sub specs
+ */
+struct cfg80211_sar_specs {
+	enum nl80211_sar_type type;
+	u32 num_sub_specs;
+	struct cfg80211_sar_sub_specs sub_specs[];
+};
+
+
+/**
+ * struct cfg80211_sar_freq_ranges - sar frequency ranges
+ * @start_freq:  start range edge frequency
+ * @end_freq:    end range edge frequency
+ */
+struct cfg80211_sar_freq_ranges {
+	u32 start_freq;
+	u32 end_freq;
+};
+
+/**
+ * struct cfg80211_sar_capa - sar limit capability
+ * @type: it's set via power in 0.25dbm or other types
+ * @num_freq_ranges: number of frequency ranges
+ * @freq_ranges: memory to hold the freq ranges.
+ *
+ * Note: WLAN driver may append new ranges or split an existing
+ * range to small ones and then append them.
+ */
+struct cfg80211_sar_capa {
+	enum nl80211_sar_type type;
+	u32 num_freq_ranges;
+	const struct cfg80211_sar_freq_ranges *freq_ranges;
+};
+
 #if IS_ENABLED(CONFIG_CFG80211)
 /**
  * cfg80211_get_station - retrieve information about a given station
@@ -4249,6 +4297,8 @@ struct cfg80211_ops {
 				  struct cfg80211_tid_config *tid_conf);
 	int	(*reset_tid_config)(struct wiphy *wiphy, struct net_device *dev,
 				    const u8 *peer, u8 tids);
+	int	(*set_sar_specs)(struct wiphy *wiphy,
+				 struct cfg80211_sar_specs *sar);
 };
 
 /*
@@ -5017,6 +5067,8 @@ struct wiphy {
 
 	u8 max_data_retry_count;
 
+	const struct cfg80211_sar_capa *sar_capa;
+
 	char priv[] __aligned(NETDEV_ALIGN);
 };
 
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index c5b729e91068..40832d13c2f1 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1178,6 +1178,10 @@
  *	includes the contents of the frame. %NL80211_ATTR_ACK flag is included
  *	if the recipient acknowledged the frame.
  *
+ * @NL80211_CMD_SET_SAR_SPECS: SAR power limitation configuration is
+ *	passed using %NL80211_ATTR_SAR_SPEC. %NL80211_ATTR_WIPHY is used to
+ *	specify the wiphy index to be applied to.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -1408,6 +1412,8 @@ enum nl80211_commands {
 
 	NL80211_CMD_CONTROL_PORT_FRAME_TX_STATUS,
 
+	NL80211_CMD_SET_SAR_SPECS,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -2535,6 +2541,11 @@ enum nl80211_commands {
  *	This is a u8 attribute that encapsulates one of the values from
  *	&enum nl80211_sae_pwe_mechanism.
  *
+ * @NL80211_ATTR_SAR_SPEC: SAR power limitation specification when
+ *	used with %NL80211_CMD_SET_SAR_SPECS. The message contains fields
+ *	of &enum nl80211_sar_attrs, which specify the SAR type and related
+ *	SAR specs. SAR specs contain an array of &enum nl80211_sar_specs_attrs.
+ *
  * @NL80211_ATTR_RECONNECT_REQUESTED: flag attribute, used with deauth and
  *	disassoc events to indicate that an immediate reconnect to the AP
  *	is desired.
@@ -3032,6 +3043,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_RECONNECT_REQUESTED,
 
+	NL80211_ATTR_SAR_SPEC,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -7163,4 +7176,96 @@ enum nl80211_sae_pwe_mechanism {
 	NL80211_SAE_PWE_HASH_TO_ELEMENT,
 	NL80211_SAE_PWE_BOTH,
 };
+
+/**
+ * enum nl80211_sar_type - type of SAR specs
+ *
+ * @NL80211_SAR_TYPE_POWER: power limitation specified in 0.25dBm unit
+ *
+ */
+enum nl80211_sar_type {
+	NL80211_SAR_TYPE_POWER,
+
+	/* add new type here */
+
+	/* Keep last */
+	NUM_NL80211_SAR_TYPE,
+};
+
+/**
+ * enum nl80211_sar_attrs - Attributes for SAR spec
+ *
+ * @NL80211_SAR_ATTR_TYPE: the SAR type as defined in &enum nl80211_sar_type.
+ *
+ * @NL80211_SAR_ATTR_SPECS: Nested array of SAR power
+ *	limit specifications. Each specification contains a set
+ *	of &enum nl80211_sar_specs_attrs.
+ *
+ *	For SET operation, it contains array of %NL80211_SAR_ATTR_SPECS_POWER
+ *	and %NL80211_SAR_ATTR_SPECS_RANGE_INDEX.
+ *
+ *	For sar_capa dump, it contains array of
+ *	%NL80211_SAR_ATTR_SPECS_START_FREQ
+ *	and %NL80211_SAR_ATTR_SPECS_END_FREQ.
+ *
+ * @__NL80211_SAR_ATTR_LAST: Internal
+ * @NL80211_SAR_ATTR_MAX: highest sar attribute
+ *
+ * These attributes are used with %NL80211_CMD_SET_SAR_SPECS
+ */
+enum nl80211_sar_attrs {
+	__NL80211_SAR_ATTR_INVALID,
+
+	NL80211_SAR_ATTR_TYPE,
+	NL80211_SAR_ATTR_SPECS,
+
+	__NL80211_SAR_ATTR_LAST,
+	NL80211_SAR_ATTR_MAX = __NL80211_SAR_ATTR_LAST - 1,
+};
+
+/**
+ * enum nl80211_sar_specs_attrs - Attributes for SAR power limit specs
+ *
+ * @NL80211_SAR_ATTR_SPECS_POWER: Required (s32) value to specify the actual
+ *	power limit value in units of 0.25 dBm if type is
+ *	NL80211_SAR_TYPE_POWER. (i.e., a value of 44 represents 11 dBm).
+ *	0 means userspace doesn't have SAR limitation on this associated range.
+ *
+ * @NL80211_SAR_ATTR_SPECS_RANGE_INDEX: Required (u32) value to specify the
+ *	index of exported freq range table and the associated power limitation
+ *	is applied to this range.
+ *
+ *	Userspace isn't required to set all the ranges advertised by WLAN driver,
+ *	and userspace can skip some certain ranges. These skipped ranges don't
+ *	have SAR limitations, and they are same as setting the
+ *	%NL80211_SAR_ATTR_SPECS_POWER to any unreasonably high value because any
+ *	value higher than regulatory allowed value just means SAR power
+ *	limitation is removed, but it's required to set at least one range.
+ *	It's not allowed to set duplicated range in one SET operation.
+ *
+ *	Every SET operation overwrites previous SET operation.
+ *
+ * @NL80211_SAR_ATTR_SPECS_START_FREQ: Required (u32) value to specify the start
+ *	frequency of this range edge when registering SAR capability to wiphy.
+ *	It's not a channel center frequency. The unit is kHz.
+ *
+ * @NL80211_SAR_ATTR_SPECS_END_FREQ: Required (u32) value to specify the end
+ *	frequency of this range edge when registering SAR capability to wiphy.
+ *	It's not a channel center frequency. The unit is kHz.
+ *
+ * @__NL80211_SAR_ATTR_SPECS_LAST: Internal
+ * @NL80211_SAR_ATTR_SPECS_MAX: highest sar specs attribute
+ */
+enum nl80211_sar_specs_attrs {
+	__NL80211_SAR_ATTR_SPECS_INVALID,
+
+	NL80211_SAR_ATTR_SPECS_POWER,
+	NL80211_SAR_ATTR_SPECS_RANGE_INDEX,
+	NL80211_SAR_ATTR_SPECS_START_FREQ,
+	NL80211_SAR_ATTR_SPECS_END_FREQ,
+
+	__NL80211_SAR_ATTR_SPECS_LAST,
+	NL80211_SAR_ATTR_SPECS_MAX = __NL80211_SAR_ATTR_SPECS_LAST - 1,
+};
+
 #endif /* __LINUX_NL80211_H */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 390e6e0f23ac..7db6079fab04 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -399,6 +399,18 @@ nl80211_unsol_bcast_probe_resp_policy[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_MAX +
 						       .len = IEEE80211_MAX_DATA_LEN }
 };
 
+static const struct nla_policy
+sar_specs_policy[NL80211_SAR_ATTR_SPECS_MAX + 1] = {
+	[NL80211_SAR_ATTR_SPECS_POWER] = { .type = NLA_S32 },
+	[NL80211_SAR_ATTR_SPECS_RANGE_INDEX] = {.type = NLA_U32 },
+};
+
+static const struct nla_policy
+sar_policy[NL80211_SAR_ATTR_MAX + 1] = {
+	[NL80211_SAR_ATTR_TYPE] = NLA_POLICY_MAX(NLA_U32, NUM_NL80211_SAR_TYPE),
+	[NL80211_SAR_ATTR_SPECS] = NLA_POLICY_NESTED_ARRAY(sar_specs_policy),
+};
+
 static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD },
 	[NL80211_ATTR_WIPHY] = { .type = NLA_U32 },
@@ -719,6 +731,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 		NLA_POLICY_RANGE(NLA_U8, NL80211_SAE_PWE_HUNT_AND_PECK,
 				 NL80211_SAE_PWE_BOTH),
 	[NL80211_ATTR_RECONNECT_REQUESTED] = { .type = NLA_REJECT },
+	[NL80211_ATTR_SAR_SPEC] = NLA_POLICY_NESTED(sar_policy),
 };
 
 /* policy for the key attributes */
@@ -2095,6 +2108,56 @@ fail:
 	return -ENOBUFS;
 }
 
+static int
+nl80211_put_sar_specs(struct cfg80211_registered_device *rdev,
+		      struct sk_buff *msg)
+{
+	struct nlattr *sar_capa, *specs, *sub_freq_range;
+	u8 num_freq_ranges;
+	int i;
+
+	if (!rdev->wiphy.sar_capa)
+		return 0;
+
+	num_freq_ranges = rdev->wiphy.sar_capa->num_freq_ranges;
+
+	sar_capa = nla_nest_start(msg, NL80211_ATTR_SAR_SPEC);
+	if (!sar_capa)
+		return -ENOSPC;
+
+	if (nla_put_u32(msg, NL80211_SAR_ATTR_TYPE, rdev->wiphy.sar_capa->type))
+		goto fail;
+
+	specs = nla_nest_start(msg, NL80211_SAR_ATTR_SPECS);
+	if (!specs)
+		goto fail;
+
+	/* report supported freq_ranges */
+	for (i = 0; i < num_freq_ranges; i++) {
+		sub_freq_range = nla_nest_start(msg, i + 1);
+		if (!sub_freq_range)
+			goto fail;
+
+		if (nla_put_u32(msg, NL80211_SAR_ATTR_SPECS_START_FREQ,
+				rdev->wiphy.sar_capa->freq_ranges[i].start_freq))
+			goto fail;
+
+		if (nla_put_u32(msg, NL80211_SAR_ATTR_SPECS_END_FREQ,
+				rdev->wiphy.sar_capa->freq_ranges[i].end_freq))
+			goto fail;
+
+		nla_nest_end(msg, sub_freq_range);
+	}
+
+	nla_nest_end(msg, specs);
+	nla_nest_end(msg, sar_capa);
+
+	return 0;
+fail:
+	nla_nest_cancel(msg, sar_capa);
+	return -ENOBUFS;
+}
+
 struct nl80211_dump_wiphy_state {
 	s64 filter_wiphy;
 	long start;
@@ -2344,6 +2407,8 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
 			CMD(set_multicast_to_unicast, SET_MULTICAST_TO_UNICAST);
 			CMD(update_connect_params, UPDATE_CONNECT_PARAMS);
 			CMD(update_ft_ies, UPDATE_FT_IES);
+			if (rdev->wiphy.sar_capa)
+				CMD(set_sar_specs, SET_SAR_SPECS);
 		}
 #undef CMD
 
@@ -2669,6 +2734,11 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
 
 		if (nl80211_put_tid_config_support(rdev, msg))
 			goto nla_put_failure;
+		state->split_start++;
+		break;
+	case 16:
+		if (nl80211_put_sar_specs(rdev, msg))
+			goto nla_put_failure;
 
 		/* done */
 		state->split_start = 0;
@@ -14668,6 +14738,111 @@ static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
 	}
 }
 
+static int nl80211_set_sar_sub_specs(struct cfg80211_registered_device *rdev,
+				     struct cfg80211_sar_specs *sar_specs,
+				     struct nlattr *spec[], int index)
+{
+	u32 range_index, i;
+
+	if (!sar_specs || !spec)
+		return -EINVAL;
+
+	if (!spec[NL80211_SAR_ATTR_SPECS_POWER] ||
+	    !spec[NL80211_SAR_ATTR_SPECS_RANGE_INDEX])
+		return -EINVAL;
+
+	range_index = nla_get_u32(spec[NL80211_SAR_ATTR_SPECS_RANGE_INDEX]);
+
+	/* check if range_index exceeds num_freq_ranges */
+	if (range_index >= rdev->wiphy.sar_capa->num_freq_ranges)
+		return -EINVAL;
+
+	/* check if range_index duplicates */
+	for (i = 0; i < index; i++) {
+		if (sar_specs->sub_specs[i].freq_range_index == range_index)
+			return -EINVAL;
+	}
+
+	sar_specs->sub_specs[index].power =
+		nla_get_s32(spec[NL80211_SAR_ATTR_SPECS_POWER]);
+
+	sar_specs->sub_specs[index].freq_range_index = range_index;
+
+	return 0;
+}
+
+static int nl80211_set_sar_specs(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct nlattr *spec[NL80211_SAR_ATTR_SPECS_MAX + 1];
+	struct nlattr *tb[NL80211_SAR_ATTR_MAX + 1];
+	struct cfg80211_sar_specs *sar_spec;
+	enum nl80211_sar_type type;
+	struct nlattr *spec_list;
+	u32 specs;
+	int rem, err;
+
+	if (!rdev->wiphy.sar_capa || !rdev->ops->set_sar_specs)
+		return -EOPNOTSUPP;
+
+	if (!info->attrs[NL80211_ATTR_SAR_SPEC])
+		return -EINVAL;
+
+	nla_parse_nested(tb, NL80211_SAR_ATTR_MAX,
+			 info->attrs[NL80211_ATTR_SAR_SPEC],
+			 NULL, NULL);
+
+	if (!tb[NL80211_SAR_ATTR_TYPE] || !tb[NL80211_SAR_ATTR_SPECS])
+		return -EINVAL;
+
+	type = nla_get_u32(tb[NL80211_SAR_ATTR_TYPE]);
+	if (type != rdev->wiphy.sar_capa->type)
+		return -EINVAL;
+
+	specs = 0;
+	nla_for_each_nested(spec_list, tb[NL80211_SAR_ATTR_SPECS], rem)
+		specs++;
+
+	if (specs > rdev->wiphy.sar_capa->num_freq_ranges)
+		return -EINVAL;
+
+	sar_spec = kzalloc(sizeof(*sar_spec) +
+			   specs * sizeof(struct cfg80211_sar_sub_specs),
+			   GFP_KERNEL);
+	if (!sar_spec)
+		return -ENOMEM;
+
+	sar_spec->type = type;
+	specs = 0;
+	nla_for_each_nested(spec_list, tb[NL80211_SAR_ATTR_SPECS], rem) {
+		nla_parse_nested(spec, NL80211_SAR_ATTR_SPECS_MAX,
+				 spec_list, NULL, NULL);
+
+		switch (type) {
+		case NL80211_SAR_TYPE_POWER:
+			if (nl80211_set_sar_sub_specs(rdev, sar_spec,
+						      spec, specs)) {
+				err = -EINVAL;
+				goto error;
+			}
+			break;
+		default:
+			err = -EINVAL;
+			goto error;
+		}
+		specs++;
+	}
+
+	sar_spec->num_sub_specs = specs;
+
+	rdev->cur_cmd_info = info;
+	err = rdev_set_sar_specs(rdev, sar_spec);
+	rdev->cur_cmd_info = NULL;
+error:
+	kfree(sar_spec);
+	return err;
+}
+
 static const struct genl_ops nl80211_ops[] = {
 	{
 		.cmd = NL80211_CMD_GET_WIPHY,
@@ -15521,6 +15696,14 @@ static const struct genl_small_ops nl80211_small_ops[] = {
 		.internal_flags = NL80211_FLAG_NEED_NETDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
+	{
+		.cmd = NL80211_CMD_SET_SAR_SPECS,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = nl80211_set_sar_specs,
+		.flags = GENL_UNS_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_WIPHY |
+				  NL80211_FLAG_NEED_RTNL,
+	},
 };
 
 static struct genl_family nl80211_fam __ro_after_init = {
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 5e2f349c92a8..8b1358d04ca2 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -1346,4 +1346,16 @@ static inline int rdev_reset_tid_config(struct cfg80211_registered_device *rdev,
 	return ret;
 }
 
+static inline int rdev_set_sar_specs(struct cfg80211_registered_device *rdev,
+				     struct cfg80211_sar_specs *sar)
+{
+	int ret;
+
+	trace_rdev_set_sar_specs(&rdev->wiphy, sar);
+	ret = rdev->ops->set_sar_specs(&rdev->wiphy, sar);
+	trace_rdev_return_int(&rdev->wiphy, ret);
+
+	return ret;
+}
+
 #endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index d75cd23aea02..76b777d5903f 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -3546,6 +3546,25 @@ TRACE_EVENT(rdev_reset_tid_config,
 	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: " MAC_PR_FMT ", tids: 0x%x",
 		  WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer), __entry->tids)
 );
+
+TRACE_EVENT(rdev_set_sar_specs,
+	TP_PROTO(struct wiphy *wiphy, struct cfg80211_sar_specs *sar),
+	TP_ARGS(wiphy, sar),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		__field(u16, type)
+		__field(u16, num)
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		__entry->type = sar->type;
+		__entry->num = sar->num_sub_specs;
+
+	),
+	TP_printk(WIPHY_PR_FMT ", Set type:%d, num_specs:%d",
+		  WIPHY_PR_ARG, __entry->type, __entry->num)
+);
+
 #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */
 
 #undef TRACE_INCLUDE_PATH
-- 
cgit v1.2.3


From f25b463883a8a2d1b7303a63339c0d589fc94f1e Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Tue, 17 Nov 2020 13:39:14 -0700
Subject: dmaengine: idxd: add IAX configuration support in the IDXD driver

Add support to allow configuration of Intel Analytics Accelerator (IAX) in
addition to the Intel Data Streaming Accelerator (DSA). The IAX hardware
has the same configuration interface as DSA. The main difference
is the type of operations it performs. We can support the DSA and
IAX devices on the same driver with some tweaks.

IAX has a 64B completion record that needs to be 64B aligned, as opposed to
a 32B completion record that is 32B aligned for DSA. IAX also does not
support token management.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/160564555488.1834439.4261958859935360473.stgit@djiang5-desk3.ch.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/idxd/cdev.c      |  1 +
 drivers/dma/idxd/device.c    | 37 ++++++++++++++++-----
 drivers/dma/idxd/idxd.h      | 24 +++++++++++---
 drivers/dma/idxd/init.c      | 14 ++++++++
 drivers/dma/idxd/registers.h |  1 +
 drivers/dma/idxd/submit.c    |  2 +-
 drivers/dma/idxd/sysfs.c     | 46 ++++++++++++++++++++++++--
 include/uapi/linux/idxd.h    | 79 ++++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 187 insertions(+), 17 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
index 010b820d8f74..0db9b82ed8cf 100644
--- a/drivers/dma/idxd/cdev.c
+++ b/drivers/dma/idxd/cdev.c
@@ -28,6 +28,7 @@ struct idxd_cdev_context {
  */
 static struct idxd_cdev_context ictx[IDXD_TYPE_MAX] = {
 	{ .name = "dsa" },
+	{ .name = "iax" }
 };
 
 struct idxd_user_context {
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index b75f9a09666e..47ff8e387172 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -131,6 +131,8 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq)
 	struct idxd_device *idxd = wq->idxd;
 	struct device *dev = &idxd->pdev->dev;
 	int rc, num_descs, i;
+	int align;
+	u64 tmp;
 
 	if (wq->type != IDXD_WQT_KERNEL)
 		return 0;
@@ -142,14 +144,27 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq)
 	if (rc < 0)
 		return rc;
 
-	wq->compls_size = num_descs * sizeof(struct dsa_completion_record);
-	wq->compls = dma_alloc_coherent(dev, wq->compls_size,
-					&wq->compls_addr, GFP_KERNEL);
-	if (!wq->compls) {
+	if (idxd->type == IDXD_TYPE_DSA)
+		align = 32;
+	else if (idxd->type == IDXD_TYPE_IAX)
+		align = 64;
+	else
+		return -ENODEV;
+
+	wq->compls_size = num_descs * idxd->compl_size + align;
+	wq->compls_raw = dma_alloc_coherent(dev, wq->compls_size,
+					    &wq->compls_addr_raw, GFP_KERNEL);
+	if (!wq->compls_raw) {
 		rc = -ENOMEM;
 		goto fail_alloc_compls;
 	}
 
+	/* Adjust alignment */
+	wq->compls_addr = (wq->compls_addr_raw + (align - 1)) & ~(align - 1);
+	tmp = (u64)wq->compls_raw;
+	tmp = (tmp + (align - 1)) & ~(align - 1);
+	wq->compls = (struct dsa_completion_record *)tmp;
+
 	rc = alloc_descs(wq, num_descs);
 	if (rc < 0)
 		goto fail_alloc_descs;
@@ -163,9 +178,11 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq)
 		struct idxd_desc *desc = wq->descs[i];
 
 		desc->hw = wq->hw_descs[i];
-		desc->completion = &wq->compls[i];
-		desc->compl_dma  = wq->compls_addr +
-			sizeof(struct dsa_completion_record) * i;
+		if (idxd->type == IDXD_TYPE_DSA)
+			desc->completion = &wq->compls[i];
+		else if (idxd->type == IDXD_TYPE_IAX)
+			desc->iax_completion = &wq->iax_compls[i];
+		desc->compl_dma = wq->compls_addr + idxd->compl_size * i;
 		desc->id = i;
 		desc->wq = wq;
 		desc->cpu = -1;
@@ -178,7 +195,8 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq)
  fail_sbitmap_init:
 	free_descs(wq);
  fail_alloc_descs:
-	dma_free_coherent(dev, wq->compls_size, wq->compls, wq->compls_addr);
+	dma_free_coherent(dev, wq->compls_size, wq->compls_raw,
+			  wq->compls_addr_raw);
  fail_alloc_compls:
 	free_hw_descs(wq);
 	return rc;
@@ -193,7 +211,8 @@ void idxd_wq_free_resources(struct idxd_wq *wq)
 
 	free_hw_descs(wq);
 	free_descs(wq);
-	dma_free_coherent(dev, wq->compls_size, wq->compls, wq->compls_addr);
+	dma_free_coherent(dev, wq->compls_size, wq->compls_raw,
+			  wq->compls_addr_raw);
 	sbitmap_queue_free(&wq->sbq);
 }
 
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index 149934f8d097..5a50e91c71bf 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -20,7 +20,8 @@ extern struct kmem_cache *idxd_desc_pool;
 enum idxd_type {
 	IDXD_TYPE_UNKNOWN = -1,
 	IDXD_TYPE_DSA = 0,
-	IDXD_TYPE_MAX
+	IDXD_TYPE_IAX,
+	IDXD_TYPE_MAX,
 };
 
 #define IDXD_NAME_SIZE		128
@@ -114,8 +115,13 @@ struct idxd_wq {
 	u32 vec_ptr;		/* interrupt steering */
 	struct dsa_hw_desc **hw_descs;
 	int num_descs;
-	struct dsa_completion_record *compls;
+	union {
+		struct dsa_completion_record *compls;
+		struct iax_completion_record *iax_compls;
+	};
+	void *compls_raw;
 	dma_addr_t compls_addr;
+	dma_addr_t compls_addr_raw;
 	int compls_size;
 	struct idxd_desc **descs;
 	struct sbitmap_queue sbq;
@@ -196,6 +202,7 @@ struct idxd_device {
 	int token_limit;
 	int nr_tokens;		/* non-reserved tokens */
 	unsigned int wqcfg_size;
+	int compl_size;
 
 	union sw_err_reg sw_err;
 	wait_queue_head_t cmd_waitq;
@@ -210,9 +217,15 @@ struct idxd_device {
 
 /* IDXD software descriptor */
 struct idxd_desc {
-	struct dsa_hw_desc *hw;
+	union {
+		struct dsa_hw_desc *hw;
+		struct iax_hw_desc *iax_hw;
+	};
 	dma_addr_t desc_dma;
-	struct dsa_completion_record *completion;
+	union {
+		struct dsa_completion_record *completion;
+		struct iax_completion_record *iax_completion;
+	};
 	dma_addr_t compl_dma;
 	struct dma_async_tx_descriptor txd;
 	struct llist_node llnode;
@@ -226,6 +239,7 @@ struct idxd_desc {
 #define confdev_to_wq(dev) container_of(dev, struct idxd_wq, conf_dev)
 
 extern struct bus_type dsa_bus_type;
+extern struct bus_type iax_bus_type;
 
 extern bool support_enqcmd;
 
@@ -271,6 +285,8 @@ static inline void idxd_set_type(struct idxd_device *idxd)
 
 	if (pdev->device == PCI_DEVICE_ID_INTEL_DSA_SPR0)
 		idxd->type = IDXD_TYPE_DSA;
+	else if (pdev->device == PCI_DEVICE_ID_INTEL_IAX_SPR0)
+		idxd->type = IDXD_TYPE_IAX;
 	else
 		idxd->type = IDXD_TYPE_UNKNOWN;
 }
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 45b0eac640c3..2c051e07c34c 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -36,12 +36,16 @@ static struct mutex idxd_idr_lock;
 static struct pci_device_id idxd_pci_tbl[] = {
 	/* DSA ver 1.0 platforms */
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_DSA_SPR0) },
+
+	/* IAX ver 1.0 platforms */
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IAX_SPR0) },
 	{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, idxd_pci_tbl);
 
 static char *idxd_name[] = {
 	"dsa",
+	"iax"
 };
 
 const char *idxd_get_dev_name(struct idxd_device *idxd)
@@ -377,6 +381,14 @@ static int idxd_probe(struct idxd_device *idxd)
 	return rc;
 }
 
+static void idxd_type_init(struct idxd_device *idxd)
+{
+	if (idxd->type == IDXD_TYPE_DSA)
+		idxd->compl_size = sizeof(struct dsa_completion_record);
+	else if (idxd->type == IDXD_TYPE_IAX)
+		idxd->compl_size = sizeof(struct iax_completion_record);
+}
+
 static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct device *dev = &pdev->dev;
@@ -412,6 +424,8 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	idxd_set_type(idxd);
 
+	idxd_type_init(idxd);
+
 	dev_dbg(dev, "Set PCI master\n");
 	pci_set_master(pdev);
 	pci_set_drvdata(pdev, idxd);
diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h
index 0cdc5405bc53..23c41fe52215 100644
--- a/drivers/dma/idxd/registers.h
+++ b/drivers/dma/idxd/registers.h
@@ -5,6 +5,7 @@
 
 /* PCI Config */
 #define PCI_DEVICE_ID_INTEL_DSA_SPR0	0x0b25
+#define PCI_DEVICE_ID_INTEL_IAX_SPR0	0x0cfe
 
 #define IDXD_MMIO_BAR		0
 #define IDXD_WQ_BAR		2
diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c
index efca5d8468a6..0ff64eeb84be 100644
--- a/drivers/dma/idxd/submit.c
+++ b/drivers/dma/idxd/submit.c
@@ -15,7 +15,7 @@ static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu)
 
 	desc = wq->descs[idx];
 	memset(desc->hw, 0, sizeof(struct dsa_hw_desc));
-	memset(desc->completion, 0, sizeof(struct dsa_completion_record));
+	memset(desc->completion, 0, idxd->compl_size);
 	desc->cpu = cpu;
 
 	if (device_pasid_enabled(idxd))
diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
index 3af83f1fd36e..266423a2cabc 100644
--- a/drivers/dma/idxd/sysfs.c
+++ b/drivers/dma/idxd/sysfs.c
@@ -41,14 +41,24 @@ static struct device_type dsa_device_type = {
 	.release = idxd_conf_device_release,
 };
 
+static struct device_type iax_device_type = {
+	.name = "iax",
+	.release = idxd_conf_device_release,
+};
+
 static inline bool is_dsa_dev(struct device *dev)
 {
 	return dev ? dev->type == &dsa_device_type : false;
 }
 
+static inline bool is_iax_dev(struct device *dev)
+{
+	return dev ? dev->type == &iax_device_type : false;
+}
+
 static inline bool is_idxd_dev(struct device *dev)
 {
-	return is_dsa_dev(dev);
+	return is_dsa_dev(dev) || is_iax_dev(dev);
 }
 
 static inline bool is_idxd_wq_dev(struct device *dev)
@@ -359,8 +369,17 @@ struct bus_type dsa_bus_type = {
 	.shutdown = idxd_config_bus_shutdown,
 };
 
+struct bus_type iax_bus_type = {
+	.name = "iax",
+	.match = idxd_config_bus_match,
+	.probe = idxd_config_bus_probe,
+	.remove = idxd_config_bus_remove,
+	.shutdown = idxd_config_bus_shutdown,
+};
+
 static struct bus_type *idxd_bus_types[] = {
-	&dsa_bus_type
+	&dsa_bus_type,
+	&iax_bus_type
 };
 
 static struct idxd_device_driver dsa_drv = {
@@ -372,8 +391,18 @@ static struct idxd_device_driver dsa_drv = {
 	},
 };
 
+static struct idxd_device_driver iax_drv = {
+	.drv = {
+		.name = "iax",
+		.bus = &iax_bus_type,
+		.owner = THIS_MODULE,
+		.mod_name = KBUILD_MODNAME,
+	},
+};
+
 static struct idxd_device_driver *idxd_drvs[] = {
-	&dsa_drv
+	&dsa_drv,
+	&iax_drv
 };
 
 struct bus_type *idxd_get_bus_type(struct idxd_device *idxd)
@@ -385,6 +414,8 @@ static struct device_type *idxd_get_device_type(struct idxd_device *idxd)
 {
 	if (idxd->type == IDXD_TYPE_DSA)
 		return &dsa_device_type;
+	else if (idxd->type == IDXD_TYPE_IAX)
+		return &iax_device_type;
 	else
 		return NULL;
 }
@@ -525,6 +556,9 @@ static ssize_t group_tokens_reserved_store(struct device *dev,
 	if (rc < 0)
 		return -EINVAL;
 
+	if (idxd->type == IDXD_TYPE_IAX)
+		return -EOPNOTSUPP;
+
 	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
 		return -EPERM;
 
@@ -570,6 +604,9 @@ static ssize_t group_tokens_allowed_store(struct device *dev,
 	if (rc < 0)
 		return -EINVAL;
 
+	if (idxd->type == IDXD_TYPE_IAX)
+		return -EOPNOTSUPP;
+
 	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
 		return -EPERM;
 
@@ -612,6 +649,9 @@ static ssize_t group_use_token_limit_store(struct device *dev,
 	if (rc < 0)
 		return -EINVAL;
 
+	if (idxd->type == IDXD_TYPE_IAX)
+		return -EOPNOTSUPP;
+
 	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
 		return -EPERM;
 
diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h
index fdcdfe414223..236d437947bc 100644
--- a/include/uapi/linux/idxd.h
+++ b/include/uapi/linux/idxd.h
@@ -26,6 +26,9 @@
 #define IDXD_OP_FLAG_DRDBK	0x4000
 #define IDXD_OP_FLAG_DSTS	0x8000
 
+/* IAX */
+#define IDXD_OP_FLAG_RD_SRC2_AECS	0x010000
+
 /* Opcode */
 enum dsa_opcode {
 	DSA_OPCODE_NOOP = 0,
@@ -47,6 +50,14 @@ enum dsa_opcode {
 	DSA_OPCODE_CFLUSH = 0x20,
 };
 
+enum iax_opcode {
+	IAX_OPCODE_NOOP = 0,
+	IAX_OPCODE_DRAIN = 2,
+	IAX_OPCODE_MEMMOVE,
+	IAX_OPCODE_DECOMPRESS = 0x42,
+	IAX_OPCODE_COMPRESS,
+};
+
 /* Completion record status */
 enum dsa_completion_status {
 	DSA_COMP_NONE = 0,
@@ -80,6 +91,33 @@ enum dsa_completion_status {
 	DSA_COMP_TRANSLATION_FAIL,
 };
 
+enum iax_completion_status {
+	IAX_COMP_NONE = 0,
+	IAX_COMP_SUCCESS,
+	IAX_COMP_PAGE_FAULT_IR = 0x04,
+	IAX_COMP_OUTBUF_OVERFLOW,
+	IAX_COMP_BAD_OPCODE = 0x10,
+	IAX_COMP_INVALID_FLAGS,
+	IAX_COMP_NOZERO_RESERVE,
+	IAX_COMP_INVALID_SIZE,
+	IAX_COMP_OVERLAP_BUFFERS = 0x16,
+	IAX_COMP_INT_HANDLE_INVAL = 0x19,
+	IAX_COMP_CRA_XLAT,
+	IAX_COMP_CRA_ALIGN,
+	IAX_COMP_ADDR_ALIGN,
+	IAX_COMP_PRIV_BAD,
+	IAX_COMP_TRAFFIC_CLASS_CONF,
+	IAX_COMP_PFAULT_RDBA,
+	IAX_COMP_HW_ERR1,
+	IAX_COMP_HW_ERR_DRB,
+	IAX_COMP_TRANSLATION_FAIL,
+	IAX_COMP_PRS_TIMEOUT,
+	IAX_COMP_WATCHDOG,
+	IAX_COMP_INVALID_COMP_FLAG = 0x30,
+	IAX_COMP_INVALID_FILTER_FLAG,
+	IAX_COMP_INVALID_NUM_ELEMS = 0x33,
+};
+
 #define DSA_COMP_STATUS_MASK		0x7f
 #define DSA_COMP_STATUS_WRITE		0x80
 
@@ -163,6 +201,28 @@ struct dsa_hw_desc {
 	};
 } __attribute__((packed));
 
+struct iax_hw_desc {
+	uint32_t        pasid:20;
+	uint32_t        rsvd:11;
+	uint32_t        priv:1;
+	uint32_t        flags:24;
+	uint32_t        opcode:8;
+	uint64_t        completion_addr;
+	uint64_t        src1_addr;
+	uint64_t        dst_addr;
+	uint32_t        src1_size;
+	uint16_t        int_handle;
+	union {
+		uint16_t        compr_flags;
+		uint16_t        decompr_flags;
+	};
+	uint64_t        src2_addr;
+	uint32_t        max_dst_size;
+	uint32_t        src2_size;
+	uint32_t	filter_flags;
+	uint32_t	num_inputs;
+} __attribute__((packed));
+
 struct dsa_raw_desc {
 	uint64_t	field[8];
 } __attribute__((packed));
@@ -223,4 +283,23 @@ struct dsa_raw_completion_record {
 	uint64_t	field[4];
 } __attribute__((packed));
 
+struct iax_completion_record {
+	volatile uint8_t        status;
+	uint8_t                 error_code;
+	uint16_t                rsvd;
+	uint32_t                bytes_completed;
+	uint64_t                fault_addr;
+	uint32_t                invalid_flags;
+	uint32_t                rsvd2;
+	uint32_t                output_size;
+	uint8_t                 output_bits;
+	uint8_t                 rsvd3;
+	uint16_t                rsvd4;
+	uint64_t                rsvd5[4];
+} __attribute__((packed));
+
+struct iax_raw_completion_record {
+	uint64_t	field[8];
+} __attribute__((packed));
+
 #endif
-- 
cgit v1.2.3


From b7906b70a2337e445b8dca3ce7ba8976b6ebd07d Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 11 Dec 2020 22:36:25 +0100
Subject: bpf: Fix enum names for bpf_this_cpu_ptr() and bpf_per_cpu_ptr()
 helpers

Remove bpf_ prefix, which causes these helpers to be reported in verifier
dump as bpf_bpf_this_cpu_ptr() and bpf_bpf_per_cpu_ptr(), respectively. Let's
fix it as long as it is still possible before UAPI freezes on these helpers.

Fixes: eaa6bcb71ef6 ("bpf: Introduce bpf_per_cpu_ptr()")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/bpf.h       | 4 ++--
 kernel/bpf/helpers.c           | 4 ++--
 kernel/trace/bpf_trace.c       | 4 ++--
 tools/include/uapi/linux/bpf.h | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e6ceac3f7d62..556216dc9703 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3897,8 +3897,8 @@ union bpf_attr {
 	FN(seq_printf_btf),		\
 	FN(skb_cgroup_classid),		\
 	FN(redirect_neigh),		\
-	FN(bpf_per_cpu_ptr),            \
-	FN(bpf_this_cpu_ptr),		\
+	FN(per_cpu_ptr),		\
+	FN(this_cpu_ptr),		\
 	FN(redirect_peer),		\
 	/* */
 
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 25520f5eeaf6..deda1185237b 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -717,9 +717,9 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_snprintf_btf_proto;
 	case BPF_FUNC_jiffies64:
 		return &bpf_jiffies64_proto;
-	case BPF_FUNC_bpf_per_cpu_ptr:
+	case BPF_FUNC_per_cpu_ptr:
 		return &bpf_per_cpu_ptr_proto;
-	case BPF_FUNC_bpf_this_cpu_ptr:
+	case BPF_FUNC_this_cpu_ptr:
 		return &bpf_this_cpu_ptr_proto;
 	default:
 		break;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 048c655315f1..a125ea5e04cd 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1337,9 +1337,9 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
 	case BPF_FUNC_snprintf_btf:
 		return &bpf_snprintf_btf_proto;
-	case BPF_FUNC_bpf_per_cpu_ptr:
+	case BPF_FUNC_per_cpu_ptr:
 		return &bpf_per_cpu_ptr_proto;
-	case BPF_FUNC_bpf_this_cpu_ptr:
+	case BPF_FUNC_this_cpu_ptr:
 		return &bpf_this_cpu_ptr_proto;
 	default:
 		return NULL;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e6ceac3f7d62..556216dc9703 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3897,8 +3897,8 @@ union bpf_attr {
 	FN(seq_printf_btf),		\
 	FN(skb_cgroup_classid),		\
 	FN(redirect_neigh),		\
-	FN(bpf_per_cpu_ptr),            \
-	FN(bpf_this_cpu_ptr),		\
+	FN(per_cpu_ptr),		\
+	FN(this_cpu_ptr),		\
 	FN(redirect_peer),		\
 	/* */
 
-- 
cgit v1.2.3


From d21a1240f5169a07a230d72e0e6d3773b2a088b4 Mon Sep 17 00:00:00 2001
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Thu, 10 Dec 2020 11:42:59 -0600
Subject: RDMA/rxe: Use acquire/release for memory ordering

Change work and completion queues to use smp_load_acquire() and
smp_store_release() to synchronize between driver and users.  This commit
goes with a matching series of commits in the rxe user space provider.

Link: https://lore.kernel.org/r/20201210174258.5234-1-rpearson@hpe.com
Signed-off-by: Bob Pearson <rpearson@hpe.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_cq.c    |  5 --
 drivers/infiniband/sw/rxe/rxe_queue.h | 94 ++++++++++++++++++++++-------------
 drivers/infiniband/sw/rxe/rxe_verbs.c | 11 ----
 include/uapi/rdma/rdma_user_rxe.h     | 21 ++++++++
 4 files changed, 81 insertions(+), 50 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index 43394c3f29d4..b315ebf041ac 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -123,11 +123,6 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
 
 	memcpy(producer_addr(cq->queue), cqe, sizeof(*cqe));
 
-	/* make sure all changes to the CQ are written before we update the
-	 * producer pointer
-	 */
-	smp_wmb();
-
 	advance_producer(cq->queue);
 	spin_unlock_irqrestore(&cq->cq_lock, flags);
 
diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h
index 7d434a6837a7..2902ca7b288c 100644
--- a/drivers/infiniband/sw/rxe/rxe_queue.h
+++ b/drivers/infiniband/sw/rxe/rxe_queue.h
@@ -7,9 +7,11 @@
 #ifndef RXE_QUEUE_H
 #define RXE_QUEUE_H
 
+/* for definition of shared struct rxe_queue_buf */
+#include <uapi/rdma/rdma_user_rxe.h>
+
 /* implements a simple circular buffer that can optionally be
  * shared between user space and the kernel and can be resized
-
  * the requested element size is rounded up to a power of 2
  * and the number of elements in the buffer is also rounded
  * up to a power of 2. Since the queue is empty when the
@@ -17,28 +19,6 @@
  * of the queue is one less than the number of element slots
  */
 
-/* this data structure is shared between user space and kernel
- * space for those cases where the queue is shared. It contains
- * the producer and consumer indices. Is also contains a copy
- * of the queue size parameters for user space to use but the
- * kernel must use the parameters in the rxe_queue struct
- * this MUST MATCH the corresponding librxe struct
- * for performance reasons arrange to have producer and consumer
- * pointers in separate cache lines
- * the kernel should always mask the indices to avoid accessing
- * memory outside of the data area
- */
-struct rxe_queue_buf {
-	__u32			log2_elem_size;
-	__u32			index_mask;
-	__u32			pad_1[30];
-	__u32			producer_index;
-	__u32			pad_2[31];
-	__u32			consumer_index;
-	__u32			pad_3[31];
-	__u8			data[];
-};
-
 struct rxe_queue {
 	struct rxe_dev		*rxe;
 	struct rxe_queue_buf	*buf;
@@ -46,7 +26,7 @@ struct rxe_queue {
 	size_t			buf_size;
 	size_t			elem_size;
 	unsigned int		log2_elem_size;
-	unsigned int		index_mask;
+	u32			index_mask;
 };
 
 int do_mmap_info(struct rxe_dev *rxe, struct mminfo __user *outbuf,
@@ -76,26 +56,56 @@ static inline int next_index(struct rxe_queue *q, int index)
 
 static inline int queue_empty(struct rxe_queue *q)
 {
-	return ((q->buf->producer_index - q->buf->consumer_index)
-			& q->index_mask) == 0;
+	u32 prod;
+	u32 cons;
+
+	/* make sure all changes to queue complete before
+	 * testing queue empty
+	 */
+	prod = smp_load_acquire(&q->buf->producer_index);
+	/* same */
+	cons = smp_load_acquire(&q->buf->consumer_index);
+
+	return ((prod - cons) & q->index_mask) == 0;
 }
 
 static inline int queue_full(struct rxe_queue *q)
 {
-	return ((q->buf->producer_index + 1 - q->buf->consumer_index)
-			& q->index_mask) == 0;
+	u32 prod;
+	u32 cons;
+
+	/* make sure all changes to queue complete before
+	 * testing queue full
+	 */
+	prod = smp_load_acquire(&q->buf->producer_index);
+	/* same */
+	cons = smp_load_acquire(&q->buf->consumer_index);
+
+	return ((prod + 1 - cons) & q->index_mask) == 0;
 }
 
 static inline void advance_producer(struct rxe_queue *q)
 {
-	q->buf->producer_index = (q->buf->producer_index + 1)
-			& q->index_mask;
+	u32 prod;
+
+	prod = (q->buf->producer_index + 1) & q->index_mask;
+
+	/* make sure all changes to queue complete before
+	 * changing producer index
+	 */
+	smp_store_release(&q->buf->producer_index, prod);
 }
 
 static inline void advance_consumer(struct rxe_queue *q)
 {
-	q->buf->consumer_index = (q->buf->consumer_index + 1)
-			& q->index_mask;
+	u32 cons;
+
+	cons = (q->buf->consumer_index + 1) & q->index_mask;
+
+	/* make sure all changes to queue complete before
+	 * changing consumer index
+	 */
+	smp_store_release(&q->buf->consumer_index, cons);
 }
 
 static inline void *producer_addr(struct rxe_queue *q)
@@ -112,12 +122,28 @@ static inline void *consumer_addr(struct rxe_queue *q)
 
 static inline unsigned int producer_index(struct rxe_queue *q)
 {
-	return q->buf->producer_index;
+	u32 index;
+
+	/* make sure all changes to queue
+	 * complete before getting producer index
+	 */
+	index = smp_load_acquire(&q->buf->producer_index);
+	index &= q->index_mask;
+
+	return index;
 }
 
 static inline unsigned int consumer_index(struct rxe_queue *q)
 {
-	return q->buf->consumer_index;
+	u32 index;
+
+	/* make sure all changes to queue
+	 * complete before getting consumer index
+	 */
+	index = smp_load_acquire(&q->buf->consumer_index);
+	index &= q->index_mask;
+
+	return index;
 }
 
 static inline void *addr_from_index(struct rxe_queue *q, unsigned int index)
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 2fbea2b2d72a..a031514e2f41 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -244,11 +244,6 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
 	recv_wqe->dma.cur_sge		= 0;
 	recv_wqe->dma.sge_offset	= 0;
 
-	/* make sure all changes to the work queue are written before we
-	 * update the producer pointer
-	 */
-	smp_wmb();
-
 	advance_producer(rq->queue);
 	return 0;
 
@@ -633,12 +628,6 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
 	if (unlikely(err))
 		goto err1;
 
-	/*
-	 * make sure all changes to the work queue are
-	 * written before we update the producer pointer
-	 */
-	smp_wmb();
-
 	advance_producer(sq->queue);
 	spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
 
diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h
index e591d8c1f3cf..068433e2229d 100644
--- a/include/uapi/rdma/rdma_user_rxe.h
+++ b/include/uapi/rdma/rdma_user_rxe.h
@@ -181,4 +181,25 @@ struct rxe_modify_srq_cmd {
 	__aligned_u64 mmap_info_addr;
 };
 
+/* This data structure is stored at the base of work and
+ * completion queues shared between user space and kernel space.
+ * It contains the producer and consumer indices. It also
+ * contains a copy of the queue size parameters for user space
+ * to use but the kernel must use the parameters in the
+ * rxe_queue struct. For performance reasons arrange to have
+ * producer and consumer indices in separate cache lines.
+ * The kernel should always mask the indices to avoid accessing
+ * memory outside of the data area
+ */
+struct rxe_queue_buf {
+	__u32			log2_elem_size;
+	__u32			index_mask;
+	__u32			pad_1[30];
+	__u32			producer_index;
+	__u32			pad_2[31];
+	__u32			consumer_index;
+	__u32			pad_3[31];
+	__u8			data[];
+};
+
 #endif /* RDMA_USER_RXE_H */
-- 
cgit v1.2.3


From 48b0ae046ee96eac999839f6d26c624b8c93ed66 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 7 Dec 2020 17:37:14 +0100
Subject: netfilter: nftables: netlink support for several set element
 expressions

This patch adds three new netlink attributes to encapsulate a list of
expressions per set element:

- NFTA_SET_EXPRESSIONS: this attribute provides the set definition in
  terms of expressions. New set elements get attached the list of
  expressions that is specified by this new netlink attribute.
- NFTA_SET_ELEM_EXPRESSIONS: this attribute allows users to restore (or
  initialize) the stateful information of set elements when adding an
  element to the set.
- NFTA_DYNSET_EXPRESSIONS: this attribute specifies the list of
  expressions that the set element gets when it is inserted from the
  packet path.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |  6 +++
 net/netfilter/nf_tables_api.c            | 93 ++++++++++++++++++++++++++++++--
 net/netfilter/nft_dynset.c               | 56 +++++++++++++++++--
 3 files changed, 149 insertions(+), 6 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 98272cb5f617..28b6ee53305f 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -361,6 +361,7 @@ enum nft_set_field_attributes {
  * @NFTA_SET_OBJ_TYPE: stateful object type (NLA_U32: NFT_OBJECT_*)
  * @NFTA_SET_HANDLE: set handle (NLA_U64)
  * @NFTA_SET_EXPR: set expression (NLA_NESTED: nft_expr_attributes)
+ * @NFTA_SET_EXPRESSIONS: list of expressions (NLA_NESTED: nft_list_attributes)
  */
 enum nft_set_attributes {
 	NFTA_SET_UNSPEC,
@@ -381,6 +382,7 @@ enum nft_set_attributes {
 	NFTA_SET_OBJ_TYPE,
 	NFTA_SET_HANDLE,
 	NFTA_SET_EXPR,
+	NFTA_SET_EXPRESSIONS,
 	__NFTA_SET_MAX
 };
 #define NFTA_SET_MAX		(__NFTA_SET_MAX - 1)
@@ -406,6 +408,7 @@ enum nft_set_elem_flags {
  * @NFTA_SET_ELEM_EXPR: expression (NLA_NESTED: nft_expr_attributes)
  * @NFTA_SET_ELEM_OBJREF: stateful object reference (NLA_STRING)
  * @NFTA_SET_ELEM_KEY_END: closing key value (NLA_NESTED: nft_data)
+ * @NFTA_SET_ELEM_EXPRESSIONS: list of expressions (NLA_NESTED: nft_list_attributes)
  */
 enum nft_set_elem_attributes {
 	NFTA_SET_ELEM_UNSPEC,
@@ -419,6 +422,7 @@ enum nft_set_elem_attributes {
 	NFTA_SET_ELEM_PAD,
 	NFTA_SET_ELEM_OBJREF,
 	NFTA_SET_ELEM_KEY_END,
+	NFTA_SET_ELEM_EXPRESSIONS,
 	__NFTA_SET_ELEM_MAX
 };
 #define NFTA_SET_ELEM_MAX	(__NFTA_SET_ELEM_MAX - 1)
@@ -715,6 +719,7 @@ enum nft_dynset_flags {
  * @NFTA_DYNSET_TIMEOUT: timeout value for the new element (NLA_U64)
  * @NFTA_DYNSET_EXPR: expression (NLA_NESTED: nft_expr_attributes)
  * @NFTA_DYNSET_FLAGS: flags (NLA_U32)
+ * @NFTA_DYNSET_EXPRESSIONS: list of expressions (NLA_NESTED: nft_list_attributes)
  */
 enum nft_dynset_attributes {
 	NFTA_DYNSET_UNSPEC,
@@ -727,6 +732,7 @@ enum nft_dynset_attributes {
 	NFTA_DYNSET_EXPR,
 	NFTA_DYNSET_PAD,
 	NFTA_DYNSET_FLAGS,
+	NFTA_DYNSET_EXPRESSIONS,
 	__NFTA_DYNSET_MAX,
 };
 #define NFTA_DYNSET_MAX		(__NFTA_DYNSET_MAX - 1)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index a3d5014dd246..243e3c2c7629 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3566,6 +3566,7 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
 	[NFTA_SET_OBJ_TYPE]		= { .type = NLA_U32 },
 	[NFTA_SET_HANDLE]		= { .type = NLA_U64 },
 	[NFTA_SET_EXPR]			= { .type = NLA_NESTED },
+	[NFTA_SET_EXPRESSIONS]		= { .type = NLA_NESTED },
 };
 
 static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
@@ -3773,6 +3774,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
 	u32 portid = ctx->portid;
 	struct nlattr *nest;
 	u32 seq = ctx->seq;
+	int i;
 
 	event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
@@ -3847,6 +3849,17 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
 			goto nla_put_failure;
 
 		nla_nest_end(skb, nest);
+	} else if (set->num_exprs > 1) {
+		nest = nla_nest_start_noflag(skb, NFTA_SET_EXPRESSIONS);
+		if (nest == NULL)
+			goto nla_put_failure;
+
+		for (i = 0; i < set->num_exprs; i++) {
+			if (nft_expr_dump(skb, NFTA_LIST_ELEM,
+					  set->exprs[i]) < 0)
+				goto nla_put_failure;
+		}
+		nla_nest_end(skb, nest);
 	}
 
 	nlmsg_end(skb, nlh);
@@ -4215,7 +4228,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
 			return err;
 	}
 
-	if (nla[NFTA_SET_EXPR])
+	if (nla[NFTA_SET_EXPR] || nla[NFTA_SET_EXPRESSIONS])
 		desc.expr = true;
 
 	table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family, genmask);
@@ -4281,6 +4294,29 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
 		}
 		set->exprs[0] = expr;
 		set->num_exprs++;
+	} else if (nla[NFTA_SET_EXPRESSIONS]) {
+		struct nft_expr *expr;
+		struct nlattr *tmp;
+		int left;
+
+		i = 0;
+		nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) {
+			if (i == NFT_SET_EXPR_MAX) {
+				err = -E2BIG;
+				goto err_set_init;
+			}
+			if (nla_type(tmp) != NFTA_LIST_ELEM) {
+				err = -EINVAL;
+				goto err_set_init;
+			}
+			expr = nft_set_elem_expr_alloc(&ctx, set, tmp);
+			if (IS_ERR(expr)) {
+				err = PTR_ERR(expr);
+				goto err_set_init;
+			}
+			set->exprs[i++] = expr;
+			set->num_exprs++;
+		}
 	}
 
 	udata = NULL;
@@ -4540,6 +4576,7 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = {
 	[NFTA_SET_ELEM_OBJREF]		= { .type = NLA_STRING,
 					    .len = NFT_OBJ_MAXNAMELEN - 1 },
 	[NFTA_SET_ELEM_KEY_END]		= { .type = NLA_NESTED },
+	[NFTA_SET_ELEM_EXPRESSIONS]	= { .type = NLA_NESTED },
 };
 
 static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = {
@@ -4580,6 +4617,7 @@ static int nft_set_elem_expr_dump(struct sk_buff *skb,
 	struct nft_set_elem_expr *elem_expr;
 	u32 size, num_exprs = 0;
 	struct nft_expr *expr;
+	struct nlattr *nest;
 
 	elem_expr = nft_set_ext_expr(ext);
 	nft_setelem_expr_foreach(expr, elem_expr, size)
@@ -4591,9 +4629,22 @@ static int nft_set_elem_expr_dump(struct sk_buff *skb,
 			return -1;
 
 		return 0;
-	}
+	} else if (num_exprs > 1) {
+		nest = nla_nest_start_noflag(skb, NFTA_SET_ELEM_EXPRESSIONS);
+		if (nest == NULL)
+			goto nla_put_failure;
 
+		nft_setelem_expr_foreach(expr, elem_expr, size) {
+			expr = nft_setelem_expr_at(elem_expr, size);
+			if (nft_expr_dump(skb, NFTA_LIST_ELEM, expr) < 0)
+				goto nla_put_failure;
+		}
+		nla_nest_end(skb, nest);
+	}
 	return 0;
+
+nla_put_failure:
+	return -1;
 }
 
 static int nf_tables_fill_setelem(struct sk_buff *skb,
@@ -5268,7 +5319,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 	      nla[NFTA_SET_ELEM_TIMEOUT] ||
 	      nla[NFTA_SET_ELEM_EXPIRATION] ||
 	      nla[NFTA_SET_ELEM_USERDATA] ||
-	      nla[NFTA_SET_ELEM_EXPR]))
+	      nla[NFTA_SET_ELEM_EXPR] ||
+	      nla[NFTA_SET_ELEM_EXPRESSIONS]))
 		return -EINVAL;
 
 	timeout = 0;
@@ -5310,6 +5362,41 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 			err = -EOPNOTSUPP;
 			goto err_set_elem_expr;
 		}
+	} else if (nla[NFTA_SET_ELEM_EXPRESSIONS]) {
+		struct nft_expr *expr;
+		struct nlattr *tmp;
+		int left;
+
+		if (set->num_exprs == 0)
+			return -EOPNOTSUPP;
+
+		i = 0;
+		nla_for_each_nested(tmp, nla[NFTA_SET_ELEM_EXPRESSIONS], left) {
+			if (i == set->num_exprs) {
+				err = -E2BIG;
+				goto err_set_elem_expr;
+			}
+			if (nla_type(tmp) != NFTA_LIST_ELEM) {
+				err = -EINVAL;
+				goto err_set_elem_expr;
+			}
+			expr = nft_set_elem_expr_alloc(ctx, set, tmp);
+			if (IS_ERR(expr)) {
+				err = PTR_ERR(expr);
+				goto err_set_elem_expr;
+			}
+			expr_array[i] = expr;
+
+			if (expr->ops != set->exprs[i]->ops) {
+				err = -EOPNOTSUPP;
+				goto err_set_elem_expr;
+			}
+			i++;
+		}
+		if (set->num_exprs != i) {
+			err = -EOPNOTSUPP;
+			goto err_set_elem_expr;
+		}
 	} else if (set->num_exprs > 0) {
 		err = nft_set_elem_expr_clone(ctx, set, expr_array);
 		if (err < 0)
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index d9e609b2e5d4..13c426d5dcf9 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -153,6 +153,7 @@ static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = {
 	[NFTA_DYNSET_TIMEOUT]	= { .type = NLA_U64 },
 	[NFTA_DYNSET_EXPR]	= { .type = NLA_NESTED },
 	[NFTA_DYNSET_FLAGS]	= { .type = NLA_U32 },
+	[NFTA_DYNSET_EXPRESSIONS] = { .type = NLA_NESTED },
 };
 
 static int nft_dynset_init(const struct nft_ctx *ctx,
@@ -232,12 +233,13 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
 	} else if (set->flags & NFT_SET_MAP)
 		return -EINVAL;
 
+	if ((tb[NFTA_DYNSET_EXPR] || tb[NFTA_DYNSET_EXPRESSIONS]) &&
+	    !(set->flags & NFT_SET_EVAL))
+		return -EINVAL;
+
 	if (tb[NFTA_DYNSET_EXPR]) {
 		struct nft_expr *dynset_expr;
 
-		if (!(set->flags & NFT_SET_EVAL))
-			return -EINVAL;
-
 		dynset_expr = nft_dynset_expr_alloc(ctx, set,
 						    tb[NFTA_DYNSET_EXPR], 0);
 		if (IS_ERR(dynset_expr))
@@ -252,6 +254,40 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
 			err = -EOPNOTSUPP;
 			goto err_expr_free;
 		}
+	} else if (tb[NFTA_DYNSET_EXPRESSIONS]) {
+		struct nft_expr *dynset_expr;
+		struct nlattr *tmp;
+		int left;
+
+		i = 0;
+		nla_for_each_nested(tmp, tb[NFTA_DYNSET_EXPRESSIONS], left) {
+			if (i == NFT_SET_EXPR_MAX) {
+				err = -E2BIG;
+				goto err_expr_free;
+			}
+			if (nla_type(tmp) != NFTA_LIST_ELEM) {
+				err = -EINVAL;
+				goto err_expr_free;
+			}
+			dynset_expr = nft_dynset_expr_alloc(ctx, set, tmp, i);
+			if (IS_ERR(dynset_expr)) {
+				err = PTR_ERR(dynset_expr);
+				goto err_expr_free;
+			}
+			priv->expr_array[i] = dynset_expr;
+			priv->num_exprs++;
+
+			if (set->num_exprs &&
+			    dynset_expr->ops != set->exprs[i]->ops) {
+				err = -EOPNOTSUPP;
+				goto err_expr_free;
+			}
+			i++;
+		}
+		if (set->num_exprs && set->num_exprs != i) {
+			err = -EOPNOTSUPP;
+			goto err_expr_free;
+		}
 	}
 
 	nft_set_ext_prepare(&priv->tmpl);
@@ -318,6 +354,7 @@ static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
 	const struct nft_dynset *priv = nft_expr_priv(expr);
 	u32 flags = priv->invert ? NFT_DYNSET_F_INV : 0;
+	int i;
 
 	if (nft_dump_register(skb, NFTA_DYNSET_SREG_KEY, priv->sreg_key))
 		goto nla_put_failure;
@@ -335,6 +372,19 @@ static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr)
 	if (priv->num_exprs == 1) {
 		if (nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr_array[0]))
 			goto nla_put_failure;
+	} else if (priv->num_exprs > 1) {
+		struct nlattr *nest;
+
+		nest = nla_nest_start_noflag(skb, NFTA_DYNSET_EXPRESSIONS);
+		if (!nest)
+			goto nla_put_failure;
+
+		for (i = 0; i < priv->num_exprs; i++) {
+			if (nft_expr_dump(skb, NFTA_LIST_ELEM,
+					  priv->expr_array[i]))
+				goto nla_put_failure;
+		}
+		nla_nest_end(skb, nest);
 	}
 	if (nla_put_be32(skb, NFTA_DYNSET_FLAGS, htonl(flags)))
 		goto nla_put_failure;
-- 
cgit v1.2.3


From 06f08dab3ca726b86431889495c45049616d6a15 Mon Sep 17 00:00:00 2001
From: Samuel Cabrero <scabrero@suse.de>
Date: Mon, 30 Nov 2020 19:02:49 +0100
Subject: cifs: Register generic netlink family

Register a new generic netlink family to talk to the witness service
userspace daemon.

Signed-off-by: Samuel Cabrero <scabrero@suse.de>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/Kconfig                        | 11 ++++++
 fs/cifs/Makefile                       |  2 +
 fs/cifs/cifsfs.c                       | 17 ++++++++-
 fs/cifs/netlink.c                      | 69 ++++++++++++++++++++++++++++++++++
 fs/cifs/netlink.h                      | 16 ++++++++
 include/uapi/linux/cifs/cifs_netlink.h | 31 +++++++++++++++
 6 files changed, 145 insertions(+), 1 deletion(-)
 create mode 100644 fs/cifs/netlink.c
 create mode 100644 fs/cifs/netlink.h
 create mode 100644 include/uapi/linux/cifs/cifs_netlink.h

(limited to 'include/uapi')

diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 604f65f4b6c5..664ac5c63d39 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -190,6 +190,17 @@ config CIFS_DFS_UPCALL
 	  servers if their addresses change or for implicit mounts of
 	  DFS junction points. If unsure, say Y.
 
+config CIFS_SWN_UPCALL
+	bool "SWN feature support"
+	depends on CIFS
+	help
+	  The Service Witness Protocol (SWN) is used to get notifications
+	  from a highly available server of resource state changes. This
+	  feature enables an upcall mechanism for CIFS which contacts a
+	  userspace daemon to establish the DCE/RPC connection to retrieve
+	  the cluster available interfaces and resource change notifications.
+	  If unsure, say Y.
+
 config CIFS_NFSD_EXPORT
 	bool "Allow nfsd to export CIFS file system"
 	depends on CIFS && BROKEN
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 848ebad6af7d..9e398d227b0e 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -18,6 +18,8 @@ cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o
 
 cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o dfs_cache.o
 
+cifs-$(CONFIG_CIFS_SWN_UPCALL) += netlink.o
+
 cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o
 
 cifs-$(CONFIG_CIFS_SMB_DIRECT) += smbdirect.o
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 4f27f77d3053..5d32561ae2ed 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -55,6 +55,9 @@
 #ifdef CONFIG_CIFS_DFS_UPCALL
 #include "dfs_cache.h"
 #endif
+#ifdef CONFIG_CIFS_SWN_UPCALL
+#include "netlink.h"
+#endif
 #include "fs_context.h"
 
 /*
@@ -1601,10 +1604,15 @@ init_cifs(void)
 	if (rc)
 		goto out_destroy_dfs_cache;
 #endif /* CONFIG_CIFS_UPCALL */
+#ifdef CONFIG_CIFS_SWN_UPCALL
+	rc = cifs_genl_init();
+	if (rc)
+		goto out_register_key_type;
+#endif /* CONFIG_CIFS_SWN_UPCALL */
 
 	rc = init_cifs_idmap();
 	if (rc)
-		goto out_register_key_type;
+		goto out_cifs_swn_init;
 
 	rc = register_filesystem(&cifs_fs_type);
 	if (rc)
@@ -1620,7 +1628,11 @@ init_cifs(void)
 
 out_init_cifs_idmap:
 	exit_cifs_idmap();
+out_cifs_swn_init:
+#ifdef CONFIG_CIFS_SWN_UPCALL
+	cifs_genl_exit();
 out_register_key_type:
+#endif
 #ifdef CONFIG_CIFS_UPCALL
 	exit_cifs_spnego();
 out_destroy_dfs_cache:
@@ -1657,6 +1669,9 @@ exit_cifs(void)
 	unregister_filesystem(&smb3_fs_type);
 	cifs_dfs_release_automount_timer();
 	exit_cifs_idmap();
+#ifdef CONFIG_CIFS_SWN_UPCALL
+	cifs_genl_exit();
+#endif
 #ifdef CONFIG_CIFS_UPCALL
 	exit_cifs_spnego();
 #endif
diff --git a/fs/cifs/netlink.c b/fs/cifs/netlink.c
new file mode 100644
index 000000000000..b9154661fa85
--- /dev/null
+++ b/fs/cifs/netlink.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Netlink routines for CIFS
+ *
+ * Copyright (c) 2020 Samuel Cabrero <scabrero@suse.de>
+ */
+
+#include <net/genetlink.h>
+#include <uapi/linux/cifs/cifs_netlink.h>
+
+#include "netlink.h"
+#include "cifsglob.h"
+#include "cifs_debug.h"
+
+static const struct nla_policy cifs_genl_policy[CIFS_GENL_ATTR_MAX + 1] = {
+};
+
+static struct genl_ops cifs_genl_ops[] = {
+};
+
+static const struct genl_multicast_group cifs_genl_mcgrps[] = {
+	[CIFS_GENL_MCGRP_SWN] = { .name = CIFS_GENL_MCGRP_SWN_NAME },
+};
+
+struct genl_family cifs_genl_family = {
+	.name		= CIFS_GENL_NAME,
+	.version	= CIFS_GENL_VERSION,
+	.hdrsize	= 0,
+	.maxattr	= CIFS_GENL_ATTR_MAX,
+	.module		= THIS_MODULE,
+	.policy		= cifs_genl_policy,
+	.ops		= cifs_genl_ops,
+	.n_ops		= ARRAY_SIZE(cifs_genl_ops),
+	.mcgrps		= cifs_genl_mcgrps,
+	.n_mcgrps	= ARRAY_SIZE(cifs_genl_mcgrps),
+};
+
+/**
+ * cifs_genl_init - Register generic netlink family
+ *
+ * Return zero if initialized successfully, otherwise non-zero.
+ */
+int cifs_genl_init(void)
+{
+	int ret;
+
+	ret = genl_register_family(&cifs_genl_family);
+	if (ret < 0) {
+		cifs_dbg(VFS, "%s: failed to register netlink family\n",
+				__func__);
+		return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * cifs_genl_exit - Unregister generic netlink family
+ */
+void cifs_genl_exit(void)
+{
+	int ret;
+
+	ret = genl_unregister_family(&cifs_genl_family);
+	if (ret < 0) {
+		cifs_dbg(VFS, "%s: failed to unregister netlink family\n",
+				__func__);
+	}
+}
diff --git a/fs/cifs/netlink.h b/fs/cifs/netlink.h
new file mode 100644
index 000000000000..e2fa8ed24c54
--- /dev/null
+++ b/fs/cifs/netlink.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Netlink routines for CIFS
+ *
+ * Copyright (c) 2020 Samuel Cabrero <scabrero@suse.de>
+ */
+
+#ifndef _CIFS_NETLINK_H
+#define _CIFS_NETLINK_H
+
+extern struct genl_family cifs_genl_family;
+
+extern int cifs_genl_init(void);
+extern void cifs_genl_exit(void);
+
+#endif /* _CIFS_NETLINK_H */
diff --git a/include/uapi/linux/cifs/cifs_netlink.h b/include/uapi/linux/cifs/cifs_netlink.h
new file mode 100644
index 000000000000..cdb1bd78fbc7
--- /dev/null
+++ b/include/uapi/linux/cifs/cifs_netlink.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: LGPL-2.1+ WITH Linux-syscall-note */
+/*
+ * Netlink routines for CIFS
+ *
+ * Copyright (c) 2020 Samuel Cabrero <scabrero@suse.de>
+ */
+
+
+#ifndef _UAPILINUX_CIFS_NETLINK_H
+#define _UAPILINUX_CIFS_NETLINK_H
+
+#define CIFS_GENL_NAME			"cifs"
+#define CIFS_GENL_VERSION		0x1
+
+#define CIFS_GENL_MCGRP_SWN_NAME	"cifs_mcgrp_swn"
+
+enum cifs_genl_multicast_groups {
+	CIFS_GENL_MCGRP_SWN,
+};
+
+enum cifs_genl_attributes {
+	__CIFS_GENL_ATTR_MAX,
+};
+#define CIFS_GENL_ATTR_MAX (__CIFS_GENL_ATTR_MAX - 1)
+
+enum cifs_genl_commands {
+	__CIFS_GENL_CMD_MAX
+};
+#define CIFS_GENL_CMD_MAX (__CIFS_GENL_CMD_MAX - 1)
+
+#endif /* _UAPILINUX_CIFS_NETLINK_H */
-- 
cgit v1.2.3


From bf80e5d4259a192d6b06ae17e79a5e9dab48bf51 Mon Sep 17 00:00:00 2001
From: Samuel Cabrero <scabrero@suse.de>
Date: Mon, 30 Nov 2020 19:02:51 +0100
Subject: cifs: Send witness register and unregister commands to userspace
 daemon

+ Define the generic netlink family commands and message attributes to
  communicate with the userspace daemon

+ The register and unregister commands are sent when connecting or
  disconnecting a tree. The witness registration keeps a pointer to
  the tcon and has the same lifetime.

+ Each registration has an id allocated by an IDR. This id is sent to the
  userspace daemon in the register command, and will be included in the
  notification messages from the userspace daemon to retrieve from the
  IDR the matching registration.

+ The authentication information is bundled in the register message.
  If kerberos is used the message just carries a flag.

Signed-off-by: Samuel Cabrero <scabrero@suse.de>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/Makefile                       |   2 +-
 fs/cifs/cifs_swn.c                     | 421 +++++++++++++++++++++++++++++++++
 fs/cifs/cifs_swn.h                     |  17 ++
 fs/cifs/connect.c                      |  26 +-
 fs/cifs/netlink.c                      |  11 +
 include/uapi/linux/cifs/cifs_netlink.h |  15 ++
 6 files changed, 489 insertions(+), 3 deletions(-)
 create mode 100644 fs/cifs/cifs_swn.c
 create mode 100644 fs/cifs/cifs_swn.h

(limited to 'include/uapi')

diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 9e398d227b0e..5213b20843b5 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -18,7 +18,7 @@ cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o
 
 cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o dfs_cache.o
 
-cifs-$(CONFIG_CIFS_SWN_UPCALL) += netlink.o
+cifs-$(CONFIG_CIFS_SWN_UPCALL) += netlink.o cifs_swn.o
 
 cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o
 
diff --git a/fs/cifs/cifs_swn.c b/fs/cifs/cifs_swn.c
new file mode 100644
index 000000000000..c0af03955d0c
--- /dev/null
+++ b/fs/cifs/cifs_swn.c
@@ -0,0 +1,421 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Witness Service client for CIFS
+ *
+ * Copyright (c) 2020 Samuel Cabrero <scabrero@suse.de>
+ */
+
+#include <linux/kref.h>
+#include <net/genetlink.h>
+#include <uapi/linux/cifs/cifs_netlink.h>
+
+#include "cifs_swn.h"
+#include "cifsglob.h"
+#include "cifsproto.h"
+#include "fscache.h"
+#include "cifs_debug.h"
+#include "netlink.h"
+
+static DEFINE_IDR(cifs_swnreg_idr);
+static DEFINE_MUTEX(cifs_swnreg_idr_mutex);
+
+struct cifs_swn_reg {
+	int id;
+	struct kref ref_count;
+
+	const char *net_name;
+	const char *share_name;
+	bool net_name_notify;
+	bool share_name_notify;
+	bool ip_notify;
+
+	struct cifs_tcon *tcon;
+};
+
+static int cifs_swn_auth_info_krb(struct cifs_tcon *tcon, struct sk_buff *skb)
+{
+	int ret;
+
+	ret = nla_put_flag(skb, CIFS_GENL_ATTR_SWN_KRB_AUTH);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int cifs_swn_auth_info_ntlm(struct cifs_tcon *tcon, struct sk_buff *skb)
+{
+	int ret;
+
+	if (tcon->ses->user_name != NULL) {
+		ret = nla_put_string(skb, CIFS_GENL_ATTR_SWN_USER_NAME, tcon->ses->user_name);
+		if (ret < 0)
+			return ret;
+	}
+
+	if (tcon->ses->password != NULL) {
+		ret = nla_put_string(skb, CIFS_GENL_ATTR_SWN_PASSWORD, tcon->ses->password);
+		if (ret < 0)
+			return ret;
+	}
+
+	if (tcon->ses->domainName != NULL) {
+		ret = nla_put_string(skb, CIFS_GENL_ATTR_SWN_DOMAIN_NAME, tcon->ses->domainName);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Sends a register message to the userspace daemon based on the registration.
+ * The authentication information to connect to the witness service is bundled
+ * into the message.
+ */
+static int cifs_swn_send_register_message(struct cifs_swn_reg *swnreg)
+{
+	struct sk_buff *skb;
+	struct genlmsghdr *hdr;
+	enum securityEnum authtype;
+	int ret;
+
+	skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (skb == NULL) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	hdr = genlmsg_put(skb, 0, 0, &cifs_genl_family, 0, CIFS_GENL_CMD_SWN_REGISTER);
+	if (hdr == NULL) {
+		ret = -ENOMEM;
+		goto nlmsg_fail;
+	}
+
+	ret = nla_put_u32(skb, CIFS_GENL_ATTR_SWN_REGISTRATION_ID, swnreg->id);
+	if (ret < 0)
+		goto nlmsg_fail;
+
+	ret = nla_put_string(skb, CIFS_GENL_ATTR_SWN_NET_NAME, swnreg->net_name);
+	if (ret < 0)
+		goto nlmsg_fail;
+
+	ret = nla_put_string(skb, CIFS_GENL_ATTR_SWN_SHARE_NAME, swnreg->share_name);
+	if (ret < 0)
+		goto nlmsg_fail;
+
+	ret = nla_put(skb, CIFS_GENL_ATTR_SWN_IP, sizeof(struct sockaddr_storage),
+			&swnreg->tcon->ses->server->dstaddr);
+	if (ret < 0)
+		goto nlmsg_fail;
+
+	if (swnreg->net_name_notify) {
+		ret = nla_put_flag(skb, CIFS_GENL_ATTR_SWN_NET_NAME_NOTIFY);
+		if (ret < 0)
+			goto nlmsg_fail;
+	}
+
+	if (swnreg->share_name_notify) {
+		ret = nla_put_flag(skb, CIFS_GENL_ATTR_SWN_SHARE_NAME_NOTIFY);
+		if (ret < 0)
+			goto nlmsg_fail;
+	}
+
+	if (swnreg->ip_notify) {
+		ret = nla_put_flag(skb, CIFS_GENL_ATTR_SWN_IP_NOTIFY);
+		if (ret < 0)
+			goto nlmsg_fail;
+	}
+
+	authtype = cifs_select_sectype(swnreg->tcon->ses->server, swnreg->tcon->ses->sectype);
+	switch (authtype) {
+	case Kerberos:
+		ret = cifs_swn_auth_info_krb(swnreg->tcon, skb);
+		if (ret < 0) {
+			cifs_dbg(VFS, "%s: Failed to get kerberos auth info: %d\n", __func__, ret);
+			goto nlmsg_fail;
+		}
+		break;
+	case LANMAN:
+	case NTLM:
+	case NTLMv2:
+	case RawNTLMSSP:
+		ret = cifs_swn_auth_info_ntlm(swnreg->tcon, skb);
+		if (ret < 0) {
+			cifs_dbg(VFS, "%s: Failed to get NTLM auth info: %d\n", __func__, ret);
+			goto nlmsg_fail;
+		}
+		break;
+	default:
+		cifs_dbg(VFS, "%s: secType %d not supported!\n", __func__, authtype);
+		ret = -EINVAL;
+		goto nlmsg_fail;
+	}
+
+	genlmsg_end(skb, hdr);
+	genlmsg_multicast(&cifs_genl_family, skb, 0, CIFS_GENL_MCGRP_SWN, GFP_ATOMIC);
+
+	cifs_dbg(FYI, "%s: Message to register for network name %s with id %d sent\n", __func__,
+			swnreg->net_name, swnreg->id);
+
+	return 0;
+
+nlmsg_fail:
+	genlmsg_cancel(skb, hdr);
+	nlmsg_free(skb);
+fail:
+	return ret;
+}
+
+/*
+ * Sends an unregister message to the userspace daemon based on the registration
+ */
+static int cifs_swn_send_unregister_message(struct cifs_swn_reg *swnreg)
+{
+	struct sk_buff *skb;
+	struct genlmsghdr *hdr;
+	int ret;
+
+	skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (skb == NULL)
+		return -ENOMEM;
+
+	hdr = genlmsg_put(skb, 0, 0, &cifs_genl_family, 0, CIFS_GENL_CMD_SWN_UNREGISTER);
+	if (hdr == NULL) {
+		ret = -ENOMEM;
+		goto nlmsg_fail;
+	}
+
+	ret = nla_put_u32(skb, CIFS_GENL_ATTR_SWN_REGISTRATION_ID, swnreg->id);
+	if (ret < 0)
+		goto nlmsg_fail;
+
+	ret = nla_put_string(skb, CIFS_GENL_ATTR_SWN_NET_NAME, swnreg->net_name);
+	if (ret < 0)
+		goto nlmsg_fail;
+
+	ret = nla_put_string(skb, CIFS_GENL_ATTR_SWN_SHARE_NAME, swnreg->share_name);
+	if (ret < 0)
+		goto nlmsg_fail;
+
+	ret = nla_put(skb, CIFS_GENL_ATTR_SWN_IP, sizeof(struct sockaddr_storage),
+			&swnreg->tcon->ses->server->dstaddr);
+	if (ret < 0)
+		goto nlmsg_fail;
+
+	if (swnreg->net_name_notify) {
+		ret = nla_put_flag(skb, CIFS_GENL_ATTR_SWN_NET_NAME_NOTIFY);
+		if (ret < 0)
+			goto nlmsg_fail;
+	}
+
+	if (swnreg->share_name_notify) {
+		ret = nla_put_flag(skb, CIFS_GENL_ATTR_SWN_SHARE_NAME_NOTIFY);
+		if (ret < 0)
+			goto nlmsg_fail;
+	}
+
+	if (swnreg->ip_notify) {
+		ret = nla_put_flag(skb, CIFS_GENL_ATTR_SWN_IP_NOTIFY);
+		if (ret < 0)
+			goto nlmsg_fail;
+	}
+
+	genlmsg_end(skb, hdr);
+	genlmsg_multicast(&cifs_genl_family, skb, 0, CIFS_GENL_MCGRP_SWN, GFP_ATOMIC);
+
+	cifs_dbg(FYI, "%s: Message to unregister for network name %s with id %d sent\n", __func__,
+			swnreg->net_name, swnreg->id);
+
+	return 0;
+
+nlmsg_fail:
+	genlmsg_cancel(skb, hdr);
+	nlmsg_free(skb);
+	return ret;
+}
+
+/*
+ * Find the registration matching the tcon's server and share name (case-insensitive).
+ * Caller must hold cifs_swnreg_idr_mutex, which is still held on return. Returns NULL
+ * on no match or name extraction failure. TODO Try to avoid memory allocations
+ */
+static struct cifs_swn_reg *cifs_find_swn_reg(struct cifs_tcon *tcon)
+{
+	struct cifs_swn_reg *swnreg;
+	int id;
+	const char *share_name;
+	const char *net_name;
+
+	net_name = extract_hostname(tcon->treeName);
+	if (IS_ERR_OR_NULL(net_name)) {
+		int ret;
+
+		ret = PTR_ERR(net_name);
+		cifs_dbg(VFS, "%s: failed to extract host name from target '%s': %d\n",
+				__func__, tcon->treeName, ret);
+		return NULL;
+	}
+
+	share_name = extract_sharename(tcon->treeName);
+	if (IS_ERR_OR_NULL(share_name)) {
+		int ret;
+
+		ret = PTR_ERR(share_name);
+		cifs_dbg(VFS, "%s: failed to extract share name from target '%s': %d\n",
+				__func__, tcon->treeName, ret);
+		kfree(net_name);
+		return NULL;
+	}
+
+	idr_for_each_entry(&cifs_swnreg_idr, swnreg, id) {
+		if (strcasecmp(swnreg->net_name, net_name) != 0
+		    || strcasecmp(swnreg->share_name, share_name) != 0) {
+			continue;
+		}
+
+		/* The mutex is owned by the caller; do not drop it here */
+
+		cifs_dbg(FYI, "Existing swn registration for %s:%s found\n", swnreg->net_name,
+				swnreg->share_name);
+
+		kfree(net_name);
+		kfree(share_name);
+
+		return swnreg;
+	}
+
+	kfree(net_name);
+	kfree(share_name);
+
+	return NULL;
+}
+
+/*
+ * Get a referenced registration for the tcon's server and share name, allocating a new one if
+ * none exists. Returns the registration or an ERR_PTR; the IDR mutex is released on every path.
+ */
+static struct cifs_swn_reg *cifs_get_swn_reg(struct cifs_tcon *tcon)
+{
+	struct cifs_swn_reg *reg = NULL;
+	int ret;
+
+	mutex_lock(&cifs_swnreg_idr_mutex);
+
+	/* Check if we are already registered for this network and share names */
+	reg = cifs_find_swn_reg(tcon);
+	if (!IS_ERR_OR_NULL(reg))
+		kref_get(&reg->ref_count);
+	if (reg != NULL) {
+		/* Existing registration or lookup error: unlock before returning */
+		mutex_unlock(&cifs_swnreg_idr_mutex);
+		return reg;
+	}
+
+	reg = kmalloc(sizeof(struct cifs_swn_reg), GFP_ATOMIC);
+	if (reg == NULL) {
+		mutex_unlock(&cifs_swnreg_idr_mutex);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	kref_init(&reg->ref_count);
+
+	reg->id = idr_alloc(&cifs_swnreg_idr, reg, 1, 0, GFP_ATOMIC);
+	if (reg->id < 0) {
+		cifs_dbg(FYI, "%s: failed to allocate registration id\n", __func__);
+		ret = reg->id;
+		goto fail;
+	}
+
+	reg->net_name = extract_hostname(tcon->treeName);
+	if (IS_ERR(reg->net_name)) {
+		ret = PTR_ERR(reg->net_name);
+		cifs_dbg(VFS, "%s: failed to extract host name from target: %d\n", __func__, ret);
+		goto fail_idr;
+	}
+
+	reg->share_name = extract_sharename(tcon->treeName);
+	if (IS_ERR(reg->share_name)) {
+		ret = PTR_ERR(reg->share_name);
+		cifs_dbg(VFS, "%s: failed to extract share name from target: %d\n", __func__, ret);
+		goto fail_net_name;
+	}
+
+	reg->net_name_notify = true;
+	reg->share_name_notify = true;
+	reg->ip_notify = (tcon->capabilities & SMB2_SHARE_CAP_SCALEOUT);
+
+	reg->tcon = tcon;
+
+	mutex_unlock(&cifs_swnreg_idr_mutex);
+
+	return reg;
+
+fail_net_name:
+	kfree(reg->net_name);
+fail_idr:
+	idr_remove(&cifs_swnreg_idr, reg->id);
+fail:
+	kfree(reg);
+	mutex_unlock(&cifs_swnreg_idr_mutex);
+	return ERR_PTR(ret);
+}
+
+static void cifs_swn_reg_release(struct kref *ref)
+{
+	struct cifs_swn_reg *swnreg = container_of(ref, struct cifs_swn_reg, ref_count);
+	int ret;
+
+	ret = cifs_swn_send_unregister_message(swnreg);
+	if (ret < 0)
+		cifs_dbg(VFS, "%s: Failed to send unregister message: %d\n", __func__, ret);
+
+	idr_remove(&cifs_swnreg_idr, swnreg->id);
+	kfree(swnreg->net_name);
+	kfree(swnreg->share_name);
+	kfree(swnreg);
+}
+
+static void cifs_put_swn_reg(struct cifs_swn_reg *swnreg)
+{
+	mutex_lock(&cifs_swnreg_idr_mutex);
+	kref_put(&swnreg->ref_count, cifs_swn_reg_release);
+	mutex_unlock(&cifs_swnreg_idr_mutex);
+}
+
+int cifs_swn_register(struct cifs_tcon *tcon)
+{
+	struct cifs_swn_reg *swnreg;
+	int ret;
+
+	swnreg = cifs_get_swn_reg(tcon);
+	if (IS_ERR(swnreg))
+		return PTR_ERR(swnreg);
+
+	ret = cifs_swn_send_register_message(swnreg);
+	if (ret < 0) {
+		cifs_dbg(VFS, "%s: Failed to send swn register message: %d\n", __func__, ret);
+		/* Do not put the swnreg or return error, the echo task will retry */
+	}
+
+	return 0;
+}
+
+int cifs_swn_unregister(struct cifs_tcon *tcon)
+{
+	struct cifs_swn_reg *swnreg;
+
+	mutex_lock(&cifs_swnreg_idr_mutex);
+
+	swnreg = cifs_find_swn_reg(tcon);
+	if (swnreg == NULL) {
+		mutex_unlock(&cifs_swnreg_idr_mutex);
+		return -EEXIST;
+	}
+
+	mutex_unlock(&cifs_swnreg_idr_mutex);
+
+	cifs_put_swn_reg(swnreg);
+
+	return 0;
+}
diff --git a/fs/cifs/cifs_swn.h b/fs/cifs/cifs_swn.h
new file mode 100644
index 000000000000..69c7bd1035da
--- /dev/null
+++ b/fs/cifs/cifs_swn.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Witness Service client for CIFS
+ *
+ * Copyright (c) 2020 Samuel Cabrero <scabrero@suse.de>
+ */
+
+#ifndef _CIFS_SWN_H
+#define _CIFS_SWN_H
+
+struct cifs_tcon;
+
+extern int cifs_swn_register(struct cifs_tcon *tcon);
+
+extern int cifs_swn_unregister(struct cifs_tcon *tcon);
+
+#endif /* _CIFS_SWN_H */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index ead1c086b88d..68ef2da7c74b 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -62,6 +62,9 @@
 #include "dfs_cache.h"
 #endif
 #include "fs_context.h"
+#ifdef CONFIG_CIFS_SWN_UPCALL
+#include "cifs_swn.h"
+#endif
 
 extern mempool_t *cifs_req_poolp;
 extern bool disable_legacy_dialects;
@@ -1944,7 +1947,17 @@ cifs_put_tcon(struct cifs_tcon *tcon)
 		return;
 	}
 
-	/* TODO witness unregister */
+#ifdef CONFIG_CIFS_SWN_UPCALL
+	if (tcon->use_witness) {
+		int rc;
+
+		rc = cifs_swn_unregister(tcon);
+		if (rc < 0) {
+			cifs_dbg(VFS, "%s: Failed to unregister for witness notifications: %d\n",
+					__func__, rc);
+		}
+	}
+#endif
 
 	list_del_init(&tcon->tcon_list);
 	spin_unlock(&cifs_tcp_ses_lock);
@@ -2111,8 +2124,17 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
 	if (ctx->witness) {
 		if (ses->server->vals->protocol_id >= SMB30_PROT_ID) {
 			if (tcon->capabilities & SMB2_SHARE_CAP_CLUSTER) {
-				/* TODO witness register */
+				/*
+				 * Set witness in use flag in first place
+				 * to retry registration in the echo task
+				 */
 				tcon->use_witness = true;
+				/* And try to register immediately */
+				rc = cifs_swn_register(tcon);
+				if (rc < 0) {
+					cifs_dbg(VFS, "Failed to register for witness notifications: %d\n", rc);
+					goto out_fail;
+				}
 			} else {
 				/* TODO: try to extend for non-cluster uses (eg multichannel) */
 				cifs_dbg(VFS, "witness requested on mount but no CLUSTER capability on share\n");
diff --git a/fs/cifs/netlink.c b/fs/cifs/netlink.c
index b9154661fa85..83008a56def5 100644
--- a/fs/cifs/netlink.c
+++ b/fs/cifs/netlink.c
@@ -13,6 +13,17 @@
 #include "cifs_debug.h"
 
 static const struct nla_policy cifs_genl_policy[CIFS_GENL_ATTR_MAX + 1] = {
+	[CIFS_GENL_ATTR_SWN_REGISTRATION_ID]	= { .type = NLA_U32 },
+	[CIFS_GENL_ATTR_SWN_NET_NAME]		= { .type = NLA_STRING },
+	[CIFS_GENL_ATTR_SWN_SHARE_NAME]		= { .type = NLA_STRING },
+	[CIFS_GENL_ATTR_SWN_IP]			= { .len = sizeof(struct sockaddr_storage) },
+	[CIFS_GENL_ATTR_SWN_NET_NAME_NOTIFY]	= { .type = NLA_FLAG },
+	[CIFS_GENL_ATTR_SWN_SHARE_NAME_NOTIFY]	= { .type = NLA_FLAG },
+	[CIFS_GENL_ATTR_SWN_IP_NOTIFY]		= { .type = NLA_FLAG },
+	[CIFS_GENL_ATTR_SWN_KRB_AUTH]		= { .type = NLA_FLAG },
+	[CIFS_GENL_ATTR_SWN_USER_NAME]		= { .type = NLA_STRING },
+	[CIFS_GENL_ATTR_SWN_PASSWORD]		= { .type = NLA_STRING },
+	[CIFS_GENL_ATTR_SWN_DOMAIN_NAME]	= { .type = NLA_STRING },
 };
 
 static struct genl_ops cifs_genl_ops[] = {
diff --git a/include/uapi/linux/cifs/cifs_netlink.h b/include/uapi/linux/cifs/cifs_netlink.h
index cdb1bd78fbc7..5662e2774513 100644
--- a/include/uapi/linux/cifs/cifs_netlink.h
+++ b/include/uapi/linux/cifs/cifs_netlink.h
@@ -19,11 +19,26 @@ enum cifs_genl_multicast_groups {
 };
 
 enum cifs_genl_attributes {
+	CIFS_GENL_ATTR_UNSPEC,
+	CIFS_GENL_ATTR_SWN_REGISTRATION_ID,
+	CIFS_GENL_ATTR_SWN_NET_NAME,
+	CIFS_GENL_ATTR_SWN_SHARE_NAME,
+	CIFS_GENL_ATTR_SWN_IP,
+	CIFS_GENL_ATTR_SWN_NET_NAME_NOTIFY,
+	CIFS_GENL_ATTR_SWN_SHARE_NAME_NOTIFY,
+	CIFS_GENL_ATTR_SWN_IP_NOTIFY,
+	CIFS_GENL_ATTR_SWN_KRB_AUTH,
+	CIFS_GENL_ATTR_SWN_USER_NAME,
+	CIFS_GENL_ATTR_SWN_PASSWORD,
+	CIFS_GENL_ATTR_SWN_DOMAIN_NAME,
 	__CIFS_GENL_ATTR_MAX,
 };
 #define CIFS_GENL_ATTR_MAX (__CIFS_GENL_ATTR_MAX - 1)
 
 enum cifs_genl_commands {
+	CIFS_GENL_CMD_UNSPEC,
+	CIFS_GENL_CMD_SWN_REGISTER,
+	CIFS_GENL_CMD_SWN_UNREGISTER,
 	__CIFS_GENL_CMD_MAX
 };
 #define CIFS_GENL_CMD_MAX (__CIFS_GENL_CMD_MAX - 1)
-- 
cgit v1.2.3


From fed979a7e082bd9f25f9002c3c4f8740dacd0bc8 Mon Sep 17 00:00:00 2001
From: Samuel Cabrero <scabrero@suse.de>
Date: Mon, 30 Nov 2020 19:02:52 +0100
Subject: cifs: Set witness notification handler for messages from userspace
 daemon

+ Set a handler for the witness notification messages received from the
  userspace daemon.

+ Handle the resource state change notification. When the resource
  becomes unavailable or available set the tcp status to
  CifsNeedReconnect for all channels.

Signed-off-by: Samuel Cabrero <scabrero@suse.de>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifs_swn.c                     | 86 ++++++++++++++++++++++++++++++++++
 fs/cifs/cifs_swn.h                     |  4 ++
 fs/cifs/netlink.c                      |  9 ++++
 include/uapi/linux/cifs/cifs_netlink.h | 17 +++++++
 4 files changed, 116 insertions(+)

(limited to 'include/uapi')

diff --git a/fs/cifs/cifs_swn.c b/fs/cifs/cifs_swn.c
index c0af03955d0c..63b0764af5d5 100644
--- a/fs/cifs/cifs_swn.c
+++ b/fs/cifs/cifs_swn.c
@@ -383,6 +383,92 @@ static void cifs_put_swn_reg(struct cifs_swn_reg *swnreg)
 	mutex_unlock(&cifs_swnreg_idr_mutex);
 }
 
+static int cifs_swn_resource_state_changed(struct cifs_swn_reg *swnreg, const char *name, int state)
+{
+	int i;
+
+	switch (state) {
+	case CIFS_SWN_RESOURCE_STATE_UNAVAILABLE:
+		cifs_dbg(FYI, "%s: resource name '%s' become unavailable\n", __func__, name);
+		for (i = 0; i < swnreg->tcon->ses->chan_count; i++) {
+			spin_lock(&GlobalMid_Lock);
+			if (swnreg->tcon->ses->chans[i].server->tcpStatus != CifsExiting)
+				swnreg->tcon->ses->chans[i].server->tcpStatus = CifsNeedReconnect;
+			spin_unlock(&GlobalMid_Lock);
+		}
+		break;
+	case CIFS_SWN_RESOURCE_STATE_AVAILABLE:
+		cifs_dbg(FYI, "%s: resource name '%s' become available\n", __func__, name);
+		for (i = 0; i < swnreg->tcon->ses->chan_count; i++) {
+			spin_lock(&GlobalMid_Lock);
+			if (swnreg->tcon->ses->chans[i].server->tcpStatus != CifsExiting)
+				swnreg->tcon->ses->chans[i].server->tcpStatus = CifsNeedReconnect;
+			spin_unlock(&GlobalMid_Lock);
+		}
+		break;
+	case CIFS_SWN_RESOURCE_STATE_UNKNOWN:
+		cifs_dbg(FYI, "%s: resource name '%s' changed to unknown state\n", __func__, name);
+		break;
+	}
+	return 0;
+}
+
+int cifs_swn_notify(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cifs_swn_reg *swnreg;
+	char name[256];
+	int type;
+
+	if (info->attrs[CIFS_GENL_ATTR_SWN_REGISTRATION_ID]) {
+		int swnreg_id;
+
+		swnreg_id = nla_get_u32(info->attrs[CIFS_GENL_ATTR_SWN_REGISTRATION_ID]);
+		mutex_lock(&cifs_swnreg_idr_mutex);
+		swnreg = idr_find(&cifs_swnreg_idr, swnreg_id);
+		mutex_unlock(&cifs_swnreg_idr_mutex);
+		if (swnreg == NULL) {
+			cifs_dbg(FYI, "%s: registration id %d not found\n", __func__, swnreg_id);
+			return -EINVAL;
+		}
+	} else {
+		cifs_dbg(FYI, "%s: missing registration id attribute\n", __func__);
+		return -EINVAL;
+	}
+
+	if (info->attrs[CIFS_GENL_ATTR_SWN_NOTIFICATION_TYPE]) {
+		type = nla_get_u32(info->attrs[CIFS_GENL_ATTR_SWN_NOTIFICATION_TYPE]);
+	} else {
+		cifs_dbg(FYI, "%s: missing notification type attribute\n", __func__);
+		return -EINVAL;
+	}
+
+	switch (type) {
+	case CIFS_SWN_NOTIFICATION_RESOURCE_CHANGE: {
+		int state;
+
+		if (info->attrs[CIFS_GENL_ATTR_SWN_RESOURCE_NAME]) {
+			nla_strlcpy(name, info->attrs[CIFS_GENL_ATTR_SWN_RESOURCE_NAME],
+					sizeof(name));
+		} else {
+			cifs_dbg(FYI, "%s: missing resource name attribute\n", __func__);
+			return -EINVAL;
+		}
+		if (info->attrs[CIFS_GENL_ATTR_SWN_RESOURCE_STATE]) {
+			state = nla_get_u32(info->attrs[CIFS_GENL_ATTR_SWN_RESOURCE_STATE]);
+		} else {
+			cifs_dbg(FYI, "%s: missing resource state attribute\n", __func__);
+			return -EINVAL;
+		}
+		return cifs_swn_resource_state_changed(swnreg, name, state);
+	}
+	default:
+		cifs_dbg(FYI, "%s: unknown notification type %d\n", __func__, type);
+		break;
+	}
+
+	return 0;
+}
+
 int cifs_swn_register(struct cifs_tcon *tcon)
 {
 	struct cifs_swn_reg *swnreg;
diff --git a/fs/cifs/cifs_swn.h b/fs/cifs/cifs_swn.h
index 69c7bd1035da..7ef9ecedbd05 100644
--- a/fs/cifs/cifs_swn.h
+++ b/fs/cifs/cifs_swn.h
@@ -9,9 +9,13 @@
 #define _CIFS_SWN_H
 
 struct cifs_tcon;
+struct sk_buff;
+struct genl_info;
 
 extern int cifs_swn_register(struct cifs_tcon *tcon);
 
 extern int cifs_swn_unregister(struct cifs_tcon *tcon);
 
+extern int cifs_swn_notify(struct sk_buff *skb, struct genl_info *info);
+
 #endif /* _CIFS_SWN_H */
diff --git a/fs/cifs/netlink.c b/fs/cifs/netlink.c
index 83008a56def5..5aaabe4cc0a7 100644
--- a/fs/cifs/netlink.c
+++ b/fs/cifs/netlink.c
@@ -11,6 +11,7 @@
 #include "netlink.h"
 #include "cifsglob.h"
 #include "cifs_debug.h"
+#include "cifs_swn.h"
 
 static const struct nla_policy cifs_genl_policy[CIFS_GENL_ATTR_MAX + 1] = {
 	[CIFS_GENL_ATTR_SWN_REGISTRATION_ID]	= { .type = NLA_U32 },
@@ -24,9 +25,17 @@ static const struct nla_policy cifs_genl_policy[CIFS_GENL_ATTR_MAX + 1] = {
 	[CIFS_GENL_ATTR_SWN_USER_NAME]		= { .type = NLA_STRING },
 	[CIFS_GENL_ATTR_SWN_PASSWORD]		= { .type = NLA_STRING },
 	[CIFS_GENL_ATTR_SWN_DOMAIN_NAME]	= { .type = NLA_STRING },
+	[CIFS_GENL_ATTR_SWN_NOTIFICATION_TYPE]	= { .type = NLA_U32 },
+	[CIFS_GENL_ATTR_SWN_RESOURCE_STATE]	= { .type = NLA_U32 },
+	[CIFS_GENL_ATTR_SWN_RESOURCE_NAME]	= { .type = NLA_STRING},
 };
 
 static struct genl_ops cifs_genl_ops[] = {
+	{
+		.cmd = CIFS_GENL_CMD_SWN_NOTIFY,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = cifs_swn_notify,
+	},
 };
 
 static const struct genl_multicast_group cifs_genl_mcgrps[] = {
diff --git a/include/uapi/linux/cifs/cifs_netlink.h b/include/uapi/linux/cifs/cifs_netlink.h
index 5662e2774513..da3107582f49 100644
--- a/include/uapi/linux/cifs/cifs_netlink.h
+++ b/include/uapi/linux/cifs/cifs_netlink.h
@@ -31,6 +31,9 @@ enum cifs_genl_attributes {
 	CIFS_GENL_ATTR_SWN_USER_NAME,
 	CIFS_GENL_ATTR_SWN_PASSWORD,
 	CIFS_GENL_ATTR_SWN_DOMAIN_NAME,
+	CIFS_GENL_ATTR_SWN_NOTIFICATION_TYPE,
+	CIFS_GENL_ATTR_SWN_RESOURCE_STATE,
+	CIFS_GENL_ATTR_SWN_RESOURCE_NAME,
 	__CIFS_GENL_ATTR_MAX,
 };
 #define CIFS_GENL_ATTR_MAX (__CIFS_GENL_ATTR_MAX - 1)
@@ -39,8 +42,22 @@ enum cifs_genl_commands {
 	CIFS_GENL_CMD_UNSPEC,
 	CIFS_GENL_CMD_SWN_REGISTER,
 	CIFS_GENL_CMD_SWN_UNREGISTER,
+	CIFS_GENL_CMD_SWN_NOTIFY,
 	__CIFS_GENL_CMD_MAX
 };
 #define CIFS_GENL_CMD_MAX (__CIFS_GENL_CMD_MAX - 1)
 
+enum cifs_swn_notification_type {
+	CIFS_SWN_NOTIFICATION_RESOURCE_CHANGE = 0x01,
+	CIFS_SWN_NOTIFICATION_CLIENT_MOVE	 = 0x02,
+	CIFS_SWN_NOTIFICATION_SHARE_MOVE	 = 0x03,
+	CIFS_SWN_NOTIFICATION_IP_CHANGE	 = 0x04,
+};
+
+enum cifs_swn_resource_state {
+	CIFS_SWN_RESOURCE_STATE_UNKNOWN     = 0x00,
+	CIFS_SWN_RESOURCE_STATE_AVAILABLE   = 0x01,
+	CIFS_SWN_RESOURCE_STATE_UNAVAILABLE = 0xFF
+};
+
 #endif /* _UAPILINUX_CIFS_NETLINK_H */
-- 
cgit v1.2.3


From dc8eeef73b63ed8988224ba6b5ed19a615163a7f Mon Sep 17 00:00:00 2001
From: Andra Paraschiv <andraprs@amazon.com>
Date: Mon, 14 Dec 2020 18:11:18 +0200
Subject: vm_sockets: Add flags field in the vsock address data structure

vsock enables communication between virtual machines and the host they
are running on. With the multi transport support (guest->host and
host->guest), nested VMs can also use vsock channels for communication.

In addition to this, by default, all the vsock packets are forwarded to
the host, if no host->guest transport is loaded. This behavior can be
implicitly used for enabling vsock communication between sibling VMs.

Add a flags field in the vsock address data structure that can be used
to explicitly mark the vsock connection as being targeted for a certain
type of communication. This way, one can distinguish between different use
cases such as nested VMs and sibling VMs.

This field can be set when initializing the vsock address variable used
for the connect() call.

Changelog

v3 -> v4

* Update the size of "svm_flags" field to be 1 byte instead of 2 bytes.

v2 -> v3

* Add "svm_flags" as a new field, not reusing "svm_reserved1".

v1 -> v2

* Update the field name to "svm_flags".
* Split the current patch in 2 patches.

Signed-off-by: Andra Paraschiv <andraprs@amazon.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/vm_sockets.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h
index fd0ed7221645..c2eac3d0a9f0 100644
--- a/include/uapi/linux/vm_sockets.h
+++ b/include/uapi/linux/vm_sockets.h
@@ -18,6 +18,7 @@
 #define _UAPI_VM_SOCKETS_H
 
 #include <linux/socket.h>
+#include <linux/types.h>
 
 /* Option name for STREAM socket buffer size.  Use as the option name in
  * setsockopt(3) or getsockopt(3) to set or get an unsigned long long that
@@ -148,10 +149,13 @@ struct sockaddr_vm {
 	unsigned short svm_reserved1;
 	unsigned int svm_port;
 	unsigned int svm_cid;
+	__u8 svm_flags;
 	unsigned char svm_zero[sizeof(struct sockaddr) -
 			       sizeof(sa_family_t) -
 			       sizeof(unsigned short) -
-			       sizeof(unsigned int) - sizeof(unsigned int)];
+			       sizeof(unsigned int) -
+			       sizeof(unsigned int) -
+			       sizeof(__u8)];
 };
 
 #define IOCTL_VM_SOCKETS_GET_LOCAL_CID		_IO(7, 0xb9)
-- 
cgit v1.2.3


From caaf95e0f23f9ed240b02251aab0f6fdb652b33d Mon Sep 17 00:00:00 2001
From: Andra Paraschiv <andraprs@amazon.com>
Date: Mon, 14 Dec 2020 18:11:19 +0200
Subject: vm_sockets: Add VMADDR_FLAG_TO_HOST vsock flag

Add VMADDR_FLAG_TO_HOST vsock flag that is used to setup a vsock
connection where all the packets are forwarded to the host.

Then, using this type of vsock channel, vsock communication between
sibling VMs can be built on top of it.

Changelog

v3 -> v4

* Update the "VMADDR_FLAG_TO_HOST" value, as the size of the field has
  been updated to 1 byte.

v2 -> v3

* Update comments to mention when the flag is set in the connect and
  listen paths.

v1 -> v2

* New patch in v2, it was split from the first patch in the series.
* Remove the default value for the vsock flags field.
* Update the naming for the vsock flag to "VMADDR_FLAG_TO_HOST".

Signed-off-by: Andra Paraschiv <andraprs@amazon.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/vm_sockets.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h
index c2eac3d0a9f0..46918a1852d7 100644
--- a/include/uapi/linux/vm_sockets.h
+++ b/include/uapi/linux/vm_sockets.h
@@ -115,6 +115,26 @@
 
 #define VMADDR_CID_HOST 2
 
+/* The current default use case for the vsock channel is the following:
+ * local vsock communication between guest and host and nested VMs setup.
+ * In addition to this, implicitly, the vsock packets are forwarded to the host
+ * if no host->guest vsock transport is set.
+ *
+ * Set this flag value in the sockaddr_vm corresponding field if the vsock
+ * packets need to be always forwarded to the host. Using this behavior,
+ * vsock communication between sibling VMs can be setup.
+ *
+ * This way one can explicitly distinguish between vsock channels created for
+ * different use cases, such as nested VMs (or local communication between
+ * guest and host) and sibling VMs.
+ *
+ * The flag can be set in the connect logic in the user space application flow.
+ * In the listen logic (from kernel space) the flag is set on the remote peer
+ * address. This happens for an incoming connection when it is routed from the
+ * host and comes from the guest (local CID and remote CID > VMADDR_CID_HOST).
+ */
+#define VMADDR_FLAG_TO_HOST 0x01
+
 /* Invalid vSockets version. */
 
 #define VM_SOCKETS_INVALID_VERSION -1U
-- 
cgit v1.2.3


From a85cbe6159ffc973e5702f70a3bd5185f8f3c38d Mon Sep 17 00:00:00 2001
From: Petr Vorel <petr.vorel@gmail.com>
Date: Mon, 14 Dec 2020 19:03:21 -0800
Subject: uapi: move constants from <linux/kernel.h> to <linux/const.h>

and include <linux/const.h> in UAPI headers instead of <linux/kernel.h>.

The reason is to avoid indirect <linux/sysinfo.h> include when using
some network headers: <linux/netlink.h> or others -> <linux/kernel.h>
-> <linux/sysinfo.h>.

On musl, this indirect include causes a redefinition of struct sysinfo when
both <sys/sysinfo.h> and some of the UAPI headers are included:

    In file included from x86_64-buildroot-linux-musl/sysroot/usr/include/linux/kernel.h:5,
                     from x86_64-buildroot-linux-musl/sysroot/usr/include/linux/netlink.h:5,
                     from ../include/tst_netlink.h:14,
                     from tst_crypto.c:13:
    x86_64-buildroot-linux-musl/sysroot/usr/include/linux/sysinfo.h:8:8: error: redefinition of `struct sysinfo'
     struct sysinfo {
            ^~~~~~~
    In file included from ../include/tst_safe_macros.h:15,
                     from ../include/tst_test.h:93,
                     from tst_crypto.c:11:
    x86_64-buildroot-linux-musl/sysroot/usr/include/sys/sysinfo.h:10:8: note: originally defined here

Link: https://lkml.kernel.org/r/20201015190013.8901-1-petr.vorel@gmail.com
Signed-off-by: Petr Vorel <petr.vorel@gmail.com>
Suggested-by: Rich Felker <dalias@aerifal.cx>
Acked-by: Rich Felker <dalias@libc.org>
Cc: Peter Korsgaard <peter@korsgaard.com>
Cc: Baruch Siach <baruch@tkos.co.il>
Cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/const.h              | 5 +++++
 include/uapi/linux/ethtool.h            | 2 +-
 include/uapi/linux/kernel.h             | 9 +--------
 include/uapi/linux/lightnvm.h           | 2 +-
 include/uapi/linux/mroute6.h            | 2 +-
 include/uapi/linux/netfilter/x_tables.h | 2 +-
 include/uapi/linux/netlink.h            | 2 +-
 include/uapi/linux/sysctl.h             | 2 +-
 8 files changed, 12 insertions(+), 14 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/const.h b/include/uapi/linux/const.h
index 5ed721ad5b19..af2a44c08683 100644
--- a/include/uapi/linux/const.h
+++ b/include/uapi/linux/const.h
@@ -28,4 +28,9 @@
 #define _BITUL(x)	(_UL(1) << (x))
 #define _BITULL(x)	(_ULL(1) << (x))
 
+#define __ALIGN_KERNEL(x, a)		__ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1)
+#define __ALIGN_KERNEL_MASK(x, mask)	(((x) + (mask)) & ~(mask))
+
+#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+
 #endif /* _UAPI_LINUX_CONST_H */
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 9ca87bc73c44..cde753bb2093 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -14,7 +14,7 @@
 #ifndef _UAPI_LINUX_ETHTOOL_H
 #define _UAPI_LINUX_ETHTOOL_H
 
-#include <linux/kernel.h>
+#include <linux/const.h>
 #include <linux/types.h>
 #include <linux/if_ether.h>
 
diff --git a/include/uapi/linux/kernel.h b/include/uapi/linux/kernel.h
index 0ff8f7477847..fadf2db71fe8 100644
--- a/include/uapi/linux/kernel.h
+++ b/include/uapi/linux/kernel.h
@@ -3,13 +3,6 @@
 #define _UAPI_LINUX_KERNEL_H
 
 #include <linux/sysinfo.h>
-
-/*
- * 'kernel.h' contains some often-used function prototypes etc
- */
-#define __ALIGN_KERNEL(x, a)		__ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1)
-#define __ALIGN_KERNEL_MASK(x, mask)	(((x) + (mask)) & ~(mask))
-
-#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+#include <linux/const.h>
 
 #endif /* _UAPI_LINUX_KERNEL_H */
diff --git a/include/uapi/linux/lightnvm.h b/include/uapi/linux/lightnvm.h
index f9a1be7fc696..ead2e72e5c88 100644
--- a/include/uapi/linux/lightnvm.h
+++ b/include/uapi/linux/lightnvm.h
@@ -21,7 +21,7 @@
 #define _UAPI_LINUX_LIGHTNVM_H
 
 #ifdef __KERNEL__
-#include <linux/kernel.h>
+#include <linux/const.h>
 #include <linux/ioctl.h>
 #else /* __KERNEL__ */
 #include <stdio.h>
diff --git a/include/uapi/linux/mroute6.h b/include/uapi/linux/mroute6.h
index c36177a86516..a1fd6173e2db 100644
--- a/include/uapi/linux/mroute6.h
+++ b/include/uapi/linux/mroute6.h
@@ -2,7 +2,7 @@
 #ifndef _UAPI__LINUX_MROUTE6_H
 #define _UAPI__LINUX_MROUTE6_H
 
-#include <linux/kernel.h>
+#include <linux/const.h>
 #include <linux/types.h>
 #include <linux/sockios.h>
 #include <linux/in6.h>		/* For struct sockaddr_in6. */
diff --git a/include/uapi/linux/netfilter/x_tables.h b/include/uapi/linux/netfilter/x_tables.h
index a8283f7dbc51..b8c6bb233ac1 100644
--- a/include/uapi/linux/netfilter/x_tables.h
+++ b/include/uapi/linux/netfilter/x_tables.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef _UAPI_X_TABLES_H
 #define _UAPI_X_TABLES_H
-#include <linux/kernel.h>
+#include <linux/const.h>
 #include <linux/types.h>
 
 #define XT_FUNCTION_MAXNAMELEN 30
diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index c3816ff7bfc3..3d94269bbfa8 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -2,7 +2,7 @@
 #ifndef _UAPI__LINUX_NETLINK_H
 #define _UAPI__LINUX_NETLINK_H
 
-#include <linux/kernel.h>
+#include <linux/const.h>
 #include <linux/socket.h> /* for __kernel_sa_family_t */
 #include <linux/types.h>
 
diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h
index 27c1ed2822e6..458179df9b27 100644
--- a/include/uapi/linux/sysctl.h
+++ b/include/uapi/linux/sysctl.h
@@ -23,7 +23,7 @@
 #ifndef _UAPI_LINUX_SYSCTL_H
 #define _UAPI_LINUX_SYSCTL_H
 
-#include <linux/kernel.h>
+#include <linux/const.h>
 #include <linux/types.h>
 #include <linux/compiler.h>
 
-- 
cgit v1.2.3


From 37cd0575b8510159992d279c530c05f872990b02 Mon Sep 17 00:00:00 2001
From: Lokesh Gidra <lokeshgidra@google.com>
Date: Mon, 14 Dec 2020 19:13:49 -0800
Subject: userfaultfd: add UFFD_USER_MODE_ONLY

Patch series "Control over userfaultfd kernel-fault handling", v6.

This patch series is split from [1].  The other series enables SELinux
support for userfaultfd file descriptors so that its creation and movement
can be controlled.

It has been demonstrated on various occasions that suspending kernel code
execution for an arbitrary amount of time at any access to userspace
memory (copy_from_user()/copy_to_user()/...) can be exploited to change
the intended behavior of the kernel.  For instance, handling page faults
in kernel-mode using userfaultfd has been exploited in [2, 3].  Likewise,
FUSE, which is similar to userfaultfd in this respect, has been exploited
in [4, 5] for a similar outcome.

This small patch series adds a new flag to userfaultfd(2) that allows
callers to give up the ability to handle kernel-mode faults with the
resulting UFFD file object.  It then adds a 'user-mode only' option to the
unprivileged_userfaultfd sysctl knob to require unprivileged callers to
use this new flag.

The purpose of this new interface is to decrease the chance of an
unprivileged userfaultfd user taking advantage of userfaultfd to enhance
security vulnerabilities by lengthening the race window in kernel code.

[1] https://lore.kernel.org/lkml/20200211225547.235083-1-dancol@google.com/
[2] https://duasynt.com/blog/linux-kernel-heap-spray
[3] https://duasynt.com/blog/cve-2016-6187-heap-off-by-one-exploit
[4] https://googleprojectzero.blogspot.com/2016/06/exploiting-recursion-in-linux-kernel_20.html
[5] https://bugs.chromium.org/p/project-zero/issues/detail?id=808

This patch (of 2):

userfaultfd handles page faults from both user and kernel code.  Add a new
UFFD_USER_MODE_ONLY flag for userfaultfd(2) that makes the resulting
userfaultfd object refuse to handle faults from kernel mode, treating
these faults as if SIGBUS were always raised, causing the kernel code to
fail with EFAULT.

A future patch adds a knob allowing administrators to give some processes
the ability to create userfaultfd file objects only if they pass
UFFD_USER_MODE_ONLY, reducing the likelihood that these processes will
exploit userfaultfd's ability to delay kernel page faults to open timing
windows for future exploits.

Link: https://lkml.kernel.org/r/20201120030411.2690816-1-lokeshgidra@google.com
Link: https://lkml.kernel.org/r/20201120030411.2690816-2-lokeshgidra@google.com
Signed-off-by: Daniel Colascione <dancol@google.com>
Signed-off-by: Lokesh Gidra <lokeshgidra@google.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: <calin@google.com>
Cc: Daniel Colascione <dancol@dancol.org>
Cc: Eric Biggers <ebiggers@kernel.org>
Cc: Iurii Zaikin <yzaikin@google.com>
Cc: Jeff Vander Stoep <jeffv@google.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kalesh Singh <kaleshsingh@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Nitin Gupta <nigupta@nvidia.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Shaohua Li <shli@fb.com>
Cc: Stephen Smalley <stephen.smalley.work@gmail.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/userfaultfd.c                 | 10 +++++++++-
 include/uapi/linux/userfaultfd.h |  9 +++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 000b457ad087..605599fde015 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -405,6 +405,13 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 
 	if (ctx->features & UFFD_FEATURE_SIGBUS)
 		goto out;
+	if ((vmf->flags & FAULT_FLAG_USER) == 0 &&
+	    ctx->flags & UFFD_USER_MODE_ONLY) {
+		printk_once(KERN_WARNING "uffd: Set unprivileged_userfaultfd "
+			"sysctl knob to 1 if kernel faults must be handled "
+			"without obtaining CAP_SYS_PTRACE capability\n");
+		goto out;
+	}
 
 	/*
 	 * If it's already released don't get it. This avoids to loop
@@ -1965,10 +1972,11 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
 	BUG_ON(!current->mm);
 
 	/* Check the UFFD_* constants for consistency.  */
+	BUILD_BUG_ON(UFFD_USER_MODE_ONLY & UFFD_SHARED_FCNTL_FLAGS);
 	BUILD_BUG_ON(UFFD_CLOEXEC != O_CLOEXEC);
 	BUILD_BUG_ON(UFFD_NONBLOCK != O_NONBLOCK);
 
-	if (flags & ~UFFD_SHARED_FCNTL_FLAGS)
+	if (flags & ~(UFFD_SHARED_FCNTL_FLAGS | UFFD_USER_MODE_ONLY))
 		return -EINVAL;
 
 	ctx = kmem_cache_alloc(userfaultfd_ctx_cachep, GFP_KERNEL);
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index e7e98bde221f..5f2d88212f7c 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -257,4 +257,13 @@ struct uffdio_writeprotect {
 	__u64 mode;
 };
 
+/*
+ * Flags for the userfaultfd(2) system call itself.
+ */
+
+/*
+ * Create a userfaultfd that can handle page faults only in user mode.
+ */
+#define UFFD_USER_MODE_ONLY 1
+
 #endif /* _LINUX_USERFAULTFD_H */
-- 
cgit v1.2.3


From 75f4d4544db9fa34e1f04174f27d9f8a387be37d Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Tue, 15 Dec 2020 11:25:31 +0100
Subject: devlink: use _BITUL() macro instead of BIT() in the UAPI header

The BIT() macro is not available for the UAPI headers. Moreover, it can
be defined differently in user space headers. Thus, replace its usage
with the _BITUL() macro which is already used in other macro definitions
in <linux/devlink.h>.

Fixes: dc64cc7c6310 ("devlink: Add devlink reload limit option")
Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Link: https://lore.kernel.org/r/20201215102531.16958-1-tklauser@distanz.ch
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/devlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 5203f54a2be1..cf89c318f2ac 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -322,7 +322,7 @@ enum devlink_reload_limit {
 	DEVLINK_RELOAD_LIMIT_MAX = __DEVLINK_RELOAD_LIMIT_MAX - 1
 };
 
-#define DEVLINK_RELOAD_LIMITS_VALID_MASK (BIT(__DEVLINK_RELOAD_LIMIT_MAX) - 1)
+#define DEVLINK_RELOAD_LIMITS_VALID_MASK (_BITUL(__DEVLINK_RELOAD_LIMIT_MAX) - 1)
 
 enum devlink_attr {
 	/* don't change the order or add anything between, this is ABI! */
-- 
cgit v1.2.3


From 1e38f0031c3055c9c7e5ffcb3bb09c95f69614ee Mon Sep 17 00:00:00 2001
From: "Enrico Weigelt, metux IT consult" <info@metux.net>
Date: Wed, 2 Dec 2020 12:19:30 +0100
Subject: uapi: virtio_ids.h: consistent indentions

Fix the differing indentation so that it is consistent and properly aligned.

Signed-off-by: Enrico Weigelt, metux IT consult <info@metux.net>
Link: https://lore.kernel.org/r/20201202111931.31953-1-info@metux.net
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/uapi/linux/virtio_ids.h | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
index b052355ac7a3..3cb55e5277a1 100644
--- a/include/uapi/linux/virtio_ids.h
+++ b/include/uapi/linux/virtio_ids.h
@@ -29,24 +29,24 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE. */
 
-#define VIRTIO_ID_NET		1 /* virtio net */
-#define VIRTIO_ID_BLOCK		2 /* virtio block */
-#define VIRTIO_ID_CONSOLE	3 /* virtio console */
-#define VIRTIO_ID_RNG		4 /* virtio rng */
-#define VIRTIO_ID_BALLOON	5 /* virtio balloon */
-#define VIRTIO_ID_RPMSG		7 /* virtio remote processor messaging */
-#define VIRTIO_ID_SCSI		8 /* virtio scsi */
-#define VIRTIO_ID_9P		9 /* 9p virtio console */
-#define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */
-#define VIRTIO_ID_CAIF	       12 /* Virtio caif */
-#define VIRTIO_ID_GPU          16 /* virtio GPU */
-#define VIRTIO_ID_INPUT        18 /* virtio input */
-#define VIRTIO_ID_VSOCK        19 /* virtio vsock transport */
-#define VIRTIO_ID_CRYPTO       20 /* virtio crypto */
-#define VIRTIO_ID_IOMMU        23 /* virtio IOMMU */
-#define VIRTIO_ID_MEM          24 /* virtio mem */
-#define VIRTIO_ID_FS           26 /* virtio filesystem */
-#define VIRTIO_ID_PMEM         27 /* virtio pmem */
-#define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */
+#define VIRTIO_ID_NET			1 /* virtio net */
+#define VIRTIO_ID_BLOCK			2 /* virtio block */
+#define VIRTIO_ID_CONSOLE		3 /* virtio console */
+#define VIRTIO_ID_RNG			4 /* virtio rng */
+#define VIRTIO_ID_BALLOON		5 /* virtio balloon */
+#define VIRTIO_ID_RPMSG			7 /* virtio remote processor messaging */
+#define VIRTIO_ID_SCSI			8 /* virtio scsi */
+#define VIRTIO_ID_9P			9 /* 9p virtio console */
+#define VIRTIO_ID_RPROC_SERIAL		11 /* virtio remoteproc serial link */
+#define VIRTIO_ID_CAIF			12 /* Virtio caif */
+#define VIRTIO_ID_GPU			16 /* virtio GPU */
+#define VIRTIO_ID_INPUT			18 /* virtio input */
+#define VIRTIO_ID_VSOCK			19 /* virtio vsock transport */
+#define VIRTIO_ID_CRYPTO		20 /* virtio crypto */
+#define VIRTIO_ID_IOMMU			23 /* virtio IOMMU */
+#define VIRTIO_ID_MEM			24 /* virtio mem */
+#define VIRTIO_ID_FS			26 /* virtio filesystem */
+#define VIRTIO_ID_PMEM			27 /* virtio pmem */
+#define VIRTIO_ID_MAC80211_HWSIM	29 /* virtio mac80211-hwsim */
 
 #endif /* _LINUX_VIRTIO_IDS_H */
-- 
cgit v1.2.3


From be618636de4186521ffba2cbe5105e9c3481b9cb Mon Sep 17 00:00:00 2001
From: "Enrico Weigelt, metux IT consult" <info@metux.net>
Date: Wed, 2 Dec 2020 12:19:31 +0100
Subject: uapi: virtio_ids: add missing device type IDs from OASIS spec

The OASIS virtio spec (1.1) defines several IDs that aren't reflected
in the header yet. Fix this by adding the missing IDs, even though
they're not used by the kernel yet.

Signed-off-by: Enrico Weigelt, metux IT consult <info@metux.net>
Link: https://lore.kernel.org/r/20201202111931.31953-2-info@metux.net
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/uapi/linux/virtio_ids.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
index 3cb55e5277a1..bc1c0621f5ed 100644
--- a/include/uapi/linux/virtio_ids.h
+++ b/include/uapi/linux/virtio_ids.h
@@ -34,15 +34,21 @@
 #define VIRTIO_ID_CONSOLE		3 /* virtio console */
 #define VIRTIO_ID_RNG			4 /* virtio rng */
 #define VIRTIO_ID_BALLOON		5 /* virtio balloon */
+#define VIRTIO_ID_IOMEM			6 /* virtio ioMemory */
 #define VIRTIO_ID_RPMSG			7 /* virtio remote processor messaging */
 #define VIRTIO_ID_SCSI			8 /* virtio scsi */
 #define VIRTIO_ID_9P			9 /* 9p virtio console */
+#define VIRTIO_ID_MAC80211_WLAN		10 /* virtio WLAN MAC */
 #define VIRTIO_ID_RPROC_SERIAL		11 /* virtio remoteproc serial link */
 #define VIRTIO_ID_CAIF			12 /* Virtio caif */
+#define VIRTIO_ID_MEMORY_BALLOON	13 /* virtio memory balloon */
 #define VIRTIO_ID_GPU			16 /* virtio GPU */
+#define VIRTIO_ID_CLOCK			17 /* virtio clock/timer */
 #define VIRTIO_ID_INPUT			18 /* virtio input */
 #define VIRTIO_ID_VSOCK			19 /* virtio vsock transport */
 #define VIRTIO_ID_CRYPTO		20 /* virtio crypto */
+#define VIRTIO_ID_SIGNAL_DIST		21 /* virtio signal distribution device */
+#define VIRTIO_ID_PSTORE		22 /* virtio pstore device */
 #define VIRTIO_ID_IOMMU			23 /* virtio IOMMU */
 #define VIRTIO_ID_MEM			24 /* virtio mem */
 #define VIRTIO_ID_FS			26 /* virtio filesystem */
-- 
cgit v1.2.3


From b0a0c2615f6f199a656ed8549d7dce625d77aa77 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 18 Dec 2020 14:05:41 -0800
Subject: epoll: wire up syscall epoll_pwait2

Split off from prev patch in the series that implements the syscall.

Link: https://lkml.kernel.org/r/20201121144401.3727659-4-willemdebruijn.kernel@gmail.com
Signed-off-by: Willem de Bruijn <willemb@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/kernel/syscalls/syscall.tbl      | 1 +
 arch/arm/tools/syscall.tbl                  | 1 +
 arch/arm64/include/asm/unistd.h             | 2 +-
 arch/arm64/include/asm/unistd32.h           | 2 ++
 arch/ia64/kernel/syscalls/syscall.tbl       | 1 +
 arch/m68k/kernel/syscalls/syscall.tbl       | 1 +
 arch/microblaze/kernel/syscalls/syscall.tbl | 1 +
 arch/mips/kernel/syscalls/syscall_n32.tbl   | 1 +
 arch/mips/kernel/syscalls/syscall_n64.tbl   | 1 +
 arch/mips/kernel/syscalls/syscall_o32.tbl   | 1 +
 arch/parisc/kernel/syscalls/syscall.tbl     | 1 +
 arch/powerpc/kernel/syscalls/syscall.tbl    | 1 +
 arch/s390/kernel/syscalls/syscall.tbl       | 1 +
 arch/sh/kernel/syscalls/syscall.tbl         | 1 +
 arch/sparc/kernel/syscalls/syscall.tbl      | 1 +
 arch/x86/entry/syscalls/syscall_32.tbl      | 1 +
 arch/x86/entry/syscalls/syscall_64.tbl      | 1 +
 arch/xtensa/kernel/syscalls/syscall.tbl     | 1 +
 include/linux/compat.h                      | 6 ++++++
 include/linux/syscalls.h                    | 5 +++++
 include/uapi/asm-generic/unistd.h           | 4 +++-
 kernel/sys_ni.c                             | 2 ++
 22 files changed, 35 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index ee7b01bb7346..a6617067dbe6 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -480,3 +480,4 @@
 548	common	pidfd_getfd			sys_pidfd_getfd
 549	common	faccessat2			sys_faccessat2
 550	common	process_madvise			sys_process_madvise
+551	common	epoll_pwait2			sys_epoll_pwait2
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index d056a548358e..20e1170e2e0a 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -454,3 +454,4 @@
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
 440	common	process_madvise			sys_process_madvise
+441	common	epoll_pwait2			sys_epoll_pwait2
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index b3b2019f8d16..86a9d7b3eabe 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -38,7 +38,7 @@
 #define __ARM_NR_compat_set_tls		(__ARM_NR_COMPAT_BASE + 5)
 #define __ARM_NR_COMPAT_END		(__ARM_NR_COMPAT_BASE + 0x800)
 
-#define __NR_compat_syscalls		441
+#define __NR_compat_syscalls		442
 #endif
 
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 107f08e03b9f..f4bca2b90218 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -889,6 +889,8 @@ __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
 __SYSCALL(__NR_faccessat2, sys_faccessat2)
 #define __NR_process_madvise 440
 __SYSCALL(__NR_process_madvise, sys_process_madvise)
+#define __NR_epoll_pwait2 441
+__SYSCALL(__NR_epoll_pwait2, sys_epoll_pwait2)
 
 /*
  * Please add new compat syscalls above this comment and update
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index b96ed8b8a508..bfc00f2bd437 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -361,3 +361,4 @@
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
 440	common	process_madvise			sys_process_madvise
+441	common	epoll_pwait2			sys_epoll_pwait2
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index 625fb6d32842..7fe4e45c864c 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -440,3 +440,4 @@
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
 440	common	process_madvise			sys_process_madvise
+441	common	epoll_pwait2			sys_epoll_pwait2
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index aae729c95cf9..a522adf194ab 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -446,3 +446,4 @@
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
 440	common	process_madvise			sys_process_madvise
+441	common	epoll_pwait2			sys_epoll_pwait2
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 32817c954435..ad9c3dd0ab1f 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -379,3 +379,4 @@
 438	n32	pidfd_getfd			sys_pidfd_getfd
 439	n32	faccessat2			sys_faccessat2
 440	n32	process_madvise			sys_process_madvise
+441	n32	epoll_pwait2			sys_epoll_pwait2
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index 9e4ea3c31b1c..91649690b52f 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -355,3 +355,4 @@
 438	n64	pidfd_getfd			sys_pidfd_getfd
 439	n64	faccessat2			sys_faccessat2
 440	n64	process_madvise			sys_process_madvise
+441	n64	epoll_pwait2			sys_epoll_pwait2
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 29f5f28cf5ce..4bad0c40aed6 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -428,3 +428,4 @@
 438	o32	pidfd_getfd			sys_pidfd_getfd
 439	o32	faccessat2			sys_faccessat2
 440	o32	process_madvise			sys_process_madvise
+441	o32	epoll_pwait2			sys_epoll_pwait2		compat_sys_epoll_pwait2
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index f375ea528e59..6bcc31966b44 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -438,3 +438,4 @@
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
 440	common	process_madvise			sys_process_madvise
+441	common	epoll_pwait2			sys_epoll_pwait2		compat_sys_epoll_pwait2
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 1275daec7fec..f744eb5cba88 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -530,3 +530,4 @@
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
 440	common	process_madvise			sys_process_madvise
+441	common	epoll_pwait2			sys_epoll_pwait2		compat_sys_epoll_pwait2
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 28c168000483..14f6525886a8 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -443,3 +443,4 @@
 438  common	pidfd_getfd		sys_pidfd_getfd			sys_pidfd_getfd
 439  common	faccessat2		sys_faccessat2			sys_faccessat2
 440  common	process_madvise		sys_process_madvise		sys_process_madvise
+441  common	epoll_pwait2		sys_epoll_pwait2		sys_epoll_pwait2
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index 783738448ff5..9df40ac0ebc0 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -443,3 +443,4 @@
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
 440	common	process_madvise			sys_process_madvise
+441	common	epoll_pwait2			sys_epoll_pwait2
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index 78160260991b..c7da4c3271e6 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -486,3 +486,4 @@
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
 440	common	process_madvise			sys_process_madvise
+441	common	epoll_pwait2			sys_epoll_pwait2
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 0d0667a9fbd7..874aeacde2dd 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -445,3 +445,4 @@
 438	i386	pidfd_getfd		sys_pidfd_getfd
 439	i386	faccessat2		sys_faccessat2
 440	i386	process_madvise		sys_process_madvise
+441	i386	epoll_pwait2		sys_epoll_pwait2		compat_sys_epoll_pwait2
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 379819244b91..78672124d28b 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -362,6 +362,7 @@
 438	common	pidfd_getfd		sys_pidfd_getfd
 439	common	faccessat2		sys_faccessat2
 440	common	process_madvise		sys_process_madvise
+441	common	epoll_pwait2		sys_epoll_pwait2
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index b070f272995d..46116a28eeed 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -411,3 +411,4 @@
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
 440	common	process_madvise			sys_process_madvise
+441	common	epoll_pwait2			sys_epoll_pwait2
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 400c0941c8af..6e65be753603 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -537,6 +537,12 @@ asmlinkage long compat_sys_epoll_pwait(int epfd,
 			int maxevents, int timeout,
 			const compat_sigset_t __user *sigmask,
 			compat_size_t sigsetsize);
+asmlinkage long compat_sys_epoll_pwait2(int epfd,
+			struct epoll_event __user *events,
+			int maxevents,
+			const struct __kernel_timespec __user *timeout,
+			const compat_sigset_t __user *sigmask,
+			compat_size_t sigsetsize);
 
 /* fs/fcntl.c */
 asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd,
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index df0c3c74609e..f3929aff39cf 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -362,6 +362,11 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
 				int maxevents, int timeout,
 				const sigset_t __user *sigmask,
 				size_t sigsetsize);
+asmlinkage long sys_epoll_pwait2(int epfd, struct epoll_event __user *events,
+				 int maxevents,
+				 const struct __kernel_timespec __user *timeout,
+				 const sigset_t __user *sigmask,
+				 size_t sigsetsize);
 
 /* fs/fcntl.c */
 asmlinkage long sys_dup(unsigned int fildes);
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index fc48c64700eb..728752917785 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -859,9 +859,11 @@ __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
 __SYSCALL(__NR_faccessat2, sys_faccessat2)
 #define __NR_process_madvise 440
 __SYSCALL(__NR_process_madvise, sys_process_madvise)
+#define __NR_epoll_pwait2 441
+__SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2)
 
 #undef __NR_syscalls
-#define __NR_syscalls 441
+#define __NR_syscalls 442
 
 /*
  * 32 bit systems traditionally used different
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index f27ac94d5fa7..19aa806890d5 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -68,6 +68,8 @@ COND_SYSCALL(epoll_create1);
 COND_SYSCALL(epoll_ctl);
 COND_SYSCALL(epoll_pwait);
 COND_SYSCALL_COMPAT(epoll_pwait);
+COND_SYSCALL(epoll_pwait2);
+COND_SYSCALL_COMPAT(epoll_pwait2);
 
 /* fs/fcntl.c */
 
-- 
cgit v1.2.3


From bcce55f556e824d43f352d76b94509185585e38d Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Sat, 19 Dec 2020 13:19:24 +0100
Subject: ppp: Fix PPPIOCUNBRIDGECHAN request number

PPPIOCGL2TPSTATS already uses 54. This shouldn't be a problem in
practice, but let's keep the logical decreasing assignment scheme.

Fixes: 4cf476ced45d ("ppp: add PPPIOCBRIDGECHAN and PPPIOCUNBRIDGECHAN ioctls")
Signed-off-by: Guillaume Nault <gnault@redhat.com>
Link: https://lore.kernel.org/r/e3a4c355e3820331d8e1fffef8522739aae58b57.1608380117.git.gnault@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/ppp-ioctl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/ppp-ioctl.h b/include/uapi/linux/ppp-ioctl.h
index 8dbecb3ad036..1cc5ce0ae062 100644
--- a/include/uapi/linux/ppp-ioctl.h
+++ b/include/uapi/linux/ppp-ioctl.h
@@ -116,7 +116,7 @@ struct pppol2tp_ioc_stats {
 #define PPPIOCGCHAN	_IOR('t', 55, int)	/* get ppp channel number */
 #define PPPIOCGL2TPSTATS _IOR('t', 54, struct pppol2tp_ioc_stats)
 #define PPPIOCBRIDGECHAN _IOW('t', 53, int)	/* bridge one channel to another */
-#define PPPIOCUNBRIDGECHAN _IO('t', 54)	/* unbridge channel */
+#define PPPIOCUNBRIDGECHAN _IO('t', 52)	/* unbridge channel */
 
 #define SIOCGPPPSTATS   (SIOCDEVPRIVATE + 0)
 #define SIOCGPPPVER     (SIOCDEVPRIVATE + 1)	/* NEVER change this!! */
-- 
cgit v1.2.3


From 429f1571e8f0b14ec42b8fb14efcfc0576b2788f Mon Sep 17 00:00:00 2001
From: Alon Mizrahi <amizrahi@habana.ai>
Date: Tue, 1 Dec 2020 18:44:11 +0200
Subject: habanalabs: add comment for pll frequency ioctl opcode

Forgot to add the comment for the opcode when it was added.

Signed-off-by: Alon Mizrahi <amizrahi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 include/uapi/misc/habanalabs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 8c15a7d336a0..dc8bcec195cc 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -279,6 +279,7 @@ enum hl_device_status {
  * HL_INFO_CLK_THROTTLE_REASON - Retrieve clock throttling reason
  * HL_INFO_SYNC_MANAGER  - Retrieve sync manager info per dcore
  * HL_INFO_TOTAL_ENERGY  - Retrieve total energy consumption
+ * HL_INFO_PLL_FREQUENCY - Retrieve PLL frequency
  */
 #define HL_INFO_HW_IP_INFO		0
 #define HL_INFO_HW_EVENTS		1
-- 
cgit v1.2.3


From a3fd28306329e8e82efab973aafe81e9001dcf6f Mon Sep 17 00:00:00 2001
From: Alon Mizrahi <amizrahi@habana.ai>
Date: Tue, 8 Dec 2020 16:14:01 +0200
Subject: habanalabs: add validation cs counter, fix misplaced counters

Up until now, validation errors were counted in the parsing field
of the cs_counters struct. Add a dedicated validation counter and
increment it where needed.

In addition, there were some locations where only one of the counters
was updated (ctx or aggregate) so add the second one to be updated
as well.

Signed-off-by: Alon Mizrahi <amizrahi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../misc/habanalabs/common/command_submission.c    | 75 ++++++++++++++++------
 drivers/misc/habanalabs/common/habanalabs.h        |  2 +
 drivers/misc/habanalabs/common/habanalabs_ioctl.c  |  5 ++
 include/uapi/misc/habanalabs.h                     |  4 ++
 4 files changed, 68 insertions(+), 18 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 92c1c516b65f..b2b3d2b0f808 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -472,8 +472,11 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 	cntr = &hdev->aggregated_cs_counters;
 
 	cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
-	if (!cs)
+	if (!cs) {
+		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+		atomic64_inc(&cntr->out_of_mem_drop_cnt);
 		return -ENOMEM;
+	}
 
 	cs->ctx = ctx;
 	cs->submitted = false;
@@ -486,6 +489,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 
 	cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
 	if (!cs_cmpl) {
+		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+		atomic64_inc(&cntr->out_of_mem_drop_cnt);
 		rc = -ENOMEM;
 		goto free_cs;
 	}
@@ -513,6 +518,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 	cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
 			sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
 	if (!cs->jobs_in_queue_cnt) {
+		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+		atomic64_inc(&cntr->out_of_mem_drop_cnt);
 		rc = -ENOMEM;
 		goto free_fence;
 	}
@@ -764,11 +771,14 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
 
 static int hl_cs_copy_chunk_array(struct hl_device *hdev,
 					struct hl_cs_chunk **cs_chunk_array,
-					void __user *chunks, u32 num_chunks)
+					void __user *chunks, u32 num_chunks,
+					struct hl_ctx *ctx)
 {
 	u32 size_to_copy;
 
 	if (num_chunks > HL_MAX_JOBS_PER_CS) {
+		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+		atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
 		dev_err(hdev->dev,
 			"Number of chunks can NOT be larger than %d\n",
 			HL_MAX_JOBS_PER_CS);
@@ -777,11 +787,16 @@ static int hl_cs_copy_chunk_array(struct hl_device *hdev,
 
 	*cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array),
 					GFP_ATOMIC);
-	if (!*cs_chunk_array)
+	if (!*cs_chunk_array) {
+		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+		atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
 		return -ENOMEM;
+	}
 
 	size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
 	if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) {
+		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+		atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
 		dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
 		kfree(*cs_chunk_array);
 		return -EFAULT;
@@ -797,6 +812,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 	struct hl_device *hdev = hpriv->hdev;
 	struct hl_cs_chunk *cs_chunk_array;
 	struct hl_cs_counters_atomic *cntr;
+	struct hl_ctx *ctx = hpriv->ctx;
 	struct hl_cs_job *job;
 	struct hl_cs *cs;
 	struct hl_cb *cb;
@@ -805,7 +821,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 	cntr = &hdev->aggregated_cs_counters;
 	*cs_seq = ULLONG_MAX;
 
-	rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks);
+	rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
+			hpriv->ctx);
 	if (rc)
 		goto out;
 
@@ -832,8 +849,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 		rc = validate_queue_index(hdev, chunk, &queue_type,
 						&is_kernel_allocated_cb);
 		if (rc) {
-			atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
-			atomic64_inc(&cntr->parsing_drop_cnt);
+			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+			atomic64_inc(&cntr->validation_drop_cnt);
 			goto free_cs_object;
 		}
 
@@ -841,8 +858,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 			cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
 			if (!cb) {
 				atomic64_inc(
-				&hpriv->ctx->cs_counters.parsing_drop_cnt);
-				atomic64_inc(&cntr->parsing_drop_cnt);
+					&ctx->cs_counters.validation_drop_cnt);
+				atomic64_inc(&cntr->validation_drop_cnt);
 				rc = -EINVAL;
 				goto free_cs_object;
 			}
@@ -856,8 +873,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 		job = hl_cs_allocate_job(hdev, queue_type,
 						is_kernel_allocated_cb);
 		if (!job) {
-			atomic64_inc(
-			&hpriv->ctx->cs_counters.out_of_mem_drop_cnt);
+			atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
 			atomic64_inc(&cntr->out_of_mem_drop_cnt);
 			dev_err(hdev->dev, "Failed to allocate a new job\n");
 			rc = -ENOMEM;
@@ -891,7 +907,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 
 		rc = cs_parser(hpriv, job);
 		if (rc) {
-			atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
+			atomic64_inc(&ctx->cs_counters.parsing_drop_cnt);
 			atomic64_inc(&cntr->parsing_drop_cnt);
 			dev_err(hdev->dev,
 				"Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
@@ -901,8 +917,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 	}
 
 	if (int_queues_only) {
-		atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
-		atomic64_inc(&cntr->parsing_drop_cnt);
+		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+		atomic64_inc(&cntr->validation_drop_cnt);
 		dev_err(hdev->dev,
 			"Reject CS %d.%llu because only internal queues jobs are present\n",
 			cs->ctx->asid, cs->sequence);
@@ -1042,7 +1058,7 @@ out:
 }
 
 static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
-		struct hl_cs_chunk *chunk, u64 *signal_seq)
+		struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx)
 {
 	u64 *signal_seq_arr = NULL;
 	u32 size_to_copy, signal_seq_arr_len;
@@ -1052,6 +1068,8 @@ static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
 
 	/* currently only one signal seq is supported */
 	if (signal_seq_arr_len != 1) {
+		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+		atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
 		dev_err(hdev->dev,
 			"Wait for signal CS supports only one signal CS seq\n");
 		return -EINVAL;
@@ -1060,13 +1078,18 @@ static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
 	signal_seq_arr = kmalloc_array(signal_seq_arr_len,
 					sizeof(*signal_seq_arr),
 					GFP_ATOMIC);
-	if (!signal_seq_arr)
+	if (!signal_seq_arr) {
+		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+		atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
 		return -ENOMEM;
+	}
 
 	size_to_copy = chunk->num_signal_seq_arr * sizeof(*signal_seq_arr);
 	if (copy_from_user(signal_seq_arr,
 				u64_to_user_ptr(chunk->signal_seq_arr),
 				size_to_copy)) {
+		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+		atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
 		dev_err(hdev->dev,
 			"Failed to copy signal seq array from user\n");
 		rc = -EFAULT;
@@ -1153,6 +1176,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 	struct hl_device *hdev = hpriv->hdev;
 	struct hl_cs_compl *sig_waitcs_cmpl;
 	u32 q_idx, collective_engine_id = 0;
+	struct hl_cs_counters_atomic *cntr;
 	struct hl_fence *sig_fence = NULL;
 	struct hl_ctx *ctx = hpriv->ctx;
 	enum hl_queue_type q_type;
@@ -1160,9 +1184,11 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 	u64 signal_seq;
 	int rc;
 
+	cntr = &hdev->aggregated_cs_counters;
 	*cs_seq = ULLONG_MAX;
 
-	rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks);
+	rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
+			ctx);
 	if (rc)
 		goto out;
 
@@ -1170,6 +1196,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 	chunk = &cs_chunk_array[0];
 
 	if (chunk->queue_index >= hdev->asic_prop.max_queues) {
+		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+		atomic64_inc(&cntr->validation_drop_cnt);
 		dev_err(hdev->dev, "Queue index %d is invalid\n",
 			chunk->queue_index);
 		rc = -EINVAL;
@@ -1181,6 +1209,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 	q_type = hw_queue_prop->type;
 
 	if (!hw_queue_prop->supports_sync_stream) {
+		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+		atomic64_inc(&cntr->validation_drop_cnt);
 		dev_err(hdev->dev,
 			"Queue index %d does not support sync stream operations\n",
 			q_idx);
@@ -1190,6 +1220,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 
 	if (cs_type == CS_TYPE_COLLECTIVE_WAIT) {
 		if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
+			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+			atomic64_inc(&cntr->validation_drop_cnt);
 			dev_err(hdev->dev,
 				"Queue index %d is invalid\n", q_idx);
 			rc = -EINVAL;
@@ -1200,12 +1232,14 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 	}
 
 	if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT) {
-		rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq);
+		rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq, ctx);
 		if (rc)
 			goto free_cs_chunk_array;
 
 		sig_fence = hl_ctx_get_fence(ctx, signal_seq);
 		if (IS_ERR(sig_fence)) {
+			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+			atomic64_inc(&cntr->validation_drop_cnt);
 			dev_err(hdev->dev,
 				"Failed to get signal CS with seq 0x%llx\n",
 				signal_seq);
@@ -1223,6 +1257,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 			container_of(sig_fence, struct hl_cs_compl, base_fence);
 
 		if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) {
+			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+			atomic64_inc(&cntr->validation_drop_cnt);
 			dev_err(hdev->dev,
 				"CS seq 0x%llx is not of a signal CS\n",
 				signal_seq);
@@ -1270,8 +1306,11 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 	else if (cs_type == CS_TYPE_COLLECTIVE_WAIT)
 		rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
 				cs, q_idx, collective_engine_id);
-	else
+	else {
+		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+		atomic64_inc(&cntr->validation_drop_cnt);
 		rc = -EINVAL;
+	}
 
 	if (rc)
 		goto free_cs_object;
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 70b778a0d60e..e0d7f5fbaa5c 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1000,6 +1000,7 @@ struct hl_va_range {
  * @queue_full_drop_cnt: dropped due to queue full
  * @device_in_reset_drop_cnt: dropped due to device in reset
  * @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight
+ * @validation_drop_cnt: dropped due to error in validation
  */
 struct hl_cs_counters_atomic {
 	atomic64_t out_of_mem_drop_cnt;
@@ -1007,6 +1008,7 @@ struct hl_cs_counters_atomic {
 	atomic64_t queue_full_drop_cnt;
 	atomic64_t device_in_reset_drop_cnt;
 	atomic64_t max_cs_in_flight_drop_cnt;
+	atomic64_t validation_drop_cnt;
 };
 
 /**
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index a0c0d20f6f8f..12efbd9d2e3a 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -335,6 +335,8 @@ static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 			atomic64_read(&cntr->device_in_reset_drop_cnt);
 	cs_counters.total_max_cs_in_flight_drop_cnt =
 			atomic64_read(&cntr->max_cs_in_flight_drop_cnt);
+	cs_counters.total_validation_drop_cnt =
+			atomic64_read(&cntr->validation_drop_cnt);
 
 	if (hpriv->ctx) {
 		cs_counters.ctx_out_of_mem_drop_cnt =
@@ -352,6 +354,9 @@ static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 		cs_counters.ctx_max_cs_in_flight_drop_cnt =
 				atomic64_read(
 			&hpriv->ctx->cs_counters.max_cs_in_flight_drop_cnt);
+		cs_counters.ctx_validation_drop_cnt =
+				atomic64_read(
+				&hpriv->ctx->cs_counters.validation_drop_cnt);
 	}
 
 	return copy_to_user(out, &cs_counters,
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index dc8bcec195cc..dba3827c43ca 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -426,6 +426,8 @@ struct hl_info_sync_manager {
  * @ctx_device_in_reset_drop_cnt: context dropped due to device in reset
  * @total_max_cs_in_flight_drop_cnt: total dropped due to maximum CS in-flight
  * @ctx_max_cs_in_flight_drop_cnt: context dropped due to maximum CS in-flight
+ * @total_validation_drop_cnt: total dropped due to validation error
+ * @ctx_validation_drop_cnt: context dropped due to validation error
  */
 struct hl_info_cs_counters {
 	__u64 total_out_of_mem_drop_cnt;
@@ -438,6 +440,8 @@ struct hl_info_cs_counters {
 	__u64 ctx_device_in_reset_drop_cnt;
 	__u64 total_max_cs_in_flight_drop_cnt;
 	__u64 ctx_max_cs_in_flight_drop_cnt;
+	__u64 total_validation_drop_cnt;
+	__u64 ctx_validation_drop_cnt;
 };
 
 enum gaudi_dcores {
-- 
cgit v1.2.3


From b4e70d8dd9ea6bd5d5fb3122586f652326ca09cd Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 27 Dec 2020 12:35:43 +0100
Subject: netfilter: nftables: add set expression flags

The set flag NFT_SET_EXPR provides a hint to the kernel that userspace
supports multiple expressions per set element. In the same
direction, NFT_DYNSET_F_EXPR specifies that dynset expression defines
multiple expressions per set element.

This allows new userspace software with old kernels to bail out with
EOPNOTSUPP. This update is similar to ef516e8625dd ("netfilter:
nf_tables: reintroduce the NFT_SET_CONCAT flag"). The NFT_SET_EXPR flag
needs to be set when the NFTA_SET_EXPRESSIONS attribute is specified.
The NFT_SET_EXPR flag is not set with NFTA_SET_EXPR to retain
backward compatibility in old userspace binaries.

Fixes: 48b0ae046ee9 ("netfilter: nftables: netlink support for several set element expressions")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h | 3 +++
 net/netfilter/nf_tables_api.c            | 6 +++++-
 net/netfilter/nft_dynset.c               | 9 +++++++--
 3 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 28b6ee53305f..b1633e7ba529 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -293,6 +293,7 @@ enum nft_rule_compat_attributes {
  * @NFT_SET_EVAL: set can be updated from the evaluation path
  * @NFT_SET_OBJECT: set contains stateful objects
  * @NFT_SET_CONCAT: set contains a concatenation
+ * @NFT_SET_EXPR: set contains expressions
  */
 enum nft_set_flags {
 	NFT_SET_ANONYMOUS		= 0x1,
@@ -303,6 +304,7 @@ enum nft_set_flags {
 	NFT_SET_EVAL			= 0x20,
 	NFT_SET_OBJECT			= 0x40,
 	NFT_SET_CONCAT			= 0x80,
+	NFT_SET_EXPR			= 0x100,
 };
 
 /**
@@ -706,6 +708,7 @@ enum nft_dynset_ops {
 
 enum nft_dynset_flags {
 	NFT_DYNSET_F_INV	= (1 << 0),
+	NFT_DYNSET_F_EXPR	= (1 << 1),
 };
 
 /**
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 4186b1e52d58..15c467f1a9dd 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4162,7 +4162,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
 		if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT |
 			      NFT_SET_INTERVAL | NFT_SET_TIMEOUT |
 			      NFT_SET_MAP | NFT_SET_EVAL |
-			      NFT_SET_OBJECT | NFT_SET_CONCAT))
+			      NFT_SET_OBJECT | NFT_SET_CONCAT | NFT_SET_EXPR))
 			return -EOPNOTSUPP;
 		/* Only one of these operations is supported */
 		if ((flags & (NFT_SET_MAP | NFT_SET_OBJECT)) ==
@@ -4304,6 +4304,10 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
 		struct nlattr *tmp;
 		int left;
 
+		if (!(flags & NFT_SET_EXPR)) {
+			err = -EINVAL;
+			goto err_set_alloc_name;
+		}
 		i = 0;
 		nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) {
 			if (i == NFT_SET_EXPR_MAX) {
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index f35df221a633..0b053f75cd60 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -19,6 +19,7 @@ struct nft_dynset {
 	enum nft_registers		sreg_key:8;
 	enum nft_registers		sreg_data:8;
 	bool				invert;
+	bool				expr;
 	u8				num_exprs;
 	u64				timeout;
 	struct nft_expr			*expr_array[NFT_SET_EXPR_MAX];
@@ -175,11 +176,12 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
 
 	if (tb[NFTA_DYNSET_FLAGS]) {
 		u32 flags = ntohl(nla_get_be32(tb[NFTA_DYNSET_FLAGS]));
-
-		if (flags & ~NFT_DYNSET_F_INV)
+		if (flags & ~(NFT_DYNSET_F_INV | NFT_DYNSET_F_EXPR))
 			return -EOPNOTSUPP;
 		if (flags & NFT_DYNSET_F_INV)
 			priv->invert = true;
+		if (flags & NFT_DYNSET_F_EXPR)
+			priv->expr = true;
 	}
 
 	set = nft_set_lookup_global(ctx->net, ctx->table,
@@ -261,6 +263,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
 		struct nlattr *tmp;
 		int left;
 
+		if (!priv->expr)
+			return -EINVAL;
+
 		i = 0;
 		nla_for_each_nested(tmp, tb[NFTA_DYNSET_EXPRESSIONS], left) {
 			if (i == NFT_SET_EXPR_MAX) {
-- 
cgit v1.2.3


From cf0720697143f3eaa0779cca5a6602d8557d1c6f Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 30 Dec 2020 19:37:53 -0800
Subject: net: suggest L2 discards be counted towards rx_dropped

From the existing definitions it's unclear which stat to
use to report filtering based on L2 dst addr in old
broadcast-medium Ethernet.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_link.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 874cc12a34d9..82708c6db432 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -75,8 +75,9 @@ struct rtnl_link_stats {
  *
  * @rx_dropped: Number of packets received but not processed,
  *   e.g. due to lack of resources or unsupported protocol.
- *   For hardware interfaces this counter should not include packets
- *   dropped by the device which are counted separately in
+ *   For hardware interfaces this counter may include packets discarded
+ *   due to L2 address filtering but should not include packets dropped
+ *   by the device due to buffer exhaustion which are counted separately in
  *   @rx_missed_errors (since procfs folds those two counters together).
  *
  * @tx_dropped: Number of packets dropped on their way to transmission,
-- 
cgit v1.2.3


From 647daca25d24fb6eadc7b6cd680ad3e6eed0f3d5 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Mon, 4 Jan 2021 14:20:01 -0600
Subject: KVM: SVM: Add support for booting APs in an SEV-ES guest

Typically under KVM, an AP is booted using the INIT-SIPI-SIPI sequence,
where the guest vCPU register state is updated and then the vCPU is VMRUN
to begin execution of the AP. For an SEV-ES guest, this won't work because
the guest register state is encrypted.

Following the GHCB specification, the hypervisor must not alter the guest
register state, so KVM must track an AP/vCPU boot. Should the guest want
to park the AP, it must use the AP Reset Hold exit event in place of, for
example, a HLT loop.

First AP boot (first INIT-SIPI-SIPI sequence):
  Execute the AP (vCPU) as it was initialized and measured by the SEV-ES
  support. It is up to the guest to transfer control of the AP to the
  proper location.

Subsequent AP boot:
  KVM will expect to receive an AP Reset Hold exit event indicating that
  the vCPU is being parked and will require an INIT-SIPI-SIPI sequence to
  awaken it. When the AP Reset Hold exit event is received, KVM will place
  the vCPU into a simulated HLT mode. Upon receiving the INIT-SIPI-SIPI
  sequence, KVM will make the vCPU runnable. It is again up to the guest
  to then transfer control of the AP to the proper location.

  To differentiate between an actual HLT and an AP Reset Hold, a new MP
  state is introduced, KVM_MP_STATE_AP_RESET_HOLD, which the vCPU is
  placed in upon receiving the AP Reset Hold exit event. Additionally, to
  communicate the AP Reset Hold exit event up to userspace (if needed), a
  new exit reason is introduced, KVM_EXIT_AP_RESET_HOLD.

A new x86 ops function is introduced, vcpu_deliver_sipi_vector, in order
to accomplish AP booting. For VMX, vcpu_deliver_sipi_vector is set to the
original SIPI delivery function, kvm_vcpu_deliver_sipi_vector(). SVM adds
a new function that, for non SEV-ES guests, invokes the original SIPI
delivery function, kvm_vcpu_deliver_sipi_vector(), but for SEV-ES guests,
implements the logic above.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Message-Id: <e8fbebe8eb161ceaabdad7c01a5859a78b424d5e.1609791600.git.thomas.lendacky@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  3 +++
 arch/x86/kvm/lapic.c            |  2 +-
 arch/x86/kvm/svm/sev.c          | 22 ++++++++++++++++++++++
 arch/x86/kvm/svm/svm.c          | 10 ++++++++++
 arch/x86/kvm/svm/svm.h          |  2 ++
 arch/x86/kvm/vmx/vmx.c          |  2 ++
 arch/x86/kvm/x86.c              | 26 +++++++++++++++++++++-----
 include/uapi/linux/kvm.h        |  2 ++
 8 files changed, 63 insertions(+), 6 deletions(-)

(limited to 'include/uapi')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index afed3da3b3a0..3d6616f6f6ef 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1299,6 +1299,8 @@ struct kvm_x86_ops {
 	void (*migrate_timers)(struct kvm_vcpu *vcpu);
 	void (*msr_filter_changed)(struct kvm_vcpu *vcpu);
 	int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);
+
+	void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
 };
 
 struct kvm_x86_nested_ops {
@@ -1480,6 +1482,7 @@ int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in);
 int kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
 int kvm_emulate_halt(struct kvm_vcpu *vcpu);
 int kvm_vcpu_halt(struct kvm_vcpu *vcpu);
+int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu);
 int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
 
 void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 78823227c592..43cceadd073e 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2898,7 +2898,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
 			/* evaluate pending_events before reading the vector */
 			smp_rmb();
 			sipi_vector = apic->sipi_vector;
-			kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
+			kvm_x86_ops.vcpu_deliver_sipi_vector(vcpu, sipi_vector);
 			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 		}
 	}
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 563ced07b0b8..c8ffdbc81709 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1563,6 +1563,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
 			goto vmgexit_err;
 		break;
 	case SVM_VMGEXIT_NMI_COMPLETE:
+	case SVM_VMGEXIT_AP_HLT_LOOP:
 	case SVM_VMGEXIT_AP_JUMP_TABLE:
 	case SVM_VMGEXIT_UNSUPPORTED_EVENT:
 		break;
@@ -1888,6 +1889,9 @@ int sev_handle_vmgexit(struct vcpu_svm *svm)
 	case SVM_VMGEXIT_NMI_COMPLETE:
 		ret = svm_invoke_exit_handler(svm, SVM_EXIT_IRET);
 		break;
+	case SVM_VMGEXIT_AP_HLT_LOOP:
+		ret = kvm_emulate_ap_reset_hold(&svm->vcpu);
+		break;
 	case SVM_VMGEXIT_AP_JUMP_TABLE: {
 		struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
 
@@ -2040,3 +2044,21 @@ void sev_es_vcpu_put(struct vcpu_svm *svm)
 		wrmsrl(host_save_user_msrs[i].index, svm->host_user_msrs[i]);
 	}
 }
+
+void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	/* First SIPI: Use the values as initially set by the VMM */
+	if (!svm->received_first_sipi) {
+		svm->received_first_sipi = true;
+		return;
+	}
+
+	/*
+	 * Subsequent SIPI: Return from an AP Reset Hold VMGEXIT, where
+	 * the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a
+	 * non-zero value.
+	 */
+	ghcb_set_sw_exit_info_2(svm->ghcb, 1);
+}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 6824d611dc5d..7ef171790d02 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4382,6 +4382,14 @@ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
 		   (vmcb_is_intercept(&svm->vmcb->control, INTERCEPT_INIT));
 }
 
+static void svm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
+{
+	if (!sev_es_guest(vcpu->kvm))
+		return kvm_vcpu_deliver_sipi_vector(vcpu, vector);
+
+	sev_vcpu_deliver_sipi_vector(vcpu, vector);
+}
+
 static void svm_vm_destroy(struct kvm *kvm)
 {
 	avic_vm_destroy(kvm);
@@ -4524,6 +4532,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 
 	.msr_filter_changed = svm_msr_filter_changed,
 	.complete_emulated_msr = svm_complete_emulated_msr,
+
+	.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
 };
 
 static struct kvm_x86_init_ops svm_init_ops __initdata = {
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 5431e6335e2e..0fe874ae5498 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -185,6 +185,7 @@ struct vcpu_svm {
 	struct vmcb_save_area *vmsa;
 	struct ghcb *ghcb;
 	struct kvm_host_map ghcb_map;
+	bool received_first_sipi;
 
 	/* SEV-ES scratch area support */
 	void *ghcb_sa;
@@ -591,6 +592,7 @@ void sev_es_init_vmcb(struct vcpu_svm *svm);
 void sev_es_create_vcpu(struct vcpu_svm *svm);
 void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu);
 void sev_es_vcpu_put(struct vcpu_svm *svm);
+void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
 
 /* vmenter.S */
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 75c9c6a0a3a4..2af05d3b0590 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7707,6 +7707,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.msr_filter_changed = vmx_msr_filter_changed,
 	.complete_emulated_msr = kvm_complete_insn_gp,
 	.cpu_dirty_log_size = vmx_cpu_dirty_log_size,
+
+	.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
 };
 
 static __init int hardware_setup(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f6e7b25c40e2..0287840b93e0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7976,17 +7976,22 @@ void kvm_arch_exit(void)
 	kmem_cache_destroy(x86_fpu_cache);
 }
 
-int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
+int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason)
 {
 	++vcpu->stat.halt_exits;
 	if (lapic_in_kernel(vcpu)) {
-		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
+		vcpu->arch.mp_state = state;
 		return 1;
 	} else {
-		vcpu->run->exit_reason = KVM_EXIT_HLT;
+		vcpu->run->exit_reason = reason;
 		return 0;
 	}
 }
+
+int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
+{
+	return __kvm_vcpu_halt(vcpu, KVM_MP_STATE_HALTED, KVM_EXIT_HLT);
+}
 EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
 
 int kvm_emulate_halt(struct kvm_vcpu *vcpu)
@@ -8000,6 +8005,14 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
 
+int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu)
+{
+	int ret = kvm_skip_emulated_instruction(vcpu);
+
+	return __kvm_vcpu_halt(vcpu, KVM_MP_STATE_AP_RESET_HOLD, KVM_EXIT_AP_RESET_HOLD) && ret;
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_ap_reset_hold);
+
 #ifdef CONFIG_X86_64
 static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
 			        unsigned long clock_type)
@@ -9096,6 +9109,7 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
 	kvm_apic_accept_events(vcpu);
 	switch(vcpu->arch.mp_state) {
 	case KVM_MP_STATE_HALTED:
+	case KVM_MP_STATE_AP_RESET_HOLD:
 		vcpu->arch.pv.pv_unhalted = false;
 		vcpu->arch.mp_state =
 			KVM_MP_STATE_RUNNABLE;
@@ -9522,8 +9536,9 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
 		kvm_load_guest_fpu(vcpu);
 
 	kvm_apic_accept_events(vcpu);
-	if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED &&
-					vcpu->arch.pv.pv_unhalted)
+	if ((vcpu->arch.mp_state == KVM_MP_STATE_HALTED ||
+	     vcpu->arch.mp_state == KVM_MP_STATE_AP_RESET_HOLD) &&
+	    vcpu->arch.pv.pv_unhalted)
 		mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
 	else
 		mp_state->mp_state = vcpu->arch.mp_state;
@@ -10154,6 +10169,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
 	kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
 	kvm_rip_write(vcpu, 0);
 }
+EXPORT_SYMBOL_GPL(kvm_vcpu_deliver_sipi_vector);
 
 int kvm_arch_hardware_enable(void)
 {
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 886802b8ffba..374c67875cdb 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -251,6 +251,7 @@ struct kvm_hyperv_exit {
 #define KVM_EXIT_X86_RDMSR        29
 #define KVM_EXIT_X86_WRMSR        30
 #define KVM_EXIT_DIRTY_RING_FULL  31
+#define KVM_EXIT_AP_RESET_HOLD    32
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -573,6 +574,7 @@ struct kvm_vapic_addr {
 #define KVM_MP_STATE_CHECK_STOP        6
 #define KVM_MP_STATE_OPERATING         7
 #define KVM_MP_STATE_LOAD              8
+#define KVM_MP_STATE_AP_RESET_HOLD     9
 
 struct kvm_mp_state {
 	__u32 mp_state;
-- 
cgit v1.2.3


From b16671e8f493e3df40b1fb0dff4078f391c5099a Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Mon, 4 Jan 2021 15:41:21 +0800
Subject: bcache: introduce BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE for
 large bucket

When large bucket feature was added, BCH_FEATURE_INCOMPAT_LARGE_BUCKET
was introduced into the incompat feature set. It used bucket_size_hi
(which was added at the tail of struct cache_sb_disk) to extend current
16bit bucket size to 32bit with existing bucket_size in struct
cache_sb_disk.

This is not a good idea, there are two obvious problems,
- Bucket size is always a power of 2; if log2(bucket size) is stored in
  the existing bucket_size of struct cache_sb_disk, it is unnecessary to
  add bucket_size_hi.
- Macro csum_set() assumes d[SB_JOURNAL_BUCKETS] is the last member in
  struct cache_sb_disk, bucket_size_hi was added after d[] which makes
  csum_set calculate an unexpected super block checksum.

To fix the above problems, this patch introduces a new incompat feature
bit BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE, when this bit is set, it
means bucket_size in struct cache_sb_disk stores the order of power-of-2
bucket size value. When user specifies a bucket size larger than 32768
sectors, BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE will be set in the
incompat feature set, and bucket_size stores log2(bucket size) rather
than the real bucket size value.

The obsoleted BCH_FEATURE_INCOMPAT_LARGE_BUCKET won't be used anymore,
it is renamed to BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET and still only
recognized by the kernel driver for legacy compatibility. The previous
bucket_size_hi is renamed to obso_bucket_size_hi in struct cache_sb_disk
and is not used in bcache-tools anymore.

For cache device created with BCH_FEATURE_INCOMPAT_LARGE_BUCKET feature,
bcache-tools and kernel driver still recognize the feature string and
display it as "obso_large_bucket".

With this change, the unnecessary extension of the bcache on-disk
super block can be avoided, and csum_set() can generate the expected
checksum as well.

Fixes: ffa470327572 ("bcache: add bucket_size_hi into struct cache_sb_disk for large bucket")
Signed-off-by: Coly Li <colyli@suse.de>
Cc: stable@vger.kernel.org # 5.9+
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/bcache/features.c |  2 +-
 drivers/md/bcache/features.h | 11 ++++++++---
 drivers/md/bcache/super.c    | 22 +++++++++++++++++++---
 include/uapi/linux/bcache.h  |  2 +-
 4 files changed, 29 insertions(+), 8 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/md/bcache/features.c b/drivers/md/bcache/features.c
index 6469223f0b77..d636b7b2d070 100644
--- a/drivers/md/bcache/features.c
+++ b/drivers/md/bcache/features.c
@@ -17,7 +17,7 @@ struct feature {
 };
 
 static struct feature feature_list[] = {
-	{BCH_FEATURE_INCOMPAT, BCH_FEATURE_INCOMPAT_LARGE_BUCKET,
+	{BCH_FEATURE_INCOMPAT, BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE,
 		"large_bucket"},
 	{0, 0, 0 },
 };
diff --git a/drivers/md/bcache/features.h b/drivers/md/bcache/features.h
index e73724c2b49b..84fc2c0f0101 100644
--- a/drivers/md/bcache/features.h
+++ b/drivers/md/bcache/features.h
@@ -13,11 +13,15 @@
 
 /* Feature set definition */
 /* Incompat feature set */
-#define BCH_FEATURE_INCOMPAT_LARGE_BUCKET	0x0001 /* 32bit bucket size */
+/* 32bit bucket size, obsoleted */
+#define BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET		0x0001
+/* real bucket size is (1 << bucket_size) */
+#define BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE	0x0002
 
 #define BCH_FEATURE_COMPAT_SUPP		0
 #define BCH_FEATURE_RO_COMPAT_SUPP	0
-#define BCH_FEATURE_INCOMPAT_SUPP	BCH_FEATURE_INCOMPAT_LARGE_BUCKET
+#define BCH_FEATURE_INCOMPAT_SUPP	(BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET| \
+					 BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE)
 
 #define BCH_HAS_COMPAT_FEATURE(sb, mask) \
 		((sb)->feature_compat & (mask))
@@ -77,7 +81,8 @@ static inline void bch_clear_feature_##name(struct cache_sb *sb) \
 		~BCH##_FEATURE_INCOMPAT_##flagname; \
 }
 
-BCH_FEATURE_INCOMPAT_FUNCS(large_bucket, LARGE_BUCKET);
+BCH_FEATURE_INCOMPAT_FUNCS(obso_large_bucket, OBSO_LARGE_BUCKET);
+BCH_FEATURE_INCOMPAT_FUNCS(large_bucket, LOG_LARGE_BUCKET_SIZE);
 
 static inline bool bch_has_unknown_compat_features(struct cache_sb *sb)
 {
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index f4674a3298af..3999641f1775 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -64,9 +64,25 @@ static unsigned int get_bucket_size(struct cache_sb *sb, struct cache_sb_disk *s
 {
 	unsigned int bucket_size = le16_to_cpu(s->bucket_size);
 
-	if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES &&
-	     bch_has_feature_large_bucket(sb))
-		bucket_size |= le16_to_cpu(s->bucket_size_hi) << 16;
+	if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES) {
+		if (bch_has_feature_large_bucket(sb)) {
+			unsigned int max, order;
+
+			max = sizeof(unsigned int) * BITS_PER_BYTE - 1;
+			order = le16_to_cpu(s->bucket_size);
+			/*
+			 * bcache tool will make sure the overflow won't
+			 * happen, an error message here is enough.
+			 */
+			if (order > max)
+				pr_err("Bucket size (1 << %u) overflows\n",
+					order);
+			bucket_size = 1 << order;
+		} else if (bch_has_feature_obso_large_bucket(sb)) {
+			bucket_size +=
+				le16_to_cpu(s->obso_bucket_size_hi) << 16;
+		}
+	}
 
 	return bucket_size;
 }
diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
index 52e8bcb33981..cf7399f03b71 100644
--- a/include/uapi/linux/bcache.h
+++ b/include/uapi/linux/bcache.h
@@ -213,7 +213,7 @@ struct cache_sb_disk {
 		__le16		keys;
 	};
 	__le64			d[SB_JOURNAL_BUCKETS];	/* journal buckets */
-	__le16			bucket_size_hi;
+	__le16			obso_bucket_size_hi;	/* obsoleted */
 };
 
 /*
-- 
cgit v1.2.3


From 9f206f7398f6f6ec7dd0198c045c2459b4f720b6 Mon Sep 17 00:00:00 2001
From: Bryan Tan <bryantan@vmware.com>
Date: Mon, 18 Jan 2021 19:16:29 -0800
Subject: RDMA/vmw_pvrdma: Fix network_hdr_type reported in WC

The PVRDMA device HW interface defines network_hdr_type according to an
old definition of the internal kernel rdma_network_type enum that has
since changed, resulting in the wrong rdma_network_type being reported.

Fix this by explicitly defining the enum used by the PVRDMA device and
adding a function to convert the pvrdma_network_type to rdma_network_type
enum.

Cc: stable@vger.kernel.org # 5.10+
Fixes: 1c15b4f2a42f ("RDMA/core: Modify enum ib_gid_type and enum rdma_network_type")
Link: https://lore.kernel.org/r/1611026189-17943-1-git-send-email-bryantan@vmware.com
Reviewed-by: Adit Ranadive <aditr@vmware.com>
Signed-off-by: Bryan Tan <bryantan@vmware.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/vmw_pvrdma/pvrdma.h    | 14 ++++++++++++++
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c |  2 +-
 include/uapi/rdma/vmw_pvrdma-abi.h           |  7 +++++++
 3 files changed, 22 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
index c142f5e7f25f..de57f2fed743 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
@@ -509,6 +509,20 @@ static inline int ib_send_flags_to_pvrdma(int flags)
 	return flags & PVRDMA_MASK(PVRDMA_SEND_FLAGS_MAX);
 }
 
+static inline int pvrdma_network_type_to_ib(enum pvrdma_network_type type)
+{
+	switch (type) {
+	case PVRDMA_NETWORK_ROCE_V1:
+		return RDMA_NETWORK_ROCE_V1;
+	case PVRDMA_NETWORK_IPV4:
+		return RDMA_NETWORK_IPV4;
+	case PVRDMA_NETWORK_IPV6:
+		return RDMA_NETWORK_IPV6;
+	default:
+		return RDMA_NETWORK_IPV6;
+	}
+}
+
 void pvrdma_qp_cap_to_ib(struct ib_qp_cap *dst,
 			 const struct pvrdma_qp_cap *src);
 void ib_qp_cap_to_pvrdma(struct pvrdma_qp_cap *dst,
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
index a119ac3e103c..6aa40bd2fd52 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
@@ -367,7 +367,7 @@ retry:
 	wc->dlid_path_bits = cqe->dlid_path_bits;
 	wc->port_num = cqe->port_num;
 	wc->vendor_err = cqe->vendor_err;
-	wc->network_hdr_type = cqe->network_hdr_type;
+	wc->network_hdr_type = pvrdma_network_type_to_ib(cqe->network_hdr_type);
 
 	/* Update shared ring state */
 	pvrdma_idx_ring_inc(&cq->ring_state->rx.cons_head, cq->ibcq.cqe);
diff --git a/include/uapi/rdma/vmw_pvrdma-abi.h b/include/uapi/rdma/vmw_pvrdma-abi.h
index f8b638c73371..901a4fd72c09 100644
--- a/include/uapi/rdma/vmw_pvrdma-abi.h
+++ b/include/uapi/rdma/vmw_pvrdma-abi.h
@@ -133,6 +133,13 @@ enum pvrdma_wc_flags {
 	PVRDMA_WC_FLAGS_MAX		= PVRDMA_WC_WITH_NETWORK_HDR_TYPE,
 };
 
+enum pvrdma_network_type {
+	PVRDMA_NETWORK_IB,
+	PVRDMA_NETWORK_ROCE_V1 = PVRDMA_NETWORK_IB,
+	PVRDMA_NETWORK_IPV4,
+	PVRDMA_NETWORK_IPV6
+};
+
 struct pvrdma_alloc_ucontext_resp {
 	__u32 qp_tab_size;
 	__u32 reserved;
-- 
cgit v1.2.3


From dc090de854b9d7fdbc6f4df70bd7fc1b43eeccf8 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Date: Thu, 21 Jan 2021 21:40:36 +0100
Subject: net: mrp: fix definitions of MRP test packets

Wireshark says that the MRP test packets cannot be decoded - and the
reason for that is that there's a two-byte hole filled with garbage
between the "transitions" and "timestamp" members.

So Wireshark decodes the two garbage bytes and the top two bytes of
the timestamp written by the kernel as the timestamp value (which thus
fluctuates wildly), and interprets the lower two bytes of the
timestamp as a new (type, length) pair, which is of course broken.

Even though packing the struct makes the timestamp field unaligned within
the struct, it actually makes it end up on a 32 bit boundary in the frame as
mandated by the standard, since it is preceded by a two byte TLV header.

The struct definitions live under include/uapi/, but they are not
really part of any kernel<->userspace API/ABI, so fixing the
definitions by adding the packed attribute should not cause any
compatibility issues.

Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Reviewed-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/mrp_bridge.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/mrp_bridge.h b/include/uapi/linux/mrp_bridge.h
index 9744773de5ff..5ce771327e32 100644
--- a/include/uapi/linux/mrp_bridge.h
+++ b/include/uapi/linux/mrp_bridge.h
@@ -97,7 +97,7 @@ struct br_mrp_ring_test_hdr {
 	__be16 state;
 	__be16 transitions;
 	__be32 timestamp;
-};
+} __attribute__((__packed__));
 
 struct br_mrp_ring_topo_hdr {
 	__be16 prio;
@@ -142,7 +142,7 @@ struct br_mrp_in_test_hdr {
 	__be16 state;
 	__be16 transitions;
 	__be32 timestamp;
-};
+} __attribute__((__packed__));
 
 struct br_mrp_in_topo_hdr {
 	__u8 sa[ETH_ALEN];
-- 
cgit v1.2.3


From 6781939054a1a161e06e7a7955a4846be770a711 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Date: Thu, 21 Jan 2021 21:40:37 +0100
Subject: net: mrp: move struct definitions out of uapi

None of these are actually used in the kernel/userspace interface -
there's a userspace component of implementing MRP, and userspace will
need to construct certain frames to put on the wire, but there's no
reason the kernel should provide the relevant definitions in a UAPI
header.

In fact, some of those definitions were broken until previous commit,
so only keep the few that are actually referenced in the kernel code,
and move them to the br_private_mrp.h header.

Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/mrp_bridge.h | 86 -----------------------------------------
 net/bridge/br_private_mrp.h     | 29 ++++++++++++++
 2 files changed, 29 insertions(+), 86 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/mrp_bridge.h b/include/uapi/linux/mrp_bridge.h
index 5ce771327e32..bd4424de56ff 100644
--- a/include/uapi/linux/mrp_bridge.h
+++ b/include/uapi/linux/mrp_bridge.h
@@ -71,90 +71,4 @@ enum br_mrp_sub_tlv_header_type {
 	BR_MRP_SUB_TLV_HEADER_TEST_AUTO_MGR = 0x3,
 };
 
-struct br_mrp_tlv_hdr {
-	__u8 type;
-	__u8 length;
-};
-
-struct br_mrp_sub_tlv_hdr {
-	__u8 type;
-	__u8 length;
-};
-
-struct br_mrp_end_hdr {
-	struct br_mrp_tlv_hdr hdr;
-};
-
-struct br_mrp_common_hdr {
-	__be16 seq_id;
-	__u8 domain[MRP_DOMAIN_UUID_LENGTH];
-};
-
-struct br_mrp_ring_test_hdr {
-	__be16 prio;
-	__u8 sa[ETH_ALEN];
-	__be16 port_role;
-	__be16 state;
-	__be16 transitions;
-	__be32 timestamp;
-} __attribute__((__packed__));
-
-struct br_mrp_ring_topo_hdr {
-	__be16 prio;
-	__u8 sa[ETH_ALEN];
-	__be16 interval;
-};
-
-struct br_mrp_ring_link_hdr {
-	__u8 sa[ETH_ALEN];
-	__be16 port_role;
-	__be16 interval;
-	__be16 blocked;
-};
-
-struct br_mrp_sub_opt_hdr {
-	__u8 type;
-	__u8 manufacture_data[MRP_MANUFACTURE_DATA_LENGTH];
-};
-
-struct br_mrp_test_mgr_nack_hdr {
-	__be16 prio;
-	__u8 sa[ETH_ALEN];
-	__be16 other_prio;
-	__u8 other_sa[ETH_ALEN];
-};
-
-struct br_mrp_test_prop_hdr {
-	__be16 prio;
-	__u8 sa[ETH_ALEN];
-	__be16 other_prio;
-	__u8 other_sa[ETH_ALEN];
-};
-
-struct br_mrp_oui_hdr {
-	__u8 oui[MRP_OUI_LENGTH];
-};
-
-struct br_mrp_in_test_hdr {
-	__be16 id;
-	__u8 sa[ETH_ALEN];
-	__be16 port_role;
-	__be16 state;
-	__be16 transitions;
-	__be32 timestamp;
-} __attribute__((__packed__));
-
-struct br_mrp_in_topo_hdr {
-	__u8 sa[ETH_ALEN];
-	__be16 id;
-	__be16 interval;
-};
-
-struct br_mrp_in_link_hdr {
-	__u8 sa[ETH_ALEN];
-	__be16 port_role;
-	__be16 id;
-	__be16 interval;
-};
-
 #endif
diff --git a/net/bridge/br_private_mrp.h b/net/bridge/br_private_mrp.h
index 1883118aae55..32a48e5418da 100644
--- a/net/bridge/br_private_mrp.h
+++ b/net/bridge/br_private_mrp.h
@@ -88,4 +88,33 @@ int br_mrp_switchdev_send_in_test(struct net_bridge *br, struct br_mrp *mrp,
 int br_mrp_ring_port_open(struct net_device *dev, u8 loc);
 int br_mrp_in_port_open(struct net_device *dev, u8 loc);
 
+/* MRP protocol data units */
+struct br_mrp_tlv_hdr {
+	__u8 type;
+	__u8 length;
+};
+
+struct br_mrp_common_hdr {
+	__be16 seq_id;
+	__u8 domain[MRP_DOMAIN_UUID_LENGTH];
+};
+
+struct br_mrp_ring_test_hdr {
+	__be16 prio;
+	__u8 sa[ETH_ALEN];
+	__be16 port_role;
+	__be16 state;
+	__be16 transitions;
+	__be32 timestamp;
+} __attribute__((__packed__));
+
+struct br_mrp_in_test_hdr {
+	__be16 id;
+	__u8 sa[ETH_ALEN];
+	__be16 port_role;
+	__be16 state;
+	__be16 transitions;
+	__be32 timestamp;
+} __attribute__((__packed__));
+
 #endif /* _BR_PRIVATE_MRP_H */
-- 
cgit v1.2.3


From 07d46d93c9acdfe0614071d73c415dd5f745cc6e Mon Sep 17 00:00:00 2001
From: Justin Iurman <justin.iurman@uliege.be>
Date: Thu, 21 Jan 2021 23:00:44 +0100
Subject: uapi: fix big endian definition of ipv6_rpl_sr_hdr

Following RFC 6554 [1], the current order of fields is wrong for the big
endian definition. Indeed, here is what the header looks like:

+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|  Next Header  |  Hdr Ext Len  | Routing Type  | Segments Left |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| CmprI | CmprE |  Pad  |               Reserved                |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

This patch reorders the fields so that the big endian definition is now correct.

  [1] https://tools.ietf.org/html/rfc6554#section-3

Fixes: cfa933d938d8 ("include: uapi: linux: add rpl sr header definition")
Signed-off-by: Justin Iurman <justin.iurman@uliege.be>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/rpl.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/rpl.h b/include/uapi/linux/rpl.h
index 1dccb55cf8c6..708adddf9f13 100644
--- a/include/uapi/linux/rpl.h
+++ b/include/uapi/linux/rpl.h
@@ -28,10 +28,10 @@ struct ipv6_rpl_sr_hdr {
 		pad:4,
 		reserved1:16;
 #elif defined(__BIG_ENDIAN_BITFIELD)
-	__u32	reserved:20,
+	__u32	cmpri:4,
+		cmpre:4,
 		pad:4,
-		cmpri:4,
-		cmpre:4;
+		reserved:20;
 #else
 #error  "Please fix <asm/byteorder.h>"
 #endif
-- 
cgit v1.2.3


From a53e3c189cc6460b60e152af3fc24edf8e0ea9d2 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Mon, 18 Jan 2021 16:37:00 +0100
Subject: media: v4l2-subdev.h: BIT() is not available in userspace

The BIT macro is not available in userspace, so replace BIT(0) by
0x00000001.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Fixes: 6446ec6cbf46 ("media: v4l2-subdev: add VIDIOC_SUBDEV_QUERYCAP ioctl")
Cc: <stable@vger.kernel.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/uapi/linux/v4l2-subdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/v4l2-subdev.h b/include/uapi/linux/v4l2-subdev.h
index 00850b98078a..a38454d9e0f5 100644
--- a/include/uapi/linux/v4l2-subdev.h
+++ b/include/uapi/linux/v4l2-subdev.h
@@ -176,7 +176,7 @@ struct v4l2_subdev_capability {
 };
 
 /* The v4l2 sub-device video device node is registered in read-only mode. */
-#define V4L2_SUBDEV_CAP_RO_SUBDEV		BIT(0)
+#define V4L2_SUBDEV_CAP_RO_SUBDEV		0x00000001
 
 /* Backwards compatibility define --- to be removed */
 #define v4l2_subdev_edid v4l2_edid
-- 
cgit v1.2.3


From 31f190e0ccac8b75d33fdc95a797c526cf9b149e Mon Sep 17 00:00:00 2001
From: Dafna Hirschfeld <dafna.hirschfeld@collabora.com>
Date: Thu, 21 Jan 2021 15:44:02 +0100
Subject: media: rkisp1: uapi: change hist_bins array type from __u16 to __u32

Each entry in the array is a 20-bit value composed of a 16-bit unsigned
integer part and a 4-bit fractional part, so the type should change to __u32.
In addition, add documentation of how the measurements are done.

Signed-off-by: Dafna Hirschfeld <dafna.hirschfeld@collabora.com>
Acked-by: Helen Koike <helen.koike@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/uapi/linux/rkisp1-config.h | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/rkisp1-config.h b/include/uapi/linux/rkisp1-config.h
index 6e449e784260..24f05d6d258f 100644
--- a/include/uapi/linux/rkisp1-config.h
+++ b/include/uapi/linux/rkisp1-config.h
@@ -844,13 +844,18 @@ struct rkisp1_cif_isp_af_stat {
 /**
  * struct rkisp1_cif_isp_hist_stat - statistics histogram data
  *
- * @hist_bins: measured bin counters
+ * @hist_bins: measured bin counters. Each bin is a 20 bits unsigned fixed point
+ *	       type. Bits 0-4 are the fractional part and bits 5-19 are the
+ *	       integer part.
  *
- * Measurement window divided into 25 sub-windows, set
- * with ISP_HIST_XXX
+ * The window of the measurements area is divided to 5x5 sub-windows. The
+ * histogram is then computed for each sub-window independently and the final
+ * result is a weighted average of the histogram measurements on all
+ * sub-windows. The window of the measurements area and the weight of each
+ * sub-window are configurable using struct @rkisp1_cif_isp_hst_config.
  */
 struct rkisp1_cif_isp_hist_stat {
-	__u16 hist_bins[RKISP1_CIF_ISP_HIST_BIN_N_MAX];
+	__u32 hist_bins[RKISP1_CIF_ISP_HIST_BIN_N_MAX];
 };
 
 /**
-- 
cgit v1.2.3


From 66d81de7ea9d2b0775e5bfd5e770483a1c24b9ca Mon Sep 17 00:00:00 2001
From: Heiko Stuebner <heiko.stuebner@theobroma-systems.com>
Date: Thu, 21 Jan 2021 15:44:05 +0100
Subject: media: rockchip: rkisp1: reduce number of histogram grid elements in
 uapi

The uapi right now specifies an array size of 28 but the actual number
of elements is only 25 with the last 3 being unused.

Reduce the array size to the correct number of elements and change
the params code to iterate the array 25 times.

Signed-off-by: Heiko Stuebner <heiko.stuebner@theobroma-systems.com>
Signed-off-by: Dafna Hirschfeld <dafna.hirschfeld@collabora.com>
Acked-by: Helen Koike <helen.koike@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/rockchip/rkisp1/rkisp1-params.c | 3 ++-
 include/uapi/linux/rkisp1-config.h                     | 3 +--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c
index 6af4d551ffb5..021939466b24 100644
--- a/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c
+++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c
@@ -589,7 +589,6 @@ static void rkisp1_hst_config(struct rkisp1_params *params,
 		RKISP1_CIF_ISP_HIST_WEIGHT_22TO03,
 		RKISP1_CIF_ISP_HIST_WEIGHT_13TO43,
 		RKISP1_CIF_ISP_HIST_WEIGHT_04TO34,
-		RKISP1_CIF_ISP_HIST_WEIGHT_44,
 	};
 	const u8 *weight;
 	unsigned int i;
@@ -622,6 +621,8 @@ static void rkisp1_hst_config(struct rkisp1_params *params,
 							    weight[2],
 							    weight[3]),
 				 hist_weight_regs[i]);
+
+	rkisp1_write(params->rkisp1, weight[0] & 0x1F, RKISP1_CIF_ISP_HIST_WEIGHT_44);
 }
 
 static void
diff --git a/include/uapi/linux/rkisp1-config.h b/include/uapi/linux/rkisp1-config.h
index 24f05d6d258f..35aa82d5f6dd 100644
--- a/include/uapi/linux/rkisp1-config.h
+++ b/include/uapi/linux/rkisp1-config.h
@@ -102,8 +102,7 @@
 /*
  * Histogram calculation
  */
-/* Last 3 values unused. */
-#define RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE 28
+#define RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE 25
 
 /*
  * Defect Pixel Cluster Correction
-- 
cgit v1.2.3


From fc672d806bd77eff26117479e90ccdcfd2a8ecb4 Mon Sep 17 00:00:00 2001
From: Heiko Stuebner <heiko.stuebner@theobroma-systems.com>
Date: Thu, 21 Jan 2021 15:44:06 +0100
Subject: media: rockchip: rkisp1: carry ip version information

The IP block evolved from its rk3288/rk3399 base and the vendor
designates them with a numerical version. rk3399 for example
is designated V10 probably meaning V1.0.

There doesn't seem to be an actual version register we could read that
information from, so allow the match_data to carry that information
for future differentiation.

Also carry that information in the hw_revision field of the media-
controller API, so that userspace also has access to that.

The added versions are:
- V10: at least rk3288 + rk3399
- V11: seemingly unused as of now, but probably appeared in some soc
- V12: at least rk3326 + px30
- V13: at least rk1808

[fix checkpatch warning don't use multiple blank lines]

Signed-off-by: Heiko Stuebner <heiko.stuebner@theobroma-systems.com>
Signed-off-by: Dafna Hirschfeld <dafna.hirschfeld@collabora.com>
Reviewed-by: Ezequiel Garcia <ezequiel@collabora.com>
Acked-by: Helen Koike <helen.koike@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 Documentation/admin-guide/media/rkisp1.rst          | 16 ++++++++++++++++
 drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c | 21 ++++++++++++---------
 include/uapi/linux/rkisp1-config.h                  | 15 +++++++++++++++
 3 files changed, 43 insertions(+), 9 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/admin-guide/media/rkisp1.rst b/Documentation/admin-guide/media/rkisp1.rst
index 2267e4fb475e..ccf418713623 100644
--- a/Documentation/admin-guide/media/rkisp1.rst
+++ b/Documentation/admin-guide/media/rkisp1.rst
@@ -13,6 +13,22 @@ This file documents the driver for the Rockchip ISP1 that is part of RK3288
 and RK3399 SoCs. The driver is located under drivers/staging/media/rkisp1
 and uses the Media-Controller API.
 
+Revisions
+=========
+
+There exist multiple smaller revisions to this ISP that got introduced in
+later SoCs. Revisions can be found in the enum :c:type:`rkisp1_cif_isp_version`
+in the UAPI and the revision of the ISP inside the running SoC can be read
+in the field hw_revision of struct media_device_info as returned by
+ioctl MEDIA_IOC_DEVICE_INFO.
+
+Versions in use are:
+
+- RKISP1_V10: used at least in rk3288 and rk3399
+- RKISP1_V11: declared in the original vendor code, but not used
+- RKISP1_V12: used at least in rk3326 and px30
+- RKISP1_V13: used at least in rk1808
+
 Topology
 ========
 .. _rkisp1_topology_graph:
diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c
index 68da1eed753d..f7e9fd305548 100644
--- a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c
+++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c
@@ -104,6 +104,7 @@
 struct rkisp1_match_data {
 	const char * const *clks;
 	unsigned int size;
+	enum rkisp1_cif_isp_version isp_ver;
 };
 
 /* ----------------------------------------------------------------------------
@@ -411,15 +412,16 @@ static const char * const rk3399_isp_clks[] = {
 	"hclk",
 };
 
-static const struct rkisp1_match_data rk3399_isp_clk_data = {
+static const struct rkisp1_match_data rk3399_isp_match_data = {
 	.clks = rk3399_isp_clks,
 	.size = ARRAY_SIZE(rk3399_isp_clks),
+	.isp_ver = RKISP1_V10,
 };
 
 static const struct of_device_id rkisp1_of_match[] = {
 	{
 		.compatible = "rockchip,rk3399-cif-isp",
-		.data = &rk3399_isp_clk_data,
+		.data = &rk3399_isp_match_data,
 	},
 	{},
 };
@@ -457,15 +459,15 @@ static void rkisp1_debug_init(struct rkisp1_device *rkisp1)
 
 static int rkisp1_probe(struct platform_device *pdev)
 {
-	const struct rkisp1_match_data *clk_data;
+	const struct rkisp1_match_data *match_data;
 	struct device *dev = &pdev->dev;
 	struct rkisp1_device *rkisp1;
 	struct v4l2_device *v4l2_dev;
 	unsigned int i;
 	int ret, irq;
 
-	clk_data = of_device_get_match_data(&pdev->dev);
-	if (!clk_data)
+	match_data = of_device_get_match_data(&pdev->dev);
+	if (!match_data)
 		return -ENODEV;
 
 	rkisp1 = devm_kzalloc(dev, sizeof(*rkisp1), GFP_KERNEL);
@@ -494,15 +496,16 @@ static int rkisp1_probe(struct platform_device *pdev)
 
 	rkisp1->irq = irq;
 
-	for (i = 0; i < clk_data->size; i++)
-		rkisp1->clks[i].id = clk_data->clks[i];
-	ret = devm_clk_bulk_get(dev, clk_data->size, rkisp1->clks);
+	for (i = 0; i < match_data->size; i++)
+		rkisp1->clks[i].id = match_data->clks[i];
+	ret = devm_clk_bulk_get(dev, match_data->size, rkisp1->clks);
 	if (ret)
 		return ret;
-	rkisp1->clk_size = clk_data->size;
+	rkisp1->clk_size = match_data->size;
 
 	pm_runtime_enable(&pdev->dev);
 
+	rkisp1->media_dev.hw_revision = match_data->isp_ver;
 	strscpy(rkisp1->media_dev.model, RKISP1_DRIVER_NAME,
 		sizeof(rkisp1->media_dev.model));
 	rkisp1->media_dev.dev = &pdev->dev;
diff --git a/include/uapi/linux/rkisp1-config.h b/include/uapi/linux/rkisp1-config.h
index 35aa82d5f6dd..bee4413fe0d3 100644
--- a/include/uapi/linux/rkisp1-config.h
+++ b/include/uapi/linux/rkisp1-config.h
@@ -123,6 +123,21 @@
 #define RKISP1_CIF_ISP_STAT_AFM           (1U << 2)
 #define RKISP1_CIF_ISP_STAT_HIST          (1U << 3)
 
+/**
+ * enum rkisp1_cif_isp_version - ISP variants
+ *
+ * @RKISP1_V10: used at least in rk3288 and rk3399
+ * @RKISP1_V11: declared in the original vendor code, but not used
+ * @RKISP1_V12: used at least in rk3326 and px30
+ * @RKISP1_V13: used at least in rk1808
+ */
+enum rkisp1_cif_isp_version {
+	RKISP1_V10 = 10,
+	RKISP1_V11,
+	RKISP1_V12,
+	RKISP1_V13,
+};
+
 enum rkisp1_cif_isp_histogram_mode {
 	RKISP1_CIF_ISP_HISTOGRAM_MODE_DISABLE,
 	RKISP1_CIF_ISP_HISTOGRAM_MODE_RGB_COMBINED,
-- 
cgit v1.2.3


From ef357e02b6c420dc2d668ebf3165838c77358acd Mon Sep 17 00:00:00 2001
From: Heiko Stuebner <heiko.stuebner@theobroma-systems.com>
Date: Thu, 21 Jan 2021 15:44:07 +0100
Subject: media: rockchip: rkisp1: extend uapi array sizes

Later variants of the rkisp1 block use more entries in some arrays:

RKISP1_CIF_ISP_AE_MEAN_MAX                 25 -> 81
RKISP1_CIF_ISP_HIST_BIN_N_MAX              16 -> 32
RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES       17 -> 34
RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE 25 -> 81

and we can still extend the uapi during the 5.11-rc cycle, so do that
now to be on the safe side.

V10 and V11 only need the smaller sizes, while V12 and V13 need
the larger sizes.

When adding the bigger sizes, make sure that values filled from hardware
and transmitted to userspace don't leak kernel data by zeroing
them beforehand.

Signed-off-by: Heiko Stuebner <heiko.stuebner@theobroma-systems.com>
Signed-off-by: Dafna Hirschfeld <dafna.hirschfeld@collabora.com>
Reviewed-by: Ezequiel Garcia <ezequiel@collabora.com>
Acked-by: Helen Koike <helen.koike@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../media/platform/rockchip/rkisp1/rkisp1-params.c |  2 +-
 .../media/platform/rockchip/rkisp1/rkisp1-stats.c  |  4 +-
 include/uapi/linux/rkisp1-config.h                 | 67 ++++++++++++++++++----
 3 files changed, 60 insertions(+), 13 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c
index 021939466b24..aa5f45749543 100644
--- a/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c
+++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c
@@ -391,7 +391,7 @@ static void rkisp1_goc_config(struct rkisp1_params *params,
 				RKISP1_CIF_ISP_CTRL_ISP_GAMMA_OUT_ENA);
 	rkisp1_write(params->rkisp1, arg->mode, RKISP1_CIF_ISP_GAMMA_OUT_MODE);
 
-	for (i = 0; i < RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES; i++)
+	for (i = 0; i < RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES_V10; i++)
 		rkisp1_write(params->rkisp1, arg->gamma_y[i],
 			     RKISP1_CIF_ISP_GAMMA_OUT_Y_0 + i * 4);
 }
diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-stats.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-stats.c
index 3b2783700abc..c1d07a2e8839 100644
--- a/drivers/media/platform/rockchip/rkisp1/rkisp1-stats.c
+++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-stats.c
@@ -203,7 +203,7 @@ static void rkisp1_stats_get_aec_meas(struct rkisp1_stats *stats,
 	unsigned int i;
 
 	pbuf->meas_type |= RKISP1_CIF_ISP_STAT_AUTOEXP;
-	for (i = 0; i < RKISP1_CIF_ISP_AE_MEAN_MAX; i++)
+	for (i = 0; i < RKISP1_CIF_ISP_AE_MEAN_MAX_V10; i++)
 		pbuf->params.ae.exp_mean[i] =
 			(u8)rkisp1_read(rkisp1,
 					RKISP1_CIF_ISP_EXP_MEAN_00 + i * 4);
@@ -233,7 +233,7 @@ static void rkisp1_stats_get_hst_meas(struct rkisp1_stats *stats,
 	unsigned int i;
 
 	pbuf->meas_type |= RKISP1_CIF_ISP_STAT_HIST;
-	for (i = 0; i < RKISP1_CIF_ISP_HIST_BIN_N_MAX; i++) {
+	for (i = 0; i < RKISP1_CIF_ISP_HIST_BIN_N_MAX_V10; i++) {
 		u32 reg_val = rkisp1_read(rkisp1, RKISP1_CIF_ISP_HIST_BIN_0 + i * 4);
 
 		pbuf->params.hist.hist_bins[i] = RKISP1_CIF_ISP_HIST_GET_BIN(reg_val);
diff --git a/include/uapi/linux/rkisp1-config.h b/include/uapi/linux/rkisp1-config.h
index bee4413fe0d3..36e3efb81b01 100644
--- a/include/uapi/linux/rkisp1-config.h
+++ b/include/uapi/linux/rkisp1-config.h
@@ -49,8 +49,14 @@
 #define RKISP1_CIF_ISP_CTK_COEFF_MAX            0x100
 #define RKISP1_CIF_ISP_CTK_OFFSET_MAX           0x800
 
-#define RKISP1_CIF_ISP_AE_MEAN_MAX              25
-#define RKISP1_CIF_ISP_HIST_BIN_N_MAX           16
+#define RKISP1_CIF_ISP_AE_MEAN_MAX_V10		25
+#define RKISP1_CIF_ISP_AE_MEAN_MAX_V12		81
+#define RKISP1_CIF_ISP_AE_MEAN_MAX		RKISP1_CIF_ISP_AE_MEAN_MAX_V12
+
+#define RKISP1_CIF_ISP_HIST_BIN_N_MAX_V10	16
+#define RKISP1_CIF_ISP_HIST_BIN_N_MAX_V12	32
+#define RKISP1_CIF_ISP_HIST_BIN_N_MAX		RKISP1_CIF_ISP_HIST_BIN_N_MAX_V12
+
 #define RKISP1_CIF_ISP_AFM_MAX_WINDOWS          3
 #define RKISP1_CIF_ISP_DEGAMMA_CURVE_SIZE       17
 
@@ -86,7 +92,9 @@
  * Gamma out
  */
 /* Maximum number of color samples supported */
-#define RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES       17
+#define RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES_V10   17
+#define RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES_V12   34
+#define RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES       RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES_V12
 
 /*
  * Lens shade correction
@@ -102,7 +110,9 @@
 /*
  * Histogram calculation
  */
-#define RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE 25
+#define RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE_V10 25
+#define RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE_V12 81
+#define RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE     RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE_V12
 
 /*
  * Defect Pixel Cluster Correction
@@ -524,6 +534,15 @@ enum rkisp1_cif_isp_goc_mode {
  *
  * @mode: goc mode (from enum rkisp1_cif_isp_goc_mode)
  * @gamma_y: gamma out curve y-axis for all color components
+ *
+ * The number of entries of @gamma_y depends on the hardware revision
+ * as is reported by the hw_revision field of the struct media_device_info
+ * that is returned by ioctl MEDIA_IOC_DEVICE_INFO.
+ *
+ * Versions <= V11 have RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES_V10
+ * entries, versions >= V12 have RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES_V12
+ * entries. RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES is equal to the maximum
+ * of the two.
  */
 struct rkisp1_cif_isp_goc_config {
 	__u32 mode;
@@ -538,6 +557,15 @@ struct rkisp1_cif_isp_goc_config {
  *			  skipped
  * @meas_window: coordinates of the measure window
  * @hist_weight: weighting factor for sub-windows
+ *
+ * The number of entries of @hist_weight depends on the hardware revision
+ * as is reported by the hw_revision field of the struct media_device_info
+ * that is returned by ioctl MEDIA_IOC_DEVICE_INFO.
+ *
+ * Versions <= V11 have RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE_V10
+ * entries, versions >= V12 have RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE_V12
+ * entries. RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE is equal to the maximum
+ * of the two.
  */
 struct rkisp1_cif_isp_hst_config {
 	__u32 mode;
@@ -825,7 +853,15 @@ struct rkisp1_cif_isp_bls_meas_val {
  * @exp_mean: Mean luminance value of block xx
  * @bls_val:  BLS measured values
  *
- * Image is divided into 5x5 blocks.
+ * The number of entries of @exp_mean depends on the hardware revision
+ * as is reported by the hw_revision field of the struct media_device_info
+ * that is returned by ioctl MEDIA_IOC_DEVICE_INFO.
+ *
+ * Versions <= V11 have RKISP1_CIF_ISP_AE_MEAN_MAX_V10 entries,
+ * versions >= V12 have RKISP1_CIF_ISP_AE_MEAN_MAX_V12 entries.
+ * RKISP1_CIF_ISP_AE_MEAN_MAX is equal to the maximum of the two.
+ *
+ * Image is divided into 5x5 blocks on V10 and 9x9 blocks on V12.
  */
 struct rkisp1_cif_isp_ae_stat {
 	__u8 exp_mean[RKISP1_CIF_ISP_AE_MEAN_MAX];
@@ -862,11 +898,22 @@ struct rkisp1_cif_isp_af_stat {
  *	       type. Bits 0-4 are the fractional part and bits 5-19 are the
  *	       integer part.
  *
- * The window of the measurements area is divided to 5x5 sub-windows. The
- * histogram is then computed for each sub-window independently and the final
- * result is a weighted average of the histogram measurements on all
- * sub-windows. The window of the measurements area and the weight of each
- * sub-window are configurable using struct @rkisp1_cif_isp_hst_config.
+ * The window of the measurements area is divided to 5x5 sub-windows for
+ * V10/V11 and to 9x9 sub-windows for V12. The histogram is then computed for
+ * each sub-window independently and the final result is a weighted average of
+ * the histogram measurements on all sub-windows. The window of the
+ * measurements area and the weight of each sub-window are configurable using
+ * struct @rkisp1_cif_isp_hst_config.
+ *
+ * The histogram contains 16 bins in V10/V11 and 32 bins in V12/V13.
+ *
+ * The number of entries of @hist_bins depends on the hardware revision
+ * as is reported by the hw_revision field of the struct media_device_info
+ * that is returned by ioctl MEDIA_IOC_DEVICE_INFO.
+ *
+ * Versions <= V11 have RKISP1_CIF_ISP_HIST_BIN_N_MAX_V10 entries,
+ * versions >= V12 have RKISP1_CIF_ISP_HIST_BIN_N_MAX_V12 entries.
+ * RKISP1_CIF_ISP_HIST_BIN_N_MAX is equal to the maximum of the two.
  */
 struct rkisp1_cif_isp_hist_stat {
 	__u32 hist_bins[RKISP1_CIF_ISP_HIST_BIN_N_MAX];
-- 
cgit v1.2.3


From 36a6c843fd0d8e02506681577e96dabd203dd8e8 Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@collabora.com>
Date: Fri, 5 Feb 2021 13:43:21 -0500
Subject: entry: Use different define for selector variable in SUD

Michael Kerrisk suggested that, from an API perspective, it is a bad
idea to share the PR_SYS_DISPATCH_ defines between the prctl operation
and the selector variable.

Therefore, define two new constants to be used by SUD's selector variable
and update the corresponding documentation and test cases.

While this changes the API, syscall user dispatch has never been part of a
Linux release; it will show up for the first time in 5.11.

Suggested-by: Michael Kerrisk (man-pages) <mtk.manpages@gmail.com>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210205184321.2062251-1-krisman@collabora.com
---
 Documentation/admin-guide/syscall-user-dispatch.rst        |  4 ++--
 include/uapi/linux/prctl.h                                 |  3 +++
 kernel/entry/syscall_user_dispatch.c                       |  4 ++--
 .../selftests/syscall_user_dispatch/sud_benchmark.c        |  8 +++++---
 tools/testing/selftests/syscall_user_dispatch/sud_test.c   | 14 ++++++++------
 5 files changed, 20 insertions(+), 13 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/admin-guide/syscall-user-dispatch.rst b/Documentation/admin-guide/syscall-user-dispatch.rst
index a380d6515774..60314953c728 100644
--- a/Documentation/admin-guide/syscall-user-dispatch.rst
+++ b/Documentation/admin-guide/syscall-user-dispatch.rst
@@ -70,8 +70,8 @@ trampoline code on the vDSO, that trampoline is never intercepted.
 [selector] is a pointer to a char-sized region in the process memory
 region, that provides a quick way to enable disable syscall redirection
 thread-wide, without the need to invoke the kernel directly.  selector
-can be set to PR_SYS_DISPATCH_ON or PR_SYS_DISPATCH_OFF.  Any other
-value should terminate the program with a SIGSYS.
+can be set to SYSCALL_DISPATCH_FILTER_ALLOW or SYSCALL_DISPATCH_FILTER_BLOCK.
+Any other value should terminate the program with a SIGSYS.
 
 Security Notes
 --------------
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 90deb41c8a34..667f1aed091c 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -251,5 +251,8 @@ struct prctl_mm_map {
 #define PR_SET_SYSCALL_USER_DISPATCH	59
 # define PR_SYS_DISPATCH_OFF		0
 # define PR_SYS_DISPATCH_ON		1
+/* The control values for the user space selector when dispatch is enabled */
+# define SYSCALL_DISPATCH_FILTER_ALLOW	0
+# define SYSCALL_DISPATCH_FILTER_BLOCK	1
 
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/entry/syscall_user_dispatch.c b/kernel/entry/syscall_user_dispatch.c
index b0338a5625d9..c240302f56e2 100644
--- a/kernel/entry/syscall_user_dispatch.c
+++ b/kernel/entry/syscall_user_dispatch.c
@@ -50,10 +50,10 @@ bool syscall_user_dispatch(struct pt_regs *regs)
 		if (unlikely(__get_user(state, sd->selector)))
 			do_exit(SIGSEGV);
 
-		if (likely(state == PR_SYS_DISPATCH_OFF))
+		if (likely(state == SYSCALL_DISPATCH_FILTER_ALLOW))
 			return false;
 
-		if (state != PR_SYS_DISPATCH_ON)
+		if (state != SYSCALL_DISPATCH_FILTER_BLOCK)
 			do_exit(SIGSYS);
 	}
 
diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c b/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c
index 6689f1183dbf..073a03702ff5 100644
--- a/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c
+++ b/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c
@@ -22,6 +22,8 @@
 # define PR_SET_SYSCALL_USER_DISPATCH	59
 # define PR_SYS_DISPATCH_OFF	0
 # define PR_SYS_DISPATCH_ON	1
+# define SYSCALL_DISPATCH_FILTER_ALLOW	0
+# define SYSCALL_DISPATCH_FILTER_BLOCK	1
 #endif
 
 #ifdef __NR_syscalls
@@ -55,8 +57,8 @@ unsigned long trapped_call_count = 0;
 unsigned long native_call_count = 0;
 
 char selector;
-#define SYSCALL_BLOCK   (selector = PR_SYS_DISPATCH_ON)
-#define SYSCALL_UNBLOCK (selector = PR_SYS_DISPATCH_OFF)
+#define SYSCALL_BLOCK   (selector = SYSCALL_DISPATCH_FILTER_BLOCK)
+#define SYSCALL_UNBLOCK (selector = SYSCALL_DISPATCH_FILTER_ALLOW)
 
 #define CALIBRATION_STEP 100000
 #define CALIBRATE_TO_SECS 5
@@ -170,7 +172,7 @@ int main(void)
 	syscall(MAGIC_SYSCALL_1);
 
 #ifdef TEST_BLOCKED_RETURN
-	if (selector == PR_SYS_DISPATCH_OFF) {
+	if (selector == SYSCALL_DISPATCH_FILTER_ALLOW) {
 		fprintf(stderr, "Failed to return with selector blocked.\n");
 		exit(-1);
 	}
diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_test.c b/tools/testing/selftests/syscall_user_dispatch/sud_test.c
index 6498b050ef89..b5d592d4099e 100644
--- a/tools/testing/selftests/syscall_user_dispatch/sud_test.c
+++ b/tools/testing/selftests/syscall_user_dispatch/sud_test.c
@@ -18,6 +18,8 @@
 # define PR_SET_SYSCALL_USER_DISPATCH	59
 # define PR_SYS_DISPATCH_OFF	0
 # define PR_SYS_DISPATCH_ON	1
+# define SYSCALL_DISPATCH_FILTER_ALLOW	0
+# define SYSCALL_DISPATCH_FILTER_BLOCK	1
 #endif
 
 #ifndef SYS_USER_DISPATCH
@@ -30,8 +32,8 @@
 # define MAGIC_SYSCALL_1 (0xff00)  /* Bad Linux syscall number */
 #endif
 
-#define SYSCALL_DISPATCH_ON(x) ((x) = 1)
-#define SYSCALL_DISPATCH_OFF(x) ((x) = 0)
+#define SYSCALL_DISPATCH_ON(x) ((x) = SYSCALL_DISPATCH_FILTER_BLOCK)
+#define SYSCALL_DISPATCH_OFF(x) ((x) = SYSCALL_DISPATCH_FILTER_ALLOW)
 
 /* Test Summary:
  *
@@ -56,7 +58,7 @@
 
 TEST_SIGNAL(dispatch_trigger_sigsys, SIGSYS)
 {
-	char sel = 0;
+	char sel = SYSCALL_DISPATCH_FILTER_ALLOW;
 	struct sysinfo info;
 	int ret;
 
@@ -79,7 +81,7 @@ TEST_SIGNAL(dispatch_trigger_sigsys, SIGSYS)
 
 TEST(bad_prctl_param)
 {
-	char sel = 0;
+	char sel = SYSCALL_DISPATCH_FILTER_ALLOW;
 	int op;
 
 	/* Invalid op */
@@ -220,7 +222,7 @@ TEST_SIGNAL(bad_selector, SIGSYS)
 	sigset_t mask;
 	struct sysinfo info;
 
-	glob_sel = 0;
+	glob_sel = SYSCALL_DISPATCH_FILTER_ALLOW;
 	nr_syscalls_emulated = 0;
 	si_code = 0;
 	si_errno = 0;
@@ -288,7 +290,7 @@ TEST(direct_dispatch_range)
 {
 	int ret = 0;
 	struct sysinfo info;
-	char sel = 0;
+	char sel = SYSCALL_DISPATCH_FILTER_ALLOW;
 
 	/*
 	 * Instead of calculating libc addresses; allow the entire
-- 
cgit v1.2.3