From f7c475b8dfc23d461a47dfac5e498f8cc96faea5 Mon Sep 17 00:00:00 2001
From: xinhui pan <xinhui.pan@amd.com>
Date: Wed, 24 Feb 2021 11:28:08 +0800
Subject: drm/ttm: Do not add non-system domain BO into swap list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BO would be added into swap list if it is validated into system domain.
If BO is validated again into non-system domain, say, VRAM domain. It
actually should not be in the swap list.

Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: xinhui pan <xinhui.pan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210224032808.150465-1-xinhui.pan@amd.com
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 101a68dc615b..799ec7a7caa4 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -153,6 +153,8 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
 
 		swap = &ttm_bo_glob.swap_lru[bo->priority];
 		list_move_tail(&bo->swap, swap);
+	} else {
+		list_del_init(&bo->swap);
 	}
 
 	if (bdev->driver->del_from_lru_notify)
-- 
cgit v1.2.3


From ffe8768fb8f391cb478466778c55e2110525c15c Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Thu, 15 Apr 2021 16:45:25 +0800
Subject: drm/vc4: remove unused function

Fix the following clang warning:

drivers/gpu/drm/vc4/vc4_vec.c:201:1: warning: unused function
'to_vc4_vec_connector' [-Wunused-function].

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Link: https://patchwork.freedesktop.org/patch/msgid/1618476325-112629-1-git-send-email-jiapeng.chong@linux.alibaba.com
---
 drivers/gpu/drm/vc4/vc4_vec.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/gpu/drm/vc4/vc4_vec.c b/drivers/gpu/drm/vc4/vc4_vec.c
index bd5b8eb58b18..090529d0d5dc 100644
--- a/drivers/gpu/drm/vc4/vc4_vec.c
+++ b/drivers/gpu/drm/vc4/vc4_vec.c
@@ -197,12 +197,6 @@ struct vc4_vec_connector {
 	struct drm_encoder *encoder;
 };
 
-static inline struct vc4_vec_connector *
-to_vc4_vec_connector(struct drm_connector *connector)
-{
-	return container_of(connector, struct vc4_vec_connector, base);
-}
-
 enum vc4_vec_tv_mode_id {
 	VC4_VEC_TV_MODE_NTSC,
 	VC4_VEC_TV_MODE_NTSC_J,
-- 
cgit v1.2.3


From 366db3ac3cdf97e90695282b959c75d5ea58cf00 Mon Sep 17 00:00:00 2001
From: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Date: Wed, 21 Apr 2021 17:02:20 +0200
Subject: arm64: dts: renesas: aistarvision-mipi-adapter-2.1: Fix CSI40 ports
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the DTS schema by explicitly stating that the input is port@0. This
fixes a schema validation error but has no runtime effect as the default
port number is 0 if not specified.

Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Link: https://lore.kernel.org/r/20210421150221.3202955-2-niklas.soderlund+renesas@ragnatech.se
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts b/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts
index e7b4a929bb17..2e3d1981cac4 100644
--- a/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts
+++ b/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts
@@ -33,7 +33,7 @@
 	status = "okay";
 
 	ports {
-		port {
+		port@0 {
 			csi40_in: endpoint {
 				clock-lanes = <0>;
 				data-lanes = <1 2>;
-- 
cgit v1.2.3


From 0a96c05995ef1085f9c5e6bf005a04915dd2ec6f Mon Sep 17 00:00:00 2001
From: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Date: Wed, 21 Apr 2021 17:02:21 +0200
Subject: arm64: dts: renesas: Add port@0 node for all CSI-2 nodes to dtsi
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The port@0 is a mandatory port, add or move the declaration to the CSI-2
nodes top declared in dtsi files instead of depending on dts files
adding them when describing the external connection.

This fixes validation warnings for DTB outputs that do not connect all
CSI-2 receivers to transmitters and thus declaring all port@0 nodes in
dts files.

Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Link: https://lore.kernel.org/r/20210421150221.3202955-3-niklas.soderlund+renesas@ragnatech.se
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 .../hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi        |  2 --
 arch/arm64/boot/dts/renesas/r8a774a1.dtsi                    |  8 ++++++++
 arch/arm64/boot/dts/renesas/r8a774b1.dtsi                    |  8 ++++++++
 arch/arm64/boot/dts/renesas/r8a774c0.dtsi                    |  4 ++++
 arch/arm64/boot/dts/renesas/r8a774e1.dtsi                    |  8 ++++++++
 arch/arm64/boot/dts/renesas/r8a77950.dtsi                    |  4 ++++
 arch/arm64/boot/dts/renesas/r8a77951.dtsi                    | 12 ++++++++++++
 arch/arm64/boot/dts/renesas/r8a77960.dtsi                    |  8 ++++++++
 arch/arm64/boot/dts/renesas/r8a77961.dtsi                    |  8 ++++++++
 arch/arm64/boot/dts/renesas/r8a77965.dtsi                    |  8 ++++++++
 arch/arm64/boot/dts/renesas/r8a77970.dtsi                    |  4 ++++
 arch/arm64/boot/dts/renesas/r8a77980.dtsi                    |  8 ++++++++
 arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts               |  2 --
 arch/arm64/boot/dts/renesas/r8a77990.dtsi                    |  4 ++++
 arch/arm64/boot/dts/renesas/salvator-common.dtsi             |  3 ---
 15 files changed, 84 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi b/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi
index c62ddb9b2ba5..3771144a2ce4 100644
--- a/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi
+++ b/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi
@@ -14,7 +14,6 @@
 
 	ports {
 		port@0 {
-			reg = <0>;
 			csi20_in: endpoint {
 				clock-lanes = <0>;
 				data-lanes = <1 2>;
@@ -29,7 +28,6 @@
 
 	ports {
 		port@0 {
-			reg = <0>;
 			csi40_in: endpoint {
 				clock-lanes = <0>;
 				data-lanes = <1 2>;
diff --git a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi
index d64fb8b1b86c..46f8dbf68904 100644
--- a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi
@@ -2573,6 +2573,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
@@ -2628,6 +2632,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a774b1.dtsi b/arch/arm64/boot/dts/renesas/r8a774b1.dtsi
index 5b05474dc272..d16a4be5ef77 100644
--- a/arch/arm64/boot/dts/renesas/r8a774b1.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a774b1.dtsi
@@ -2419,6 +2419,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
@@ -2474,6 +2478,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi
index 20fa3caa050e..1aef34447abd 100644
--- a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi
@@ -1823,6 +1823,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi
index 8eb006cbd9af..1f51237ab0a6 100644
--- a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi
@@ -2709,6 +2709,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
@@ -2764,6 +2768,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77950.dtsi b/arch/arm64/boot/dts/renesas/r8a77950.dtsi
index 25b87da32eeb..b643d3079db1 100644
--- a/arch/arm64/boot/dts/renesas/r8a77950.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77950.dtsi
@@ -192,6 +192,10 @@
 			#address-cells = <1>;
 			#size-cells = <0>;
 
+			port@0 {
+				reg = <0>;
+			};
+
 			port@1 {
 				#address-cells = <1>;
 				#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77951.dtsi b/arch/arm64/boot/dts/renesas/r8a77951.dtsi
index 5c39152e4570..85d66d15465a 100644
--- a/arch/arm64/boot/dts/renesas/r8a77951.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77951.dtsi
@@ -3097,6 +3097,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
@@ -3152,6 +3156,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
@@ -3191,6 +3199,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77960.dtsi b/arch/arm64/boot/dts/renesas/r8a77960.dtsi
index 25d947a81b29..12476e354d74 100644
--- a/arch/arm64/boot/dts/renesas/r8a77960.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77960.dtsi
@@ -2761,6 +2761,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
@@ -2816,6 +2820,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77961.dtsi b/arch/arm64/boot/dts/renesas/r8a77961.dtsi
index ab081f14af9a..d9804768425a 100644
--- a/arch/arm64/boot/dts/renesas/r8a77961.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77961.dtsi
@@ -2499,6 +2499,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
@@ -2554,6 +2558,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77965.dtsi b/arch/arm64/boot/dts/renesas/r8a77965.dtsi
index 657b20d3533b..dcb9df861d74 100644
--- a/arch/arm64/boot/dts/renesas/r8a77965.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77965.dtsi
@@ -2575,6 +2575,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
@@ -2630,6 +2634,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77970.dtsi b/arch/arm64/boot/dts/renesas/r8a77970.dtsi
index 5a5d5649332a..e8f6352c3665 100644
--- a/arch/arm64/boot/dts/renesas/r8a77970.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77970.dtsi
@@ -1106,6 +1106,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77980.dtsi b/arch/arm64/boot/dts/renesas/r8a77980.dtsi
index 1ffa4a995a7a..7b51d464de0e 100644
--- a/arch/arm64/boot/dts/renesas/r8a77980.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77980.dtsi
@@ -1439,6 +1439,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
@@ -1478,6 +1482,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts b/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts
index 295d34f1d216..4715e4a4abe0 100644
--- a/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts
+++ b/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts
@@ -298,8 +298,6 @@
 
 	ports {
 		port@0 {
-			reg = <0>;
-
 			csi40_in: endpoint {
 				clock-lanes = <0>;
 				data-lanes = <1 2>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77990.dtsi b/arch/arm64/boot/dts/renesas/r8a77990.dtsi
index 5010f23fafcc..0eaea58f4210 100644
--- a/arch/arm64/boot/dts/renesas/r8a77990.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77990.dtsi
@@ -1970,6 +1970,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/salvator-common.dtsi b/arch/arm64/boot/dts/renesas/salvator-common.dtsi
index e18747df219f..453ffcef24fa 100644
--- a/arch/arm64/boot/dts/renesas/salvator-common.dtsi
+++ b/arch/arm64/boot/dts/renesas/salvator-common.dtsi
@@ -349,7 +349,6 @@
 
 	ports {
 		port@0 {
-			reg = <0>;
 			csi20_in: endpoint {
 				clock-lanes = <0>;
 				data-lanes = <1>;
@@ -364,8 +363,6 @@
 
 	ports {
 		port@0 {
-			reg = <0>;
-
 			csi40_in: endpoint {
 				clock-lanes = <0>;
 				data-lanes = <1 2 3 4>;
-- 
cgit v1.2.3


From d9cd78edb2e6b7e26747c0ec312be31e7ef196fe Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 22 Apr 2021 12:02:29 +0300
Subject: firmware: arm_scpi: Prevent the ternary sign expansion bug

How the type promotion works in ternary expressions is a bit tricky.
The problem is that scpi_clk_get_val() returns longs, "ret" is a int
which holds a negative error code, and le32_to_cpu() is an unsigned int.
We want the negative error code to be cast to a negative long.  But
because le32_to_cpu() is an u32 then "ret" is type promoted to u32 and
becomes a high positive and then it is promoted to long and it is still
a high positive value.

Fix this by getting rid of the ternary.

Link: https://lore.kernel.org/r/YIE7pdqV/h10tEAK@mwanda
Fixes: 8cb7cf56c9fe ("firmware: add support for ARM System Control and Power Interface(SCPI) protocol")
Reviewed-by: Cristian Marussi <cristian.marussi@arm.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
[sudeep.holla: changed to return 0 as clock rate on error]
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
---
 drivers/firmware/arm_scpi.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/firmware/arm_scpi.c b/drivers/firmware/arm_scpi.c
index d0dee37ad522..4ceba5ef7895 100644
--- a/drivers/firmware/arm_scpi.c
+++ b/drivers/firmware/arm_scpi.c
@@ -552,8 +552,10 @@ static unsigned long scpi_clk_get_val(u16 clk_id)
 
 	ret = scpi_send_message(CMD_GET_CLOCK_VALUE, &le_clk_id,
 				sizeof(le_clk_id), &rate, sizeof(rate));
+	if (ret)
+		return 0;
 
-	return ret ? ret : le32_to_cpu(rate);
+	return le32_to_cpu(rate);
 }
 
 static int scpi_clk_set_val(u16 clk_id, unsigned long rate)
-- 
cgit v1.2.3


From 03f840c49207e8c125b3df8c29c13137c6675d42 Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Tue, 27 Apr 2021 11:30:31 +0800
Subject: firmware: arm_scmi: Remove duplicate declaration of struct
 scmi_protocol_handle

struct scmi_protocol_handle is declared twice, let us remove the duplicate
declaration.

Link: https://lore.kernel.org/r/20210427033031.4580-1-wanjiabing@vivo.com
Reviewed-by: Cristian Marussi <cristian.marussi@arm.com>
Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
[sudeep.holla: minor updates to the title and the changelog]
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
---
 drivers/firmware/arm_scmi/notify.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/firmware/arm_scmi/notify.h b/drivers/firmware/arm_scmi/notify.h
index ce0324be6c71..4e9b627edfef 100644
--- a/drivers/firmware/arm_scmi/notify.h
+++ b/drivers/firmware/arm_scmi/notify.h
@@ -79,8 +79,6 @@ struct scmi_protocol_events {
 
 int scmi_notification_init(struct scmi_handle *handle);
 void scmi_notification_exit(struct scmi_handle *handle);
-
-struct scmi_protocol_handle;
 int scmi_register_protocol_events(const struct scmi_handle *handle, u8 proto_id,
 				  const struct scmi_protocol_handle *ph,
 				  const struct scmi_protocol_events *ee);
-- 
cgit v1.2.3


From 10f76165d30bf568214e75767f2d8d8682cd4040 Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@chromium.org>
Date: Mon, 26 Apr 2021 16:53:25 -0700
Subject: drm/msm: Do not unpin/evict exported dma-buf's

Our initial logic for excluding dma-bufs was not quite right.  In
particular we want msm_gem_get/put_pages() path used for exported
dma-bufs to increment/decrement the pin-count.

Also, in case the importer is vmap'ing the dma-buf, we need to be
sure to update the object's status, because it is now no longer
potentially evictable.

Fixes: 63f17ef83428 drm/msm: Support evicting GEM objects to swap
Signed-off-by: Rob Clark <robdclark@chromium.org>
Link: https://lore.kernel.org/r/20210426235326.1230125-1-robdclark@gmail.com
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/msm_gem.c | 16 +++++++++++++++-
 drivers/gpu/drm/msm/msm_gem.h |  4 ++--
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index b199942266a2..56df86e5f740 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -190,13 +190,25 @@ struct page **msm_gem_get_pages(struct drm_gem_object *obj)
 	}
 
 	p = get_pages(obj);
+
+	if (!IS_ERR(p)) {
+		msm_obj->pin_count++;
+		update_inactive(msm_obj);
+	}
+
 	msm_gem_unlock(obj);
 	return p;
 }
 
 void msm_gem_put_pages(struct drm_gem_object *obj)
 {
-	/* when we start tracking the pin count, then do something here */
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+
+	msm_gem_lock(obj);
+	msm_obj->pin_count--;
+	GEM_WARN_ON(msm_obj->pin_count < 0);
+	update_inactive(msm_obj);
+	msm_gem_unlock(obj);
 }
 
 int msm_gem_mmap_obj(struct drm_gem_object *obj,
@@ -646,6 +658,8 @@ static void *get_vaddr(struct drm_gem_object *obj, unsigned madv)
 			ret = -ENOMEM;
 			goto fail;
 		}
+
+		update_inactive(msm_obj);
 	}
 
 	return msm_obj->vaddr;
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index a6480d2c81b2..03e2cc2a2ce1 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -221,7 +221,7 @@ static inline bool is_active(struct msm_gem_object *msm_obj)
 /* imported/exported objects are not purgeable: */
 static inline bool is_unpurgeable(struct msm_gem_object *msm_obj)
 {
-	return msm_obj->base.dma_buf && msm_obj->base.import_attach;
+	return msm_obj->base.import_attach || msm_obj->pin_count;
 }
 
 static inline bool is_purgeable(struct msm_gem_object *msm_obj)
@@ -271,7 +271,7 @@ static inline void mark_unpurgeable(struct msm_gem_object *msm_obj)
 
 static inline bool is_unevictable(struct msm_gem_object *msm_obj)
 {
-	return is_unpurgeable(msm_obj) || msm_obj->pin_count || msm_obj->vaddr;
+	return is_unpurgeable(msm_obj) || msm_obj->vaddr;
 }
 
 static inline void mark_evictable(struct msm_gem_object *msm_obj)
-- 
cgit v1.2.3


From 4b95d371fb001185af84d177e69a23d55bd0167a Mon Sep 17 00:00:00 2001
From: Jonathan Marek <jonathan@marek.ca>
Date: Fri, 23 Apr 2021 21:49:26 -0400
Subject: drm/msm: fix LLC not being enabled for mmu500 targets

mmu500 targets don't have a "cx_mem" region, set llc_mmio to NULL in that
case to avoid the IS_ERR() condition in a6xx_llc_activate().

Fixes: 3d247123b5a1 ("drm/msm/a6xx: Add support for using system cache on MMU500 based targets")
Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Link: https://lore.kernel.org/r/20210424014927.1661-1-jonathan@marek.ca
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index d553f62f4eeb..b4d8e1b01ee4 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -1153,10 +1153,6 @@ static void a6xx_llc_slices_init(struct platform_device *pdev,
 {
 	struct device_node *phandle;
 
-	a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem", "gpu_cx");
-	if (IS_ERR(a6xx_gpu->llc_mmio))
-		return;
-
 	/*
 	 * There is a different programming path for targets with an mmu500
 	 * attached, so detect if that is the case
@@ -1166,6 +1162,11 @@ static void a6xx_llc_slices_init(struct platform_device *pdev,
 		of_device_is_compatible(phandle, "arm,mmu-500"));
 	of_node_put(phandle);
 
+	if (a6xx_gpu->have_mmu500)
+		a6xx_gpu->llc_mmio = NULL;
+	else
+		a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem", "gpu_cx");
+
 	a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU);
 	a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW);
 
-- 
cgit v1.2.3


From 08811c057b3e22f7a3df3955c138a59f3b651df0 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 10 Apr 2021 04:19:01 +0300
Subject: drm/msm/dsi: dsi_phy_28nm_8960: fix uninitialized variable access

The parent_name initialization was lost in refactoring, restore it now.

Fixes: 5d13459650b3 ("drm/msm/dsi: push provided clocks handling into a generic code")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Abhinav Kumar <abhinavk@codeaurora.org>
Link: https://lore.kernel.org/r/20210410011901.1735866-1-dmitry.baryshkov@linaro.org
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c
index 582b1428f971..86e40a0d41a3 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c
@@ -405,6 +405,10 @@ static int pll_28nm_register(struct dsi_pll_28nm *pll_28nm, struct clk_hw **prov
 	if (!vco_name)
 		return -ENOMEM;
 
+	parent_name = devm_kzalloc(dev, 32, GFP_KERNEL);
+	if (!parent_name)
+		return -ENOMEM;
+
 	clk_name = devm_kzalloc(dev, 32, GFP_KERNEL);
 	if (!clk_name)
 		return -ENOMEM;
-- 
cgit v1.2.3


From 094c7f39ba4b5ae7e4c448527834428b79e3baf9 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Mon, 12 Apr 2021 03:01:58 +0300
Subject: drm/msm/dsi: fix msm_dsi_phy_get_clk_provider return code

msm_dsi_phy_get_clk_provider() always returns two provided clocks, so
return 0 instead of returning incorrect -EINVAL error code.

Fixes: 5d13459650b3 ("drm/msm/dsi: push provided clocks handling into a generic code")
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Abhinav Kumar <abhinavk@codeaurora.org>
Tested-by: Jonathan Marek <jonathan@marek.ca>
Link: https://lore.kernel.org/r/20210412000158.2049066-1-dmitry.baryshkov@linaro.org
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/dsi/phy/dsi_phy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
index f0a2ddf96a4b..ff7f2ec42030 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
@@ -843,7 +843,7 @@ int msm_dsi_phy_get_clk_provider(struct msm_dsi_phy *phy,
 	if (pixel_clk_provider)
 		*pixel_clk_provider = phy->provided_clocks->hws[DSI_PIXEL_PLL_CLK]->clk;
 
-	return -EINVAL;
+	return 0;
 }
 
 void msm_dsi_phy_pll_save_state(struct msm_dsi_phy *phy)
-- 
cgit v1.2.3


From 774cda6f12d5ad11410c4cda223554c3735ee862 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 27 Apr 2021 22:59:55 +0200
Subject: dt-bindings: nvmem: mediatek: remove duplicate mt8192 line

The same patch was accidentally merged twice, resulting in a
duplicate line for the mt8192 SoC.

Fixes: f2674c0c7488 ("dt-bindings: nvmem: mediatek: add support for MediaTek mt8192 SoC")
Fixes: 2a1405a14c3a ("dt-bindings: nvmem: mediatek: add support for MediaTek mt8192 SoC")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 Documentation/devicetree/bindings/nvmem/mtk-efuse.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/nvmem/mtk-efuse.txt b/Documentation/devicetree/bindings/nvmem/mtk-efuse.txt
index d479ad977e24..b6791702bcfc 100644
--- a/Documentation/devicetree/bindings/nvmem/mtk-efuse.txt
+++ b/Documentation/devicetree/bindings/nvmem/mtk-efuse.txt
@@ -9,7 +9,6 @@ Required properties:
 	      "mediatek,mt8173-efuse" or "mediatek,efuse": for MT8173
 	      "mediatek,mt8192-efuse", "mediatek,efuse": for MT8192
 	      "mediatek,mt8516-efuse", "mediatek,efuse": for MT8516
-	      "mediatek,mt8192-efuse", "mediatek,efuse": for MT8192
 - reg: Should contain registers location and length
 
 = Data cells =
-- 
cgit v1.2.3


From c019d92457826bb7b2091c86f36adb5de08405f9 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Fri, 23 Apr 2021 17:09:28 +0200
Subject: openrisc: Fix a memory leak

'setup_find_cpu_node()' take a reference on the node it returns.
This reference must be decremented when not needed anymore, or there will
be a leak.

Add the missing 'of_node_put(cpu)'.

Note that 'setup_cpuinfo()' that also calls this function already has a
correct 'of_node_put(cpu)' at its end.

Fixes: 9d02a4283e9c ("OpenRISC: Boot code")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Stafford Horne <shorne@gmail.com>
---
 arch/openrisc/kernel/setup.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c
index 2416a9f91533..c6f9e7b9f7cb 100644
--- a/arch/openrisc/kernel/setup.c
+++ b/arch/openrisc/kernel/setup.c
@@ -278,6 +278,8 @@ void calibrate_delay(void)
 	pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)\n",
 		loops_per_jiffy / (500000 / HZ),
 		(loops_per_jiffy / (5000 / HZ)) % 100, loops_per_jiffy);
+
+	of_node_put(cpu);
 }
 
 void __init setup_arch(char **cmdline_p)
-- 
cgit v1.2.3


From c7299fea67696db5bd09d924d1f1080d894f92ef Mon Sep 17 00:00:00 2001
From: Saravana Kannan <saravanak@google.com>
Date: Mon, 26 Apr 2021 16:56:38 -0700
Subject: spi: Fix spi device unregister flow

When an SPI device is unregistered, the spi->controller->cleanup() is
called in the device's release callback. That's wrong for a couple of
reasons:

1. spi_dev_put() can be called before spi_add_device() is called. And
   it's spi_add_device() that calls spi_setup(). This will cause clean()
   to get called without the spi device ever being setup.

2. There's no guarantee that the controller's driver would be present by
   the time the spi device's release function gets called.

3. It also causes "sleeping in atomic context" stack dump[1] when device
   link deletion code does a put_device() on the spi device.

Fix these issues by simply moving the cleanup from the device release
callback to the actual spi_unregister_device() function.

[1] - https://lore.kernel.org/lkml/CAHp75Vc=FCGcUyS0v6fnxme2YJ+qD+Y-hQDQLa2JhWNON9VmsQ@mail.gmail.com/

Signed-off-by: Saravana Kannan <saravanak@google.com>
Link: https://lore.kernel.org/r/20210426235638.1285530-1-saravanak@google.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index ba425b9c7700..f9885c096563 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -47,10 +47,6 @@ static void spidev_release(struct device *dev)
 {
 	struct spi_device	*spi = to_spi_device(dev);
 
-	/* spi controllers may cleanup for released devices */
-	if (spi->controller->cleanup)
-		spi->controller->cleanup(spi);
-
 	spi_controller_put(spi->controller);
 	kfree(spi->driver_override);
 	kfree(spi);
@@ -558,6 +554,12 @@ static int spi_dev_check(struct device *dev, void *data)
 	return 0;
 }
 
+static void spi_cleanup(struct spi_device *spi)
+{
+	if (spi->controller->cleanup)
+		spi->controller->cleanup(spi);
+}
+
 /**
  * spi_add_device - Add spi_device allocated with spi_alloc_device
  * @spi: spi_device to register
@@ -622,11 +624,13 @@ int spi_add_device(struct spi_device *spi)
 
 	/* Device may be bound to an active driver when this returns */
 	status = device_add(&spi->dev);
-	if (status < 0)
+	if (status < 0) {
 		dev_err(dev, "can't add %s, status %d\n",
 				dev_name(&spi->dev), status);
-	else
+		spi_cleanup(spi);
+	} else {
 		dev_dbg(dev, "registered child %s\n", dev_name(&spi->dev));
+	}
 
 done:
 	mutex_unlock(&spi_add_lock);
@@ -710,6 +714,8 @@ void spi_unregister_device(struct spi_device *spi)
 	if (!spi)
 		return;
 
+	spi_cleanup(spi);
+
 	if (spi->dev.of_node) {
 		of_node_clear_flag(spi->dev.of_node, OF_POPULATED);
 		of_node_put(spi->dev.of_node);
-- 
cgit v1.2.3


From 41f48a29ebd5ce944e412f491f1876b5abeff1d6 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 27 Apr 2021 16:38:42 +0200
Subject: spi: altera: Make SPI_ALTERA_CORE invisible

The SPI_ALTERA_CORE config symbol controls compilation of the Altera SPI
Controller core code.  It is already selected by all of its users, so
there is no reason to make it visible, unless compile-testing.

Fixes: b0c3d9354de1f87e ("spi: altera: separate core code from platform code")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/f0cb8e66baba4506db6f42fca74dc51b76883507.1619534253.git.geert+renesas@glider.be
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 8b161ec4943b..f4481fe48bf0 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -65,7 +65,7 @@ config SPI_ALTERA
 	  This is the driver for the Altera SPI Controller.
 
 config SPI_ALTERA_CORE
-	tristate "Altera SPI Controller core code"
+	tristate "Altera SPI Controller core code" if COMPILE_TEST
 	select REGMAP
 	help
 	  "The core code for the Altera SPI Controller"
-- 
cgit v1.2.3


From adbd914dcde0b03bfc08ffe40b81f31b0457833f Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 21 Apr 2021 14:31:50 +0100
Subject: btrfs: zoned: fix silent data loss after failure splitting ordered
 extent

On a zoned filesystem, sometimes we need to split an ordered extent into 3
different ordered extents. The original ordered extent is shortened, at
the front and at the rear, and we create two other new ordered extents to
represent the trimmed parts of the original ordered extent.

After adjusting the original ordered extent, we create an ordered extent
to represent the pre-range, and that may fail with ENOMEM for example.
After that we always try to create the ordered extent for the post-range,
and if that happens to succeed we end up returning success to the caller
as we overwrite the 'ret' variable which contained the previous error.

This means we end up with a file range for which there is no ordered
extent, which results in the range never getting a new file extent item
pointing to the new data location. And since the split operation did
not return an error, writeback does not fail and the inode's mapping is
not flagged with an error, resulting in a subsequent fsync not reporting
an error either.

It's possibly very unlikely to have the creation of the post-range ordered
extent succeed after the creation of the pre-range ordered extent failed,
but it's not impossible.

So fix this by making sure we only create the post-range ordered extent
if there was no error creating the ordered extent for the pre-range.

Fixes: d22002fd37bd97 ("btrfs: zoned: split ordered extent when bio is sent")
CC: stable@vger.kernel.org # 5.12+
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ordered-data.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 07b0b4218791..6c413bb451a3 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -984,7 +984,7 @@ int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pre,
 
 	if (pre)
 		ret = clone_ordered_extent(ordered, 0, pre);
-	if (post)
+	if (ret == 0 && post)
 		ret = clone_ordered_extent(ordered, pre + ordered->disk_num_bytes,
 					   post);
 
-- 
cgit v1.2.3


From ffb7c2e923cb3232454a513dcb5636e73091aa88 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 22 Apr 2021 12:09:21 +0100
Subject: btrfs: do not consider send context as valid when trying to flush
 qgroups

At qgroup.c:try_flush_qgroup() we are asserting that current->journal_info
is either NULL or has the value BTRFS_SEND_TRANS_STUB.

However allowing for BTRFS_SEND_TRANS_STUB makes no sense because:

1) It is misleading, because send operations are read-only and do not
   ever need to reserve qgroup space;

2) We already assert that current->journal_info != BTRFS_SEND_TRANS_STUB
   at transaction.c:start_transaction();

3) On a kernel without CONFIG_BTRFS_ASSERT=y set, it would result in
   a crash if try_flush_qgroup() is ever called in a send context, because
   at transaction.c:start_transaction we cast current->journal_info into
   a struct btrfs_trans_handle pointer and then dereference it.

So just do allow a send context at try_flush_qgroup() and update the
comment about it.

Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 2319c923c9e6..b1caf5acf1e2 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -3545,11 +3545,15 @@ static int try_flush_qgroup(struct btrfs_root *root)
 	struct btrfs_trans_handle *trans;
 	int ret;
 
-	/* Can't hold an open transaction or we run the risk of deadlocking */
-	ASSERT(current->journal_info == NULL ||
-	       current->journal_info == BTRFS_SEND_TRANS_STUB);
-	if (WARN_ON(current->journal_info &&
-		    current->journal_info != BTRFS_SEND_TRANS_STUB))
+	/*
+	 * Can't hold an open transaction or we run the risk of deadlocking,
+	 * and can't either be under the context of a send operation (where
+	 * current->journal_info is set to BTRFS_SEND_TRANS_STUB), as that
+	 * would result in a crash when starting a transaction and does not
+	 * make sense either (send is a read-only operation).
+	 */
+	ASSERT(current->journal_info == NULL);
+	if (WARN_ON(current->journal_info))
 		return 0;
 
 	/*
-- 
cgit v1.2.3


From 626e9f41f7c281ba3e02843702f68471706aa6d9 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Tue, 27 Apr 2021 11:27:20 +0100
Subject: btrfs: fix race leading to unpersisted data and metadata on fsync

When doing a fast fsync on a file, there is a race which can result in the
fsync returning success to user space without logging the inode and without
durably persisting new data.

The following example shows one possible scenario for this:

   $ mkfs.btrfs -f /dev/sdc
   $ mount /dev/sdc /mnt

   $ touch /mnt/bar
   $ xfs_io -f -c "pwrite -S 0xab 0 1M" -c "fsync" /mnt/baz

   # Now we have:
   # file bar == inode 257
   # file baz == inode 258

   $ mv /mnt/baz /mnt/foo

   # Now we have:
   # file bar == inode 257
   # file foo == inode 258

   $ xfs_io -c "pwrite -S 0xcd 0 1M" /mnt/foo

   # fsync bar before foo, it is important to trigger the race.
   $ xfs_io -c "fsync" /mnt/bar
   $ xfs_io -c "fsync" /mnt/foo

   # After this:
   # inode 257, file bar, is empty
   # inode 258, file foo, has 1M filled with 0xcd

   <power failure>

   # Replay the log:
   $ mount /dev/sdc /mnt

   # After this point file foo should have 1M filled with 0xcd and not 0xab

The following steps explain how the race happens:

1) Before the first fsync of inode 258, when it has the "baz" name, its
   ->logged_trans is 0, ->last_sub_trans is 0 and ->last_log_commit is -1.
   The inode also has the full sync flag set;

2) After the first fsync, we set inode 258 ->logged_trans to 6, which is
   the generation of the current transaction, and set ->last_log_commit
   to 0, which is the current value of ->last_sub_trans (done at
   btrfs_log_inode()).

   The full sync flag is cleared from the inode during the fsync.

   The log sub transaction that was committed had an ID of 0 and when we
   synced the log, at btrfs_sync_log(), we incremented root->log_transid
   from 0 to 1;

3) During the rename:

   We update inode 258, through btrfs_update_inode(), and that causes its
   ->last_sub_trans to be set to 1 (the current log transaction ID), and
   ->last_log_commit remains with a value of 0.

   After updating inode 258, because we have previously logged the inode
   in the previous fsync, we log again the inode through the call to
   btrfs_log_new_name(). This results in updating the inode's
   ->last_log_commit from 0 to 1 (the current value of its
   ->last_sub_trans).

   The ->last_sub_trans of inode 257 is updated to 1, which is the ID of
   the next log transaction;

4) Then a buffered write against inode 258 is made. This leaves the value
   of ->last_sub_trans as 1 (the ID of the current log transaction, stored
   at root->log_transid);

5) Then an fsync against inode 257 (or any other inode other than 258),
   happens. This results in committing the log transaction with ID 1,
   which results in updating root->last_log_commit to 1 and bumping
   root->log_transid from 1 to 2;

6) Then an fsync against inode 258 starts. We flush delalloc and wait only
   for writeback to complete, since the full sync flag is not set in the
   inode's runtime flags - we do not wait for ordered extents to complete.

   Then, at btrfs_sync_file(), we call btrfs_inode_in_log() before the
   ordered extent completes. The call returns true:

     static inline bool btrfs_inode_in_log(...)
     {
         bool ret = false;

         spin_lock(&inode->lock);
         if (inode->logged_trans == generation &&
             inode->last_sub_trans <= inode->last_log_commit &&
             inode->last_sub_trans <= inode->root->last_log_commit)
                 ret = true;
         spin_unlock(&inode->lock);
         return ret;
     }

   generation has a value of 6 (fs_info->generation), ->logged_trans also
   has a value of 6 (set when we logged the inode during the first fsync
   and when logging it during the rename), ->last_sub_trans has a value
   of 1, set during the rename (step 3), ->last_log_commit also has a
   value of 1 (set in step 3) and root->last_log_commit has a value of 1,
   which was set in step 5 when fsyncing inode 257.

   As a consequence we don't log the inode, any new extents and do not
   sync the log, resulting in a data loss if a power failure happens
   after the fsync and before the current transaction commits.
   Also, because we do not log the inode, after a power failure the mtime
   and ctime of the inode do not match those we had before.

   When the ordered extent completes before we call btrfs_inode_in_log(),
   then the call returns false and we log the inode and sync the log,
   since at the end of ordered extent completion we update the inode and
   set ->last_sub_trans to 2 (the value of root->log_transid) and
   ->last_log_commit to 1.

This problem is found after removing the check for the emptiness of the
inode's list of modified extents in the recent commit 209ecbb8585bf6
("btrfs: remove stale comment and logic from btrfs_inode_in_log()"),
added in the 5.13 merge window. However checking the emptiness of the
list is not really the way to solve this problem, and was never intended
to, because while that solves the problem for COW writes, the problem
persists for NOCOW writes because in that case the list is always empty.

In the case of NOCOW writes, even though we wait for the writeback to
complete before returning from btrfs_sync_file(), we end up not logging
the inode, which has a new mtime/ctime, and because we don't sync the log,
we never issue disk barriers (send REQ_PREFLUSH to the device) since that
only happens when we sync the log (when we write super blocks at
btrfs_sync_log()). So effectively, for a NOCOW case, when we return from
btrfs_sync_file() to user space, we are not guaranteeing that the data is
durably persisted on disk.

Also, while the example above uses a rename exchange to show how the
problem happens, it is not the only way to trigger it. An alternative
could be adding a new hard link to inode 258, since that also results
in calling btrfs_log_new_name() and updating the inode in the log.
An example reproducer using the addition of a hard link instead of a
rename operation:

  $ mkfs.btrfs -f /dev/sdc
  $ mount /dev/sdc /mnt

  $ touch /mnt/bar
  $ xfs_io -f -c "pwrite -S 0xab 0 1M" -c "fsync" /mnt/foo

  $ ln /mnt/foo /mnt/foo_link
  $ xfs_io -c "pwrite -S 0xcd 0 1M" /mnt/foo

  $ xfs_io -c "fsync" /mnt/bar
  $ xfs_io -c "fsync" /mnt/foo

  <power failure>

  # Replay the log:
  $ mount /dev/sdc /mnt

  # After this point file foo often has 1M filled with 0xab and not 0xcd

The reasons leading to the final fsync of file foo, inode 258, not
persisting the new data are the same as for the previous example with
a rename operation.

So fix by never skipping logging and log syncing when there are still any
ordered extents in flight. To avoid making the conditional if statement
that checks if logging an inode is needed harder to read, place all the
logic into an helper function with separate if statements to make it more
manageable and easier to read.

A test case for fstests will follow soon.

For NOCOW writes, the problem existed before commit b5e6c3e170b770
("btrfs: always wait on ordered extents at fsync time"), introduced in
kernel 4.19, then it went away with that commit since we started to always
wait for ordered extent completion before logging.

The problem came back again once the fast fsync path was changed again to
avoid waiting for ordered extent completion, in commit 487781796d3022
("btrfs: make fast fsyncs wait only for writeback"), added in kernel 5.10.

However, for COW writes, the race only happens after the recent
commit 209ecbb8585bf6 ("btrfs: remove stale comment and logic from
btrfs_inode_in_log()"), introduced in the 5.13 merge window. For NOCOW
writes, the bug existed before that commit. So tag 5.10+ as the release
for stable backports.

CC: stable@vger.kernel.org # 5.10+
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/file.c     | 35 +++++++++++++++++++++++++----------
 fs/btrfs/tree-log.c |  3 ++-
 2 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 864c08d08a35..3b10d98b4ebb 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2067,6 +2067,30 @@ static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
 	return ret;
 }
 
+static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx)
+{
+	struct btrfs_inode *inode = BTRFS_I(ctx->inode);
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
+
+	if (btrfs_inode_in_log(inode, fs_info->generation) &&
+	    list_empty(&ctx->ordered_extents))
+		return true;
+
+	/*
+	 * If we are doing a fast fsync we can not bail out if the inode's
+	 * last_trans is <= then the last committed transaction, because we only
+	 * update the last_trans of the inode during ordered extent completion,
+	 * and for a fast fsync we don't wait for that, we only wait for the
+	 * writeback to complete.
+	 */
+	if (inode->last_trans <= fs_info->last_trans_committed &&
+	    (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) ||
+	     list_empty(&ctx->ordered_extents)))
+		return true;
+
+	return false;
+}
+
 /*
  * fsync call for both files and directories.  This logs the inode into
  * the tree log instead of forcing full commits whenever possible.
@@ -2185,17 +2209,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 
 	atomic_inc(&root->log_batch);
 
-	/*
-	 * If we are doing a fast fsync we can not bail out if the inode's
-	 * last_trans is <= then the last committed transaction, because we only
-	 * update the last_trans of the inode during ordered extent completion,
-	 * and for a fast fsync we don't wait for that, we only wait for the
-	 * writeback to complete.
-	 */
 	smp_mb();
-	if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) ||
-	    (BTRFS_I(inode)->last_trans <= fs_info->last_trans_committed &&
-	     (full_sync || list_empty(&ctx.ordered_extents)))) {
+	if (skip_inode_logging(&ctx)) {
 		/*
 		 * We've had everything committed since the last time we were
 		 * modified so clear this flag in case it was set for whatever
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index c1353b84ae54..a0fc3a1390ab 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -6060,7 +6060,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
 	 * (since logging them is pointless, a link count of 0 means they
 	 * will never be accessible).
 	 */
-	if (btrfs_inode_in_log(inode, trans->transid) ||
+	if ((btrfs_inode_in_log(inode, trans->transid) &&
+	     list_empty(&ctx->ordered_extents)) ||
 	    inode->vfs_inode.i_nlink == 0) {
 		ret = BTRFS_NO_LOG_SYNC;
 		goto end_no_trans;
-- 
cgit v1.2.3


From f9baa501b4fd6962257853d46ddffbc21f27e344 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 22 Apr 2021 12:08:05 +0100
Subject: btrfs: fix deadlock when cloning inline extents and using qgroups

There are a few exceptional cases where cloning an inline extent needs to
copy the inline extent data into a page of the destination inode.

When this happens, we end up starting a transaction while having a dirty
page for the destination inode and while having the range locked in the
destination's inode iotree too. Because when reserving metadata space
for a transaction we may need to flush existing delalloc in case there is
not enough free space, we have a mechanism in place to prevent a deadlock,
which was introduced in commit 3d45f221ce627d ("btrfs: fix deadlock when
cloning inline extent and low on free metadata space").

However when using qgroups, a transaction also reserves metadata qgroup
space, which can also result in flushing delalloc in case there is not
enough available space at the moment. When this happens we deadlock, since
flushing delalloc requires locking the file range in the inode's iotree
and the range was already locked at the very beginning of the clone
operation, before attempting to start the transaction.

When this issue happens, stack traces like the following are reported:

  [72747.556262] task:kworker/u81:9   state:D stack:    0 pid:  225 ppid:     2 flags:0x00004000
  [72747.556268] Workqueue: writeback wb_workfn (flush-btrfs-1142)
  [72747.556271] Call Trace:
  [72747.556273]  __schedule+0x296/0x760
  [72747.556277]  schedule+0x3c/0xa0
  [72747.556279]  io_schedule+0x12/0x40
  [72747.556284]  __lock_page+0x13c/0x280
  [72747.556287]  ? generic_file_readonly_mmap+0x70/0x70
  [72747.556325]  extent_write_cache_pages+0x22a/0x440 [btrfs]
  [72747.556331]  ? __set_page_dirty_nobuffers+0xe7/0x160
  [72747.556358]  ? set_extent_buffer_dirty+0x5e/0x80 [btrfs]
  [72747.556362]  ? update_group_capacity+0x25/0x210
  [72747.556366]  ? cpumask_next_and+0x1a/0x20
  [72747.556391]  extent_writepages+0x44/0xa0 [btrfs]
  [72747.556394]  do_writepages+0x41/0xd0
  [72747.556398]  __writeback_single_inode+0x39/0x2a0
  [72747.556403]  writeback_sb_inodes+0x1ea/0x440
  [72747.556407]  __writeback_inodes_wb+0x5f/0xc0
  [72747.556410]  wb_writeback+0x235/0x2b0
  [72747.556414]  ? get_nr_inodes+0x35/0x50
  [72747.556417]  wb_workfn+0x354/0x490
  [72747.556420]  ? newidle_balance+0x2c5/0x3e0
  [72747.556424]  process_one_work+0x1aa/0x340
  [72747.556426]  worker_thread+0x30/0x390
  [72747.556429]  ? create_worker+0x1a0/0x1a0
  [72747.556432]  kthread+0x116/0x130
  [72747.556435]  ? kthread_park+0x80/0x80
  [72747.556438]  ret_from_fork+0x1f/0x30

  [72747.566958] Workqueue: btrfs-flush_delalloc btrfs_work_helper [btrfs]
  [72747.566961] Call Trace:
  [72747.566964]  __schedule+0x296/0x760
  [72747.566968]  ? finish_wait+0x80/0x80
  [72747.566970]  schedule+0x3c/0xa0
  [72747.566995]  wait_extent_bit.constprop.68+0x13b/0x1c0 [btrfs]
  [72747.566999]  ? finish_wait+0x80/0x80
  [72747.567024]  lock_extent_bits+0x37/0x90 [btrfs]
  [72747.567047]  btrfs_invalidatepage+0x299/0x2c0 [btrfs]
  [72747.567051]  ? find_get_pages_range_tag+0x2cd/0x380
  [72747.567076]  __extent_writepage+0x203/0x320 [btrfs]
  [72747.567102]  extent_write_cache_pages+0x2bb/0x440 [btrfs]
  [72747.567106]  ? update_load_avg+0x7e/0x5f0
  [72747.567109]  ? enqueue_entity+0xf4/0x6f0
  [72747.567134]  extent_writepages+0x44/0xa0 [btrfs]
  [72747.567137]  ? enqueue_task_fair+0x93/0x6f0
  [72747.567140]  do_writepages+0x41/0xd0
  [72747.567144]  __filemap_fdatawrite_range+0xc7/0x100
  [72747.567167]  btrfs_run_delalloc_work+0x17/0x40 [btrfs]
  [72747.567195]  btrfs_work_helper+0xc2/0x300 [btrfs]
  [72747.567200]  process_one_work+0x1aa/0x340
  [72747.567202]  worker_thread+0x30/0x390
  [72747.567205]  ? create_worker+0x1a0/0x1a0
  [72747.567208]  kthread+0x116/0x130
  [72747.567211]  ? kthread_park+0x80/0x80
  [72747.567214]  ret_from_fork+0x1f/0x30

  [72747.569686] task:fsstress        state:D stack:    0 pid:841421 ppid:841417 flags:0x00000000
  [72747.569689] Call Trace:
  [72747.569691]  __schedule+0x296/0x760
  [72747.569694]  schedule+0x3c/0xa0
  [72747.569721]  try_flush_qgroup+0x95/0x140 [btrfs]
  [72747.569725]  ? finish_wait+0x80/0x80
  [72747.569753]  btrfs_qgroup_reserve_data+0x34/0x50 [btrfs]
  [72747.569781]  btrfs_check_data_free_space+0x5f/0xa0 [btrfs]
  [72747.569804]  btrfs_buffered_write+0x1f7/0x7f0 [btrfs]
  [72747.569810]  ? path_lookupat.isra.48+0x97/0x140
  [72747.569833]  btrfs_file_write_iter+0x81/0x410 [btrfs]
  [72747.569836]  ? __kmalloc+0x16a/0x2c0
  [72747.569839]  do_iter_readv_writev+0x160/0x1c0
  [72747.569843]  do_iter_write+0x80/0x1b0
  [72747.569847]  vfs_writev+0x84/0x140
  [72747.569869]  ? btrfs_file_llseek+0x38/0x270 [btrfs]
  [72747.569873]  do_writev+0x65/0x100
  [72747.569876]  do_syscall_64+0x33/0x40
  [72747.569879]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

  [72747.569899] task:fsstress        state:D stack:    0 pid:841424 ppid:841417 flags:0x00004000
  [72747.569903] Call Trace:
  [72747.569906]  __schedule+0x296/0x760
  [72747.569909]  schedule+0x3c/0xa0
  [72747.569936]  try_flush_qgroup+0x95/0x140 [btrfs]
  [72747.569940]  ? finish_wait+0x80/0x80
  [72747.569967]  __btrfs_qgroup_reserve_meta+0x36/0x50 [btrfs]
  [72747.569989]  start_transaction+0x279/0x580 [btrfs]
  [72747.570014]  clone_copy_inline_extent+0x332/0x490 [btrfs]
  [72747.570041]  btrfs_clone+0x5b7/0x7a0 [btrfs]
  [72747.570068]  ? lock_extent_bits+0x64/0x90 [btrfs]
  [72747.570095]  btrfs_clone_files+0xfc/0x150 [btrfs]
  [72747.570122]  btrfs_remap_file_range+0x3d8/0x4a0 [btrfs]
  [72747.570126]  do_clone_file_range+0xed/0x200
  [72747.570131]  vfs_clone_file_range+0x37/0x110
  [72747.570134]  ioctl_file_clone+0x7d/0xb0
  [72747.570137]  do_vfs_ioctl+0x138/0x630
  [72747.570140]  __x64_sys_ioctl+0x62/0xc0
  [72747.570143]  do_syscall_64+0x33/0x40
  [72747.570146]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

So fix this by skipping the flush of delalloc for an inode that is
flagged with BTRFS_INODE_NO_DELALLOC_FLUSH, meaning it is currently under
such a special case of cloning an inline extent, when flushing delalloc
during qgroup metadata reservation.

The special cases for cloning inline extents were added in kernel 5.7 by
by commit 05a5a7621ce66c ("Btrfs: implement full reflink support for
inline extents"), while having qgroup metadata space reservation flushing
delalloc when low on space was added in kernel 5.9 by commit
c53e9653605dbf ("btrfs: qgroup: try to flush qgroup space when we get
-EDQUOT"). So use a "Fixes:" tag for the later commit to ease stable
kernel backports.

Reported-by: Wang Yugui <wangyugui@e16-tech.com>
Link: https://lore.kernel.org/linux-btrfs/20210421083137.31E3.409509F4@e16-tech.com/
Fixes: c53e9653605dbf ("btrfs: qgroup: try to flush qgroup space when we get -EDQUOT")
CC: stable@vger.kernel.org # 5.9+
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h  | 2 +-
 fs/btrfs/inode.c  | 4 ++--
 fs/btrfs/ioctl.c  | 2 +-
 fs/btrfs/qgroup.c | 2 +-
 fs/btrfs/send.c   | 4 ++--
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 278e0cbc9a98..0f5b0b12762b 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3127,7 +3127,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 			       struct btrfs_inode *inode, u64 new_size,
 			       u32 min_type);
 
-int btrfs_start_delalloc_snapshot(struct btrfs_root *root);
+int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context);
 int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
 			       bool in_reclaim_context);
 int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1a349759efae..69fcdf8f0b1c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9691,7 +9691,7 @@ out:
 	return ret;
 }
 
-int btrfs_start_delalloc_snapshot(struct btrfs_root *root)
+int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context)
 {
 	struct writeback_control wbc = {
 		.nr_to_write = LONG_MAX,
@@ -9704,7 +9704,7 @@ int btrfs_start_delalloc_snapshot(struct btrfs_root *root)
 	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
 		return -EROFS;
 
-	return start_delalloc_inodes(root, &wbc, true, false);
+	return start_delalloc_inodes(root, &wbc, true, in_reclaim_context);
 }
 
 int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index b1328f17607e..0ba0e4ddaf6b 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1051,7 +1051,7 @@ static noinline int btrfs_mksnapshot(const struct path *parent,
 	 */
 	btrfs_drew_read_lock(&root->snapshot_lock);
 
-	ret = btrfs_start_delalloc_snapshot(root);
+	ret = btrfs_start_delalloc_snapshot(root, false);
 	if (ret)
 		goto out;
 
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index b1caf5acf1e2..3ded812f522c 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -3566,7 +3566,7 @@ static int try_flush_qgroup(struct btrfs_root *root)
 		return 0;
 	}
 
-	ret = btrfs_start_delalloc_snapshot(root);
+	ret = btrfs_start_delalloc_snapshot(root, true);
 	if (ret < 0)
 		goto out;
 	btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 55741adf9071..bd69db72acc5 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -7170,7 +7170,7 @@ static int flush_delalloc_roots(struct send_ctx *sctx)
 	int i;
 
 	if (root) {
-		ret = btrfs_start_delalloc_snapshot(root);
+		ret = btrfs_start_delalloc_snapshot(root, false);
 		if (ret)
 			return ret;
 		btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
@@ -7178,7 +7178,7 @@ static int flush_delalloc_roots(struct send_ctx *sctx)
 
 	for (i = 0; i < sctx->clone_roots_cnt; i++) {
 		root = sctx->clone_roots[i].root;
-		ret = btrfs_start_delalloc_snapshot(root);
+		ret = btrfs_start_delalloc_snapshot(root, false);
 		if (ret)
 			return ret;
 		btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
-- 
cgit v1.2.3


From 02ded1314a465a89267be38231d9858206853d80 Mon Sep 17 00:00:00 2001
From: Jonathan Marek <jonathan@marek.ca>
Date: Fri, 23 Apr 2021 15:04:20 -0400
Subject: drm/msm: fix minor version to indicate MSM_PARAM_SUSPENDS support

Increase the minor version to indicate that MSM_PARAM_SUSPENDS is supported.

Fixes: 3ab1c5cc3939 ("drm/msm: Add param for userspace to query suspend count")
Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Link: https://lore.kernel.org/r/20210423190420.25217-1-jonathan@marek.ca
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/msm_drv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index e1104d2454e2..fe7d17cd35ec 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -42,7 +42,7 @@
  * - 1.7.0 - Add MSM_PARAM_SUSPENDS to access suspend count
  */
 #define MSM_VERSION_MAJOR	1
-#define MSM_VERSION_MINOR	6
+#define MSM_VERSION_MINOR	7
 #define MSM_VERSION_PATCHLEVEL	0
 
 static const struct drm_mode_config_funcs mode_config_funcs = {
-- 
cgit v1.2.3


From d89f6048bdcb6a56abb396c584747d5eeae650db Mon Sep 17 00:00:00 2001
From: Harry Wentland <harry.wentland@amd.com>
Date: Thu, 22 Apr 2021 19:10:52 -0400
Subject: drm/amd/display: Reject non-zero src_y and src_x for video planes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[Why]
This hasn't been well tested and leads to complete system hangs on DCN1
based systems, possibly others.

The system hang can be reproduced by gesturing the video on the YouTube
Android app on ChromeOS into full screen.

[How]
Reject atomic commits with non-zero drm_plane_state.src_x or src_y values.

v2:
 - Add code comment describing the reason we're rejecting non-zero
   src_x and src_y
 - Drop gerrit Change-Id
 - Add stable CC
 - Based on amd-staging-drm-next

v3: removed trailing whitespace

Signed-off-by: Harry Wentland <harry.wentland@amd.com>
Cc: stable@vger.kernel.org
Cc: nicholas.kazlauskas@amd.com
Cc: amd-gfx@lists.freedesktop.org
Cc: alexander.deucher@amd.com
Cc: Roman.Li@amd.com
Cc: hersenxs.wu@amd.com
Cc: danny.wang@amd.com
Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Hersen Wu <hersenxs.wu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index a0c8c41e4e57..deec7e0466da 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -4008,6 +4008,23 @@ static int fill_dc_scaling_info(const struct drm_plane_state *state,
 	scaling_info->src_rect.x = state->src_x >> 16;
 	scaling_info->src_rect.y = state->src_y >> 16;
 
+	/*
+	 * For reasons we don't (yet) fully understand a non-zero
+	 * src_y coordinate into an NV12 buffer can cause a
+	 * system hang. To avoid hangs (and maybe be overly cautious)
+	 * let's reject both non-zero src_x and src_y.
+	 *
+	 * We currently know of only one use-case to reproduce a
+	 * scenario with non-zero src_x and src_y for NV12, which
+	 * is to gesture the YouTube Android app into full screen
+	 * on ChromeOS.
+	 */
+	if (state->fb &&
+	    state->fb->format->format == DRM_FORMAT_NV12 &&
+	    (scaling_info->src_rect.x != 0 ||
+	     scaling_info->src_rect.y != 0))
+		return -EINVAL;
+
 	scaling_info->src_rect.width = state->src_w >> 16;
 	if (scaling_info->src_rect.width == 0)
 		return -EINVAL;
-- 
cgit v1.2.3


From 20a5f5a98e1bb3d40acd97e89299e8c2d22784be Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Thu, 22 Apr 2021 13:11:39 +0200
Subject: drm/amdgpu: fix concurrent VM flushes on Vega/Navi v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Starting with Vega the hardware supports concurrent flushes
of VMID which can be used to implement per process VMID
allocation.

But concurrent flushes are mutual exclusive with back to
back VMID allocations, fix this to avoid a VMID used in
two ways at the same time.

v2: don't set ring to NULL

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: James Zhu <James.Zhu@amd.com>
Tested-by: James Zhu <James.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 19 +++++++++++--------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  6 ++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  1 +
 3 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 94b069630db3..b4971e90b98c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -215,7 +215,11 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
 	/* Check if we have an idle VMID */
 	i = 0;
 	list_for_each_entry((*idle), &id_mgr->ids_lru, list) {
-		fences[i] = amdgpu_sync_peek_fence(&(*idle)->active, ring);
+		/* Don't use per engine and per process VMID at the same time */
+		struct amdgpu_ring *r = adev->vm_manager.concurrent_flush ?
+			NULL : ring;
+
+		fences[i] = amdgpu_sync_peek_fence(&(*idle)->active, r);
 		if (!fences[i])
 			break;
 		++i;
@@ -281,7 +285,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
 	if (updates && (*id)->flushed_updates &&
 	    updates->context == (*id)->flushed_updates->context &&
 	    !dma_fence_is_later(updates, (*id)->flushed_updates))
-	    updates = NULL;
+		updates = NULL;
 
 	if ((*id)->owner != vm->immediate.fence_context ||
 	    job->vm_pd_addr != (*id)->pd_gpu_addr ||
@@ -290,6 +294,10 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
 	     !dma_fence_is_signaled((*id)->last_flush))) {
 		struct dma_fence *tmp;
 
+		/* Don't use per engine and per process VMID at the same time */
+		if (adev->vm_manager.concurrent_flush)
+			ring = NULL;
+
 		/* to prevent one context starved by another context */
 		(*id)->pd_gpu_addr = 0;
 		tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
@@ -365,12 +373,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
 		if (updates && (!flushed || dma_fence_is_later(updates, flushed)))
 			needs_flush = true;
 
-		/* Concurrent flushes are only possible starting with Vega10 and
-		 * are broken on Navi10 and Navi14.
-		 */
-		if (needs_flush && (adev->asic_type < CHIP_VEGA10 ||
-				    adev->asic_type == CHIP_NAVI10 ||
-				    adev->asic_type == CHIP_NAVI14))
+		if (needs_flush && !adev->vm_manager.concurrent_flush)
 			continue;
 
 		/* Good, we can use this VMID. Remember this submission as
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0ffdf847cad0..9acee4a5b2ba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -3148,6 +3148,12 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 {
 	unsigned i;
 
+	/* Concurrent flushes are only possible starting with Vega10 and
+	 * are broken on Navi10 and Navi14.
+	 */
+	adev->vm_manager.concurrent_flush = !(adev->asic_type < CHIP_VEGA10 ||
+					      adev->asic_type == CHIP_NAVI10 ||
+					      adev->asic_type == CHIP_NAVI14);
 	amdgpu_vmid_mgr_init(adev);
 
 	adev->vm_manager.fence_context =
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 976a12e5a8b9..4e140288159c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -331,6 +331,7 @@ struct amdgpu_vm_manager {
 	/* Handling of VMIDs */
 	struct amdgpu_vmid_mgr			id_mgr[AMDGPU_MAX_VMHUBS];
 	unsigned int				first_kfd_vmid;
+	bool					concurrent_flush;
 
 	/* Handling of VM fences */
 	u64					fence_context;
-- 
cgit v1.2.3


From b117b3964f38a988cb79825950dbd607c02237f3 Mon Sep 17 00:00:00 2001
From: Darren Powell <darren.powell@amd.com>
Date: Wed, 7 Apr 2021 00:34:35 -0400
Subject: amdgpu/pm: Prevent force of DCEFCLK on NAVI10 and SIENNA_CICHLID

Writing to dcefclk causes the gpu to become unresponsive, and requires a reboot.
Patch ignores a .force_clk_levels(SMU_DCEFCLK) call and issues an
info message.

Signed-off-by: Darren Powell <darren.powell@amd.com>
Reviewed-by: Kenneth Feng <kenneth.feng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c         | 5 ++++-
 drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 4 +++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index f827096dc849..ac13042672ea 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -1443,7 +1443,6 @@ static int navi10_force_clk_levels(struct smu_context *smu,
 	case SMU_SOCCLK:
 	case SMU_MCLK:
 	case SMU_UCLK:
-	case SMU_DCEFCLK:
 	case SMU_FCLK:
 		/* There is only 2 levels for fine grained DPM */
 		if (navi10_is_support_fine_grained_dpm(smu, clk_type)) {
@@ -1463,6 +1462,10 @@ static int navi10_force_clk_levels(struct smu_context *smu,
 		if (ret)
 			return size;
 		break;
+	case SMU_DCEFCLK:
+		dev_info(smu->adev->dev,"Setting DCEFCLK min/max dpm level is not supported!\n");
+		break;
+
 	default:
 		break;
 	}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 72d9c1be1835..d2fd44b903ca 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -1127,7 +1127,6 @@ static int sienna_cichlid_force_clk_levels(struct smu_context *smu,
 	case SMU_SOCCLK:
 	case SMU_MCLK:
 	case SMU_UCLK:
-	case SMU_DCEFCLK:
 	case SMU_FCLK:
 		/* There is only 2 levels for fine grained DPM */
 		if (sienna_cichlid_is_support_fine_grained_dpm(smu, clk_type)) {
@@ -1147,6 +1146,9 @@ static int sienna_cichlid_force_clk_levels(struct smu_context *smu,
 		if (ret)
 			goto forec_level_out;
 		break;
+	case SMU_DCEFCLK:
+		dev_info(smu->adev->dev,"Setting DCEFCLK min/max dpm level is not supported!\n");
+		break;
 	default:
 		break;
 	}
-- 
cgit v1.2.3


From d7b4a6077ec38763a1f6fed2b2f6a0113028eea7 Mon Sep 17 00:00:00 2001
From: Darren Powell <darren.powell@amd.com>
Date: Wed, 7 Apr 2021 18:40:34 -0400
Subject: amdgpu/pm: set pp_dpm_dcefclk to readonly on NAVI10 and newer gpus

v2 : change condition to apply to all chips after NAVI10

Writing to dcefclk causes the gpu to become unresponsive, and requires a reboot.
Patch prevents user from successfully writing to file pp_dpm_dcefclk on parts
NAVI10 and newer, and gives better user feedback that this operation is not allowed.

Signed-off-by: Darren Powell <darren.powell@amd.com>
Reviewed-by: Kenneth Feng <kenneth.feng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/amdgpu_pm.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 204e34549013..a8d6cc2525b2 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -1891,6 +1891,14 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_
 		}
 	}
 
+	if (DEVICE_ATTR_IS(pp_dpm_dcefclk)) {
+		/* SMU MP1 does not support dcefclk level setting */
+		if (asic_type >= CHIP_NAVI10) {
+			dev_attr->attr.mode &= ~S_IWUGO;
+			dev_attr->store = NULL;
+		}
+	}
+
 #undef DEVICE_ATTR_IS
 
 	return 0;
-- 
cgit v1.2.3


From 3cbae5abfa8ebc8bc2b445dbe392b6987cd15483 Mon Sep 17 00:00:00 2001
From: Mikita Lipski <mikita.lipski@amd.com>
Date: Wed, 14 Apr 2021 14:15:52 -0400
Subject: drm/amd/display: fix wrong statement in mst hpd debugfs

[why]
Previous statement would always evaluate to true
making it meaningless
[how]
Just check if a connector is MST by checking if its port exists.

Fixes: 41efcd3879b1df ("drm/amd/display: Add MST capability to trigger_hotplug interface")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Mikita Lipski <mikita.lipski@amd.com>
Reviewed-by: Nicholas Kazlauskas <Nicholas.Kazlauskas@amd.com>
Acked-by: Wayne Lin <waynelin@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
index 9a13f47022df..2cfee7d6bb4d 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
@@ -3012,7 +3012,7 @@ static int trigger_hpd_mst_set(void *data, u64 val)
 			if (!aconnector->dc_link)
 				continue;
 
-			if (!(aconnector->port && &aconnector->mst_port->mst_mgr))
+			if (!aconnector->mst_port)
 				continue;
 
 			link = aconnector->dc_link;
-- 
cgit v1.2.3


From 4b12ee6f426e5e36396501a58f3a1af5b92a7e06 Mon Sep 17 00:00:00 2001
From: Victor Zhao <Victor.Zhao@amd.com>
Date: Tue, 27 Apr 2021 17:52:56 +0800
Subject: drm/amdgpu: fix r initial values

Sriov gets suspend of IP block <dce_virtual> failed as return
value was not initialized.

v2: return 0 directly to align original code semantic before this
was broken out into a separate helper function instead of setting
initial values

Signed-off-by: Victor Zhao <Victor.Zhao@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 9a2f811450ed..4fbae20839b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -1399,7 +1399,7 @@ int amdgpu_display_suspend_helper(struct amdgpu_device *adev)
 			}
 		}
 	}
-	return r;
+	return 0;
 }
 
 int amdgpu_display_resume_helper(struct amdgpu_device *adev)
-- 
cgit v1.2.3


From b45aeb2dea9142d4d32fa3a117ba381d84f27065 Mon Sep 17 00:00:00 2001
From: Pavan Kumar Ramayanam <pavan.ramayanam@amd.com>
Date: Tue, 27 Apr 2021 10:21:18 +0530
Subject: drm/amdgpu: Handling of amdgpu_device_resume return value for
 graceful teardown

The runtime resume PM op disregards the return value from
amdgpu_device_resume(), masking errors for failed resumes at the PM
layer.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Pavan Kumar Ramayanam <pavan.ramayanam@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index d8f131ed10cb..b0378cfe2218 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1573,6 +1573,9 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
 		amdgpu_device_baco_exit(drm_dev);
 	}
 	ret = amdgpu_device_resume(drm_dev, false);
+	if (ret)
+		return ret;
+
 	if (amdgpu_device_supports_px(drm_dev))
 		drm_dev->switch_power_state = DRM_SWITCH_POWER_ON;
 	adev->in_runpm = false;
-- 
cgit v1.2.3


From 8c3dd61cfa05a65a7e1a8a028000fc95856156c4 Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Mon, 26 Apr 2021 18:50:00 +0800
Subject: drm/amdgpu: Register VGA clients after init can no longer fail

When an amdgpu device fails to init, it makes another VGA device cause
kernel splat:
kernel: amdgpu 0000:08:00.0: amdgpu: amdgpu_device_ip_init failed
kernel: amdgpu 0000:08:00.0: amdgpu: Fatal error during GPU init
kernel: amdgpu: probe of 0000:08:00.0 failed with error -110
...
kernel: amdgpu 0000:01:00.0: vgaarb: changed VGA decodes: olddecodes=io+mem,decodes=none:owns=none
kernel: BUG: kernel NULL pointer dereference, address: 0000000000000018
kernel: #PF: supervisor read access in kernel mode
kernel: #PF: error_code(0x0000) - not-present page
kernel: PGD 0 P4D 0
kernel: Oops: 0000 [#1] SMP NOPTI
kernel: CPU: 6 PID: 1080 Comm: Xorg Tainted: G        W         5.12.0-rc8+ #12
kernel: Hardware name: HP HP EliteDesk 805 G6/872B, BIOS S09 Ver. 02.02.00 12/30/2020
kernel: RIP: 0010:amdgpu_device_vga_set_decode+0x13/0x30 [amdgpu]
kernel: Code: 06 31 c0 c3 b8 ea ff ff ff 5d c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 0f 1f 44 00 00 55 48 8b 87 90 06 00 00 48 89 e5 53 89 f3 <48> 8b 40 18 40 0f b6 f6 e8 40 58 39 fd 80 fb 01 5b 5d 19 c0 83 e0
kernel: RSP: 0018:ffffae3c0246bd68 EFLAGS: 00010002
kernel: RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
kernel: RDX: ffff8dd1af5a8560 RSI: 0000000000000000 RDI: ffff8dce8c160000
kernel: RBP: ffffae3c0246bd70 R08: ffff8dd1af5985c0 R09: ffffae3c0246ba38
kernel: R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000000246
kernel: R13: 0000000000000000 R14: 0000000000000003 R15: ffff8dce81490000
kernel: FS:  00007f9303d8fa40(0000) GS:ffff8dd1af580000(0000) knlGS:0000000000000000
kernel: CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
kernel: CR2: 0000000000000018 CR3: 0000000103cfa000 CR4: 0000000000350ee0
kernel: Call Trace:
kernel:  vga_arbiter_notify_clients.part.0+0x4a/0x80
kernel:  vga_get+0x17f/0x1c0
kernel:  vga_arb_write+0x121/0x6a0
kernel:  ? apparmor_file_permission+0x1c/0x20
kernel:  ? security_file_permission+0x30/0x180
kernel:  vfs_write+0xca/0x280
kernel:  ksys_write+0x67/0xe0
kernel:  __x64_sys_write+0x1a/0x20
kernel:  do_syscall_64+0x38/0x90
kernel:  entry_SYSCALL_64_after_hwframe+0x44/0xae
kernel: RIP: 0033:0x7f93041e02f7
kernel: Code: 75 05 48 83 c4 58 c3 e8 f7 33 ff ff 0f 1f 80 00 00 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24
kernel: RSP: 002b:00007fff60e49b28 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
kernel: RAX: ffffffffffffffda RBX: 000000000000000b RCX: 00007f93041e02f7
kernel: RDX: 000000000000000b RSI: 00007fff60e49b40 RDI: 000000000000000f
kernel: RBP: 00007fff60e49b40 R08: 00000000ffffffff R09: 00007fff60e499d0
kernel: R10: 00007f93049350b5 R11: 0000000000000246 R12: 000056111d45e808
kernel: R13: 0000000000000000 R14: 000056111d45e7f8 R15: 000056111d46c980
kernel: Modules linked in: nls_iso8859_1 snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio snd_hda_codec_hdmi snd_hda_intel snd_intel_dspcfg snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_seq input_leds snd_seq_device snd_timer snd soundcore joydev kvm_amd serio_raw k10temp mac_hid hp_wmi ccp kvm sparse_keymap wmi_bmof ucsi_acpi efi_pstore typec_ucsi rapl typec video wmi sch_fq_codel parport_pc ppdev lp parport ip_tables x_tables autofs4 btrfs blake2b_generic zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx libcrc32c xor raid6_pq raid1 raid0 multipath linear dm_mirror dm_region_hash dm_log hid_generic usbhid hid amdgpu drm_ttm_helper ttm iommu_v2 gpu_sched i2c_algo_bit drm_kms_helper syscopyarea sysfillrect crct10dif_pclmul sysimgblt crc32_pclmul fb_sys_fops ghash_clmulni_intel cec rc_core aesni_intel crypto_simd psmouse cryptd r8169 i2c_piix4 drm ahci xhci_pci realtek libahci xhci_pci_renesas gpio_amdpt gpio_generic
kernel: CR2: 0000000000000018
kernel: ---[ end trace 76d04313d4214c51 ]---

Commit 4192f7b57689 ("drm/amdgpu: unmap register bar on device init
failure") makes amdgpu_driver_unload_kms() skips amdgpu_device_fini(),
so the VGA clients remain registered. So when
vga_arbiter_notify_clients() iterates over registered clients, it causes
NULL pointer dereference.

Since there's no reason to register VGA clients that early, so solve
the issue by putting them after all the goto cleanups.

v2:
 - Remove redundant vga_switcheroo cleanup in failed: label.

Fixes: 4192f7b57689 ("drm/amdgpu: unmap register bar on device init failure")
Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index b4ad1c055c70..7d3b54615147 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3410,19 +3410,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	/* doorbell bar mapping and doorbell index init*/
 	amdgpu_device_doorbell_init(adev);
 
-	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
-	/* this will fail for cards that aren't VGA class devices, just
-	 * ignore it */
-	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
-		vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
-
-	if (amdgpu_device_supports_px(ddev)) {
-		px = true;
-		vga_switcheroo_register_client(adev->pdev,
-					       &amdgpu_switcheroo_ops, px);
-		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
-	}
-
 	if (amdgpu_emu_mode == 1) {
 		/* post the asic on emulation mode */
 		emu_soc_asic_init(adev);
@@ -3619,6 +3606,19 @@ fence_driver_init:
 	if (amdgpu_device_cache_pci_state(adev->pdev))
 		pci_restore_state(pdev);
 
+	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
+	/* this will fail for cards that aren't VGA class devices, just
+	 * ignore it */
+	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
+		vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
+
+	if (amdgpu_device_supports_px(ddev)) {
+		px = true;
+		vga_switcheroo_register_client(adev->pdev,
+					       &amdgpu_switcheroo_ops, px);
+		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
+	}
+
 	if (adev->gmc.xgmi.pending_reset)
 		queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
@@ -3630,8 +3630,6 @@ release_ras_con:
 
 failed:
 	amdgpu_vf_error_trans_all(adev);
-	if (px)
-		vga_switcheroo_fini_domain_pm_ops(adev->dev);
 
 failed_unmap:
 	iounmap(adev->rmmio);
-- 
cgit v1.2.3


From e0c16eb4b3610298a74ae5504c7f6939b12be991 Mon Sep 17 00:00:00 2001
From: Simon Ser <contact@emersion.fr>
Date: Wed, 21 Apr 2021 11:16:35 +0200
Subject: amdgpu: fix GEM obj leak in amdgpu_display_user_framebuffer_create
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This error code-path is missing a drm_gem_object_put call. Other
error code-paths are fine.

Signed-off-by: Simon Ser <contact@emersion.fr>
Fixes: 1769152ac64b ("drm/amdgpu: Fail fb creation from imported dma-bufs. (v2)")
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Harry Wentland <hwentlan@amd.com>
Cc: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Cc: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 4fbae20839b7..c4cfefb33e03 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -986,6 +986,7 @@ amdgpu_display_user_framebuffer_create(struct drm_device *dev,
 	domains = amdgpu_display_supported_domains(drm_to_adev(dev), bo->flags);
 	if (obj->import_attach && !(domains & AMDGPU_GEM_DOMAIN_GTT)) {
 		drm_dbg_kms(dev, "Cannot create framebuffer from imported dma_buf\n");
+		drm_gem_object_put(obj);
 		return ERR_PTR(-EINVAL);
 	}
 
-- 
cgit v1.2.3


From d385c16173f28a18866abf54c764200c276dace0 Mon Sep 17 00:00:00 2001
From: Colin Xu <colin.xu@intel.com>
Date: Fri, 16 Apr 2021 16:33:55 +0800
Subject: drm/i915/gvt: Prevent divided by zero when calculating refresh rate

To get refresh rate as vblank timer period and keep the precision, the
calculation of rate is multiplied by 1000. However old logic was using:
rate = pixel clock / (h * v / 1000). When the h/v total is invalid, like
all 0, h * v / 1000 will be rounded to 0, which leads to a divided by 0
fault.

0 H/V are already checked above. Instead of divide after divide, refine
the calculation to divide after multiply: "pixel clock * 1000 / (h * v)"
Guest driver should guarantee the correctness of the timing regs' value.

Fixes: 6a4500c7b83f ("drm/i915/gvt: Get accurate vGPU virtual display refresh rate from vreg")
Reported-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Colin Xu <colin.xu@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20210416083355.159305-1-colin.xu@intel.com
Reviewed-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/handlers.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 477badfcb258..dda320749c65 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -669,8 +669,8 @@ static void vgpu_update_refresh_rate(struct intel_vgpu *vgpu)
 	link_n = vgpu_vreg_t(vgpu, PIPE_LINK_N1(TRANSCODER_A));
 
 	/* Get H/V total from transcoder timing */
-	htotal = (vgpu_vreg_t(vgpu, HTOTAL(TRANSCODER_A)) >> TRANS_HTOTAL_SHIFT) + 1;
-	vtotal = (vgpu_vreg_t(vgpu, VTOTAL(TRANSCODER_A)) >> TRANS_VTOTAL_SHIFT) + 1;
+	htotal = (vgpu_vreg_t(vgpu, HTOTAL(TRANSCODER_A)) >> TRANS_HTOTAL_SHIFT);
+	vtotal = (vgpu_vreg_t(vgpu, VTOTAL(TRANSCODER_A)) >> TRANS_VTOTAL_SHIFT);
 
 	if (dp_br && link_n && htotal && vtotal) {
 		u64 pixel_clk = 0;
@@ -682,7 +682,7 @@ static void vgpu_update_refresh_rate(struct intel_vgpu *vgpu)
 		pixel_clk *= MSEC_PER_SEC;
 
 		/* Calcuate refresh rate by (pixel_clk / (h_total * v_total)) */
-		new_rate = DIV64_U64_ROUND_CLOSEST(pixel_clk, div64_u64(mul_u32_u32(htotal, vtotal), MSEC_PER_SEC));
+		new_rate = DIV64_U64_ROUND_CLOSEST(mul_u64_u32_shr(pixel_clk, MSEC_PER_SEC, 0), mul_u32_u32(htotal + 1, vtotal + 1));
 
 		if (*old_rate != new_rate)
 			*old_rate = new_rate;
-- 
cgit v1.2.3


From 121271f08809e5dc01d15d3e529988ac5d740af6 Mon Sep 17 00:00:00 2001
From: Amit Kumar Mahapatra <amit.kumar-mahapatra@xilinx.com>
Date: Wed, 28 Apr 2021 23:38:01 -0600
Subject: spi: spi-zynq-qspi: Fix kernel-doc warning

Fix kernel-doc warning.

Signed-off-by: Amit Kumar Mahapatra <amit.kumar-mahapatra@xilinx.com>
Link: https://lore.kernel.org/r/20210429053802.17650-2-amit.kumar-mahapatra@xilinx.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-zynq-qspi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-zynq-qspi.c b/drivers/spi/spi-zynq-qspi.c
index 5d8a5ee62fa2..1acde9e24973 100644
--- a/drivers/spi/spi-zynq-qspi.c
+++ b/drivers/spi/spi-zynq-qspi.c
@@ -367,7 +367,7 @@ static int zynq_qspi_config_op(struct zynq_qspi *xqspi, struct spi_device *spi)
 }
 
 /**
- * zynq_qspi_setup - Configure the QSPI controller
+ * zynq_qspi_setup_op - Configure the QSPI controller
  * @spi:	Pointer to the spi_device structure
  *
  * Sets the operational mode of QSPI controller for the next QSPI transfer, baud
-- 
cgit v1.2.3


From 6d5ff8e632a4f2389c331e5554cd1c2a9a28c7aa Mon Sep 17 00:00:00 2001
From: Karen Dombroski <karen.dombroski@marsbioimaging.com>
Date: Wed, 28 Apr 2021 23:38:02 -0600
Subject: spi: spi-zynq-qspi: Fix stack violation bug

When the number of bytes for the op is greater than one, the read could
run off the end of the function stack and cause a crash.

This patch restores the behaviour of safely reading out of the original
opcode location.

Signed-off-by: Karen Dombroski <karen.dombroski@marsbioimaging.com>
Signed-off-by: Amit Kumar Mahapatra <amit.kumar-mahapatra@xilinx.com>
Link: https://lore.kernel.org/r/20210429053802.17650-3-amit.kumar-mahapatra@xilinx.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-zynq-qspi.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/spi/spi-zynq-qspi.c b/drivers/spi/spi-zynq-qspi.c
index 1acde9e24973..5a3d81c31d04 100644
--- a/drivers/spi/spi-zynq-qspi.c
+++ b/drivers/spi/spi-zynq-qspi.c
@@ -528,18 +528,17 @@ static int zynq_qspi_exec_mem_op(struct spi_mem *mem,
 	struct zynq_qspi *xqspi = spi_controller_get_devdata(mem->spi->master);
 	int err = 0, i;
 	u8 *tmpbuf;
-	u8 opcode = op->cmd.opcode;
 
 	dev_dbg(xqspi->dev, "cmd:%#x mode:%d.%d.%d.%d\n",
-		opcode, op->cmd.buswidth, op->addr.buswidth,
+		op->cmd.opcode, op->cmd.buswidth, op->addr.buswidth,
 		op->dummy.buswidth, op->data.buswidth);
 
 	zynq_qspi_chipselect(mem->spi, true);
 	zynq_qspi_config_op(xqspi, mem->spi);
 
-	if (op->cmd.nbytes) {
+	if (op->cmd.opcode) {
 		reinit_completion(&xqspi->data_completion);
-		xqspi->txbuf = &opcode;
+		xqspi->txbuf = (u8 *)&op->cmd.opcode;
 		xqspi->rxbuf = NULL;
 		xqspi->tx_bytes = op->cmd.nbytes;
 		xqspi->rx_bytes = op->cmd.nbytes;
-- 
cgit v1.2.3


From a712b307cfde6dbe0d4829293afb1566beb30a9a Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@chromium.org>
Date: Fri, 30 Apr 2021 10:17:39 -0700
Subject: drm/msm/dpu: Delete bonkers code

dpu_crtc_atomic_flush() was directly poking it's attached planes in a
code path that ended up in dpu_plane_atomic_update(), even if the plane
was not involved in the current atomic update.  While a bit dubious,
this worked before because plane->state would always point to something
valid.  But now using drm_atomic_get_new_plane_state() we could get a
NULL state pointer instead, leading to:

   [   20.873273] Call trace:
   [   20.875740]  dpu_plane_atomic_update+0x5c/0xed0
   [   20.880311]  dpu_plane_restore+0x40/0x88
   [   20.884266]  dpu_crtc_atomic_flush+0xf4/0x208
   [   20.888660]  drm_atomic_helper_commit_planes+0x150/0x238
   [   20.894014]  msm_atomic_commit_tail+0x1d4/0x7a0
   [   20.898579]  commit_tail+0xa4/0x168
   [   20.902102]  drm_atomic_helper_commit+0x164/0x178
   [   20.906841]  drm_atomic_commit+0x54/0x60
   [   20.910798]  drm_atomic_connector_commit_dpms+0x10c/0x118
   [   20.916236]  drm_mode_obj_set_property_ioctl+0x1e4/0x440
   [   20.921588]  drm_connector_property_set_ioctl+0x60/0x88
   [   20.926852]  drm_ioctl_kernel+0xd0/0x120
   [   20.930807]  drm_ioctl+0x21c/0x478
   [   20.934235]  __arm64_sys_ioctl+0xa8/0xe0
   [   20.938193]  invoke_syscall+0x64/0x130
   [   20.941977]  el0_svc_common.constprop.3+0x5c/0xe0
   [   20.946716]  do_el0_svc+0x80/0xa0
   [   20.950058]  el0_svc+0x20/0x30
   [   20.953145]  el0_sync_handler+0x88/0xb0
   [   20.957014]  el0_sync+0x13c/0x140

The reason for the codepath seems dubious, the atomic suspend/resume
heplers should handle the power-collapse case.  If not, the CRTC's
atomic_check() should be adding the planes to the atomic update.

Reported-by: Stephen Boyd <swboyd@chromium.org>
Reported-by: John Stultz <john.stultz@linaro.org>
Fixes: 37418bf14c13 ("drm: Use state helper instead of the plane state pointer")
Tested-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Rob Clark <robdclark@chromium.org>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Link: https://patchwork.freedesktop.org/patch/msgid/20210430171744.1721408-1-robdclark@gmail.com
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c  | 10 ----------
 drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 16 ----------------
 drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h |  6 ------
 3 files changed, 32 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
index 9607a7644d4b..e65ff8168269 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
@@ -566,16 +566,6 @@ static void dpu_crtc_atomic_flush(struct drm_crtc *crtc,
 	if (unlikely(!cstate->num_mixers))
 		return;
 
-	/*
-	 * For planes without commit update, drm framework will not add
-	 * those planes to current state since hardware update is not
-	 * required. However, if those planes were power collapsed since
-	 * last commit cycle, driver has to restore the hardware state
-	 * of those planes explicitly here prior to plane flush.
-	 */
-	drm_atomic_crtc_for_each_plane(plane, crtc)
-		dpu_plane_restore(plane, state);
-
 	/* update performance setting before crtc kickoff */
 	dpu_core_perf_crtc_update(crtc, 1, false);
 
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
index df7f3d3afd8b..7a993547eb75 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
@@ -1258,22 +1258,6 @@ static void dpu_plane_atomic_update(struct drm_plane *plane,
 	}
 }
 
-void dpu_plane_restore(struct drm_plane *plane, struct drm_atomic_state *state)
-{
-	struct dpu_plane *pdpu;
-
-	if (!plane || !plane->state) {
-		DPU_ERROR("invalid plane\n");
-		return;
-	}
-
-	pdpu = to_dpu_plane(plane);
-
-	DPU_DEBUG_PLANE(pdpu, "\n");
-
-	dpu_plane_atomic_update(plane, state);
-}
-
 static void dpu_plane_destroy(struct drm_plane *plane)
 {
 	struct dpu_plane *pdpu = plane ? to_dpu_plane(plane) : NULL;
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h
index 03b6365a750c..34e03ac05f4a 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h
@@ -84,12 +84,6 @@ bool is_dpu_plane_virtual(struct drm_plane *plane);
 void dpu_plane_get_ctl_flush(struct drm_plane *plane, struct dpu_hw_ctl *ctl,
 		u32 *flush_sspp);
 
-/**
- * dpu_plane_restore - restore hw state if previously power collapsed
- * @plane: Pointer to drm plane structure
- */
-void dpu_plane_restore(struct drm_plane *plane, struct drm_atomic_state *state);
-
 /**
  * dpu_plane_flush - final plane operations before commit flush
  * @plane: Pointer to drm plane structure
-- 
cgit v1.2.3


From ff76d506030daeeeb967be8b8a189bf7aee8e7a8 Mon Sep 17 00:00:00 2001
From: Kai Huang <kai.huang@intel.com>
Date: Thu, 29 Apr 2021 16:12:26 +1200
Subject: KVM: x86/mmu: Avoid unnecessary page table allocation in
 kvm_tdp_mmu_map()

In kvm_tdp_mmu_map(), while iterating TDP MMU page table entries, it is
possible SPTE has already been frozen by another thread but the frozen
is not done yet, for instance, when another thread is still in middle of
zapping large page.  In this case, the !is_shadow_present_pte() check
for old SPTE in tdp_mmu_for_each_pte() may hit true, and in this case
allocating new page table is unnecessary since tdp_mmu_set_spte_atomic()
later will return false and page table will need to be freed.  Add
is_removed_spte() check before allocating new page table to avoid this.

Signed-off-by: Kai Huang <kai.huang@intel.com>
Message-Id: <20210429041226.50279-1-kai.huang@intel.com>
Reviewed-by: Ben Gardon <bgardon@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/tdp_mmu.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 88f69a6cc492..3c8284841bed 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1009,6 +1009,14 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
 		}
 
 		if (!is_shadow_present_pte(iter.old_spte)) {
+			/*
+			 * If SPTE has been forzen by another thread, just
+			 * give up and retry, avoiding unnecessary page table
+			 * allocation and free.
+			 */
+			if (is_removed_spte(iter.old_spte))
+				break;
+
 			sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level);
 			child_pt = sp->spt;
 
-- 
cgit v1.2.3


From 1699f65c8b658d434fe92563c906cd1a136c9cb6 Mon Sep 17 00:00:00 2001
From: "Shahin, Md Shahadat Hossain" <shahinmd@amazon.de>
Date: Fri, 30 Apr 2021 11:52:31 +0000
Subject: kvm/x86: Fix 'lpages' kvm stat for TDM MMU

Large pages not being created properly may result in increased memory
access time. The 'lpages' kvm stat used to keep track of the current
number of large pages in the system, but with TDP MMU enabled the stat
is not showing the correct number.

This patch extends the lpages counter to cover the TDP case.

Signed-off-by: Md Shahadat Hossain Shahin <shahinmd@amazon.de>
Cc: Bartosz Szczepanek <bsz@amazon.de>
Message-Id: <1619783551459.35424@amazon.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/tdp_mmu.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 3c8284841bed..c743894fe0b7 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -444,6 +444,13 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
 
 	trace_kvm_tdp_mmu_spte_changed(as_id, gfn, level, old_spte, new_spte);
 
+	if (is_large_pte(old_spte) != is_large_pte(new_spte)) {
+		if (is_large_pte(old_spte))
+			atomic64_sub(1, (atomic64_t*)&kvm->stat.lpages);
+		else
+			atomic64_add(1, (atomic64_t*)&kvm->stat.lpages);
+	}
+
 	/*
 	 * The only times a SPTE should be changed from a non-present to
 	 * non-present state is when an MMIO entry is installed/modified/
-- 
cgit v1.2.3


From d981dd15498b188636ec5a7d8ad485e650f63d8d Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli@tencent.com>
Date: Wed, 28 Apr 2021 19:08:02 +0800
Subject: KVM: LAPIC: Accurately guarantee busy wait for timer to expire when
 using hv_timer

Commit ee66e453db13d (KVM: lapic: Busy wait for timer to expire when
using hv_timer) tries to set ktime->expired_tscdeadline by checking
ktime->hv_timer_in_use since lapic timer oneshot/periodic modes which
are emulated by vmx preemption timer also get advanced, they leverage
the same vmx preemption timer logic with tsc-deadline mode. However,
ktime->hv_timer_in_use is cleared before apic_timer_expired() handling,
let's delay this clearing in preemption-disabled region.

Fixes: ee66e453db13d ("KVM: lapic: Busy wait for timer to expire when using hv_timer")
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Message-Id: <1619608082-4187-1-git-send-email-wanpengli@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/lapic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 152591f9243a..c0ebef560bd1 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1913,8 +1913,8 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
 	if (!apic->lapic_timer.hv_timer_in_use)
 		goto out;
 	WARN_ON(rcuwait_active(&vcpu->wait));
-	cancel_hv_timer(apic);
 	apic_timer_expired(apic, false);
+	cancel_hv_timer(apic);
 
 	if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
 		advance_periodic_target_expiration(apic);
-- 
cgit v1.2.3


From 262de4102c7bb8e59f26a967a8ffe8cce85cc537 Mon Sep 17 00:00:00 2001
From: Benjamin Segall <bsegall@google.com>
Date: Thu, 29 Apr 2021 16:22:34 +0000
Subject: kvm: exit halt polling on need_resched() as well

single_task_running() is usually more general than need_resched()
but CFS_BANDWIDTH throttling will use resched_task() when there
is just one task to get the task to block. This was causing
long-need_resched warnings and was likely allowing VMs to
overrun their quota when halt polling.

Signed-off-by: Ben Segall <bsegall@google.com>
Signed-off-by: Venkatesh Srinivas <venkateshs@chromium.org>
Message-Id: <20210429162233.116849-1-venkateshs@chromium.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: stable@vger.kernel.org
Reviewed-by: Jim Mattson <jmattson@google.com>
---
 virt/kvm/kvm_main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2799c6660cce..b9f12da6af0e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2973,7 +2973,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 				goto out;
 			}
 			poll_end = cur = ktime_get();
-		} while (single_task_running() && ktime_before(cur, stop));
+		} while (single_task_running() && !need_resched() &&
+			 ktime_before(cur, stop));
 	}
 
 	prepare_to_rcuwait(&vcpu->wait);
-- 
cgit v1.2.3


From deee59bacb2402c20e6b1b6800f9a5127367eb2a Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Mon, 3 May 2021 15:54:42 +0300
Subject: KVM: nSVM: fix a typo in svm_leave_nested

When forcibly leaving the nested mode, we should switch to vmcb01

Fixes: 4995a3685f1b ("KVM: SVM: Use a separate vmcb for the nested L2 guest")

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210503125446.1353307-2-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/nested.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 540d43ba2cf4..3321220f3deb 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -886,7 +886,7 @@ void svm_leave_nested(struct vcpu_svm *svm)
 		svm->nested.nested_run_pending = 0;
 		leave_guest_mode(vcpu);
 
-		svm_switch_vmcb(svm, &svm->nested.vmcb02);
+		svm_switch_vmcb(svm, &svm->vmcb01);
 
 		nested_svm_uninit_mmu_context(vcpu);
 		vmcb_mark_all_dirty(svm->vmcb);
-- 
cgit v1.2.3


From c74ad08f3333db2e44d3346b863f6d10d35e37dd Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Mon, 3 May 2021 15:54:43 +0300
Subject: KVM: nSVM: fix few bugs in the vmcb02 caching logic

* Define and use an invalid GPA (all ones) for init value of last
  and current nested vmcb physical addresses.

* Reset the current vmcb12 gpa to the invalid value when leaving
  the nested mode, similar to what is done on nested vmexit.

* Reset	the last seen vmcb12 address when disabling the nested SVM,
  as it relies on vmcb02 fields which are freed at that point.

Fixes: 4995a3685f1b ("KVM: SVM: Use a separate vmcb for the nested L2 guest")

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210503125446.1353307-3-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/svm/nested.c       | 11 +++++++++++
 arch/x86/kvm/svm/svm.c          |  4 ++--
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index cbbcee0a84f9..848956bb3cf1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -113,6 +113,7 @@
 #define VALID_PAGE(x) ((x) != INVALID_PAGE)
 
 #define UNMAPPED_GVA (~(gpa_t)0)
+#define INVALID_GPA (~(gpa_t)0)
 
 /* KVM Hugepage definitions for x86 */
 #define KVM_MAX_HUGEPAGE_LEVEL	PG_LEVEL_1G
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 3321220f3deb..a88c64e004c3 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -872,6 +872,15 @@ void svm_free_nested(struct vcpu_svm *svm)
 	__free_page(virt_to_page(svm->nested.vmcb02.ptr));
 	svm->nested.vmcb02.ptr = NULL;
 
+	/*
+	 * When last_vmcb12_gpa matches the current vmcb12 gpa,
+	 * some vmcb12 fields are not loaded if they are marked clean
+	 * in the vmcb12, since in this case they are up to date already.
+	 *
+	 * When the vmcb02 is freed, this optimization becomes invalid.
+	 */
+	svm->nested.last_vmcb12_gpa = INVALID_GPA;
+
 	svm->nested.initialized = false;
 }
 
@@ -884,6 +893,8 @@ void svm_leave_nested(struct vcpu_svm *svm)
 
 	if (is_guest_mode(vcpu)) {
 		svm->nested.nested_run_pending = 0;
+		svm->nested.vmcb12_gpa = INVALID_GPA;
+
 		leave_guest_mode(vcpu);
 
 		svm_switch_vmcb(svm, &svm->vmcb01);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9790c73f2a32..be5cf612ab1f 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1235,8 +1235,8 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
 	svm->current_vmcb->asid_generation = 0;
 	svm->asid = 0;
 
-	svm->nested.vmcb12_gpa = 0;
-	svm->nested.last_vmcb12_gpa = 0;
+	svm->nested.vmcb12_gpa = INVALID_GPA;
+	svm->nested.last_vmcb12_gpa = INVALID_GPA;
 	vcpu->arch.hflags = 0;
 
 	if (!kvm_pause_in_guest(vcpu->kvm)) {
-- 
cgit v1.2.3


From 9d290e16432cacd448475d38dec2753b75b9665f Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Mon, 3 May 2021 15:54:44 +0300
Subject: KVM: nSVM: leave the guest mode prior to loading a nested state

This allows the KVM to load the nested state more than
once without warnings.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210503125446.1353307-4-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/nested.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index a88c64e004c3..32400cba608d 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -1309,12 +1309,15 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
 	 * L2 registers if needed are moved from the current VMCB to VMCB02.
 	 */
 
+	if (is_guest_mode(vcpu))
+		svm_leave_nested(svm);
+	else
+		svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
+
 	svm->nested.nested_run_pending =
 		!!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
 
 	svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
-	if (svm->current_vmcb == &svm->vmcb01)
-		svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
 
 	svm->vmcb01.ptr->save.es = save->es;
 	svm->vmcb01.ptr->save.cs = save->cs;
-- 
cgit v1.2.3


From 7f6231a39117c2781beead59d6ae4923c2703147 Mon Sep 17 00:00:00 2001
From: Kai Huang <kai.huang@intel.com>
Date: Mon, 3 May 2021 16:24:46 +1200
Subject: KVM: x86/mmu: Fix kdoc of __handle_changed_spte

The function name of kdoc of __handle_changed_spte() should be itself,
rather than handle_changed_spte().  Fix the typo.

Signed-off-by: Kai Huang <kai.huang@intel.com>
Message-Id: <20210503042446.154695-1-kai.huang@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/tdp_mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index c743894fe0b7..95eeb5ac6a8a 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -388,7 +388,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
 }
 
 /**
- * handle_changed_spte - handle bookkeeping associated with an SPTE change
+ * __handle_changed_spte - handle bookkeeping associated with an SPTE change
  * @kvm: kvm instance
  * @as_id: the address space of the paging structure the SPTE was a part of
  * @gfn: the base GFN that was mapped by the SPTE
-- 
cgit v1.2.3


From 8899a5fc7da516460f841189a28aac0b52b554fd Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 30 Apr 2021 18:03:03 +0100
Subject: KVM: x86: Fix potential fput on a null source_kvm_file

The fget can potentially return null, so the fput on the error return
path can cause a null pointer dereference. Fix this by checking for
a null source_kvm_file before doing a fput.

Addresses-Coverity: ("Dereference null return")
Fixes: 54526d1fd593 ("KVM: x86: Support KVM VMs sharing SEV context")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Message-Id: <20210430170303.131924-1-colin.king@canonical.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/sev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 1356ee095cd5..8b11c711a0e4 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1764,7 +1764,8 @@ e_mirror_unlock:
 e_source_unlock:
 	mutex_unlock(&source_kvm->lock);
 e_source_put:
-	fput(source_kvm_file);
+	if (source_kvm_file)
+		fput(source_kvm_file);
 	return ret;
 }
 
-- 
cgit v1.2.3


From 5e753a817b2d5991dfe8a801b7b1e8e79a1c5a20 Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Fri, 30 Apr 2021 19:59:51 +0800
Subject: btrfs: fix unmountable seed device after fstrim

The following test case reproduces an issue of wrongly freeing in-use
blocks on the readonly seed device when fstrim is called on the rw sprout
device. As shown below.

Create a seed device and add a sprout device to it:

  $ mkfs.btrfs -fq -dsingle -msingle /dev/loop0
  $ btrfstune -S 1 /dev/loop0
  $ mount /dev/loop0 /btrfs
  $ btrfs dev add -f /dev/loop1 /btrfs
  BTRFS info (device loop0): relocating block group 290455552 flags system
  BTRFS info (device loop0): relocating block group 1048576 flags system
  BTRFS info (device loop0): disk added /dev/loop1
  $ umount /btrfs

Mount the sprout device and run fstrim:

  $ mount /dev/loop1 /btrfs
  $ fstrim /btrfs
  $ umount /btrfs

Now try to mount the seed device, and it fails:

  $ mount /dev/loop0 /btrfs
  mount: /btrfs: wrong fs type, bad option, bad superblock on /dev/loop0, missing codepage or helper program, or other error.

Block 5292032 is missing on the readonly seed device:

 $ dmesg -kt | tail
 <snip>
 BTRFS error (device loop0): bad tree block start, want 5292032 have 0
 BTRFS warning (device loop0): couldn't read-tree root
 BTRFS error (device loop0): open_ctree failed

From the dump-tree of the seed device (taken before the fstrim). Block
5292032 belonged to the block group starting at 5242880:

  $ btrfs inspect dump-tree -e /dev/loop0 | grep -A1 BLOCK_GROUP
  <snip>
  item 3 key (5242880 BLOCK_GROUP_ITEM 8388608) itemoff 16169 itemsize 24
  	block group used 114688 chunk_objectid 256 flags METADATA
  <snip>

From the dump-tree of the sprout device (taken before the fstrim).
fstrim used block-group 5242880 to find the related free space to free:

  $ btrfs inspect dump-tree -e /dev/loop1 | grep -A1 BLOCK_GROUP
  <snip>
  item 1 key (5242880 BLOCK_GROUP_ITEM 8388608) itemoff 16226 itemsize 24
  	block group used 32768 chunk_objectid 256 flags METADATA
  <snip>

BPF kernel tracing the fstrim command finds the missing block 5292032
within the range of the discarded blocks as below:

  kprobe:btrfs_discard_extent {
  	printf("freeing start %llu end %llu num_bytes %llu:\n",
  		arg1, arg1+arg2, arg2);
  }

  freeing start 5259264 end 5406720 num_bytes 147456
  <snip>

Fix this by avoiding the discard command to the readonly seed device.

Reported-by: Chris Murphy <lists@colorremedies.com>
CC: stable@vger.kernel.org # 4.4+
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 7a28314189b4..f1d15b68994a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1340,12 +1340,16 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
 		stripe = bbio->stripes;
 		for (i = 0; i < bbio->num_stripes; i++, stripe++) {
 			u64 bytes;
+			struct btrfs_device *device = stripe->dev;
 
-			if (!stripe->dev->bdev) {
+			if (!device->bdev) {
 				ASSERT(btrfs_test_opt(fs_info, DEGRADED));
 				continue;
 			}
 
+			if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
+				continue;
+
 			ret = do_discard_extent(stripe, &bytes);
 			if (!ret) {
 				discarded_bytes += bytes;
-- 
cgit v1.2.3


From 784daf2b9628f2d0117f1f0b578cfe5ab6634919 Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota@wdc.com>
Date: Fri, 30 Apr 2021 15:34:17 +0200
Subject: btrfs: zoned: sanity check zone type

The fstests test case generic/475 creates a dm-linear device that gets
changed to a dm-error device. This leads to errors in loading the block
group's zone information when running on a zoned file system, ultimately
resulting in a list corruption. When running on a kernel with list
debugging enabled this leads to the following crash.

 BTRFS: error (device dm-2) in cleanup_transaction:1953: errno=-5 IO failure
 kernel BUG at lib/list_debug.c:54!
 invalid opcode: 0000 [#1] SMP PTI
 CPU: 1 PID: 2433 Comm: umount Tainted: G        W         5.12.0+ #1018
 RIP: 0010:__list_del_entry_valid.cold+0x1d/0x47
 RSP: 0018:ffffc90001473df0 EFLAGS: 00010296
 RAX: 0000000000000054 RBX: ffff8881038fd000 RCX: ffffc90001473c90
 RDX: 0000000100001a31 RSI: 0000000000000003 RDI: 0000000000000003
 RBP: ffff888308871108 R08: 0000000000000003 R09: 0000000000000001
 R10: 3961373532383838 R11: 6666666620736177 R12: ffff888308871000
 R13: ffff8881038fd088 R14: ffff8881038fdc78 R15: dead000000000100
 FS:  00007f353c9b1540(0000) GS:ffff888627d00000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00007f353cc2c710 CR3: 000000018e13c000 CR4: 00000000000006a0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
 Call Trace:
  btrfs_free_block_groups+0xc9/0x310 [btrfs]
  close_ctree+0x2ee/0x31a [btrfs]
  ? call_rcu+0x8f/0x270
  ? mutex_lock+0x1c/0x40
  generic_shutdown_super+0x67/0x100
  kill_anon_super+0x14/0x30
  btrfs_kill_super+0x12/0x20 [btrfs]
  deactivate_locked_super+0x31/0x90
  cleanup_mnt+0x13e/0x1b0
  task_work_run+0x63/0xb0
  exit_to_user_mode_loop+0xd9/0xe0
  exit_to_user_mode_prepare+0x3e/0x60
  syscall_exit_to_user_mode+0x1d/0x50
  entry_SYSCALL_64_after_hwframe+0x44/0xae

As dm-error has no support for zones, btrfs will run it's zone emulation
mode on this device. The zone emulation mode emulates conventional zones,
so bail out if the zone bitmap that gets populated on mount sees the zone
as sequential while we're thinking it's a conventional zone when creating
a block group.

Note: this scenario is unlikely in a real wold application and can only
happen by this (ab)use of device-mapper targets.

CC: stable@vger.kernel.org # 5.12+
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/zoned.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 70b23a0d03b1..304ce64c70a4 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1126,6 +1126,11 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 			goto out;
 		}
 
+		if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) {
+			ret = -EIO;
+			goto out;
+		}
+
 		switch (zone.cond) {
 		case BLK_ZONE_COND_OFFLINE:
 		case BLK_ZONE_COND_READONLY:
-- 
cgit v1.2.3


From b9d79e4ca4ff23543d6b33c736ba07c1f0a9dcb1 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 4 May 2021 07:29:10 -0700
Subject: fbmem: Mark proc_fb_seq_ops as __maybe_unused

With CONFIG_PROC_FS=n and -Werror, 0-day reports:

drivers/video/fbdev/core/fbmem.c:736:36: error:
	'proc_fb_seq_ops' defined but not used

Mark it as __maybe_unused.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20210504142910.2084722-1-linux@roeck-us.net
---
 drivers/video/fbdev/core/fbmem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index 372b52a2befa..52c606c0f8a2 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -733,7 +733,7 @@ static int fb_seq_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static const struct seq_operations proc_fb_seq_ops = {
+static const struct __maybe_unused seq_operations proc_fb_seq_ops = {
 	.start	= fb_seq_start,
 	.next	= fb_seq_next,
 	.stop	= fb_seq_stop,
-- 
cgit v1.2.3


From 77364faf21b4105ee5adbb4844fdfb461334d249 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Fri, 30 Apr 2021 11:06:55 -0700
Subject: btrfs: initialize return variable in cleanup_free_space_cache_v1

Static analysis reports this problem

  free-space-cache.c:3965:2: warning: Undefined or garbage value returned
    return ret;
    ^~~~~~~~~~

ret is set in the node handling loop.  Treat doing nothing as a success
and initialize ret to 0, although it's unlikely the loop would be
skipped. We always have block groups, but as it could lead to
transaction abort in the caller it's better to be safe.

CC: stable@vger.kernel.org # 5.12+
Signed-off-by: Tom Rix <trix@redhat.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/free-space-cache.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index e54466fc101f..4806295116d8 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -3949,7 +3949,7 @@ static int cleanup_free_space_cache_v1(struct btrfs_fs_info *fs_info,
 {
 	struct btrfs_block_group *block_group;
 	struct rb_node *node;
-	int ret;
+	int ret = 0;
 
 	btrfs_info(fs_info, "cleaning free space cache v1");
 
-- 
cgit v1.2.3


From c83c4e1912446db697a120eb30126cd80cbf6349 Mon Sep 17 00:00:00 2001
From: Evan Quan <evan.quan@amd.com>
Date: Wed, 28 Apr 2021 12:00:20 +0800
Subject: drm/amdgpu: add new MC firmware for Polaris12 32bit ASIC

Polaris12 32bit ASIC needs a special MC firmware.

Signed-off-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index c1bd190841f8..e4f27b3f28fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -59,6 +59,7 @@ MODULE_FIRMWARE("amdgpu/tonga_mc.bin");
 MODULE_FIRMWARE("amdgpu/polaris11_mc.bin");
 MODULE_FIRMWARE("amdgpu/polaris10_mc.bin");
 MODULE_FIRMWARE("amdgpu/polaris12_mc.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_32_mc.bin");
 MODULE_FIRMWARE("amdgpu/polaris11_k_mc.bin");
 MODULE_FIRMWARE("amdgpu/polaris10_k_mc.bin");
 MODULE_FIRMWARE("amdgpu/polaris12_k_mc.bin");
@@ -243,10 +244,16 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
 			chip_name = "polaris10";
 		break;
 	case CHIP_POLARIS12:
-		if (ASICID_IS_P23(adev->pdev->device, adev->pdev->revision))
+		if (ASICID_IS_P23(adev->pdev->device, adev->pdev->revision)) {
 			chip_name = "polaris12_k";
-		else
-			chip_name = "polaris12";
+		} else {
+			WREG32(mmMC_SEQ_IO_DEBUG_INDEX, ixMC_IO_DEBUG_UP_159);
+			/* Polaris12 32bit ASIC needs a special MC firmware */
+			if (RREG32(mmMC_SEQ_IO_DEBUG_DATA) == 0x05b4dc40)
+				chip_name = "polaris12_32";
+			else
+				chip_name = "polaris12";
+		}
 		break;
 	case CHIP_FIJI:
 	case CHIP_CARRIZO:
-- 
cgit v1.2.3


From 16e9b3e58bc3fce7391539e0eb3fd167cbf9951f Mon Sep 17 00:00:00 2001
From: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
Date: Tue, 13 Apr 2021 20:06:04 -0400
Subject: drm/amd/display: Fix two cursor duplication when using overlay

Our driver supports overlay planes, and as expected, some userspace
compositor takes advantage of these features. If the userspace is not
enabling the cursor, they can use multiple planes as they please.
Nevertheless, we start to have constraints when userspace tries to
enable hardware cursor with various planes. Basically, we cannot draw
the cursor at the same size and position on two separated pipes since it
uses extra bandwidth and DML only run with one cursor.

For those reasons, when we enable hardware cursor and multiple planes,
our driver should accept variations like the ones described below:

  +-------------+   +--------------+
  | +---------+ |   |              |
  | |Primary  | |   | Primary      |
  | |         | |   | Overlay      |
  | +---------+ |   |              |
  |Overlay      |   |              |
  +-------------+   +--------------+

In this scenario, we can have the desktop UI in the overlay and some
other framebuffer attached to the primary plane (e.g., video). However,
userspace needs to obey some rules and avoid scenarios like the ones
described below (when enabling hw cursor):

                                      +--------+
                                      |Overlay |
 +-------------+    +-----+-------+ +-|        |--+
 | +--------+  | +--------+       | | +--------+  |
 | |Overlay |  | |Overlay |       | |             |
 | |        |  | |        |       | |             |
 | +--------+  | +--------+       | |             |
 | Primary     |    | Primary     | | Primary     |
 +-------------+    +-------------+ +-------------+

 +-------------+   +-------------+
 |     +--------+  |  Primary    |
 |     |Overlay |  |             |
 |     |        |  |             |
 |     +--------+  | +--------+  |
 | Primary     |   | |Overlay |  |
 +-------------+   +-|        |--+
                     +--------+

If the userspace violates some of the above scenarios, our driver needs
to reject the commit; otherwise, we can have unexpected behavior. Since
we don't have a proper driver validation for the above case, we can see
some problems like a duplicate cursor in applications that use multiple
planes. This commit fixes the cursor issue and others by adding adequate
verification for multiple planes.

Change since V1 (Harry and Sean):
- Remove cursor verification from the equation.

Cc: Louis Li <Ching-shih.Li@amd.com>
Cc: Nicholas Kazlauskas <Nicholas.Kazlauskas@amd.com>
Cc: Harry Wentland <Harry.Wentland@amd.com>
Cc: Hersen Wu <hersenxs.wu@amd.com>
Cc: Sean Paul <seanpaul@chromium.org>
Signed-off-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 51 +++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index deec7e0466da..1b71e0ca1a89 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -9870,6 +9870,53 @@ static int add_affected_mst_dsc_crtcs(struct drm_atomic_state *state, struct drm
 }
 #endif
 
+static int validate_overlay(struct drm_atomic_state *state)
+{
+	int i;
+	struct drm_plane *plane;
+	struct drm_plane_state *old_plane_state, *new_plane_state;
+	struct drm_plane_state *primary_state, *overlay_state = NULL;
+
+	/* Check if primary plane is contained inside overlay */
+	for_each_oldnew_plane_in_state_reverse(state, plane, old_plane_state, new_plane_state, i) {
+		if (plane->type == DRM_PLANE_TYPE_OVERLAY) {
+			if (drm_atomic_plane_disabling(plane->state, new_plane_state))
+				return 0;
+
+			overlay_state = new_plane_state;
+			continue;
+		}
+	}
+
+	/* check if we're making changes to the overlay plane */
+	if (!overlay_state)
+		return 0;
+
+	/* check if overlay plane is enabled */
+	if (!overlay_state->crtc)
+		return 0;
+
+	/* find the primary plane for the CRTC that the overlay is enabled on */
+	primary_state = drm_atomic_get_plane_state(state, overlay_state->crtc->primary);
+	if (IS_ERR(primary_state))
+		return PTR_ERR(primary_state);
+
+	/* check if primary plane is enabled */
+	if (!primary_state->crtc)
+		return 0;
+
+	/* Perform the bounds check to ensure the overlay plane covers the primary */
+	if (primary_state->crtc_x < overlay_state->crtc_x ||
+	    primary_state->crtc_y < overlay_state->crtc_y ||
+	    primary_state->crtc_x + primary_state->crtc_w > overlay_state->crtc_x + overlay_state->crtc_w ||
+	    primary_state->crtc_y + primary_state->crtc_h > overlay_state->crtc_y + overlay_state->crtc_h) {
+		DRM_DEBUG_ATOMIC("Overlay plane is enabled with hardware cursor but does not fully cover primary plane\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 /**
  * amdgpu_dm_atomic_check() - Atomic check implementation for AMDgpu DM.
  * @dev: The DRM device
@@ -10044,6 +10091,10 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
 			goto fail;
 	}
 
+	ret = validate_overlay(state);
+	if (ret)
+		goto fail;
+
 	/* Add new/modified planes */
 	for_each_oldnew_plane_in_state_reverse(state, plane, old_plane_state, new_plane_state, i) {
 		ret = dm_update_plane_state(dc, state, plane,
-- 
cgit v1.2.3


From 5bbf219328849e83878bddb7c226d8d42e84affc Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sun, 2 May 2021 22:06:07 -0700
Subject: drm/radeon: Fix off-by-one power_state index heap overwrite

An out of bounds write happens when setting the default power state.
KASAN sees this as:

[drm] radeon: 512M of GTT memory ready.
[drm] GART: num cpu pages 131072, num gpu pages 131072
==================================================================
BUG: KASAN: slab-out-of-bounds in
radeon_atombios_parse_power_table_1_3+0x1837/0x1998 [radeon]
Write of size 4 at addr ffff88810178d858 by task systemd-udevd/157

CPU: 0 PID: 157 Comm: systemd-udevd Not tainted 5.12.0-E620 #50
Hardware name: eMachines        eMachines E620  /Nile       , BIOS V1.03 09/30/2008
Call Trace:
 dump_stack+0xa5/0xe6
 print_address_description.constprop.0+0x18/0x239
 kasan_report+0x170/0x1a8
 radeon_atombios_parse_power_table_1_3+0x1837/0x1998 [radeon]
 radeon_atombios_get_power_modes+0x144/0x1888 [radeon]
 radeon_pm_init+0x1019/0x1904 [radeon]
 rs690_init+0x76e/0x84a [radeon]
 radeon_device_init+0x1c1a/0x21e5 [radeon]
 radeon_driver_load_kms+0xf5/0x30b [radeon]
 drm_dev_register+0x255/0x4a0 [drm]
 radeon_pci_probe+0x246/0x2f6 [radeon]
 pci_device_probe+0x1aa/0x294
 really_probe+0x30e/0x850
 driver_probe_device+0xe6/0x135
 device_driver_attach+0xc1/0xf8
 __driver_attach+0x13f/0x146
 bus_for_each_dev+0xfa/0x146
 bus_add_driver+0x2b3/0x447
 driver_register+0x242/0x2c1
 do_one_initcall+0x149/0x2fd
 do_init_module+0x1ae/0x573
 load_module+0x4dee/0x5cca
 __do_sys_finit_module+0xf1/0x140
 do_syscall_64+0x33/0x40
 entry_SYSCALL_64_after_hwframe+0x44/0xae

Without KASAN, this will manifest later when the kernel attempts to
allocate memory that was stomped, since it collides with the inline slab
freelist pointer:

invalid opcode: 0000 [#1] SMP NOPTI
CPU: 0 PID: 781 Comm: openrc-run.sh Tainted: G        W 5.10.12-gentoo-E620 #2
Hardware name: eMachines        eMachines E620  /Nile , BIOS V1.03       09/30/2008
RIP: 0010:kfree+0x115/0x230
Code: 89 c5 e8 75 ea ff ff 48 8b 00 0f ba e0 09 72 63 e8 1f f4 ff ff 41 89 c4 48 8b 45 00 0f ba e0 10 72 0a 48 8b 45 08 a8 01 75 02 <0f> 0b 44 89 e1 48 c7 c2 00 f0 ff ff be 06 00 00 00 48 d3 e2 48 c7
RSP: 0018:ffffb42f40267e10 EFLAGS: 00010246
RAX: ffffd61280ee8d88 RBX: 0000000000000004 RCX: 000000008010000d
RDX: 4000000000000000 RSI: ffffffffba1360b0 RDI: ffffd61280ee8d80
RBP: ffffd61280ee8d80 R08: ffffffffb91bebdf R09: 0000000000000000
R10: ffff8fe2c1047ac8 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000100
FS:  00007fe80eff6b68(0000) GS:ffff8fe339c00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007fe80eec7bc0 CR3: 0000000038012000 CR4: 00000000000006f0
Call Trace:
 __free_fdtable+0x16/0x1f
 put_files_struct+0x81/0x9b
 do_exit+0x433/0x94d
 do_group_exit+0xa6/0xa6
 __x64_sys_exit_group+0xf/0xf
 do_syscall_64+0x33/0x40
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x7fe80ef64bea
Code: Unable to access opcode bytes at RIP 0x7fe80ef64bc0.
RSP: 002b:00007ffdb1c47528 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7
RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007fe80ef64bea
RDX: 00007fe80ef64f60 RSI: 0000000000000000 RDI: 0000000000000000
RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000
R10: 00007fe80ee2c620 R11: 0000000000000246 R12: 00007fe80eff41e0
R13: 00000000ffffffff R14: 0000000000000024 R15: 00007fe80edf9cd0
Modules linked in: radeon(+) ath5k(+) snd_hda_codec_realtek ...

Use a valid power_state index when initializing the "flags" and "misc"
and "misc2" fields.

Bug: https://bugzilla.kernel.org/show_bug.cgi?id=211537
Reported-by: Erhard F. <erhard_f@mailbox.org>
Fixes: a48b9b4edb8b ("drm/radeon/kms/pm: add asic specific callbacks for getting power state (v2)")
Fixes: 79daedc94281 ("drm/radeon/kms: minor pm cleanups")
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/radeon_atombios.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index 42301b4e56f5..f9f4efa1738c 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -2250,10 +2250,10 @@ static int radeon_atombios_parse_power_table_1_3(struct radeon_device *rdev)
 		rdev->pm.default_power_state_index = state_index - 1;
 		rdev->pm.power_state[state_index - 1].default_clock_mode =
 			&rdev->pm.power_state[state_index - 1].clock_info[0];
-		rdev->pm.power_state[state_index].flags &=
+		rdev->pm.power_state[state_index - 1].flags &=
 			~RADEON_PM_STATE_SINGLE_DISPLAY_ONLY;
-		rdev->pm.power_state[state_index].misc = 0;
-		rdev->pm.power_state[state_index].misc2 = 0;
+		rdev->pm.power_state[state_index - 1].misc = 0;
+		rdev->pm.power_state[state_index - 1].misc2 = 0;
 	}
 	return state_index;
 }
-- 
cgit v1.2.3


From c69f27137a38d24301a6b659454a91ad85dff4aa Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sun, 2 May 2021 22:06:08 -0700
Subject: drm/radeon: Avoid power table parsing memory leaks

Avoid leaving a hanging pre-allocated clock_info if last mode is
invalid, and avoid heap corruption if no valid modes are found.

Bug: https://bugzilla.kernel.org/show_bug.cgi?id=211537
Fixes: 6991b8f2a319 ("drm/radeon/kms: fix segfault in pm rework")
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/radeon_atombios.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index f9f4efa1738c..28c4413f4dc8 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -2120,11 +2120,14 @@ static int radeon_atombios_parse_power_table_1_3(struct radeon_device *rdev)
 		return state_index;
 	/* last mode is usually default, array is low to high */
 	for (i = 0; i < num_modes; i++) {
-		rdev->pm.power_state[state_index].clock_info =
-			kcalloc(1, sizeof(struct radeon_pm_clock_info),
-				GFP_KERNEL);
+		/* avoid memory leaks from invalid modes or unknown frev. */
+		if (!rdev->pm.power_state[state_index].clock_info) {
+			rdev->pm.power_state[state_index].clock_info =
+				kzalloc(sizeof(struct radeon_pm_clock_info),
+					GFP_KERNEL);
+		}
 		if (!rdev->pm.power_state[state_index].clock_info)
-			return state_index;
+			goto out;
 		rdev->pm.power_state[state_index].num_clock_modes = 1;
 		rdev->pm.power_state[state_index].clock_info[0].voltage.type = VOLTAGE_NONE;
 		switch (frev) {
@@ -2243,8 +2246,15 @@ static int radeon_atombios_parse_power_table_1_3(struct radeon_device *rdev)
 			break;
 		}
 	}
+out:
+	/* free any unused clock_info allocation. */
+	if (state_index && state_index < num_modes) {
+		kfree(rdev->pm.power_state[state_index].clock_info);
+		rdev->pm.power_state[state_index].clock_info = NULL;
+	}
+
 	/* last mode is usually default */
-	if (rdev->pm.default_power_state_index == -1) {
+	if (state_index && rdev->pm.default_power_state_index == -1) {
 		rdev->pm.power_state[state_index - 1].type =
 			POWER_STATE_TYPE_DEFAULT;
 		rdev->pm.default_power_state_index = state_index - 1;
-- 
cgit v1.2.3


From 8651fcb9873be097bb6fe8542bfb6089020726ae Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Fri, 30 Apr 2021 10:16:54 -0700
Subject: drm/amd/pm: initialize variable

Static analysis reports this problem

amdgpu_pm.c:478:16: warning: The right operand of '<' is a garbage value
  for (i = 0; i < data.nums; i++) {
                ^ ~~~~~~~~~

In some cases data is not set.  Initialize to 0 and flag not setting
data as an error with the existing check.

Signed-off-by: Tom Rix <trix@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/amdgpu_pm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index a8d6cc2525b2..e2ea42b1b4f0 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -451,7 +451,7 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = drm_to_adev(ddev);
 	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
-	struct pp_states_info data;
+	struct pp_states_info data = {0};
 	enum amd_pm_state_type pm = 0;
 	int i = 0, ret = 0;
 
-- 
cgit v1.2.3


From 025768a966a3dde8455de46d1f121a51bacb6a77 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 4 May 2021 14:07:53 -0700
Subject: x86/cpu: Use alternative to generate the TASK_SIZE_MAX constant

We used to generate this constant with static jumps, which certainly
works, but generates some quite unreadable and horrid code, and extra
jumps.

It's actually much simpler to just use our alternative_asm()
infrastructure to generate a simple alternative constant, making the
generated code much more obvious (and straight-line rather than "jump
around to load the right constant").

Acked-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
---
 arch/x86/include/asm/page_64.h       | 33 +++++++++++++++++++++++++++++++++
 arch/x86/include/asm/page_64_types.h | 23 +++--------------------
 2 files changed, 36 insertions(+), 20 deletions(-)

diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 939b1cff4a7b..ca840fec7776 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -56,6 +56,39 @@ static inline void clear_page(void *page)
 
 void copy_page(void *to, void *from);
 
+#ifdef CONFIG_X86_5LEVEL
+/*
+ * User space process size.  This is the first address outside the user range.
+ * There are a few constraints that determine this:
+ *
+ * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
+ * address, then that syscall will enter the kernel with a
+ * non-canonical return address, and SYSRET will explode dangerously.
+ * We avoid this particular problem by preventing anything
+ * from being mapped at the maximum canonical address.
+ *
+ * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
+ * CPUs malfunction if they execute code from the highest canonical page.
+ * They'll speculate right off the end of the canonical space, and
+ * bad things happen.  This is worked around in the same way as the
+ * Intel problem.
+ *
+ * With page table isolation enabled, we map the LDT in ... [stay tuned]
+ */
+static inline unsigned long task_size_max(void)
+{
+	unsigned long ret;
+
+	alternative_io("movq %[small],%0","movq %[large],%0",
+			X86_FEATURE_LA57,
+			"=r" (ret),
+			[small] "i" ((1ul << 47)-PAGE_SIZE),
+			[large] "i" ((1ul << 56)-PAGE_SIZE));
+
+	return ret;
+}
+#endif	/* CONFIG_X86_5LEVEL */
+
 #endif	/* !__ASSEMBLY__ */
 
 #ifdef CONFIG_X86_VSYSCALL_EMULATION
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 64297eabad63..a8d4ad856568 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -55,30 +55,13 @@
 
 #ifdef CONFIG_X86_5LEVEL
 #define __VIRTUAL_MASK_SHIFT	(pgtable_l5_enabled() ? 56 : 47)
+/* See task_size_max() in <asm/page_64.h> */
 #else
 #define __VIRTUAL_MASK_SHIFT	47
+#define task_size_max()		((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
 #endif
 
-/*
- * User space process size.  This is the first address outside the user range.
- * There are a few constraints that determine this:
- *
- * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
- * address, then that syscall will enter the kernel with a
- * non-canonical return address, and SYSRET will explode dangerously.
- * We avoid this particular problem by preventing anything
- * from being mapped at the maximum canonical address.
- *
- * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
- * CPUs malfunction if they execute code from the highest canonical page.
- * They'll speculate right off the end of the canonical space, and
- * bad things happen.  This is worked around in the same way as the
- * Intel problem.
- *
- * With page table isolation enabled, we map the LDT in ... [stay tuned]
- */
-#define TASK_SIZE_MAX	((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
-
+#define TASK_SIZE_MAX		task_size_max()
 #define DEFAULT_MAP_WINDOW	((1UL << 47) - PAGE_SIZE)
 
 /* This decides where the kernel will search for a free chunk of vm
-- 
cgit v1.2.3


From 0a269a008f837e76ce285679ab3005059fadc2a6 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 14 Apr 2021 14:35:40 +0200
Subject: x86/kvm: Fix pr_info() for async PF setup/teardown

'pr_fmt' already has 'kvm-guest: ' so 'KVM' prefix is redundant.
"Unregister pv shared memory" is very ambiguous, it's hard to
say which particular PV feature it relates to.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20210414123544.1060604-2-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kernel/kvm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index d307c22e5c18..dc440bb69222 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -345,7 +345,7 @@ static void kvm_guest_cpu_init(void)
 
 		wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
 		__this_cpu_write(apf_reason.enabled, 1);
-		pr_info("KVM setup async PF for cpu %d\n", smp_processor_id());
+		pr_info("setup async PF for cpu %d\n", smp_processor_id());
 	}
 
 	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
@@ -371,7 +371,7 @@ static void kvm_pv_disable_apf(void)
 	wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
 	__this_cpu_write(apf_reason.enabled, 0);
 
-	pr_info("Unregister pv shared memory for cpu %d\n", smp_processor_id());
+	pr_info("disable async PF for cpu %d\n", smp_processor_id());
 }
 
 static void kvm_pv_guest_cpu_reboot(void *unused)
-- 
cgit v1.2.3


From 9f015b3765bf593b3ed5d3b588e409dc0ffa9f85 Mon Sep 17 00:00:00 2001
From: Rijo Thomas <Rijo-john.Thomas@amd.com>
Date: Wed, 14 Apr 2021 23:08:27 +0530
Subject: tee: amdtee: unload TA only when its refcount becomes 0

Same Trusted Application (TA) can be loaded in multiple TEE contexts.

If it is a single instance TA, the TA should not get unloaded from AMD
Secure Processor, while it is still in use in another TEE context.

Therefore reference count TA and unload it when the count becomes zero.

Fixes: 757cc3e9ff1d ("tee: add AMD-TEE driver")
Reviewed-by: Devaraj Rangasamy <Devaraj.Rangasamy@amd.com>
Signed-off-by: Rijo Thomas <Rijo-john.Thomas@amd.com>
Acked-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
---
 drivers/tee/amdtee/amdtee_private.h | 13 +++++
 drivers/tee/amdtee/call.c           | 94 +++++++++++++++++++++++++++++++++----
 drivers/tee/amdtee/core.c           | 15 +++---
 3 files changed, 106 insertions(+), 16 deletions(-)

diff --git a/drivers/tee/amdtee/amdtee_private.h b/drivers/tee/amdtee/amdtee_private.h
index 337c8d82f74e..6d0f7062bb87 100644
--- a/drivers/tee/amdtee/amdtee_private.h
+++ b/drivers/tee/amdtee/amdtee_private.h
@@ -21,6 +21,7 @@
 #define TEEC_SUCCESS			0x00000000
 #define TEEC_ERROR_GENERIC		0xFFFF0000
 #define TEEC_ERROR_BAD_PARAMETERS	0xFFFF0006
+#define TEEC_ERROR_OUT_OF_MEMORY	0xFFFF000C
 #define TEEC_ERROR_COMMUNICATION	0xFFFF000E
 
 #define TEEC_ORIGIN_COMMS		0x00000002
@@ -93,6 +94,18 @@ struct amdtee_shm_data {
 	u32     buf_id;
 };
 
+/**
+ * struct amdtee_ta_data - Keeps track of all TAs loaded in AMD Secure
+ *			   Processor
+ * @ta_handle:	Handle to TA loaded in TEE
+ * @refcount:	Reference count for the loaded TA
+ */
+struct amdtee_ta_data {
+	struct list_head list_node;
+	u32 ta_handle;
+	u32 refcount;
+};
+
 #define LOWER_TWO_BYTE_MASK	0x0000FFFF
 
 /**
diff --git a/drivers/tee/amdtee/call.c b/drivers/tee/amdtee/call.c
index 096dd4d92d39..07f36ac834c8 100644
--- a/drivers/tee/amdtee/call.c
+++ b/drivers/tee/amdtee/call.c
@@ -121,15 +121,69 @@ static int amd_params_to_tee_params(struct tee_param *tee, u32 count,
 	return ret;
 }
 
+static DEFINE_MUTEX(ta_refcount_mutex);
+static struct list_head ta_list = LIST_HEAD_INIT(ta_list);
+
+static u32 get_ta_refcount(u32 ta_handle)
+{
+	struct amdtee_ta_data *ta_data;
+	u32 count = 0;
+
+	/* Caller must hold a mutex */
+	list_for_each_entry(ta_data, &ta_list, list_node)
+		if (ta_data->ta_handle == ta_handle)
+			return ++ta_data->refcount;
+
+	ta_data = kzalloc(sizeof(*ta_data), GFP_KERNEL);
+	if (ta_data) {
+		ta_data->ta_handle = ta_handle;
+		ta_data->refcount = 1;
+		count = ta_data->refcount;
+		list_add(&ta_data->list_node, &ta_list);
+	}
+
+	return count;
+}
+
+static u32 put_ta_refcount(u32 ta_handle)
+{
+	struct amdtee_ta_data *ta_data;
+	u32 count = 0;
+
+	/* Caller must hold a mutex */
+	list_for_each_entry(ta_data, &ta_list, list_node)
+		if (ta_data->ta_handle == ta_handle) {
+			count = --ta_data->refcount;
+			if (count == 0) {
+				list_del(&ta_data->list_node);
+				kfree(ta_data);
+				break;
+			}
+		}
+
+	return count;
+}
+
 int handle_unload_ta(u32 ta_handle)
 {
 	struct tee_cmd_unload_ta cmd = {0};
-	u32 status;
+	u32 status, count;
 	int ret;
 
 	if (!ta_handle)
 		return -EINVAL;
 
+	mutex_lock(&ta_refcount_mutex);
+
+	count = put_ta_refcount(ta_handle);
+
+	if (count) {
+		pr_debug("unload ta: not unloading %u count %u\n",
+			 ta_handle, count);
+		ret = -EBUSY;
+		goto unlock;
+	}
+
 	cmd.ta_handle = ta_handle;
 
 	ret = psp_tee_process_cmd(TEE_CMD_ID_UNLOAD_TA, (void *)&cmd,
@@ -137,8 +191,12 @@ int handle_unload_ta(u32 ta_handle)
 	if (!ret && status != 0) {
 		pr_err("unload ta: status = 0x%x\n", status);
 		ret = -EBUSY;
+	} else {
+		pr_debug("unloaded ta handle %u\n", ta_handle);
 	}
 
+unlock:
+	mutex_unlock(&ta_refcount_mutex);
 	return ret;
 }
 
@@ -340,7 +398,8 @@ int handle_open_session(struct tee_ioctl_open_session_arg *arg, u32 *info,
 
 int handle_load_ta(void *data, u32 size, struct tee_ioctl_open_session_arg *arg)
 {
-	struct tee_cmd_load_ta cmd = {0};
+	struct tee_cmd_unload_ta unload_cmd = {};
+	struct tee_cmd_load_ta load_cmd = {};
 	phys_addr_t blob;
 	int ret;
 
@@ -353,21 +412,36 @@ int handle_load_ta(void *data, u32 size, struct tee_ioctl_open_session_arg *arg)
 		return -EINVAL;
 	}
 
-	cmd.hi_addr = upper_32_bits(blob);
-	cmd.low_addr = lower_32_bits(blob);
-	cmd.size = size;
+	load_cmd.hi_addr = upper_32_bits(blob);
+	load_cmd.low_addr = lower_32_bits(blob);
+	load_cmd.size = size;
 
-	ret = psp_tee_process_cmd(TEE_CMD_ID_LOAD_TA, (void *)&cmd,
-				  sizeof(cmd), &arg->ret);
+	mutex_lock(&ta_refcount_mutex);
+
+	ret = psp_tee_process_cmd(TEE_CMD_ID_LOAD_TA, (void *)&load_cmd,
+				  sizeof(load_cmd), &arg->ret);
 	if (ret) {
 		arg->ret_origin = TEEC_ORIGIN_COMMS;
 		arg->ret = TEEC_ERROR_COMMUNICATION;
-	} else {
-		set_session_id(cmd.ta_handle, 0, &arg->session);
+	} else if (arg->ret == TEEC_SUCCESS) {
+		ret = get_ta_refcount(load_cmd.ta_handle);
+		if (!ret) {
+			arg->ret_origin = TEEC_ORIGIN_COMMS;
+			arg->ret = TEEC_ERROR_OUT_OF_MEMORY;
+
+			/* Unload the TA on error */
+			unload_cmd.ta_handle = load_cmd.ta_handle;
+			psp_tee_process_cmd(TEE_CMD_ID_UNLOAD_TA,
+					    (void *)&unload_cmd,
+					    sizeof(unload_cmd), &ret);
+		} else {
+			set_session_id(load_cmd.ta_handle, 0, &arg->session);
+		}
 	}
+	mutex_unlock(&ta_refcount_mutex);
 
 	pr_debug("load TA: TA handle = 0x%x, RO = 0x%x, ret = 0x%x\n",
-		 cmd.ta_handle, arg->ret_origin, arg->ret);
+		 load_cmd.ta_handle, arg->ret_origin, arg->ret);
 
 	return 0;
 }
diff --git a/drivers/tee/amdtee/core.c b/drivers/tee/amdtee/core.c
index 8a6a8f30bb42..da6b88e80dc0 100644
--- a/drivers/tee/amdtee/core.c
+++ b/drivers/tee/amdtee/core.c
@@ -59,10 +59,9 @@ static void release_session(struct amdtee_session *sess)
 			continue;
 
 		handle_close_session(sess->ta_handle, sess->session_info[i]);
+		handle_unload_ta(sess->ta_handle);
 	}
 
-	/* Unload Trusted Application once all sessions are closed */
-	handle_unload_ta(sess->ta_handle);
 	kfree(sess);
 }
 
@@ -224,8 +223,6 @@ static void destroy_session(struct kref *ref)
 	struct amdtee_session *sess = container_of(ref, struct amdtee_session,
 						   refcount);
 
-	/* Unload the TA from TEE */
-	handle_unload_ta(sess->ta_handle);
 	mutex_lock(&session_list_mutex);
 	list_del(&sess->list_node);
 	mutex_unlock(&session_list_mutex);
@@ -238,7 +235,7 @@ int amdtee_open_session(struct tee_context *ctx,
 {
 	struct amdtee_context_data *ctxdata = ctx->data;
 	struct amdtee_session *sess = NULL;
-	u32 session_info;
+	u32 session_info, ta_handle;
 	size_t ta_size;
 	int rc, i;
 	void *ta;
@@ -259,11 +256,14 @@ int amdtee_open_session(struct tee_context *ctx,
 	if (arg->ret != TEEC_SUCCESS)
 		goto out;
 
+	ta_handle = get_ta_handle(arg->session);
+
 	mutex_lock(&session_list_mutex);
 	sess = alloc_session(ctxdata, arg->session);
 	mutex_unlock(&session_list_mutex);
 
 	if (!sess) {
+		handle_unload_ta(ta_handle);
 		rc = -ENOMEM;
 		goto out;
 	}
@@ -277,6 +277,7 @@ int amdtee_open_session(struct tee_context *ctx,
 
 	if (i >= TEE_NUM_SESSIONS) {
 		pr_err("reached maximum session count %d\n", TEE_NUM_SESSIONS);
+		handle_unload_ta(ta_handle);
 		kref_put(&sess->refcount, destroy_session);
 		rc = -ENOMEM;
 		goto out;
@@ -289,12 +290,13 @@ int amdtee_open_session(struct tee_context *ctx,
 		spin_lock(&sess->lock);
 		clear_bit(i, sess->sess_mask);
 		spin_unlock(&sess->lock);
+		handle_unload_ta(ta_handle);
 		kref_put(&sess->refcount, destroy_session);
 		goto out;
 	}
 
 	sess->session_info[i] = session_info;
-	set_session_id(sess->ta_handle, i, &arg->session);
+	set_session_id(ta_handle, i, &arg->session);
 out:
 	free_pages((u64)ta, get_order(ta_size));
 	return rc;
@@ -329,6 +331,7 @@ int amdtee_close_session(struct tee_context *ctx, u32 session)
 
 	/* Close the session */
 	handle_close_session(ta_handle, session_info);
+	handle_unload_ta(ta_handle);
 
 	kref_put(&sess->refcount, destroy_session);
 
-- 
cgit v1.2.3


From 9683e5775c75097c46bd24e65411b16ac6c6cbb3 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Tue, 4 May 2021 16:49:10 -0700
Subject: libbpf: Add NULL check to add_dummy_ksym_var

Avoids a segv if btf isn't present. Seen on the call path
__bpf_object__open calling bpf_object__collect_externs.

Fixes: 5bd022ec01f0 (libbpf: Support extern kernel function)
Suggested-by: Stanislav Fomichev <sdf@google.com>
Suggested-by: Petar Penkov <ppenkov@google.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210504234910.976501-1-irogers@google.com
---
 tools/lib/bpf/libbpf.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e2a3cf437814..c41d9b2b59ac 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -3216,6 +3216,9 @@ static int add_dummy_ksym_var(struct btf *btf)
 	const struct btf_var_secinfo *vs;
 	const struct btf_type *sec;
 
+	if (!btf)
+		return 0;
+
 	sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC,
 					    BTF_KIND_DATASEC);
 	if (sec_btf_id < 0)
-- 
cgit v1.2.3


From 3cf4524ce40b204418537e6a3a55ed44911b3f53 Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Tue, 27 Apr 2021 14:38:26 +0800
Subject: x86/smpboot: Remove duplicate includes

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210427063835.9039-1-wanjiabing@vivo.com
---
 arch/x86/kernel/smpboot.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7ffb0cf3f997..0ad5214f598a 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1865,9 +1865,6 @@ static bool slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
 	return true;
 }
 
-#include <asm/cpu_device_id.h>
-#include <asm/intel-family.h>
-
 #define X86_MATCH(model)					\
 	X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6,		\
 		INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL)
-- 
cgit v1.2.3


From 790d1ce71de9199bf9fd37c4743aec4a09489a51 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Thu, 22 Apr 2021 21:58:40 +0300
Subject: x86: Delete UD0, UD1 traces

Both instructions aren't used by kernel.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/YIHHYNKbiSf5N7+o@localhost.localdomain
---
 arch/x86/include/asm/bug.h | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 297fa12e7e27..84b87538a15d 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -7,18 +7,9 @@
 
 /*
  * Despite that some emulators terminate on UD2, we use it for WARN().
- *
- * Since various instruction decoders/specs disagree on the encoding of
- * UD0/UD1.
  */
-
-#define ASM_UD0		".byte 0x0f, 0xff" /* + ModRM (for Intel) */
-#define ASM_UD1		".byte 0x0f, 0xb9" /* + ModRM */
 #define ASM_UD2		".byte 0x0f, 0x0b"
-
-#define INSN_UD0	0xff0f
 #define INSN_UD2	0x0b0f
-
 #define LEN_UD2		2
 
 #ifdef CONFIG_GENERIC_BUG
-- 
cgit v1.2.3


From 4029b9706d53e5e8db2e1cee6ecd75e60b62cd09 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Sun, 25 Apr 2021 14:12:29 -0700
Subject: x86/resctrl: Fix init const confusion

const variable must be initconst, not initdata.

Signed-off-by: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210425211229.3157674-1-ak@linux.intel.com
---
 arch/x86/kernel/cpu/resctrl/monitor.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index dbeaa8409313..f07c10b87a87 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -84,7 +84,7 @@ unsigned int resctrl_cqm_threshold;
 static const struct mbm_correction_factor_table {
 	u32 rmidthreshold;
 	u64 cf;
-} mbm_cf_table[] __initdata = {
+} mbm_cf_table[] __initconst = {
 	{7,	CF(1.000000)},
 	{15,	CF(1.000000)},
 	{15,	CF(0.969650)},
-- 
cgit v1.2.3


From b6b4fbd90b155a0025223df2c137af8a701d53b3 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 15:56:31 -0700
Subject: x86/cpu: Initialize MSR_TSC_AUX if RDTSCP *or* RDPID is supported

Initialize MSR_TSC_AUX with CPU node information if RDTSCP or RDPID is
supported.  This fixes a bug where vdso_read_cpunode() will read garbage
via RDPID if RDPID is supported but RDTSCP is not.  While no known CPU
supports RDPID but not RDTSCP, both Intel's SDM and AMD's APM allow for
RDPID to exist without RDTSCP, e.g. it's technically a legal CPU model
for a virtual machine.

Note, technically MSR_TSC_AUX could be initialized if and only if RDPID
is supported since RDTSCP is currently not used to retrieve the CPU node.
But, the cost of the superfluous WRMSR is negigible, whereas leaving
MSR_TSC_AUX uninitialized is just asking for future breakage if someone
decides to utilize RDTSCP.

Fixes: a582c540ac1b ("x86/vdso: Use RDPID in preference to LSL when available")
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210504225632.1532621-2-seanjc@google.com
---
 arch/x86/kernel/cpu/common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 6bdb69a9a7dc..490bed07fe35 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1851,7 +1851,7 @@ static inline void setup_getcpu(int cpu)
 	unsigned long cpudata = vdso_encode_cpunode(cpu, early_cpu_to_node(cpu));
 	struct desc_struct d = { };
 
-	if (boot_cpu_has(X86_FEATURE_RDTSCP))
+	if (boot_cpu_has(X86_FEATURE_RDTSCP) || boot_cpu_has(X86_FEATURE_RDPID))
 		write_rdtscp_aux(cpudata);
 
 	/* Store CPU and node number in limit. */
-- 
cgit v1.2.3


From fc48a6d1faadbf08b7a840d58a5a6eb85bd1a79a Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 15:56:32 -0700
Subject: x86/cpu: Remove write_tsc() and write_rdtscp_aux() wrappers

Drop write_tsc() and write_rdtscp_aux(); the former has no users, and the
latter has only a single user and is slightly misleading since the only
in-kernel consumer of MSR_TSC_AUX is RDPID, not RDTSCP.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210504225632.1532621-3-seanjc@google.com
---
 arch/x86/include/asm/msr.h   | 4 ----
 arch/x86/kernel/cpu/common.c | 2 +-
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index e16cccdd0420..a3f87f1015d3 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -324,10 +324,6 @@ static inline int wrmsrl_safe(u32 msr, u64 val)
 	return wrmsr_safe(msr, (u32)val,  (u32)(val >> 32));
 }
 
-#define write_tsc(low, high) wrmsr(MSR_IA32_TSC, (low), (high))
-
-#define write_rdtscp_aux(val) wrmsr(MSR_TSC_AUX, (val), 0)
-
 struct msr *msrs_alloc(void);
 void msrs_free(struct msr *msrs);
 int msr_set_bit(u32 msr, u8 bit);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 490bed07fe35..a1b756c49a93 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1852,7 +1852,7 @@ static inline void setup_getcpu(int cpu)
 	struct desc_struct d = { };
 
 	if (boot_cpu_has(X86_FEATURE_RDTSCP) || boot_cpu_has(X86_FEATURE_RDPID))
-		write_rdtscp_aux(cpudata);
+		wrmsr(MSR_TSC_AUX, cpudata, 0);
 
 	/* Store CPU and node number in limit. */
 	d.limit0 = cpudata;
-- 
cgit v1.2.3


From a217a6593cec8b315d4c2f344bae33660b39b703 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@linux.alibaba.com>
Date: Tue, 4 May 2021 21:50:14 +0200
Subject: KVM/VMX: Invoke NMI non-IST entry instead of IST entry

In VMX, the host NMI handler needs to be invoked after NMI VM-Exit.
Before commit 1a5488ef0dcf6 ("KVM: VMX: Invoke NMI handler via indirect
call instead of INTn"), this was done by INTn ("int $2"). But INTn
microcode is relatively expensive, so the commit reworked NMI VM-Exit
handling to invoke the kernel handler by function call.

But this missed a detail. The NMI entry point for direct invocation is
fetched from the IDT table and called on the kernel stack.  But on 64-bit
the NMI entry installed in the IDT expects to be invoked on the IST stack.
It relies on the "NMI executing" variable on the IST stack to work
correctly, which is at a fixed position in the IST stack.  When the entry
point is unexpectedly called on the kernel stack, the RSP-addressed "NMI
executing" variable is obviously also on the kernel stack and is
"uninitialized" and can cause the NMI entry code to run in the wrong way.

Provide a non-ist entry point for VMX which shares the C-function with
the regular NMI entry and invoke the new asm entry point instead.

On 32-bit this just maps to the regular NMI entry point as 32-bit has no
ISTs and is not affected.

[ tglx: Made it independent for backporting, massaged changelog ]

Fixes: 1a5488ef0dcf6 ("KVM: VMX: Invoke NMI handler via indirect call instead of INTn")
Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Lai Jiangshan <laijs@linux.alibaba.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/87r1imi8i1.ffs@nanos.tec.linutronix.de
---
 arch/x86/include/asm/idtentry.h | 15 +++++++++++++++
 arch/x86/kernel/nmi.c           | 10 ++++++++++
 arch/x86/kvm/vmx/vmx.c          | 16 +++++++++-------
 3 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index e35e342673c7..73d45b0dfff2 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -588,6 +588,21 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_MC,	xenpv_exc_machine_check);
 #endif
 
 /* NMI */
+
+#if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)
+/*
+ * Special NOIST entry point for VMX which invokes this on the kernel
+ * stack. asm_exc_nmi() requires an IST to work correctly vs. the NMI
+ * 'executing' marker.
+ *
+ * On 32bit this just uses the regular NMI entry point because 32-bit does
+ * not have ISTs.
+ */
+DECLARE_IDTENTRY(X86_TRAP_NMI,		exc_nmi_noist);
+#else
+#define asm_exc_nmi_noist		asm_exc_nmi
+#endif
+
 DECLARE_IDTENTRY_NMI(X86_TRAP_NMI,	exc_nmi);
 #ifdef CONFIG_XEN_PV
 DECLARE_IDTENTRY_RAW(X86_TRAP_NMI,	xenpv_exc_nmi);
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index bf250a339655..2ef961cf4cfc 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -524,6 +524,16 @@ nmi_restart:
 		mds_user_clear_cpu_buffers();
 }
 
+#if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)
+DEFINE_IDTENTRY_RAW(exc_nmi_noist)
+{
+	exc_nmi(regs);
+}
+#endif
+#if IS_MODULE(CONFIG_KVM_INTEL)
+EXPORT_SYMBOL_GPL(asm_exc_nmi_noist);
+#endif
+
 void stop_nmi(void)
 {
 	ignore_nmis++;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index cbe0cdade38a..b21d751407b5 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -36,6 +36,7 @@
 #include <asm/debugreg.h>
 #include <asm/desc.h>
 #include <asm/fpu/internal.h>
+#include <asm/idtentry.h>
 #include <asm/io.h>
 #include <asm/irq_remapping.h>
 #include <asm/kexec.h>
@@ -6415,18 +6416,17 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
 
 void vmx_do_interrupt_nmi_irqoff(unsigned long entry);
 
-static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu, u32 intr_info)
+static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu,
+					unsigned long entry)
 {
-	unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
-	gate_desc *desc = (gate_desc *)host_idt_base + vector;
-
 	kvm_before_interrupt(vcpu);
-	vmx_do_interrupt_nmi_irqoff(gate_offset(desc));
+	vmx_do_interrupt_nmi_irqoff(entry);
 	kvm_after_interrupt(vcpu);
 }
 
 static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
 {
+	const unsigned long nmi_entry = (unsigned long)asm_exc_nmi_noist;
 	u32 intr_info = vmx_get_intr_info(&vmx->vcpu);
 
 	/* if exit due to PF check for async PF */
@@ -6437,18 +6437,20 @@ static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
 		kvm_machine_check();
 	/* We need to handle NMIs before interrupts are enabled */
 	else if (is_nmi(intr_info))
-		handle_interrupt_nmi_irqoff(&vmx->vcpu, intr_info);
+		handle_interrupt_nmi_irqoff(&vmx->vcpu, nmi_entry);
 }
 
 static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
 {
 	u32 intr_info = vmx_get_intr_info(vcpu);
+	unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
+	gate_desc *desc = (gate_desc *)host_idt_base + vector;
 
 	if (WARN_ONCE(!is_external_intr(intr_info),
 	    "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
 		return;
 
-	handle_interrupt_nmi_irqoff(vcpu, intr_info);
+	handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc));
 }
 
 static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
-- 
cgit v1.2.3


From 866a6dadbb027b2955a7ae00bab9705d382def12 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli@tencent.com>
Date: Tue, 4 May 2021 17:27:28 -0700
Subject: context_tracking: Move guest exit context tracking to separate
 helpers

Provide separate context tracking helpers for guest exit, the standalone
helpers will be called separately by KVM x86 in later patches to fix
tick-based accounting.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210505002735.1684165-2-seanjc@google.com
---
 include/linux/context_tracking.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index bceb06498521..b8c7313495a7 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -131,10 +131,15 @@ static __always_inline void guest_enter_irqoff(void)
 	}
 }
 
-static __always_inline void guest_exit_irqoff(void)
+static __always_inline void context_tracking_guest_exit(void)
 {
 	if (context_tracking_enabled())
 		__context_tracking_exit(CONTEXT_GUEST);
+}
+
+static __always_inline void guest_exit_irqoff(void)
+{
+	context_tracking_guest_exit();
 
 	instrumentation_begin();
 	if (vtime_accounting_enabled_this_cpu())
@@ -159,6 +164,8 @@ static __always_inline void guest_enter_irqoff(void)
 	instrumentation_end();
 }
 
+static __always_inline void context_tracking_guest_exit(void) { }
+
 static __always_inline void guest_exit_irqoff(void)
 {
 	instrumentation_begin();
-- 
cgit v1.2.3


From 88d8220bbf06dd8045b2ac4be1046290eaa7773a Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli@tencent.com>
Date: Tue, 4 May 2021 17:27:29 -0700
Subject: context_tracking: Move guest exit vtime accounting to separate
 helpers

Provide separate vtime accounting functions for guest exit instead of
open coding the logic within the context tracking code.  This will allow
KVM x86 to handle vtime accounting slightly differently when using
tick-based accounting.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Link: https://lore.kernel.org/r/20210505002735.1684165-3-seanjc@google.com
---
 include/linux/context_tracking.h | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index b8c7313495a7..4f4556232dcf 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -137,15 +137,20 @@ static __always_inline void context_tracking_guest_exit(void)
 		__context_tracking_exit(CONTEXT_GUEST);
 }
 
-static __always_inline void guest_exit_irqoff(void)
+static __always_inline void vtime_account_guest_exit(void)
 {
-	context_tracking_guest_exit();
-
-	instrumentation_begin();
 	if (vtime_accounting_enabled_this_cpu())
 		vtime_guest_exit(current);
 	else
 		current->flags &= ~PF_VCPU;
+}
+
+static __always_inline void guest_exit_irqoff(void)
+{
+	context_tracking_guest_exit();
+
+	instrumentation_begin();
+	vtime_account_guest_exit();
 	instrumentation_end();
 }
 
@@ -166,12 +171,17 @@ static __always_inline void guest_enter_irqoff(void)
 
 static __always_inline void context_tracking_guest_exit(void) { }
 
+static __always_inline void vtime_account_guest_exit(void)
+{
+	vtime_account_kernel(current);
+	current->flags &= ~PF_VCPU;
+}
+
 static __always_inline void guest_exit_irqoff(void)
 {
 	instrumentation_begin();
 	/* Flush the guest cputime we spent on the guest */
-	vtime_account_kernel(current);
-	current->flags &= ~PF_VCPU;
+	vtime_account_guest_exit();
 	instrumentation_end();
 }
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
-- 
cgit v1.2.3


From 160457140187c5fb127b844e5a85f87f00a01b14 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli@tencent.com>
Date: Tue, 4 May 2021 17:27:30 -0700
Subject: KVM: x86: Defer vtime accounting 'til after IRQ handling

Defer the call to account guest time until after servicing any IRQ(s)
that happened in the guest or immediately after VM-Exit.  Tick-based
accounting of vCPU time relies on PF_VCPU being set when the tick IRQ
handler runs, and IRQs are blocked throughout the main sequence of
vcpu_enter_guest(), including the call into vendor code to actually
enter and exit the guest.

This fixes a bug where reported guest time remains '0', even when
running an infinite loop in the guest:

  https://bugzilla.kernel.org/show_bug.cgi?id=209831

Fixes: 87fa7f3e98a131 ("x86/kvm: Move context tracking where it belongs")
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210505002735.1684165-4-seanjc@google.com
---
 arch/x86/kvm/svm/svm.c | 6 +++---
 arch/x86/kvm/vmx/vmx.c | 6 +++---
 arch/x86/kvm/x86.c     | 9 +++++++++
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9790c73f2a32..c400def6220b 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3753,15 +3753,15 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
 	 * have them in state 'on' as recorded before entering guest mode.
 	 * Same as enter_from_user_mode().
 	 *
-	 * guest_exit_irqoff() restores host context and reinstates RCU if
-	 * enabled and required.
+	 * context_tracking_guest_exit() restores host context and reinstates
+	 * RCU if enabled and required.
 	 *
 	 * This needs to be done before the below as native_read_msr()
 	 * contains a tracepoint and x86_spec_ctrl_restore_host() calls
 	 * into world and some more.
 	 */
 	lockdep_hardirqs_off(CALLER_ADDR0);
-	guest_exit_irqoff();
+	context_tracking_guest_exit();
 
 	instrumentation_begin();
 	trace_hardirqs_off_finish();
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index b21d751407b5..e108fb47855b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6703,15 +6703,15 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
 	 * have them in state 'on' as recorded before entering guest mode.
 	 * Same as enter_from_user_mode().
 	 *
-	 * guest_exit_irqoff() restores host context and reinstates RCU if
-	 * enabled and required.
+	 * context_tracking_guest_exit() restores host context and reinstates
+	 * RCU if enabled and required.
 	 *
 	 * This needs to be done before the below as native_read_msr()
 	 * contains a tracepoint and x86_spec_ctrl_restore_host() calls
 	 * into world and some more.
 	 */
 	lockdep_hardirqs_off(CALLER_ADDR0);
-	guest_exit_irqoff();
+	context_tracking_guest_exit();
 
 	instrumentation_begin();
 	trace_hardirqs_off_finish();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cebdaa1e3cf5..6eda2834fc05 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9315,6 +9315,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	local_irq_disable();
 	kvm_after_interrupt(vcpu);
 
+	/*
+	 * Wait until after servicing IRQs to account guest time so that any
+	 * ticks that occurred while running the guest are properly accounted
+	 * to the guest.  Waiting until IRQs are enabled degrades the accuracy
+	 * of accounting via context tracking, but the loss of accuracy is
+	 * acceptable for all known use cases.
+	 */
+	vtime_account_guest_exit();
+
 	if (lapic_in_kernel(vcpu)) {
 		s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
 		if (delta != S64_MIN) {
-- 
cgit v1.2.3


From b41c723b203e19480c26f2ec8f04eedc03d34b34 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 17:27:31 -0700
Subject: sched/vtime: Move vtime accounting external declarations above
 inlines

Move the blob of external declarations (and their stubs) above the set of
inline definitions (and their stubs) for vtime accounting.  This will
allow a future patch to bring in more inline definitions without also
having to shuffle large chunks of code.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Link: https://lore.kernel.org/r/20210505002735.1684165-5-seanjc@google.com
---
 include/linux/vtime.h | 74 +++++++++++++++++++++++++--------------------------
 1 file changed, 37 insertions(+), 37 deletions(-)

diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index 041d6524d144..6a4317560539 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -10,6 +10,43 @@
 
 struct task_struct;
 
+/*
+ * Common vtime APIs
+ */
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+extern void vtime_account_kernel(struct task_struct *tsk);
+extern void vtime_account_idle(struct task_struct *tsk);
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
+static inline void vtime_account_kernel(struct task_struct *tsk) { }
+#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+extern void arch_vtime_task_switch(struct task_struct *tsk);
+extern void vtime_user_enter(struct task_struct *tsk);
+extern void vtime_user_exit(struct task_struct *tsk);
+extern void vtime_guest_enter(struct task_struct *tsk);
+extern void vtime_guest_exit(struct task_struct *tsk);
+extern void vtime_init_idle(struct task_struct *tsk, int cpu);
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING_GEN  */
+static inline void vtime_user_enter(struct task_struct *tsk) { }
+static inline void vtime_user_exit(struct task_struct *tsk) { }
+static inline void vtime_guest_enter(struct task_struct *tsk) { }
+static inline void vtime_guest_exit(struct task_struct *tsk) { }
+static inline void vtime_init_idle(struct task_struct *tsk, int cpu) { }
+#endif
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+extern void vtime_account_irq(struct task_struct *tsk, unsigned int offset);
+extern void vtime_account_softirq(struct task_struct *tsk);
+extern void vtime_account_hardirq(struct task_struct *tsk);
+extern void vtime_flush(struct task_struct *tsk);
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+static inline void vtime_account_irq(struct task_struct *tsk, unsigned int offset) { }
+static inline void vtime_account_softirq(struct task_struct *tsk) { }
+static inline void vtime_account_hardirq(struct task_struct *tsk) { }
+static inline void vtime_flush(struct task_struct *tsk) { }
+#endif
+
 /*
  * vtime_accounting_enabled_this_cpu() definitions/declarations
  */
@@ -57,43 +94,6 @@ static inline void vtime_task_switch(struct task_struct *prev) { }
 
 #endif
 
-/*
- * Common vtime APIs
- */
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-extern void vtime_account_kernel(struct task_struct *tsk);
-extern void vtime_account_idle(struct task_struct *tsk);
-#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
-static inline void vtime_account_kernel(struct task_struct *tsk) { }
-#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
-
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-extern void arch_vtime_task_switch(struct task_struct *tsk);
-extern void vtime_user_enter(struct task_struct *tsk);
-extern void vtime_user_exit(struct task_struct *tsk);
-extern void vtime_guest_enter(struct task_struct *tsk);
-extern void vtime_guest_exit(struct task_struct *tsk);
-extern void vtime_init_idle(struct task_struct *tsk, int cpu);
-#else /* !CONFIG_VIRT_CPU_ACCOUNTING_GEN  */
-static inline void vtime_user_enter(struct task_struct *tsk) { }
-static inline void vtime_user_exit(struct task_struct *tsk) { }
-static inline void vtime_guest_enter(struct task_struct *tsk) { }
-static inline void vtime_guest_exit(struct task_struct *tsk) { }
-static inline void vtime_init_idle(struct task_struct *tsk, int cpu) { }
-#endif
-
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-extern void vtime_account_irq(struct task_struct *tsk, unsigned int offset);
-extern void vtime_account_softirq(struct task_struct *tsk);
-extern void vtime_account_hardirq(struct task_struct *tsk);
-extern void vtime_flush(struct task_struct *tsk);
-#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
-static inline void vtime_account_irq(struct task_struct *tsk, unsigned int offset) { }
-static inline void vtime_account_softirq(struct task_struct *tsk) { }
-static inline void vtime_account_hardirq(struct task_struct *tsk) { }
-static inline void vtime_flush(struct task_struct *tsk) { }
-#endif
-
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 extern void irqtime_account_irq(struct task_struct *tsk, unsigned int offset);
-- 
cgit v1.2.3


From 6f922b89e5518143920b10e3643e556d9df58d94 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 17:27:32 -0700
Subject: sched/vtime: Move guest enter/exit vtime accounting to vtime.h

Provide separate helpers for guest enter vtime accounting (in addition to
the existing guest exit helpers), and move all vtime accounting helpers
to vtime.h where the existing #ifdef infrastructure can be leveraged to
better delineate the different types of accounting.  This will also allow
future cleanups via deduplication of context tracking code.

Opportunstically delete the vtime_account_kernel() stub now that all
callers are wrapped with CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210505002735.1684165-6-seanjc@google.com
---
 include/linux/context_tracking.h | 17 +--------------
 include/linux/vtime.h            | 46 ++++++++++++++++++++++++++++++++++------
 2 files changed, 41 insertions(+), 22 deletions(-)

diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index 4f4556232dcf..56c648bdbde8 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -137,14 +137,6 @@ static __always_inline void context_tracking_guest_exit(void)
 		__context_tracking_exit(CONTEXT_GUEST);
 }
 
-static __always_inline void vtime_account_guest_exit(void)
-{
-	if (vtime_accounting_enabled_this_cpu())
-		vtime_guest_exit(current);
-	else
-		current->flags &= ~PF_VCPU;
-}
-
 static __always_inline void guest_exit_irqoff(void)
 {
 	context_tracking_guest_exit();
@@ -163,20 +155,13 @@ static __always_inline void guest_enter_irqoff(void)
 	 * to flush.
 	 */
 	instrumentation_begin();
-	vtime_account_kernel(current);
-	current->flags |= PF_VCPU;
+	vtime_account_guest_enter();
 	rcu_virt_note_context_switch(smp_processor_id());
 	instrumentation_end();
 }
 
 static __always_inline void context_tracking_guest_exit(void) { }
 
-static __always_inline void vtime_account_guest_exit(void)
-{
-	vtime_account_kernel(current);
-	current->flags &= ~PF_VCPU;
-}
-
 static __always_inline void guest_exit_irqoff(void)
 {
 	instrumentation_begin();
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index 6a4317560539..3684487d01e1 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -3,21 +3,18 @@
 #define _LINUX_KERNEL_VTIME_H
 
 #include <linux/context_tracking_state.h>
+#include <linux/sched.h>
+
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 #include <asm/vtime.h>
 #endif
 
-
-struct task_struct;
-
 /*
  * Common vtime APIs
  */
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 extern void vtime_account_kernel(struct task_struct *tsk);
 extern void vtime_account_idle(struct task_struct *tsk);
-#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
-static inline void vtime_account_kernel(struct task_struct *tsk) { }
 #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
@@ -55,6 +52,18 @@ static inline void vtime_flush(struct task_struct *tsk) { }
 static inline bool vtime_accounting_enabled_this_cpu(void) { return true; }
 extern void vtime_task_switch(struct task_struct *prev);
 
+static __always_inline void vtime_account_guest_enter(void)
+{
+	vtime_account_kernel(current);
+	current->flags |= PF_VCPU;
+}
+
+static __always_inline void vtime_account_guest_exit(void)
+{
+	vtime_account_kernel(current);
+	current->flags &= ~PF_VCPU;
+}
+
 #elif defined(CONFIG_VIRT_CPU_ACCOUNTING_GEN)
 
 /*
@@ -86,12 +95,37 @@ static inline void vtime_task_switch(struct task_struct *prev)
 		vtime_task_switch_generic(prev);
 }
 
+static __always_inline void vtime_account_guest_enter(void)
+{
+	if (vtime_accounting_enabled_this_cpu())
+		vtime_guest_enter(current);
+	else
+		current->flags |= PF_VCPU;
+}
+
+static __always_inline void vtime_account_guest_exit(void)
+{
+	if (vtime_accounting_enabled_this_cpu())
+		vtime_guest_exit(current);
+	else
+		current->flags &= ~PF_VCPU;
+}
+
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING */
 
-static inline bool vtime_accounting_enabled_cpu(int cpu) {return false; }
 static inline bool vtime_accounting_enabled_this_cpu(void) { return false; }
 static inline void vtime_task_switch(struct task_struct *prev) { }
 
+static __always_inline void vtime_account_guest_enter(void)
+{
+	current->flags |= PF_VCPU;
+}
+
+static __always_inline void vtime_account_guest_exit(void)
+{
+	current->flags &= ~PF_VCPU;
+}
+
 #endif
 
 
-- 
cgit v1.2.3


From 14296e0c447885d6c7b326e059fb528eb00526ed Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 17:27:33 -0700
Subject: context_tracking: Consolidate guest enter/exit wrappers

Consolidate the guest enter/exit wrappers, providing and tweaking stubs
as needed.  This will allow moving the wrappers under KVM without having
to bleed #ifdefs into the soon-to-be KVM code.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210505002735.1684165-7-seanjc@google.com
---
 include/linux/context_tracking.h | 65 +++++++++++++++-------------------------
 1 file changed, 24 insertions(+), 41 deletions(-)

diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index 56c648bdbde8..aa58c2ac67ca 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -71,6 +71,19 @@ static inline void exception_exit(enum ctx_state prev_ctx)
 	}
 }
 
+static __always_inline bool context_tracking_guest_enter(void)
+{
+	if (context_tracking_enabled())
+		__context_tracking_enter(CONTEXT_GUEST);
+
+	return context_tracking_enabled_this_cpu();
+}
+
+static __always_inline void context_tracking_guest_exit(void)
+{
+	if (context_tracking_enabled())
+		__context_tracking_exit(CONTEXT_GUEST);
+}
 
 /**
  * ct_state() - return the current context tracking state if known
@@ -92,6 +105,9 @@ static inline void user_exit_irqoff(void) { }
 static inline enum ctx_state exception_enter(void) { return 0; }
 static inline void exception_exit(enum ctx_state prev_ctx) { }
 static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; }
+static inline bool context_tracking_guest_enter(void) { return false; }
+static inline void context_tracking_guest_exit(void) { }
+
 #endif /* !CONFIG_CONTEXT_TRACKING */
 
 #define CT_WARN_ON(cond) WARN_ON(context_tracking_enabled() && (cond))
@@ -102,74 +118,41 @@ extern void context_tracking_init(void);
 static inline void context_tracking_init(void) { }
 #endif /* CONFIG_CONTEXT_TRACKING_FORCE */
 
-
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 /* must be called with irqs disabled */
 static __always_inline void guest_enter_irqoff(void)
 {
+	/*
+	 * This is running in ioctl context so its safe to assume that it's the
+	 * stime pending cputime to flush.
+	 */
 	instrumentation_begin();
-	if (vtime_accounting_enabled_this_cpu())
-		vtime_guest_enter(current);
-	else
-		current->flags |= PF_VCPU;
+	vtime_account_guest_enter();
 	instrumentation_end();
 
-	if (context_tracking_enabled())
-		__context_tracking_enter(CONTEXT_GUEST);
-
-	/* KVM does not hold any references to rcu protected data when it
+	/*
+	 * KVM does not hold any references to rcu protected data when it
 	 * switches CPU into a guest mode. In fact switching to a guest mode
 	 * is very similar to exiting to userspace from rcu point of view. In
 	 * addition CPU may stay in a guest mode for quite a long time (up to
 	 * one time slice). Lets treat guest mode as quiescent state, just like
 	 * we do with user-mode execution.
 	 */
-	if (!context_tracking_enabled_this_cpu()) {
+	if (!context_tracking_guest_enter()) {
 		instrumentation_begin();
 		rcu_virt_note_context_switch(smp_processor_id());
 		instrumentation_end();
 	}
 }
 
-static __always_inline void context_tracking_guest_exit(void)
-{
-	if (context_tracking_enabled())
-		__context_tracking_exit(CONTEXT_GUEST);
-}
-
 static __always_inline void guest_exit_irqoff(void)
 {
 	context_tracking_guest_exit();
 
-	instrumentation_begin();
-	vtime_account_guest_exit();
-	instrumentation_end();
-}
-
-#else
-static __always_inline void guest_enter_irqoff(void)
-{
-	/*
-	 * This is running in ioctl context so its safe
-	 * to assume that it's the stime pending cputime
-	 * to flush.
-	 */
-	instrumentation_begin();
-	vtime_account_guest_enter();
-	rcu_virt_note_context_switch(smp_processor_id());
-	instrumentation_end();
-}
-
-static __always_inline void context_tracking_guest_exit(void) { }
-
-static __always_inline void guest_exit_irqoff(void)
-{
 	instrumentation_begin();
 	/* Flush the guest cputime we spent on the guest */
 	vtime_account_guest_exit();
 	instrumentation_end();
 }
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
 
 static inline void guest_exit(void)
 {
-- 
cgit v1.2.3


From 1ca0016c149be35fe19a6b75fce95c25807b7159 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 17:27:34 -0700
Subject: context_tracking: KVM: Move guest enter/exit wrappers to KVM's domain

Move the guest enter/exit wrappers to kvm_host.h so that KVM can manage
its context tracking vs. vtime accounting without bleeding too many KVM
details into the context tracking code.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210505002735.1684165-8-seanjc@google.com
---
 include/linux/context_tracking.h | 45 ----------------------------------------
 include/linux/kvm_host.h         | 45 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 45 deletions(-)

diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index aa58c2ac67ca..4d7fced3a39f 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -118,49 +118,4 @@ extern void context_tracking_init(void);
 static inline void context_tracking_init(void) { }
 #endif /* CONFIG_CONTEXT_TRACKING_FORCE */
 
-/* must be called with irqs disabled */
-static __always_inline void guest_enter_irqoff(void)
-{
-	/*
-	 * This is running in ioctl context so its safe to assume that it's the
-	 * stime pending cputime to flush.
-	 */
-	instrumentation_begin();
-	vtime_account_guest_enter();
-	instrumentation_end();
-
-	/*
-	 * KVM does not hold any references to rcu protected data when it
-	 * switches CPU into a guest mode. In fact switching to a guest mode
-	 * is very similar to exiting to userspace from rcu point of view. In
-	 * addition CPU may stay in a guest mode for quite a long time (up to
-	 * one time slice). Lets treat guest mode as quiescent state, just like
-	 * we do with user-mode execution.
-	 */
-	if (!context_tracking_guest_enter()) {
-		instrumentation_begin();
-		rcu_virt_note_context_switch(smp_processor_id());
-		instrumentation_end();
-	}
-}
-
-static __always_inline void guest_exit_irqoff(void)
-{
-	context_tracking_guest_exit();
-
-	instrumentation_begin();
-	/* Flush the guest cputime we spent on the guest */
-	vtime_account_guest_exit();
-	instrumentation_end();
-}
-
-static inline void guest_exit(void)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	guest_exit_irqoff();
-	local_irq_restore(flags);
-}
-
 #endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8895b95b6a22..2f34487e21f2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -338,6 +338,51 @@ struct kvm_vcpu {
 	struct kvm_dirty_ring dirty_ring;
 };
 
+/* must be called with irqs disabled */
+static __always_inline void guest_enter_irqoff(void)
+{
+	/*
+	 * This is running in ioctl context so its safe to assume that it's the
+	 * stime pending cputime to flush.
+	 */
+	instrumentation_begin();
+	vtime_account_guest_enter();
+	instrumentation_end();
+
+	/*
+	 * KVM does not hold any references to rcu protected data when it
+	 * switches CPU into a guest mode. In fact switching to a guest mode
+	 * is very similar to exiting to userspace from rcu point of view. In
+	 * addition CPU may stay in a guest mode for quite a long time (up to
+	 * one time slice). Lets treat guest mode as quiescent state, just like
+	 * we do with user-mode execution.
+	 */
+	if (!context_tracking_guest_enter()) {
+		instrumentation_begin();
+		rcu_virt_note_context_switch(smp_processor_id());
+		instrumentation_end();
+	}
+}
+
+static __always_inline void guest_exit_irqoff(void)
+{
+	context_tracking_guest_exit();
+
+	instrumentation_begin();
+	/* Flush the guest cputime we spent on the guest */
+	vtime_account_guest_exit();
+	instrumentation_end();
+}
+
+static inline void guest_exit(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	guest_exit_irqoff();
+	local_irq_restore(flags);
+}
+
 static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
 {
 	/*
-- 
cgit v1.2.3


From bc908e091b3264672889162733020048901021fb Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 17:27:35 -0700
Subject: KVM: x86: Consolidate guest enter/exit logic to common helpers

Move the enter/exit logic in {svm,vmx}_vcpu_enter_exit() to common
helpers.  Opportunistically update the somewhat stale comment about the
updates needing to occur immediately after VM-Exit.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210505002735.1684165-9-seanjc@google.com
---
 arch/x86/kvm/svm/svm.c | 39 ++-------------------------------------
 arch/x86/kvm/vmx/vmx.c | 39 ++-------------------------------------
 arch/x86/kvm/x86.h     | 45 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 49 insertions(+), 74 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index c400def6220b..b649f92287a2 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3710,25 +3710,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 	unsigned long vmcb_pa = svm->current_vmcb->pa;
 
-	/*
-	 * VMENTER enables interrupts (host state), but the kernel state is
-	 * interrupts disabled when this is invoked. Also tell RCU about
-	 * it. This is the same logic as for exit_to_user_mode().
-	 *
-	 * This ensures that e.g. latency analysis on the host observes
-	 * guest mode as interrupt enabled.
-	 *
-	 * guest_enter_irqoff() informs context tracking about the
-	 * transition to guest mode and if enabled adjusts RCU state
-	 * accordingly.
-	 */
-	instrumentation_begin();
-	trace_hardirqs_on_prepare();
-	lockdep_hardirqs_on_prepare(CALLER_ADDR0);
-	instrumentation_end();
-
-	guest_enter_irqoff();
-	lockdep_hardirqs_on(CALLER_ADDR0);
+	kvm_guest_enter_irqoff();
 
 	if (sev_es_guest(vcpu->kvm)) {
 		__svm_sev_es_vcpu_run(vmcb_pa);
@@ -3748,24 +3730,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
 		vmload(__sme_page_pa(sd->save_area));
 	}
 
-	/*
-	 * VMEXIT disables interrupts (host state), but tracing and lockdep
-	 * have them in state 'on' as recorded before entering guest mode.
-	 * Same as enter_from_user_mode().
-	 *
-	 * context_tracking_guest_exit() restores host context and reinstates
-	 * RCU if enabled and required.
-	 *
-	 * This needs to be done before the below as native_read_msr()
-	 * contains a tracepoint and x86_spec_ctrl_restore_host() calls
-	 * into world and some more.
-	 */
-	lockdep_hardirqs_off(CALLER_ADDR0);
-	context_tracking_guest_exit();
-
-	instrumentation_begin();
-	trace_hardirqs_off_finish();
-	instrumentation_end();
+	kvm_guest_exit_irqoff();
 }
 
 static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e108fb47855b..d000cddbd734 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6664,25 +6664,7 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
 static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
 					struct vcpu_vmx *vmx)
 {
-	/*
-	 * VMENTER enables interrupts (host state), but the kernel state is
-	 * interrupts disabled when this is invoked. Also tell RCU about
-	 * it. This is the same logic as for exit_to_user_mode().
-	 *
-	 * This ensures that e.g. latency analysis on the host observes
-	 * guest mode as interrupt enabled.
-	 *
-	 * guest_enter_irqoff() informs context tracking about the
-	 * transition to guest mode and if enabled adjusts RCU state
-	 * accordingly.
-	 */
-	instrumentation_begin();
-	trace_hardirqs_on_prepare();
-	lockdep_hardirqs_on_prepare(CALLER_ADDR0);
-	instrumentation_end();
-
-	guest_enter_irqoff();
-	lockdep_hardirqs_on(CALLER_ADDR0);
+	kvm_guest_enter_irqoff();
 
 	/* L1D Flush includes CPU buffer clear to mitigate MDS */
 	if (static_branch_unlikely(&vmx_l1d_should_flush))
@@ -6698,24 +6680,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
 
 	vcpu->arch.cr2 = native_read_cr2();
 
-	/*
-	 * VMEXIT disables interrupts (host state), but tracing and lockdep
-	 * have them in state 'on' as recorded before entering guest mode.
-	 * Same as enter_from_user_mode().
-	 *
-	 * context_tracking_guest_exit() restores host context and reinstates
-	 * RCU if enabled and required.
-	 *
-	 * This needs to be done before the below as native_read_msr()
-	 * contains a tracepoint and x86_spec_ctrl_restore_host() calls
-	 * into world and some more.
-	 */
-	lockdep_hardirqs_off(CALLER_ADDR0);
-	context_tracking_guest_exit();
-
-	instrumentation_begin();
-	trace_hardirqs_off_finish();
-	instrumentation_end();
+	kvm_guest_exit_irqoff();
 }
 
 static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 8ddd38146525..521f74e5bbf2 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -8,6 +8,51 @@
 #include "kvm_cache_regs.h"
 #include "kvm_emulate.h"
 
+static __always_inline void kvm_guest_enter_irqoff(void)
+{
+	/*
+	 * VMENTER enables interrupts (host state), but the kernel state is
+	 * interrupts disabled when this is invoked. Also tell RCU about
+	 * it. This is the same logic as for exit_to_user_mode().
+	 *
+	 * This ensures that e.g. latency analysis on the host observes
+	 * guest mode as interrupt enabled.
+	 *
+	 * guest_enter_irqoff() informs context tracking about the
+	 * transition to guest mode and if enabled adjusts RCU state
+	 * accordingly.
+	 */
+	instrumentation_begin();
+	trace_hardirqs_on_prepare();
+	lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+	instrumentation_end();
+
+	guest_enter_irqoff();
+	lockdep_hardirqs_on(CALLER_ADDR0);
+}
+
+static __always_inline void kvm_guest_exit_irqoff(void)
+{
+	/*
+	 * VMEXIT disables interrupts (host state), but tracing and lockdep
+	 * have them in state 'on' as recorded before entering guest mode.
+	 * Same as enter_from_user_mode().
+	 *
+	 * context_tracking_guest_exit() restores host context and reinstates
+	 * RCU if enabled and required.
+	 *
+	 * This needs to be done immediately after VM-Exit, before any code
+	 * that might contain tracepoints or call out to the greater world,
+	 * e.g. before x86_spec_ctrl_restore_host().
+	 */
+	lockdep_hardirqs_off(CALLER_ADDR0);
+	context_tracking_guest_exit();
+
+	instrumentation_begin();
+	trace_hardirqs_off_finish();
+	instrumentation_end();
+}
+
 #define KVM_NESTED_VMENTER_CONSISTENCY_CHECK(consistency_check)		\
 ({									\
 	bool failed = (consistency_check);				\
-- 
cgit v1.2.3


From 8bf073ca9235fe38d7b74a0b4e779cfa7cc70fc9 Mon Sep 17 00:00:00 2001
From: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Date: Wed, 5 May 2021 03:27:49 +0200
Subject: drm/amdgpu: Init GFX10_ADDR_CONFIG for VCN v3 in DPG mode.

Otherwise tiling modes that require the values form this field
(In particular _*_X) would be corrupted upon video decode.

Copied from the VCN v2 code.

Fixes: 99541f392b4d ("drm/amdgpu: add mc resume DPG mode for VCN3.0")
Reviewed-and-Tested by: Leo Liu <leo.liu@amd.com>
Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index 3f15bf34123a..cf165ab5dd26 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -589,6 +589,10 @@ static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx
 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
 			VCN, inst_idx, mmUVD_VCPU_NONCACHE_SIZE0),
 			AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect);
+
+	/* VCN global tiling registers */
+	WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+		UVD, 0, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
 }
 
 static void vcn_v3_0_disable_static_power_gating(struct amdgpu_device *adev, int inst)
-- 
cgit v1.2.3


From 234055fd9728e6726787bc63b24b6450034876cf Mon Sep 17 00:00:00 2001
From: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Date: Tue, 4 May 2021 11:43:34 +0200
Subject: drm/amdgpu: Use device specific BO size & stride check.

The builtin size check isn't really the right thing for AMD
modifiers due to a couple of reasons:

1) In the format structs we don't do set any of the tilesize / blocks
etc. to avoid having format arrays per modifier/GPU
2) The pitch on the main plane is pixel_pitch * bytes_per_pixel even
for tiled ...
3) The pitch for the DCC planes is really the pixel pitch of the main
surface that would be covered by it ...

Note that we only handle GFX9+ case but we do this after converting
the implicit modifier to an explicit modifier, so on GFX9+ all
framebuffers should be checked here.

There is a TODO about DCC alignment, but it isn't worse than before
and I'd need to dig a bunch into the specifics. Getting this out in
a reasonable timeframe to make sure it gets the appropriate testing
seemed more important.

Finally as I've found that debugging addfb2 failures is a pita I was
generous adding explicit error messages to every failure case.

Fixes: f258907fdd83 ("drm/amdgpu: Verify bo size can fit framebuffer size on init.")
Tested-by: Simon Ser <contact@emersion.fr>
Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 181 +++++++++++++++++++++++++++-
 1 file changed, 175 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index c4cfefb33e03..dab98a1fdcaa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -837,6 +837,174 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
 	return 0;
 }
 
+static void get_block_dimensions(unsigned int block_log2, unsigned int cpp,
+				 unsigned int *width, unsigned int *height)
+{
+	unsigned int cpp_log2 = ilog2(cpp);
+	unsigned int pixel_log2 = block_log2 - cpp_log2;
+	unsigned int width_log2 = (pixel_log2 + 1) / 2;
+	unsigned int height_log2 = pixel_log2 - width_log2;
+
+	*width = 1 << width_log2;
+	*height = 1 << height_log2;
+}
+
+static unsigned int get_dcc_block_size(uint64_t modifier, bool rb_aligned,
+				       bool pipe_aligned)
+{
+	unsigned int ver = AMD_FMT_MOD_GET(TILE_VERSION, modifier);
+
+	switch (ver) {
+	case AMD_FMT_MOD_TILE_VER_GFX9: {
+		/*
+		 * TODO: for pipe aligned we may need to check the alignment of the
+		 * total size of the surface, which may need to be bigger than the
+		 * natural alignment due to some HW workarounds
+		 */
+		return max(10 + (rb_aligned ? (int)AMD_FMT_MOD_GET(RB, modifier) : 0), 12);
+	}
+	case AMD_FMT_MOD_TILE_VER_GFX10:
+	case AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS: {
+		int pipes_log2 = AMD_FMT_MOD_GET(PIPE_XOR_BITS, modifier);
+
+		if (ver == AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS && pipes_log2 > 1 &&
+		    AMD_FMT_MOD_GET(PACKERS, modifier) == pipes_log2)
+			++pipes_log2;
+
+		return max(8 + (pipe_aligned ? pipes_log2 : 0), 12);
+	}
+	default:
+		return 0;
+	}
+}
+
+static int amdgpu_display_verify_plane(struct amdgpu_framebuffer *rfb, int plane,
+				       const struct drm_format_info *format,
+				       unsigned int block_width, unsigned int block_height,
+				       unsigned int block_size_log2)
+{
+	unsigned int width = rfb->base.width /
+		((plane && plane < format->num_planes) ? format->hsub : 1);
+	unsigned int height = rfb->base.height /
+		((plane && plane < format->num_planes) ? format->vsub : 1);
+	unsigned int cpp = plane < format->num_planes ? format->cpp[plane] : 1;
+	unsigned int block_pitch = block_width * cpp;
+	unsigned int min_pitch = ALIGN(width * cpp, block_pitch);
+	unsigned int block_size = 1 << block_size_log2;
+	uint64_t size;
+
+	if (rfb->base.pitches[plane] % block_pitch) {
+		drm_dbg_kms(rfb->base.dev,
+			    "pitch %d for plane %d is not a multiple of block pitch %d\n",
+			    rfb->base.pitches[plane], plane, block_pitch);
+		return -EINVAL;
+	}
+	if (rfb->base.pitches[plane] < min_pitch) {
+		drm_dbg_kms(rfb->base.dev,
+			    "pitch %d for plane %d is less than minimum pitch %d\n",
+			    rfb->base.pitches[plane], plane, min_pitch);
+		return -EINVAL;
+	}
+
+	/* Force at least natural alignment. */
+	if (rfb->base.offsets[plane] % block_size) {
+		drm_dbg_kms(rfb->base.dev,
+			    "offset 0x%x for plane %d is not a multiple of block pitch 0x%x\n",
+			    rfb->base.offsets[plane], plane, block_size);
+		return -EINVAL;
+	}
+
+	size = rfb->base.offsets[plane] +
+		(uint64_t)rfb->base.pitches[plane] / block_pitch *
+		block_size * DIV_ROUND_UP(height, block_height);
+
+	if (rfb->base.obj[0]->size < size) {
+		drm_dbg_kms(rfb->base.dev,
+			    "BO size 0x%zx is less than 0x%llx required for plane %d\n",
+			    rfb->base.obj[0]->size, size, plane);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+
+static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb)
+{
+	const struct drm_format_info *format_info = drm_format_info(rfb->base.format->format);
+	uint64_t modifier = rfb->base.modifier;
+	int ret;
+	unsigned int i, block_width, block_height, block_size_log2;
+
+	if (!rfb->base.dev->mode_config.allow_fb_modifiers)
+		return 0;
+
+	for (i = 0; i < format_info->num_planes; ++i) {
+		if (modifier == DRM_FORMAT_MOD_LINEAR) {
+			block_width = 256 / format_info->cpp[i];
+			block_height = 1;
+			block_size_log2 = 8;
+		} else {
+			int swizzle = AMD_FMT_MOD_GET(TILE, modifier);
+
+			switch ((swizzle & ~3) + 1) {
+			case DC_SW_256B_S:
+				block_size_log2 = 8;
+				break;
+			case DC_SW_4KB_S:
+			case DC_SW_4KB_S_X:
+				block_size_log2 = 12;
+				break;
+			case DC_SW_64KB_S:
+			case DC_SW_64KB_S_T:
+			case DC_SW_64KB_S_X:
+				block_size_log2 = 16;
+				break;
+			default:
+				drm_dbg_kms(rfb->base.dev,
+					    "Swizzle mode with unknown block size: %d\n", swizzle);
+				return -EINVAL;
+			}
+
+			get_block_dimensions(block_size_log2, format_info->cpp[i],
+					     &block_width, &block_height);
+		}
+
+		ret = amdgpu_display_verify_plane(rfb, i, format_info,
+						  block_width, block_height, block_size_log2);
+		if (ret)
+			return ret;
+	}
+
+	if (AMD_FMT_MOD_GET(DCC, modifier)) {
+		if (AMD_FMT_MOD_GET(DCC_RETILE, modifier)) {
+			block_size_log2 = get_dcc_block_size(modifier, false, false);
+			get_block_dimensions(block_size_log2 + 8, format_info->cpp[0],
+					     &block_width, &block_height);
+			ret = amdgpu_display_verify_plane(rfb, i, format_info,
+							  block_width, block_height,
+							  block_size_log2);
+			if (ret)
+				return ret;
+
+			++i;
+			block_size_log2 = get_dcc_block_size(modifier, true, true);
+		} else {
+			bool pipe_aligned = AMD_FMT_MOD_GET(DCC_PIPE_ALIGN, modifier);
+
+			block_size_log2 = get_dcc_block_size(modifier, true, pipe_aligned);
+		}
+		get_block_dimensions(block_size_log2 + 8, format_info->cpp[0],
+				     &block_width, &block_height);
+		ret = amdgpu_display_verify_plane(rfb, i, format_info,
+						  block_width, block_height, block_size_log2);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb,
 				      uint64_t *tiling_flags, bool *tmz_surface)
 {
@@ -902,10 +1070,8 @@ int amdgpu_display_gem_fb_verify_and_init(
 	int ret;
 
 	rfb->base.obj[0] = obj;
-
-	/* Verify that bo size can fit the fb size. */
-	ret = drm_gem_fb_init_with_funcs(dev, &rfb->base, file_priv, mode_cmd,
-					 &amdgpu_fb_funcs);
+	drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd);
+	ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
 	if (ret)
 		goto err;
 
@@ -954,9 +1120,12 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev,
 		}
 	}
 
-	for (i = 1; i < rfb->base.format->num_planes; ++i) {
+	ret = amdgpu_display_verify_sizes(rfb);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < rfb->base.format->num_planes; ++i) {
 		drm_gem_object_get(rfb->base.obj[0]);
-		drm_gem_object_put(rfb->base.obj[i]);
 		rfb->base.obj[i] = rfb->base.obj[0];
 	}
 
-- 
cgit v1.2.3


From 19987fdad506515a92b3c430076cbdb329a11aee Mon Sep 17 00:00:00 2001
From: Barry Song <song.bao.hua@hisilicon.com>
Date: Tue, 4 May 2021 22:53:43 +1200
Subject: sched,doc: sched_debug_verbose cmdline should be sched_verbose

The cmdline should include sched_verbose but not sched_debug_verbose
as sched_debug_verbose is only the variant name in code.

Fixes: 9406415f46 ("sched/debug: Rename the sched_debug parameter to sched_verbose")
Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Link: https://lkml.kernel.org/r/20210504105344.31344-1-song.bao.hua@hisilicon.com
---
 Documentation/scheduler/sched-domains.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/scheduler/sched-domains.rst b/Documentation/scheduler/sched-domains.rst
index 14ea2f21d20e..84dcdcd2911c 100644
--- a/Documentation/scheduler/sched-domains.rst
+++ b/Documentation/scheduler/sched-domains.rst
@@ -74,7 +74,7 @@ for a given topology level by creating a sched_domain_topology_level array and
 calling set_sched_topology() with this array as the parameter.
 
 The sched-domains debugging infrastructure can be enabled by enabling
-CONFIG_SCHED_DEBUG and adding 'sched_debug_verbose' to your cmdline. If you
+CONFIG_SCHED_DEBUG and adding 'sched_verbose' to your cmdline. If you
 forgot to tweak your cmdline, you can also flip the
 /sys/kernel/debug/sched/verbose knob. This enables an error checking parse of
 the sched domains which should catch most possible errors (described above). It
-- 
cgit v1.2.3


From d583d360a620e6229422b3455d0be082b8255f5e Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Mon, 3 May 2021 13:49:17 -0400
Subject: psi: Fix psi state corruption when schedule() races with cgroup move

4117cebf1a9f ("psi: Optimize task switch inside shared cgroups")
introduced a race condition that corrupts internal psi state. This
manifests as kernel warnings, sometimes followed by bogusly high IO
pressure:

  psi: task underflow! cpu=1 t=2 tasks=[0 0 0 0] clear=c set=0
  (schedule() decreasing RUNNING and ONCPU, both of which are 0)

  psi: incosistent task state! task=2412744:systemd cpu=17 psi_flags=e clear=3 set=0
  (cgroup_move_task() clearing MEMSTALL and IOWAIT, but task is MEMSTALL | RUNNING | ONCPU)

What the offending commit does is batch the two psi callbacks in
schedule() to reduce the number of cgroup tree updates. When prev is
deactivated and removed from the runqueue, nothing is done in psi at
first; when the task switch completes, TSK_RUNNING and TSK_IOWAIT are
updated along with TSK_ONCPU.

However, the deactivation and the task switch inside schedule() aren't
atomic: pick_next_task() may drop the rq lock for load balancing. When
this happens, cgroup_move_task() can run after the task has been
physically dequeued, but the psi updates are still pending. Since it
looks at the task's scheduler state, it doesn't move everything to the
new cgroup that the task switch that follows is about to clear from
it. cgroup_move_task() will leak the TSK_RUNNING count in the old
cgroup, and psi_sched_switch() will underflow it in the new cgroup.

A similar thing can happen for iowait. TSK_IOWAIT is usually set when
a p->in_iowait task is dequeued, but again this update is deferred to
the switch. cgroup_move_task() can see an unqueued p->in_iowait task
and move a non-existent TSK_IOWAIT. This results in the inconsistent
task state warning, as well as a counter underflow that will result in
permanent IO ghost pressure being reported.

Fix this bug by making cgroup_move_task() use task->psi_flags instead
of looking at the potentially mismatching scheduler state.

[ We used the scheduler state historically in order to not rely on
  task->psi_flags for anything but debugging. But that ship has sailed
  anyway, and this is simpler and more robust.

  We previously already batched TSK_ONCPU clearing with the
  TSK_RUNNING update inside the deactivation call from schedule(). But
  that ordering was safe and didn't result in TSK_ONCPU corruption:
  unlike most places in the scheduler, cgroup_move_task() only checked
  task_current() and handled TSK_ONCPU if the task was still queued. ]

Fixes: 4117cebf1a9f ("psi: Optimize task switch inside shared cgroups")
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210503174917.38579-1-hannes@cmpxchg.org
---
 kernel/sched/psi.c | 36 ++++++++++++++++++++++++++----------
 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index db27b69fa92a..cc25a3cff41f 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -972,7 +972,7 @@ void psi_cgroup_free(struct cgroup *cgroup)
  */
 void cgroup_move_task(struct task_struct *task, struct css_set *to)
 {
-	unsigned int task_flags = 0;
+	unsigned int task_flags;
 	struct rq_flags rf;
 	struct rq *rq;
 
@@ -987,15 +987,31 @@ void cgroup_move_task(struct task_struct *task, struct css_set *to)
 
 	rq = task_rq_lock(task, &rf);
 
-	if (task_on_rq_queued(task)) {
-		task_flags = TSK_RUNNING;
-		if (task_current(rq, task))
-			task_flags |= TSK_ONCPU;
-	} else if (task->in_iowait)
-		task_flags = TSK_IOWAIT;
-
-	if (task->in_memstall)
-		task_flags |= TSK_MEMSTALL;
+	/*
+	 * We may race with schedule() dropping the rq lock between
+	 * deactivating prev and switching to next. Because the psi
+	 * updates from the deactivation are deferred to the switch
+	 * callback to save cgroup tree updates, the task's scheduling
+	 * state here is not coherent with its psi state:
+	 *
+	 * schedule()                   cgroup_move_task()
+	 *   rq_lock()
+	 *   deactivate_task()
+	 *     p->on_rq = 0
+	 *     psi_dequeue() // defers TSK_RUNNING & TSK_IOWAIT updates
+	 *   pick_next_task()
+	 *     rq_unlock()
+	 *                                rq_lock()
+	 *                                psi_task_change() // old cgroup
+	 *                                task->cgroups = to
+	 *                                psi_task_change() // new cgroup
+	 *                                rq_unlock()
+	 *     rq_lock()
+	 *   psi_sched_switch() // does deferred updates in new cgroup
+	 *
+	 * Don't rely on the scheduling state. Use psi_flags instead.
+	 */
+	task_flags = task->psi_flags;
 
 	if (task_flags)
 		psi_task_change(task, task_flags, 0);
-- 
cgit v1.2.3


From 6d2f8909a5fabb73fe2a63918117943986c39b6c Mon Sep 17 00:00:00 2001
From: Quentin Perret <qperret@google.com>
Date: Fri, 30 Apr 2021 15:14:12 +0000
Subject: sched: Fix out-of-bound access in uclamp

Util-clamp places tasks in different buckets based on their clamp values
for performance reasons. However, the size of buckets is currently
computed using a rounding division, which can lead to an off-by-one
error in some configurations.

For instance, with 20 buckets, the bucket size will be 1024/20=51. A
task with a clamp of 1024 will be mapped to bucket id 1024/51=20. Sadly,
correct indexes are in range [0,19], hence leading to an out of bound
memory access.

Clamp the bucket id to fix the issue.

Fixes: 69842cba9ace ("sched/uclamp: Add CPU's clamp buckets refcounting")
Suggested-by: Qais Yousef <qais.yousef@arm.com>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Link: https://lkml.kernel.org/r/20210430151412.160913-1-qperret@google.com
---
 kernel/sched/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9143163fa678..5226cc26a095 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -938,7 +938,7 @@ DEFINE_STATIC_KEY_FALSE(sched_uclamp_used);
 
 static inline unsigned int uclamp_bucket_id(unsigned int clamp_value)
 {
-	return clamp_value / UCLAMP_BUCKET_DELTA;
+	return min_t(unsigned int, clamp_value / UCLAMP_BUCKET_DELTA, UCLAMP_BUCKETS - 1);
 }
 
 static inline unsigned int uclamp_none(enum uclamp_id clamp_id)
-- 
cgit v1.2.3


From 0258bdfaff5bd13c4d2383150b7097aecd6b6d82 Mon Sep 17 00:00:00 2001
From: Odin Ugedal <odin@uged.al>
Date: Sat, 1 May 2021 16:19:50 +0200
Subject: sched/fair: Fix unfairness caused by missing load decay

This fixes an issue where old load on a cfs_rq is not properly decayed,
resulting in strange behavior where fairness can decrease drastically.
Real workloads with equally weighted control groups have ended up
getting a respective 99% and 1%(!!) of cpu time.

When an idle task is attached to a cfs_rq by attaching a pid to a cgroup,
the old load of the task is attached to the new cfs_rq and sched_entity by
attach_entity_cfs_rq. If the task is then moved to another cpu (and
therefore cfs_rq) before being enqueued/woken up, the load will be moved
to cfs_rq->removed from the sched_entity. Such a move will happen when
enforcing a cpuset on the task (eg. via a cgroup) that force it to move.

The load will however not be removed from the task_group itself, making
it look like there is a constant load on that cfs_rq. This causes the
vruntime of tasks on other sibling cfs_rq's to increase faster than they
are supposed to; causing severe fairness issues. If no other task is
started on the given cfs_rq, and due to the cpuset it would not happen,
this load would never be properly unloaded. With this patch the load
will be properly removed inside update_blocked_averages. This also
applies to tasks moved to the fair scheduling class and moved to another
cpu, and this path will also fix that. For fork, the entity is queued
right away, so this problem does not affect that.

This applies to cases where the new process is the first in the cfs_rq,
issue introduced 3d30544f0212 ("sched/fair: Apply more PELT fixes"), and
when there has previously been load on the cgroup but the cgroup was
removed from the leaflist due to having null PELT load, indroduced
in 039ae8bcf7a5 ("sched/fair: Fix O(nr_cgroups) in the load balancing
path").

For a simple cgroup hierarchy (as seen below) with two equally weighted
groups, that in theory should get 50/50 of cpu time each, it often leads
to a load of 60/40 or 70/30.

parent/
  cg-1/
    cpu.weight: 100
    cpuset.cpus: 1
  cg-2/
    cpu.weight: 100
    cpuset.cpus: 1

If the hierarchy is deeper (as seen below), while keeping cg-1 and cg-2
equally weighted, they should still get a 50/50 balance of cpu time.
This however sometimes results in a balance of 10/90 or 1/99(!!) between
the task groups.

$ ps u -C stress
USER         PID %CPU %MEM    VSZ   RSS TTY      STAT START   TIME COMMAND
root       18568  1.1  0.0   3684   100 pts/12   R+   13:36   0:00 stress --cpu 1
root       18580 99.3  0.0   3684   100 pts/12   R+   13:36   0:09 stress --cpu 1

parent/
  cg-1/
    cpu.weight: 100
    sub-group/
      cpu.weight: 1
      cpuset.cpus: 1
  cg-2/
    cpu.weight: 100
    sub-group/
      cpu.weight: 10000
      cpuset.cpus: 1

This can be reproduced by attaching an idle process to a cgroup and
moving it to a given cpuset before it wakes up. The issue is evident in
many (if not most) container runtimes, and has been reproduced
with both crun and runc (and therefore docker and all its "derivatives"),
and with both cgroup v1 and v2.

Fixes: 3d30544f0212 ("sched/fair: Apply more PELT fixes")
Fixes: 039ae8bcf7a5 ("sched/fair: Fix O(nr_cgroups) in the load balancing path")
Signed-off-by: Odin Ugedal <odin@uged.al>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210501141950.23622-2-odin@uged.al
---
 kernel/sched/fair.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1d75af1ecfb4..20aa234ffe04 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -10878,16 +10878,22 @@ static void propagate_entity_cfs_rq(struct sched_entity *se)
 {
 	struct cfs_rq *cfs_rq;
 
+	list_add_leaf_cfs_rq(cfs_rq_of(se));
+
 	/* Start to propagate at parent */
 	se = se->parent;
 
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
 
-		if (cfs_rq_throttled(cfs_rq))
-			break;
+		if (!cfs_rq_throttled(cfs_rq)){
+			update_load_avg(cfs_rq, se, UPDATE_TG);
+			list_add_leaf_cfs_rq(cfs_rq);
+			continue;
+		}
 
-		update_load_avg(cfs_rq, se, UPDATE_TG);
+		if (list_add_leaf_cfs_rq(cfs_rq))
+			break;
 	}
 }
 #else
-- 
cgit v1.2.3


From e10de314287c2c14b0e6f0e3e961975ce2f4a83d Mon Sep 17 00:00:00 2001
From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Date: Tue, 4 May 2021 01:52:36 -0500
Subject: x86/events/amd/iommu: Fix invalid Perf result due to IOMMU PMC
 power-gating

On certain AMD platforms, when the IOMMU performance counter source
(csource) field is zero, power-gating for the counter is enabled, which
prevents write access and returns zero for read access.

This can cause invalid perf result especially when event multiplexing
is needed (i.e. more number of events than available counters) since
the current logic keeps track of the previously read counter value,
and subsequently re-program the counter to continue counting the event.
With power-gating enabled, we cannot gurantee successful re-programming
of the counter.

Workaround this issue by :

1. Modifying the ordering of setting/reading counters and enabing/
   disabling csources to only access the counter when the csource
   is set to non-zero.

2. Since AMD IOMMU PMU does not support interrupt mode, the logic
   can be simplified to always start counting with value zero,
   and accumulate the counter value when stopping without the need
   to keep track and reprogram the counter with the previously read
   counter value.

This has been tested on systems with and without power-gating.

Fixes: 994d6608efe4 ("iommu/amd: Remove performance counter pre-initialization test")
Suggested-by: Alexander Monakov <amonakov@ispras.ru>
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210504065236.4415-1-suravee.suthikulpanit@amd.com
---
 arch/x86/events/amd/iommu.c | 47 +++++++++++++++++++++++++--------------------
 1 file changed, 26 insertions(+), 21 deletions(-)

diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c
index 6a98a7651621..2da6139b0977 100644
--- a/arch/x86/events/amd/iommu.c
+++ b/arch/x86/events/amd/iommu.c
@@ -18,8 +18,6 @@
 #include "../perf_event.h"
 #include "iommu.h"
 
-#define COUNTER_SHIFT		16
-
 /* iommu pmu conf masks */
 #define GET_CSOURCE(x)     ((x)->conf & 0xFFULL)
 #define GET_DEVID(x)       (((x)->conf >> 8)  & 0xFFFFULL)
@@ -285,22 +283,31 @@ static void perf_iommu_start(struct perf_event *event, int flags)
 	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
 	hwc->state = 0;
 
+	/*
+	 * To account for power-gating, which prevents write to
+	 * the counter, we need to enable the counter
+	 * before setting up counter register.
+	 */
+	perf_iommu_enable_event(event);
+
 	if (flags & PERF_EF_RELOAD) {
-		u64 prev_raw_count = local64_read(&hwc->prev_count);
+		u64 count = 0;
 		struct amd_iommu *iommu = perf_event_2_iommu(event);
 
+		/*
+		 * Since the IOMMU PMU only support counting mode,
+		 * the counter always start with value zero.
+		 */
 		amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
-				     IOMMU_PC_COUNTER_REG, &prev_raw_count);
+				     IOMMU_PC_COUNTER_REG, &count);
 	}
 
-	perf_iommu_enable_event(event);
 	perf_event_update_userpage(event);
-
 }
 
 static void perf_iommu_read(struct perf_event *event)
 {
-	u64 count, prev, delta;
+	u64 count;
 	struct hw_perf_event *hwc = &event->hw;
 	struct amd_iommu *iommu = perf_event_2_iommu(event);
 
@@ -311,14 +318,11 @@ static void perf_iommu_read(struct perf_event *event)
 	/* IOMMU pc counter register is only 48 bits */
 	count &= GENMASK_ULL(47, 0);
 
-	prev = local64_read(&hwc->prev_count);
-	if (local64_cmpxchg(&hwc->prev_count, prev, count) != prev)
-		return;
-
-	/* Handle 48-bit counter overflow */
-	delta = (count << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
-	delta >>= COUNTER_SHIFT;
-	local64_add(delta, &event->count);
+	/*
+	 * Since the counter always start with value zero,
+	 * simply just accumulate the count for the event.
+	 */
+	local64_add(count, &event->count);
 }
 
 static void perf_iommu_stop(struct perf_event *event, int flags)
@@ -328,15 +332,16 @@ static void perf_iommu_stop(struct perf_event *event, int flags)
 	if (hwc->state & PERF_HES_UPTODATE)
 		return;
 
+	/*
+	 * To account for power-gating, in which reading the counter would
+	 * return zero, we need to read the register before disabling.
+	 */
+	perf_iommu_read(event);
+	hwc->state |= PERF_HES_UPTODATE;
+
 	perf_iommu_disable_event(event);
 	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
 	hwc->state |= PERF_HES_STOPPED;
-
-	if (hwc->state & PERF_HES_UPTODATE)
-		return;
-
-	perf_iommu_read(event);
-	hwc->state |= PERF_HES_UPTODATE;
 }
 
 static int perf_iommu_add(struct perf_event *event, int flags)
-- 
cgit v1.2.3


From 1139aeb1c521eb4a050920ce6c64c36c4f2a3ab7 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 5 May 2021 23:12:42 +0200
Subject: smp: Fix smp_call_function_single_async prototype

As of commit 966a967116e6 ("smp: Avoid using two cache lines for struct
call_single_data"), the smp code prefers 32-byte aligned call_single_data
objects for performance reasons, but the block layer includes an instance
of this structure in the main 'struct request' that is more senstive
to size than to performance here, see 4ccafe032005 ("block: unalign
call_single_data in struct request").

The result is a violation of the calling conventions that clang correctly
points out:

block/blk-mq.c:630:39: warning: passing 8-byte aligned argument to 32-byte aligned parameter 2 of 'smp_call_function_single_async' may result in an unaligned pointer access [-Walign-mismatch]
                smp_call_function_single_async(cpu, &rq->csd);

It does seem that the usage of the call_single_data without cache line
alignment should still be allowed by the smp code, so just change the
function prototype so it accepts both, but leave the default alignment
unchanged for the other users. This seems better to me than adding
a local hack to shut up an otherwise correct warning in the caller.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Jens Axboe <axboe@kernel.dk>
Link: https://lkml.kernel.org/r/20210505211300.3174456-1-arnd@kernel.org
---
 include/linux/smp.h |  2 +-
 kernel/smp.c        | 26 +++++++++++++-------------
 kernel/up.c         |  2 +-
 3 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/include/linux/smp.h b/include/linux/smp.h
index 84a0b4828f66..f0d3ef654207 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -53,7 +53,7 @@ int smp_call_function_single(int cpuid, smp_call_func_t func, void *info,
 void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
 			   void *info, bool wait, const struct cpumask *mask);
 
-int smp_call_function_single_async(int cpu, call_single_data_t *csd);
+int smp_call_function_single_async(int cpu, struct __call_single_data *csd);
 
 /*
  * Call a function on all processors
diff --git a/kernel/smp.c b/kernel/smp.c
index e21074900006..52bf159ec400 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -211,7 +211,7 @@ static u64 cfd_seq_inc(unsigned int src, unsigned int dst, unsigned int type)
 	} while (0)
 
 /* Record current CSD work for current CPU, NULL to erase. */
-static void __csd_lock_record(call_single_data_t *csd)
+static void __csd_lock_record(struct __call_single_data *csd)
 {
 	if (!csd) {
 		smp_mb(); /* NULL cur_csd after unlock. */
@@ -226,13 +226,13 @@ static void __csd_lock_record(call_single_data_t *csd)
 		  /* Or before unlock, as the case may be. */
 }
 
-static __always_inline void csd_lock_record(call_single_data_t *csd)
+static __always_inline void csd_lock_record(struct __call_single_data *csd)
 {
 	if (static_branch_unlikely(&csdlock_debug_enabled))
 		__csd_lock_record(csd);
 }
 
-static int csd_lock_wait_getcpu(call_single_data_t *csd)
+static int csd_lock_wait_getcpu(struct __call_single_data *csd)
 {
 	unsigned int csd_type;
 
@@ -282,7 +282,7 @@ static const char *csd_lock_get_type(unsigned int type)
 	return (type >= ARRAY_SIZE(seq_type)) ? "?" : seq_type[type];
 }
 
-static void csd_lock_print_extended(call_single_data_t *csd, int cpu)
+static void csd_lock_print_extended(struct __call_single_data *csd, int cpu)
 {
 	struct cfd_seq_local *seq = &per_cpu(cfd_seq_local, cpu);
 	unsigned int srccpu = csd->node.src;
@@ -321,7 +321,7 @@ static void csd_lock_print_extended(call_single_data_t *csd, int cpu)
  * the CSD_TYPE_SYNC/ASYNC types provide the destination CPU,
  * so waiting on other types gets much less information.
  */
-static bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id)
+static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 *ts1, int *bug_id)
 {
 	int cpu = -1;
 	int cpux;
@@ -387,7 +387,7 @@ static bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, in
  * previous function call. For multi-cpu calls its even more interesting
  * as we'll have to ensure no other cpu is observing our csd.
  */
-static void __csd_lock_wait(call_single_data_t *csd)
+static void __csd_lock_wait(struct __call_single_data *csd)
 {
 	int bug_id = 0;
 	u64 ts0, ts1;
@@ -401,7 +401,7 @@ static void __csd_lock_wait(call_single_data_t *csd)
 	smp_acquire__after_ctrl_dep();
 }
 
-static __always_inline void csd_lock_wait(call_single_data_t *csd)
+static __always_inline void csd_lock_wait(struct __call_single_data *csd)
 {
 	if (static_branch_unlikely(&csdlock_debug_enabled)) {
 		__csd_lock_wait(csd);
@@ -431,17 +431,17 @@ static void __smp_call_single_queue_debug(int cpu, struct llist_node *node)
 #else
 #define cfd_seq_store(var, src, dst, type)
 
-static void csd_lock_record(call_single_data_t *csd)
+static void csd_lock_record(struct __call_single_data *csd)
 {
 }
 
-static __always_inline void csd_lock_wait(call_single_data_t *csd)
+static __always_inline void csd_lock_wait(struct __call_single_data *csd)
 {
 	smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK));
 }
 #endif
 
-static __always_inline void csd_lock(call_single_data_t *csd)
+static __always_inline void csd_lock(struct __call_single_data *csd)
 {
 	csd_lock_wait(csd);
 	csd->node.u_flags |= CSD_FLAG_LOCK;
@@ -454,7 +454,7 @@ static __always_inline void csd_lock(call_single_data_t *csd)
 	smp_wmb();
 }
 
-static __always_inline void csd_unlock(call_single_data_t *csd)
+static __always_inline void csd_unlock(struct __call_single_data *csd)
 {
 	WARN_ON(!(csd->node.u_flags & CSD_FLAG_LOCK));
 
@@ -501,7 +501,7 @@ void __smp_call_single_queue(int cpu, struct llist_node *node)
  * for execution on the given CPU. data must already have
  * ->func, ->info, and ->flags set.
  */
-static int generic_exec_single(int cpu, call_single_data_t *csd)
+static int generic_exec_single(int cpu, struct __call_single_data *csd)
 {
 	if (cpu == smp_processor_id()) {
 		smp_call_func_t func = csd->func;
@@ -784,7 +784,7 @@ EXPORT_SYMBOL(smp_call_function_single);
  * NOTE: Be careful, there is unfortunately no current debugging facility to
  * validate the correctness of this serialization.
  */
-int smp_call_function_single_async(int cpu, call_single_data_t *csd)
+int smp_call_function_single_async(int cpu, struct __call_single_data *csd)
 {
 	int err = 0;
 
diff --git a/kernel/up.c b/kernel/up.c
index bf20b4a9af60..c7321307923a 100644
--- a/kernel/up.c
+++ b/kernel/up.c
@@ -25,7 +25,7 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
 }
 EXPORT_SYMBOL(smp_call_function_single);
 
-int smp_call_function_single_async(int cpu, call_single_data_t *csd)
+int smp_call_function_single_async(int cpu, struct __call_single_data *csd)
 {
 	unsigned long flags;
 
-- 
cgit v1.2.3


From 28ce0e70ecc30cc7d558a0304e6b816d70848f9a Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Mon, 26 Apr 2021 14:50:17 -0400
Subject: locking/qrwlock: Cleanup queued_write_lock_slowpath()

Make the code more readable by replacing the atomic_cmpxchg_acquire()
by an equivalent atomic_try_cmpxchg_acquire() and change atomic_add()
to atomic_or().

For architectures that use qrwlock, I do not find one that has an
atomic_add() defined but not an atomic_or().  I guess it should be fine
by changing atomic_add() to atomic_or().

Note that the previous use of atomic_add() isn't wrong as only one
writer that is the wait_lock owner can set the waiting flag and the
flag will be cleared later on when acquiring the write lock.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lkml.kernel.org/r/20210426185017.19815-1-longman@redhat.com
---
 kernel/locking/qrwlock.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
index b94f3831e963..ec36b73f4733 100644
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -66,12 +66,12 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
 	arch_spin_lock(&lock->wait_lock);
 
 	/* Try to acquire the lock directly if no reader is present */
-	if (!atomic_read(&lock->cnts) &&
-	    (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0))
+	if (!(cnts = atomic_read(&lock->cnts)) &&
+	    atomic_try_cmpxchg_acquire(&lock->cnts, &cnts, _QW_LOCKED))
 		goto unlock;
 
 	/* Set the waiting flag to notify readers that a writer is pending */
-	atomic_add(_QW_WAITING, &lock->cnts);
+	atomic_or(_QW_WAITING, &lock->cnts);
 
 	/* When no more readers or writers, set the locked flag */
 	do {
-- 
cgit v1.2.3


From 4fbf5d6837bf81fd7a27d771358f4ee6c4f243f8 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 22 Apr 2021 21:44:18 +0200
Subject: Revert 337f13046ff0 ("futex: Allow FUTEX_CLOCK_REALTIME with
 FUTEX_WAIT op")

The FUTEX_WAIT operand has historically a relative timeout which means that
the clock id is irrelevant as relative timeouts on CLOCK_REALTIME are not
subject to wall clock changes and therefore are mapped by the kernel to
CLOCK_MONOTONIC for simplicity.

If a caller would set FUTEX_CLOCK_REALTIME for FUTEX_WAIT the timeout is
still treated relative vs. CLOCK_MONOTONIC and then the wait arms that
timeout based on CLOCK_REALTIME which is broken and obviously has never
been used or even tested.

Reject any attempt to use FUTEX_CLOCK_REALTIME with FUTEX_WAIT again.

The desired functionality can be achieved with FUTEX_WAIT_BITSET and a
FUTEX_BITSET_MATCH_ANY argument.

Fixes: 337f13046ff0 ("futex: Allow FUTEX_CLOCK_REALTIME with FUTEX_WAIT op")
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210422194704.834797921@linutronix.de
---
 kernel/futex.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index c98b825da9cf..47402008a430 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -3710,8 +3710,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 
 	if (op & FUTEX_CLOCK_REALTIME) {
 		flags |= FLAGS_CLOCKRT;
-		if (cmd != FUTEX_WAIT && cmd != FUTEX_WAIT_BITSET && \
-		    cmd != FUTEX_WAIT_REQUEUE_PI)
+		if (cmd != FUTEX_WAIT_BITSET &&	cmd != FUTEX_WAIT_REQUEUE_PI)
 			return -ENOSYS;
 	}
 
-- 
cgit v1.2.3


From cdf78db4070967869e4d027c11f4dd825d8f815a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 22 Apr 2021 21:44:19 +0200
Subject: futex: Do not apply time namespace adjustment on FUTEX_LOCK_PI

FUTEX_LOCK_PI does not require to have the FUTEX_CLOCK_REALTIME bit set
because it has been using CLOCK_REALTIME based absolute timeouts
forever. Due to that, the time namespace adjustment which is applied when
FUTEX_CLOCK_REALTIME is not set, will wrongly take place for FUTEX_LOCK_PI
and wreckage the timeout.

Exclude it from that procedure.

Fixes: c2f7d08cccf4 ("futex: Adjust absolute futex timeouts with per time namespace offset")
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210422194704.984540159@linutronix.de
---
 kernel/futex.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index 47402008a430..b0f53045f206 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -3780,7 +3780,7 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
 		t = timespec64_to_ktime(ts);
 		if (cmd == FUTEX_WAIT)
 			t = ktime_add_safe(ktime_get(), t);
-		else if (!(op & FUTEX_CLOCK_REALTIME))
+		else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME))
 			t = timens_ktime_to_host(CLOCK_MONOTONIC, t);
 		tp = &t;
 	}
@@ -3974,7 +3974,7 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
 		t = timespec64_to_ktime(ts);
 		if (cmd == FUTEX_WAIT)
 			t = ktime_add_safe(ktime_get(), t);
-		else if (!(op & FUTEX_CLOCK_REALTIME))
+		else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME))
 			t = timens_ktime_to_host(CLOCK_MONOTONIC, t);
 		tp = &t;
 	}
-- 
cgit v1.2.3


From b097d5ed33561507eeffc77120a8c16c2f0f2c4c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 22 Apr 2021 21:44:20 +0200
Subject: futex: Get rid of the val2 conditional dance

There is no point in checking which FUTEX operand treats the utime pointer
as 'val2' argument because that argument to do_futex() is only used by
exactly these operands.

So just handing it in unconditionally is not making any difference, but
removes a lot of pointless gunk.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210422194705.125957049@linutronix.de
---
 kernel/futex.c | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index b0f53045f206..4ddfdce325ad 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -3764,7 +3764,6 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
 {
 	struct timespec64 ts;
 	ktime_t t, *tp = NULL;
-	u32 val2 = 0;
 	int cmd = op & FUTEX_CMD_MASK;
 
 	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
@@ -3784,15 +3783,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
 			t = timens_ktime_to_host(CLOCK_MONOTONIC, t);
 		tp = &t;
 	}
-	/*
-	 * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*.
-	 * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
-	 */
-	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
-	    cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
-		val2 = (u32) (unsigned long) utime;
 
-	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
+	return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
 }
 
 #ifdef CONFIG_COMPAT
@@ -3960,7 +3952,6 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
 {
 	struct timespec64 ts;
 	ktime_t t, *tp = NULL;
-	int val2 = 0;
 	int cmd = op & FUTEX_CMD_MASK;
 
 	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
@@ -3978,11 +3969,8 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
 			t = timens_ktime_to_host(CLOCK_MONOTONIC, t);
 		tp = &t;
 	}
-	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
-	    cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
-		val2 = (int) (unsigned long) utime;
 
-	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
+	return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
 }
 #endif /* CONFIG_COMPAT_32BIT_TIME */
 
-- 
cgit v1.2.3


From 51cf94d16860a324e97d1b670d88f1f2b643bc32 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 22 Apr 2021 21:44:21 +0200
Subject: futex: Make syscall entry points less convoluted

The futex and the compat syscall entry points do pretty much the same
except for the timespec data types and the corresponding copy from
user function.

Split out the rest into inline functions and share the functionality.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210422194705.244476369@linutronix.de
---
 kernel/futex.c | 63 ++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 37 insertions(+), 26 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index 4ddfdce325ad..4938a00bc785 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -3757,30 +3757,48 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 	return -ENOSYS;
 }
 
+static __always_inline bool futex_cmd_has_timeout(u32 cmd)
+{
+	switch (cmd) {
+	case FUTEX_WAIT:
+	case FUTEX_LOCK_PI:
+	case FUTEX_WAIT_BITSET:
+	case FUTEX_WAIT_REQUEUE_PI:
+		return true;
+	}
+	return false;
+}
+
+static __always_inline int
+futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t)
+{
+	if (!timespec64_valid(ts))
+		return -EINVAL;
+
+	*t = timespec64_to_ktime(*ts);
+	if (cmd == FUTEX_WAIT)
+		*t = ktime_add_safe(ktime_get(), *t);
+	else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME))
+		*t = timens_ktime_to_host(CLOCK_MONOTONIC, *t);
+	return 0;
+}
 
 SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
 		const struct __kernel_timespec __user *, utime,
 		u32 __user *, uaddr2, u32, val3)
 {
-	struct timespec64 ts;
+	int ret, cmd = op & FUTEX_CMD_MASK;
 	ktime_t t, *tp = NULL;
-	int cmd = op & FUTEX_CMD_MASK;
+	struct timespec64 ts;
 
-	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
-		      cmd == FUTEX_WAIT_BITSET ||
-		      cmd == FUTEX_WAIT_REQUEUE_PI)) {
+	if (utime && futex_cmd_has_timeout(cmd)) {
 		if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
 			return -EFAULT;
 		if (get_timespec64(&ts, utime))
 			return -EFAULT;
-		if (!timespec64_valid(&ts))
-			return -EINVAL;
-
-		t = timespec64_to_ktime(ts);
-		if (cmd == FUTEX_WAIT)
-			t = ktime_add_safe(ktime_get(), t);
-		else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME))
-			t = timens_ktime_to_host(CLOCK_MONOTONIC, t);
+		ret = futex_init_timeout(cmd, op, &ts, &t);
+		if (ret)
+			return ret;
 		tp = &t;
 	}
 
@@ -3950,23 +3968,16 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
 		const struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
 		u32, val3)
 {
-	struct timespec64 ts;
+	int ret, cmd = op & FUTEX_CMD_MASK;
 	ktime_t t, *tp = NULL;
-	int cmd = op & FUTEX_CMD_MASK;
+	struct timespec64 ts;
 
-	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
-		      cmd == FUTEX_WAIT_BITSET ||
-		      cmd == FUTEX_WAIT_REQUEUE_PI)) {
+	if (utime && futex_cmd_has_timeout(cmd)) {
 		if (get_old_timespec32(&ts, utime))
 			return -EFAULT;
-		if (!timespec64_valid(&ts))
-			return -EINVAL;
-
-		t = timespec64_to_ktime(ts);
-		if (cmd == FUTEX_WAIT)
-			t = ktime_add_safe(ktime_get(), t);
-		else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME))
-			t = timens_ktime_to_host(CLOCK_MONOTONIC, t);
+		ret = futex_init_timeout(cmd, op, &ts, &t);
+		if (ret)
+			return ret;
 		tp = &t;
 	}
 
-- 
cgit v1.2.3


From 3b80d106e110d39d3f678954d3b55078669cf07e Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Thu, 6 May 2021 14:43:49 +0200
Subject: samples/bpf: Consider frame size in tx_only of xdpsock sample

Fix the tx_only micro-benchmark in xdpsock to take frame size into
consideration. It was hardcoded to the default value of frame_size
which is 4K. Changing this on the command line to 2K made half of the
packets illegal as they were outside the umem and were therefore
discarded by the kernel.

Fixes: 46738f73ea4f ("samples/bpf: add use of need_wakeup flag in xdpsock")
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/bpf/20210506124349.6666-1-magnus.karlsson@gmail.com
---
 samples/bpf/xdpsock_user.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index aa696854be78..53e300f860bb 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -1255,7 +1255,7 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size)
 	for (i = 0; i < batch_size; i++) {
 		struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx,
 								  idx + i);
-		tx_desc->addr = (*frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
+		tx_desc->addr = (*frame_nb + i) * opt_xsk_frame_size;
 		tx_desc->len = PKT_SIZE;
 	}
 
-- 
cgit v1.2.3


From d9aa6571b28ba0022de1e48801ff03a1854c7ef2 Mon Sep 17 00:00:00 2001
From: Kuogee Hsieh <khsieh@codeaurora.org>
Date: Wed, 21 Apr 2021 16:37:35 -0700
Subject: drm/msm/dp: check sink_count before update is_connected status

Link status is different from display connected status in the case
of something like an Apple dongle where the type-c plug can be
connected, and therefore the link is connected, but no sink is
connected until an HDMI cable is plugged into the dongle.
The sink_count of DPCD of dongle will increase to 1 once an HDMI
cable is plugged into the dongle so that display connected status
will become true. This checking also apply at pm_resume.

Changes in v4:
-- none

Fixes: 94e58e2d06e3 ("drm/msm/dp: reset dp controller only at boot up and pm_resume")
Reported-by: Stephen Boyd <swboyd@chromium.org>
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Tested-by: Stephen Boyd <swboyd@chromium.org>
Signed-off-by: Kuogee Hsieh <khsieh@codeaurora.org>
Fixes: 8ede2ecc3e5e ("drm/msm/dp: Add DP compliance tests on Snapdragon Chipsets")
Link: https://lore.kernel.org/r/1619048258-8717-2-git-send-email-khsieh@codeaurora.org
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/dp/dp_display.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index 5a39da6e1eaf..0ba71c7a8dd4 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -586,10 +586,8 @@ static int dp_connect_pending_timeout(struct dp_display_private *dp, u32 data)
 	mutex_lock(&dp->event_mutex);
 
 	state = dp->hpd_state;
-	if (state == ST_CONNECT_PENDING) {
-		dp_display_enable(dp, 0);
+	if (state == ST_CONNECT_PENDING)
 		dp->hpd_state = ST_CONNECTED;
-	}
 
 	mutex_unlock(&dp->event_mutex);
 
@@ -669,10 +667,8 @@ static int dp_disconnect_pending_timeout(struct dp_display_private *dp, u32 data
 	mutex_lock(&dp->event_mutex);
 
 	state =  dp->hpd_state;
-	if (state == ST_DISCONNECT_PENDING) {
-		dp_display_disable(dp, 0);
+	if (state == ST_DISCONNECT_PENDING)
 		dp->hpd_state = ST_DISCONNECTED;
-	}
 
 	mutex_unlock(&dp->event_mutex);
 
@@ -1272,7 +1268,12 @@ static int dp_pm_resume(struct device *dev)
 
 	status = dp_catalog_link_is_connected(dp->catalog);
 
-	if (status)
+	/*
+	 * can not declared display is connected unless
+	 * HDMI cable is plugged in and sink_count of
+	 * dongle become 1
+	 */
+	if (status && dp->link->sink_count)
 		dp->dp_display.is_connected = true;
 	else
 		dp->dp_display.is_connected = false;
-- 
cgit v1.2.3


From f2f46b878777e0d3f885c7ddad48f477b4dea247 Mon Sep 17 00:00:00 2001
From: Kuogee Hsieh <khsieh@codeaurora.org>
Date: Wed, 21 Apr 2021 16:37:36 -0700
Subject: drm/msm/dp: initialize audio_comp when audio starts

Initialize audio_comp when audio starts and wait for audio_comp at
dp_display_disable(). This will take care of both dongle unplugged
and display off (suspend) cases.

Changes in v2:
-- add dp_display_signal_audio_start()

Changes in v3:
-- restore dp_display_handle_plugged_change() at dp_hpd_unplug_handle().

Changes in v4:
-- none

Signed-off-by: Kuogee Hsieh <khsieh@codeaurora.org>
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Tested-by: Stephen Boyd <swboyd@chromium.org>
Fixes: c703d5789590 ("drm/msm/dp: trigger unplug event in msm_dp_display_disable")
Link: https://lore.kernel.org/r/1619048258-8717-3-git-send-email-khsieh@codeaurora.org
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/dp/dp_audio.c   |  1 +
 drivers/gpu/drm/msm/dp/dp_display.c | 11 +++++++++--
 drivers/gpu/drm/msm/dp/dp_display.h |  1 +
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/msm/dp/dp_audio.c b/drivers/gpu/drm/msm/dp/dp_audio.c
index 82a8673ab8da..d7e4a39a904e 100644
--- a/drivers/gpu/drm/msm/dp/dp_audio.c
+++ b/drivers/gpu/drm/msm/dp/dp_audio.c
@@ -527,6 +527,7 @@ int dp_audio_hw_params(struct device *dev,
 	dp_audio_setup_acr(audio);
 	dp_audio_safe_to_exit_level(audio);
 	dp_audio_enable(audio, true);
+	dp_display_signal_audio_start(dp_display);
 	dp_display->audio_enabled = true;
 
 end:
diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index 0ba71c7a8dd4..1784e119269b 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -178,6 +178,15 @@ static int dp_del_event(struct dp_display_private *dp_priv, u32 event)
 	return 0;
 }
 
+void dp_display_signal_audio_start(struct msm_dp *dp_display)
+{
+	struct dp_display_private *dp;
+
+	dp = container_of(dp_display, struct dp_display_private, dp_display);
+
+	reinit_completion(&dp->audio_comp);
+}
+
 void dp_display_signal_audio_complete(struct msm_dp *dp_display)
 {
 	struct dp_display_private *dp;
@@ -649,7 +658,6 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data)
 	dp_add_event(dp, EV_DISCONNECT_PENDING_TIMEOUT, 0, DP_TIMEOUT_5_SECOND);
 
 	/* signal the disconnect event early to ensure proper teardown */
-	reinit_completion(&dp->audio_comp);
 	dp_display_handle_plugged_change(g_dp_display, false);
 
 	dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_PLUG_INT_MASK |
@@ -894,7 +902,6 @@ static int dp_display_disable(struct dp_display_private *dp, u32 data)
 	/* wait only if audio was enabled */
 	if (dp_display->audio_enabled) {
 		/* signal the disconnect event */
-		reinit_completion(&dp->audio_comp);
 		dp_display_handle_plugged_change(dp_display, false);
 		if (!wait_for_completion_timeout(&dp->audio_comp,
 				HZ * 5))
diff --git a/drivers/gpu/drm/msm/dp/dp_display.h b/drivers/gpu/drm/msm/dp/dp_display.h
index 6092ba1ed85e..5173c89eedf7 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.h
+++ b/drivers/gpu/drm/msm/dp/dp_display.h
@@ -34,6 +34,7 @@ int dp_display_get_modes(struct msm_dp *dp_display,
 int dp_display_request_irq(struct msm_dp *dp_display);
 bool dp_display_check_video_test(struct msm_dp *dp_display);
 int dp_display_get_test_bpp(struct msm_dp *dp_display);
+void dp_display_signal_audio_start(struct msm_dp *dp_display);
 void dp_display_signal_audio_complete(struct msm_dp *dp_display);
 
 #endif /* _DP_DISPLAY_H_ */
-- 
cgit v1.2.3


From 31379397dcc364a59ce764fabb131b645c43e340 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 5 May 2021 15:25:29 +0200
Subject: bpf: Forbid trampoline attach for functions with variable arguments

We can't currently allow to attach functions with variable arguments.
The problem is that we should save all the registers for arguments,
which is probably doable, but if caller uses more than 6 arguments,
we need stack data, which will be wrong, because of the extra stack
frame we do in bpf trampoline, so we could crash.

Also currently there's malformed trampoline code generated for such
functions at the moment as described in:

  https://lore.kernel.org/bpf/20210429212834.82621-1-jolsa@kernel.org/

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210505132529.401047-1-jolsa@kernel.org
---
 kernel/bpf/btf.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 0600ed325fa0..f982a9f0dbc4 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -5206,6 +5206,12 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
 	m->ret_size = ret;
 
 	for (i = 0; i < nargs; i++) {
+		if (i == nargs - 1 && args[i].type == 0) {
+			bpf_log(log,
+				"The function %s with variable args is unsupported.\n",
+				tname);
+			return -EINVAL;
+		}
 		ret = __get_type_size(btf, args[i].type, &t);
 		if (ret < 0) {
 			bpf_log(log,
@@ -5213,6 +5219,12 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
 				tname, i, btf_kind_str[BTF_INFO_KIND(t->info)]);
 			return -EINVAL;
 		}
+		if (ret == 0) {
+			bpf_log(log,
+				"The function %s has malformed void argument.\n",
+				tname);
+			return -EINVAL;
+		}
 		m->arg_size[i] = ret;
 	}
 	m->nr_args = nargs;
-- 
cgit v1.2.3


From 8822702f6e4c8917c83ba79e0ebf2c8c218910d4 Mon Sep 17 00:00:00 2001
From: Hui Wang <hui.wang@canonical.com>
Date: Fri, 7 May 2021 10:44:52 +0800
Subject: ALSA: hda/realtek: reset eapd coeff to default value for alc287

Ubuntu users reported an audio bug on the Lenovo Yoga Slim 7 14IIL05,
he installed dual OS (Windows + Linux), if he booted to the Linux
from Windows, the Speaker can't work well, it has crackling noise,
if he poweroff the machine first after Windows, the Speaker worked
well.

Before rebooting or shutdown from Windows, the Windows changes the
codec eapd coeff value, but the BIOS doesn't re-initialize its value,
when booting into the Linux from Windows, the eapd coeff value is not
correct. To fix it, set the codec default value to that coeff register
in the alsa driver.

BugLink: http://bugs.launchpad.net/bugs/1925057
Suggested-by: Kailang Yang <kailang@realtek.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Hui Wang <hui.wang@canonical.com>
Link: https://lore.kernel.org/r/20210507024452.8300-1-hui.wang@canonical.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 6d58f24c9702..a5f3e78ec04e 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -395,7 +395,6 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
 	case 0x10ec0282:
 	case 0x10ec0283:
 	case 0x10ec0286:
-	case 0x10ec0287:
 	case 0x10ec0288:
 	case 0x10ec0285:
 	case 0x10ec0298:
@@ -406,6 +405,10 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
 	case 0x10ec0275:
 		alc_update_coef_idx(codec, 0xe, 0, 1<<0);
 		break;
+	case 0x10ec0287:
+		alc_update_coef_idx(codec, 0x10, 1<<9, 0);
+		alc_write_coef_idx(codec, 0x8, 0x4ab7);
+		break;
 	case 0x10ec0293:
 		alc_update_coef_idx(codec, 0xa, 1<<13, 0);
 		break;
-- 
cgit v1.2.3


From 8b79feffeca28c5459458fe78676b081e87c93a4 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 14 Apr 2021 14:35:41 +0200
Subject: x86/kvm: Teardown PV features on boot CPU as well

Various PV features (Async PF, PV EOI, steal time) work through memory
shared with hypervisor and when we restore from hibernation we must
properly teardown all these features to make sure hypervisor doesn't
write to stale locations after we jump to the previously hibernated kernel
(which can try to place anything there). For secondary CPUs the job is
already done by kvm_cpu_down_prepare(), register syscore ops to do
the same for boot CPU.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20210414123544.1060604-3-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kernel/kvm.c | 56 ++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 40 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index dc440bb69222..9d5f96321c7f 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -26,6 +26,7 @@
 #include <linux/kprobes.h>
 #include <linux/nmi.h>
 #include <linux/swait.h>
+#include <linux/syscore_ops.h>
 #include <asm/timer.h>
 #include <asm/cpu.h>
 #include <asm/traps.h>
@@ -451,6 +452,25 @@ static void __init sev_map_percpu_data(void)
 	}
 }
 
+static void kvm_guest_cpu_offline(void)
+{
+	kvm_disable_steal_time();
+	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
+		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+	kvm_pv_disable_apf();
+	apf_task_wake_all();
+}
+
+static int kvm_cpu_online(unsigned int cpu)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	kvm_guest_cpu_init();
+	local_irq_restore(flags);
+	return 0;
+}
+
 #ifdef CONFIG_SMP
 
 static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask);
@@ -635,32 +655,34 @@ static void __init kvm_smp_prepare_boot_cpu(void)
 	kvm_spinlock_init();
 }
 
-static void kvm_guest_cpu_offline(void)
+static int kvm_cpu_down_prepare(unsigned int cpu)
 {
-	kvm_disable_steal_time();
-	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
-		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
-	kvm_pv_disable_apf();
-	apf_task_wake_all();
-}
+	unsigned long flags;
 
-static int kvm_cpu_online(unsigned int cpu)
-{
-	local_irq_disable();
-	kvm_guest_cpu_init();
-	local_irq_enable();
+	local_irq_save(flags);
+	kvm_guest_cpu_offline();
+	local_irq_restore(flags);
 	return 0;
 }
 
-static int kvm_cpu_down_prepare(unsigned int cpu)
+#endif
+
+static int kvm_suspend(void)
 {
-	local_irq_disable();
 	kvm_guest_cpu_offline();
-	local_irq_enable();
+
 	return 0;
 }
 
-#endif
+static void kvm_resume(void)
+{
+	kvm_cpu_online(raw_smp_processor_id());
+}
+
+static struct syscore_ops kvm_syscore_ops = {
+	.suspend	= kvm_suspend,
+	.resume		= kvm_resume,
+};
 
 static void __init kvm_guest_init(void)
 {
@@ -704,6 +726,8 @@ static void __init kvm_guest_init(void)
 	kvm_guest_cpu_init();
 #endif
 
+	register_syscore_ops(&kvm_syscore_ops);
+
 	/*
 	 * Hard lockup detection is enabled by default. Disable it, as guests
 	 * can get false positives too easily, for example if the host is
-- 
cgit v1.2.3


From c02027b5742b5aa804ef08a4a9db433295533046 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 14 Apr 2021 14:35:42 +0200
Subject: x86/kvm: Disable kvmclock on all CPUs on shutdown

Currenly, we disable kvmclock from machine_shutdown() hook and this
only happens for boot CPU. We need to disable it for all CPUs to
guard against memory corruption e.g. on restore from hibernate.

Note, writing '0' to kvmclock MSR doesn't clear memory location, it
just prevents hypervisor from updating the location so for the short
while after write and while CPU is still alive, the clock remains usable
and correct so we don't need to switch to some other clocksource.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20210414123544.1060604-4-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_para.h | 4 ++--
 arch/x86/kernel/kvm.c           | 1 +
 arch/x86/kernel/kvmclock.c      | 5 +----
 3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 338119852512..9c56e0defd45 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -7,8 +7,6 @@
 #include <linux/interrupt.h>
 #include <uapi/asm/kvm_para.h>
 
-extern void kvmclock_init(void);
-
 #ifdef CONFIG_KVM_GUEST
 bool kvm_check_and_clear_guest_paused(void);
 #else
@@ -86,6 +84,8 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
 }
 
 #ifdef CONFIG_KVM_GUEST
+void kvmclock_init(void);
+void kvmclock_disable(void);
 bool kvm_para_available(void);
 unsigned int kvm_arch_para_features(void);
 unsigned int kvm_arch_para_hints(void);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 9d5f96321c7f..25dd126a3325 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -459,6 +459,7 @@ static void kvm_guest_cpu_offline(void)
 		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
 	kvm_pv_disable_apf();
 	apf_task_wake_all();
+	kvmclock_disable();
 }
 
 static int kvm_cpu_online(unsigned int cpu)
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index d37ed4e1d033..081686df6cd8 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -220,11 +220,9 @@ static void kvm_crash_shutdown(struct pt_regs *regs)
 }
 #endif
 
-static void kvm_shutdown(void)
+void kvmclock_disable(void)
 {
 	native_write_msr(msr_kvm_system_time, 0, 0);
-	kvm_disable_steal_time();
-	native_machine_shutdown();
 }
 
 static void __init kvmclock_init_mem(void)
@@ -351,7 +349,6 @@ void __init kvmclock_init(void)
 #endif
 	x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
 	x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
-	machine_ops.shutdown  = kvm_shutdown;
 #ifdef CONFIG_KEXEC_CORE
 	machine_ops.crash_shutdown  = kvm_crash_shutdown;
 #endif
-- 
cgit v1.2.3


From 3d6b84132d2a57b5a74100f6923a8feb679ac2ce Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 14 Apr 2021 14:35:43 +0200
Subject: x86/kvm: Disable all PV features on crash

Crash shutdown handler only disables kvmclock and steal time, other PV
features remain active so we risk corrupting memory or getting some
side-effects in kdump kernel. Move crash handler to kvm.c and unify
with CPU offline.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20210414123544.1060604-5-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_para.h |  6 ------
 arch/x86/kernel/kvm.c           | 44 ++++++++++++++++++++++++++++++-----------
 arch/x86/kernel/kvmclock.c      | 21 --------------------
 3 files changed, 32 insertions(+), 39 deletions(-)

diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 9c56e0defd45..69299878b200 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -92,7 +92,6 @@ unsigned int kvm_arch_para_hints(void);
 void kvm_async_pf_task_wait_schedule(u32 token);
 void kvm_async_pf_task_wake(u32 token);
 u32 kvm_read_and_reset_apf_flags(void);
-void kvm_disable_steal_time(void);
 bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token);
 
 DECLARE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
@@ -137,11 +136,6 @@ static inline u32 kvm_read_and_reset_apf_flags(void)
 	return 0;
 }
 
-static inline void kvm_disable_steal_time(void)
-{
-	return;
-}
-
 static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token)
 {
 	return false;
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 25dd126a3325..8eb91dc0f5a8 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -38,6 +38,7 @@
 #include <asm/tlb.h>
 #include <asm/cpuidle_haltpoll.h>
 #include <asm/ptrace.h>
+#include <asm/reboot.h>
 #include <asm/svm.h>
 
 DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
@@ -375,6 +376,14 @@ static void kvm_pv_disable_apf(void)
 	pr_info("disable async PF for cpu %d\n", smp_processor_id());
 }
 
+static void kvm_disable_steal_time(void)
+{
+	if (!has_steal_clock)
+		return;
+
+	wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
+}
+
 static void kvm_pv_guest_cpu_reboot(void *unused)
 {
 	/*
@@ -417,14 +426,6 @@ static u64 kvm_steal_clock(int cpu)
 	return steal;
 }
 
-void kvm_disable_steal_time(void)
-{
-	if (!has_steal_clock)
-		return;
-
-	wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
-}
-
 static inline void __set_percpu_decrypted(void *ptr, unsigned long size)
 {
 	early_set_memory_decrypted((unsigned long) ptr, size);
@@ -452,13 +453,14 @@ static void __init sev_map_percpu_data(void)
 	}
 }
 
-static void kvm_guest_cpu_offline(void)
+static void kvm_guest_cpu_offline(bool shutdown)
 {
 	kvm_disable_steal_time();
 	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
 		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
 	kvm_pv_disable_apf();
-	apf_task_wake_all();
+	if (!shutdown)
+		apf_task_wake_all();
 	kvmclock_disable();
 }
 
@@ -661,7 +663,7 @@ static int kvm_cpu_down_prepare(unsigned int cpu)
 	unsigned long flags;
 
 	local_irq_save(flags);
-	kvm_guest_cpu_offline();
+	kvm_guest_cpu_offline(false);
 	local_irq_restore(flags);
 	return 0;
 }
@@ -670,7 +672,7 @@ static int kvm_cpu_down_prepare(unsigned int cpu)
 
 static int kvm_suspend(void)
 {
-	kvm_guest_cpu_offline();
+	kvm_guest_cpu_offline(false);
 
 	return 0;
 }
@@ -685,6 +687,20 @@ static struct syscore_ops kvm_syscore_ops = {
 	.resume		= kvm_resume,
 };
 
+/*
+ * After a PV feature is registered, the host will keep writing to the
+ * registered memory location. If the guest happens to shutdown, this memory
+ * won't be valid. In cases like kexec, in which you install a new kernel, this
+ * means a random memory location will be kept being written.
+ */
+#ifdef CONFIG_KEXEC_CORE
+static void kvm_crash_shutdown(struct pt_regs *regs)
+{
+	kvm_guest_cpu_offline(true);
+	native_machine_crash_shutdown(regs);
+}
+#endif
+
 static void __init kvm_guest_init(void)
 {
 	int i;
@@ -727,6 +743,10 @@ static void __init kvm_guest_init(void)
 	kvm_guest_cpu_init();
 #endif
 
+#ifdef CONFIG_KEXEC_CORE
+	machine_ops.crash_shutdown = kvm_crash_shutdown;
+#endif
+
 	register_syscore_ops(&kvm_syscore_ops);
 
 	/*
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 081686df6cd8..ad273e5861c1 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -20,7 +20,6 @@
 #include <asm/hypervisor.h>
 #include <asm/mem_encrypt.h>
 #include <asm/x86_init.h>
-#include <asm/reboot.h>
 #include <asm/kvmclock.h>
 
 static int kvmclock __initdata = 1;
@@ -203,23 +202,6 @@ static void kvm_setup_secondary_clock(void)
 }
 #endif
 
-/*
- * After the clock is registered, the host will keep writing to the
- * registered memory location. If the guest happens to shutdown, this memory
- * won't be valid. In cases like kexec, in which you install a new kernel, this
- * means a random memory location will be kept being written. So before any
- * kind of shutdown from our side, we unregister the clock by writing anything
- * that does not have the 'enable' bit set in the msr
- */
-#ifdef CONFIG_KEXEC_CORE
-static void kvm_crash_shutdown(struct pt_regs *regs)
-{
-	native_write_msr(msr_kvm_system_time, 0, 0);
-	kvm_disable_steal_time();
-	native_machine_crash_shutdown(regs);
-}
-#endif
-
 void kvmclock_disable(void)
 {
 	native_write_msr(msr_kvm_system_time, 0, 0);
@@ -349,9 +331,6 @@ void __init kvmclock_init(void)
 #endif
 	x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
 	x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
-#ifdef CONFIG_KEXEC_CORE
-	machine_ops.crash_shutdown  = kvm_crash_shutdown;
-#endif
 	kvm_get_preset_lpj();
 
 	/*
-- 
cgit v1.2.3


From 384fc672f528d3b84eacd9a86ecf35df3363b8ba Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 14 Apr 2021 14:35:44 +0200
Subject: x86/kvm: Unify kvm_pv_guest_cpu_reboot() with kvm_guest_cpu_offline()

Simplify the code by making PV features shutdown happen in one place.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20210414123544.1060604-6-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kernel/kvm.c | 42 +++++++++++++++++-------------------------
 1 file changed, 17 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 8eb91dc0f5a8..a26643dc6bd6 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -384,31 +384,6 @@ static void kvm_disable_steal_time(void)
 	wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
 }
 
-static void kvm_pv_guest_cpu_reboot(void *unused)
-{
-	/*
-	 * We disable PV EOI before we load a new kernel by kexec,
-	 * since MSR_KVM_PV_EOI_EN stores a pointer into old kernel's memory.
-	 * New kernel can re-enable when it boots.
-	 */
-	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
-		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
-	kvm_pv_disable_apf();
-	kvm_disable_steal_time();
-}
-
-static int kvm_pv_reboot_notify(struct notifier_block *nb,
-				unsigned long code, void *unused)
-{
-	if (code == SYS_RESTART)
-		on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block kvm_pv_reboot_nb = {
-	.notifier_call = kvm_pv_reboot_notify,
-};
-
 static u64 kvm_steal_clock(int cpu)
 {
 	u64 steal;
@@ -687,6 +662,23 @@ static struct syscore_ops kvm_syscore_ops = {
 	.resume		= kvm_resume,
 };
 
+static void kvm_pv_guest_cpu_reboot(void *unused)
+{
+	kvm_guest_cpu_offline(true);
+}
+
+static int kvm_pv_reboot_notify(struct notifier_block *nb,
+				unsigned long code, void *unused)
+{
+	if (code == SYS_RESTART)
+		on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block kvm_pv_reboot_nb = {
+	.notifier_call = kvm_pv_reboot_notify,
+};
+
 /*
  * After a PV feature is registered, the host will keep writing to the
  * registered memory location. If the guest happens to shutdown, this memory
-- 
cgit v1.2.3


From 46a63924b05f335b0765ad13dae4d2d7569f25c9 Mon Sep 17 00:00:00 2001
From: Siddharth Chandrasekaran <sidcha@amazon.de>
Date: Mon, 3 May 2021 14:00:58 +0200
Subject: doc/kvm: Fix wrong entry for KVM_CAP_X86_MSR_FILTER

The capability that exposes new ioctl KVM_X86_SET_MSR_FILTER to
userspace is specified incorrectly as the ioctl itself (instead of
KVM_CAP_X86_MSR_FILTER). This patch fixes it.

Fixes: 1a155254ff93 ("KVM: x86: Introduce MSR filtering")
Reviewed-by: Alexander Graf <graf@amazon.de>
Signed-off-by: Siddharth Chandrasekaran <sidcha@amazon.de>
Message-Id: <20210503120059.9283-1-sidcha@amazon.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virt/kvm/api.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 22d077562149..7fcb2fd38f42 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -4803,7 +4803,7 @@ KVM_PV_VM_VERIFY
 4.126 KVM_X86_SET_MSR_FILTER
 ----------------------------
 
-:Capability: KVM_X86_SET_MSR_FILTER
+:Capability: KVM_CAP_X86_MSR_FILTER
 :Architectures: x86
 :Type: vm ioctl
 :Parameters: struct kvm_msr_filter
@@ -6715,7 +6715,7 @@ accesses that would usually trigger a #GP by KVM into the guest will
 instead get bounced to user space through the KVM_EXIT_X86_RDMSR and
 KVM_EXIT_X86_WRMSR exit notifications.
 
-8.27 KVM_X86_SET_MSR_FILTER
+8.27 KVM_CAP_X86_MSR_FILTER
 ---------------------------
 
 :Architectures: x86
-- 
cgit v1.2.3


From f5c7e8425f18fdb9bdb7d13340651d7876890329 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Mon, 3 May 2021 17:08:51 +0200
Subject: KVM: nVMX: Always make an attempt to map eVMCS after migration

When enlightened VMCS is in use and nested state is migrated with
vmx_get_nested_state()/vmx_set_nested_state() KVM can't map evmcs
page right away: evmcs gpa is not 'struct kvm_vmx_nested_state_hdr'
and we can't read it from VP assist page because userspace may decide
to restore HV_X64_MSR_VP_ASSIST_PAGE after restoring nested state
(and QEMU, for example, does exactly that). To make sure eVMCS is
mapped /vmx_set_nested_state() raises KVM_REQ_GET_NESTED_STATE_PAGES
request.

Commit f2c7ef3ba955 ("KVM: nSVM: cancel KVM_REQ_GET_NESTED_STATE_PAGES
on nested vmexit") added KVM_REQ_GET_NESTED_STATE_PAGES clearing to
nested_vmx_vmexit() to make sure MSR permission bitmap is not switched
when an immediate exit from L2 to L1 happens right after migration (caused
by a pending event, for example). Unfortunately, in the exact same
situation we still need to have eVMCS mapped so
nested_sync_vmcs12_to_shadow() reflects changes in VMCS12 to eVMCS.

As a band-aid, restore nested_get_evmcs_page() when clearing
KVM_REQ_GET_NESTED_STATE_PAGES in nested_vmx_vmexit(). The 'fix' is far
from being ideal as we can't easily propagate possible failures and even if
we could, this is most likely already too late to do so. The whole
'KVM_REQ_GET_NESTED_STATE_PAGES' idea for mapping eVMCS after migration
seems to be fragile as we diverge too much from the 'native' path when
vmptr loading happens on vmx_set_nested_state().

Fixes: f2c7ef3ba955 ("KVM: nSVM: cancel KVM_REQ_GET_NESTED_STATE_PAGES on nested vmexit")
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20210503150854.1144255-2-vkuznets@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index bced76637823..6058a65a6ede 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3098,15 +3098,8 @@ static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu)
 			nested_vmx_handle_enlightened_vmptrld(vcpu, false);
 
 		if (evmptrld_status == EVMPTRLD_VMFAIL ||
-		    evmptrld_status == EVMPTRLD_ERROR) {
-			pr_debug_ratelimited("%s: enlightened vmptrld failed\n",
-					     __func__);
-			vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-			vcpu->run->internal.suberror =
-				KVM_INTERNAL_ERROR_EMULATION;
-			vcpu->run->internal.ndata = 0;
+		    evmptrld_status == EVMPTRLD_ERROR)
 			return false;
-		}
 	}
 
 	return true;
@@ -3194,8 +3187,16 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
 
 static bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu)
 {
-	if (!nested_get_evmcs_page(vcpu))
+	if (!nested_get_evmcs_page(vcpu)) {
+		pr_debug_ratelimited("%s: enlightened vmptrld failed\n",
+				     __func__);
+		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		vcpu->run->internal.suberror =
+			KVM_INTERNAL_ERROR_EMULATION;
+		vcpu->run->internal.ndata = 0;
+
 		return false;
+	}
 
 	if (is_guest_mode(vcpu) && !nested_get_vmcs12_pages(vcpu))
 		return false;
@@ -4435,7 +4436,15 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
 	/* Similarly, triple faults in L2 should never escape. */
 	WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu));
 
-	kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
+	if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
+		/*
+		 * KVM_REQ_GET_NESTED_STATE_PAGES is also used to map
+		 * Enlightened VMCS after migration and we still need to
+		 * do that when something is forcing L2->L1 exit prior to
+		 * the first L2 run.
+		 */
+		(void)nested_get_evmcs_page(vcpu);
+	}
 
 	/* Service the TLB flush request for L2 before switching to L1. */
 	if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
-- 
cgit v1.2.3


From 32d1b3ab588c1231dbfa9eb08819c50529ce77d7 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 5 May 2021 17:18:21 +0200
Subject: KVM: selftests: evmcs_test: Check that VMLAUNCH with bogus EVMPTR is
 causing #UD

'run->exit_reason == KVM_EXIT_SHUTDOWN' check is not ideal as we may be
getting some unexpected exception. Directly check for #UD instead.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20210505151823.1341678-2-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/x86_64/evmcs_test.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index ca22ee6d19cb..b01f64ac6ce3 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -19,6 +19,14 @@
 
 #define VCPU_ID		5
 
+static int ud_count;
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+	ud_count++;
+	regs->rip += 3; /* VMLAUNCH */
+}
+
 void l2_guest_code(void)
 {
 	GUEST_SYNC(7);
@@ -71,11 +79,11 @@ void guest_code(struct vmx_pages *vmx_pages)
 	if (vmx_pages)
 		l1_guest_code(vmx_pages);
 
-	GUEST_DONE();
-
 	/* Try enlightened vmptrld with an incorrect GPA */
 	evmcs_vmptrld(0xdeadbeef, vmx_pages->enlightened_vmcs);
 	GUEST_ASSERT(vmlaunch());
+	GUEST_ASSERT(ud_count == 1);
+	GUEST_DONE();
 }
 
 int main(int argc, char *argv[])
@@ -109,6 +117,10 @@ int main(int argc, char *argv[])
 	vcpu_alloc_vmx(vm, &vmx_pages_gva);
 	vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
 
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vm, VCPU_ID);
+	vm_handle_exception(vm, UD_VECTOR, guest_ud_handler);
+
 	for (stage = 1;; stage++) {
 		_vcpu_run(vm, VCPU_ID);
 		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
@@ -124,7 +136,7 @@ int main(int argc, char *argv[])
 		case UCALL_SYNC:
 			break;
 		case UCALL_DONE:
-			goto part1_done;
+			goto done;
 		default:
 			TEST_FAIL("Unknown ucall %lu", uc.cmd);
 		}
@@ -156,10 +168,6 @@ int main(int argc, char *argv[])
 			    (ulong) regs2.rdi, (ulong) regs2.rsi);
 	}
 
-part1_done:
-	_vcpu_run(vm, VCPU_ID);
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
-		    "Unexpected successful VMEnter with invalid eVMCS pointer!");
-
+done:
 	kvm_vm_free(vm);
 }
-- 
cgit v1.2.3


From c9ecafaf0113a305f5085ceb9c7a4b64ca70eae9 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 5 May 2021 17:18:22 +0200
Subject: KVM: selftests: evmcs_test: Check that VMCS12 is alway properly
 synced to eVMCS after restore

Add a test for the regression, introduced by commit f2c7ef3ba955
("KVM: nSVM: cancel KVM_REQ_GET_NESTED_STATE_PAGES on nested vmexit"). When
L2->L1 exit is forced immediately after restoring nested state,
KVM_REQ_GET_NESTED_STATE_PAGES request is cleared and VMCS12 changes
(e.g. fresh RIP) are not reflected to eVMCS. The consequent nested
vCPU run gets broken.

Utilize NMI injection to do the job.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20210505151823.1341678-3-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/x86_64/evmcs_test.c | 66 ++++++++++++++++++++-----
 1 file changed, 55 insertions(+), 11 deletions(-)

diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index b01f64ac6ce3..63096cea26c6 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -18,30 +18,52 @@
 #include "vmx.h"
 
 #define VCPU_ID		5
+#define NMI_VECTOR	2
 
 static int ud_count;
 
+void enable_x2apic(void)
+{
+	uint32_t spiv_reg = APIC_BASE_MSR + (APIC_SPIV >> 4);
+
+	wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) |
+	      MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD);
+	wrmsr(spiv_reg, rdmsr(spiv_reg) | APIC_SPIV_APIC_ENABLED);
+}
+
 static void guest_ud_handler(struct ex_regs *regs)
 {
 	ud_count++;
 	regs->rip += 3; /* VMLAUNCH */
 }
 
+static void guest_nmi_handler(struct ex_regs *regs)
+{
+}
+
 void l2_guest_code(void)
 {
 	GUEST_SYNC(7);
 
 	GUEST_SYNC(8);
 
+	/* Forced exit to L1 upon restore */
+	GUEST_SYNC(9);
+
 	/* Done, exit to L1 and never come back.  */
 	vmcall();
 }
 
-void l1_guest_code(struct vmx_pages *vmx_pages)
+void guest_code(struct vmx_pages *vmx_pages)
 {
 #define L2_GUEST_STACK_SIZE 64
 	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
 
+	enable_x2apic();
+
+	GUEST_SYNC(1);
+	GUEST_SYNC(2);
+
 	enable_vp_assist(vmx_pages->vp_assist_gpa, vmx_pages->vp_assist);
 
 	GUEST_ASSERT(vmx_pages->vmcs_gpa);
@@ -63,21 +85,22 @@ void l1_guest_code(struct vmx_pages *vmx_pages)
 	current_evmcs->revision_id = EVMCS_VERSION;
 	GUEST_SYNC(6);
 
+	current_evmcs->pin_based_vm_exec_control |=
+		PIN_BASED_NMI_EXITING;
 	GUEST_ASSERT(!vmlaunch());
 	GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
-	GUEST_SYNC(9);
+
+	/*
+	 * NMI forces L2->L1 exit, resuming L2 and hope that EVMCS is
+	 * up-to-date (RIP points where it should and not at the beginning
+	 * of l2_guest_code(). GUEST_SYNC(9) checkes that.
+	 */
 	GUEST_ASSERT(!vmresume());
-	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-	GUEST_SYNC(10);
-}
 
-void guest_code(struct vmx_pages *vmx_pages)
-{
-	GUEST_SYNC(1);
-	GUEST_SYNC(2);
+	GUEST_SYNC(10);
 
-	if (vmx_pages)
-		l1_guest_code(vmx_pages);
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+	GUEST_SYNC(11);
 
 	/* Try enlightened vmptrld with an incorrect GPA */
 	evmcs_vmptrld(0xdeadbeef, vmx_pages->enlightened_vmcs);
@@ -86,6 +109,18 @@ void guest_code(struct vmx_pages *vmx_pages)
 	GUEST_DONE();
 }
 
+void inject_nmi(struct kvm_vm *vm)
+{
+	struct kvm_vcpu_events events;
+
+	vcpu_events_get(vm, VCPU_ID, &events);
+
+	events.nmi.pending = 1;
+	events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING;
+
+	vcpu_events_set(vm, VCPU_ID, &events);
+}
+
 int main(int argc, char *argv[])
 {
 	vm_vaddr_t vmx_pages_gva = 0;
@@ -120,6 +155,9 @@ int main(int argc, char *argv[])
 	vm_init_descriptor_tables(vm);
 	vcpu_init_descriptor_tables(vm, VCPU_ID);
 	vm_handle_exception(vm, UD_VECTOR, guest_ud_handler);
+	vm_handle_exception(vm, NMI_VECTOR, guest_nmi_handler);
+
+	pr_info("Running L1 which uses EVMCS to run L2\n");
 
 	for (stage = 1;; stage++) {
 		_vcpu_run(vm, VCPU_ID);
@@ -166,6 +204,12 @@ int main(int argc, char *argv[])
 		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
 			    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
 			    (ulong) regs2.rdi, (ulong) regs2.rsi);
+
+		/* Force immediate L2->L1 exit before resuming */
+		if (stage == 8) {
+			pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n");
+			inject_nmi(vm);
+		}
 	}
 
 done:
-- 
cgit v1.2.3


From 70f094f4f01dc4d6f78ac6407f85627293a6553c Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Mon, 3 May 2021 17:08:52 +0200
Subject: KVM: nVMX: Properly pad 'struct kvm_vmx_nested_state_hdr'

Eliminate the probably unwanted hole in 'struct kvm_vmx_nested_state_hdr':

Pre-patch:
struct kvm_vmx_nested_state_hdr {
        __u64                      vmxon_pa;             /*     0     8 */
        __u64                      vmcs12_pa;            /*     8     8 */
        struct {
                __u16              flags;                /*    16     2 */
        } smm;                                           /*    16     2 */

        /* XXX 2 bytes hole, try to pack */

        __u32                      flags;                /*    20     4 */
        __u64                      preemption_timer_deadline; /*    24     8 */
};

Post-patch:
struct kvm_vmx_nested_state_hdr {
        __u64                      vmxon_pa;             /*     0     8 */
        __u64                      vmcs12_pa;            /*     8     8 */
        struct {
                __u16              flags;                /*    16     2 */
        } smm;                                           /*    16     2 */
        __u16                      pad;                  /*    18     2 */
        __u32                      flags;                /*    20     4 */
        __u64                      preemption_timer_deadline; /*    24     8 */
};

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20210503150854.1144255-3-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/uapi/asm/kvm.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 5a3022c8af82..0662f644aad9 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -437,6 +437,8 @@ struct kvm_vmx_nested_state_hdr {
 		__u16 flags;
 	} smm;
 
+	__u16 pad;
+
 	__u32 flags;
 	__u64 preemption_timer_deadline;
 };
-- 
cgit v1.2.3


From 5f443e424efab56baa8021da04878f88eb0815d4 Mon Sep 17 00:00:00 2001
From: Bill Wendling <morbo@google.com>
Date: Thu, 10 Dec 2020 17:23:17 -0800
Subject: selftests: kvm: remove reassignment of non-absolute variables

Clang's integrated assembler does not allow symbols with non-absolute
values to be reassigned. Modify the interrupt entry loop macro to be
compatible with IAS by using a label and an offset.

Cc: Jian Cai <caij2003@gmail.com>
Signed-off-by: Bill Wendling <morbo@google.com>
References: https://lore.kernel.org/lkml/20200714233024.1789985-1-caij2003@gmail.com/
Message-Id: <20201211012317.3722214-1-morbo@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/lib/x86_64/handlers.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86_64/handlers.S
index aaf7bc7d2ce1..7629819734af 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/handlers.S
+++ b/tools/testing/selftests/kvm/lib/x86_64/handlers.S
@@ -54,9 +54,9 @@ idt_handlers:
 	.align 8
 
 	/* Fetch current address and append it to idt_handlers. */
-	current_handler = .
+666 :
 .pushsection .rodata
-.quad current_handler
+	.quad 666b
 .popsection
 
 	.if ! \has_error
-- 
cgit v1.2.3


From aca352886ebdd675b5131ed4c83bf5477eee5d72 Mon Sep 17 00:00:00 2001
From: Siddharth Chandrasekaran <sidcha@amazon.de>
Date: Mon, 3 May 2021 14:21:11 +0200
Subject: KVM: x86: Hoist input checks in kvm_add_msr_filter()

In ioctl KVM_X86_SET_MSR_FILTER, input from user space is validated
after a memdup_user(). For invalid inputs we'd memdup and then call
kfree unnecessarily. Hoist input validation to avoid kfree altogether.

Signed-off-by: Siddharth Chandrasekaran <sidcha@amazon.de>
Message-Id: <20210503122111.13775-1-sidcha@amazon.de>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 26 +++++++-------------------
 1 file changed, 7 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cebdaa1e3cf5..102f116d9bb4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5468,14 +5468,18 @@ static void kvm_free_msr_filter(struct kvm_x86_msr_filter *msr_filter)
 static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
 			      struct kvm_msr_filter_range *user_range)
 {
-	struct msr_bitmap_range range;
 	unsigned long *bitmap = NULL;
 	size_t bitmap_size;
-	int r;
 
 	if (!user_range->nmsrs)
 		return 0;
 
+	if (user_range->flags & ~(KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE))
+		return -EINVAL;
+
+	if (!user_range->flags)
+		return -EINVAL;
+
 	bitmap_size = BITS_TO_LONGS(user_range->nmsrs) * sizeof(long);
 	if (!bitmap_size || bitmap_size > KVM_MSR_FILTER_MAX_BITMAP_SIZE)
 		return -EINVAL;
@@ -5484,31 +5488,15 @@ static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
 	if (IS_ERR(bitmap))
 		return PTR_ERR(bitmap);
 
-	range = (struct msr_bitmap_range) {
+	msr_filter->ranges[msr_filter->count] = (struct msr_bitmap_range) {
 		.flags = user_range->flags,
 		.base = user_range->base,
 		.nmsrs = user_range->nmsrs,
 		.bitmap = bitmap,
 	};
 
-	if (range.flags & ~(KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE)) {
-		r = -EINVAL;
-		goto err;
-	}
-
-	if (!range.flags) {
-		r = -EINVAL;
-		goto err;
-	}
-
-	/* Everything ok, add this range identifier. */
-	msr_filter->ranges[msr_filter->count] = range;
 	msr_filter->count++;
-
 	return 0;
-err:
-	kfree(bitmap);
-	return r;
 }
 
 static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
-- 
cgit v1.2.3


From 063ab16c14db5a2ef52d54d0475b7fed19c982d7 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Tue, 4 May 2021 17:39:35 +0300
Subject: KVM: nSVM: always restore the L1's GIF on migration

While usually the L1's GIF is set while L2 runs, and usually
migration nested state is loaded after a vCPU reset which
also sets L1's GIF to true, this is not guaranteed.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210504143936.1644378-2-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/nested.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 32400cba608d..b331446f67f3 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -1314,6 +1314,8 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
 	else
 		svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
 
+	svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
+
 	svm->nested.nested_run_pending =
 		!!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
 
-- 
cgit v1.2.3


From 809c79137a192d7e881a517f803ebbf96305f066 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Tue, 4 May 2021 17:39:36 +0300
Subject: KVM: nSVM: remove a warning about vmcb01 VM exit reason

While in most cases, when returning to use the VMCB01,
the exit reason stored in it will be SVM_EXIT_VMRUN,
on first VM exit after a nested migration this field
can contain anything since the VM entry did happen
before the migration.

Remove this warning to avoid the false positive.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210504143936.1644378-3-mlevitsk@redhat.com>
Fixes: 9a7de6ecc3ed ("KVM: nSVM: If VMRUN is single-stepped, queue the #DB intercept in nested_svm_vmexit()")
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/nested.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index b331446f67f3..5e8d8443154e 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -764,7 +764,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
 	nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);
 
 	svm_switch_vmcb(svm, &svm->vmcb01);
-	WARN_ON_ONCE(svm->vmcb->control.exit_code != SVM_EXIT_VMRUN);
 
 	/*
 	 * On vmexit the  GIF is set to false and
-- 
cgit v1.2.3


From 8aec21c04caa2000f91cf8822ae0811e4b0c3971 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 10:17:20 -0700
Subject: KVM: VMX: Do not advertise RDPID if ENABLE_RDTSCP control is
 unsupported

Clear KVM's RDPID capability if the ENABLE_RDTSCP secondary exec control is
unsupported.  Despite being enumerated in a separate CPUID flag, RDPID is
bundled under the same VMCS control as RDTSCP and will #UD in VMX non-root
if ENABLE_RDTSCP is not enabled.

Fixes: 41cd02c6f7f6 ("kvm: x86: Expose RDPID in KVM_GET_SUPPORTED_CPUID")
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-2-seanjc@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/vmx.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index cbe0cdade38a..46573b862638 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7377,9 +7377,11 @@ static __init void vmx_set_cpu_caps(void)
 	if (!cpu_has_vmx_xsaves())
 		kvm_cpu_cap_clear(X86_FEATURE_XSAVES);
 
-	/* CPUID 0x80000001 */
-	if (!cpu_has_vmx_rdtscp())
+	/* CPUID 0x80000001 and 0x7 (RDPID) */
+	if (!cpu_has_vmx_rdtscp()) {
 		kvm_cpu_cap_clear(X86_FEATURE_RDTSCP);
+		kvm_cpu_cap_clear(X86_FEATURE_RDPID);
+	}
 
 	if (cpu_has_vmx_waitpkg())
 		kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
-- 
cgit v1.2.3


From 85d0011264da24be08ae907d7f29983a597ca9b1 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 10:17:21 -0700
Subject: KVM: x86: Emulate RDPID only if RDTSCP is supported

Do not advertise emulation support for RDPID if RDTSCP is unsupported.
RDPID emulation subtly relies on MSR_TSC_AUX to exist in hardware, as
both vmx_get_msr() and svm_get_msr() will return an error if the MSR is
unsupported, i.e. ctxt->ops->get_msr() will fail and the emulator will
inject a #UD.

Note, RDPID emulation also relies on RDTSCP being enabled in the guest,
but this is a KVM bug and will eventually be fixed.

Fixes: fb6d4d340e05 ("KVM: x86: emulate RDPID")
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-3-seanjc@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/cpuid.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 19606a341888..c0e8c5e92189 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -637,7 +637,8 @@ static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func)
 	case 7:
 		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
 		entry->eax = 0;
-		entry->ecx = F(RDPID);
+		if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
+			entry->ecx = F(RDPID);
 		++array->nent;
 	default:
 		break;
-- 
cgit v1.2.3


From 3b195ac9260235624b1c18f7bdaef184479c1d41 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 10:17:22 -0700
Subject: KVM: SVM: Inject #UD on RDTSCP when it should be disabled in the
 guest

Intercept RDTSCP to inject #UD if RDTSC is disabled in the guest.

Note, SVM does not support intercepting RDPID.  Unlike VMX's
ENABLE_RDTSCP control, RDTSCP interception does not apply to RDPID.  This
is a benign virtualization hole as the host kernel (incorrectly) sets
MSR_TSC_AUX if RDTSCP is supported, and KVM loads the guest's MSR_TSC_AUX
into hardware if RDTSCP is supported in the host, i.e. KVM will not leak
the host's MSR_TSC_AUX to the guest.

But, when the kernel bug is fixed, KVM will start leaking the host's
MSR_TSC_AUX if RDPID is supported in hardware, but RDTSCP isn't available
for whatever reason.  This leak will be remedied in a future commit.

Fixes: 46896c73c1a4 ("KVM: svm: add support for RDTSCP")
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-4-seanjc@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/svm.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index be5cf612ab1f..ebcb5849d69b 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1100,7 +1100,9 @@ static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 	return svm->vmcb->control.tsc_offset;
 }
 
-static void svm_check_invpcid(struct vcpu_svm *svm)
+/* Evaluate instruction intercepts that depend on guest CPUID features. */
+static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu,
+					      struct vcpu_svm *svm)
 {
 	/*
 	 * Intercept INVPCID if shadow paging is enabled to sync/free shadow
@@ -1113,6 +1115,13 @@ static void svm_check_invpcid(struct vcpu_svm *svm)
 		else
 			svm_clr_intercept(svm, INTERCEPT_INVPCID);
 	}
+
+	if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) {
+		if (guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
+			svm_clr_intercept(svm, INTERCEPT_RDTSCP);
+		else
+			svm_set_intercept(svm, INTERCEPT_RDTSCP);
+	}
 }
 
 static void init_vmcb(struct kvm_vcpu *vcpu)
@@ -1248,7 +1257,7 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
 		svm_clr_intercept(svm, INTERCEPT_PAUSE);
 	}
 
-	svm_check_invpcid(svm);
+	svm_recalc_instruction_intercepts(vcpu, svm);
 
 	/*
 	 * If the host supports V_SPEC_CTRL then disable the interception
@@ -3084,6 +3093,7 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[SVM_EXIT_STGI]				= stgi_interception,
 	[SVM_EXIT_CLGI]				= clgi_interception,
 	[SVM_EXIT_SKINIT]			= skinit_interception,
+	[SVM_EXIT_RDTSCP]			= kvm_handle_invalid_op,
 	[SVM_EXIT_WBINVD]                       = kvm_emulate_wbinvd,
 	[SVM_EXIT_MONITOR]			= kvm_emulate_monitor,
 	[SVM_EXIT_MWAIT]			= kvm_emulate_mwait,
@@ -4007,8 +4017,7 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 	svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) &&
 			     guest_cpuid_has(vcpu, X86_FEATURE_NRIPS);
 
-	/* Check again if INVPCID interception if required */
-	svm_check_invpcid(svm);
+	svm_recalc_instruction_intercepts(vcpu, svm);
 
 	/* For sev guests, the memory encryption bit is not reserved in CR3.  */
 	if (sev_guest(vcpu->kvm)) {
-- 
cgit v1.2.3


From 2183de4161b90bd3851ccd3910c87b2c9adfc6ed Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 10:17:23 -0700
Subject: KVM: x86: Move RDPID emulation intercept to its own enum

Add a dedicated intercept enum for RDPID instead of piggybacking RDTSCP.
Unlike VMX's ENABLE_RDTSCP, RDPID is not bound to SVM's RDTSCP intercept.

Fixes: fb6d4d340e05 ("KVM: x86: emulate RDPID")
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-5-seanjc@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/emulate.c     | 2 +-
 arch/x86/kvm/kvm_emulate.h | 1 +
 arch/x86/kvm/vmx/vmx.c     | 3 ++-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 77e1c89a95a7..8a0ccdb56076 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4502,7 +4502,7 @@ static const struct opcode group8[] = {
  * from the register case of group9.
  */
 static const struct gprefix pfx_0f_c7_7 = {
-	N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdtscp),
+	N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdpid),
 };
 
 
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 0d359115429a..f016838faedd 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -468,6 +468,7 @@ enum x86_intercept {
 	x86_intercept_clgi,
 	x86_intercept_skinit,
 	x86_intercept_rdtscp,
+	x86_intercept_rdpid,
 	x86_intercept_icebp,
 	x86_intercept_wbinvd,
 	x86_intercept_monitor,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 46573b862638..4a625c748275 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7437,8 +7437,9 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
 	/*
 	 * RDPID causes #UD if disabled through secondary execution controls.
 	 * Because it is marked as EmulateOnUD, we need to intercept it here.
+	 * Note, RDPID is hidden behind ENABLE_RDTSCP.
 	 */
-	case x86_intercept_rdtscp:
+	case x86_intercept_rdpid:
 		if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_RDTSCP)) {
 			exception->vector = UD_VECTOR;
 			exception->error_code_valid = false;
-- 
cgit v1.2.3


From 5104d7ffcf24749939bea7fdb5378d186473f890 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 10:17:24 -0700
Subject: KVM: VMX: Disable preemption when probing user return MSRs

Disable preemption when probing a user return MSR via RDSMR/WRMSR.  If
the MSR holds a different value per logical CPU, the WRMSR could corrupt
the host's value if KVM is preempted between the RDMSR and WRMSR, and
then rescheduled on a different CPU.

Opportunistically land the helper in common x86, SVM will use the helper
in a future commit.

Fixes: 4be534102624 ("KVM: VMX: Initialize vmx->guest_msrs[] right after allocation")
Cc: stable@vger.kernel.org
Cc: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-6-seanjc@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/vmx/vmx.c          |  5 +----
 arch/x86/kvm/x86.c              | 16 ++++++++++++++++
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 848956bb3cf1..e8dcbc632cf8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1777,6 +1777,7 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
 		    unsigned long icr, int op_64_bit);
 
 void kvm_define_user_return_msr(unsigned index, u32 msr);
+int kvm_probe_user_return_msr(u32 msr);
 int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);
 
 u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 4a625c748275..11ff9c3d95d5 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6914,12 +6914,9 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
 
 	for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) {
 		u32 index = vmx_uret_msrs_list[i];
-		u32 data_low, data_high;
 		int j = vmx->nr_uret_msrs;
 
-		if (rdmsr_safe(index, &data_low, &data_high) < 0)
-			continue;
-		if (wrmsr_safe(index, data_low, data_high) < 0)
+		if (kvm_probe_user_return_msr(index))
 			continue;
 
 		vmx->guest_uret_msrs[j].slot = i;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 102f116d9bb4..bd90c73c37b4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -339,6 +339,22 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
 	}
 }
 
+int kvm_probe_user_return_msr(u32 msr)
+{
+	u64 val;
+	int ret;
+
+	preempt_disable();
+	ret = rdmsrl_safe(msr, &val);
+	if (ret)
+		goto out;
+	ret = wrmsrl_safe(msr, val);
+out:
+	preempt_enable();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(kvm_probe_user_return_msr);
+
 void kvm_define_user_return_msr(unsigned slot, u32 msr)
 {
 	BUG_ON(slot >= KVM_MAX_NR_USER_RETURN_MSRS);
-- 
cgit v1.2.3


From 0caa0a77c2f6fcd0830cdcd018db1af98fe35e28 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 10:17:25 -0700
Subject: KVM: SVM: Probe and load MSR_TSC_AUX regardless of RDTSCP support in
 host

Probe MSR_TSC_AUX whether or not RDTSCP is supported in the host, and
if probing succeeds, load the guest's MSR_TSC_AUX into hardware prior to
VMRUN.  Because SVM doesn't support interception of RDPID, RDPID cannot
be disallowed in the guest (without resorting to binary translation).
Leaving the host's MSR_TSC_AUX in hardware would leak the host's value to
the guest if RDTSCP is not supported.

Note, there is also a kernel bug that prevents leaking the host's value.
The host kernel initializes MSR_TSC_AUX if and only if RDTSCP is
supported, even though the vDSO usage consumes MSR_TSC_AUX via RDPID.
I.e. if RDTSCP is not supported, there is no host value to leak.  But,
if/when the host kernel bug is fixed, KVM would start leaking MSR_TSC_AUX
in the case where hardware supports RDPID but RDTSCP is unavailable for
whatever reason.

Probing MSR_TSC_AUX will also allow consolidating the probe and define
logic in common x86, and will make it simpler to condition the existence
of MSR_TSX_AUX (from the guest's perspective) on RDTSCP *or* RDPID.

Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-7-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/svm.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index ebcb5849d69b..13e4dd128177 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -212,7 +212,7 @@ DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
  * RDTSCP and RDPID are not used in the kernel, specifically to allow KVM to
  * defer the restoration of TSC_AUX until the CPU returns to userspace.
  */
-#define TSC_AUX_URET_SLOT	0
+static int tsc_aux_uret_slot __read_mostly = -1;
 
 static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
 
@@ -959,8 +959,10 @@ static __init int svm_hardware_setup(void)
 		kvm_tsc_scaling_ratio_frac_bits = 32;
 	}
 
-	if (boot_cpu_has(X86_FEATURE_RDTSCP))
-		kvm_define_user_return_msr(TSC_AUX_URET_SLOT, MSR_TSC_AUX);
+	if (!kvm_probe_user_return_msr(MSR_TSC_AUX)) {
+		tsc_aux_uret_slot = 0;
+		kvm_define_user_return_msr(tsc_aux_uret_slot, MSR_TSC_AUX);
+	}
 
 	/* Check for pause filtering support */
 	if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
@@ -1454,8 +1456,8 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
 		}
 	}
 
-	if (static_cpu_has(X86_FEATURE_RDTSCP))
-		kvm_set_user_return_msr(TSC_AUX_URET_SLOT, svm->tsc_aux, -1ull);
+	if (likely(tsc_aux_uret_slot >= 0))
+		kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull);
 
 	svm->guest_state_loaded = true;
 }
@@ -2664,7 +2666,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			msr_info->data |= (u64)svm->sysenter_esp_hi << 32;
 		break;
 	case MSR_TSC_AUX:
-		if (!boot_cpu_has(X86_FEATURE_RDTSCP))
+		if (tsc_aux_uret_slot < 0)
 			return 1;
 		if (!msr_info->host_initiated &&
 		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
@@ -2885,7 +2887,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		svm->sysenter_esp_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0;
 		break;
 	case MSR_TSC_AUX:
-		if (!boot_cpu_has(X86_FEATURE_RDTSCP))
+		if (tsc_aux_uret_slot < 0)
 			return 1;
 
 		if (!msr->host_initiated &&
@@ -2908,7 +2910,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		 * guest via direct_access_msrs, and switch it via user return.
 		 */
 		preempt_disable();
-		r = kvm_set_user_return_msr(TSC_AUX_URET_SLOT, data, -1ull);
+		r = kvm_set_user_return_msr(tsc_aux_uret_slot, data, -1ull);
 		preempt_enable();
 		if (r)
 			return 1;
-- 
cgit v1.2.3


From 36fa06f9ff39f23e03cd8206dc6bbb7711c23be6 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 10:17:26 -0700
Subject: KVM: x86: Add support for RDPID without RDTSCP

Allow userspace to enable RDPID for a guest without also enabling RDTSCP.
Aside from checking for RDPID support in the obvious flows, VMX also needs
to set ENABLE_RDTSCP=1 when RDPID is exposed.

For the record, there is no known scenario where enabling RDPID without
RDTSCP is desirable.  But, both AMD and Intel architectures allow for the
condition, i.e. this is purely to make KVM more architecturally accurate.

Fixes: 41cd02c6f7f6 ("kvm: x86: Expose RDPID in KVM_GET_SUPPORTED_CPUID")
Cc: stable@vger.kernel.org
Reported-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-8-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/svm.c |  6 ++++--
 arch/x86/kvm/vmx/vmx.c | 27 +++++++++++++++++++++++----
 arch/x86/kvm/x86.c     |  3 ++-
 3 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 13e4dd128177..0ba0a00f8dc6 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2669,7 +2669,8 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (tsc_aux_uret_slot < 0)
 			return 1;
 		if (!msr_info->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
 			return 1;
 		msr_info->data = svm->tsc_aux;
 		break;
@@ -2891,7 +2892,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 			return 1;
 
 		if (!msr->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
 			return 1;
 
 		/*
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 11ff9c3d95d5..b304e372aab3 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1788,7 +1788,8 @@ static void setup_msrs(struct vcpu_vmx *vmx)
 	if (update_transition_efer(vmx))
 		vmx_setup_uret_msr(vmx, MSR_EFER);
 
-	if (guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP))
+	if (guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP)  ||
+	    guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID))
 		vmx_setup_uret_msr(vmx, MSR_TSC_AUX);
 
 	vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL);
@@ -1994,7 +1995,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		break;
 	case MSR_TSC_AUX:
 		if (!msr_info->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
 			return 1;
 		goto find_uret_msr;
 	case MSR_IA32_DEBUGCTLMSR:
@@ -2314,7 +2316,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		break;
 	case MSR_TSC_AUX:
 		if (!msr_info->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
 			return 1;
 		/* Check reserved bit, higher 32 bits should be zero */
 		if ((data >> 32) != 0)
@@ -4368,7 +4371,23 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
 						  xsaves_enabled, false);
 	}
 
-	vmx_adjust_sec_exec_feature(vmx, &exec_control, rdtscp, RDTSCP);
+	/*
+	 * RDPID is also gated by ENABLE_RDTSCP, turn on the control if either
+	 * feature is exposed to the guest.  This creates a virtualization hole
+	 * if both are supported in hardware but only one is exposed to the
+	 * guest, but letting the guest execute RDTSCP or RDPID when either one
+	 * is advertised is preferable to emulating the advertised instruction
+	 * in KVM on #UD, and obviously better than incorrectly injecting #UD.
+	 */
+	if (cpu_has_vmx_rdtscp()) {
+		bool rdpid_or_rdtscp_enabled =
+			guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) ||
+			guest_cpuid_has(vcpu, X86_FEATURE_RDPID);
+
+		vmx_adjust_secondary_exec_control(vmx, &exec_control,
+						  SECONDARY_EXEC_ENABLE_RDTSCP,
+						  rdpid_or_rdtscp_enabled, false);
+	}
 	vmx_adjust_sec_exec_feature(vmx, &exec_control, invpcid, INVPCID);
 
 	vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdrand, RDRAND);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bd90c73c37b4..0856636efc44 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5941,7 +5941,8 @@ static void kvm_init_msr_list(void)
 				continue;
 			break;
 		case MSR_TSC_AUX:
-			if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
+			if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP) &&
+			    !kvm_cpu_cap_has(X86_FEATURE_RDPID))
 				continue;
 			break;
 		case MSR_IA32_UMWAIT_CONTROL:
-- 
cgit v1.2.3


From b6194b94a2ca4affce5aab1bbf773a977ad73671 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 10:17:27 -0700
Subject: KVM: VMX: Configure list of user return MSRs at module init

Configure the list of user return MSRs that are actually supported at
module init instead of reprobing the list of possible MSRs every time a
vCPU is created.  Curating the list on a per-vCPU basis is pointless; KVM
is completely hosed if the set of supported MSRs changes after module init,
or if the set of MSRs differs per physical PCU.

The per-vCPU lists also increase complexity (see __vmx_find_uret_msr()) and
creates corner cases that _should_ be impossible, but theoretically exist
in KVM, e.g. advertising RDTSCP to userspace without actually being able to
virtualize RDTSCP if probing MSR_TSC_AUX fails.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-9-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/vmx.c | 61 +++++++++++++++++++++++++++++++++-----------------
 arch/x86/kvm/vmx/vmx.h | 10 ++++++++-
 2 files changed, 50 insertions(+), 21 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index b304e372aab3..887db1af1312 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -461,7 +461,7 @@ static unsigned long host_idt_base;
  * support this emulation, IA32_STAR must always be included in
  * vmx_uret_msrs_list[], even in i386 builds.
  */
-static const u32 vmx_uret_msrs_list[] = {
+static u32 vmx_uret_msrs_list[] = {
 #ifdef CONFIG_X86_64
 	MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
 #endif
@@ -469,6 +469,12 @@ static const u32 vmx_uret_msrs_list[] = {
 	MSR_IA32_TSX_CTRL,
 };
 
+/*
+ * Number of user return MSRs that are actually supported in hardware.
+ * vmx_uret_msrs_list is modified when KVM is loaded to drop unsupported MSRs.
+ */
+static int vmx_nr_uret_msrs;
+
 #if IS_ENABLED(CONFIG_HYPERV)
 static bool __read_mostly enlightened_vmcs = true;
 module_param(enlightened_vmcs, bool, 0444);
@@ -700,9 +706,16 @@ static inline int __vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
 {
 	int i;
 
-	for (i = 0; i < vmx->nr_uret_msrs; ++i)
+	/*
+	 * Note, vmx->guest_uret_msrs is the same size as vmx_uret_msrs_list,
+	 * but is ordered differently.  The MSR is matched against the list of
+	 * supported uret MSRs using "slot", but the index that is returned is
+	 * the index into guest_uret_msrs.
+	 */
+	for (i = 0; i < vmx_nr_uret_msrs; ++i) {
 		if (vmx_uret_msrs_list[vmx->guest_uret_msrs[i].slot] == msr)
 			return i;
+	}
 	return -1;
 }
 
@@ -6929,18 +6942,10 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
 			goto free_vpid;
 	}
 
-	BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS);
-
-	for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) {
-		u32 index = vmx_uret_msrs_list[i];
-		int j = vmx->nr_uret_msrs;
+	for (i = 0; i < vmx_nr_uret_msrs; ++i) {
+		vmx->guest_uret_msrs[i].data = 0;
 
-		if (kvm_probe_user_return_msr(index))
-			continue;
-
-		vmx->guest_uret_msrs[j].slot = i;
-		vmx->guest_uret_msrs[j].data = 0;
-		switch (index) {
+		switch (vmx_uret_msrs_list[i]) {
 		case MSR_IA32_TSX_CTRL:
 			/*
 			 * TSX_CTRL_CPUID_CLEAR is handled in the CPUID
@@ -6954,15 +6959,14 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
 			 * host so that TSX remains always disabled.
 			 */
 			if (boot_cpu_has(X86_FEATURE_RTM))
-				vmx->guest_uret_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
+				vmx->guest_uret_msrs[i].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
 			else
-				vmx->guest_uret_msrs[j].mask = 0;
+				vmx->guest_uret_msrs[i].mask = 0;
 			break;
 		default:
-			vmx->guest_uret_msrs[j].mask = -1ull;
+			vmx->guest_uret_msrs[i].mask = -1ull;
 			break;
 		}
-		++vmx->nr_uret_msrs;
 	}
 
 	err = alloc_loaded_vmcs(&vmx->vmcs01);
@@ -7821,17 +7825,34 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
 };
 
+static __init void vmx_setup_user_return_msrs(void)
+{
+	u32 msr;
+	int i;
+
+	BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS);
+
+	for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) {
+		msr = vmx_uret_msrs_list[i];
+
+		if (kvm_probe_user_return_msr(msr))
+			continue;
+
+		kvm_define_user_return_msr(vmx_nr_uret_msrs, msr);
+		vmx_uret_msrs_list[vmx_nr_uret_msrs++] = msr;
+	}
+}
+
 static __init int hardware_setup(void)
 {
 	unsigned long host_bndcfgs;
 	struct desc_ptr dt;
-	int r, i, ept_lpage_level;
+	int r, ept_lpage_level;
 
 	store_idt(&dt);
 	host_idt_base = dt.address;
 
-	for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i)
-		kvm_define_user_return_msr(i, vmx_uret_msrs_list[i]);
+	vmx_setup_user_return_msrs();
 
 	if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0)
 		return -EIO;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 008cb87ff088..d71ed8b425c5 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -245,8 +245,16 @@ struct vcpu_vmx {
 	u32                   idt_vectoring_info;
 	ulong                 rflags;
 
+	/*
+	 * User return MSRs are always emulated when enabled in the guest, but
+	 * only loaded into hardware when necessary, e.g. SYSCALL #UDs outside
+	 * of 64-bit mode or if EFER.SCE=1, thus the SYSCALL MSRs don't need to
+	 * be loaded into hardware if those conditions aren't met.
+	 * nr_active_uret_msrs tracks the number of MSRs that need to be loaded
+	 * into hardware when running the guest.  guest_uret_msrs[] is resorted
+	 * whenever the number of "active" uret MSRs is modified.
+	 */
 	struct vmx_uret_msr   guest_uret_msrs[MAX_NR_USER_RETURN_MSRS];
-	int                   nr_uret_msrs;
 	int                   nr_active_uret_msrs;
 	bool                  guest_uret_msrs_loaded;
 #ifdef CONFIG_X86_64
-- 
cgit v1.2.3


From ee9d22e08d1341692a43926e5e1d84c90a5dac1d Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 10:17:28 -0700
Subject: KVM: VMX: Use flag to indicate "active" uret MSRs instead of sorting
 list

Explicitly flag a uret MSR as needing to be loaded into hardware instead of
resorting the list of "active" MSRs and tracking how many MSRs in total
need to be loaded.  The only benefit to sorting the list is that the loop
to load MSRs during vmx_prepare_switch_to_guest() doesn't need to iterate
over all supported uret MRS, only those that are active.  But that is a
pointless optimization, as the most common case, running a 64-bit guest,
will load the vast majority of MSRs.  Not to mention that a single WRMSR is
far more expensive than iterating over the list.

Providing a stable list order obviates the need to track a given MSR's
"slot" in the per-CPU list of user return MSRs; all lists simply use the
same ordering.  Future patches will take advantage of the stable order to
further simplify the related code.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-10-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/vmx.c | 80 ++++++++++++++++++++++++++------------------------
 arch/x86/kvm/vmx/vmx.h |  2 +-
 2 files changed, 42 insertions(+), 40 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 887db1af1312..adefedac0e3b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -458,8 +458,9 @@ static unsigned long host_idt_base;
  * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm
  * will emulate SYSCALL in legacy mode if the vendor string in guest
  * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To
- * support this emulation, IA32_STAR must always be included in
- * vmx_uret_msrs_list[], even in i386 builds.
+ * support this emulation, MSR_STAR is included in the list for i386,
+ * but is never loaded into hardware.  MSR_CSTAR is also never loaded
+ * into hardware and is here purely for emulation purposes.
  */
 static u32 vmx_uret_msrs_list[] = {
 #ifdef CONFIG_X86_64
@@ -702,18 +703,12 @@ static bool is_valid_passthrough_msr(u32 msr)
 	return r;
 }
 
-static inline int __vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
+static inline int __vmx_find_uret_msr(u32 msr)
 {
 	int i;
 
-	/*
-	 * Note, vmx->guest_uret_msrs is the same size as vmx_uret_msrs_list,
-	 * but is ordered differently.  The MSR is matched against the list of
-	 * supported uret MSRs using "slot", but the index that is returned is
-	 * the index into guest_uret_msrs.
-	 */
 	for (i = 0; i < vmx_nr_uret_msrs; ++i) {
-		if (vmx_uret_msrs_list[vmx->guest_uret_msrs[i].slot] == msr)
+		if (vmx_uret_msrs_list[i] == msr)
 			return i;
 	}
 	return -1;
@@ -723,7 +718,7 @@ struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
 {
 	int i;
 
-	i = __vmx_find_uret_msr(vmx, msr);
+	i = __vmx_find_uret_msr(msr);
 	if (i >= 0)
 		return &vmx->guest_uret_msrs[i];
 	return NULL;
@@ -732,13 +727,14 @@ struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
 static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
 				  struct vmx_uret_msr *msr, u64 data)
 {
+	unsigned int slot = msr - vmx->guest_uret_msrs;
 	int ret = 0;
 
 	u64 old_msr_data = msr->data;
 	msr->data = data;
-	if (msr - vmx->guest_uret_msrs < vmx->nr_active_uret_msrs) {
+	if (msr->load_into_hardware) {
 		preempt_disable();
-		ret = kvm_set_user_return_msr(msr->slot, msr->data, msr->mask);
+		ret = kvm_set_user_return_msr(slot, msr->data, msr->mask);
 		preempt_enable();
 		if (ret)
 			msr->data = old_msr_data;
@@ -1090,7 +1086,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx)
 		return false;
 	}
 
-	i = __vmx_find_uret_msr(vmx, MSR_EFER);
+	i = __vmx_find_uret_msr(MSR_EFER);
 	if (i < 0)
 		return false;
 
@@ -1252,11 +1248,14 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 	 */
 	if (!vmx->guest_uret_msrs_loaded) {
 		vmx->guest_uret_msrs_loaded = true;
-		for (i = 0; i < vmx->nr_active_uret_msrs; ++i)
-			kvm_set_user_return_msr(vmx->guest_uret_msrs[i].slot,
+		for (i = 0; i < vmx_nr_uret_msrs; ++i) {
+			if (!vmx->guest_uret_msrs[i].load_into_hardware)
+				continue;
+
+			kvm_set_user_return_msr(i,
 						vmx->guest_uret_msrs[i].data,
 						vmx->guest_uret_msrs[i].mask);
-
+		}
 	}
 
     	if (vmx->nested.need_vmcs12_to_shadow_sync)
@@ -1763,19 +1762,16 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
 	vmx_clear_hlt(vcpu);
 }
 
-static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr)
+static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr,
+			       bool load_into_hardware)
 {
-	struct vmx_uret_msr tmp;
-	int from, to;
+	struct vmx_uret_msr *uret_msr;
 
-	from = __vmx_find_uret_msr(vmx, msr);
-	if (from < 0)
+	uret_msr = vmx_find_uret_msr(vmx, msr);
+	if (!uret_msr)
 		return;
-	to = vmx->nr_active_uret_msrs++;
 
-	tmp = vmx->guest_uret_msrs[to];
-	vmx->guest_uret_msrs[to] = vmx->guest_uret_msrs[from];
-	vmx->guest_uret_msrs[from] = tmp;
+	uret_msr->load_into_hardware = load_into_hardware;
 }
 
 /*
@@ -1785,30 +1781,36 @@ static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr)
  */
 static void setup_msrs(struct vcpu_vmx *vmx)
 {
-	vmx->guest_uret_msrs_loaded = false;
-	vmx->nr_active_uret_msrs = 0;
 #ifdef CONFIG_X86_64
+	bool load_syscall_msrs;
+
 	/*
 	 * The SYSCALL MSRs are only needed on long mode guests, and only
 	 * when EFER.SCE is set.
 	 */
-	if (is_long_mode(&vmx->vcpu) && (vmx->vcpu.arch.efer & EFER_SCE)) {
-		vmx_setup_uret_msr(vmx, MSR_STAR);
-		vmx_setup_uret_msr(vmx, MSR_LSTAR);
-		vmx_setup_uret_msr(vmx, MSR_SYSCALL_MASK);
-	}
+	load_syscall_msrs = is_long_mode(&vmx->vcpu) &&
+			    (vmx->vcpu.arch.efer & EFER_SCE);
+
+	vmx_setup_uret_msr(vmx, MSR_STAR, load_syscall_msrs);
+	vmx_setup_uret_msr(vmx, MSR_LSTAR, load_syscall_msrs);
+	vmx_setup_uret_msr(vmx, MSR_SYSCALL_MASK, load_syscall_msrs);
 #endif
-	if (update_transition_efer(vmx))
-		vmx_setup_uret_msr(vmx, MSR_EFER);
+	vmx_setup_uret_msr(vmx, MSR_EFER, update_transition_efer(vmx));
 
-	if (guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP)  ||
-	    guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID))
-		vmx_setup_uret_msr(vmx, MSR_TSC_AUX);
+	vmx_setup_uret_msr(vmx, MSR_TSC_AUX,
+			   guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP) ||
+			   guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID));
 
-	vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL);
+	vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL, true);
 
 	if (cpu_has_vmx_msr_bitmap())
 		vmx_update_msr_bitmap(&vmx->vcpu);
+
+	/*
+	 * The set of MSRs to load may have changed, reload MSRs before the
+	 * next VM-Enter.
+	 */
+	vmx->guest_uret_msrs_loaded = false;
 }
 
 static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index d71ed8b425c5..16e4e457ba23 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -36,7 +36,7 @@ struct vmx_msrs {
 };
 
 struct vmx_uret_msr {
-	unsigned int slot; /* The MSR's slot in kvm_user_return_msrs. */
+	bool load_into_hardware;
 	u64 data;
 	u64 mask;
 };
-- 
cgit v1.2.3


From 8ea8b8d6f869425e21f34e60bdbe7e47e6c9d6b9 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 10:17:29 -0700
Subject: KVM: VMX: Use common x86's uret MSR list as the one true list

Drop VMX's global list of user return MSRs now that VMX doesn't resort said
list to isolate "active" MSRs, i.e. now that VMX's list and x86's list have
the same MSRs in the same order.

In addition to eliminating the redundant list, this will also allow moving
more of the list management into common x86.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-11-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/vmx/vmx.c          | 97 +++++++++++++++++------------------------
 arch/x86/kvm/x86.c              | 12 +++++
 3 files changed, 53 insertions(+), 57 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e8dcbc632cf8..6b15f27f49d0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1777,6 +1777,7 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
 		    unsigned long icr, int op_64_bit);
 
 void kvm_define_user_return_msr(unsigned index, u32 msr);
+int kvm_find_user_return_msr(u32 msr);
 int kvm_probe_user_return_msr(u32 msr);
 int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index adefedac0e3b..39506704be96 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -454,26 +454,7 @@ static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
 
 static unsigned long host_idt_base;
 
-/*
- * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm
- * will emulate SYSCALL in legacy mode if the vendor string in guest
- * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To
- * support this emulation, MSR_STAR is included in the list for i386,
- * but is never loaded into hardware.  MSR_CSTAR is also never loaded
- * into hardware and is here purely for emulation purposes.
- */
-static u32 vmx_uret_msrs_list[] = {
-#ifdef CONFIG_X86_64
-	MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
-#endif
-	MSR_EFER, MSR_TSC_AUX, MSR_STAR,
-	MSR_IA32_TSX_CTRL,
-};
-
-/*
- * Number of user return MSRs that are actually supported in hardware.
- * vmx_uret_msrs_list is modified when KVM is loaded to drop unsupported MSRs.
- */
+/* Number of user return MSRs that are actually supported in hardware. */
 static int vmx_nr_uret_msrs;
 
 #if IS_ENABLED(CONFIG_HYPERV)
@@ -703,22 +684,11 @@ static bool is_valid_passthrough_msr(u32 msr)
 	return r;
 }
 
-static inline int __vmx_find_uret_msr(u32 msr)
-{
-	int i;
-
-	for (i = 0; i < vmx_nr_uret_msrs; ++i) {
-		if (vmx_uret_msrs_list[i] == msr)
-			return i;
-	}
-	return -1;
-}
-
 struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
 {
 	int i;
 
-	i = __vmx_find_uret_msr(msr);
+	i = kvm_find_user_return_msr(msr);
 	if (i >= 0)
 		return &vmx->guest_uret_msrs[i];
 	return NULL;
@@ -1086,7 +1056,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx)
 		return false;
 	}
 
-	i = __vmx_find_uret_msr(MSR_EFER);
+	i = kvm_find_user_return_msr(MSR_EFER);
 	if (i < 0)
 		return false;
 
@@ -6922,6 +6892,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 
 static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
 {
+	struct vmx_uret_msr *tsx_ctrl;
 	struct vcpu_vmx *vmx;
 	int i, cpu, err;
 
@@ -6946,29 +6917,25 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
 
 	for (i = 0; i < vmx_nr_uret_msrs; ++i) {
 		vmx->guest_uret_msrs[i].data = 0;
-
-		switch (vmx_uret_msrs_list[i]) {
-		case MSR_IA32_TSX_CTRL:
-			/*
-			 * TSX_CTRL_CPUID_CLEAR is handled in the CPUID
-			 * interception.  Keep the host value unchanged to avoid
-			 * changing CPUID bits under the host kernel's feet.
-			 *
-			 * hle=0, rtm=0, tsx_ctrl=1 can be found with some
-			 * combinations of new kernel and old userspace.  If
-			 * those guests run on a tsx=off host, do allow guests
-			 * to use TSX_CTRL, but do not change the value on the
-			 * host so that TSX remains always disabled.
-			 */
-			if (boot_cpu_has(X86_FEATURE_RTM))
-				vmx->guest_uret_msrs[i].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
-			else
-				vmx->guest_uret_msrs[i].mask = 0;
-			break;
-		default:
-			vmx->guest_uret_msrs[i].mask = -1ull;
-			break;
-		}
+		vmx->guest_uret_msrs[i].mask = -1ull;
+	}
+	tsx_ctrl = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL);
+	if (tsx_ctrl) {
+		/*
+		 * TSX_CTRL_CPUID_CLEAR is handled in the CPUID interception.
+		 * Keep the host value unchanged to avoid changing CPUID bits
+		 * under the host kernel's feet.
+		 *
+		 * hle=0, rtm=0, tsx_ctrl=1 can be found with some combinations
+		 * of new kernel and old userspace.  If those guests run on a
+		 * tsx=off host, do allow guests to use TSX_CTRL, but do not
+		 * change the value on the host so that TSX remains always
+		 * disabled.
+		 */
+		if (boot_cpu_has(X86_FEATURE_RTM))
+			vmx->guest_uret_msrs[i].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
+		else
+			vmx->guest_uret_msrs[i].mask = 0;
 	}
 
 	err = alloc_loaded_vmcs(&vmx->vmcs01);
@@ -7829,6 +7796,22 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 
 static __init void vmx_setup_user_return_msrs(void)
 {
+
+	/*
+	 * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm
+	 * will emulate SYSCALL in legacy mode if the vendor string in guest
+	 * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To
+	 * support this emulation, MSR_STAR is included in the list for i386,
+	 * but is never loaded into hardware.  MSR_CSTAR is also never loaded
+	 * into hardware and is here purely for emulation purposes.
+	 */
+	const u32 vmx_uret_msrs_list[] = {
+	#ifdef CONFIG_X86_64
+		MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
+	#endif
+		MSR_EFER, MSR_TSC_AUX, MSR_STAR,
+		MSR_IA32_TSX_CTRL,
+	};
 	u32 msr;
 	int i;
 
@@ -7841,7 +7824,7 @@ static __init void vmx_setup_user_return_msrs(void)
 			continue;
 
 		kvm_define_user_return_msr(vmx_nr_uret_msrs, msr);
-		vmx_uret_msrs_list[vmx_nr_uret_msrs++] = msr;
+		vmx_nr_uret_msrs++;
 	}
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0856636efc44..d514031ed25f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -364,6 +364,18 @@ void kvm_define_user_return_msr(unsigned slot, u32 msr)
 }
 EXPORT_SYMBOL_GPL(kvm_define_user_return_msr);
 
+int kvm_find_user_return_msr(u32 msr)
+{
+	int i;
+
+	for (i = 0; i < user_return_msrs_global.nr; ++i) {
+		if (user_return_msrs_global.msrs[i] == msr)
+			return i;
+	}
+	return -1;
+}
+EXPORT_SYMBOL_GPL(kvm_find_user_return_msr);
+
 static void kvm_user_return_msr_cpu_online(void)
 {
 	unsigned int cpu = smp_processor_id();
-- 
cgit v1.2.3


From 5e17c624010a82bbcca9b955155781927eb6532a Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 10:17:30 -0700
Subject: KVM: VMX: Disable loading of TSX_CTRL MSR the more conventional way

Tag TSX_CTRL as not needing to be loaded when RTM isn't supported in the
host.  Crushing the write mask to '0' has the same effect, but requires
more mental gymnastics to understand.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-12-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/vmx.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 39506704be96..bd2187b4cc53 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1771,7 +1771,13 @@ static void setup_msrs(struct vcpu_vmx *vmx)
 			   guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP) ||
 			   guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID));
 
-	vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL, true);
+	/*
+	 * hle=0, rtm=0, tsx_ctrl=1 can be found with some combinations of new
+	 * kernel and old userspace.  If those guests run on a tsx=off host, do
+	 * allow guests to use TSX_CTRL, but don't change the value in hardware
+	 * so that TSX remains always disabled.
+	 */
+	vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL, boot_cpu_has(X86_FEATURE_RTM));
 
 	if (cpu_has_vmx_msr_bitmap())
 		vmx_update_msr_bitmap(&vmx->vcpu);
@@ -6919,23 +6925,15 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
 		vmx->guest_uret_msrs[i].data = 0;
 		vmx->guest_uret_msrs[i].mask = -1ull;
 	}
-	tsx_ctrl = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL);
-	if (tsx_ctrl) {
+	if (boot_cpu_has(X86_FEATURE_RTM)) {
 		/*
 		 * TSX_CTRL_CPUID_CLEAR is handled in the CPUID interception.
 		 * Keep the host value unchanged to avoid changing CPUID bits
 		 * under the host kernel's feet.
-		 *
-		 * hle=0, rtm=0, tsx_ctrl=1 can be found with some combinations
-		 * of new kernel and old userspace.  If those guests run on a
-		 * tsx=off host, do allow guests to use TSX_CTRL, but do not
-		 * change the value on the host so that TSX remains always
-		 * disabled.
 		 */
-		if (boot_cpu_has(X86_FEATURE_RTM))
+		tsx_ctrl = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL);
+		if (tsx_ctrl)
 			vmx->guest_uret_msrs[i].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
-		else
-			vmx->guest_uret_msrs[i].mask = 0;
 	}
 
 	err = alloc_loaded_vmcs(&vmx->vmcs01);
-- 
cgit v1.2.3


From 9cc39a5a43c05f8eda206bf9e144119820ecf5c8 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 10:17:31 -0700
Subject: KVM: x86: Export the number of uret MSRs to vendor modules

Split out and export the number of configured user return MSRs so that
VMX can iterate over the set of MSRs without having to do its own tracking.
Keep the list itself internal to x86 so that vendor code still has to go
through the "official" APIs to add/modify entries.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-13-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/x86.c              | 29 +++++++++++++----------------
 2 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6b15f27f49d0..22505e74c3da 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1418,6 +1418,7 @@ struct kvm_arch_async_pf {
 	bool direct_map;
 };
 
+extern u32 __read_mostly kvm_nr_uret_msrs;
 extern u64 __read_mostly host_efer;
 extern bool __read_mostly allow_smaller_maxphyaddr;
 extern struct kvm_x86_ops kvm_x86_ops;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d514031ed25f..5e1deed8ea5d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -184,11 +184,6 @@ module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
  */
 #define KVM_MAX_NR_USER_RETURN_MSRS 16
 
-struct kvm_user_return_msrs_global {
-	int nr;
-	u32 msrs[KVM_MAX_NR_USER_RETURN_MSRS];
-};
-
 struct kvm_user_return_msrs {
 	struct user_return_notifier urn;
 	bool registered;
@@ -198,7 +193,9 @@ struct kvm_user_return_msrs {
 	} values[KVM_MAX_NR_USER_RETURN_MSRS];
 };
 
-static struct kvm_user_return_msrs_global __read_mostly user_return_msrs_global;
+u32 __read_mostly kvm_nr_uret_msrs;
+EXPORT_SYMBOL_GPL(kvm_nr_uret_msrs);
+static u32 __read_mostly kvm_uret_msrs_list[KVM_MAX_NR_USER_RETURN_MSRS];
 static struct kvm_user_return_msrs __percpu *user_return_msrs;
 
 #define KVM_SUPPORTED_XCR0     (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
@@ -330,10 +327,10 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
 		user_return_notifier_unregister(urn);
 	}
 	local_irq_restore(flags);
-	for (slot = 0; slot < user_return_msrs_global.nr; ++slot) {
+	for (slot = 0; slot < kvm_nr_uret_msrs; ++slot) {
 		values = &msrs->values[slot];
 		if (values->host != values->curr) {
-			wrmsrl(user_return_msrs_global.msrs[slot], values->host);
+			wrmsrl(kvm_uret_msrs_list[slot], values->host);
 			values->curr = values->host;
 		}
 	}
@@ -358,9 +355,9 @@ EXPORT_SYMBOL_GPL(kvm_probe_user_return_msr);
 void kvm_define_user_return_msr(unsigned slot, u32 msr)
 {
 	BUG_ON(slot >= KVM_MAX_NR_USER_RETURN_MSRS);
-	user_return_msrs_global.msrs[slot] = msr;
-	if (slot >= user_return_msrs_global.nr)
-		user_return_msrs_global.nr = slot + 1;
+	kvm_uret_msrs_list[slot] = msr;
+	if (slot >= kvm_nr_uret_msrs)
+		kvm_nr_uret_msrs = slot + 1;
 }
 EXPORT_SYMBOL_GPL(kvm_define_user_return_msr);
 
@@ -368,8 +365,8 @@ int kvm_find_user_return_msr(u32 msr)
 {
 	int i;
 
-	for (i = 0; i < user_return_msrs_global.nr; ++i) {
-		if (user_return_msrs_global.msrs[i] == msr)
+	for (i = 0; i < kvm_nr_uret_msrs; ++i) {
+		if (kvm_uret_msrs_list[i] == msr)
 			return i;
 	}
 	return -1;
@@ -383,8 +380,8 @@ static void kvm_user_return_msr_cpu_online(void)
 	u64 value;
 	int i;
 
-	for (i = 0; i < user_return_msrs_global.nr; ++i) {
-		rdmsrl_safe(user_return_msrs_global.msrs[i], &value);
+	for (i = 0; i < kvm_nr_uret_msrs; ++i) {
+		rdmsrl_safe(kvm_uret_msrs_list[i], &value);
 		msrs->values[i].host = value;
 		msrs->values[i].curr = value;
 	}
@@ -399,7 +396,7 @@ int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask)
 	value = (value & mask) | (msrs->values[slot].host & ~mask);
 	if (value == msrs->values[slot].curr)
 		return 0;
-	err = wrmsrl_safe(user_return_msrs_global.msrs[slot], value);
+	err = wrmsrl_safe(kvm_uret_msrs_list[slot], value);
 	if (err)
 		return 1;
 
-- 
cgit v1.2.3


From e5fda4bbadb053e3b5164476146cf43092785c0b Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 10:17:32 -0700
Subject: KVM: x86: Move uret MSR slot management to common x86

Now that SVM and VMX both probe MSRs before "defining" user return slots
for them, consolidate the code for probe+define into common x86 and
eliminate the odd behavior of having the vendor code define the slot for
a given MSR.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-14-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  3 +--
 arch/x86/kvm/svm/svm.c          |  5 +----
 arch/x86/kvm/vmx/vmx.c          | 19 ++++---------------
 arch/x86/kvm/x86.c              | 19 +++++++++++--------
 4 files changed, 17 insertions(+), 29 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 22505e74c3da..8155eaac22a7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1777,9 +1777,8 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
 		    unsigned long ipi_bitmap_high, u32 min,
 		    unsigned long icr, int op_64_bit);
 
-void kvm_define_user_return_msr(unsigned index, u32 msr);
+int kvm_add_user_return_msr(u32 msr);
 int kvm_find_user_return_msr(u32 msr);
-int kvm_probe_user_return_msr(u32 msr);
 int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);
 
 u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 0ba0a00f8dc6..d13b72bfb736 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -959,10 +959,7 @@ static __init int svm_hardware_setup(void)
 		kvm_tsc_scaling_ratio_frac_bits = 32;
 	}
 
-	if (!kvm_probe_user_return_msr(MSR_TSC_AUX)) {
-		tsc_aux_uret_slot = 0;
-		kvm_define_user_return_msr(tsc_aux_uret_slot, MSR_TSC_AUX);
-	}
+	tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX);
 
 	/* Check for pause filtering support */
 	if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index bd2187b4cc53..042823b49273 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -454,9 +454,6 @@ static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
 
 static unsigned long host_idt_base;
 
-/* Number of user return MSRs that are actually supported in hardware. */
-static int vmx_nr_uret_msrs;
-
 #if IS_ENABLED(CONFIG_HYPERV)
 static bool __read_mostly enlightened_vmcs = true;
 module_param(enlightened_vmcs, bool, 0444);
@@ -1218,7 +1215,7 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 	 */
 	if (!vmx->guest_uret_msrs_loaded) {
 		vmx->guest_uret_msrs_loaded = true;
-		for (i = 0; i < vmx_nr_uret_msrs; ++i) {
+		for (i = 0; i < kvm_nr_uret_msrs; ++i) {
 			if (!vmx->guest_uret_msrs[i].load_into_hardware)
 				continue;
 
@@ -6921,7 +6918,7 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
 			goto free_vpid;
 	}
 
-	for (i = 0; i < vmx_nr_uret_msrs; ++i) {
+	for (i = 0; i < kvm_nr_uret_msrs; ++i) {
 		vmx->guest_uret_msrs[i].data = 0;
 		vmx->guest_uret_msrs[i].mask = -1ull;
 	}
@@ -7810,20 +7807,12 @@ static __init void vmx_setup_user_return_msrs(void)
 		MSR_EFER, MSR_TSC_AUX, MSR_STAR,
 		MSR_IA32_TSX_CTRL,
 	};
-	u32 msr;
 	int i;
 
 	BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS);
 
-	for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) {
-		msr = vmx_uret_msrs_list[i];
-
-		if (kvm_probe_user_return_msr(msr))
-			continue;
-
-		kvm_define_user_return_msr(vmx_nr_uret_msrs, msr);
-		vmx_nr_uret_msrs++;
-	}
+	for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i)
+		kvm_add_user_return_msr(vmx_uret_msrs_list[i]);
 }
 
 static __init int hardware_setup(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5e1deed8ea5d..0a0eebf35dd5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -336,7 +336,7 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
 	}
 }
 
-int kvm_probe_user_return_msr(u32 msr)
+static int kvm_probe_user_return_msr(u32 msr)
 {
 	u64 val;
 	int ret;
@@ -350,16 +350,18 @@ out:
 	preempt_enable();
 	return ret;
 }
-EXPORT_SYMBOL_GPL(kvm_probe_user_return_msr);
 
-void kvm_define_user_return_msr(unsigned slot, u32 msr)
+int kvm_add_user_return_msr(u32 msr)
 {
-	BUG_ON(slot >= KVM_MAX_NR_USER_RETURN_MSRS);
-	kvm_uret_msrs_list[slot] = msr;
-	if (slot >= kvm_nr_uret_msrs)
-		kvm_nr_uret_msrs = slot + 1;
+	BUG_ON(kvm_nr_uret_msrs >= KVM_MAX_NR_USER_RETURN_MSRS);
+
+	if (kvm_probe_user_return_msr(msr))
+		return -1;
+
+	kvm_uret_msrs_list[kvm_nr_uret_msrs] = msr;
+	return kvm_nr_uret_msrs++;
 }
-EXPORT_SYMBOL_GPL(kvm_define_user_return_msr);
+EXPORT_SYMBOL_GPL(kvm_add_user_return_msr);
 
 int kvm_find_user_return_msr(u32 msr)
 {
@@ -8132,6 +8134,7 @@ int kvm_arch_init(void *opaque)
 		printk(KERN_ERR "kvm: failed to allocate percpu kvm_user_return_msrs\n");
 		goto out_free_x86_emulator_cache;
 	}
+	kvm_nr_uret_msrs = 0;
 
 	r = kvm_mmu_module_init();
 	if (r)
-- 
cgit v1.2.3


From 61a05d444d2ca8d40add453a5f7058fbb1b57eca Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 10:17:33 -0700
Subject: KVM: x86: Tie Intel and AMD behavior for MSR_TSC_AUX to guest CPU
 model

Squish the Intel and AMD emulation of MSR_TSC_AUX together and tie it to
the guest CPU model instead of the host CPU behavior.  While not strictly
necessary to avoid guest breakage, emulating cross-vendor "architecture"
will provide consistent behavior for the guest, e.g. WRMSR fault behavior
won't change if the vCPU is migrated to a host with divergent behavior.

Note, the "new" kvm_is_supported_user_return_msr() checks do not add new
functionality on either SVM or VMX.  On SVM, the equivalent was
"tsc_aux_uret_slot < 0", and on VMX the check was buried in the
vmx_find_uret_msr() call at the find_uret_msr label.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-15-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  5 +++++
 arch/x86/kvm/svm/svm.c          | 24 ------------------------
 arch/x86/kvm/vmx/vmx.c          | 15 ---------------
 arch/x86/kvm/x86.c              | 36 ++++++++++++++++++++++++++++++++++++
 4 files changed, 41 insertions(+), 39 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8155eaac22a7..f85480b1d215 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1781,6 +1781,11 @@ int kvm_add_user_return_msr(u32 msr);
 int kvm_find_user_return_msr(u32 msr);
 int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);
 
+static inline bool kvm_is_supported_user_return_msr(u32 msr)
+{
+	return kvm_find_user_return_msr(msr) >= 0;
+}
+
 u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
 u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
 
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index d13b72bfb736..0922d8e173e6 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2663,12 +2663,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			msr_info->data |= (u64)svm->sysenter_esp_hi << 32;
 		break;
 	case MSR_TSC_AUX:
-		if (tsc_aux_uret_slot < 0)
-			return 1;
-		if (!msr_info->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
-			return 1;
 		msr_info->data = svm->tsc_aux;
 		break;
 	/*
@@ -2885,24 +2879,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		svm->sysenter_esp_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0;
 		break;
 	case MSR_TSC_AUX:
-		if (tsc_aux_uret_slot < 0)
-			return 1;
-
-		if (!msr->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
-			return 1;
-
-		/*
-		 * Per Intel's SDM, bits 63:32 are reserved, but AMD's APM has
-		 * incomplete and conflicting architectural behavior.  Current
-		 * AMD CPUs completely ignore bits 63:32, i.e. they aren't
-		 * reserved and always read as zeros.  Emulate AMD CPU behavior
-		 * to avoid explosions if the vCPU is migrated from an AMD host
-		 * to an Intel host.
-		 */
-		data = (u32)data;
-
 		/*
 		 * TSC_AUX is usually changed only during boot and never read
 		 * directly.  Intercept TSC_AUX instead of exposing it to the
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 042823b49273..61f7e5221bf3 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1981,12 +1981,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		else
 			msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
 		break;
-	case MSR_TSC_AUX:
-		if (!msr_info->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
-			return 1;
-		goto find_uret_msr;
 	case MSR_IA32_DEBUGCTLMSR:
 		msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
 		break;
@@ -2302,15 +2296,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		else
 			vmx->pt_desc.guest.addr_a[index / 2] = data;
 		break;
-	case MSR_TSC_AUX:
-		if (!msr_info->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
-			return 1;
-		/* Check reserved bit, higher 32 bits should be zero */
-		if ((data >> 32) != 0)
-			return 1;
-		goto find_uret_msr;
 	case MSR_IA32_PERF_CAPABILITIES:
 		if (data && !vcpu_to_pmu(vcpu)->version)
 			return 1;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0a0eebf35dd5..4efd2978ec08 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1642,6 +1642,30 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
 		 * invokes 64-bit SYSENTER.
 		 */
 		data = get_canonical(data, vcpu_virt_addr_bits(vcpu));
+		break;
+	case MSR_TSC_AUX:
+		if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))
+			return 1;
+
+		if (!host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
+			return 1;
+
+		/*
+		 * Per Intel's SDM, bits 63:32 are reserved, but AMD's APM has
+		 * incomplete and conflicting architectural behavior.  Current
+		 * AMD CPUs completely ignore bits 63:32, i.e. they aren't
+		 * reserved and always read as zeros.  Enforce Intel's reserved
+		 * bits check if and only if the guest CPU is Intel, and clear
+		 * the bits in all other cases.  This ensures cross-vendor
+		 * migration will provide consistent behavior for the guest.
+		 */
+		if (guest_cpuid_is_intel(vcpu) && (data >> 32) != 0)
+			return 1;
+
+		data = (u32)data;
+		break;
 	}
 
 	msr.data = data;
@@ -1678,6 +1702,18 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
 	if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))
 		return KVM_MSR_RET_FILTERED;
 
+	switch (index) {
+	case MSR_TSC_AUX:
+		if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))
+			return 1;
+
+		if (!host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
+			return 1;
+		break;
+	}
+
 	msr.index = index;
 	msr.host_initiated = host_initiated;
 
-- 
cgit v1.2.3


From 78bba966ee3cdbbfc585d8e39237378fba50a142 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 10:17:34 -0700
Subject: KVM: x86: Hide RDTSCP and RDPID if MSR_TSC_AUX probing failed

If probing MSR_TSC_AUX failed, hide RDTSCP and RDPID, and WARN if either
feature was reported as supported.  In theory, such a scenario should
never happen as both Intel and AMD state that MSR_TSC_AUX is available if
RDTSCP or RDPID is supported.  But, KVM injects #GP on MSR_TSC_AUX
accesses if probing failed, faults on WRMSR(MSR_TSC_AUX) may be fatal to
the guest (because they happen during early CPU bringup), and KVM itself
has effectively misreported RDPID support in the past.

Note, this also has the happy side effect of omitting MSR_TSC_AUX from
the list of MSRs that are exposed to userspace if probing the MSR fails.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-16-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/cpuid.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index c0e8c5e92189..f42e9491a6c8 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -567,6 +567,21 @@ void kvm_set_cpu_caps(void)
 		F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
 		F(PMM) | F(PMM_EN)
 	);
+
+	/*
+	 * Hide RDTSCP and RDPID if either feature is reported as supported but
+	 * probing MSR_TSC_AUX failed.  This is purely a sanity check and
+	 * should never happen, but the guest will likely crash if RDTSCP or
+	 * RDPID is misreported, and KVM has botched MSR_TSC_AUX emulation in
+	 * the past.  For example, the sanity check may fire if this instance of
+	 * KVM is running as L1 on top of an older, broken KVM.
+	 */
+	if (WARN_ON((kvm_cpu_cap_has(X86_FEATURE_RDTSCP) ||
+		     kvm_cpu_cap_has(X86_FEATURE_RDPID)) &&
+		     !kvm_is_supported_user_return_msr(MSR_TSC_AUX))) {
+		kvm_cpu_cap_clear(X86_FEATURE_RDTSCP);
+		kvm_cpu_cap_clear(X86_FEATURE_RDPID);
+	}
 }
 EXPORT_SYMBOL_GPL(kvm_set_cpu_caps);
 
-- 
cgit v1.2.3


From 34114136f725cbd0c83e7b5a0c8a977976cd82f7 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 5 May 2021 22:15:09 +1000
Subject: KVM: PPC: Book3S HV: Fix conversion to gfn-based MMU notifier
 callbacks

Commit b1c5356e873c ("KVM: PPC: Convert to the gfn-based MMU notifier
callbacks") causes unmap_gfn_range and age_gfn callbacks to only work
on the first gfn in the range. It also makes the aging callbacks call
into both radix and hash aging functions for radix guests. Fix this.

Add warnings for the single-gfn calls that have been converted to range
callbacks, in case they ever receieve ranges greater than 1.

Fixes: b1c5356e873c ("KVM: PPC: Convert to the gfn-based MMU notifier callbacks")
Reported-by: Bharata B Rao <bharata@linux.ibm.com>
Tested-by: Bharata B Rao <bharata@linux.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Message-Id: <20210505121509.1470207-1-npiggin@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/powerpc/include/asm/kvm_book3s.h  |  2 +-
 arch/powerpc/kvm/book3s_64_mmu_hv.c    | 46 ++++++++++++++++++++++++----------
 arch/powerpc/kvm/book3s_64_mmu_radix.c |  5 ++--
 3 files changed, 36 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index a6e9a5585e61..e6b53c6e21e3 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -210,7 +210,7 @@ extern void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd,
 				      unsigned int lpid);
 extern int kvmppc_radix_init(void);
 extern void kvmppc_radix_exit(void);
-extern bool kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+extern void kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 			    unsigned long gfn);
 extern bool kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 			  unsigned long gfn);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index b7bd9ca040b8..2d9193cd73be 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -795,7 +795,7 @@ static void kvmppc_unmap_hpte(struct kvm *kvm, unsigned long i,
 	}
 }
 
-static bool kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
+static void kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
 			    unsigned long gfn)
 {
 	unsigned long i;
@@ -829,15 +829,21 @@ static bool kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
 		unlock_rmap(rmapp);
 		__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
 	}
-	return false;
 }
 
 bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range)
 {
-	if (kvm_is_radix(kvm))
-		return kvm_unmap_radix(kvm, range->slot, range->start);
+	gfn_t gfn;
+
+	if (kvm_is_radix(kvm)) {
+		for (gfn = range->start; gfn < range->end; gfn++)
+			kvm_unmap_radix(kvm, range->slot, gfn);
+	} else {
+		for (gfn = range->start; gfn < range->end; gfn++)
+			kvm_unmap_rmapp(kvm, range->slot, range->start);
+	}
 
-	return kvm_unmap_rmapp(kvm, range->slot, range->start);
+	return false;
 }
 
 void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
@@ -924,10 +930,18 @@ static bool kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
 
 bool kvm_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
 {
-	if (kvm_is_radix(kvm))
-		kvm_age_radix(kvm, range->slot, range->start);
+	gfn_t gfn;
+	bool ret = false;
 
-	return kvm_age_rmapp(kvm, range->slot, range->start);
+	if (kvm_is_radix(kvm)) {
+		for (gfn = range->start; gfn < range->end; gfn++)
+			ret |= kvm_age_radix(kvm, range->slot, gfn);
+	} else {
+		for (gfn = range->start; gfn < range->end; gfn++)
+			ret |= kvm_age_rmapp(kvm, range->slot, gfn);
+	}
+
+	return ret;
 }
 
 static bool kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
@@ -965,18 +979,24 @@ static bool kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
 
 bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
 {
-	if (kvm_is_radix(kvm))
-		kvm_test_age_radix(kvm, range->slot, range->start);
+	WARN_ON(range->start + 1 != range->end);
 
-	return kvm_test_age_rmapp(kvm, range->slot, range->start);
+	if (kvm_is_radix(kvm))
+		return kvm_test_age_radix(kvm, range->slot, range->start);
+	else
+		return kvm_test_age_rmapp(kvm, range->slot, range->start);
 }
 
 bool kvm_set_spte_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
 {
+	WARN_ON(range->start + 1 != range->end);
+
 	if (kvm_is_radix(kvm))
-		return kvm_unmap_radix(kvm, range->slot, range->start);
+		kvm_unmap_radix(kvm, range->slot, range->start);
+	else
+		kvm_unmap_rmapp(kvm, range->slot, range->start);
 
-	return kvm_unmap_rmapp(kvm, range->slot, range->start);
+	return false;
 }
 
 static int vcpus_running(struct kvm *kvm)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index ec4f58fa9f5a..d909c069363e 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -993,7 +993,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_vcpu *vcpu,
 }
 
 /* Called with kvm->mmu_lock held */
-bool kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+void kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 		     unsigned long gfn)
 {
 	pte_t *ptep;
@@ -1002,14 +1002,13 @@ bool kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 
 	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) {
 		uv_page_inval(kvm->arch.lpid, gpa, PAGE_SHIFT);
-		return false;
+		return;
 	}
 
 	ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
 	if (ptep && pte_present(*ptep))
 		kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
 				 kvm->arch.lpid);
-	return false;
 }
 
 /* Called with kvm->mmu_lock held */
-- 
cgit v1.2.3


From e8ea85fb280ec55674bca88ea7cd85f60d19567f Mon Sep 17 00:00:00 2001
From: Chenyi Qiang <chenyi.qiang@intel.com>
Date: Tue, 2 Feb 2021 17:04:32 +0800
Subject: KVM: X86: Add support for the emulation of DR6_BUS_LOCK bit

Bus lock debug exception introduces a new bit DR6_BUS_LOCK (bit 11 of
DR6) to indicate that bus lock #DB exception is generated. The set/clear
of DR6_BUS_LOCK is similar to the DR6_RTM. The processor clears
DR6_BUS_LOCK when the exception is generated. For all other #DB, the
processor sets this bit to 1. Software #DB handler should set this bit
before returning to the interrupted task.

In VMM, to avoid breaking the CPUs without bus lock #DB exception
support, activate the DR6_BUS_LOCK conditionally in DR6_FIXED_1 bits.
When intercepting the #DB exception caused by bus locks, bit 11 of the
exit qualification is set to identify it. The VMM should emulate the
exception by clearing the bit 11 of the guest DR6.

Co-developed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
Message-Id: <20210202090433.13441-3-chenyi.qiang@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 3 ++-
 arch/x86/kvm/x86.c              | 3 +++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f85480b1d215..8836b3096217 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -200,6 +200,7 @@ enum x86_intercept_stage;
 
 #define KVM_NR_DB_REGS	4
 
+#define DR6_BUS_LOCK   (1 << 11)
 #define DR6_BD		(1 << 13)
 #define DR6_BS		(1 << 14)
 #define DR6_BT		(1 << 15)
@@ -213,7 +214,7 @@ enum x86_intercept_stage;
  * DR6_ACTIVE_LOW is also used as the init/reset value for DR6.
  */
 #define DR6_ACTIVE_LOW	0xffff0ff0
-#define DR6_VOLATILE	0x0001e00f
+#define DR6_VOLATILE	0x0001e80f
 #define DR6_FIXED_1	(DR6_ACTIVE_LOW & ~DR6_VOLATILE)
 
 #define DR7_BP_EN_MASK	0x000000ff
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4efd2978ec08..9b1a7040eae3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1176,6 +1176,9 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
 
 	if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM))
 		fixed |= DR6_RTM;
+
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
+		fixed |= DR6_BUS_LOCK;
 	return fixed;
 }
 
-- 
cgit v1.2.3


From 76ea438b4afcd9ee8da3387e9af4625eaccff58f Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 6 May 2021 06:30:04 -0400
Subject: KVM: X86: Expose bus lock debug exception to guest

Bus lock debug exception is an ability to notify the kernel by an #DB
trap after the instruction acquires a bus lock and is executed when
CPL>0. This allows the kernel to enforce user application throttling or
mitigations.

Existence of bus lock debug exception is enumerated via
CPUID.(EAX=7,ECX=0).ECX[24]. Software can enable these exceptions by
setting bit 2 of the MSR_IA32_DEBUGCTL. Expose the CPUID to guest and
emulate the MSR handling when guest enables it.

Support for this feature was originally developed by Xiaoyao Li and
Chenyi Qiang, but code has since changed enough that this patch has
nothing in common with theirs, except for this commit message.

Co-developed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
Message-Id: <20210202090433.13441-4-chenyi.qiang@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/cpuid.c            | 2 +-
 arch/x86/kvm/vmx/capabilities.h | 3 +++
 arch/x86/kvm/vmx/vmx.c          | 3 +++
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index f42e9491a6c8..9a48f138832d 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -458,7 +458,7 @@ void kvm_set_cpu_caps(void)
 		F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
 		F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
 		F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ |
-		F(SGX_LC)
+		F(SGX_LC) | F(BUS_LOCK_DETECT)
 	);
 	/* Set LA57 based on hardware capability. */
 	if (cpuid_ecx(7) & F(LA57))
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index d1d77985e889..8dee8a5fbc17 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -398,6 +398,9 @@ static inline u64 vmx_supported_debugctl(void)
 {
 	u64 debugctl = 0;
 
+	if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
+		debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT;
+
 	if (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT)
 		debugctl |= DEBUGCTLMSR_LBR_MASK;
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 61f7e5221bf3..f2fd447eed45 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2014,6 +2014,9 @@ static u64 vcpu_supported_debugctl(struct kvm_vcpu *vcpu)
 	if (!intel_pmu_lbr_is_enabled(vcpu))
 		debugctl &= ~DEBUGCTLMSR_LBR_MASK;
 
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
+		debugctl &= ~DEBUGCTLMSR_BUS_LOCK_DETECT;
+
 	return debugctl;
 }
 
-- 
cgit v1.2.3


From 03ca4589fabcc66b27e4cb8f8e95d64cf43badd0 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 5 May 2021 13:42:21 -0700
Subject: KVM: x86: Prevent KVM SVM from loading on kernels with 5-level paging

Disallow loading KVM SVM if 5-level paging is supported.  In theory, NPT
for L1 should simply work, but there unknowns with respect to how the
guest's MAXPHYADDR will be handled by hardware.

Nested NPT is more problematic, as running an L1 VMM that is using
2-level page tables requires stacking single-entry PDP and PML4 tables in
KVM's NPT for L2, as there are no equivalent entries in L1's NPT to
shadow.  Barring hardware magic, for 5-level paging, KVM would need stack
another layer to handle PML5.

Opportunistically rename the lm_root pointer, which is used for the
aforementioned stacking when shadowing 2-level L1 NPT, to pml4_root to
call out that it's specifically for PML4.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210505204221.1934471-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/mmu/mmu.c          | 20 ++++++++++----------
 arch/x86/kvm/svm/svm.c          |  5 +++++
 3 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8836b3096217..55efbacfc244 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -409,7 +409,7 @@ struct kvm_mmu {
 	u32 pkru_mask;
 
 	u64 *pae_root;
-	u64 *lm_root;
+	u64 *pml4_root;
 
 	/*
 	 * check zero bits on shadow page table entries, these
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 4b3ee244ebe0..0144c40d09c7 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3310,12 +3310,12 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 	if (mmu->shadow_root_level == PT64_ROOT_4LEVEL) {
 		pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
 
-		if (WARN_ON_ONCE(!mmu->lm_root)) {
+		if (WARN_ON_ONCE(!mmu->pml4_root)) {
 			r = -EIO;
 			goto out_unlock;
 		}
 
-		mmu->lm_root[0] = __pa(mmu->pae_root) | pm_mask;
+		mmu->pml4_root[0] = __pa(mmu->pae_root) | pm_mask;
 	}
 
 	for (i = 0; i < 4; ++i) {
@@ -3335,7 +3335,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 	}
 
 	if (mmu->shadow_root_level == PT64_ROOT_4LEVEL)
-		mmu->root_hpa = __pa(mmu->lm_root);
+		mmu->root_hpa = __pa(mmu->pml4_root);
 	else
 		mmu->root_hpa = __pa(mmu->pae_root);
 
@@ -3350,7 +3350,7 @@ out_unlock:
 static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *mmu = vcpu->arch.mmu;
-	u64 *lm_root, *pae_root;
+	u64 *pml4_root, *pae_root;
 
 	/*
 	 * When shadowing 32-bit or PAE NPT with 64-bit NPT, the PML4 and PDP
@@ -3369,14 +3369,14 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
 	if (WARN_ON_ONCE(mmu->shadow_root_level != PT64_ROOT_4LEVEL))
 		return -EIO;
 
-	if (mmu->pae_root && mmu->lm_root)
+	if (mmu->pae_root && mmu->pml4_root)
 		return 0;
 
 	/*
 	 * The special roots should always be allocated in concert.  Yell and
 	 * bail if KVM ends up in a state where only one of the roots is valid.
 	 */
-	if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->lm_root))
+	if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->pml4_root))
 		return -EIO;
 
 	/*
@@ -3387,14 +3387,14 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
 	if (!pae_root)
 		return -ENOMEM;
 
-	lm_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
-	if (!lm_root) {
+	pml4_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
+	if (!pml4_root) {
 		free_page((unsigned long)pae_root);
 		return -ENOMEM;
 	}
 
 	mmu->pae_root = pae_root;
-	mmu->lm_root = lm_root;
+	mmu->pml4_root = pml4_root;
 
 	return 0;
 }
@@ -5261,7 +5261,7 @@ static void free_mmu_pages(struct kvm_mmu *mmu)
 	if (!tdp_enabled && mmu->pae_root)
 		set_memory_encrypted((unsigned long)mmu->pae_root, 1);
 	free_page((unsigned long)mmu->pae_root);
-	free_page((unsigned long)mmu->lm_root);
+	free_page((unsigned long)mmu->pml4_root);
 }
 
 static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 0922d8e173e6..8124f51e9488 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -447,6 +447,11 @@ static int has_svm(void)
 		return 0;
 	}
 
+	if (pgtable_l5_enabled()) {
+		pr_info("KVM doesn't yet support 5-level paging on AMD SVM\n");
+		return 0;
+	}
+
 	return 1;
 }
 
-- 
cgit v1.2.3


From 594b27e677b35f9734b1969d175ebc6146741109 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 5 May 2021 23:48:17 +0200
Subject: KVM: x86: Cancel pvclock_gtod_work on module removal

Nothing prevents the following:

  pvclock_gtod_notify()
    queue_work(system_long_wq, &pvclock_gtod_work);
  ...
  remove_module(kvm);
  ...
  work_queue_run()
    pvclock_gtod_work()	<- UAF

Ditto for any other operation on that workqueue list head which touches
pvclock_gtod_work after module removal.

Cancel the work in kvm_arch_exit() to prevent that.

Fixes: 16e8d74d2da9 ("KVM: x86: notifier for clocksource changes")
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Message-Id: <87czu4onry.ffs@nanos.tec.linutronix.de>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9b1a7040eae3..259139a145cb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8224,6 +8224,7 @@ void kvm_arch_exit(void)
 	cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
 #ifdef CONFIG_X86_64
 	pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
+	cancel_work_sync(&pvclock_gtod_work);
 #endif
 	kvm_x86_ops.hardware_enable = NULL;
 	kvm_mmu_module_exit();
-- 
cgit v1.2.3


From 3f804f6d201ca93adf4c3df04d1bfd152c1129d6 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 6 May 2021 15:21:37 +0200
Subject: KVM: x86: Prevent deadlock against tk_core.seq

syzbot reported a possible deadlock in pvclock_gtod_notify():

CPU 0  		  	   	    	    CPU 1
write_seqcount_begin(&tk_core.seq);
  pvclock_gtod_notify()			    spin_lock(&pool->lock);
    queue_work(..., &pvclock_gtod_work)	    ktime_get()
     spin_lock(&pool->lock);		      do {
     						seq = read_seqcount_begin(tk_core.seq)
						...
				              } while (read_seqcount_retry(&tk_core.seq, seq);

While this is unlikely to happen, it's possible.

Delegate queue_work() to irq_work() which postpones it until the
tk_core.seq write held region is left and interrupts are reenabled.

Fixes: 16e8d74d2da9 ("KVM: x86: notifier for clocksource changes")
Reported-by: syzbot+6beae4000559d41d80f8@syzkaller.appspotmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Message-Id: <87h7jgm1zy.ffs@nanos.tec.linutronix.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 259139a145cb..5bd550eaf683 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8094,6 +8094,18 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
 
 static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
 
+/*
+ * Indirection to move queue_work() out of the tk_core.seq write held
+ * region to prevent possible deadlocks against time accessors which
+ * are invoked with work related locks held.
+ */
+static void pvclock_irq_work_fn(struct irq_work *w)
+{
+	queue_work(system_long_wq, &pvclock_gtod_work);
+}
+
+static DEFINE_IRQ_WORK(pvclock_irq_work, pvclock_irq_work_fn);
+
 /*
  * Notification about pvclock gtod data update.
  */
@@ -8105,13 +8117,14 @@ static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
 
 	update_pvclock_gtod(tk);
 
-	/* disable master clock if host does not trust, or does not
-	 * use, TSC based clocksource.
+	/*
+	 * Disable master clock if host does not trust, or does not use,
+	 * TSC based clocksource. Delegate queue_work() to irq_work as
+	 * this is invoked with tk_core.seq write held.
 	 */
 	if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) &&
 	    atomic_read(&kvm_guest_has_master_clock) != 0)
-		queue_work(system_long_wq, &pvclock_gtod_work);
-
+		irq_work_queue(&pvclock_irq_work);
 	return 0;
 }
 
@@ -8224,6 +8237,7 @@ void kvm_arch_exit(void)
 	cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
 #ifdef CONFIG_X86_64
 	pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
+	irq_work_sync(&pvclock_irq_work);
 	cancel_work_sync(&pvclock_gtod_work);
 #endif
 	kvm_x86_ops.hardware_enable = NULL;
-- 
cgit v1.2.3


From b26990987ffce0525abbd84b36595869cfdbbfe6 Mon Sep 17 00:00:00 2001
From: Stefan Raspl <raspl@linux.ibm.com>
Date: Thu, 6 May 2021 16:03:52 +0200
Subject: tools/kvm_stat: Fix documentation typo

Makes the dash in front of option '-z' disappear in the generated
man-page.

Signed-off-by: Stefan Raspl <raspl@linux.ibm.com>
Message-Id: <20210506140352.4178789-1-raspl@linux.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/kvm/kvm_stat/kvm_stat.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt
index feaf46451e83..3a9f2037bd23 100644
--- a/tools/kvm/kvm_stat/kvm_stat.txt
+++ b/tools/kvm/kvm_stat/kvm_stat.txt
@@ -111,7 +111,7 @@ OPTIONS
 --tracepoints::
         retrieve statistics from tracepoints
 
-*z*::
+-z::
 --skip-zero-records::
         omit records with all zeros in logging mode
 
-- 
cgit v1.2.3


From 258785ef08b323bddd844b4926a32c2b2045a1b0 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Thu, 6 May 2021 15:24:43 +0000
Subject: kvm: Cap halt polling at kvm->max_halt_poll_ns

When growing halt-polling, there is no check that the poll time exceeds
the per-VM limit. It's possible for vcpu->halt_poll_ns to grow past
kvm->max_halt_poll_ns and stay there until a halt which takes longer
than kvm->halt_poll_ns.

Signed-off-by: David Matlack <dmatlack@google.com>
Signed-off-by: Venkatesh Srinivas <venkateshs@chromium.org>
Message-Id: <20210506152442.4010298-1-venkateshs@chromium.org>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/kvm_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b9f12da6af0e..6b4feb92dc79 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2893,8 +2893,8 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
 	if (val < grow_start)
 		val = grow_start;
 
-	if (val > halt_poll_ns)
-		val = halt_poll_ns;
+	if (val > vcpu->kvm->max_halt_poll_ns)
+		val = vcpu->kvm->max_halt_poll_ns;
 
 	vcpu->halt_poll_ns = val;
 out:
-- 
cgit v1.2.3


From 368340a3c7d9a207bfe544721d464b7109be8eae Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 May 2021 16:15:42 -0700
Subject: KVM: SVM: Invert user pointer casting in SEV {en,de}crypt helpers

Invert the user pointer params for SEV's helpers for encrypting and
decrypting guest memory so that they take a pointer and cast to an
unsigned long as necessary, as opposed to doing the opposite.  Tagging a
non-pointer as __user is confusing and weird since a cast of some form
needs to occur to actually access the user data.  This also fixes Sparse
warnings triggered by directly consuming the unsigned longs, which are
"noderef" due to the __user tag.

Cc: Brijesh Singh <brijesh.singh@amd.com>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Ashish Kalra <ashish.kalra@amd.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210506231542.2331138-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/sev.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 8b11c711a0e4..eb241c1a4add 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -763,7 +763,7 @@ static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
 }
 
 static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
-				  unsigned long __user dst_uaddr,
+				  void __user *dst_uaddr,
 				  unsigned long dst_paddr,
 				  int size, int *err)
 {
@@ -787,8 +787,7 @@ static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
 
 	if (tpage) {
 		offset = paddr & 15;
-		if (copy_to_user((void __user *)(uintptr_t)dst_uaddr,
-				 page_address(tpage) + offset, size))
+		if (copy_to_user(dst_uaddr, page_address(tpage) + offset, size))
 			ret = -EFAULT;
 	}
 
@@ -800,9 +799,9 @@ e_free:
 }
 
 static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
-				  unsigned long __user vaddr,
+				  void __user *vaddr,
 				  unsigned long dst_paddr,
-				  unsigned long __user dst_vaddr,
+				  void __user *dst_vaddr,
 				  int size, int *error)
 {
 	struct page *src_tpage = NULL;
@@ -810,13 +809,12 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
 	int ret, len = size;
 
 	/* If source buffer is not aligned then use an intermediate buffer */
-	if (!IS_ALIGNED(vaddr, 16)) {
+	if (!IS_ALIGNED((unsigned long)vaddr, 16)) {
 		src_tpage = alloc_page(GFP_KERNEL);
 		if (!src_tpage)
 			return -ENOMEM;
 
-		if (copy_from_user(page_address(src_tpage),
-				(void __user *)(uintptr_t)vaddr, size)) {
+		if (copy_from_user(page_address(src_tpage), vaddr, size)) {
 			__free_page(src_tpage);
 			return -EFAULT;
 		}
@@ -830,7 +828,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
 	 *   - copy the source buffer in an intermediate buffer
 	 *   - use the intermediate buffer as source buffer
 	 */
-	if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
+	if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
 		int dst_offset;
 
 		dst_tpage = alloc_page(GFP_KERNEL);
@@ -855,7 +853,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
 			       page_address(src_tpage), size);
 		else {
 			if (copy_from_user(page_address(dst_tpage) + dst_offset,
-					   (void __user *)(uintptr_t)vaddr, size)) {
+					   vaddr, size)) {
 				ret = -EFAULT;
 				goto e_free;
 			}
@@ -935,15 +933,15 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
 		if (dec)
 			ret = __sev_dbg_decrypt_user(kvm,
 						     __sme_page_pa(src_p[0]) + s_off,
-						     dst_vaddr,
+						     (void __user *)dst_vaddr,
 						     __sme_page_pa(dst_p[0]) + d_off,
 						     len, &argp->error);
 		else
 			ret = __sev_dbg_encrypt_user(kvm,
 						     __sme_page_pa(src_p[0]) + s_off,
-						     vaddr,
+						     (void __user *)vaddr,
 						     __sme_page_pa(dst_p[0]) + d_off,
-						     dst_vaddr,
+						     (void __user *)dst_vaddr,
 						     len, &argp->error);
 
 		sev_unpin_memory(kvm, src_p, n);
-- 
cgit v1.2.3


From ce7ea0cfdc2e9ff31d12da31c3226deddb9644f5 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Thu, 6 May 2021 15:14:41 -0500
Subject: KVM: SVM: Move GHCB unmapping to fix RCU warning

When an SEV-ES guest is running, the GHCB is unmapped as part of the
vCPU run support. However, kvm_vcpu_unmap() triggers an RCU dereference
warning with CONFIG_PROVE_LOCKING=y because the SRCU lock is released
before invoking the vCPU run support.

Move the GHCB unmapping into the prepare_guest_switch callback, which is
invoked while still holding the SRCU lock, eliminating the RCU dereference
warning.

Fixes: 291bd20d5d88 ("KVM: SVM: Add initial support for a VMGEXIT VMEXIT")
Reported-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Message-Id: <b2f9b79d15166f2c3e4375c0d9bc3268b7696455.1620332081.git.thomas.lendacky@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/sev.c | 5 +----
 arch/x86/kvm/svm/svm.c | 3 +++
 arch/x86/kvm/svm/svm.h | 1 +
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index eb241c1a4add..5bc887e9a986 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2197,7 +2197,7 @@ vmgexit_err:
 	return -EINVAL;
 }
 
-static void pre_sev_es_run(struct vcpu_svm *svm)
+void sev_es_unmap_ghcb(struct vcpu_svm *svm)
 {
 	if (!svm->ghcb)
 		return;
@@ -2233,9 +2233,6 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
 	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
 	int asid = sev_get_asid(svm->vcpu.kvm);
 
-	/* Perform any SEV-ES pre-run actions */
-	pre_sev_es_run(svm);
-
 	/* Assign the asid allocated with this SEV guest */
 	svm->asid = asid;
 
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 8124f51e9488..4dd9b7856e5b 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1437,6 +1437,9 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
 
+	if (sev_es_guest(vcpu->kvm))
+		sev_es_unmap_ghcb(svm);
+
 	if (svm->guest_state_loaded)
 		return;
 
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 84b3133c2251..e44567ceb865 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -581,6 +581,7 @@ void sev_es_init_vmcb(struct vcpu_svm *svm);
 void sev_es_create_vcpu(struct vcpu_svm *svm);
 void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
 void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu);
+void sev_es_unmap_ghcb(struct vcpu_svm *svm);
 
 /* vmenter.S */
 
-- 
cgit v1.2.3


From 698ab77aebffe08b312fbcdddeb0e8bd08b78717 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Wed, 28 Apr 2021 15:03:12 -0400
Subject: dax: Add an enum for specifying dax wakup mode

Dan mentioned that he is not very fond of passing around a boolean true/false
to specify if only next waiter should be woken up or all waiters should be
woken up. He instead prefers that we introduce an enum and make it very
explicity at the callsite itself. Easier to read code.

This patch should not introduce any change of behavior.

Reviewed-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Suggested-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Link: https://lore.kernel.org/r/20210428190314.1865312-2-vgoyal@redhat.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/dax.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index b3d27fdc6775..5ecee51c44ee 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -144,6 +144,16 @@ struct wait_exceptional_entry_queue {
 	struct exceptional_entry_key key;
 };
 
+/**
+ * enum dax_wake_mode: waitqueue wakeup behaviour
+ * @WAKE_ALL: wake all waiters in the waitqueue
+ * @WAKE_NEXT: wake only the first waiter in the waitqueue
+ */
+enum dax_wake_mode {
+	WAKE_ALL,
+	WAKE_NEXT,
+};
+
 static wait_queue_head_t *dax_entry_waitqueue(struct xa_state *xas,
 		void *entry, struct exceptional_entry_key *key)
 {
@@ -182,7 +192,8 @@ static int wake_exceptional_entry_func(wait_queue_entry_t *wait,
  * The important information it's conveying is whether the entry at
  * this index used to be a PMD entry.
  */
-static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all)
+static void dax_wake_entry(struct xa_state *xas, void *entry,
+			   enum dax_wake_mode mode)
 {
 	struct exceptional_entry_key key;
 	wait_queue_head_t *wq;
@@ -196,7 +207,7 @@ static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all)
 	 * must be in the waitqueue and the following check will see them.
 	 */
 	if (waitqueue_active(wq))
-		__wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
+		__wake_up(wq, TASK_NORMAL, mode == WAKE_ALL ? 0 : 1, &key);
 }
 
 /*
@@ -268,7 +279,7 @@ static void put_unlocked_entry(struct xa_state *xas, void *entry)
 {
 	/* If we were the only waiter woken, wake the next one */
 	if (entry && !dax_is_conflict(entry))
-		dax_wake_entry(xas, entry, false);
+		dax_wake_entry(xas, entry, WAKE_NEXT);
 }
 
 /*
@@ -286,7 +297,7 @@ static void dax_unlock_entry(struct xa_state *xas, void *entry)
 	old = xas_store(xas, entry);
 	xas_unlock_irq(xas);
 	BUG_ON(!dax_is_locked(old));
-	dax_wake_entry(xas, entry, false);
+	dax_wake_entry(xas, entry, WAKE_NEXT);
 }
 
 /*
@@ -524,7 +535,7 @@ retry:
 
 		dax_disassociate_entry(entry, mapping, false);
 		xas_store(xas, NULL);	/* undo the PMD join */
-		dax_wake_entry(xas, entry, true);
+		dax_wake_entry(xas, entry, WAKE_ALL);
 		mapping->nrexceptional--;
 		entry = NULL;
 		xas_set(xas, index);
@@ -937,7 +948,7 @@ static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
 	xas_lock_irq(xas);
 	xas_store(xas, entry);
 	xas_clear_mark(xas, PAGECACHE_TAG_DIRTY);
-	dax_wake_entry(xas, entry, false);
+	dax_wake_entry(xas, entry, WAKE_NEXT);
 
 	trace_dax_writeback_one(mapping->host, index, count);
 	return ret;
-- 
cgit v1.2.3


From 4c3d043d271d4d629aa2328796cdfc96b37d3b3c Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Wed, 28 Apr 2021 15:03:13 -0400
Subject: dax: Add a wakeup mode parameter to put_unlocked_entry()

As of now put_unlocked_entry() always wakes up next waiter. In next
patches we want to wake up all waiters at one callsite. Hence, add a
parameter to the function.

This patch does not introduce any change of behavior.

Reviewed-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Suggested-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Link: https://lore.kernel.org/r/20210428190314.1865312-3-vgoyal@redhat.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/dax.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 5ecee51c44ee..56eb1c759ca5 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -275,11 +275,11 @@ static void wait_entry_unlocked(struct xa_state *xas, void *entry)
 	finish_wait(wq, &ewait.wait);
 }
 
-static void put_unlocked_entry(struct xa_state *xas, void *entry)
+static void put_unlocked_entry(struct xa_state *xas, void *entry,
+			       enum dax_wake_mode mode)
 {
-	/* If we were the only waiter woken, wake the next one */
 	if (entry && !dax_is_conflict(entry))
-		dax_wake_entry(xas, entry, WAKE_NEXT);
+		dax_wake_entry(xas, entry, mode);
 }
 
 /*
@@ -633,7 +633,7 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping,
 			entry = get_unlocked_entry(&xas, 0);
 		if (entry)
 			page = dax_busy_page(entry);
-		put_unlocked_entry(&xas, entry);
+		put_unlocked_entry(&xas, entry, WAKE_NEXT);
 		if (page)
 			break;
 		if (++scanned % XA_CHECK_SCHED)
@@ -675,7 +675,7 @@ static int __dax_invalidate_entry(struct address_space *mapping,
 	mapping->nrexceptional--;
 	ret = 1;
 out:
-	put_unlocked_entry(&xas, entry);
+	put_unlocked_entry(&xas, entry, WAKE_NEXT);
 	xas_unlock_irq(&xas);
 	return ret;
 }
@@ -954,7 +954,7 @@ static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
 	return ret;
 
  put_unlocked:
-	put_unlocked_entry(xas, entry);
+	put_unlocked_entry(xas, entry, WAKE_NEXT);
 	return ret;
 }
 
@@ -1695,7 +1695,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
 	/* Did we race with someone splitting entry or so? */
 	if (!entry || dax_is_conflict(entry) ||
 	    (order == 0 && !dax_is_pte_entry(entry))) {
-		put_unlocked_entry(&xas, entry);
+		put_unlocked_entry(&xas, entry, WAKE_NEXT);
 		xas_unlock_irq(&xas);
 		trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
 						      VM_FAULT_NOPAGE);
-- 
cgit v1.2.3


From 237388320deffde7c2d65ed8fc9eef670dc979b3 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Wed, 28 Apr 2021 15:03:14 -0400
Subject: dax: Wake up all waiters after invalidating dax entry

I am seeing missed wakeups which ultimately lead to a deadlock when I am
using virtiofs with DAX enabled and running "make -j". I had to mount
virtiofs as rootfs and also reduce to dax window size to 256M to reproduce
the problem consistently.

So here is the problem. put_unlocked_entry() wakes up waiters only
if entry is not null as well as !dax_is_conflict(entry). But if I
call multiple instances of invalidate_inode_pages2() in parallel,
then I can run into a situation where there are waiters on
this index but nobody will wake these waiters.

invalidate_inode_pages2()
  invalidate_inode_pages2_range()
    invalidate_exceptional_entry2()
      dax_invalidate_mapping_entry_sync()
        __dax_invalidate_entry() {
                xas_lock_irq(&xas);
                entry = get_unlocked_entry(&xas, 0);
                ...
                ...
                dax_disassociate_entry(entry, mapping, trunc);
                xas_store(&xas, NULL);
                ...
                ...
                put_unlocked_entry(&xas, entry);
                xas_unlock_irq(&xas);
        }

Say a fault in in progress and it has locked entry at offset say "0x1c".
Now say three instances of invalidate_inode_pages2() are in progress
(A, B, C) and they all try to invalidate entry at offset "0x1c". Given
dax entry is locked, all tree instances A, B, C will wait in wait queue.

When dax fault finishes, say A is woken up. It will store NULL entry
at index "0x1c" and wake up B. When B comes along it will find "entry=0"
at page offset 0x1c and it will call put_unlocked_entry(&xas, 0). And
this means put_unlocked_entry() will not wake up next waiter, given
the current code. And that means C continues to wait and is not woken
up.

This patch fixes the issue by waking up all waiters when a dax entry
has been invalidated. This seems to fix the deadlock I am facing
and I can make forward progress.

Reported-by: Sergio Lopez <slp@redhat.com>
Fixes: ac401cc78242 ("dax: New fault locking")
Reviewed-by: Jan Kara <jack@suse.cz>
Suggested-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Link: https://lore.kernel.org/r/20210428190314.1865312-4-vgoyal@redhat.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/dax.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/dax.c b/fs/dax.c
index 56eb1c759ca5..df5485b4bddf 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -675,7 +675,7 @@ static int __dax_invalidate_entry(struct address_space *mapping,
 	mapping->nrexceptional--;
 	ret = 1;
 out:
-	put_unlocked_entry(&xas, entry, WAKE_NEXT);
+	put_unlocked_entry(&xas, entry, WAKE_ALL);
 	xas_unlock_irq(&xas);
 	return ret;
 }
-- 
cgit v1.2.3


From 285c0faddcebdf360412fc9ef9cde63cf98da7f6 Mon Sep 17 00:00:00 2001
From: Bharat Jauhari <bjauhari@habana.ai>
Date: Thu, 25 Mar 2021 18:15:40 +0200
Subject: habanalabs: expose ASIC specific PLL index

Currently the user cannot interpret the PLL information based on index
as its exposed as an integer.

This commit exposes ASIC specific PLL indexes and maps it to a generic
FW compatible index.

Signed-off-by: Bharat Jauhari <bjauhari@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c | 34 ++++++++++-------
 drivers/misc/habanalabs/common/habanalabs.h  | 16 ++++----
 drivers/misc/habanalabs/common/sysfs.c       |  4 +-
 drivers/misc/habanalabs/gaudi/gaudi.c        | 55 ++++++++++------------------
 drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c  | 12 +++---
 drivers/misc/habanalabs/goya/goya.c          | 47 +++++++++---------------
 drivers/misc/habanalabs/goya/goya_hwmgr.c    | 40 ++++++++++----------
 include/uapi/misc/habanalabs.h               | 33 +++++++++++++++++
 8 files changed, 127 insertions(+), 114 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 832dd5c5bb06..7cf82da67dab 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -661,18 +661,13 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy)
 	return rc;
 }
 
-int get_used_pll_index(struct hl_device *hdev, enum pll_index input_pll_index,
+int get_used_pll_index(struct hl_device *hdev, u32 input_pll_index,
 						enum pll_index *pll_index)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	u8 pll_byte, pll_bit_off;
 	bool dynamic_pll;
-
-	if (input_pll_index >= PLL_MAX) {
-		dev_err(hdev->dev, "PLL index %d is out of range\n",
-							input_pll_index);
-		return -EINVAL;
-	}
+	int fw_pll_idx;
 
 	dynamic_pll = prop->fw_security_status_valid &&
 		(prop->fw_app_security_map & CPU_BOOT_DEV_STS0_DYN_PLL_EN);
@@ -680,28 +675,39 @@ int get_used_pll_index(struct hl_device *hdev, enum pll_index input_pll_index,
 	if (!dynamic_pll) {
 		/*
 		 * in case we are working with legacy FW (each asic has unique
-		 * PLL numbering) extract the legacy numbering
+		 * PLL numbering) use the driver based index as they are
+		 * aligned with fw legacy numbering
 		 */
-		*pll_index = hdev->legacy_pll_map[input_pll_index];
+		*pll_index = input_pll_index;
 		return 0;
 	}
 
+	/* retrieve a FW compatible PLL index based on
+	 * ASIC specific user request
+	 */
+	fw_pll_idx = hdev->asic_funcs->map_pll_idx_to_fw_idx(input_pll_index);
+	if (fw_pll_idx < 0) {
+		dev_err(hdev->dev, "Invalid PLL index (%u) error %d\n",
+			input_pll_index, fw_pll_idx);
+		return -EINVAL;
+	}
+
 	/* PLL map is a u8 array */
-	pll_byte = prop->cpucp_info.pll_map[input_pll_index >> 3];
-	pll_bit_off = input_pll_index & 0x7;
+	pll_byte = prop->cpucp_info.pll_map[fw_pll_idx >> 3];
+	pll_bit_off = fw_pll_idx & 0x7;
 
 	if (!(pll_byte & BIT(pll_bit_off))) {
 		dev_err(hdev->dev, "PLL index %d is not supported\n",
-							input_pll_index);
+			fw_pll_idx);
 		return -EINVAL;
 	}
 
-	*pll_index = input_pll_index;
+	*pll_index = fw_pll_idx;
 
 	return 0;
 }
 
-int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index pll_index,
+int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u32 pll_index,
 		u16 *pll_freq_arr)
 {
 	struct cpucp_packet pkt;
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 44e89da30b4a..91291a8e201e 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -930,6 +930,9 @@ enum div_select_defs {
  *                         driver is ready to receive asynchronous events. This
  *                         function should be called during the first init and
  *                         after every hard-reset of the device
+ * @get_msi_info: Retrieve asic-specific MSI ID of the f/w async event
+ * @map_pll_idx_to_fw_idx: convert driver specific per asic PLL index to
+ *                         generic f/w compatible PLL Indexes
  */
 struct hl_asic_funcs {
 	int (*early_init)(struct hl_device *hdev);
@@ -1054,6 +1057,7 @@ struct hl_asic_funcs {
 			u32 block_id, u32 block_size);
 	void (*enable_events_from_fw)(struct hl_device *hdev);
 	void (*get_msi_info)(u32 *table);
+	int (*map_pll_idx_to_fw_idx)(u32 pll_idx);
 };
 
 
@@ -1950,8 +1954,6 @@ struct hl_mmu_funcs {
  * @aggregated_cs_counters: aggregated cs counters among all contexts
  * @mmu_priv: device-specific MMU data.
  * @mmu_func: device-related MMU functions.
- * @legacy_pll_map: map holding map between dynamic (common) PLL indexes and
- *                  static (asic specific) PLL indexes.
  * @dram_used_mem: current DRAM memory consumption.
  * @timeout_jiffies: device CS timeout value.
  * @max_power: the max power of the device, as configured by the sysadmin. This
@@ -2071,8 +2073,6 @@ struct hl_device {
 	struct hl_mmu_priv		mmu_priv;
 	struct hl_mmu_funcs		mmu_func[MMU_NUM_PGT_LOCATIONS];
 
-	enum pll_index			*legacy_pll_map;
-
 	atomic64_t			dram_used_mem;
 	u64				timeout_jiffies;
 	u64				max_power;
@@ -2387,9 +2387,9 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
 		struct hl_info_pci_counters *counters);
 int hl_fw_cpucp_total_energy_get(struct hl_device *hdev,
 			u64 *total_energy);
-int get_used_pll_index(struct hl_device *hdev, enum pll_index input_pll_index,
+int get_used_pll_index(struct hl_device *hdev, u32 input_pll_index,
 						enum pll_index *pll_index);
-int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index pll_index,
+int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u32 pll_index,
 		u16 *pll_freq_arr);
 int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power);
 int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
@@ -2411,9 +2411,9 @@ int hl_pci_set_outbound_region(struct hl_device *hdev,
 int hl_pci_init(struct hl_device *hdev);
 void hl_pci_fini(struct hl_device *hdev);
 
-long hl_get_frequency(struct hl_device *hdev, enum pll_index pll_index,
+long hl_get_frequency(struct hl_device *hdev, u32 pll_index,
 								bool curr);
-void hl_set_frequency(struct hl_device *hdev, enum pll_index pll_index,
+void hl_set_frequency(struct hl_device *hdev, u32 pll_index,
 								u64 freq);
 int hl_get_temperature(struct hl_device *hdev,
 		       int sensor_index, u32 attr, long *value);
diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c
index 9fa61573a89d..c9f649b31e3a 100644
--- a/drivers/misc/habanalabs/common/sysfs.c
+++ b/drivers/misc/habanalabs/common/sysfs.c
@@ -9,7 +9,7 @@
 
 #include <linux/pci.h>
 
-long hl_get_frequency(struct hl_device *hdev, enum pll_index pll_index,
+long hl_get_frequency(struct hl_device *hdev, u32 pll_index,
 								bool curr)
 {
 	struct cpucp_packet pkt;
@@ -44,7 +44,7 @@ long hl_get_frequency(struct hl_device *hdev, enum pll_index pll_index,
 	return (long) result;
 }
 
-void hl_set_frequency(struct hl_device *hdev, enum pll_index pll_index,
+void hl_set_frequency(struct hl_device *hdev, u32 pll_index,
 								u64 freq)
 {
 	struct cpucp_packet pkt;
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index b751652f80a8..81155f06c126 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -105,36 +105,6 @@
 
 #define GAUDI_PLL_MAX 10
 
-/*
- * this enum kept here for compatibility with old FW (in which each asic has
- * unique PLL numbering
- */
-enum gaudi_pll_index {
-	GAUDI_CPU_PLL = 0,
-	GAUDI_PCI_PLL,
-	GAUDI_SRAM_PLL,
-	GAUDI_HBM_PLL,
-	GAUDI_NIC_PLL,
-	GAUDI_DMA_PLL,
-	GAUDI_MESH_PLL,
-	GAUDI_MME_PLL,
-	GAUDI_TPC_PLL,
-	GAUDI_IF_PLL,
-};
-
-static enum pll_index gaudi_pll_map[PLL_MAX] = {
-	[CPU_PLL] = GAUDI_CPU_PLL,
-	[PCI_PLL] = GAUDI_PCI_PLL,
-	[SRAM_PLL] = GAUDI_SRAM_PLL,
-	[HBM_PLL] = GAUDI_HBM_PLL,
-	[NIC_PLL] = GAUDI_NIC_PLL,
-	[DMA_PLL] = GAUDI_DMA_PLL,
-	[MESH_PLL] = GAUDI_MESH_PLL,
-	[MME_PLL] = GAUDI_MME_PLL,
-	[TPC_PLL] = GAUDI_TPC_PLL,
-	[IF_PLL] = GAUDI_IF_PLL,
-};
-
 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
 		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
 		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
@@ -810,7 +780,7 @@ static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
 			freq = 0;
 		}
 	} else {
-		rc = hl_fw_cpucp_pll_info_get(hdev, CPU_PLL, pll_freq_arr);
+		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
 
 		if (rc)
 			return rc;
@@ -1652,9 +1622,6 @@ static int gaudi_sw_init(struct hl_device *hdev)
 
 	hdev->asic_specific = gaudi;
 
-	/* store legacy PLL map */
-	hdev->legacy_pll_map = gaudi_pll_map;
-
 	/* Create DMA pool for small allocations */
 	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
 			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
@@ -8783,6 +8750,23 @@ static void gaudi_enable_events_from_fw(struct hl_device *hdev)
 	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
 }
 
+static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
+{
+	switch (pll_idx) {
+	case HL_GAUDI_CPU_PLL: return CPU_PLL;
+	case HL_GAUDI_PCI_PLL: return PCI_PLL;
+	case HL_GAUDI_NIC_PLL: return NIC_PLL;
+	case HL_GAUDI_DMA_PLL: return DMA_PLL;
+	case HL_GAUDI_MESH_PLL: return MESH_PLL;
+	case HL_GAUDI_MME_PLL: return MME_PLL;
+	case HL_GAUDI_TPC_PLL: return TPC_PLL;
+	case HL_GAUDI_IF_PLL: return IF_PLL;
+	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
+	case HL_GAUDI_HBM_PLL: return HBM_PLL;
+	default: return -EINVAL;
+	}
+}
+
 static const struct hl_asic_funcs gaudi_funcs = {
 	.early_init = gaudi_early_init,
 	.early_fini = gaudi_early_fini,
@@ -8866,7 +8850,8 @@ static const struct hl_asic_funcs gaudi_funcs = {
 	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
 	.get_hw_block_id = gaudi_get_hw_block_id,
 	.hw_block_mmap = gaudi_block_mmap,
-	.enable_events_from_fw = gaudi_enable_events_from_fw
+	.enable_events_from_fw = gaudi_enable_events_from_fw,
+	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx
 };
 
 /**
diff --git a/drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c b/drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c
index 8c49da4bcbd5..9b60eadd4c35 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c
@@ -13,7 +13,7 @@ void gaudi_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
 	struct gaudi_device *gaudi = hdev->asic_specific;
 
 	if (freq == PLL_LAST)
-		hl_set_frequency(hdev, MME_PLL, gaudi->max_freq_value);
+		hl_set_frequency(hdev, HL_GAUDI_MME_PLL, gaudi->max_freq_value);
 }
 
 int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
@@ -23,7 +23,7 @@ int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, MME_PLL, false);
+	value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, false);
 
 	if (value < 0) {
 		dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n",
@@ -33,7 +33,7 @@ int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
 
 	*max_clk = (value / 1000 / 1000);
 
-	value = hl_get_frequency(hdev, MME_PLL, true);
+	value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, true);
 
 	if (value < 0) {
 		dev_err(hdev->dev,
@@ -57,7 +57,7 @@ static ssize_t clk_max_freq_mhz_show(struct device *dev,
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, MME_PLL, false);
+	value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, false);
 
 	gaudi->max_freq_value = value;
 
@@ -85,7 +85,7 @@ static ssize_t clk_max_freq_mhz_store(struct device *dev,
 
 	gaudi->max_freq_value = value * 1000 * 1000;
 
-	hl_set_frequency(hdev, MME_PLL, gaudi->max_freq_value);
+	hl_set_frequency(hdev, HL_GAUDI_MME_PLL, gaudi->max_freq_value);
 
 fail:
 	return count;
@@ -100,7 +100,7 @@ static ssize_t clk_cur_freq_mhz_show(struct device *dev,
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, MME_PLL, true);
+	value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, true);
 
 	return sprintf(buf, "%lu\n", (value / 1000 / 1000));
 }
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index e27338f4aad2..e0ad2a269779 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -118,30 +118,6 @@
 #define IS_MME_IDLE(mme_arch_sts) \
 	(((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
 
-/*
- * this enum kept here for compatibility with old FW (in which each asic has
- * unique PLL numbering
- */
-enum goya_pll_index {
-	GOYA_CPU_PLL = 0,
-	GOYA_IC_PLL,
-	GOYA_MC_PLL,
-	GOYA_MME_PLL,
-	GOYA_PCI_PLL,
-	GOYA_EMMC_PLL,
-	GOYA_TPC_PLL,
-};
-
-static enum pll_index goya_pll_map[PLL_MAX] = {
-	[CPU_PLL] = GOYA_CPU_PLL,
-	[IC_PLL] = GOYA_IC_PLL,
-	[MC_PLL] = GOYA_MC_PLL,
-	[MME_PLL] = GOYA_MME_PLL,
-	[PCI_PLL] = GOYA_PCI_PLL,
-	[EMMC_PLL] = GOYA_EMMC_PLL,
-	[TPC_PLL] = GOYA_TPC_PLL,
-};
-
 static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
 		"goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
 		"goya cq 4", "goya cpu eq"
@@ -775,7 +751,8 @@ static void goya_fetch_psoc_frequency(struct hl_device *hdev)
 			freq = 0;
 		}
 	} else {
-		rc = hl_fw_cpucp_pll_info_get(hdev, PCI_PLL, pll_freq_arr);
+		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GOYA_PCI_PLL,
+				pll_freq_arr);
 
 		if (rc)
 			return;
@@ -897,9 +874,6 @@ static int goya_sw_init(struct hl_device *hdev)
 
 	hdev->asic_specific = goya;
 
-	/* store legacy PLL map */
-	hdev->legacy_pll_map = goya_pll_map;
-
 	/* Create DMA pool for small allocations */
 	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
 			&hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0);
@@ -5512,6 +5486,20 @@ static void goya_enable_events_from_fw(struct hl_device *hdev)
 			GOYA_ASYNC_EVENT_ID_INTS_REGISTER);
 }
 
+static int goya_map_pll_idx_to_fw_idx(u32 pll_idx)
+{
+	switch (pll_idx) {
+	case HL_GOYA_CPU_PLL: return CPU_PLL;
+	case HL_GOYA_PCI_PLL: return PCI_PLL;
+	case HL_GOYA_MME_PLL: return MME_PLL;
+	case HL_GOYA_TPC_PLL: return TPC_PLL;
+	case HL_GOYA_IC_PLL: return IC_PLL;
+	case HL_GOYA_MC_PLL: return MC_PLL;
+	case HL_GOYA_EMMC_PLL: return EMMC_PLL;
+	default: return -EINVAL;
+	}
+}
+
 static const struct hl_asic_funcs goya_funcs = {
 	.early_init = goya_early_init,
 	.early_fini = goya_early_fini,
@@ -5595,7 +5583,8 @@ static const struct hl_asic_funcs goya_funcs = {
 	.ack_protection_bits_errors = goya_ack_protection_bits_errors,
 	.get_hw_block_id = goya_get_hw_block_id,
 	.hw_block_mmap = goya_block_mmap,
-	.enable_events_from_fw = goya_enable_events_from_fw
+	.enable_events_from_fw = goya_enable_events_from_fw,
+	.map_pll_idx_to_fw_idx = goya_map_pll_idx_to_fw_idx
 };
 
 /*
diff --git a/drivers/misc/habanalabs/goya/goya_hwmgr.c b/drivers/misc/habanalabs/goya/goya_hwmgr.c
index 3acb36a1a902..7d007125727f 100644
--- a/drivers/misc/habanalabs/goya/goya_hwmgr.c
+++ b/drivers/misc/habanalabs/goya/goya_hwmgr.c
@@ -13,19 +13,19 @@ void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
 
 	switch (freq) {
 	case PLL_HIGH:
-		hl_set_frequency(hdev, MME_PLL, hdev->high_pll);
-		hl_set_frequency(hdev, TPC_PLL, hdev->high_pll);
-		hl_set_frequency(hdev, IC_PLL, hdev->high_pll);
+		hl_set_frequency(hdev, HL_GOYA_MME_PLL, hdev->high_pll);
+		hl_set_frequency(hdev, HL_GOYA_TPC_PLL, hdev->high_pll);
+		hl_set_frequency(hdev, HL_GOYA_IC_PLL, hdev->high_pll);
 		break;
 	case PLL_LOW:
-		hl_set_frequency(hdev, MME_PLL, GOYA_PLL_FREQ_LOW);
-		hl_set_frequency(hdev, TPC_PLL, GOYA_PLL_FREQ_LOW);
-		hl_set_frequency(hdev, IC_PLL, GOYA_PLL_FREQ_LOW);
+		hl_set_frequency(hdev, HL_GOYA_MME_PLL, GOYA_PLL_FREQ_LOW);
+		hl_set_frequency(hdev, HL_GOYA_TPC_PLL, GOYA_PLL_FREQ_LOW);
+		hl_set_frequency(hdev, HL_GOYA_IC_PLL, GOYA_PLL_FREQ_LOW);
 		break;
 	case PLL_LAST:
-		hl_set_frequency(hdev, MME_PLL, goya->mme_clk);
-		hl_set_frequency(hdev, TPC_PLL, goya->tpc_clk);
-		hl_set_frequency(hdev, IC_PLL, goya->ic_clk);
+		hl_set_frequency(hdev, HL_GOYA_MME_PLL, goya->mme_clk);
+		hl_set_frequency(hdev, HL_GOYA_TPC_PLL, goya->tpc_clk);
+		hl_set_frequency(hdev, HL_GOYA_IC_PLL, goya->ic_clk);
 		break;
 	default:
 		dev_err(hdev->dev, "unknown frequency setting\n");
@@ -39,7 +39,7 @@ int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, MME_PLL, false);
+	value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, false);
 
 	if (value < 0) {
 		dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n",
@@ -49,7 +49,7 @@ int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
 
 	*max_clk = (value / 1000 / 1000);
 
-	value = hl_get_frequency(hdev, MME_PLL, true);
+	value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, true);
 
 	if (value < 0) {
 		dev_err(hdev->dev,
@@ -72,7 +72,7 @@ static ssize_t mme_clk_show(struct device *dev, struct device_attribute *attr,
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, MME_PLL, false);
+	value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, false);
 
 	if (value < 0)
 		return value;
@@ -105,7 +105,7 @@ static ssize_t mme_clk_store(struct device *dev, struct device_attribute *attr,
 		goto fail;
 	}
 
-	hl_set_frequency(hdev, MME_PLL, value);
+	hl_set_frequency(hdev, HL_GOYA_MME_PLL, value);
 	goya->mme_clk = value;
 
 fail:
@@ -121,7 +121,7 @@ static ssize_t tpc_clk_show(struct device *dev, struct device_attribute *attr,
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, TPC_PLL, false);
+	value = hl_get_frequency(hdev, HL_GOYA_TPC_PLL, false);
 
 	if (value < 0)
 		return value;
@@ -154,7 +154,7 @@ static ssize_t tpc_clk_store(struct device *dev, struct device_attribute *attr,
 		goto fail;
 	}
 
-	hl_set_frequency(hdev, TPC_PLL, value);
+	hl_set_frequency(hdev, HL_GOYA_TPC_PLL, value);
 	goya->tpc_clk = value;
 
 fail:
@@ -170,7 +170,7 @@ static ssize_t ic_clk_show(struct device *dev, struct device_attribute *attr,
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, IC_PLL, false);
+	value = hl_get_frequency(hdev, HL_GOYA_IC_PLL, false);
 
 	if (value < 0)
 		return value;
@@ -203,7 +203,7 @@ static ssize_t ic_clk_store(struct device *dev, struct device_attribute *attr,
 		goto fail;
 	}
 
-	hl_set_frequency(hdev, IC_PLL, value);
+	hl_set_frequency(hdev, HL_GOYA_IC_PLL, value);
 	goya->ic_clk = value;
 
 fail:
@@ -219,7 +219,7 @@ static ssize_t mme_clk_curr_show(struct device *dev,
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, MME_PLL, true);
+	value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, true);
 
 	if (value < 0)
 		return value;
@@ -236,7 +236,7 @@ static ssize_t tpc_clk_curr_show(struct device *dev,
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, TPC_PLL, true);
+	value = hl_get_frequency(hdev, HL_GOYA_TPC_PLL, true);
 
 	if (value < 0)
 		return value;
@@ -253,7 +253,7 @@ static ssize_t ic_clk_curr_show(struct device *dev,
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, IC_PLL, true);
+	value = hl_get_frequency(hdev, HL_GOYA_IC_PLL, true);
 
 	if (value < 0)
 		return value;
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index d3e017b5f0db..6d2d34c9f375 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -239,6 +239,39 @@ enum gaudi_engine_id {
 	GAUDI_ENGINE_ID_SIZE
 };
 
+/*
+ * ASIC specific PLL index
+ *
+ * Used to retrieve in frequency info of different IPs via
+ * HL_INFO_PLL_FREQUENCY under HL_IOCTL_INFO IOCTL. The enums need to be
+ * used as an index in struct hl_pll_frequency_info
+ */
+
+enum hl_goya_pll_index {
+	HL_GOYA_CPU_PLL = 0,
+	HL_GOYA_IC_PLL,
+	HL_GOYA_MC_PLL,
+	HL_GOYA_MME_PLL,
+	HL_GOYA_PCI_PLL,
+	HL_GOYA_EMMC_PLL,
+	HL_GOYA_TPC_PLL,
+	HL_GOYA_PLL_MAX
+};
+
+enum hl_gaudi_pll_index {
+	HL_GAUDI_CPU_PLL = 0,
+	HL_GAUDI_PCI_PLL,
+	HL_GAUDI_SRAM_PLL,
+	HL_GAUDI_HBM_PLL,
+	HL_GAUDI_NIC_PLL,
+	HL_GAUDI_DMA_PLL,
+	HL_GAUDI_MESH_PLL,
+	HL_GAUDI_MME_PLL,
+	HL_GAUDI_TPC_PLL,
+	HL_GAUDI_IF_PLL,
+	HL_GAUDI_PLL_MAX
+};
+
 enum hl_device_status {
 	HL_DEVICE_STATUS_OPERATIONAL,
 	HL_DEVICE_STATUS_IN_RESET,
-- 
cgit v1.2.3


From 001d5f66c156f2c30b6bf85346de09de8db49b59 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Sun, 11 Apr 2021 21:06:05 +0300
Subject: habanalabs: skip reading f/w errors on bad status

If we read all FF from the boot status register, then something is
totally wrong and there is no point of reading specific errors.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 7cf82da67dab..fff29f057b6d 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -850,8 +850,13 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
 	if (rc) {
 		dev_err(hdev->dev, "Failed to read preboot version\n");
 		detect_cpu_boot_status(hdev, status);
-		fw_read_errors(hdev, boot_err0_reg,
-				cpu_security_boot_status_reg);
+
+		/* If we read all FF, then something is totally wrong, no point
+		 * of reading specific errors
+		 */
+		if (status != -1)
+			fw_read_errors(hdev, boot_err0_reg,
+					cpu_security_boot_status_reg);
 		return -EIO;
 	}
 
-- 
cgit v1.2.3


From b5fd82a7af198db04408e218f64dc3d4178d585a Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Mon, 12 Apr 2021 09:38:22 +0300
Subject: habanalabs: change error level of security not ready

This error indicates a problem in the security initialization inside
the f/w so we need to stop the device loading because it won't be
usable.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index fff29f057b6d..377a7ca886fe 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -362,12 +362,9 @@ static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
 	}
 
 	if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY) {
-		dev_warn(hdev->dev,
+		dev_err(hdev->dev,
 			"Device boot warning - security not ready\n");
-		/* This is a warning so we don't want it to disable the
-		 * device
-		 */
-		err_val &= ~CPU_BOOT_ERR0_SECURITY_NOT_RDY;
+		err_exists = true;
 	}
 
 	if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL) {
-- 
cgit v1.2.3


From 27a9e35daad080f3770401a1a11eda2f9f7732dd Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Mon, 12 Apr 2021 09:52:05 +0300
Subject: habanalabs: ignore f/w status error

In case firmware has a bug and erroneously reports a status error
(e.g. device unusable) during boot, allow the user to tell the driver
to continue the boot regardless of the error status.

This will be done via kernel parameter which exposes a mask. The
user that loads the driver can decide exactly which status error to
ignore and which to take into account. The bitmask is according to
defines in hl_boot_if.h

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c    | 3 ++-
 drivers/misc/habanalabs/common/habanalabs.h     | 7 +++++++
 drivers/misc/habanalabs/common/habanalabs_drv.c | 7 +++++++
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 377a7ca886fe..0713b2c12d54 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -400,7 +400,8 @@ static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
 		err_exists = true;
 	}
 
-	if (err_exists)
+	if (err_exists && ((err_val & ~CPU_BOOT_ERR0_ENABLED) &
+				lower_32_bits(hdev->boot_error_status_mask)))
 		return -EIO;
 
 	return 0;
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 91291a8e201e..6579f8767abd 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1962,6 +1962,12 @@ struct hl_mmu_funcs {
  * @clock_gating_mask: is clock gating enabled. bitmask that represents the
  *                     different engines. See debugfs-driver-habanalabs for
  *                     details.
+ * @boot_error_status_mask: contains a mask of the device boot error status.
+ *                          Each bit represents a different error, according to
+ *                          the defines in hl_boot_if.h. If the bit is cleared,
+ *                          the error will be ignored by the driver during
+ *                          device initialization. Mainly used to debug and
+ *                          workaround firmware bugs
  * @in_reset: is device in reset flow.
  * @curr_pll_profile: current PLL profile.
  * @card_type: Various ASICs have several card types. This indicates the card
@@ -2077,6 +2083,7 @@ struct hl_device {
 	u64				timeout_jiffies;
 	u64				max_power;
 	u64				clock_gating_mask;
+	u64				boot_error_status_mask;
 	atomic_t			in_reset;
 	enum hl_pll_frequency		curr_pll_profile;
 	enum cpucp_card_types		card_type;
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index 7135f1e03864..64d1530db985 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -30,6 +30,7 @@ static DEFINE_MUTEX(hl_devs_idr_lock);
 static int timeout_locked = 30;
 static int reset_on_lockup = 1;
 static int memory_scrub = 1;
+static ulong boot_error_status_mask = ULONG_MAX;
 
 module_param(timeout_locked, int, 0444);
 MODULE_PARM_DESC(timeout_locked,
@@ -43,6 +44,10 @@ module_param(memory_scrub, int, 0444);
 MODULE_PARM_DESC(memory_scrub,
 	"Scrub device memory in various states (0 = no, 1 = yes, default yes)");
 
+module_param(boot_error_status_mask, ulong, 0444);
+MODULE_PARM_DESC(boot_error_status_mask,
+	"Mask of the error status during device CPU boot (If bitX is cleared then error X is masked. Default all 1's)");
+
 #define PCI_VENDOR_ID_HABANALABS	0x1da3
 
 #define PCI_IDS_GOYA			0x0001
@@ -319,6 +324,8 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
 	hdev->major = hl_major;
 	hdev->reset_on_lockup = reset_on_lockup;
 	hdev->memory_scrub = memory_scrub;
+	hdev->boot_error_status_mask = boot_error_status_mask;
+
 	hdev->pldm = 0;
 
 	set_driver_behavior_per_device(hdev);
-- 
cgit v1.2.3


From 24a107097fbd8fb6a48a0dcb31e64c1de6831a1d Mon Sep 17 00:00:00 2001
From: Ofir Bitton <obitton@habana.ai>
Date: Tue, 27 Apr 2021 17:49:25 +0300
Subject: habanalabs: wait for interrupt wrong timeout calculation

Wait for interrupt timeout calculation is wrong, hence timeout occurs
when user waits on an interrupt with certain timeout values.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/command_submission.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index ff8791a651fd..af3c497defb1 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -2017,7 +2017,7 @@ wait_again:
 		if (completion_value >= target_value) {
 			*status = CS_WAIT_STATUS_COMPLETED;
 		} else {
-			timeout -= jiffies_to_usecs(completion_rc);
+			timeout = completion_rc;
 			goto wait_again;
 		}
 	} else {
-- 
cgit v1.2.3


From 115726c5d312b462c9d9931ea42becdfa838a076 Mon Sep 17 00:00:00 2001
From: Lv Yunlong <lyl2019@mail.ustc.edu.cn>
Date: Mon, 26 Apr 2021 06:43:46 -0700
Subject: habanalabs/gaudi: Fix a potential use after free in
 gaudi_memset_device_memory

Our code analyzer reported a uaf.

In gaudi_memset_device_memory, cb is get via hl_cb_kernel_create()
with 2 refcount.
If hl_cs_allocate_job() failed, the execution runs into release_cb
branch. One ref of cb is dropped by hl_cb_put(cb) and could be freed
if other thread also drops one ref. Then cb is used by cb->id later,
which is a potential uaf.

My patch add a variable 'id' to accept the value of cb->id before the
hl_cb_put(cb) is called, to avoid the potential uaf.

Fixes: 423815bf02e25 ("habanalabs/gaudi: remove PCI access to SM block")
Signed-off-by: Lv Yunlong <lyl2019@mail.ustc.edu.cn>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi/gaudi.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 81155f06c126..9e4a6bb3acd1 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -5579,6 +5579,7 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
 	struct hl_cs_job *job;
 	u32 cb_size, ctl, err_cause;
 	struct hl_cb *cb;
+	u64 id;
 	int rc;
 
 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
@@ -5645,8 +5646,9 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
 	}
 
 release_cb:
+	id = cb->id;
 	hl_cb_put(cb);
-	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
+	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
 
 	return rc;
 }
-- 
cgit v1.2.3


From 679971e7213174efb56abc8fab1299d0a88db0e8 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Fri, 7 May 2021 18:24:11 -0500
Subject: smb3: when mounting with multichannel include it in requested
 capabilities

In the SMB3/SMB3.1.1 negotiate protocol request, we are supposed to
advertise CAP_MULTICHANNEL capability when establishing multiple
channels has been requested by the user doing the mount. See MS-SMB2
sections 2.2.3 and 3.2.5.2

Without setting it there is some risk that multichannel could fail
if the server interpreted the field strictly.

Reviewed-By: Tom Talpey <tom@talpey.com>
Reviewed-by: Shyam Prasad N <sprasad@microsoft.com>
Cc: <stable@vger.kernel.org> # v5.8+
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/smb2pdu.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index e36c2a867783..a8bf43184773 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -841,6 +841,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 		req->SecurityMode = 0;
 
 	req->Capabilities = cpu_to_le32(server->vals->req_capabilities);
+	if (ses->chan_max > 1)
+		req->Capabilities |= cpu_to_le32(SMB2_GLOBAL_CAP_MULTI_CHANNEL);
 
 	/* ClientGUID must be zero for SMB2.02 dialect */
 	if (server->vals->protocol_id == SMB20_PROT_ID)
@@ -1032,6 +1034,9 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
 
 	pneg_inbuf->Capabilities =
 			cpu_to_le32(server->vals->req_capabilities);
+	if (tcon->ses->chan_max > 1)
+		pneg_inbuf->Capabilities |= cpu_to_le32(SMB2_GLOBAL_CAP_MULTI_CHANNEL);
+
 	memcpy(pneg_inbuf->Guid, server->client_guid,
 					SMB2_CLIENT_GUID_SIZE);
 
-- 
cgit v1.2.3


From 9c2dc11df50d1c8537075ff6b98472198e24438e Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Fri, 7 May 2021 20:00:41 -0500
Subject: smb3: do not attempt multichannel to server which does not support it

We were ignoring CAP_MULTI_CHANNEL in the server response - if the
server doesn't support multichannel we should not be attempting it.

See MS-SMB2 section 3.2.5.2

Reviewed-by: Shyam Prasad N <sprasad@microsoft.com>
Reviewed-By: Tom Talpey <tom@talpey.com>
Cc: <stable@vger.kernel.org> # v5.8+
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/sess.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 63d517b9f2ff..a92a1fb7cb52 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -97,6 +97,12 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
 		return 0;
 	}
 
+	if (!(ses->server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) {
+		cifs_dbg(VFS, "server %s does not support multichannel\n", ses->server->hostname);
+		ses->chan_max = 1;
+		return 0;
+	}
+
 	/*
 	 * Make a copy of the iface list at the time and use that
 	 * instead so as to not hold the iface spinlock for opening
-- 
cgit v1.2.3


From c1f8a398b6d661b594556a91224b096d92293061 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Fri, 7 May 2021 19:33:51 -0500
Subject: smb3: if max_channels set to more than one channel request
 multichannel

Mounting with "multichannel" is obviously implied if user requested
more than one channel on mount (ie mount parm max_channels>1).
Currently both have to be specified. Fix that so that if max_channels
is greater than 1 on mount, enable multichannel rather than silently
falling back to non-multichannel.

Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-By: Tom Talpey <tom@talpey.com>
Cc: <stable@vger.kernel.org> # v5.11+
Reviewed-by: Shyam Prasad N <sprasad@microsoft.com>
---
 fs/cifs/fs_context.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c
index 3bcf881c3ae9..5d21cd905315 100644
--- a/fs/cifs/fs_context.c
+++ b/fs/cifs/fs_context.c
@@ -1021,6 +1021,9 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
 			goto cifs_parse_mount_err;
 		}
 		ctx->max_channels = result.uint_32;
+		/* If more than one channel requested ... they want multichan */
+		if (result.uint_32 > 1)
+			ctx->multichannel = true;
 		break;
 	case Opt_handletimeout:
 		ctx->handle_timeout = result.uint_32;
-- 
cgit v1.2.3


From 35c820e71565d1fa835b82499359218b219828ac Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sat, 8 May 2021 21:49:48 -0600
Subject: Revert "bio: limit bio max size"

This reverts commit cd2c7545ae1beac3b6aae033c7f31193b3255946.

Alex reports that the commit causes corruption with LUKS on ext4. Revert
it for now so that this can be investigated properly.

Link: https://lore.kernel.org/linux-block/1620493841.bxdq8r5haw.none@localhost/
Reported-by: Alex Xu (Hello71) <alex_y_xu@yahoo.ca>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c            | 13 ++-----------
 block/blk-settings.c   |  5 -----
 include/linux/bio.h    |  4 +---
 include/linux/blkdev.h |  2 --
 4 files changed, 3 insertions(+), 21 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 221dc56ba22f..44205dfb6b60 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -255,13 +255,6 @@ void bio_init(struct bio *bio, struct bio_vec *table,
 }
 EXPORT_SYMBOL(bio_init);
 
-unsigned int bio_max_size(struct bio *bio)
-{
-	struct block_device *bdev = bio->bi_bdev;
-
-	return bdev ? bdev->bd_disk->queue->limits.bio_max_bytes : UINT_MAX;
-}
-
 /**
  * bio_reset - reinitialize a bio
  * @bio:	bio to reset
@@ -873,7 +866,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page,
 		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
 
 		if (page_is_mergeable(bv, page, len, off, same_page)) {
-			if (bio->bi_iter.bi_size > bio_max_size(bio) - len) {
+			if (bio->bi_iter.bi_size > UINT_MAX - len) {
 				*same_page = false;
 				return false;
 			}
@@ -1002,7 +995,6 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 {
 	unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
 	unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
-	unsigned int bytes_left = bio_max_size(bio) - bio->bi_iter.bi_size;
 	struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
 	struct page **pages = (struct page **)bv;
 	bool same_page = false;
@@ -1018,8 +1010,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 	BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
 	pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
 
-	size = iov_iter_get_pages(iter, pages, bytes_left, nr_pages,
-				  &offset);
+	size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
 	if (unlikely(size <= 0))
 		return size ? size : -EFAULT;
 
diff --git a/block/blk-settings.c b/block/blk-settings.c
index c646503e55d2..9c009090c4b5 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -31,7 +31,6 @@ EXPORT_SYMBOL_GPL(blk_queue_rq_timeout);
  */
 void blk_set_default_limits(struct queue_limits *lim)
 {
-	lim->bio_max_bytes = UINT_MAX;
 	lim->max_segments = BLK_MAX_SEGMENTS;
 	lim->max_discard_segments = 1;
 	lim->max_integrity_segments = 0;
@@ -140,10 +139,6 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto
 				 limits->logical_block_size >> SECTOR_SHIFT);
 	limits->max_sectors = max_sectors;
 
-	if (check_shl_overflow(max_sectors, SECTOR_SHIFT,
-				&limits->bio_max_bytes))
-		limits->bio_max_bytes = UINT_MAX;
-
 	q->backing_dev_info->io_pages = max_sectors >> (PAGE_SHIFT - 9);
 }
 EXPORT_SYMBOL(blk_queue_max_hw_sectors);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index f1a99f0a240c..a0b4cfdf62a4 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -106,8 +106,6 @@ static inline void *bio_data(struct bio *bio)
 	return NULL;
 }
 
-extern unsigned int bio_max_size(struct bio *bio);
-
 /**
  * bio_full - check if the bio is full
  * @bio:	bio to check
@@ -121,7 +119,7 @@ static inline bool bio_full(struct bio *bio, unsigned len)
 	if (bio->bi_vcnt >= bio->bi_max_vecs)
 		return true;
 
-	if (bio->bi_iter.bi_size > bio_max_size(bio) - len)
+	if (bio->bi_iter.bi_size > UINT_MAX - len)
 		return true;
 
 	return false;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 40c7c4d87aa1..b91ba6207365 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -327,8 +327,6 @@ enum blk_bounce {
 };
 
 struct queue_limits {
-	unsigned int		bio_max_bytes;
-
 	enum blk_bounce		bounce;
 	unsigned long		seg_boundary_mask;
 	unsigned long		virt_boundary_mask;
-- 
cgit v1.2.3


From a298232ee6b9a1d5d732aa497ff8be0d45b5bd82 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Fri, 7 May 2021 21:06:38 +0100
Subject: io_uring: fix link timeout refs

WARNING: CPU: 0 PID: 10242 at lib/refcount.c:28 refcount_warn_saturate+0x15b/0x1a0 lib/refcount.c:28
RIP: 0010:refcount_warn_saturate+0x15b/0x1a0 lib/refcount.c:28
Call Trace:
 __refcount_sub_and_test include/linux/refcount.h:283 [inline]
 __refcount_dec_and_test include/linux/refcount.h:315 [inline]
 refcount_dec_and_test include/linux/refcount.h:333 [inline]
 io_put_req fs/io_uring.c:2140 [inline]
 io_queue_linked_timeout fs/io_uring.c:6300 [inline]
 __io_queue_sqe+0xbef/0xec0 fs/io_uring.c:6354
 io_submit_sqe fs/io_uring.c:6534 [inline]
 io_submit_sqes+0x2bbd/0x7c50 fs/io_uring.c:6660
 __do_sys_io_uring_enter fs/io_uring.c:9240 [inline]
 __se_sys_io_uring_enter+0x256/0x1d60 fs/io_uring.c:9182

io_link_timeout_fn() should put only one reference of the linked timeout
request, however in case of racing with the master request's completion
first io_req_complete() puts one and then io_put_req_deferred() is
called.

Cc: stable@vger.kernel.org # 5.12+
Fixes: 9ae1f8dd372e0 ("io_uring: fix inconsistent lock state")
Reported-by: syzbot+a2910119328ce8e7996f@syzkaller.appspotmail.com
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/ff51018ff29de5ffa76f09273ef48cb24c720368.1620417627.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index f46acbbeed57..9ac5e278a91e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6363,10 +6363,10 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
 	if (prev) {
 		io_async_find_and_cancel(ctx, req, prev->user_data, -ETIME);
 		io_put_req_deferred(prev, 1);
+		io_put_req_deferred(req, 1);
 	} else {
 		io_req_complete_post(req, -ETIME, 0);
 	}
-	io_put_req_deferred(req, 1);
 	return HRTIMER_NORESTART;
 }
 
-- 
cgit v1.2.3


From c1b55029493879f5bd585ff79f326e71f0bc05e3 Mon Sep 17 00:00:00 2001
From: Daniel Cordova A <danesc87@gmail.com>
Date: Fri, 7 May 2021 12:31:16 -0500
Subject: ALSA: hda: fixup headset for ASUS GU502 laptop

The GU502 requires a few steps to make headset i/o works properly:
pincfg, verbs to unmute headphone out and callback to toggle output
between speakers and headphone using jack.

Signed-off-by: Daniel Cordova A <danesc87@gmail.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210507173116.12043-1-danesc87@gmail.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 62 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index a5f3e78ec04e..b4b71609dff1 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6254,6 +6254,35 @@ static void alc294_fixup_gx502_hp(struct hda_codec *codec,
 	}
 }
 
+static void alc294_gu502_toggle_output(struct hda_codec *codec,
+				       struct hda_jack_callback *cb)
+{
+	/* Windows sets 0x10 to 0x8420 for Node 0x20 which is
+	 * responsible from changes between speakers and headphones
+	 */
+	if (snd_hda_jack_detect_state(codec, 0x21) == HDA_JACK_PRESENT)
+		alc_write_coef_idx(codec, 0x10, 0x8420);
+	else
+		alc_write_coef_idx(codec, 0x10, 0x0a20);
+}
+
+static void alc294_fixup_gu502_hp(struct hda_codec *codec,
+				  const struct hda_fixup *fix, int action)
+{
+	if (!is_jack_detectable(codec, 0x21))
+		return;
+
+	switch (action) {
+	case HDA_FIXUP_ACT_PRE_PROBE:
+		snd_hda_jack_detect_enable_callback(codec, 0x21,
+				alc294_gu502_toggle_output);
+		break;
+	case HDA_FIXUP_ACT_INIT:
+		alc294_gu502_toggle_output(codec, NULL);
+		break;
+	}
+}
+
 static void  alc285_fixup_hp_gpio_amp_init(struct hda_codec *codec,
 			      const struct hda_fixup *fix, int action)
 {
@@ -6471,6 +6500,9 @@ enum {
 	ALC294_FIXUP_ASUS_GX502_HP,
 	ALC294_FIXUP_ASUS_GX502_PINS,
 	ALC294_FIXUP_ASUS_GX502_VERBS,
+	ALC294_FIXUP_ASUS_GU502_HP,
+	ALC294_FIXUP_ASUS_GU502_PINS,
+	ALC294_FIXUP_ASUS_GU502_VERBS,
 	ALC285_FIXUP_HP_GPIO_LED,
 	ALC285_FIXUP_HP_MUTE_LED,
 	ALC236_FIXUP_HP_GPIO_LED,
@@ -7712,6 +7744,35 @@ static const struct hda_fixup alc269_fixups[] = {
 		.type = HDA_FIXUP_FUNC,
 		.v.func = alc294_fixup_gx502_hp,
 	},
+	[ALC294_FIXUP_ASUS_GU502_PINS] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{ 0x19, 0x01a11050 }, /* rear HP mic */
+			{ 0x1a, 0x01a11830 }, /* rear external mic */
+			{ 0x21, 0x012110f0 }, /* rear HP out */
+			{ }
+		},
+		.chained = true,
+		.chain_id = ALC294_FIXUP_ASUS_GU502_VERBS
+	},
+	[ALC294_FIXUP_ASUS_GU502_VERBS] = {
+		.type = HDA_FIXUP_VERBS,
+		.v.verbs = (const struct hda_verb[]) {
+			/* set 0x15 to HP-OUT ctrl */
+			{ 0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, 0xc0 },
+			/* unmute the 0x15 amp */
+			{ 0x15, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000 },
+			/* set 0x1b to HP-OUT */
+			{ 0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24 },
+			{ }
+		},
+		.chained = true,
+		.chain_id = ALC294_FIXUP_ASUS_GU502_HP
+	},
+	[ALC294_FIXUP_ASUS_GU502_HP] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = alc294_fixup_gu502_hp,
+	},
 	[ALC294_FIXUP_ASUS_COEF_1B] = {
 		.type = HDA_FIXUP_VERBS,
 		.v.verbs = (const struct hda_verb[]) {
@@ -8256,6 +8317,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1043, 0x1ccd, "ASUS X555UB", ALC256_FIXUP_ASUS_MIC),
 	SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE),
 	SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502),
+	SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS),
 	SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401),
 	SND_PCI_QUIRK(0x1043, 0x1f11, "ASUS Zephyrus G14", ALC289_FIXUP_ASUS_GA401),
 	SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2),
-- 
cgit v1.2.3


From 6dae40aed484ef2f1a3934dcdcd17b7055173e56 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 9 May 2021 14:03:33 -0700
Subject: fbmem: fix horribly incorrect placement of __maybe_unused

Commit b9d79e4ca4ff ("fbmem: Mark proc_fb_seq_ops as __maybe_unused")
places the '__maybe_unused' in an entirely incorrect location between
the "struct" keyword and the structure name.

It's a wonder that gcc accepts that silently, but clang quite reasonably
warns about it:

    drivers/video/fbdev/core/fbmem.c:736:21: warning: attribute declaration must precede definition [-Wignored-attributes]
    static const struct __maybe_unused seq_operations proc_fb_seq_ops = {
                        ^

Fix it.

Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/fbdev/core/fbmem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index 52c606c0f8a2..072780b0e570 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -733,7 +733,7 @@ static int fb_seq_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static const struct __maybe_unused seq_operations proc_fb_seq_ops = {
+static const struct seq_operations __maybe_unused proc_fb_seq_ops = {
 	.start	= fb_seq_start,
 	.next	= fb_seq_next,
 	.stop	= fb_seq_stop,
-- 
cgit v1.2.3


From 6efb943b8616ec53a5e444193dccf1af9ad627b5 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 9 May 2021 14:17:44 -0700
Subject: Linux 5.13-rc1

---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 15b6476d0f89..53d09c414635 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 VERSION = 5
-PATCHLEVEL = 12
+PATCHLEVEL = 13
 SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc1
 NAME = Frozen Wasteland
 
 # *DOCUMENTATION*
-- 
cgit v1.2.3


From 4eff124347191d1548eb4e14e20e77513dcbd0fe Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Sun, 9 May 2021 12:11:02 +0300
Subject: openrisc: mm/init.c: remove unused memblock_region variable in
 map_ram()

Kernel test robot reports:

cppcheck possible warnings: (new ones prefixed by >>, may not real problems)

>> arch/openrisc/mm/init.c:125:10: warning: Uninitialized variable: region [uninitvar]
            region->base, region->base + region->size);
            ^

Replace usage of memblock_region fields with 'start' and 'end' variables
that are initialized in for_each_mem_range() and remove the declaration of
region.

Fixes: b10d6bca8720 ("arch, drivers: replace for_each_membock() with for_each_mem_range()")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Stafford Horne <shorne@gmail.com>
---
 arch/openrisc/mm/init.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c
index bf9b2310fc93..f3fa02b8838a 100644
--- a/arch/openrisc/mm/init.c
+++ b/arch/openrisc/mm/init.c
@@ -75,7 +75,6 @@ static void __init map_ram(void)
 	/* These mark extents of read-only kernel pages...
 	 * ...from vmlinux.lds.S
 	 */
-	struct memblock_region *region;
 
 	v = PAGE_OFFSET;
 
@@ -121,7 +120,7 @@ static void __init map_ram(void)
 		}
 
 		printk(KERN_INFO "%s: Memory: 0x%x-0x%x\n", __func__,
-		       region->base, region->base + region->size);
+		       start, end);
 	}
 }
 
-- 
cgit v1.2.3


From 371dcaee1ade4b1eefd541ae6ee048b5ce15b37c Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Sun, 9 May 2021 12:11:03 +0300
Subject: openrisc: mm/init.c: remove unused variable 'end' in paging_init()

A build with W=1 enabled produces the following warning:

  CC      arch/openrisc/mm/init.o
arch/openrisc/mm/init.c: In function 'paging_init':
arch/openrisc/mm/init.c:131:16: warning: variable 'end' set but not used [-Wunused-but-set-variable]
  131 |  unsigned long end;
      |                ^~~

Remove the unused variable 'end'.

Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Stafford Horne <shorne@gmail.com>
---
 arch/openrisc/mm/init.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c
index f3fa02b8838a..6e38ec96cab8 100644
--- a/arch/openrisc/mm/init.c
+++ b/arch/openrisc/mm/init.c
@@ -128,7 +128,6 @@ void __init paging_init(void)
 {
 	extern void tlb_init(void);
 
-	unsigned long end;
 	int i;
 
 	printk(KERN_INFO "Setting up paging and PTEs.\n");
@@ -144,8 +143,6 @@ void __init paging_init(void)
 	 */
 	current_pgd[smp_processor_id()] = init_mm.pgd;
 
-	end = (unsigned long)__va(max_low_pfn * PAGE_SIZE);
-
 	map_ram();
 
 	zone_sizes_init();
-- 
cgit v1.2.3


From e759959fe3b8313c81d6200be44cb8a644d845ea Mon Sep 17 00:00:00 2001
From: Brijesh Singh <brijesh.singh@amd.com>
Date: Tue, 27 Apr 2021 06:16:34 -0500
Subject: x86/sev-es: Rename sev-es.{ch} to sev.{ch}

SEV-SNP builds upon the SEV-ES functionality while adding new hardware
protection. Version 2 of the GHCB specification adds new NAE events that
are SEV-SNP specific. Rename the sev-es.{ch} to sev.{ch} so that all
SEV* functionality can be consolidated in one place.

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Joerg Roedel <jroedel@suse.de>
Link: https://lkml.kernel.org/r/20210427111636.1207-2-brijesh.singh@amd.com
---
 arch/x86/boot/compressed/Makefile |    6 +-
 arch/x86/boot/compressed/sev-es.c |  206 ------
 arch/x86/boot/compressed/sev.c    |  206 ++++++
 arch/x86/include/asm/sev-es.h     |  114 ---
 arch/x86/include/asm/sev.h        |  114 +++
 arch/x86/kernel/Makefile          |    6 +-
 arch/x86/kernel/head64.c          |    2 +-
 arch/x86/kernel/nmi.c             |    2 +-
 arch/x86/kernel/sev-es-shared.c   |  525 -------------
 arch/x86/kernel/sev-es.c          | 1461 -------------------------------------
 arch/x86/kernel/sev-shared.c      |  525 +++++++++++++
 arch/x86/kernel/sev.c             | 1461 +++++++++++++++++++++++++++++++++++++
 arch/x86/mm/extable.c             |    2 +-
 arch/x86/platform/efi/efi_64.c    |    2 +-
 arch/x86/realmode/init.c          |    2 +-
 15 files changed, 2317 insertions(+), 2317 deletions(-)
 delete mode 100644 arch/x86/boot/compressed/sev-es.c
 create mode 100644 arch/x86/boot/compressed/sev.c
 delete mode 100644 arch/x86/include/asm/sev-es.h
 create mode 100644 arch/x86/include/asm/sev.h
 delete mode 100644 arch/x86/kernel/sev-es-shared.c
 delete mode 100644 arch/x86/kernel/sev-es.c
 create mode 100644 arch/x86/kernel/sev-shared.c
 create mode 100644 arch/x86/kernel/sev.c

diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 6e5522aebbbd..2a2975236c9e 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -48,10 +48,10 @@ KBUILD_CFLAGS += $(call as-option,-Wa$(comma)-mrelax-relocations=no)
 KBUILD_CFLAGS += -include $(srctree)/include/linux/hidden.h
 KBUILD_CFLAGS += $(CLANG_FLAGS)
 
-# sev-es.c indirectly inludes inat-table.h which is generated during
+# sev.c indirectly inludes inat-table.h which is generated during
 # compilation and stored in $(objtree). Add the directory to the includes so
 # that the compiler finds it even with out-of-tree builds (make O=/some/path).
-CFLAGS_sev-es.o += -I$(objtree)/arch/x86/lib/
+CFLAGS_sev.o += -I$(objtree)/arch/x86/lib/
 
 KBUILD_AFLAGS  := $(KBUILD_CFLAGS) -D__ASSEMBLY__
 GCOV_PROFILE := n
@@ -93,7 +93,7 @@ ifdef CONFIG_X86_64
 	vmlinux-objs-y += $(obj)/idt_64.o $(obj)/idt_handlers_64.o
 	vmlinux-objs-y += $(obj)/mem_encrypt.o
 	vmlinux-objs-y += $(obj)/pgtable_64.o
-	vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev-es.o
+	vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev.o
 endif
 
 vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
diff --git a/arch/x86/boot/compressed/sev-es.c b/arch/x86/boot/compressed/sev-es.c
deleted file mode 100644
index 82041bd380e5..000000000000
--- a/arch/x86/boot/compressed/sev-es.c
+++ /dev/null
@@ -1,206 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * AMD Encrypted Register State Support
- *
- * Author: Joerg Roedel <jroedel@suse.de>
- */
-
-/*
- * misc.h needs to be first because it knows how to include the other kernel
- * headers in the pre-decompression code in a way that does not break
- * compilation.
- */
-#include "misc.h"
-
-#include <asm/pgtable_types.h>
-#include <asm/sev-es.h>
-#include <asm/trapnr.h>
-#include <asm/trap_pf.h>
-#include <asm/msr-index.h>
-#include <asm/fpu/xcr.h>
-#include <asm/ptrace.h>
-#include <asm/svm.h>
-
-#include "error.h"
-
-struct ghcb boot_ghcb_page __aligned(PAGE_SIZE);
-struct ghcb *boot_ghcb;
-
-/*
- * Copy a version of this function here - insn-eval.c can't be used in
- * pre-decompression code.
- */
-static bool insn_has_rep_prefix(struct insn *insn)
-{
-	insn_byte_t p;
-	int i;
-
-	insn_get_prefixes(insn);
-
-	for_each_insn_prefix(insn, i, p) {
-		if (p == 0xf2 || p == 0xf3)
-			return true;
-	}
-
-	return false;
-}
-
-/*
- * Only a dummy for insn_get_seg_base() - Early boot-code is 64bit only and
- * doesn't use segments.
- */
-static unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx)
-{
-	return 0UL;
-}
-
-static inline u64 sev_es_rd_ghcb_msr(void)
-{
-	unsigned long low, high;
-
-	asm volatile("rdmsr" : "=a" (low), "=d" (high) :
-			"c" (MSR_AMD64_SEV_ES_GHCB));
-
-	return ((high << 32) | low);
-}
-
-static inline void sev_es_wr_ghcb_msr(u64 val)
-{
-	u32 low, high;
-
-	low  = val & 0xffffffffUL;
-	high = val >> 32;
-
-	asm volatile("wrmsr" : : "c" (MSR_AMD64_SEV_ES_GHCB),
-			"a"(low), "d" (high) : "memory");
-}
-
-static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
-{
-	char buffer[MAX_INSN_SIZE];
-	int ret;
-
-	memcpy(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
-
-	ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64);
-	if (ret < 0)
-		return ES_DECODE_FAILED;
-
-	return ES_OK;
-}
-
-static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
-				   void *dst, char *buf, size_t size)
-{
-	memcpy(dst, buf, size);
-
-	return ES_OK;
-}
-
-static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
-				  void *src, char *buf, size_t size)
-{
-	memcpy(buf, src, size);
-
-	return ES_OK;
-}
-
-#undef __init
-#undef __pa
-#define __init
-#define __pa(x)	((unsigned long)(x))
-
-#define __BOOT_COMPRESSED
-
-/* Basic instruction decoding support needed */
-#include "../../lib/inat.c"
-#include "../../lib/insn.c"
-
-/* Include code for early handlers */
-#include "../../kernel/sev-es-shared.c"
-
-static bool early_setup_sev_es(void)
-{
-	if (!sev_es_negotiate_protocol())
-		sev_es_terminate(GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED);
-
-	if (set_page_decrypted((unsigned long)&boot_ghcb_page))
-		return false;
-
-	/* Page is now mapped decrypted, clear it */
-	memset(&boot_ghcb_page, 0, sizeof(boot_ghcb_page));
-
-	boot_ghcb = &boot_ghcb_page;
-
-	/* Initialize lookup tables for the instruction decoder */
-	inat_init_tables();
-
-	return true;
-}
-
-void sev_es_shutdown_ghcb(void)
-{
-	if (!boot_ghcb)
-		return;
-
-	if (!sev_es_check_cpu_features())
-		error("SEV-ES CPU Features missing.");
-
-	/*
-	 * GHCB Page must be flushed from the cache and mapped encrypted again.
-	 * Otherwise the running kernel will see strange cache effects when
-	 * trying to use that page.
-	 */
-	if (set_page_encrypted((unsigned long)&boot_ghcb_page))
-		error("Can't map GHCB page encrypted");
-
-	/*
-	 * GHCB page is mapped encrypted again and flushed from the cache.
-	 * Mark it non-present now to catch bugs when #VC exceptions trigger
-	 * after this point.
-	 */
-	if (set_page_non_present((unsigned long)&boot_ghcb_page))
-		error("Can't unmap GHCB page");
-}
-
-bool sev_es_check_ghcb_fault(unsigned long address)
-{
-	/* Check whether the fault was on the GHCB page */
-	return ((address & PAGE_MASK) == (unsigned long)&boot_ghcb_page);
-}
-
-void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code)
-{
-	struct es_em_ctxt ctxt;
-	enum es_result result;
-
-	if (!boot_ghcb && !early_setup_sev_es())
-		sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
-
-	vc_ghcb_invalidate(boot_ghcb);
-	result = vc_init_em_ctxt(&ctxt, regs, exit_code);
-	if (result != ES_OK)
-		goto finish;
-
-	switch (exit_code) {
-	case SVM_EXIT_RDTSC:
-	case SVM_EXIT_RDTSCP:
-		result = vc_handle_rdtsc(boot_ghcb, &ctxt, exit_code);
-		break;
-	case SVM_EXIT_IOIO:
-		result = vc_handle_ioio(boot_ghcb, &ctxt);
-		break;
-	case SVM_EXIT_CPUID:
-		result = vc_handle_cpuid(boot_ghcb, &ctxt);
-		break;
-	default:
-		result = ES_UNSUPPORTED;
-		break;
-	}
-
-finish:
-	if (result == ES_OK)
-		vc_finish_insn(&ctxt);
-	else if (result != ES_RETRY)
-		sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
-}
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
new file mode 100644
index 000000000000..670e998fe930
--- /dev/null
+++ b/arch/x86/boot/compressed/sev.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AMD Encrypted Register State Support
+ *
+ * Author: Joerg Roedel <jroedel@suse.de>
+ */
+
+/*
+ * misc.h needs to be first because it knows how to include the other kernel
+ * headers in the pre-decompression code in a way that does not break
+ * compilation.
+ */
+#include "misc.h"
+
+#include <asm/pgtable_types.h>
+#include <asm/sev.h>
+#include <asm/trapnr.h>
+#include <asm/trap_pf.h>
+#include <asm/msr-index.h>
+#include <asm/fpu/xcr.h>
+#include <asm/ptrace.h>
+#include <asm/svm.h>
+
+#include "error.h"
+
+struct ghcb boot_ghcb_page __aligned(PAGE_SIZE);
+struct ghcb *boot_ghcb;
+
+/*
+ * Copy a version of this function here - insn-eval.c can't be used in
+ * pre-decompression code.
+ */
+static bool insn_has_rep_prefix(struct insn *insn)
+{
+	insn_byte_t p;
+	int i;
+
+	insn_get_prefixes(insn);
+
+	for_each_insn_prefix(insn, i, p) {
+		if (p == 0xf2 || p == 0xf3)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Only a dummy for insn_get_seg_base() - Early boot-code is 64bit only and
+ * doesn't use segments.
+ */
+static unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx)
+{
+	return 0UL;
+}
+
+static inline u64 sev_es_rd_ghcb_msr(void)
+{
+	unsigned long low, high;
+
+	asm volatile("rdmsr" : "=a" (low), "=d" (high) :
+			"c" (MSR_AMD64_SEV_ES_GHCB));
+
+	return ((high << 32) | low);
+}
+
+static inline void sev_es_wr_ghcb_msr(u64 val)
+{
+	u32 low, high;
+
+	low  = val & 0xffffffffUL;
+	high = val >> 32;
+
+	asm volatile("wrmsr" : : "c" (MSR_AMD64_SEV_ES_GHCB),
+			"a"(low), "d" (high) : "memory");
+}
+
+static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
+{
+	char buffer[MAX_INSN_SIZE];
+	int ret;
+
+	memcpy(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
+
+	ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64);
+	if (ret < 0)
+		return ES_DECODE_FAILED;
+
+	return ES_OK;
+}
+
+static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
+				   void *dst, char *buf, size_t size)
+{
+	memcpy(dst, buf, size);
+
+	return ES_OK;
+}
+
+static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
+				  void *src, char *buf, size_t size)
+{
+	memcpy(buf, src, size);
+
+	return ES_OK;
+}
+
+#undef __init
+#undef __pa
+#define __init
+#define __pa(x)	((unsigned long)(x))
+
+#define __BOOT_COMPRESSED
+
+/* Basic instruction decoding support needed */
+#include "../../lib/inat.c"
+#include "../../lib/insn.c"
+
+/* Include code for early handlers */
+#include "../../kernel/sev-shared.c"
+
+static bool early_setup_sev_es(void)
+{
+	if (!sev_es_negotiate_protocol())
+		sev_es_terminate(GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED);
+
+	if (set_page_decrypted((unsigned long)&boot_ghcb_page))
+		return false;
+
+	/* Page is now mapped decrypted, clear it */
+	memset(&boot_ghcb_page, 0, sizeof(boot_ghcb_page));
+
+	boot_ghcb = &boot_ghcb_page;
+
+	/* Initialize lookup tables for the instruction decoder */
+	inat_init_tables();
+
+	return true;
+}
+
+void sev_es_shutdown_ghcb(void)
+{
+	if (!boot_ghcb)
+		return;
+
+	if (!sev_es_check_cpu_features())
+		error("SEV-ES CPU Features missing.");
+
+	/*
+	 * GHCB Page must be flushed from the cache and mapped encrypted again.
+	 * Otherwise the running kernel will see strange cache effects when
+	 * trying to use that page.
+	 */
+	if (set_page_encrypted((unsigned long)&boot_ghcb_page))
+		error("Can't map GHCB page encrypted");
+
+	/*
+	 * GHCB page is mapped encrypted again and flushed from the cache.
+	 * Mark it non-present now to catch bugs when #VC exceptions trigger
+	 * after this point.
+	 */
+	if (set_page_non_present((unsigned long)&boot_ghcb_page))
+		error("Can't unmap GHCB page");
+}
+
+bool sev_es_check_ghcb_fault(unsigned long address)
+{
+	/* Check whether the fault was on the GHCB page */
+	return ((address & PAGE_MASK) == (unsigned long)&boot_ghcb_page);
+}
+
+void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code)
+{
+	struct es_em_ctxt ctxt;
+	enum es_result result;
+
+	if (!boot_ghcb && !early_setup_sev_es())
+		sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
+
+	vc_ghcb_invalidate(boot_ghcb);
+	result = vc_init_em_ctxt(&ctxt, regs, exit_code);
+	if (result != ES_OK)
+		goto finish;
+
+	switch (exit_code) {
+	case SVM_EXIT_RDTSC:
+	case SVM_EXIT_RDTSCP:
+		result = vc_handle_rdtsc(boot_ghcb, &ctxt, exit_code);
+		break;
+	case SVM_EXIT_IOIO:
+		result = vc_handle_ioio(boot_ghcb, &ctxt);
+		break;
+	case SVM_EXIT_CPUID:
+		result = vc_handle_cpuid(boot_ghcb, &ctxt);
+		break;
+	default:
+		result = ES_UNSUPPORTED;
+		break;
+	}
+
+finish:
+	if (result == ES_OK)
+		vc_finish_insn(&ctxt);
+	else if (result != ES_RETRY)
+		sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
+}
diff --git a/arch/x86/include/asm/sev-es.h b/arch/x86/include/asm/sev-es.h
deleted file mode 100644
index cf1d957c7091..000000000000
--- a/arch/x86/include/asm/sev-es.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * AMD Encrypted Register State Support
- *
- * Author: Joerg Roedel <jroedel@suse.de>
- */
-
-#ifndef __ASM_ENCRYPTED_STATE_H
-#define __ASM_ENCRYPTED_STATE_H
-
-#include <linux/types.h>
-#include <asm/insn.h>
-
-#define GHCB_SEV_INFO		0x001UL
-#define GHCB_SEV_INFO_REQ	0x002UL
-#define		GHCB_INFO(v)		((v) & 0xfffUL)
-#define		GHCB_PROTO_MAX(v)	(((v) >> 48) & 0xffffUL)
-#define		GHCB_PROTO_MIN(v)	(((v) >> 32) & 0xffffUL)
-#define		GHCB_PROTO_OUR		0x0001UL
-#define GHCB_SEV_CPUID_REQ	0x004UL
-#define		GHCB_CPUID_REQ_EAX	0
-#define		GHCB_CPUID_REQ_EBX	1
-#define		GHCB_CPUID_REQ_ECX	2
-#define		GHCB_CPUID_REQ_EDX	3
-#define		GHCB_CPUID_REQ(fn, reg) (GHCB_SEV_CPUID_REQ | \
-					(((unsigned long)reg & 3) << 30) | \
-					(((unsigned long)fn) << 32))
-
-#define	GHCB_PROTOCOL_MAX	0x0001UL
-#define GHCB_DEFAULT_USAGE	0x0000UL
-
-#define GHCB_SEV_CPUID_RESP	0x005UL
-#define GHCB_SEV_TERMINATE	0x100UL
-#define		GHCB_SEV_TERMINATE_REASON(reason_set, reason_val)	\
-			(((((u64)reason_set) &  0x7) << 12) |		\
-			 ((((u64)reason_val) & 0xff) << 16))
-#define		GHCB_SEV_ES_REASON_GENERAL_REQUEST	0
-#define		GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED	1
-
-#define	GHCB_SEV_GHCB_RESP_CODE(v)	((v) & 0xfff)
-#define	VMGEXIT()			{ asm volatile("rep; vmmcall\n\r"); }
-
-enum es_result {
-	ES_OK,			/* All good */
-	ES_UNSUPPORTED,		/* Requested operation not supported */
-	ES_VMM_ERROR,		/* Unexpected state from the VMM */
-	ES_DECODE_FAILED,	/* Instruction decoding failed */
-	ES_EXCEPTION,		/* Instruction caused exception */
-	ES_RETRY,		/* Retry instruction emulation */
-};
-
-struct es_fault_info {
-	unsigned long vector;
-	unsigned long error_code;
-	unsigned long cr2;
-};
-
-struct pt_regs;
-
-/* ES instruction emulation context */
-struct es_em_ctxt {
-	struct pt_regs *regs;
-	struct insn insn;
-	struct es_fault_info fi;
-};
-
-void do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code);
-
-static inline u64 lower_bits(u64 val, unsigned int bits)
-{
-	u64 mask = (1ULL << bits) - 1;
-
-	return (val & mask);
-}
-
-struct real_mode_header;
-enum stack_type;
-
-/* Early IDT entry points for #VC handler */
-extern void vc_no_ghcb(void);
-extern void vc_boot_ghcb(void);
-extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
-
-#ifdef CONFIG_AMD_MEM_ENCRYPT
-extern struct static_key_false sev_es_enable_key;
-extern void __sev_es_ist_enter(struct pt_regs *regs);
-extern void __sev_es_ist_exit(void);
-static __always_inline void sev_es_ist_enter(struct pt_regs *regs)
-{
-	if (static_branch_unlikely(&sev_es_enable_key))
-		__sev_es_ist_enter(regs);
-}
-static __always_inline void sev_es_ist_exit(void)
-{
-	if (static_branch_unlikely(&sev_es_enable_key))
-		__sev_es_ist_exit();
-}
-extern int sev_es_setup_ap_jump_table(struct real_mode_header *rmh);
-extern void __sev_es_nmi_complete(void);
-static __always_inline void sev_es_nmi_complete(void)
-{
-	if (static_branch_unlikely(&sev_es_enable_key))
-		__sev_es_nmi_complete();
-}
-extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd);
-#else
-static inline void sev_es_ist_enter(struct pt_regs *regs) { }
-static inline void sev_es_ist_exit(void) { }
-static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; }
-static inline void sev_es_nmi_complete(void) { }
-static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; }
-#endif
-
-#endif
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
new file mode 100644
index 000000000000..cf1d957c7091
--- /dev/null
+++ b/arch/x86/include/asm/sev.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AMD Encrypted Register State Support
+ *
+ * Author: Joerg Roedel <jroedel@suse.de>
+ */
+
+#ifndef __ASM_ENCRYPTED_STATE_H
+#define __ASM_ENCRYPTED_STATE_H
+
+#include <linux/types.h>
+#include <asm/insn.h>
+
+#define GHCB_SEV_INFO		0x001UL
+#define GHCB_SEV_INFO_REQ	0x002UL
+#define		GHCB_INFO(v)		((v) & 0xfffUL)
+#define		GHCB_PROTO_MAX(v)	(((v) >> 48) & 0xffffUL)
+#define		GHCB_PROTO_MIN(v)	(((v) >> 32) & 0xffffUL)
+#define		GHCB_PROTO_OUR		0x0001UL
+#define GHCB_SEV_CPUID_REQ	0x004UL
+#define		GHCB_CPUID_REQ_EAX	0
+#define		GHCB_CPUID_REQ_EBX	1
+#define		GHCB_CPUID_REQ_ECX	2
+#define		GHCB_CPUID_REQ_EDX	3
+#define		GHCB_CPUID_REQ(fn, reg) (GHCB_SEV_CPUID_REQ | \
+					(((unsigned long)reg & 3) << 30) | \
+					(((unsigned long)fn) << 32))
+
+#define	GHCB_PROTOCOL_MAX	0x0001UL
+#define GHCB_DEFAULT_USAGE	0x0000UL
+
+#define GHCB_SEV_CPUID_RESP	0x005UL
+#define GHCB_SEV_TERMINATE	0x100UL
+#define		GHCB_SEV_TERMINATE_REASON(reason_set, reason_val)	\
+			(((((u64)reason_set) &  0x7) << 12) |		\
+			 ((((u64)reason_val) & 0xff) << 16))
+#define		GHCB_SEV_ES_REASON_GENERAL_REQUEST	0
+#define		GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED	1
+
+#define	GHCB_SEV_GHCB_RESP_CODE(v)	((v) & 0xfff)
+#define	VMGEXIT()			{ asm volatile("rep; vmmcall\n\r"); }
+
+enum es_result {
+	ES_OK,			/* All good */
+	ES_UNSUPPORTED,		/* Requested operation not supported */
+	ES_VMM_ERROR,		/* Unexpected state from the VMM */
+	ES_DECODE_FAILED,	/* Instruction decoding failed */
+	ES_EXCEPTION,		/* Instruction caused exception */
+	ES_RETRY,		/* Retry instruction emulation */
+};
+
+struct es_fault_info {
+	unsigned long vector;
+	unsigned long error_code;
+	unsigned long cr2;
+};
+
+struct pt_regs;
+
+/* ES instruction emulation context */
+struct es_em_ctxt {
+	struct pt_regs *regs;
+	struct insn insn;
+	struct es_fault_info fi;
+};
+
+void do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code);
+
+static inline u64 lower_bits(u64 val, unsigned int bits)
+{
+	u64 mask = (1ULL << bits) - 1;
+
+	return (val & mask);
+}
+
+struct real_mode_header;
+enum stack_type;
+
+/* Early IDT entry points for #VC handler */
+extern void vc_no_ghcb(void);
+extern void vc_boot_ghcb(void);
+extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+extern struct static_key_false sev_es_enable_key;
+extern void __sev_es_ist_enter(struct pt_regs *regs);
+extern void __sev_es_ist_exit(void);
+static __always_inline void sev_es_ist_enter(struct pt_regs *regs)
+{
+	if (static_branch_unlikely(&sev_es_enable_key))
+		__sev_es_ist_enter(regs);
+}
+static __always_inline void sev_es_ist_exit(void)
+{
+	if (static_branch_unlikely(&sev_es_enable_key))
+		__sev_es_ist_exit();
+}
+extern int sev_es_setup_ap_jump_table(struct real_mode_header *rmh);
+extern void __sev_es_nmi_complete(void);
+static __always_inline void sev_es_nmi_complete(void)
+{
+	if (static_branch_unlikely(&sev_es_enable_key))
+		__sev_es_nmi_complete();
+}
+extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd);
+#else
+static inline void sev_es_ist_enter(struct pt_regs *regs) { }
+static inline void sev_es_ist_exit(void) { }
+static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; }
+static inline void sev_es_nmi_complete(void) { }
+static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; }
+#endif
+
+#endif
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 0704c2a94272..0f66682ac02a 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -20,7 +20,7 @@ CFLAGS_REMOVE_kvmclock.o = -pg
 CFLAGS_REMOVE_ftrace.o = -pg
 CFLAGS_REMOVE_early_printk.o = -pg
 CFLAGS_REMOVE_head64.o = -pg
-CFLAGS_REMOVE_sev-es.o = -pg
+CFLAGS_REMOVE_sev.o = -pg
 endif
 
 KASAN_SANITIZE_head$(BITS).o				:= n
@@ -28,7 +28,7 @@ KASAN_SANITIZE_dumpstack.o				:= n
 KASAN_SANITIZE_dumpstack_$(BITS).o			:= n
 KASAN_SANITIZE_stacktrace.o				:= n
 KASAN_SANITIZE_paravirt.o				:= n
-KASAN_SANITIZE_sev-es.o					:= n
+KASAN_SANITIZE_sev.o					:= n
 
 # With some compiler versions the generated code results in boot hangs, caused
 # by several compilation units. To be safe, disable all instrumentation.
@@ -148,7 +148,7 @@ obj-$(CONFIG_UNWINDER_ORC)		+= unwind_orc.o
 obj-$(CONFIG_UNWINDER_FRAME_POINTER)	+= unwind_frame.o
 obj-$(CONFIG_UNWINDER_GUESS)		+= unwind_guess.o
 
-obj-$(CONFIG_AMD_MEM_ENCRYPT)		+= sev-es.o
+obj-$(CONFIG_AMD_MEM_ENCRYPT)		+= sev.o
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 18be44163a50..de01903c3735 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -39,7 +39,7 @@
 #include <asm/realmode.h>
 #include <asm/extable.h>
 #include <asm/trapnr.h>
-#include <asm/sev-es.h>
+#include <asm/sev.h>
 
 /*
  * Manage page tables very early on.
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 2ef961cf4cfc..4bce802d25fb 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -33,7 +33,7 @@
 #include <asm/reboot.h>
 #include <asm/cache.h>
 #include <asm/nospec-branch.h>
-#include <asm/sev-es.h>
+#include <asm/sev.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/nmi.h>
diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-es-shared.c
deleted file mode 100644
index 0aa9f13efd57..000000000000
--- a/arch/x86/kernel/sev-es-shared.c
+++ /dev/null
@@ -1,525 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * AMD Encrypted Register State Support
- *
- * Author: Joerg Roedel <jroedel@suse.de>
- *
- * This file is not compiled stand-alone. It contains code shared
- * between the pre-decompression boot code and the running Linux kernel
- * and is included directly into both code-bases.
- */
-
-#ifndef __BOOT_COMPRESSED
-#define error(v)	pr_err(v)
-#define has_cpuflag(f)	boot_cpu_has(f)
-#endif
-
-static bool __init sev_es_check_cpu_features(void)
-{
-	if (!has_cpuflag(X86_FEATURE_RDRAND)) {
-		error("RDRAND instruction not supported - no trusted source of randomness available\n");
-		return false;
-	}
-
-	return true;
-}
-
-static void __noreturn sev_es_terminate(unsigned int reason)
-{
-	u64 val = GHCB_SEV_TERMINATE;
-
-	/*
-	 * Tell the hypervisor what went wrong - only reason-set 0 is
-	 * currently supported.
-	 */
-	val |= GHCB_SEV_TERMINATE_REASON(0, reason);
-
-	/* Request Guest Termination from Hypvervisor */
-	sev_es_wr_ghcb_msr(val);
-	VMGEXIT();
-
-	while (true)
-		asm volatile("hlt\n" : : : "memory");
-}
-
-static bool sev_es_negotiate_protocol(void)
-{
-	u64 val;
-
-	/* Do the GHCB protocol version negotiation */
-	sev_es_wr_ghcb_msr(GHCB_SEV_INFO_REQ);
-	VMGEXIT();
-	val = sev_es_rd_ghcb_msr();
-
-	if (GHCB_INFO(val) != GHCB_SEV_INFO)
-		return false;
-
-	if (GHCB_PROTO_MAX(val) < GHCB_PROTO_OUR ||
-	    GHCB_PROTO_MIN(val) > GHCB_PROTO_OUR)
-		return false;
-
-	return true;
-}
-
-static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb)
-{
-	memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
-}
-
-static bool vc_decoding_needed(unsigned long exit_code)
-{
-	/* Exceptions don't require to decode the instruction */
-	return !(exit_code >= SVM_EXIT_EXCP_BASE &&
-		 exit_code <= SVM_EXIT_LAST_EXCP);
-}
-
-static enum es_result vc_init_em_ctxt(struct es_em_ctxt *ctxt,
-				      struct pt_regs *regs,
-				      unsigned long exit_code)
-{
-	enum es_result ret = ES_OK;
-
-	memset(ctxt, 0, sizeof(*ctxt));
-	ctxt->regs = regs;
-
-	if (vc_decoding_needed(exit_code))
-		ret = vc_decode_insn(ctxt);
-
-	return ret;
-}
-
-static void vc_finish_insn(struct es_em_ctxt *ctxt)
-{
-	ctxt->regs->ip += ctxt->insn.length;
-}
-
-static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
-					  struct es_em_ctxt *ctxt,
-					  u64 exit_code, u64 exit_info_1,
-					  u64 exit_info_2)
-{
-	enum es_result ret;
-
-	/* Fill in protocol and format specifiers */
-	ghcb->protocol_version = GHCB_PROTOCOL_MAX;
-	ghcb->ghcb_usage       = GHCB_DEFAULT_USAGE;
-
-	ghcb_set_sw_exit_code(ghcb, exit_code);
-	ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
-	ghcb_set_sw_exit_info_2(ghcb, exit_info_2);
-
-	sev_es_wr_ghcb_msr(__pa(ghcb));
-	VMGEXIT();
-
-	if ((ghcb->save.sw_exit_info_1 & 0xffffffff) == 1) {
-		u64 info = ghcb->save.sw_exit_info_2;
-		unsigned long v;
-
-		info = ghcb->save.sw_exit_info_2;
-		v = info & SVM_EVTINJ_VEC_MASK;
-
-		/* Check if exception information from hypervisor is sane. */
-		if ((info & SVM_EVTINJ_VALID) &&
-		    ((v == X86_TRAP_GP) || (v == X86_TRAP_UD)) &&
-		    ((info & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)) {
-			ctxt->fi.vector = v;
-			if (info & SVM_EVTINJ_VALID_ERR)
-				ctxt->fi.error_code = info >> 32;
-			ret = ES_EXCEPTION;
-		} else {
-			ret = ES_VMM_ERROR;
-		}
-	} else {
-		ret = ES_OK;
-	}
-
-	return ret;
-}
-
-/*
- * Boot VC Handler - This is the first VC handler during boot, there is no GHCB
- * page yet, so it only supports the MSR based communication with the
- * hypervisor and only the CPUID exit-code.
- */
-void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
-{
-	unsigned int fn = lower_bits(regs->ax, 32);
-	unsigned long val;
-
-	/* Only CPUID is supported via MSR protocol */
-	if (exit_code != SVM_EXIT_CPUID)
-		goto fail;
-
-	sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EAX));
-	VMGEXIT();
-	val = sev_es_rd_ghcb_msr();
-	if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
-		goto fail;
-	regs->ax = val >> 32;
-
-	sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EBX));
-	VMGEXIT();
-	val = sev_es_rd_ghcb_msr();
-	if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
-		goto fail;
-	regs->bx = val >> 32;
-
-	sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_ECX));
-	VMGEXIT();
-	val = sev_es_rd_ghcb_msr();
-	if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
-		goto fail;
-	regs->cx = val >> 32;
-
-	sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EDX));
-	VMGEXIT();
-	val = sev_es_rd_ghcb_msr();
-	if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
-		goto fail;
-	regs->dx = val >> 32;
-
-	/*
-	 * This is a VC handler and the #VC is only raised when SEV-ES is
-	 * active, which means SEV must be active too. Do sanity checks on the
-	 * CPUID results to make sure the hypervisor does not trick the kernel
-	 * into the no-sev path. This could map sensitive data unencrypted and
-	 * make it accessible to the hypervisor.
-	 *
-	 * In particular, check for:
-	 *	- Availability of CPUID leaf 0x8000001f
-	 *	- SEV CPUID bit.
-	 *
-	 * The hypervisor might still report the wrong C-bit position, but this
-	 * can't be checked here.
-	 */
-
-	if (fn == 0x80000000 && (regs->ax < 0x8000001f))
-		/* SEV leaf check */
-		goto fail;
-	else if ((fn == 0x8000001f && !(regs->ax & BIT(1))))
-		/* SEV bit */
-		goto fail;
-
-	/* Skip over the CPUID two-byte opcode */
-	regs->ip += 2;
-
-	return;
-
-fail:
-	/* Terminate the guest */
-	sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
-}
-
-static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt,
-					  void *src, char *buf,
-					  unsigned int data_size,
-					  unsigned int count,
-					  bool backwards)
-{
-	int i, b = backwards ? -1 : 1;
-	enum es_result ret = ES_OK;
-
-	for (i = 0; i < count; i++) {
-		void *s = src + (i * data_size * b);
-		char *d = buf + (i * data_size);
-
-		ret = vc_read_mem(ctxt, s, d, data_size);
-		if (ret != ES_OK)
-			break;
-	}
-
-	return ret;
-}
-
-static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt,
-					   void *dst, char *buf,
-					   unsigned int data_size,
-					   unsigned int count,
-					   bool backwards)
-{
-	int i, s = backwards ? -1 : 1;
-	enum es_result ret = ES_OK;
-
-	for (i = 0; i < count; i++) {
-		void *d = dst + (i * data_size * s);
-		char *b = buf + (i * data_size);
-
-		ret = vc_write_mem(ctxt, d, b, data_size);
-		if (ret != ES_OK)
-			break;
-	}
-
-	return ret;
-}
-
-#define IOIO_TYPE_STR  BIT(2)
-#define IOIO_TYPE_IN   1
-#define IOIO_TYPE_INS  (IOIO_TYPE_IN | IOIO_TYPE_STR)
-#define IOIO_TYPE_OUT  0
-#define IOIO_TYPE_OUTS (IOIO_TYPE_OUT | IOIO_TYPE_STR)
-
-#define IOIO_REP       BIT(3)
-
-#define IOIO_ADDR_64   BIT(9)
-#define IOIO_ADDR_32   BIT(8)
-#define IOIO_ADDR_16   BIT(7)
-
-#define IOIO_DATA_32   BIT(6)
-#define IOIO_DATA_16   BIT(5)
-#define IOIO_DATA_8    BIT(4)
-
-#define IOIO_SEG_ES    (0 << 10)
-#define IOIO_SEG_DS    (3 << 10)
-
-static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
-{
-	struct insn *insn = &ctxt->insn;
-	*exitinfo = 0;
-
-	switch (insn->opcode.bytes[0]) {
-	/* INS opcodes */
-	case 0x6c:
-	case 0x6d:
-		*exitinfo |= IOIO_TYPE_INS;
-		*exitinfo |= IOIO_SEG_ES;
-		*exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
-		break;
-
-	/* OUTS opcodes */
-	case 0x6e:
-	case 0x6f:
-		*exitinfo |= IOIO_TYPE_OUTS;
-		*exitinfo |= IOIO_SEG_DS;
-		*exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
-		break;
-
-	/* IN immediate opcodes */
-	case 0xe4:
-	case 0xe5:
-		*exitinfo |= IOIO_TYPE_IN;
-		*exitinfo |= (u8)insn->immediate.value << 16;
-		break;
-
-	/* OUT immediate opcodes */
-	case 0xe6:
-	case 0xe7:
-		*exitinfo |= IOIO_TYPE_OUT;
-		*exitinfo |= (u8)insn->immediate.value << 16;
-		break;
-
-	/* IN register opcodes */
-	case 0xec:
-	case 0xed:
-		*exitinfo |= IOIO_TYPE_IN;
-		*exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
-		break;
-
-	/* OUT register opcodes */
-	case 0xee:
-	case 0xef:
-		*exitinfo |= IOIO_TYPE_OUT;
-		*exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
-		break;
-
-	default:
-		return ES_DECODE_FAILED;
-	}
-
-	switch (insn->opcode.bytes[0]) {
-	case 0x6c:
-	case 0x6e:
-	case 0xe4:
-	case 0xe6:
-	case 0xec:
-	case 0xee:
-		/* Single byte opcodes */
-		*exitinfo |= IOIO_DATA_8;
-		break;
-	default:
-		/* Length determined by instruction parsing */
-		*exitinfo |= (insn->opnd_bytes == 2) ? IOIO_DATA_16
-						     : IOIO_DATA_32;
-	}
-	switch (insn->addr_bytes) {
-	case 2:
-		*exitinfo |= IOIO_ADDR_16;
-		break;
-	case 4:
-		*exitinfo |= IOIO_ADDR_32;
-		break;
-	case 8:
-		*exitinfo |= IOIO_ADDR_64;
-		break;
-	}
-
-	if (insn_has_rep_prefix(insn))
-		*exitinfo |= IOIO_REP;
-
-	return ES_OK;
-}
-
-static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
-{
-	struct pt_regs *regs = ctxt->regs;
-	u64 exit_info_1, exit_info_2;
-	enum es_result ret;
-
-	ret = vc_ioio_exitinfo(ctxt, &exit_info_1);
-	if (ret != ES_OK)
-		return ret;
-
-	if (exit_info_1 & IOIO_TYPE_STR) {
-
-		/* (REP) INS/OUTS */
-
-		bool df = ((regs->flags & X86_EFLAGS_DF) == X86_EFLAGS_DF);
-		unsigned int io_bytes, exit_bytes;
-		unsigned int ghcb_count, op_count;
-		unsigned long es_base;
-		u64 sw_scratch;
-
-		/*
-		 * For the string variants with rep prefix the amount of in/out
-		 * operations per #VC exception is limited so that the kernel
-		 * has a chance to take interrupts and re-schedule while the
-		 * instruction is emulated.
-		 */
-		io_bytes   = (exit_info_1 >> 4) & 0x7;
-		ghcb_count = sizeof(ghcb->shared_buffer) / io_bytes;
-
-		op_count    = (exit_info_1 & IOIO_REP) ? regs->cx : 1;
-		exit_info_2 = min(op_count, ghcb_count);
-		exit_bytes  = exit_info_2 * io_bytes;
-
-		es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);
-
-		/* Read bytes of OUTS into the shared buffer */
-		if (!(exit_info_1 & IOIO_TYPE_IN)) {
-			ret = vc_insn_string_read(ctxt,
-					       (void *)(es_base + regs->si),
-					       ghcb->shared_buffer, io_bytes,
-					       exit_info_2, df);
-			if (ret)
-				return ret;
-		}
-
-		/*
-		 * Issue an VMGEXIT to the HV to consume the bytes from the
-		 * shared buffer or to have it write them into the shared buffer
-		 * depending on the instruction: OUTS or INS.
-		 */
-		sw_scratch = __pa(ghcb) + offsetof(struct ghcb, shared_buffer);
-		ghcb_set_sw_scratch(ghcb, sw_scratch);
-		ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO,
-					  exit_info_1, exit_info_2);
-		if (ret != ES_OK)
-			return ret;
-
-		/* Read bytes from shared buffer into the guest's destination. */
-		if (exit_info_1 & IOIO_TYPE_IN) {
-			ret = vc_insn_string_write(ctxt,
-						   (void *)(es_base + regs->di),
-						   ghcb->shared_buffer, io_bytes,
-						   exit_info_2, df);
-			if (ret)
-				return ret;
-
-			if (df)
-				regs->di -= exit_bytes;
-			else
-				regs->di += exit_bytes;
-		} else {
-			if (df)
-				regs->si -= exit_bytes;
-			else
-				regs->si += exit_bytes;
-		}
-
-		if (exit_info_1 & IOIO_REP)
-			regs->cx -= exit_info_2;
-
-		ret = regs->cx ? ES_RETRY : ES_OK;
-
-	} else {
-
-		/* IN/OUT into/from rAX */
-
-		int bits = (exit_info_1 & 0x70) >> 1;
-		u64 rax = 0;
-
-		if (!(exit_info_1 & IOIO_TYPE_IN))
-			rax = lower_bits(regs->ax, bits);
-
-		ghcb_set_rax(ghcb, rax);
-
-		ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO, exit_info_1, 0);
-		if (ret != ES_OK)
-			return ret;
-
-		if (exit_info_1 & IOIO_TYPE_IN) {
-			if (!ghcb_rax_is_valid(ghcb))
-				return ES_VMM_ERROR;
-			regs->ax = lower_bits(ghcb->save.rax, bits);
-		}
-	}
-
-	return ret;
-}
-
-static enum es_result vc_handle_cpuid(struct ghcb *ghcb,
-				      struct es_em_ctxt *ctxt)
-{
-	struct pt_regs *regs = ctxt->regs;
-	u32 cr4 = native_read_cr4();
-	enum es_result ret;
-
-	ghcb_set_rax(ghcb, regs->ax);
-	ghcb_set_rcx(ghcb, regs->cx);
-
-	if (cr4 & X86_CR4_OSXSAVE)
-		/* Safe to read xcr0 */
-		ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK));
-	else
-		/* xgetbv will cause #GP - use reset value for xcr0 */
-		ghcb_set_xcr0(ghcb, 1);
-
-	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
-	if (ret != ES_OK)
-		return ret;
-
-	if (!(ghcb_rax_is_valid(ghcb) &&
-	      ghcb_rbx_is_valid(ghcb) &&
-	      ghcb_rcx_is_valid(ghcb) &&
-	      ghcb_rdx_is_valid(ghcb)))
-		return ES_VMM_ERROR;
-
-	regs->ax = ghcb->save.rax;
-	regs->bx = ghcb->save.rbx;
-	regs->cx = ghcb->save.rcx;
-	regs->dx = ghcb->save.rdx;
-
-	return ES_OK;
-}
-
-static enum es_result vc_handle_rdtsc(struct ghcb *ghcb,
-				      struct es_em_ctxt *ctxt,
-				      unsigned long exit_code)
-{
-	bool rdtscp = (exit_code == SVM_EXIT_RDTSCP);
-	enum es_result ret;
-
-	ret = sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, 0, 0);
-	if (ret != ES_OK)
-		return ret;
-
-	if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb) &&
-	     (!rdtscp || ghcb_rcx_is_valid(ghcb))))
-		return ES_VMM_ERROR;
-
-	ctxt->regs->ax = ghcb->save.rax;
-	ctxt->regs->dx = ghcb->save.rdx;
-	if (rdtscp)
-		ctxt->regs->cx = ghcb->save.rcx;
-
-	return ES_OK;
-}
diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c
deleted file mode 100644
index 73873b007838..000000000000
--- a/arch/x86/kernel/sev-es.c
+++ /dev/null
@@ -1,1461 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * AMD Memory Encryption Support
- *
- * Copyright (C) 2019 SUSE
- *
- * Author: Joerg Roedel <jroedel@suse.de>
- */
-
-#define pr_fmt(fmt)	"SEV-ES: " fmt
-
-#include <linux/sched/debug.h>	/* For show_regs() */
-#include <linux/percpu-defs.h>
-#include <linux/mem_encrypt.h>
-#include <linux/lockdep.h>
-#include <linux/printk.h>
-#include <linux/mm_types.h>
-#include <linux/set_memory.h>
-#include <linux/memblock.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-
-#include <asm/cpu_entry_area.h>
-#include <asm/stacktrace.h>
-#include <asm/sev-es.h>
-#include <asm/insn-eval.h>
-#include <asm/fpu/internal.h>
-#include <asm/processor.h>
-#include <asm/realmode.h>
-#include <asm/traps.h>
-#include <asm/svm.h>
-#include <asm/smp.h>
-#include <asm/cpu.h>
-
-#define DR7_RESET_VALUE        0x400
-
-/* For early boot hypervisor communication in SEV-ES enabled guests */
-static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
-
-/*
- * Needs to be in the .data section because we need it NULL before bss is
- * cleared
- */
-static struct ghcb __initdata *boot_ghcb;
-
-/* #VC handler runtime per-CPU data */
-struct sev_es_runtime_data {
-	struct ghcb ghcb_page;
-
-	/* Physical storage for the per-CPU IST stack of the #VC handler */
-	char ist_stack[EXCEPTION_STKSZ] __aligned(PAGE_SIZE);
-
-	/*
-	 * Physical storage for the per-CPU fall-back stack of the #VC handler.
-	 * The fall-back stack is used when it is not safe to switch back to the
-	 * interrupted stack in the #VC entry code.
-	 */
-	char fallback_stack[EXCEPTION_STKSZ] __aligned(PAGE_SIZE);
-
-	/*
-	 * Reserve one page per CPU as backup storage for the unencrypted GHCB.
-	 * It is needed when an NMI happens while the #VC handler uses the real
-	 * GHCB, and the NMI handler itself is causing another #VC exception. In
-	 * that case the GHCB content of the first handler needs to be backed up
-	 * and restored.
-	 */
-	struct ghcb backup_ghcb;
-
-	/*
-	 * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions.
-	 * There is no need for it to be atomic, because nothing is written to
-	 * the GHCB between the read and the write of ghcb_active. So it is safe
-	 * to use it when a nested #VC exception happens before the write.
-	 *
-	 * This is necessary for example in the #VC->NMI->#VC case when the NMI
-	 * happens while the first #VC handler uses the GHCB. When the NMI code
-	 * raises a second #VC handler it might overwrite the contents of the
-	 * GHCB written by the first handler. To avoid this the content of the
-	 * GHCB is saved and restored when the GHCB is detected to be in use
-	 * already.
-	 */
-	bool ghcb_active;
-	bool backup_ghcb_active;
-
-	/*
-	 * Cached DR7 value - write it on DR7 writes and return it on reads.
-	 * That value will never make it to the real hardware DR7 as debugging
-	 * is currently unsupported in SEV-ES guests.
-	 */
-	unsigned long dr7;
-};
-
-struct ghcb_state {
-	struct ghcb *ghcb;
-};
-
-static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
-DEFINE_STATIC_KEY_FALSE(sev_es_enable_key);
-
-/* Needed in vc_early_forward_exception */
-void do_early_exception(struct pt_regs *regs, int trapnr);
-
-static void __init setup_vc_stacks(int cpu)
-{
-	struct sev_es_runtime_data *data;
-	struct cpu_entry_area *cea;
-	unsigned long vaddr;
-	phys_addr_t pa;
-
-	data = per_cpu(runtime_data, cpu);
-	cea  = get_cpu_entry_area(cpu);
-
-	/* Map #VC IST stack */
-	vaddr = CEA_ESTACK_BOT(&cea->estacks, VC);
-	pa    = __pa(data->ist_stack);
-	cea_set_pte((void *)vaddr, pa, PAGE_KERNEL);
-
-	/* Map VC fall-back stack */
-	vaddr = CEA_ESTACK_BOT(&cea->estacks, VC2);
-	pa    = __pa(data->fallback_stack);
-	cea_set_pte((void *)vaddr, pa, PAGE_KERNEL);
-}
-
-static __always_inline bool on_vc_stack(struct pt_regs *regs)
-{
-	unsigned long sp = regs->sp;
-
-	/* User-mode RSP is not trusted */
-	if (user_mode(regs))
-		return false;
-
-	/* SYSCALL gap still has user-mode RSP */
-	if (ip_within_syscall_gap(regs))
-		return false;
-
-	return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC)));
-}
-
-/*
- * This function handles the case when an NMI is raised in the #VC
- * exception handler entry code, before the #VC handler has switched off
- * its IST stack. In this case, the IST entry for #VC must be adjusted,
- * so that any nested #VC exception will not overwrite the stack
- * contents of the interrupted #VC handler.
- *
- * The IST entry is adjusted unconditionally so that it can be also be
- * unconditionally adjusted back in __sev_es_ist_exit(). Otherwise a
- * nested sev_es_ist_exit() call may adjust back the IST entry too
- * early.
- *
- * The __sev_es_ist_enter() and __sev_es_ist_exit() functions always run
- * on the NMI IST stack, as they are only called from NMI handling code
- * right now.
- */
-void noinstr __sev_es_ist_enter(struct pt_regs *regs)
-{
-	unsigned long old_ist, new_ist;
-
-	/* Read old IST entry */
-	new_ist = old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
-
-	/*
-	 * If NMI happened while on the #VC IST stack, set the new IST
-	 * value below regs->sp, so that the interrupted stack frame is
-	 * not overwritten by subsequent #VC exceptions.
-	 */
-	if (on_vc_stack(regs))
-		new_ist = regs->sp;
-
-	/*
-	 * Reserve additional 8 bytes and store old IST value so this
-	 * adjustment can be unrolled in __sev_es_ist_exit().
-	 */
-	new_ist -= sizeof(old_ist);
-	*(unsigned long *)new_ist = old_ist;
-
-	/* Set new IST entry */
-	this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], new_ist);
-}
-
-void noinstr __sev_es_ist_exit(void)
-{
-	unsigned long ist;
-
-	/* Read IST entry */
-	ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
-
-	if (WARN_ON(ist == __this_cpu_ist_top_va(VC)))
-		return;
-
-	/* Read back old IST entry and write it to the TSS */
-	this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);
-}
-
-static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state)
-{
-	struct sev_es_runtime_data *data;
-	struct ghcb *ghcb;
-
-	data = this_cpu_read(runtime_data);
-	ghcb = &data->ghcb_page;
-
-	if (unlikely(data->ghcb_active)) {
-		/* GHCB is already in use - save its contents */
-
-		if (unlikely(data->backup_ghcb_active))
-			return NULL;
-
-		/* Mark backup_ghcb active before writing to it */
-		data->backup_ghcb_active = true;
-
-		state->ghcb = &data->backup_ghcb;
-
-		/* Backup GHCB content */
-		*state->ghcb = *ghcb;
-	} else {
-		state->ghcb = NULL;
-		data->ghcb_active = true;
-	}
-
-	return ghcb;
-}
-
-static __always_inline void sev_es_put_ghcb(struct ghcb_state *state)
-{
-	struct sev_es_runtime_data *data;
-	struct ghcb *ghcb;
-
-	data = this_cpu_read(runtime_data);
-	ghcb = &data->ghcb_page;
-
-	if (state->ghcb) {
-		/* Restore GHCB from Backup */
-		*ghcb = *state->ghcb;
-		data->backup_ghcb_active = false;
-		state->ghcb = NULL;
-	} else {
-		data->ghcb_active = false;
-	}
-}
-
-/* Needed in vc_early_forward_exception */
-void do_early_exception(struct pt_regs *regs, int trapnr);
-
-static inline u64 sev_es_rd_ghcb_msr(void)
-{
-	return __rdmsr(MSR_AMD64_SEV_ES_GHCB);
-}
-
-static __always_inline void sev_es_wr_ghcb_msr(u64 val)
-{
-	u32 low, high;
-
-	low  = (u32)(val);
-	high = (u32)(val >> 32);
-
-	native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high);
-}
-
-static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
-				unsigned char *buffer)
-{
-	return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
-}
-
-static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt)
-{
-	char buffer[MAX_INSN_SIZE];
-	int res;
-
-	res = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
-	if (!res) {
-		ctxt->fi.vector     = X86_TRAP_PF;
-		ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
-		ctxt->fi.cr2        = ctxt->regs->ip;
-		return ES_EXCEPTION;
-	}
-
-	if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, res))
-		return ES_DECODE_FAILED;
-
-	if (ctxt->insn.immediate.got)
-		return ES_OK;
-	else
-		return ES_DECODE_FAILED;
-}
-
-static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt)
-{
-	char buffer[MAX_INSN_SIZE];
-	int res, ret;
-
-	res = vc_fetch_insn_kernel(ctxt, buffer);
-	if (res) {
-		ctxt->fi.vector     = X86_TRAP_PF;
-		ctxt->fi.error_code = X86_PF_INSTR;
-		ctxt->fi.cr2        = ctxt->regs->ip;
-		return ES_EXCEPTION;
-	}
-
-	ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64);
-	if (ret < 0)
-		return ES_DECODE_FAILED;
-	else
-		return ES_OK;
-}
-
-static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
-{
-	if (user_mode(ctxt->regs))
-		return __vc_decode_user_insn(ctxt);
-	else
-		return __vc_decode_kern_insn(ctxt);
-}
-
-static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
-				   char *dst, char *buf, size_t size)
-{
-	unsigned long error_code = X86_PF_PROT | X86_PF_WRITE;
-	char __user *target = (char __user *)dst;
-	u64 d8;
-	u32 d4;
-	u16 d2;
-	u8  d1;
-
-	/* If instruction ran in kernel mode and the I/O buffer is in kernel space */
-	if (!user_mode(ctxt->regs) && !access_ok(target, size)) {
-		memcpy(dst, buf, size);
-		return ES_OK;
-	}
-
-	switch (size) {
-	case 1:
-		memcpy(&d1, buf, 1);
-		if (put_user(d1, target))
-			goto fault;
-		break;
-	case 2:
-		memcpy(&d2, buf, 2);
-		if (put_user(d2, target))
-			goto fault;
-		break;
-	case 4:
-		memcpy(&d4, buf, 4);
-		if (put_user(d4, target))
-			goto fault;
-		break;
-	case 8:
-		memcpy(&d8, buf, 8);
-		if (put_user(d8, target))
-			goto fault;
-		break;
-	default:
-		WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
-		return ES_UNSUPPORTED;
-	}
-
-	return ES_OK;
-
-fault:
-	if (user_mode(ctxt->regs))
-		error_code |= X86_PF_USER;
-
-	ctxt->fi.vector = X86_TRAP_PF;
-	ctxt->fi.error_code = error_code;
-	ctxt->fi.cr2 = (unsigned long)dst;
-
-	return ES_EXCEPTION;
-}
-
-static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
-				  char *src, char *buf, size_t size)
-{
-	unsigned long error_code = X86_PF_PROT;
-	char __user *s = (char __user *)src;
-	u64 d8;
-	u32 d4;
-	u16 d2;
-	u8  d1;
-
-	/* If instruction ran in kernel mode and the I/O buffer is in kernel space */
-	if (!user_mode(ctxt->regs) && !access_ok(s, size)) {
-		memcpy(buf, src, size);
-		return ES_OK;
-	}
-
-	switch (size) {
-	case 1:
-		if (get_user(d1, s))
-			goto fault;
-		memcpy(buf, &d1, 1);
-		break;
-	case 2:
-		if (get_user(d2, s))
-			goto fault;
-		memcpy(buf, &d2, 2);
-		break;
-	case 4:
-		if (get_user(d4, s))
-			goto fault;
-		memcpy(buf, &d4, 4);
-		break;
-	case 8:
-		if (get_user(d8, s))
-			goto fault;
-		memcpy(buf, &d8, 8);
-		break;
-	default:
-		WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
-		return ES_UNSUPPORTED;
-	}
-
-	return ES_OK;
-
-fault:
-	if (user_mode(ctxt->regs))
-		error_code |= X86_PF_USER;
-
-	ctxt->fi.vector = X86_TRAP_PF;
-	ctxt->fi.error_code = error_code;
-	ctxt->fi.cr2 = (unsigned long)src;
-
-	return ES_EXCEPTION;
-}
-
-static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
-					   unsigned long vaddr, phys_addr_t *paddr)
-{
-	unsigned long va = (unsigned long)vaddr;
-	unsigned int level;
-	phys_addr_t pa;
-	pgd_t *pgd;
-	pte_t *pte;
-
-	pgd = __va(read_cr3_pa());
-	pgd = &pgd[pgd_index(va)];
-	pte = lookup_address_in_pgd(pgd, va, &level);
-	if (!pte) {
-		ctxt->fi.vector     = X86_TRAP_PF;
-		ctxt->fi.cr2        = vaddr;
-		ctxt->fi.error_code = 0;
-
-		if (user_mode(ctxt->regs))
-			ctxt->fi.error_code |= X86_PF_USER;
-
-		return ES_EXCEPTION;
-	}
-
-	if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC))
-		/* Emulated MMIO to/from encrypted memory not supported */
-		return ES_UNSUPPORTED;
-
-	pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
-	pa |= va & ~page_level_mask(level);
-
-	*paddr = pa;
-
-	return ES_OK;
-}
-
-/* Include code shared with pre-decompression boot stage */
-#include "sev-es-shared.c"
-
-void noinstr __sev_es_nmi_complete(void)
-{
-	struct ghcb_state state;
-	struct ghcb *ghcb;
-
-	ghcb = sev_es_get_ghcb(&state);
-
-	vc_ghcb_invalidate(ghcb);
-	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE);
-	ghcb_set_sw_exit_info_1(ghcb, 0);
-	ghcb_set_sw_exit_info_2(ghcb, 0);
-
-	sev_es_wr_ghcb_msr(__pa_nodebug(ghcb));
-	VMGEXIT();
-
-	sev_es_put_ghcb(&state);
-}
-
-static u64 get_jump_table_addr(void)
-{
-	struct ghcb_state state;
-	unsigned long flags;
-	struct ghcb *ghcb;
-	u64 ret = 0;
-
-	local_irq_save(flags);
-
-	ghcb = sev_es_get_ghcb(&state);
-
-	vc_ghcb_invalidate(ghcb);
-	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
-	ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE);
-	ghcb_set_sw_exit_info_2(ghcb, 0);
-
-	sev_es_wr_ghcb_msr(__pa(ghcb));
-	VMGEXIT();
-
-	if (ghcb_sw_exit_info_1_is_valid(ghcb) &&
-	    ghcb_sw_exit_info_2_is_valid(ghcb))
-		ret = ghcb->save.sw_exit_info_2;
-
-	sev_es_put_ghcb(&state);
-
-	local_irq_restore(flags);
-
-	return ret;
-}
-
-int sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
-{
-	u16 startup_cs, startup_ip;
-	phys_addr_t jump_table_pa;
-	u64 jump_table_addr;
-	u16 __iomem *jump_table;
-
-	jump_table_addr = get_jump_table_addr();
-
-	/* On UP guests there is no jump table so this is not a failure */
-	if (!jump_table_addr)
-		return 0;
-
-	/* Check if AP Jump Table is page-aligned */
-	if (jump_table_addr & ~PAGE_MASK)
-		return -EINVAL;
-
-	jump_table_pa = jump_table_addr & PAGE_MASK;
-
-	startup_cs = (u16)(rmh->trampoline_start >> 4);
-	startup_ip = (u16)(rmh->sev_es_trampoline_start -
-			   rmh->trampoline_start);
-
-	jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE);
-	if (!jump_table)
-		return -EIO;
-
-	writew(startup_ip, &jump_table[0]);
-	writew(startup_cs, &jump_table[1]);
-
-	iounmap(jump_table);
-
-	return 0;
-}
-
-/*
- * This is needed by the OVMF UEFI firmware which will use whatever it finds in
- * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu
- * runtime GHCBs used by the kernel are also mapped in the EFI page-table.
- */
-int __init sev_es_efi_map_ghcbs(pgd_t *pgd)
-{
-	struct sev_es_runtime_data *data;
-	unsigned long address, pflags;
-	int cpu;
-	u64 pfn;
-
-	if (!sev_es_active())
-		return 0;
-
-	pflags = _PAGE_NX | _PAGE_RW;
-
-	for_each_possible_cpu(cpu) {
-		data = per_cpu(runtime_data, cpu);
-
-		address = __pa(&data->ghcb_page);
-		pfn = address >> PAGE_SHIFT;
-
-		if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags))
-			return 1;
-	}
-
-	return 0;
-}
-
-static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
-{
-	struct pt_regs *regs = ctxt->regs;
-	enum es_result ret;
-	u64 exit_info_1;
-
-	/* Is it a WRMSR? */
-	exit_info_1 = (ctxt->insn.opcode.bytes[1] == 0x30) ? 1 : 0;
-
-	ghcb_set_rcx(ghcb, regs->cx);
-	if (exit_info_1) {
-		ghcb_set_rax(ghcb, regs->ax);
-		ghcb_set_rdx(ghcb, regs->dx);
-	}
-
-	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, exit_info_1, 0);
-
-	if ((ret == ES_OK) && (!exit_info_1)) {
-		regs->ax = ghcb->save.rax;
-		regs->dx = ghcb->save.rdx;
-	}
-
-	return ret;
-}
-
-/*
- * This function runs on the first #VC exception after the kernel
- * switched to virtual addresses.
- */
-static bool __init sev_es_setup_ghcb(void)
-{
-	/* First make sure the hypervisor talks a supported protocol. */
-	if (!sev_es_negotiate_protocol())
-		return false;
-
-	/*
-	 * Clear the boot_ghcb. The first exception comes in before the bss
-	 * section is cleared.
-	 */
-	memset(&boot_ghcb_page, 0, PAGE_SIZE);
-
-	/* Alright - Make the boot-ghcb public */
-	boot_ghcb = &boot_ghcb_page;
-
-	return true;
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-static void sev_es_ap_hlt_loop(void)
-{
-	struct ghcb_state state;
-	struct ghcb *ghcb;
-
-	ghcb = sev_es_get_ghcb(&state);
-
-	while (true) {
-		vc_ghcb_invalidate(ghcb);
-		ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP);
-		ghcb_set_sw_exit_info_1(ghcb, 0);
-		ghcb_set_sw_exit_info_2(ghcb, 0);
-
-		sev_es_wr_ghcb_msr(__pa(ghcb));
-		VMGEXIT();
-
-		/* Wakeup signal? */
-		if (ghcb_sw_exit_info_2_is_valid(ghcb) &&
-		    ghcb->save.sw_exit_info_2)
-			break;
-	}
-
-	sev_es_put_ghcb(&state);
-}
-
-/*
- * Play_dead handler when running under SEV-ES. This is needed because
- * the hypervisor can't deliver an SIPI request to restart the AP.
- * Instead the kernel has to issue a VMGEXIT to halt the VCPU until the
- * hypervisor wakes it up again.
- */
-static void sev_es_play_dead(void)
-{
-	play_dead_common();
-
-	/* IRQs now disabled */
-
-	sev_es_ap_hlt_loop();
-
-	/*
-	 * If we get here, the VCPU was woken up again. Jump to CPU
-	 * startup code to get it back online.
-	 */
-	start_cpu0();
-}
-#else  /* CONFIG_HOTPLUG_CPU */
-#define sev_es_play_dead	native_play_dead
-#endif /* CONFIG_HOTPLUG_CPU */
-
-#ifdef CONFIG_SMP
-static void __init sev_es_setup_play_dead(void)
-{
-	smp_ops.play_dead = sev_es_play_dead;
-}
-#else
-static inline void sev_es_setup_play_dead(void) { }
-#endif
-
-static void __init alloc_runtime_data(int cpu)
-{
-	struct sev_es_runtime_data *data;
-
-	data = memblock_alloc(sizeof(*data), PAGE_SIZE);
-	if (!data)
-		panic("Can't allocate SEV-ES runtime data");
-
-	per_cpu(runtime_data, cpu) = data;
-}
-
-static void __init init_ghcb(int cpu)
-{
-	struct sev_es_runtime_data *data;
-	int err;
-
-	data = per_cpu(runtime_data, cpu);
-
-	err = early_set_memory_decrypted((unsigned long)&data->ghcb_page,
-					 sizeof(data->ghcb_page));
-	if (err)
-		panic("Can't map GHCBs unencrypted");
-
-	memset(&data->ghcb_page, 0, sizeof(data->ghcb_page));
-
-	data->ghcb_active = false;
-	data->backup_ghcb_active = false;
-}
-
-void __init sev_es_init_vc_handling(void)
-{
-	int cpu;
-
-	BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE);
-
-	if (!sev_es_active())
-		return;
-
-	if (!sev_es_check_cpu_features())
-		panic("SEV-ES CPU Features missing");
-
-	/* Enable SEV-ES special handling */
-	static_branch_enable(&sev_es_enable_key);
-
-	/* Initialize per-cpu GHCB pages */
-	for_each_possible_cpu(cpu) {
-		alloc_runtime_data(cpu);
-		init_ghcb(cpu);
-		setup_vc_stacks(cpu);
-	}
-
-	sev_es_setup_play_dead();
-
-	/* Secondary CPUs use the runtime #VC handler */
-	initial_vc_handler = (unsigned long)safe_stack_exc_vmm_communication;
-}
-
-static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
-{
-	int trapnr = ctxt->fi.vector;
-
-	if (trapnr == X86_TRAP_PF)
-		native_write_cr2(ctxt->fi.cr2);
-
-	ctxt->regs->orig_ax = ctxt->fi.error_code;
-	do_early_exception(ctxt->regs, trapnr);
-}
-
-static long *vc_insn_get_reg(struct es_em_ctxt *ctxt)
-{
-	long *reg_array;
-	int offset;
-
-	reg_array = (long *)ctxt->regs;
-	offset    = insn_get_modrm_reg_off(&ctxt->insn, ctxt->regs);
-
-	if (offset < 0)
-		return NULL;
-
-	offset /= sizeof(long);
-
-	return reg_array + offset;
-}
-
-static long *vc_insn_get_rm(struct es_em_ctxt *ctxt)
-{
-	long *reg_array;
-	int offset;
-
-	reg_array = (long *)ctxt->regs;
-	offset    = insn_get_modrm_rm_off(&ctxt->insn, ctxt->regs);
-
-	if (offset < 0)
-		return NULL;
-
-	offset /= sizeof(long);
-
-	return reg_array + offset;
-}
-static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
-				 unsigned int bytes, bool read)
-{
-	u64 exit_code, exit_info_1, exit_info_2;
-	unsigned long ghcb_pa = __pa(ghcb);
-	enum es_result res;
-	phys_addr_t paddr;
-	void __user *ref;
-
-	ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs);
-	if (ref == (void __user *)-1L)
-		return ES_UNSUPPORTED;
-
-	exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE;
-
-	res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr);
-	if (res != ES_OK) {
-		if (res == ES_EXCEPTION && !read)
-			ctxt->fi.error_code |= X86_PF_WRITE;
-
-		return res;
-	}
-
-	exit_info_1 = paddr;
-	/* Can never be greater than 8 */
-	exit_info_2 = bytes;
-
-	ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer));
-
-	return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2);
-}
-
-static enum es_result vc_handle_mmio_twobyte_ops(struct ghcb *ghcb,
-						 struct es_em_ctxt *ctxt)
-{
-	struct insn *insn = &ctxt->insn;
-	unsigned int bytes = 0;
-	enum es_result ret;
-	int sign_byte;
-	long *reg_data;
-
-	switch (insn->opcode.bytes[1]) {
-		/* MMIO Read w/ zero-extension */
-	case 0xb6:
-		bytes = 1;
-		fallthrough;
-	case 0xb7:
-		if (!bytes)
-			bytes = 2;
-
-		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
-		if (ret)
-			break;
-
-		/* Zero extend based on operand size */
-		reg_data = vc_insn_get_reg(ctxt);
-		if (!reg_data)
-			return ES_DECODE_FAILED;
-
-		memset(reg_data, 0, insn->opnd_bytes);
-
-		memcpy(reg_data, ghcb->shared_buffer, bytes);
-		break;
-
-		/* MMIO Read w/ sign-extension */
-	case 0xbe:
-		bytes = 1;
-		fallthrough;
-	case 0xbf:
-		if (!bytes)
-			bytes = 2;
-
-		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
-		if (ret)
-			break;
-
-		/* Sign extend based on operand size */
-		reg_data = vc_insn_get_reg(ctxt);
-		if (!reg_data)
-			return ES_DECODE_FAILED;
-
-		if (bytes == 1) {
-			u8 *val = (u8 *)ghcb->shared_buffer;
-
-			sign_byte = (*val & 0x80) ? 0xff : 0x00;
-		} else {
-			u16 *val = (u16 *)ghcb->shared_buffer;
-
-			sign_byte = (*val & 0x8000) ? 0xff : 0x00;
-		}
-		memset(reg_data, sign_byte, insn->opnd_bytes);
-
-		memcpy(reg_data, ghcb->shared_buffer, bytes);
-		break;
-
-	default:
-		ret = ES_UNSUPPORTED;
-	}
-
-	return ret;
-}
-
-/*
- * The MOVS instruction has two memory operands, which raises the
- * problem that it is not known whether the access to the source or the
- * destination caused the #VC exception (and hence whether an MMIO read
- * or write operation needs to be emulated).
- *
- * Instead of playing games with walking page-tables and trying to guess
- * whether the source or destination is an MMIO range, split the move
- * into two operations, a read and a write with only one memory operand.
- * This will cause a nested #VC exception on the MMIO address which can
- * then be handled.
- *
- * This implementation has the benefit that it also supports MOVS where
- * source _and_ destination are MMIO regions.
- *
- * It will slow MOVS on MMIO down a lot, but in SEV-ES guests it is a
- * rare operation. If it turns out to be a performance problem the split
- * operations can be moved to memcpy_fromio() and memcpy_toio().
- */
-static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt,
-					  unsigned int bytes)
-{
-	unsigned long ds_base, es_base;
-	unsigned char *src, *dst;
-	unsigned char buffer[8];
-	enum es_result ret;
-	bool rep;
-	int off;
-
-	ds_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_DS);
-	es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);
-
-	if (ds_base == -1L || es_base == -1L) {
-		ctxt->fi.vector = X86_TRAP_GP;
-		ctxt->fi.error_code = 0;
-		return ES_EXCEPTION;
-	}
-
-	src = ds_base + (unsigned char *)ctxt->regs->si;
-	dst = es_base + (unsigned char *)ctxt->regs->di;
-
-	ret = vc_read_mem(ctxt, src, buffer, bytes);
-	if (ret != ES_OK)
-		return ret;
-
-	ret = vc_write_mem(ctxt, dst, buffer, bytes);
-	if (ret != ES_OK)
-		return ret;
-
-	if (ctxt->regs->flags & X86_EFLAGS_DF)
-		off = -bytes;
-	else
-		off =  bytes;
-
-	ctxt->regs->si += off;
-	ctxt->regs->di += off;
-
-	rep = insn_has_rep_prefix(&ctxt->insn);
-	if (rep)
-		ctxt->regs->cx -= 1;
-
-	if (!rep || ctxt->regs->cx == 0)
-		return ES_OK;
-	else
-		return ES_RETRY;
-}
-
-static enum es_result vc_handle_mmio(struct ghcb *ghcb,
-				     struct es_em_ctxt *ctxt)
-{
-	struct insn *insn = &ctxt->insn;
-	unsigned int bytes = 0;
-	enum es_result ret;
-	long *reg_data;
-
-	switch (insn->opcode.bytes[0]) {
-	/* MMIO Write */
-	case 0x88:
-		bytes = 1;
-		fallthrough;
-	case 0x89:
-		if (!bytes)
-			bytes = insn->opnd_bytes;
-
-		reg_data = vc_insn_get_reg(ctxt);
-		if (!reg_data)
-			return ES_DECODE_FAILED;
-
-		memcpy(ghcb->shared_buffer, reg_data, bytes);
-
-		ret = vc_do_mmio(ghcb, ctxt, bytes, false);
-		break;
-
-	case 0xc6:
-		bytes = 1;
-		fallthrough;
-	case 0xc7:
-		if (!bytes)
-			bytes = insn->opnd_bytes;
-
-		memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes);
-
-		ret = vc_do_mmio(ghcb, ctxt, bytes, false);
-		break;
-
-		/* MMIO Read */
-	case 0x8a:
-		bytes = 1;
-		fallthrough;
-	case 0x8b:
-		if (!bytes)
-			bytes = insn->opnd_bytes;
-
-		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
-		if (ret)
-			break;
-
-		reg_data = vc_insn_get_reg(ctxt);
-		if (!reg_data)
-			return ES_DECODE_FAILED;
-
-		/* Zero-extend for 32-bit operation */
-		if (bytes == 4)
-			*reg_data = 0;
-
-		memcpy(reg_data, ghcb->shared_buffer, bytes);
-		break;
-
-		/* MOVS instruction */
-	case 0xa4:
-		bytes = 1;
-		fallthrough;
-	case 0xa5:
-		if (!bytes)
-			bytes = insn->opnd_bytes;
-
-		ret = vc_handle_mmio_movs(ctxt, bytes);
-		break;
-		/* Two-Byte Opcodes */
-	case 0x0f:
-		ret = vc_handle_mmio_twobyte_ops(ghcb, ctxt);
-		break;
-	default:
-		ret = ES_UNSUPPORTED;
-	}
-
-	return ret;
-}
-
-static enum es_result vc_handle_dr7_write(struct ghcb *ghcb,
-					  struct es_em_ctxt *ctxt)
-{
-	struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
-	long val, *reg = vc_insn_get_rm(ctxt);
-	enum es_result ret;
-
-	if (!reg)
-		return ES_DECODE_FAILED;
-
-	val = *reg;
-
-	/* Upper 32 bits must be written as zeroes */
-	if (val >> 32) {
-		ctxt->fi.vector = X86_TRAP_GP;
-		ctxt->fi.error_code = 0;
-		return ES_EXCEPTION;
-	}
-
-	/* Clear out other reserved bits and set bit 10 */
-	val = (val & 0xffff23ffL) | BIT(10);
-
-	/* Early non-zero writes to DR7 are not supported */
-	if (!data && (val & ~DR7_RESET_VALUE))
-		return ES_UNSUPPORTED;
-
-	/* Using a value of 0 for ExitInfo1 means RAX holds the value */
-	ghcb_set_rax(ghcb, val);
-	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
-	if (ret != ES_OK)
-		return ret;
-
-	if (data)
-		data->dr7 = val;
-
-	return ES_OK;
-}
-
-static enum es_result vc_handle_dr7_read(struct ghcb *ghcb,
-					 struct es_em_ctxt *ctxt)
-{
-	struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
-	long *reg = vc_insn_get_rm(ctxt);
-
-	if (!reg)
-		return ES_DECODE_FAILED;
-
-	if (data)
-		*reg = data->dr7;
-	else
-		*reg = DR7_RESET_VALUE;
-
-	return ES_OK;
-}
-
-static enum es_result vc_handle_wbinvd(struct ghcb *ghcb,
-				       struct es_em_ctxt *ctxt)
-{
-	return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0);
-}
-
-static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
-{
-	enum es_result ret;
-
-	ghcb_set_rcx(ghcb, ctxt->regs->cx);
-
-	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0);
-	if (ret != ES_OK)
-		return ret;
-
-	if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb)))
-		return ES_VMM_ERROR;
-
-	ctxt->regs->ax = ghcb->save.rax;
-	ctxt->regs->dx = ghcb->save.rdx;
-
-	return ES_OK;
-}
-
-static enum es_result vc_handle_monitor(struct ghcb *ghcb,
-					struct es_em_ctxt *ctxt)
-{
-	/*
-	 * Treat it as a NOP and do not leak a physical address to the
-	 * hypervisor.
-	 */
-	return ES_OK;
-}
-
-static enum es_result vc_handle_mwait(struct ghcb *ghcb,
-				      struct es_em_ctxt *ctxt)
-{
-	/* Treat the same as MONITOR/MONITORX */
-	return ES_OK;
-}
-
-static enum es_result vc_handle_vmmcall(struct ghcb *ghcb,
-					struct es_em_ctxt *ctxt)
-{
-	enum es_result ret;
-
-	ghcb_set_rax(ghcb, ctxt->regs->ax);
-	ghcb_set_cpl(ghcb, user_mode(ctxt->regs) ? 3 : 0);
-
-	if (x86_platform.hyper.sev_es_hcall_prepare)
-		x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs);
-
-	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0);
-	if (ret != ES_OK)
-		return ret;
-
-	if (!ghcb_rax_is_valid(ghcb))
-		return ES_VMM_ERROR;
-
-	ctxt->regs->ax = ghcb->save.rax;
-
-	/*
-	 * Call sev_es_hcall_finish() after regs->ax is already set.
-	 * This allows the hypervisor handler to overwrite it again if
-	 * necessary.
-	 */
-	if (x86_platform.hyper.sev_es_hcall_finish &&
-	    !x86_platform.hyper.sev_es_hcall_finish(ghcb, ctxt->regs))
-		return ES_VMM_ERROR;
-
-	return ES_OK;
-}
-
-static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
-					struct es_em_ctxt *ctxt)
-{
-	/*
-	 * Calling ecx_alignment_check() directly does not work, because it
-	 * enables IRQs and the GHCB is active. Forward the exception and call
-	 * it later from vc_forward_exception().
-	 */
-	ctxt->fi.vector = X86_TRAP_AC;
-	ctxt->fi.error_code = 0;
-	return ES_EXCEPTION;
-}
-
-static __always_inline void vc_handle_trap_db(struct pt_regs *regs)
-{
-	if (user_mode(regs))
-		noist_exc_debug(regs);
-	else
-		exc_debug(regs);
-}
-
-static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
-					 struct ghcb *ghcb,
-					 unsigned long exit_code)
-{
-	enum es_result result;
-
-	switch (exit_code) {
-	case SVM_EXIT_READ_DR7:
-		result = vc_handle_dr7_read(ghcb, ctxt);
-		break;
-	case SVM_EXIT_WRITE_DR7:
-		result = vc_handle_dr7_write(ghcb, ctxt);
-		break;
-	case SVM_EXIT_EXCP_BASE + X86_TRAP_AC:
-		result = vc_handle_trap_ac(ghcb, ctxt);
-		break;
-	case SVM_EXIT_RDTSC:
-	case SVM_EXIT_RDTSCP:
-		result = vc_handle_rdtsc(ghcb, ctxt, exit_code);
-		break;
-	case SVM_EXIT_RDPMC:
-		result = vc_handle_rdpmc(ghcb, ctxt);
-		break;
-	case SVM_EXIT_INVD:
-		pr_err_ratelimited("#VC exception for INVD??? Seriously???\n");
-		result = ES_UNSUPPORTED;
-		break;
-	case SVM_EXIT_CPUID:
-		result = vc_handle_cpuid(ghcb, ctxt);
-		break;
-	case SVM_EXIT_IOIO:
-		result = vc_handle_ioio(ghcb, ctxt);
-		break;
-	case SVM_EXIT_MSR:
-		result = vc_handle_msr(ghcb, ctxt);
-		break;
-	case SVM_EXIT_VMMCALL:
-		result = vc_handle_vmmcall(ghcb, ctxt);
-		break;
-	case SVM_EXIT_WBINVD:
-		result = vc_handle_wbinvd(ghcb, ctxt);
-		break;
-	case SVM_EXIT_MONITOR:
-		result = vc_handle_monitor(ghcb, ctxt);
-		break;
-	case SVM_EXIT_MWAIT:
-		result = vc_handle_mwait(ghcb, ctxt);
-		break;
-	case SVM_EXIT_NPF:
-		result = vc_handle_mmio(ghcb, ctxt);
-		break;
-	default:
-		/*
-		 * Unexpected #VC exception
-		 */
-		result = ES_UNSUPPORTED;
-	}
-
-	return result;
-}
-
-static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
-{
-	long error_code = ctxt->fi.error_code;
-	int trapnr = ctxt->fi.vector;
-
-	ctxt->regs->orig_ax = ctxt->fi.error_code;
-
-	switch (trapnr) {
-	case X86_TRAP_GP:
-		exc_general_protection(ctxt->regs, error_code);
-		break;
-	case X86_TRAP_UD:
-		exc_invalid_op(ctxt->regs);
-		break;
-	case X86_TRAP_AC:
-		exc_alignment_check(ctxt->regs, error_code);
-		break;
-	default:
-		pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
-		BUG();
-	}
-}
-
-static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs)
-{
-	unsigned long sp = (unsigned long)regs;
-
-	return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
-}
-
-/*
- * Main #VC exception handler. It is called when the entry code was able to
- * switch off the IST to a safe kernel stack.
- *
- * With the current implementation it is always possible to switch to a safe
- * stack because #VC exceptions only happen at known places, like intercepted
- * instructions or accesses to MMIO areas/IO ports. They can also happen with
- * code instrumentation when the hypervisor intercepts #DB, but the critical
- * paths are forbidden to be instrumented, so #DB exceptions currently also
- * only happen in safe places.
- */
-DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
-{
-	struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
-	irqentry_state_t irq_state;
-	struct ghcb_state state;
-	struct es_em_ctxt ctxt;
-	enum es_result result;
-	struct ghcb *ghcb;
-
-	/*
-	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
-	 */
-	if (error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB) {
-		vc_handle_trap_db(regs);
-		return;
-	}
-
-	irq_state = irqentry_nmi_enter(regs);
-	lockdep_assert_irqs_disabled();
-	instrumentation_begin();
-
-	/*
-	 * This is invoked through an interrupt gate, so IRQs are disabled. The
-	 * code below might walk page-tables for user or kernel addresses, so
-	 * keep the IRQs disabled to protect us against concurrent TLB flushes.
-	 */
-
-	ghcb = sev_es_get_ghcb(&state);
-	if (!ghcb) {
-		/*
-		 * Mark GHCBs inactive so that panic() is able to print the
-		 * message.
-		 */
-		data->ghcb_active        = false;
-		data->backup_ghcb_active = false;
-
-		panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
-	}
-
-	vc_ghcb_invalidate(ghcb);
-	result = vc_init_em_ctxt(&ctxt, regs, error_code);
-
-	if (result == ES_OK)
-		result = vc_handle_exitcode(&ctxt, ghcb, error_code);
-
-	sev_es_put_ghcb(&state);
-
-	/* Done - now check the result */
-	switch (result) {
-	case ES_OK:
-		vc_finish_insn(&ctxt);
-		break;
-	case ES_UNSUPPORTED:
-		pr_err_ratelimited("Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
-				   error_code, regs->ip);
-		goto fail;
-	case ES_VMM_ERROR:
-		pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
-				   error_code, regs->ip);
-		goto fail;
-	case ES_DECODE_FAILED:
-		pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
-				   error_code, regs->ip);
-		goto fail;
-	case ES_EXCEPTION:
-		vc_forward_exception(&ctxt);
-		break;
-	case ES_RETRY:
-		/* Nothing to do */
-		break;
-	default:
-		pr_emerg("Unknown result in %s():%d\n", __func__, result);
-		/*
-		 * Emulating the instruction which caused the #VC exception
-		 * failed - can't continue so print debug information
-		 */
-		BUG();
-	}
-
-out:
-	instrumentation_end();
-	irqentry_nmi_exit(regs, irq_state);
-
-	return;
-
-fail:
-	if (user_mode(regs)) {
-		/*
-		 * Do not kill the machine if user-space triggered the
-		 * exception. Send SIGBUS instead and let user-space deal with
-		 * it.
-		 */
-		force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
-	} else {
-		pr_emerg("PANIC: Unhandled #VC exception in kernel space (result=%d)\n",
-			 result);
-
-		/* Show some debug info */
-		show_regs(regs);
-
-		/* Ask hypervisor to sev_es_terminate */
-		sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
-
-		/* If that fails and we get here - just panic */
-		panic("Returned from Terminate-Request to Hypervisor\n");
-	}
-
-	goto out;
-}
-
-/* This handler runs on the #VC fall-back stack. It can cause further #VC exceptions */
-DEFINE_IDTENTRY_VC_IST(exc_vmm_communication)
-{
-	instrumentation_begin();
-	panic("Can't handle #VC exception from unsupported context\n");
-	instrumentation_end();
-}
-
-DEFINE_IDTENTRY_VC(exc_vmm_communication)
-{
-	if (likely(!on_vc_fallback_stack(regs)))
-		safe_stack_exc_vmm_communication(regs, error_code);
-	else
-		ist_exc_vmm_communication(regs, error_code);
-}
-
-bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
-{
-	unsigned long exit_code = regs->orig_ax;
-	struct es_em_ctxt ctxt;
-	enum es_result result;
-
-	/* Do initial setup or terminate the guest */
-	if (unlikely(boot_ghcb == NULL && !sev_es_setup_ghcb()))
-		sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
-
-	vc_ghcb_invalidate(boot_ghcb);
-
-	result = vc_init_em_ctxt(&ctxt, regs, exit_code);
-	if (result == ES_OK)
-		result = vc_handle_exitcode(&ctxt, boot_ghcb, exit_code);
-
-	/* Done - now check the result */
-	switch (result) {
-	case ES_OK:
-		vc_finish_insn(&ctxt);
-		break;
-	case ES_UNSUPPORTED:
-		early_printk("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
-				exit_code, regs->ip);
-		goto fail;
-	case ES_VMM_ERROR:
-		early_printk("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
-				exit_code, regs->ip);
-		goto fail;
-	case ES_DECODE_FAILED:
-		early_printk("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
-				exit_code, regs->ip);
-		goto fail;
-	case ES_EXCEPTION:
-		vc_early_forward_exception(&ctxt);
-		break;
-	case ES_RETRY:
-		/* Nothing to do */
-		break;
-	default:
-		BUG();
-	}
-
-	return true;
-
-fail:
-	show_regs(regs);
-
-	while (true)
-		halt();
-}
diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
new file mode 100644
index 000000000000..0aa9f13efd57
--- /dev/null
+++ b/arch/x86/kernel/sev-shared.c
@@ -0,0 +1,525 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AMD Encrypted Register State Support
+ *
+ * Author: Joerg Roedel <jroedel@suse.de>
+ *
+ * This file is not compiled stand-alone. It contains code shared
+ * between the pre-decompression boot code and the running Linux kernel
+ * and is included directly into both code-bases.
+ */
+
+#ifndef __BOOT_COMPRESSED
+#define error(v)	pr_err(v)
+#define has_cpuflag(f)	boot_cpu_has(f)
+#endif
+
+static bool __init sev_es_check_cpu_features(void)
+{
+	if (!has_cpuflag(X86_FEATURE_RDRAND)) {
+		error("RDRAND instruction not supported - no trusted source of randomness available\n");
+		return false;
+	}
+
+	return true;
+}
+
+static void __noreturn sev_es_terminate(unsigned int reason)
+{
+	u64 val = GHCB_SEV_TERMINATE;
+
+	/*
+	 * Tell the hypervisor what went wrong - only reason-set 0 is
+	 * currently supported.
+	 */
+	val |= GHCB_SEV_TERMINATE_REASON(0, reason);
+
+	/* Request Guest Termination from Hypvervisor */
+	sev_es_wr_ghcb_msr(val);
+	VMGEXIT();
+
+	while (true)
+		asm volatile("hlt\n" : : : "memory");
+}
+
+static bool sev_es_negotiate_protocol(void)
+{
+	u64 val;
+
+	/* Do the GHCB protocol version negotiation */
+	sev_es_wr_ghcb_msr(GHCB_SEV_INFO_REQ);
+	VMGEXIT();
+	val = sev_es_rd_ghcb_msr();
+
+	if (GHCB_INFO(val) != GHCB_SEV_INFO)
+		return false;
+
+	if (GHCB_PROTO_MAX(val) < GHCB_PROTO_OUR ||
+	    GHCB_PROTO_MIN(val) > GHCB_PROTO_OUR)
+		return false;
+
+	return true;
+}
+
+static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb)
+{
+	memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
+}
+
+static bool vc_decoding_needed(unsigned long exit_code)
+{
+	/* Exceptions don't require to decode the instruction */
+	return !(exit_code >= SVM_EXIT_EXCP_BASE &&
+		 exit_code <= SVM_EXIT_LAST_EXCP);
+}
+
+static enum es_result vc_init_em_ctxt(struct es_em_ctxt *ctxt,
+				      struct pt_regs *regs,
+				      unsigned long exit_code)
+{
+	enum es_result ret = ES_OK;
+
+	memset(ctxt, 0, sizeof(*ctxt));
+	ctxt->regs = regs;
+
+	if (vc_decoding_needed(exit_code))
+		ret = vc_decode_insn(ctxt);
+
+	return ret;
+}
+
+static void vc_finish_insn(struct es_em_ctxt *ctxt)
+{
+	ctxt->regs->ip += ctxt->insn.length;
+}
+
+static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
+					  struct es_em_ctxt *ctxt,
+					  u64 exit_code, u64 exit_info_1,
+					  u64 exit_info_2)
+{
+	enum es_result ret;
+
+	/* Fill in protocol and format specifiers */
+	ghcb->protocol_version = GHCB_PROTOCOL_MAX;
+	ghcb->ghcb_usage       = GHCB_DEFAULT_USAGE;
+
+	ghcb_set_sw_exit_code(ghcb, exit_code);
+	ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
+	ghcb_set_sw_exit_info_2(ghcb, exit_info_2);
+
+	sev_es_wr_ghcb_msr(__pa(ghcb));
+	VMGEXIT();
+
+	if ((ghcb->save.sw_exit_info_1 & 0xffffffff) == 1) {
+		u64 info = ghcb->save.sw_exit_info_2;
+		unsigned long v;
+
+		info = ghcb->save.sw_exit_info_2;
+		v = info & SVM_EVTINJ_VEC_MASK;
+
+		/* Check if exception information from hypervisor is sane. */
+		if ((info & SVM_EVTINJ_VALID) &&
+		    ((v == X86_TRAP_GP) || (v == X86_TRAP_UD)) &&
+		    ((info & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)) {
+			ctxt->fi.vector = v;
+			if (info & SVM_EVTINJ_VALID_ERR)
+				ctxt->fi.error_code = info >> 32;
+			ret = ES_EXCEPTION;
+		} else {
+			ret = ES_VMM_ERROR;
+		}
+	} else {
+		ret = ES_OK;
+	}
+
+	return ret;
+}
+
+/*
+ * Boot VC Handler - This is the first VC handler during boot, there is no GHCB
+ * page yet, so it only supports the MSR based communication with the
+ * hypervisor and only the CPUID exit-code.
+ */
+void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
+{
+	unsigned int fn = lower_bits(regs->ax, 32);
+	unsigned long val;
+
+	/* Only CPUID is supported via MSR protocol */
+	if (exit_code != SVM_EXIT_CPUID)
+		goto fail;
+
+	sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EAX));
+	VMGEXIT();
+	val = sev_es_rd_ghcb_msr();
+	if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
+		goto fail;
+	regs->ax = val >> 32;
+
+	sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EBX));
+	VMGEXIT();
+	val = sev_es_rd_ghcb_msr();
+	if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
+		goto fail;
+	regs->bx = val >> 32;
+
+	sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_ECX));
+	VMGEXIT();
+	val = sev_es_rd_ghcb_msr();
+	if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
+		goto fail;
+	regs->cx = val >> 32;
+
+	sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EDX));
+	VMGEXIT();
+	val = sev_es_rd_ghcb_msr();
+	if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
+		goto fail;
+	regs->dx = val >> 32;
+
+	/*
+	 * This is a VC handler and the #VC is only raised when SEV-ES is
+	 * active, which means SEV must be active too. Do sanity checks on the
+	 * CPUID results to make sure the hypervisor does not trick the kernel
+	 * into the no-sev path. This could map sensitive data unencrypted and
+	 * make it accessible to the hypervisor.
+	 *
+	 * In particular, check for:
+	 *	- Availability of CPUID leaf 0x8000001f
+	 *	- SEV CPUID bit.
+	 *
+	 * The hypervisor might still report the wrong C-bit position, but this
+	 * can't be checked here.
+	 */
+
+	if (fn == 0x80000000 && (regs->ax < 0x8000001f))
+		/* SEV leaf check */
+		goto fail;
+	else if ((fn == 0x8000001f && !(regs->ax & BIT(1))))
+		/* SEV bit */
+		goto fail;
+
+	/* Skip over the CPUID two-byte opcode */
+	regs->ip += 2;
+
+	return;
+
+fail:
+	/* Terminate the guest */
+	sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
+}
+
+static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt,
+					  void *src, char *buf,
+					  unsigned int data_size,
+					  unsigned int count,
+					  bool backwards)
+{
+	int i, b = backwards ? -1 : 1;
+	enum es_result ret = ES_OK;
+
+	for (i = 0; i < count; i++) {
+		void *s = src + (i * data_size * b);
+		char *d = buf + (i * data_size);
+
+		ret = vc_read_mem(ctxt, s, d, data_size);
+		if (ret != ES_OK)
+			break;
+	}
+
+	return ret;
+}
+
+static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt,
+					   void *dst, char *buf,
+					   unsigned int data_size,
+					   unsigned int count,
+					   bool backwards)
+{
+	int i, s = backwards ? -1 : 1;
+	enum es_result ret = ES_OK;
+
+	for (i = 0; i < count; i++) {
+		void *d = dst + (i * data_size * s);
+		char *b = buf + (i * data_size);
+
+		ret = vc_write_mem(ctxt, d, b, data_size);
+		if (ret != ES_OK)
+			break;
+	}
+
+	return ret;
+}
+
+#define IOIO_TYPE_STR  BIT(2)
+#define IOIO_TYPE_IN   1
+#define IOIO_TYPE_INS  (IOIO_TYPE_IN | IOIO_TYPE_STR)
+#define IOIO_TYPE_OUT  0
+#define IOIO_TYPE_OUTS (IOIO_TYPE_OUT | IOIO_TYPE_STR)
+
+#define IOIO_REP       BIT(3)
+
+#define IOIO_ADDR_64   BIT(9)
+#define IOIO_ADDR_32   BIT(8)
+#define IOIO_ADDR_16   BIT(7)
+
+#define IOIO_DATA_32   BIT(6)
+#define IOIO_DATA_16   BIT(5)
+#define IOIO_DATA_8    BIT(4)
+
+#define IOIO_SEG_ES    (0 << 10)
+#define IOIO_SEG_DS    (3 << 10)
+
+static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
+{
+	struct insn *insn = &ctxt->insn;
+	*exitinfo = 0;
+
+	switch (insn->opcode.bytes[0]) {
+	/* INS opcodes */
+	case 0x6c:
+	case 0x6d:
+		*exitinfo |= IOIO_TYPE_INS;
+		*exitinfo |= IOIO_SEG_ES;
+		*exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
+		break;
+
+	/* OUTS opcodes */
+	case 0x6e:
+	case 0x6f:
+		*exitinfo |= IOIO_TYPE_OUTS;
+		*exitinfo |= IOIO_SEG_DS;
+		*exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
+		break;
+
+	/* IN immediate opcodes */
+	case 0xe4:
+	case 0xe5:
+		*exitinfo |= IOIO_TYPE_IN;
+		*exitinfo |= (u8)insn->immediate.value << 16;
+		break;
+
+	/* OUT immediate opcodes */
+	case 0xe6:
+	case 0xe7:
+		*exitinfo |= IOIO_TYPE_OUT;
+		*exitinfo |= (u8)insn->immediate.value << 16;
+		break;
+
+	/* IN register opcodes */
+	case 0xec:
+	case 0xed:
+		*exitinfo |= IOIO_TYPE_IN;
+		*exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
+		break;
+
+	/* OUT register opcodes */
+	case 0xee:
+	case 0xef:
+		*exitinfo |= IOIO_TYPE_OUT;
+		*exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
+		break;
+
+	default:
+		return ES_DECODE_FAILED;
+	}
+
+	switch (insn->opcode.bytes[0]) {
+	case 0x6c:
+	case 0x6e:
+	case 0xe4:
+	case 0xe6:
+	case 0xec:
+	case 0xee:
+		/* Single byte opcodes */
+		*exitinfo |= IOIO_DATA_8;
+		break;
+	default:
+		/* Length determined by instruction parsing */
+		*exitinfo |= (insn->opnd_bytes == 2) ? IOIO_DATA_16
+						     : IOIO_DATA_32;
+	}
+	switch (insn->addr_bytes) {
+	case 2:
+		*exitinfo |= IOIO_ADDR_16;
+		break;
+	case 4:
+		*exitinfo |= IOIO_ADDR_32;
+		break;
+	case 8:
+		*exitinfo |= IOIO_ADDR_64;
+		break;
+	}
+
+	if (insn_has_rep_prefix(insn))
+		*exitinfo |= IOIO_REP;
+
+	return ES_OK;
+}
+
+static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
+{
+	struct pt_regs *regs = ctxt->regs;
+	u64 exit_info_1, exit_info_2;
+	enum es_result ret;
+
+	ret = vc_ioio_exitinfo(ctxt, &exit_info_1);
+	if (ret != ES_OK)
+		return ret;
+
+	if (exit_info_1 & IOIO_TYPE_STR) {
+
+		/* (REP) INS/OUTS */
+
+		bool df = ((regs->flags & X86_EFLAGS_DF) == X86_EFLAGS_DF);
+		unsigned int io_bytes, exit_bytes;
+		unsigned int ghcb_count, op_count;
+		unsigned long es_base;
+		u64 sw_scratch;
+
+		/*
+		 * For the string variants with rep prefix the amount of in/out
+		 * operations per #VC exception is limited so that the kernel
+		 * has a chance to take interrupts and re-schedule while the
+		 * instruction is emulated.
+		 */
+		io_bytes   = (exit_info_1 >> 4) & 0x7;
+		ghcb_count = sizeof(ghcb->shared_buffer) / io_bytes;
+
+		op_count    = (exit_info_1 & IOIO_REP) ? regs->cx : 1;
+		exit_info_2 = min(op_count, ghcb_count);
+		exit_bytes  = exit_info_2 * io_bytes;
+
+		es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);
+
+		/* Read bytes of OUTS into the shared buffer */
+		if (!(exit_info_1 & IOIO_TYPE_IN)) {
+			ret = vc_insn_string_read(ctxt,
+					       (void *)(es_base + regs->si),
+					       ghcb->shared_buffer, io_bytes,
+					       exit_info_2, df);
+			if (ret)
+				return ret;
+		}
+
+		/*
+		 * Issue an VMGEXIT to the HV to consume the bytes from the
+		 * shared buffer or to have it write them into the shared buffer
+		 * depending on the instruction: OUTS or INS.
+		 */
+		sw_scratch = __pa(ghcb) + offsetof(struct ghcb, shared_buffer);
+		ghcb_set_sw_scratch(ghcb, sw_scratch);
+		ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO,
+					  exit_info_1, exit_info_2);
+		if (ret != ES_OK)
+			return ret;
+
+		/* Read bytes from shared buffer into the guest's destination. */
+		if (exit_info_1 & IOIO_TYPE_IN) {
+			ret = vc_insn_string_write(ctxt,
+						   (void *)(es_base + regs->di),
+						   ghcb->shared_buffer, io_bytes,
+						   exit_info_2, df);
+			if (ret)
+				return ret;
+
+			if (df)
+				regs->di -= exit_bytes;
+			else
+				regs->di += exit_bytes;
+		} else {
+			if (df)
+				regs->si -= exit_bytes;
+			else
+				regs->si += exit_bytes;
+		}
+
+		if (exit_info_1 & IOIO_REP)
+			regs->cx -= exit_info_2;
+
+		ret = regs->cx ? ES_RETRY : ES_OK;
+
+	} else {
+
+		/* IN/OUT into/from rAX */
+
+		int bits = (exit_info_1 & 0x70) >> 1;
+		u64 rax = 0;
+
+		if (!(exit_info_1 & IOIO_TYPE_IN))
+			rax = lower_bits(regs->ax, bits);
+
+		ghcb_set_rax(ghcb, rax);
+
+		ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO, exit_info_1, 0);
+		if (ret != ES_OK)
+			return ret;
+
+		if (exit_info_1 & IOIO_TYPE_IN) {
+			if (!ghcb_rax_is_valid(ghcb))
+				return ES_VMM_ERROR;
+			regs->ax = lower_bits(ghcb->save.rax, bits);
+		}
+	}
+
+	return ret;
+}
+
+static enum es_result vc_handle_cpuid(struct ghcb *ghcb,
+				      struct es_em_ctxt *ctxt)
+{
+	struct pt_regs *regs = ctxt->regs;
+	u32 cr4 = native_read_cr4();
+	enum es_result ret;
+
+	ghcb_set_rax(ghcb, regs->ax);
+	ghcb_set_rcx(ghcb, regs->cx);
+
+	if (cr4 & X86_CR4_OSXSAVE)
+		/* Safe to read xcr0 */
+		ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK));
+	else
+		/* xgetbv will cause #GP - use reset value for xcr0 */
+		ghcb_set_xcr0(ghcb, 1);
+
+	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
+	if (ret != ES_OK)
+		return ret;
+
+	if (!(ghcb_rax_is_valid(ghcb) &&
+	      ghcb_rbx_is_valid(ghcb) &&
+	      ghcb_rcx_is_valid(ghcb) &&
+	      ghcb_rdx_is_valid(ghcb)))
+		return ES_VMM_ERROR;
+
+	regs->ax = ghcb->save.rax;
+	regs->bx = ghcb->save.rbx;
+	regs->cx = ghcb->save.rcx;
+	regs->dx = ghcb->save.rdx;
+
+	return ES_OK;
+}
+
+static enum es_result vc_handle_rdtsc(struct ghcb *ghcb,
+				      struct es_em_ctxt *ctxt,
+				      unsigned long exit_code)
+{
+	bool rdtscp = (exit_code == SVM_EXIT_RDTSCP);
+	enum es_result ret;
+
+	ret = sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, 0, 0);
+	if (ret != ES_OK)
+		return ret;
+
+	if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb) &&
+	     (!rdtscp || ghcb_rcx_is_valid(ghcb))))
+		return ES_VMM_ERROR;
+
+	ctxt->regs->ax = ghcb->save.rax;
+	ctxt->regs->dx = ghcb->save.rdx;
+	if (rdtscp)
+		ctxt->regs->cx = ghcb->save.rcx;
+
+	return ES_OK;
+}
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
new file mode 100644
index 000000000000..9578c82832aa
--- /dev/null
+++ b/arch/x86/kernel/sev.c
@@ -0,0 +1,1461 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD Memory Encryption Support
+ *
+ * Copyright (C) 2019 SUSE
+ *
+ * Author: Joerg Roedel <jroedel@suse.de>
+ */
+
+#define pr_fmt(fmt)	"SEV-ES: " fmt
+
+#include <linux/sched/debug.h>	/* For show_regs() */
+#include <linux/percpu-defs.h>
+#include <linux/mem_encrypt.h>
+#include <linux/lockdep.h>
+#include <linux/printk.h>
+#include <linux/mm_types.h>
+#include <linux/set_memory.h>
+#include <linux/memblock.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+
+#include <asm/cpu_entry_area.h>
+#include <asm/stacktrace.h>
+#include <asm/sev.h>
+#include <asm/insn-eval.h>
+#include <asm/fpu/internal.h>
+#include <asm/processor.h>
+#include <asm/realmode.h>
+#include <asm/traps.h>
+#include <asm/svm.h>
+#include <asm/smp.h>
+#include <asm/cpu.h>
+
+#define DR7_RESET_VALUE        0x400
+
+/* For early boot hypervisor communication in SEV-ES enabled guests */
+static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
+
+/*
+ * Needs to be in the .data section because we need it NULL before bss is
+ * cleared
+ */
+static struct ghcb __initdata *boot_ghcb;
+
+/* #VC handler runtime per-CPU data */
+struct sev_es_runtime_data {
+	struct ghcb ghcb_page;
+
+	/* Physical storage for the per-CPU IST stack of the #VC handler */
+	char ist_stack[EXCEPTION_STKSZ] __aligned(PAGE_SIZE);
+
+	/*
+	 * Physical storage for the per-CPU fall-back stack of the #VC handler.
+	 * The fall-back stack is used when it is not safe to switch back to the
+	 * interrupted stack in the #VC entry code.
+	 */
+	char fallback_stack[EXCEPTION_STKSZ] __aligned(PAGE_SIZE);
+
+	/*
+	 * Reserve one page per CPU as backup storage for the unencrypted GHCB.
+	 * It is needed when an NMI happens while the #VC handler uses the real
+	 * GHCB, and the NMI handler itself is causing another #VC exception. In
+	 * that case the GHCB content of the first handler needs to be backed up
+	 * and restored.
+	 */
+	struct ghcb backup_ghcb;
+
+	/*
+	 * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions.
+	 * There is no need for it to be atomic, because nothing is written to
+	 * the GHCB between the read and the write of ghcb_active. So it is safe
+	 * to use it when a nested #VC exception happens before the write.
+	 *
+	 * This is necessary for example in the #VC->NMI->#VC case when the NMI
+	 * happens while the first #VC handler uses the GHCB. When the NMI code
+	 * raises a second #VC handler it might overwrite the contents of the
+	 * GHCB written by the first handler. To avoid this the content of the
+	 * GHCB is saved and restored when the GHCB is detected to be in use
+	 * already.
+	 */
+	bool ghcb_active;
+	bool backup_ghcb_active;
+
+	/*
+	 * Cached DR7 value - write it on DR7 writes and return it on reads.
+	 * That value will never make it to the real hardware DR7 as debugging
+	 * is currently unsupported in SEV-ES guests.
+	 */
+	unsigned long dr7;
+};
+
+struct ghcb_state {
+	struct ghcb *ghcb;
+};
+
+static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
+DEFINE_STATIC_KEY_FALSE(sev_es_enable_key);
+
+/* Needed in vc_early_forward_exception */
+void do_early_exception(struct pt_regs *regs, int trapnr);
+
+static void __init setup_vc_stacks(int cpu)
+{
+	struct sev_es_runtime_data *data;
+	struct cpu_entry_area *cea;
+	unsigned long vaddr;
+	phys_addr_t pa;
+
+	data = per_cpu(runtime_data, cpu);
+	cea  = get_cpu_entry_area(cpu);
+
+	/* Map #VC IST stack */
+	vaddr = CEA_ESTACK_BOT(&cea->estacks, VC);
+	pa    = __pa(data->ist_stack);
+	cea_set_pte((void *)vaddr, pa, PAGE_KERNEL);
+
+	/* Map VC fall-back stack */
+	vaddr = CEA_ESTACK_BOT(&cea->estacks, VC2);
+	pa    = __pa(data->fallback_stack);
+	cea_set_pte((void *)vaddr, pa, PAGE_KERNEL);
+}
+
+static __always_inline bool on_vc_stack(struct pt_regs *regs)
+{
+	unsigned long sp = regs->sp;
+
+	/* User-mode RSP is not trusted */
+	if (user_mode(regs))
+		return false;
+
+	/* SYSCALL gap still has user-mode RSP */
+	if (ip_within_syscall_gap(regs))
+		return false;
+
+	return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC)));
+}
+
+/*
+ * This function handles the case when an NMI is raised in the #VC
+ * exception handler entry code, before the #VC handler has switched off
+ * its IST stack. In this case, the IST entry for #VC must be adjusted,
+ * so that any nested #VC exception will not overwrite the stack
+ * contents of the interrupted #VC handler.
+ *
+ * The IST entry is adjusted unconditionally so that it can be also be
+ * unconditionally adjusted back in __sev_es_ist_exit(). Otherwise a
+ * nested sev_es_ist_exit() call may adjust back the IST entry too
+ * early.
+ *
+ * The __sev_es_ist_enter() and __sev_es_ist_exit() functions always run
+ * on the NMI IST stack, as they are only called from NMI handling code
+ * right now.
+ */
+void noinstr __sev_es_ist_enter(struct pt_regs *regs)
+{
+	unsigned long old_ist, new_ist;
+
+	/* Read old IST entry */
+	new_ist = old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
+
+	/*
+	 * If NMI happened while on the #VC IST stack, set the new IST
+	 * value below regs->sp, so that the interrupted stack frame is
+	 * not overwritten by subsequent #VC exceptions.
+	 */
+	if (on_vc_stack(regs))
+		new_ist = regs->sp;
+
+	/*
+	 * Reserve additional 8 bytes and store old IST value so this
+	 * adjustment can be unrolled in __sev_es_ist_exit().
+	 */
+	new_ist -= sizeof(old_ist);
+	*(unsigned long *)new_ist = old_ist;
+
+	/* Set new IST entry */
+	this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], new_ist);
+}
+
+void noinstr __sev_es_ist_exit(void)
+{
+	unsigned long ist;
+
+	/* Read IST entry */
+	ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
+
+	if (WARN_ON(ist == __this_cpu_ist_top_va(VC)))
+		return;
+
+	/* Read back old IST entry and write it to the TSS */
+	this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);
+}
+
+static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state)
+{
+	struct sev_es_runtime_data *data;
+	struct ghcb *ghcb;
+
+	data = this_cpu_read(runtime_data);
+	ghcb = &data->ghcb_page;
+
+	if (unlikely(data->ghcb_active)) {
+		/* GHCB is already in use - save its contents */
+
+		if (unlikely(data->backup_ghcb_active))
+			return NULL;
+
+		/* Mark backup_ghcb active before writing to it */
+		data->backup_ghcb_active = true;
+
+		state->ghcb = &data->backup_ghcb;
+
+		/* Backup GHCB content */
+		*state->ghcb = *ghcb;
+	} else {
+		state->ghcb = NULL;
+		data->ghcb_active = true;
+	}
+
+	return ghcb;
+}
+
+static __always_inline void sev_es_put_ghcb(struct ghcb_state *state)
+{
+	struct sev_es_runtime_data *data;
+	struct ghcb *ghcb;
+
+	data = this_cpu_read(runtime_data);
+	ghcb = &data->ghcb_page;
+
+	if (state->ghcb) {
+		/* Restore GHCB from Backup */
+		*ghcb = *state->ghcb;
+		data->backup_ghcb_active = false;
+		state->ghcb = NULL;
+	} else {
+		data->ghcb_active = false;
+	}
+}
+
+/* Needed in vc_early_forward_exception */
+void do_early_exception(struct pt_regs *regs, int trapnr);
+
+static inline u64 sev_es_rd_ghcb_msr(void)
+{
+	return __rdmsr(MSR_AMD64_SEV_ES_GHCB);
+}
+
+static __always_inline void sev_es_wr_ghcb_msr(u64 val)
+{
+	u32 low, high;
+
+	low  = (u32)(val);
+	high = (u32)(val >> 32);
+
+	native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high);
+}
+
+static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
+				unsigned char *buffer)
+{
+	return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
+}
+
+static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt)
+{
+	char buffer[MAX_INSN_SIZE];
+	int res;
+
+	res = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
+	if (!res) {
+		ctxt->fi.vector     = X86_TRAP_PF;
+		ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
+		ctxt->fi.cr2        = ctxt->regs->ip;
+		return ES_EXCEPTION;
+	}
+
+	if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, res))
+		return ES_DECODE_FAILED;
+
+	if (ctxt->insn.immediate.got)
+		return ES_OK;
+	else
+		return ES_DECODE_FAILED;
+}
+
+static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt)
+{
+	char buffer[MAX_INSN_SIZE];
+	int res, ret;
+
+	res = vc_fetch_insn_kernel(ctxt, buffer);
+	if (res) {
+		ctxt->fi.vector     = X86_TRAP_PF;
+		ctxt->fi.error_code = X86_PF_INSTR;
+		ctxt->fi.cr2        = ctxt->regs->ip;
+		return ES_EXCEPTION;
+	}
+
+	ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64);
+	if (ret < 0)
+		return ES_DECODE_FAILED;
+	else
+		return ES_OK;
+}
+
+static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
+{
+	if (user_mode(ctxt->regs))
+		return __vc_decode_user_insn(ctxt);
+	else
+		return __vc_decode_kern_insn(ctxt);
+}
+
+static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
+				   char *dst, char *buf, size_t size)
+{
+	unsigned long error_code = X86_PF_PROT | X86_PF_WRITE;
+	char __user *target = (char __user *)dst;
+	u64 d8;
+	u32 d4;
+	u16 d2;
+	u8  d1;
+
+	/* If instruction ran in kernel mode and the I/O buffer is in kernel space */
+	if (!user_mode(ctxt->regs) && !access_ok(target, size)) {
+		memcpy(dst, buf, size);
+		return ES_OK;
+	}
+
+	switch (size) {
+	case 1:
+		memcpy(&d1, buf, 1);
+		if (put_user(d1, target))
+			goto fault;
+		break;
+	case 2:
+		memcpy(&d2, buf, 2);
+		if (put_user(d2, target))
+			goto fault;
+		break;
+	case 4:
+		memcpy(&d4, buf, 4);
+		if (put_user(d4, target))
+			goto fault;
+		break;
+	case 8:
+		memcpy(&d8, buf, 8);
+		if (put_user(d8, target))
+			goto fault;
+		break;
+	default:
+		WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
+		return ES_UNSUPPORTED;
+	}
+
+	return ES_OK;
+
+fault:
+	if (user_mode(ctxt->regs))
+		error_code |= X86_PF_USER;
+
+	ctxt->fi.vector = X86_TRAP_PF;
+	ctxt->fi.error_code = error_code;
+	ctxt->fi.cr2 = (unsigned long)dst;
+
+	return ES_EXCEPTION;
+}
+
+static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
+				  char *src, char *buf, size_t size)
+{
+	unsigned long error_code = X86_PF_PROT;
+	char __user *s = (char __user *)src;
+	u64 d8;
+	u32 d4;
+	u16 d2;
+	u8  d1;
+
+	/* If instruction ran in kernel mode and the I/O buffer is in kernel space */
+	if (!user_mode(ctxt->regs) && !access_ok(s, size)) {
+		memcpy(buf, src, size);
+		return ES_OK;
+	}
+
+	switch (size) {
+	case 1:
+		if (get_user(d1, s))
+			goto fault;
+		memcpy(buf, &d1, 1);
+		break;
+	case 2:
+		if (get_user(d2, s))
+			goto fault;
+		memcpy(buf, &d2, 2);
+		break;
+	case 4:
+		if (get_user(d4, s))
+			goto fault;
+		memcpy(buf, &d4, 4);
+		break;
+	case 8:
+		if (get_user(d8, s))
+			goto fault;
+		memcpy(buf, &d8, 8);
+		break;
+	default:
+		WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
+		return ES_UNSUPPORTED;
+	}
+
+	return ES_OK;
+
+fault:
+	if (user_mode(ctxt->regs))
+		error_code |= X86_PF_USER;
+
+	ctxt->fi.vector = X86_TRAP_PF;
+	ctxt->fi.error_code = error_code;
+	ctxt->fi.cr2 = (unsigned long)src;
+
+	return ES_EXCEPTION;
+}
+
+static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
+					   unsigned long vaddr, phys_addr_t *paddr)
+{
+	unsigned long va = (unsigned long)vaddr;
+	unsigned int level;
+	phys_addr_t pa;
+	pgd_t *pgd;
+	pte_t *pte;
+
+	pgd = __va(read_cr3_pa());
+	pgd = &pgd[pgd_index(va)];
+	pte = lookup_address_in_pgd(pgd, va, &level);
+	if (!pte) {
+		ctxt->fi.vector     = X86_TRAP_PF;
+		ctxt->fi.cr2        = vaddr;
+		ctxt->fi.error_code = 0;
+
+		if (user_mode(ctxt->regs))
+			ctxt->fi.error_code |= X86_PF_USER;
+
+		return ES_EXCEPTION;
+	}
+
+	if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC))
+		/* Emulated MMIO to/from encrypted memory not supported */
+		return ES_UNSUPPORTED;
+
+	pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
+	pa |= va & ~page_level_mask(level);
+
+	*paddr = pa;
+
+	return ES_OK;
+}
+
+/* Include code shared with pre-decompression boot stage */
+#include "sev-shared.c"
+
+void noinstr __sev_es_nmi_complete(void)
+{
+	struct ghcb_state state;
+	struct ghcb *ghcb;
+
+	ghcb = sev_es_get_ghcb(&state);
+
+	vc_ghcb_invalidate(ghcb);
+	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE);
+	ghcb_set_sw_exit_info_1(ghcb, 0);
+	ghcb_set_sw_exit_info_2(ghcb, 0);
+
+	sev_es_wr_ghcb_msr(__pa_nodebug(ghcb));
+	VMGEXIT();
+
+	sev_es_put_ghcb(&state);
+}
+
+static u64 get_jump_table_addr(void)
+{
+	struct ghcb_state state;
+	unsigned long flags;
+	struct ghcb *ghcb;
+	u64 ret = 0;
+
+	local_irq_save(flags);
+
+	ghcb = sev_es_get_ghcb(&state);
+
+	vc_ghcb_invalidate(ghcb);
+	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
+	ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE);
+	ghcb_set_sw_exit_info_2(ghcb, 0);
+
+	sev_es_wr_ghcb_msr(__pa(ghcb));
+	VMGEXIT();
+
+	if (ghcb_sw_exit_info_1_is_valid(ghcb) &&
+	    ghcb_sw_exit_info_2_is_valid(ghcb))
+		ret = ghcb->save.sw_exit_info_2;
+
+	sev_es_put_ghcb(&state);
+
+	local_irq_restore(flags);
+
+	return ret;
+}
+
+int sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
+{
+	u16 startup_cs, startup_ip;
+	phys_addr_t jump_table_pa;
+	u64 jump_table_addr;
+	u16 __iomem *jump_table;
+
+	jump_table_addr = get_jump_table_addr();
+
+	/* On UP guests there is no jump table so this is not a failure */
+	if (!jump_table_addr)
+		return 0;
+
+	/* Check if AP Jump Table is page-aligned */
+	if (jump_table_addr & ~PAGE_MASK)
+		return -EINVAL;
+
+	jump_table_pa = jump_table_addr & PAGE_MASK;
+
+	startup_cs = (u16)(rmh->trampoline_start >> 4);
+	startup_ip = (u16)(rmh->sev_es_trampoline_start -
+			   rmh->trampoline_start);
+
+	jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE);
+	if (!jump_table)
+		return -EIO;
+
+	writew(startup_ip, &jump_table[0]);
+	writew(startup_cs, &jump_table[1]);
+
+	iounmap(jump_table);
+
+	return 0;
+}
+
+/*
+ * This is needed by the OVMF UEFI firmware which will use whatever it finds in
+ * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu
+ * runtime GHCBs used by the kernel are also mapped in the EFI page-table.
+ */
+int __init sev_es_efi_map_ghcbs(pgd_t *pgd)
+{
+	struct sev_es_runtime_data *data;
+	unsigned long address, pflags;
+	int cpu;
+	u64 pfn;
+
+	if (!sev_es_active())
+		return 0;
+
+	pflags = _PAGE_NX | _PAGE_RW;
+
+	for_each_possible_cpu(cpu) {
+		data = per_cpu(runtime_data, cpu);
+
+		address = __pa(&data->ghcb_page);
+		pfn = address >> PAGE_SHIFT;
+
+		if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags))
+			return 1;
+	}
+
+	return 0;
+}
+
+static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
+{
+	struct pt_regs *regs = ctxt->regs;
+	enum es_result ret;
+	u64 exit_info_1;
+
+	/* Is it a WRMSR? */
+	exit_info_1 = (ctxt->insn.opcode.bytes[1] == 0x30) ? 1 : 0;
+
+	ghcb_set_rcx(ghcb, regs->cx);
+	if (exit_info_1) {
+		ghcb_set_rax(ghcb, regs->ax);
+		ghcb_set_rdx(ghcb, regs->dx);
+	}
+
+	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, exit_info_1, 0);
+
+	if ((ret == ES_OK) && (!exit_info_1)) {
+		regs->ax = ghcb->save.rax;
+		regs->dx = ghcb->save.rdx;
+	}
+
+	return ret;
+}
+
+/*
+ * This function runs on the first #VC exception after the kernel
+ * switched to virtual addresses.
+ */
+static bool __init sev_es_setup_ghcb(void)
+{
+	/* First make sure the hypervisor talks a supported protocol. */
+	if (!sev_es_negotiate_protocol())
+		return false;
+
+	/*
+	 * Clear the boot_ghcb. The first exception comes in before the bss
+	 * section is cleared.
+	 */
+	memset(&boot_ghcb_page, 0, PAGE_SIZE);
+
+	/* Alright - Make the boot-ghcb public */
+	boot_ghcb = &boot_ghcb_page;
+
+	return true;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void sev_es_ap_hlt_loop(void)
+{
+	struct ghcb_state state;
+	struct ghcb *ghcb;
+
+	ghcb = sev_es_get_ghcb(&state);
+
+	while (true) {
+		vc_ghcb_invalidate(ghcb);
+		ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP);
+		ghcb_set_sw_exit_info_1(ghcb, 0);
+		ghcb_set_sw_exit_info_2(ghcb, 0);
+
+		sev_es_wr_ghcb_msr(__pa(ghcb));
+		VMGEXIT();
+
+		/* Wakeup signal? */
+		if (ghcb_sw_exit_info_2_is_valid(ghcb) &&
+		    ghcb->save.sw_exit_info_2)
+			break;
+	}
+
+	sev_es_put_ghcb(&state);
+}
+
+/*
+ * Play_dead handler when running under SEV-ES. This is needed because
+ * the hypervisor can't deliver an SIPI request to restart the AP.
+ * Instead the kernel has to issue a VMGEXIT to halt the VCPU until the
+ * hypervisor wakes it up again.
+ */
+static void sev_es_play_dead(void)
+{
+	play_dead_common();
+
+	/* IRQs now disabled */
+
+	sev_es_ap_hlt_loop();
+
+	/*
+	 * If we get here, the VCPU was woken up again. Jump to CPU
+	 * startup code to get it back online.
+	 */
+	start_cpu0();
+}
+#else  /* CONFIG_HOTPLUG_CPU */
+#define sev_es_play_dead	native_play_dead
+#endif /* CONFIG_HOTPLUG_CPU */
+
+#ifdef CONFIG_SMP
+static void __init sev_es_setup_play_dead(void)
+{
+	smp_ops.play_dead = sev_es_play_dead;
+}
+#else
+static inline void sev_es_setup_play_dead(void) { }
+#endif
+
+static void __init alloc_runtime_data(int cpu)
+{
+	struct sev_es_runtime_data *data;
+
+	data = memblock_alloc(sizeof(*data), PAGE_SIZE);
+	if (!data)
+		panic("Can't allocate SEV-ES runtime data");
+
+	per_cpu(runtime_data, cpu) = data;
+}
+
+static void __init init_ghcb(int cpu)
+{
+	struct sev_es_runtime_data *data;
+	int err;
+
+	data = per_cpu(runtime_data, cpu);
+
+	err = early_set_memory_decrypted((unsigned long)&data->ghcb_page,
+					 sizeof(data->ghcb_page));
+	if (err)
+		panic("Can't map GHCBs unencrypted");
+
+	memset(&data->ghcb_page, 0, sizeof(data->ghcb_page));
+
+	data->ghcb_active = false;
+	data->backup_ghcb_active = false;
+}
+
+void __init sev_es_init_vc_handling(void)
+{
+	int cpu;
+
+	BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE);
+
+	if (!sev_es_active())
+		return;
+
+	if (!sev_es_check_cpu_features())
+		panic("SEV-ES CPU Features missing");
+
+	/* Enable SEV-ES special handling */
+	static_branch_enable(&sev_es_enable_key);
+
+	/* Initialize per-cpu GHCB pages */
+	for_each_possible_cpu(cpu) {
+		alloc_runtime_data(cpu);
+		init_ghcb(cpu);
+		setup_vc_stacks(cpu);
+	}
+
+	sev_es_setup_play_dead();
+
+	/* Secondary CPUs use the runtime #VC handler */
+	initial_vc_handler = (unsigned long)safe_stack_exc_vmm_communication;
+}
+
+static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
+{
+	int trapnr = ctxt->fi.vector;
+
+	if (trapnr == X86_TRAP_PF)
+		native_write_cr2(ctxt->fi.cr2);
+
+	ctxt->regs->orig_ax = ctxt->fi.error_code;
+	do_early_exception(ctxt->regs, trapnr);
+}
+
+static long *vc_insn_get_reg(struct es_em_ctxt *ctxt)
+{
+	long *reg_array;
+	int offset;
+
+	reg_array = (long *)ctxt->regs;
+	offset    = insn_get_modrm_reg_off(&ctxt->insn, ctxt->regs);
+
+	if (offset < 0)
+		return NULL;
+
+	offset /= sizeof(long);
+
+	return reg_array + offset;
+}
+
+static long *vc_insn_get_rm(struct es_em_ctxt *ctxt)
+{
+	long *reg_array;
+	int offset;
+
+	reg_array = (long *)ctxt->regs;
+	offset    = insn_get_modrm_rm_off(&ctxt->insn, ctxt->regs);
+
+	if (offset < 0)
+		return NULL;
+
+	offset /= sizeof(long);
+
+	return reg_array + offset;
+}
+static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
+				 unsigned int bytes, bool read)
+{
+	u64 exit_code, exit_info_1, exit_info_2;
+	unsigned long ghcb_pa = __pa(ghcb);
+	enum es_result res;
+	phys_addr_t paddr;
+	void __user *ref;
+
+	ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs);
+	if (ref == (void __user *)-1L)
+		return ES_UNSUPPORTED;
+
+	exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE;
+
+	res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr);
+	if (res != ES_OK) {
+		if (res == ES_EXCEPTION && !read)
+			ctxt->fi.error_code |= X86_PF_WRITE;
+
+		return res;
+	}
+
+	exit_info_1 = paddr;
+	/* Can never be greater than 8 */
+	exit_info_2 = bytes;
+
+	ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer));
+
+	return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2);
+}
+
+static enum es_result vc_handle_mmio_twobyte_ops(struct ghcb *ghcb,
+						 struct es_em_ctxt *ctxt)
+{
+	struct insn *insn = &ctxt->insn;
+	unsigned int bytes = 0;
+	enum es_result ret;
+	int sign_byte;
+	long *reg_data;
+
+	switch (insn->opcode.bytes[1]) {
+		/* MMIO Read w/ zero-extension */
+	case 0xb6:
+		bytes = 1;
+		fallthrough;
+	case 0xb7:
+		if (!bytes)
+			bytes = 2;
+
+		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
+		if (ret)
+			break;
+
+		/* Zero extend based on operand size */
+		reg_data = vc_insn_get_reg(ctxt);
+		if (!reg_data)
+			return ES_DECODE_FAILED;
+
+		memset(reg_data, 0, insn->opnd_bytes);
+
+		memcpy(reg_data, ghcb->shared_buffer, bytes);
+		break;
+
+		/* MMIO Read w/ sign-extension */
+	case 0xbe:
+		bytes = 1;
+		fallthrough;
+	case 0xbf:
+		if (!bytes)
+			bytes = 2;
+
+		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
+		if (ret)
+			break;
+
+		/* Sign extend based on operand size */
+		reg_data = vc_insn_get_reg(ctxt);
+		if (!reg_data)
+			return ES_DECODE_FAILED;
+
+		if (bytes == 1) {
+			u8 *val = (u8 *)ghcb->shared_buffer;
+
+			sign_byte = (*val & 0x80) ? 0xff : 0x00;
+		} else {
+			u16 *val = (u16 *)ghcb->shared_buffer;
+
+			sign_byte = (*val & 0x8000) ? 0xff : 0x00;
+		}
+		memset(reg_data, sign_byte, insn->opnd_bytes);
+
+		memcpy(reg_data, ghcb->shared_buffer, bytes);
+		break;
+
+	default:
+		ret = ES_UNSUPPORTED;
+	}
+
+	return ret;
+}
+
+/*
+ * The MOVS instruction has two memory operands, which raises the
+ * problem that it is not known whether the access to the source or the
+ * destination caused the #VC exception (and hence whether an MMIO read
+ * or write operation needs to be emulated).
+ *
+ * Instead of playing games with walking page-tables and trying to guess
+ * whether the source or destination is an MMIO range, split the move
+ * into two operations, a read and a write with only one memory operand.
+ * This will cause a nested #VC exception on the MMIO address which can
+ * then be handled.
+ *
+ * This implementation has the benefit that it also supports MOVS where
+ * source _and_ destination are MMIO regions.
+ *
+ * It will slow MOVS on MMIO down a lot, but in SEV-ES guests it is a
+ * rare operation. If it turns out to be a performance problem the split
+ * operations can be moved to memcpy_fromio() and memcpy_toio().
+ */
+static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt,
+					  unsigned int bytes)
+{
+	unsigned long ds_base, es_base;
+	unsigned char *src, *dst;
+	unsigned char buffer[8];
+	enum es_result ret;
+	bool rep;
+	int off;
+
+	ds_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_DS);
+	es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);
+
+	if (ds_base == -1L || es_base == -1L) {
+		ctxt->fi.vector = X86_TRAP_GP;
+		ctxt->fi.error_code = 0;
+		return ES_EXCEPTION;
+	}
+
+	src = ds_base + (unsigned char *)ctxt->regs->si;
+	dst = es_base + (unsigned char *)ctxt->regs->di;
+
+	ret = vc_read_mem(ctxt, src, buffer, bytes);
+	if (ret != ES_OK)
+		return ret;
+
+	ret = vc_write_mem(ctxt, dst, buffer, bytes);
+	if (ret != ES_OK)
+		return ret;
+
+	if (ctxt->regs->flags & X86_EFLAGS_DF)
+		off = -bytes;
+	else
+		off =  bytes;
+
+	ctxt->regs->si += off;
+	ctxt->regs->di += off;
+
+	rep = insn_has_rep_prefix(&ctxt->insn);
+	if (rep)
+		ctxt->regs->cx -= 1;
+
+	if (!rep || ctxt->regs->cx == 0)
+		return ES_OK;
+	else
+		return ES_RETRY;
+}
+
+static enum es_result vc_handle_mmio(struct ghcb *ghcb,
+				     struct es_em_ctxt *ctxt)
+{
+	struct insn *insn = &ctxt->insn;
+	unsigned int bytes = 0;
+	enum es_result ret;
+	long *reg_data;
+
+	switch (insn->opcode.bytes[0]) {
+	/* MMIO Write */
+	case 0x88:
+		bytes = 1;
+		fallthrough;
+	case 0x89:
+		if (!bytes)
+			bytes = insn->opnd_bytes;
+
+		reg_data = vc_insn_get_reg(ctxt);
+		if (!reg_data)
+			return ES_DECODE_FAILED;
+
+		memcpy(ghcb->shared_buffer, reg_data, bytes);
+
+		ret = vc_do_mmio(ghcb, ctxt, bytes, false);
+		break;
+
+	case 0xc6:
+		bytes = 1;
+		fallthrough;
+	case 0xc7:
+		if (!bytes)
+			bytes = insn->opnd_bytes;
+
+		memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes);
+
+		ret = vc_do_mmio(ghcb, ctxt, bytes, false);
+		break;
+
+		/* MMIO Read */
+	case 0x8a:
+		bytes = 1;
+		fallthrough;
+	case 0x8b:
+		if (!bytes)
+			bytes = insn->opnd_bytes;
+
+		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
+		if (ret)
+			break;
+
+		reg_data = vc_insn_get_reg(ctxt);
+		if (!reg_data)
+			return ES_DECODE_FAILED;
+
+		/* Zero-extend for 32-bit operation */
+		if (bytes == 4)
+			*reg_data = 0;
+
+		memcpy(reg_data, ghcb->shared_buffer, bytes);
+		break;
+
+		/* MOVS instruction */
+	case 0xa4:
+		bytes = 1;
+		fallthrough;
+	case 0xa5:
+		if (!bytes)
+			bytes = insn->opnd_bytes;
+
+		ret = vc_handle_mmio_movs(ctxt, bytes);
+		break;
+		/* Two-Byte Opcodes */
+	case 0x0f:
+		ret = vc_handle_mmio_twobyte_ops(ghcb, ctxt);
+		break;
+	default:
+		ret = ES_UNSUPPORTED;
+	}
+
+	return ret;
+}
+
+static enum es_result vc_handle_dr7_write(struct ghcb *ghcb,
+					  struct es_em_ctxt *ctxt)
+{
+	struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
+	long val, *reg = vc_insn_get_rm(ctxt);
+	enum es_result ret;
+
+	if (!reg)
+		return ES_DECODE_FAILED;
+
+	val = *reg;
+
+	/* Upper 32 bits must be written as zeroes */
+	if (val >> 32) {
+		ctxt->fi.vector = X86_TRAP_GP;
+		ctxt->fi.error_code = 0;
+		return ES_EXCEPTION;
+	}
+
+	/* Clear out other reserved bits and set bit 10 */
+	val = (val & 0xffff23ffL) | BIT(10);
+
+	/* Early non-zero writes to DR7 are not supported */
+	if (!data && (val & ~DR7_RESET_VALUE))
+		return ES_UNSUPPORTED;
+
+	/* Using a value of 0 for ExitInfo1 means RAX holds the value */
+	ghcb_set_rax(ghcb, val);
+	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
+	if (ret != ES_OK)
+		return ret;
+
+	if (data)
+		data->dr7 = val;
+
+	return ES_OK;
+}
+
+static enum es_result vc_handle_dr7_read(struct ghcb *ghcb,
+					 struct es_em_ctxt *ctxt)
+{
+	struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
+	long *reg = vc_insn_get_rm(ctxt);
+
+	if (!reg)
+		return ES_DECODE_FAILED;
+
+	if (data)
+		*reg = data->dr7;
+	else
+		*reg = DR7_RESET_VALUE;
+
+	return ES_OK;
+}
+
+static enum es_result vc_handle_wbinvd(struct ghcb *ghcb,
+				       struct es_em_ctxt *ctxt)
+{
+	return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0);
+}
+
+static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
+{
+	enum es_result ret;
+
+	ghcb_set_rcx(ghcb, ctxt->regs->cx);
+
+	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0);
+	if (ret != ES_OK)
+		return ret;
+
+	if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb)))
+		return ES_VMM_ERROR;
+
+	ctxt->regs->ax = ghcb->save.rax;
+	ctxt->regs->dx = ghcb->save.rdx;
+
+	return ES_OK;
+}
+
+static enum es_result vc_handle_monitor(struct ghcb *ghcb,
+					struct es_em_ctxt *ctxt)
+{
+	/*
+	 * Treat it as a NOP and do not leak a physical address to the
+	 * hypervisor.
+	 */
+	return ES_OK;
+}
+
+static enum es_result vc_handle_mwait(struct ghcb *ghcb,
+				      struct es_em_ctxt *ctxt)
+{
+	/* Treat the same as MONITOR/MONITORX */
+	return ES_OK;
+}
+
+static enum es_result vc_handle_vmmcall(struct ghcb *ghcb,
+					struct es_em_ctxt *ctxt)
+{
+	enum es_result ret;
+
+	ghcb_set_rax(ghcb, ctxt->regs->ax);
+	ghcb_set_cpl(ghcb, user_mode(ctxt->regs) ? 3 : 0);
+
+	if (x86_platform.hyper.sev_es_hcall_prepare)
+		x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs);
+
+	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0);
+	if (ret != ES_OK)
+		return ret;
+
+	if (!ghcb_rax_is_valid(ghcb))
+		return ES_VMM_ERROR;
+
+	ctxt->regs->ax = ghcb->save.rax;
+
+	/*
+	 * Call sev_es_hcall_finish() after regs->ax is already set.
+	 * This allows the hypervisor handler to overwrite it again if
+	 * necessary.
+	 */
+	if (x86_platform.hyper.sev_es_hcall_finish &&
+	    !x86_platform.hyper.sev_es_hcall_finish(ghcb, ctxt->regs))
+		return ES_VMM_ERROR;
+
+	return ES_OK;
+}
+
+static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
+					struct es_em_ctxt *ctxt)
+{
+	/*
+	 * Calling ecx_alignment_check() directly does not work, because it
+	 * enables IRQs and the GHCB is active. Forward the exception and call
+	 * it later from vc_forward_exception().
+	 */
+	ctxt->fi.vector = X86_TRAP_AC;
+	ctxt->fi.error_code = 0;
+	return ES_EXCEPTION;
+}
+
+static __always_inline void vc_handle_trap_db(struct pt_regs *regs)
+{
+	if (user_mode(regs))
+		noist_exc_debug(regs);
+	else
+		exc_debug(regs);
+}
+
+static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
+					 struct ghcb *ghcb,
+					 unsigned long exit_code)
+{
+	enum es_result result;
+
+	switch (exit_code) {
+	case SVM_EXIT_READ_DR7:
+		result = vc_handle_dr7_read(ghcb, ctxt);
+		break;
+	case SVM_EXIT_WRITE_DR7:
+		result = vc_handle_dr7_write(ghcb, ctxt);
+		break;
+	case SVM_EXIT_EXCP_BASE + X86_TRAP_AC:
+		result = vc_handle_trap_ac(ghcb, ctxt);
+		break;
+	case SVM_EXIT_RDTSC:
+	case SVM_EXIT_RDTSCP:
+		result = vc_handle_rdtsc(ghcb, ctxt, exit_code);
+		break;
+	case SVM_EXIT_RDPMC:
+		result = vc_handle_rdpmc(ghcb, ctxt);
+		break;
+	case SVM_EXIT_INVD:
+		pr_err_ratelimited("#VC exception for INVD??? Seriously???\n");
+		result = ES_UNSUPPORTED;
+		break;
+	case SVM_EXIT_CPUID:
+		result = vc_handle_cpuid(ghcb, ctxt);
+		break;
+	case SVM_EXIT_IOIO:
+		result = vc_handle_ioio(ghcb, ctxt);
+		break;
+	case SVM_EXIT_MSR:
+		result = vc_handle_msr(ghcb, ctxt);
+		break;
+	case SVM_EXIT_VMMCALL:
+		result = vc_handle_vmmcall(ghcb, ctxt);
+		break;
+	case SVM_EXIT_WBINVD:
+		result = vc_handle_wbinvd(ghcb, ctxt);
+		break;
+	case SVM_EXIT_MONITOR:
+		result = vc_handle_monitor(ghcb, ctxt);
+		break;
+	case SVM_EXIT_MWAIT:
+		result = vc_handle_mwait(ghcb, ctxt);
+		break;
+	case SVM_EXIT_NPF:
+		result = vc_handle_mmio(ghcb, ctxt);
+		break;
+	default:
+		/*
+		 * Unexpected #VC exception
+		 */
+		result = ES_UNSUPPORTED;
+	}
+
+	return result;
+}
+
+static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
+{
+	long error_code = ctxt->fi.error_code;
+	int trapnr = ctxt->fi.vector;
+
+	ctxt->regs->orig_ax = ctxt->fi.error_code;
+
+	switch (trapnr) {
+	case X86_TRAP_GP:
+		exc_general_protection(ctxt->regs, error_code);
+		break;
+	case X86_TRAP_UD:
+		exc_invalid_op(ctxt->regs);
+		break;
+	case X86_TRAP_AC:
+		exc_alignment_check(ctxt->regs, error_code);
+		break;
+	default:
+		pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
+		BUG();
+	}
+}
+
+static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs)
+{
+	unsigned long sp = (unsigned long)regs;
+
+	return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
+}
+
+/*
+ * Main #VC exception handler. It is called when the entry code was able to
+ * switch off the IST to a safe kernel stack.
+ *
+ * With the current implementation it is always possible to switch to a safe
+ * stack because #VC exceptions only happen at known places, like intercepted
+ * instructions or accesses to MMIO areas/IO ports. They can also happen with
+ * code instrumentation when the hypervisor intercepts #DB, but the critical
+ * paths are forbidden to be instrumented, so #DB exceptions currently also
+ * only happen in safe places.
+ */
+DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
+{
+	struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
+	irqentry_state_t irq_state;
+	struct ghcb_state state;
+	struct es_em_ctxt ctxt;
+	enum es_result result;
+	struct ghcb *ghcb;
+
+	/*
+	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
+	 */
+	if (error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB) {
+		vc_handle_trap_db(regs);
+		return;
+	}
+
+	irq_state = irqentry_nmi_enter(regs);
+	lockdep_assert_irqs_disabled();
+	instrumentation_begin();
+
+	/*
+	 * This is invoked through an interrupt gate, so IRQs are disabled. The
+	 * code below might walk page-tables for user or kernel addresses, so
+	 * keep the IRQs disabled to protect us against concurrent TLB flushes.
+	 */
+
+	ghcb = sev_es_get_ghcb(&state);
+	if (!ghcb) {
+		/*
+		 * Mark GHCBs inactive so that panic() is able to print the
+		 * message.
+		 */
+		data->ghcb_active        = false;
+		data->backup_ghcb_active = false;
+
+		panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
+	}
+
+	vc_ghcb_invalidate(ghcb);
+	result = vc_init_em_ctxt(&ctxt, regs, error_code);
+
+	if (result == ES_OK)
+		result = vc_handle_exitcode(&ctxt, ghcb, error_code);
+
+	sev_es_put_ghcb(&state);
+
+	/* Done - now check the result */
+	switch (result) {
+	case ES_OK:
+		vc_finish_insn(&ctxt);
+		break;
+	case ES_UNSUPPORTED:
+		pr_err_ratelimited("Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
+				   error_code, regs->ip);
+		goto fail;
+	case ES_VMM_ERROR:
+		pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
+				   error_code, regs->ip);
+		goto fail;
+	case ES_DECODE_FAILED:
+		pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
+				   error_code, regs->ip);
+		goto fail;
+	case ES_EXCEPTION:
+		vc_forward_exception(&ctxt);
+		break;
+	case ES_RETRY:
+		/* Nothing to do */
+		break;
+	default:
+		pr_emerg("Unknown result in %s():%d\n", __func__, result);
+		/*
+		 * Emulating the instruction which caused the #VC exception
+		 * failed - can't continue so print debug information
+		 */
+		BUG();
+	}
+
+out:
+	instrumentation_end();
+	irqentry_nmi_exit(regs, irq_state);
+
+	return;
+
+fail:
+	if (user_mode(regs)) {
+		/*
+		 * Do not kill the machine if user-space triggered the
+		 * exception. Send SIGBUS instead and let user-space deal with
+		 * it.
+		 */
+		force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
+	} else {
+		pr_emerg("PANIC: Unhandled #VC exception in kernel space (result=%d)\n",
+			 result);
+
+		/* Show some debug info */
+		show_regs(regs);
+
+		/* Ask hypervisor to sev_es_terminate */
+		sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
+
+		/* If that fails and we get here - just panic */
+		panic("Returned from Terminate-Request to Hypervisor\n");
+	}
+
+	goto out;
+}
+
+/* This handler runs on the #VC fall-back stack. It can cause further #VC exceptions */
+DEFINE_IDTENTRY_VC_IST(exc_vmm_communication)
+{
+	instrumentation_begin();
+	panic("Can't handle #VC exception from unsupported context\n");
+	instrumentation_end();
+}
+
+DEFINE_IDTENTRY_VC(exc_vmm_communication)
+{
+	if (likely(!on_vc_fallback_stack(regs)))
+		safe_stack_exc_vmm_communication(regs, error_code);
+	else
+		ist_exc_vmm_communication(regs, error_code);
+}
+
+bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
+{
+	unsigned long exit_code = regs->orig_ax;
+	struct es_em_ctxt ctxt;
+	enum es_result result;
+
+	/* Do initial setup or terminate the guest */
+	if (unlikely(boot_ghcb == NULL && !sev_es_setup_ghcb()))
+		sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
+
+	vc_ghcb_invalidate(boot_ghcb);
+
+	result = vc_init_em_ctxt(&ctxt, regs, exit_code);
+	if (result == ES_OK)
+		result = vc_handle_exitcode(&ctxt, boot_ghcb, exit_code);
+
+	/* Done - now check the result */
+	switch (result) {
+	case ES_OK:
+		vc_finish_insn(&ctxt);
+		break;
+	case ES_UNSUPPORTED:
+		early_printk("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
+				exit_code, regs->ip);
+		goto fail;
+	case ES_VMM_ERROR:
+		early_printk("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
+				exit_code, regs->ip);
+		goto fail;
+	case ES_DECODE_FAILED:
+		early_printk("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
+				exit_code, regs->ip);
+		goto fail;
+	case ES_EXCEPTION:
+		vc_early_forward_exception(&ctxt);
+		break;
+	case ES_RETRY:
+		/* Nothing to do */
+		break;
+	default:
+		BUG();
+	}
+
+	return true;
+
+fail:
+	show_regs(regs);
+
+	while (true)
+		halt();
+}
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index b93d6cd08a7f..121921b2927c 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -5,7 +5,7 @@
 #include <xen/xen.h>
 
 #include <asm/fpu/internal.h>
-#include <asm/sev-es.h>
+#include <asm/sev.h>
 #include <asm/traps.h>
 #include <asm/kdebug.h>
 
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index df7b5477fc4f..7515e78ef898 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -47,7 +47,7 @@
 #include <asm/realmode.h>
 #include <asm/time.h>
 #include <asm/pgalloc.h>
-#include <asm/sev-es.h>
+#include <asm/sev.h>
 
 /*
  * We allocate runtime services regions top-down, starting from -4G, i.e.
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index 1be71ef5e4c4..2e1c1bec0f9e 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -9,7 +9,7 @@
 #include <asm/realmode.h>
 #include <asm/tlbflush.h>
 #include <asm/crash.h>
-#include <asm/sev-es.h>
+#include <asm/sev.h>
 
 struct real_mode_header *real_mode_header;
 u32 *trampoline_cr4_features;
-- 
cgit v1.2.3


From b81fc74d53d1248de6db3136dd6b29e5d5528021 Mon Sep 17 00:00:00 2001
From: Brijesh Singh <brijesh.singh@amd.com>
Date: Tue, 27 Apr 2021 06:16:35 -0500
Subject: x86/sev: Move GHCB MSR protocol and NAE definitions in a common
 header

The guest and the hypervisor contain separate macros to get and set
the GHCB MSR protocol and NAE event fields. Consolidate the GHCB
protocol definitions and helper macros in one place.

Leave the supported protocol version define in separate files to keep
the guest and hypervisor flexibility to support different GHCB version
in the same release.

There is no functional change intended.

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Joerg Roedel <jroedel@suse.de>
Link: https://lkml.kernel.org/r/20210427111636.1207-3-brijesh.singh@amd.com
---
 arch/x86/include/asm/sev-common.h | 62 +++++++++++++++++++++++++++++++++++++++
 arch/x86/include/asm/sev.h        | 30 +++----------------
 arch/x86/kernel/sev-shared.c      | 20 ++++++-------
 arch/x86/kvm/svm/svm.h            | 38 +++---------------------
 4 files changed, 80 insertions(+), 70 deletions(-)
 create mode 100644 arch/x86/include/asm/sev-common.h

diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
new file mode 100644
index 000000000000..629c3df243f0
--- /dev/null
+++ b/arch/x86/include/asm/sev-common.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AMD SEV header common between the guest and the hypervisor.
+ *
+ * Author: Brijesh Singh <brijesh.singh@amd.com>
+ */
+
+#ifndef __ASM_X86_SEV_COMMON_H
+#define __ASM_X86_SEV_COMMON_H
+
+#define GHCB_MSR_INFO_POS		0
+#define GHCB_MSR_INFO_MASK		(BIT_ULL(12) - 1)
+
+#define GHCB_MSR_SEV_INFO_RESP		0x001
+#define GHCB_MSR_SEV_INFO_REQ		0x002
+#define GHCB_MSR_VER_MAX_POS		48
+#define GHCB_MSR_VER_MAX_MASK		0xffff
+#define GHCB_MSR_VER_MIN_POS		32
+#define GHCB_MSR_VER_MIN_MASK		0xffff
+#define GHCB_MSR_CBIT_POS		24
+#define GHCB_MSR_CBIT_MASK		0xff
+#define GHCB_MSR_SEV_INFO(_max, _min, _cbit)				\
+	((((_max) & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) |	\
+	 (((_min) & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) |	\
+	 (((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) |	\
+	 GHCB_MSR_SEV_INFO_RESP)
+#define GHCB_MSR_INFO(v)		((v) & 0xfffUL)
+#define GHCB_MSR_PROTO_MAX(v)		(((v) >> GHCB_MSR_VER_MAX_POS) & GHCB_MSR_VER_MAX_MASK)
+#define GHCB_MSR_PROTO_MIN(v)		(((v) >> GHCB_MSR_VER_MIN_POS) & GHCB_MSR_VER_MIN_MASK)
+
+#define GHCB_MSR_CPUID_REQ		0x004
+#define GHCB_MSR_CPUID_RESP		0x005
+#define GHCB_MSR_CPUID_FUNC_POS		32
+#define GHCB_MSR_CPUID_FUNC_MASK	0xffffffff
+#define GHCB_MSR_CPUID_VALUE_POS	32
+#define GHCB_MSR_CPUID_VALUE_MASK	0xffffffff
+#define GHCB_MSR_CPUID_REG_POS		30
+#define GHCB_MSR_CPUID_REG_MASK		0x3
+#define GHCB_CPUID_REQ_EAX		0
+#define GHCB_CPUID_REQ_EBX		1
+#define GHCB_CPUID_REQ_ECX		2
+#define GHCB_CPUID_REQ_EDX		3
+#define GHCB_CPUID_REQ(fn, reg)		\
+		(GHCB_MSR_CPUID_REQ | \
+		(((unsigned long)reg & GHCB_MSR_CPUID_REG_MASK) << GHCB_MSR_CPUID_REG_POS) | \
+		(((unsigned long)fn) << GHCB_MSR_CPUID_FUNC_POS))
+
+#define GHCB_MSR_TERM_REQ		0x100
+#define GHCB_MSR_TERM_REASON_SET_POS	12
+#define GHCB_MSR_TERM_REASON_SET_MASK	0xf
+#define GHCB_MSR_TERM_REASON_POS	16
+#define GHCB_MSR_TERM_REASON_MASK	0xff
+#define GHCB_SEV_TERM_REASON(reason_set, reason_val)						  \
+	(((((u64)reason_set) &  GHCB_MSR_TERM_REASON_SET_MASK) << GHCB_MSR_TERM_REASON_SET_POS) | \
+	((((u64)reason_val) & GHCB_MSR_TERM_REASON_MASK) << GHCB_MSR_TERM_REASON_POS))
+
+#define GHCB_SEV_ES_REASON_GENERAL_REQUEST	0
+#define GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED	1
+
+#define GHCB_RESP_CODE(v)		((v) & GHCB_MSR_INFO_MASK)
+
+#endif
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index cf1d957c7091..fa5cd05d3b5b 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -10,34 +10,12 @@
 
 #include <linux/types.h>
 #include <asm/insn.h>
+#include <asm/sev-common.h>
 
-#define GHCB_SEV_INFO		0x001UL
-#define GHCB_SEV_INFO_REQ	0x002UL
-#define		GHCB_INFO(v)		((v) & 0xfffUL)
-#define		GHCB_PROTO_MAX(v)	(((v) >> 48) & 0xffffUL)
-#define		GHCB_PROTO_MIN(v)	(((v) >> 32) & 0xffffUL)
-#define		GHCB_PROTO_OUR		0x0001UL
-#define GHCB_SEV_CPUID_REQ	0x004UL
-#define		GHCB_CPUID_REQ_EAX	0
-#define		GHCB_CPUID_REQ_EBX	1
-#define		GHCB_CPUID_REQ_ECX	2
-#define		GHCB_CPUID_REQ_EDX	3
-#define		GHCB_CPUID_REQ(fn, reg) (GHCB_SEV_CPUID_REQ | \
-					(((unsigned long)reg & 3) << 30) | \
-					(((unsigned long)fn) << 32))
+#define GHCB_PROTO_OUR		0x0001UL
+#define GHCB_PROTOCOL_MAX	1ULL
+#define GHCB_DEFAULT_USAGE	0ULL
 
-#define	GHCB_PROTOCOL_MAX	0x0001UL
-#define GHCB_DEFAULT_USAGE	0x0000UL
-
-#define GHCB_SEV_CPUID_RESP	0x005UL
-#define GHCB_SEV_TERMINATE	0x100UL
-#define		GHCB_SEV_TERMINATE_REASON(reason_set, reason_val)	\
-			(((((u64)reason_set) &  0x7) << 12) |		\
-			 ((((u64)reason_val) & 0xff) << 16))
-#define		GHCB_SEV_ES_REASON_GENERAL_REQUEST	0
-#define		GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED	1
-
-#define	GHCB_SEV_GHCB_RESP_CODE(v)	((v) & 0xfff)
 #define	VMGEXIT()			{ asm volatile("rep; vmmcall\n\r"); }
 
 enum es_result {
diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
index 0aa9f13efd57..6ec8b3bfd76e 100644
--- a/arch/x86/kernel/sev-shared.c
+++ b/arch/x86/kernel/sev-shared.c
@@ -26,13 +26,13 @@ static bool __init sev_es_check_cpu_features(void)
 
 static void __noreturn sev_es_terminate(unsigned int reason)
 {
-	u64 val = GHCB_SEV_TERMINATE;
+	u64 val = GHCB_MSR_TERM_REQ;
 
 	/*
 	 * Tell the hypervisor what went wrong - only reason-set 0 is
 	 * currently supported.
 	 */
-	val |= GHCB_SEV_TERMINATE_REASON(0, reason);
+	val |= GHCB_SEV_TERM_REASON(0, reason);
 
 	/* Request Guest Termination from Hypvervisor */
 	sev_es_wr_ghcb_msr(val);
@@ -47,15 +47,15 @@ static bool sev_es_negotiate_protocol(void)
 	u64 val;
 
 	/* Do the GHCB protocol version negotiation */
-	sev_es_wr_ghcb_msr(GHCB_SEV_INFO_REQ);
+	sev_es_wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ);
 	VMGEXIT();
 	val = sev_es_rd_ghcb_msr();
 
-	if (GHCB_INFO(val) != GHCB_SEV_INFO)
+	if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP)
 		return false;
 
-	if (GHCB_PROTO_MAX(val) < GHCB_PROTO_OUR ||
-	    GHCB_PROTO_MIN(val) > GHCB_PROTO_OUR)
+	if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTO_OUR ||
+	    GHCB_MSR_PROTO_MIN(val) > GHCB_PROTO_OUR)
 		return false;
 
 	return true;
@@ -153,28 +153,28 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
 	sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EAX));
 	VMGEXIT();
 	val = sev_es_rd_ghcb_msr();
-	if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
+	if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
 		goto fail;
 	regs->ax = val >> 32;
 
 	sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EBX));
 	VMGEXIT();
 	val = sev_es_rd_ghcb_msr();
-	if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
+	if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
 		goto fail;
 	regs->bx = val >> 32;
 
 	sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_ECX));
 	VMGEXIT();
 	val = sev_es_rd_ghcb_msr();
-	if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
+	if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
 		goto fail;
 	regs->cx = val >> 32;
 
 	sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EDX));
 	VMGEXIT();
 	val = sev_es_rd_ghcb_msr();
-	if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
+	if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
 		goto fail;
 	regs->dx = val >> 32;
 
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 84b3133c2251..42f8a7b9048f 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -20,6 +20,7 @@
 #include <linux/bits.h>
 
 #include <asm/svm.h>
+#include <asm/sev-common.h>
 
 #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
 
@@ -525,40 +526,9 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu);
 
 /* sev.c */
 
-#define GHCB_VERSION_MAX		1ULL
-#define GHCB_VERSION_MIN		1ULL
-
-#define GHCB_MSR_INFO_POS		0
-#define GHCB_MSR_INFO_MASK		(BIT_ULL(12) - 1)
-
-#define GHCB_MSR_SEV_INFO_RESP		0x001
-#define GHCB_MSR_SEV_INFO_REQ		0x002
-#define GHCB_MSR_VER_MAX_POS		48
-#define GHCB_MSR_VER_MAX_MASK		0xffff
-#define GHCB_MSR_VER_MIN_POS		32
-#define GHCB_MSR_VER_MIN_MASK		0xffff
-#define GHCB_MSR_CBIT_POS		24
-#define GHCB_MSR_CBIT_MASK		0xff
-#define GHCB_MSR_SEV_INFO(_max, _min, _cbit)				\
-	((((_max) & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) |	\
-	 (((_min) & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) |	\
-	 (((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) |	\
-	 GHCB_MSR_SEV_INFO_RESP)
-
-#define GHCB_MSR_CPUID_REQ		0x004
-#define GHCB_MSR_CPUID_RESP		0x005
-#define GHCB_MSR_CPUID_FUNC_POS		32
-#define GHCB_MSR_CPUID_FUNC_MASK	0xffffffff
-#define GHCB_MSR_CPUID_VALUE_POS	32
-#define GHCB_MSR_CPUID_VALUE_MASK	0xffffffff
-#define GHCB_MSR_CPUID_REG_POS		30
-#define GHCB_MSR_CPUID_REG_MASK		0x3
-
-#define GHCB_MSR_TERM_REQ		0x100
-#define GHCB_MSR_TERM_REASON_SET_POS	12
-#define GHCB_MSR_TERM_REASON_SET_MASK	0xf
-#define GHCB_MSR_TERM_REASON_POS	16
-#define GHCB_MSR_TERM_REASON_MASK	0xff
+#define GHCB_VERSION_MAX	1ULL
+#define GHCB_VERSION_MIN	1ULL
+
 
 extern unsigned int max_sev_asid;
 
-- 
cgit v1.2.3


From 059e5c321a65657877924256ea8ad9c0df257b45 Mon Sep 17 00:00:00 2001
From: Brijesh Singh <brijesh.singh@amd.com>
Date: Tue, 27 Apr 2021 06:16:36 -0500
Subject: x86/msr: Rename MSR_K8_SYSCFG to MSR_AMD64_SYSCFG

The SYSCFG MSR continued being updated beyond the K8 family; drop the K8
name from it.

Suggested-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Joerg Roedel <jroedel@suse.de>
Link: https://lkml.kernel.org/r/20210427111636.1207-4-brijesh.singh@amd.com
---
 Documentation/virt/kvm/amd-memory-encryption.rst | 2 +-
 Documentation/x86/amd-memory-encryption.rst      | 6 +++---
 arch/x86/include/asm/msr-index.h                 | 6 +++---
 arch/x86/kernel/cpu/amd.c                        | 4 ++--
 arch/x86/kernel/cpu/mtrr/cleanup.c               | 2 +-
 arch/x86/kernel/cpu/mtrr/generic.c               | 4 ++--
 arch/x86/kernel/mmconf-fam10h_64.c               | 2 +-
 arch/x86/kvm/svm/svm.c                           | 4 ++--
 arch/x86/kvm/x86.c                               | 2 +-
 arch/x86/mm/mem_encrypt_identity.c               | 6 +++---
 arch/x86/pci/amd_bus.c                           | 2 +-
 arch/x86/realmode/rm/trampoline_64.S             | 4 ++--
 drivers/edac/amd64_edac.c                        | 2 +-
 tools/arch/x86/include/asm/msr-index.h           | 6 +++---
 14 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/Documentation/virt/kvm/amd-memory-encryption.rst b/Documentation/virt/kvm/amd-memory-encryption.rst
index 5ec8a1902e15..5c081c8c7164 100644
--- a/Documentation/virt/kvm/amd-memory-encryption.rst
+++ b/Documentation/virt/kvm/amd-memory-encryption.rst
@@ -22,7 +22,7 @@ to SEV::
 		  [ecx]:
 			Bits[31:0]  Number of encrypted guests supported simultaneously
 
-If support for SEV is present, MSR 0xc001_0010 (MSR_K8_SYSCFG) and MSR 0xc001_0015
+If support for SEV is present, MSR 0xc001_0010 (MSR_AMD64_SYSCFG) and MSR 0xc001_0015
 (MSR_K7_HWCR) can be used to determine if it can be enabled::
 
 	0xc001_0010:
diff --git a/Documentation/x86/amd-memory-encryption.rst b/Documentation/x86/amd-memory-encryption.rst
index c48d452d0718..a1940ebe7be5 100644
--- a/Documentation/x86/amd-memory-encryption.rst
+++ b/Documentation/x86/amd-memory-encryption.rst
@@ -53,7 +53,7 @@ CPUID function 0x8000001f reports information related to SME::
 			   system physical addresses, not guest physical
 			   addresses)
 
-If support for SME is present, MSR 0xc00100010 (MSR_K8_SYSCFG) can be used to
+If support for SME is present, MSR 0xc00100010 (MSR_AMD64_SYSCFG) can be used to
 determine if SME is enabled and/or to enable memory encryption::
 
 	0xc0010010:
@@ -79,7 +79,7 @@ The state of SME in the Linux kernel can be documented as follows:
 	  The CPU supports SME (determined through CPUID instruction).
 
 	- Enabled:
-	  Supported and bit 23 of MSR_K8_SYSCFG is set.
+	  Supported and bit 23 of MSR_AMD64_SYSCFG is set.
 
 	- Active:
 	  Supported, Enabled and the Linux kernel is actively applying
@@ -89,7 +89,7 @@ The state of SME in the Linux kernel can be documented as follows:
 SME can also be enabled and activated in the BIOS. If SME is enabled and
 activated in the BIOS, then all memory accesses will be encrypted and it will
 not be necessary to activate the Linux memory encryption support.  If the BIOS
-merely enables SME (sets bit 23 of the MSR_K8_SYSCFG), then Linux can activate
+merely enables SME (sets bit 23 of the MSR_AMD64_SYSCFG), then Linux can activate
 memory encryption by default (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=y) or
 by supplying mem_encrypt=on on the kernel command line.  However, if BIOS does
 not enable SME, then Linux will not be able to activate memory encryption, even
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 742d89a00721..211ba3375ee9 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -537,9 +537,9 @@
 /* K8 MSRs */
 #define MSR_K8_TOP_MEM1			0xc001001a
 #define MSR_K8_TOP_MEM2			0xc001001d
-#define MSR_K8_SYSCFG			0xc0010010
-#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT	23
-#define MSR_K8_SYSCFG_MEM_ENCRYPT	BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT)
+#define MSR_AMD64_SYSCFG		0xc0010010
+#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT	23
+#define MSR_AMD64_SYSCFG_MEM_ENCRYPT	BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT)
 #define MSR_K8_INT_PENDING_MSG		0xc0010055
 /* C1E active bits in int pending message */
 #define K8_INTP_C1E_ACTIVE_MASK		0x18000000
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 2d11384dc9ab..0adb0341cd7c 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -593,8 +593,8 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
 	 */
 	if (cpu_has(c, X86_FEATURE_SME) || cpu_has(c, X86_FEATURE_SEV)) {
 		/* Check if memory encryption is enabled */
-		rdmsrl(MSR_K8_SYSCFG, msr);
-		if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+		rdmsrl(MSR_AMD64_SYSCFG, msr);
+		if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
 			goto clear_all;
 
 		/*
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 0c3b372318b7..b5f43049fa5f 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -836,7 +836,7 @@ int __init amd_special_default_mtrr(void)
 	if (boot_cpu_data.x86 < 0xf)
 		return 0;
 	/* In case some hypervisor doesn't pass SYSCFG through: */
-	if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
+	if (rdmsr_safe(MSR_AMD64_SYSCFG, &l, &h) < 0)
 		return 0;
 	/*
 	 * Memory between 4GB and top of mem is forced WB by this magic bit.
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index b90f3f437765..558108296f3c 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -53,13 +53,13 @@ static inline void k8_check_syscfg_dram_mod_en(void)
 	      (boot_cpu_data.x86 >= 0x0f)))
 		return;
 
-	rdmsr(MSR_K8_SYSCFG, lo, hi);
+	rdmsr(MSR_AMD64_SYSCFG, lo, hi);
 	if (lo & K8_MTRRFIXRANGE_DRAM_MODIFY) {
 		pr_err(FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]"
 		       " not cleared by BIOS, clearing this bit\n",
 		       smp_processor_id());
 		lo &= ~K8_MTRRFIXRANGE_DRAM_MODIFY;
-		mtrr_wrmsr(MSR_K8_SYSCFG, lo, hi);
+		mtrr_wrmsr(MSR_AMD64_SYSCFG, lo, hi);
 	}
 }
 
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index b5cb49e57df8..c94dec6a1834 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -95,7 +95,7 @@ static void get_fam10h_pci_mmconf_base(void)
 		return;
 
 	/* SYS_CFG */
-	address = MSR_K8_SYSCFG;
+	address = MSR_AMD64_SYSCFG;
 	rdmsrl(address, val);
 
 	/* TOP_MEM2 is not enabled? */
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index b649f92287a2..433e8e4fb3a6 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -858,8 +858,8 @@ static __init void svm_adjust_mmio_mask(void)
 		return;
 
 	/* If memory encryption is not enabled, use existing mask */
-	rdmsrl(MSR_K8_SYSCFG, msr);
-	if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+	rdmsrl(MSR_AMD64_SYSCFG, msr);
+	if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
 		return;
 
 	enc_bit = cpuid_ebx(0x8000001f) & 0x3f;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6eda2834fc05..853c40e89335 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3402,7 +3402,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_LASTBRANCHTOIP:
 	case MSR_IA32_LASTINTFROMIP:
 	case MSR_IA32_LASTINTTOIP:
-	case MSR_K8_SYSCFG:
+	case MSR_AMD64_SYSCFG:
 	case MSR_K8_TSEG_ADDR:
 	case MSR_K8_TSEG_MASK:
 	case MSR_VM_HSAVE_PA:
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index 04aba7e80a36..a9639f663d25 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -529,7 +529,7 @@ void __init sme_enable(struct boot_params *bp)
 		/*
 		 * No SME if Hypervisor bit is set. This check is here to
 		 * prevent a guest from trying to enable SME. For running as a
-		 * KVM guest the MSR_K8_SYSCFG will be sufficient, but there
+		 * KVM guest the MSR_AMD64_SYSCFG will be sufficient, but there
 		 * might be other hypervisors which emulate that MSR as non-zero
 		 * or even pass it through to the guest.
 		 * A malicious hypervisor can still trick a guest into this
@@ -542,8 +542,8 @@ void __init sme_enable(struct boot_params *bp)
 			return;
 
 		/* For SME, check the SYSCFG MSR */
-		msr = __rdmsr(MSR_K8_SYSCFG);
-		if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+		msr = __rdmsr(MSR_AMD64_SYSCFG);
+		if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
 			return;
 	} else {
 		/* SEV state cannot be controlled by a command line option */
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index ae744b6a0785..dd40d3fea74e 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -284,7 +284,7 @@ static int __init early_root_info_init(void)
 
 	/* need to take out [4G, TOM2) for RAM*/
 	/* SYS_CFG */
-	address = MSR_K8_SYSCFG;
+	address = MSR_AMD64_SYSCFG;
 	rdmsrl(address, val);
 	/* TOP_MEM2 is enabled? */
 	if (val & (1<<21)) {
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
index 84c5d1b33d10..cc8391f86cdb 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -123,9 +123,9 @@ SYM_CODE_START(startup_32)
 	 */
 	btl	$TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags
 	jnc	.Ldone
-	movl	$MSR_K8_SYSCFG, %ecx
+	movl	$MSR_AMD64_SYSCFG, %ecx
 	rdmsr
-	bts	$MSR_K8_SYSCFG_MEM_ENCRYPT_BIT, %eax
+	bts	$MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT, %eax
 	jc	.Ldone
 
 	/*
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 9fa4dfc6ebee..f0d8f60acee1 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -3083,7 +3083,7 @@ static void read_mc_regs(struct amd64_pvt *pvt)
 	edac_dbg(0, "  TOP_MEM:  0x%016llx\n", pvt->top_mem);
 
 	/* Check first whether TOP_MEM2 is enabled: */
-	rdmsrl(MSR_K8_SYSCFG, msr_val);
+	rdmsrl(MSR_AMD64_SYSCFG, msr_val);
 	if (msr_val & BIT(21)) {
 		rdmsrl(MSR_K8_TOP_MEM2, pvt->top_mem2);
 		edac_dbg(0, "  TOP_MEM2: 0x%016llx\n", pvt->top_mem2);
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 45029354e0a8..c60b09e7602f 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -533,9 +533,9 @@
 /* K8 MSRs */
 #define MSR_K8_TOP_MEM1			0xc001001a
 #define MSR_K8_TOP_MEM2			0xc001001d
-#define MSR_K8_SYSCFG			0xc0010010
-#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT	23
-#define MSR_K8_SYSCFG_MEM_ENCRYPT	BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT)
+#define MSR_AMD64_SYSCFG		0xc0010010
+#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT	23
+#define MSR_AMD64_SYSCFG_MEM_ENCRYPT	BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT)
 #define MSR_K8_INT_PENDING_MSG		0xc0010055
 /* C1E active bits in int pending message */
 #define K8_INTP_C1E_ACTIVE_MASK		0x18000000
-- 
cgit v1.2.3


From 970655aa9b42461f8394e4457307005bdeee14d9 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Fri, 23 Apr 2021 07:40:38 +0200
Subject: xen/gntdev: fix gntdev_mmap() error exit path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit d3eeb1d77c5d0af ("xen/gntdev: use mmu_interval_notifier_insert")
introduced an error in gntdev_mmap(): in case the call of
mmu_interval_notifier_insert_locked() fails the exit path should not
call mmu_interval_notifier_remove(), as this might result in NULL
dereferences.

One reason for failure is e.g. a signal pending for the running
process.

Fixes: d3eeb1d77c5d0af ("xen/gntdev: use mmu_interval_notifier_insert")
Cc: stable@vger.kernel.org
Reported-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
Tested-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Luca Fancellu <luca.fancellu@arm.com>
Link: https://lore.kernel.org/r/20210423054038.26696-1-jgross@suse.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/gntdev.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index f01d58c7a042..a3e7be96527d 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -1017,8 +1017,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 		err = mmu_interval_notifier_insert_locked(
 			&map->notifier, vma->vm_mm, vma->vm_start,
 			vma->vm_end - vma->vm_start, &gntdev_mmu_ops);
-		if (err)
+		if (err) {
+			map->vma = NULL;
 			goto out_unlock_put;
+		}
 	}
 	mutex_unlock(&priv->lock);
 
-- 
cgit v1.2.3


From dbc03e81586fc33e4945263fd6e09e22eb4b980f Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Sat, 8 May 2021 10:19:13 +0800
Subject: xen/unpopulated-alloc: fix error return code in fill_list()

Fix to return a negative error code from the error handling case instead
of 0, as done elsewhere in this function.

Fixes: a4574f63edc6 ("mm/memremap_pages: convert to 'struct range'")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/20210508021913.1727-1-thunder.leizhen@huawei.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/unpopulated-alloc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/xen/unpopulated-alloc.c b/drivers/xen/unpopulated-alloc.c
index e64e6befc63b..87e6b7db892f 100644
--- a/drivers/xen/unpopulated-alloc.c
+++ b/drivers/xen/unpopulated-alloc.c
@@ -39,8 +39,10 @@ static int fill_list(unsigned int nr_pages)
 	}
 
 	pgmap = kzalloc(sizeof(*pgmap), GFP_KERNEL);
-	if (!pgmap)
+	if (!pgmap) {
+		ret = -ENOMEM;
 		goto err_pgmap;
+	}
 
 	pgmap->type = MEMORY_DEVICE_GENERIC;
 	pgmap->range = (struct range) {
-- 
cgit v1.2.3


From 0c6c2d3615efb7c292573f2e6c886929a2b2da6c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 28 Apr 2021 13:12:31 +0100
Subject: arm64: Generate cpucaps.h

The arm64 code allocates an internal constant to every CPU feature it can
detect, distinct from the public hwcap numbers we use to expose some
features to userspace. Currently this is maintained manually which is an
irritating source of conflicts when working on new features, to avoid this
replace the header with a simple text file listing the names we've assigned
and sort it to minimise conflicts.

As part of doing this we also do the Kbuild hookup required to hook up
an arch tools directory and to generate header files in there.

This will result in a renumbering and reordering of the existing constants,
since they are all internal only the values should not be important. The
reordering will impact the order in which some steps in enumeration handle
features but the algorithm is not intended to depend on this and I haven't
seen any issues when testing. Due to the UAO cpucap having been removed in
the past we end up with ARM64_NCAPS being 1 smaller than it was before.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210428121231.11219-1-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/Makefile              |  3 ++
 arch/arm64/include/asm/Kbuild    |  2 ++
 arch/arm64/include/asm/cpucaps.h | 74 ----------------------------------------
 arch/arm64/tools/Makefile        | 22 ++++++++++++
 arch/arm64/tools/cpucaps         | 65 +++++++++++++++++++++++++++++++++++
 arch/arm64/tools/gen-cpucaps.awk | 40 ++++++++++++++++++++++
 6 files changed, 132 insertions(+), 74 deletions(-)
 delete mode 100644 arch/arm64/include/asm/cpucaps.h
 create mode 100644 arch/arm64/tools/Makefile
 create mode 100644 arch/arm64/tools/cpucaps
 create mode 100755 arch/arm64/tools/gen-cpucaps.awk

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 7ef44478560d..b52481f0605d 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -175,6 +175,9 @@ vdso_install:
 	$(if $(CONFIG_COMPAT_VDSO), \
 		$(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso32 $@)
 
+archprepare:
+	$(Q)$(MAKE) $(build)=arch/arm64/tools kapi
+
 # We use MRPROPER_FILES and CLEAN_FILES now
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 07ac208edc89..26889dbfe904 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -5,3 +5,5 @@ generic-y += qrwlock.h
 generic-y += qspinlock.h
 generic-y += set_memory.h
 generic-y += user.h
+
+generated-y += cpucaps.h
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
deleted file mode 100644
index b0c5eda0498f..000000000000
--- a/arch/arm64/include/asm/cpucaps.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * arch/arm64/include/asm/cpucaps.h
- *
- * Copyright (C) 2016 ARM Ltd.
- */
-#ifndef __ASM_CPUCAPS_H
-#define __ASM_CPUCAPS_H
-
-#define ARM64_WORKAROUND_CLEAN_CACHE		0
-#define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE	1
-#define ARM64_WORKAROUND_845719			2
-#define ARM64_HAS_SYSREG_GIC_CPUIF		3
-#define ARM64_HAS_PAN				4
-#define ARM64_HAS_LSE_ATOMICS			5
-#define ARM64_WORKAROUND_CAVIUM_23154		6
-#define ARM64_WORKAROUND_834220			7
-#define ARM64_HAS_NO_HW_PREFETCH		8
-#define ARM64_HAS_VIRT_HOST_EXTN		11
-#define ARM64_WORKAROUND_CAVIUM_27456		12
-#define ARM64_HAS_32BIT_EL0			13
-#define ARM64_SPECTRE_V3A			14
-#define ARM64_HAS_CNP				15
-#define ARM64_HAS_NO_FPSIMD			16
-#define ARM64_WORKAROUND_REPEAT_TLBI		17
-#define ARM64_WORKAROUND_QCOM_FALKOR_E1003	18
-#define ARM64_WORKAROUND_858921			19
-#define ARM64_WORKAROUND_CAVIUM_30115		20
-#define ARM64_HAS_DCPOP				21
-#define ARM64_SVE				22
-#define ARM64_UNMAP_KERNEL_AT_EL0		23
-#define ARM64_SPECTRE_V2			24
-#define ARM64_HAS_RAS_EXTN			25
-#define ARM64_WORKAROUND_843419			26
-#define ARM64_HAS_CACHE_IDC			27
-#define ARM64_HAS_CACHE_DIC			28
-#define ARM64_HW_DBM				29
-#define ARM64_SPECTRE_V4			30
-#define ARM64_MISMATCHED_CACHE_TYPE		31
-#define ARM64_HAS_STAGE2_FWB			32
-#define ARM64_HAS_CRC32				33
-#define ARM64_SSBS				34
-#define ARM64_WORKAROUND_1418040		35
-#define ARM64_HAS_SB				36
-#define ARM64_WORKAROUND_SPECULATIVE_AT		37
-#define ARM64_HAS_ADDRESS_AUTH_ARCH		38
-#define ARM64_HAS_ADDRESS_AUTH_IMP_DEF		39
-#define ARM64_HAS_GENERIC_AUTH_ARCH		40
-#define ARM64_HAS_GENERIC_AUTH_IMP_DEF		41
-#define ARM64_HAS_IRQ_PRIO_MASKING		42
-#define ARM64_HAS_DCPODP			43
-#define ARM64_WORKAROUND_1463225		44
-#define ARM64_WORKAROUND_CAVIUM_TX2_219_TVM	45
-#define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM	46
-#define ARM64_WORKAROUND_1542419		47
-#define ARM64_HAS_E0PD				48
-#define ARM64_HAS_RNG				49
-#define ARM64_HAS_AMU_EXTN			50
-#define ARM64_HAS_ADDRESS_AUTH			51
-#define ARM64_HAS_GENERIC_AUTH			52
-#define ARM64_HAS_32BIT_EL1			53
-#define ARM64_BTI				54
-#define ARM64_HAS_ARMv8_4_TTL			55
-#define ARM64_HAS_TLB_RANGE			56
-#define ARM64_MTE				57
-#define ARM64_WORKAROUND_1508412		58
-#define ARM64_HAS_LDAPR				59
-#define ARM64_KVM_PROTECTED_MODE		60
-#define ARM64_WORKAROUND_NVIDIA_CARMEL_CNP	61
-#define ARM64_HAS_EPAN				62
-
-#define ARM64_NCAPS				63
-
-#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/tools/Makefile b/arch/arm64/tools/Makefile
new file mode 100644
index 000000000000..932b4fe5c768
--- /dev/null
+++ b/arch/arm64/tools/Makefile
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: GPL-2.0
+
+gen := arch/$(ARCH)/include/generated
+kapi := $(gen)/asm
+
+kapi-hdrs-y := $(kapi)/cpucaps.h
+
+targets += $(addprefix ../../../,$(gen-y) $(kapi-hdrs-y))
+
+PHONY += kapi
+
+kapi:   $(kapi-hdrs-y) $(gen-y)
+
+# Create output directory if not already present
+_dummy := $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
+
+quiet_cmd_gen_cpucaps = GEN     $@
+      cmd_gen_cpucaps = mkdir -p $(dir $@) && \
+                     $(AWK) -f $(filter-out $(PHONY),$^) > $@
+
+$(kapi)/cpucaps.h: $(src)/gen-cpucaps.awk $(src)/cpucaps FORCE
+	$(call if_changed,gen_cpucaps)
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
new file mode 100644
index 000000000000..21fbdda7086e
--- /dev/null
+++ b/arch/arm64/tools/cpucaps
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Internal CPU capabilities constants, keep this list sorted
+
+BTI
+HAS_32BIT_EL0
+HAS_32BIT_EL1
+HAS_ADDRESS_AUTH
+HAS_ADDRESS_AUTH_ARCH
+HAS_ADDRESS_AUTH_IMP_DEF
+HAS_AMU_EXTN
+HAS_ARMv8_4_TTL
+HAS_CACHE_DIC
+HAS_CACHE_IDC
+HAS_CNP
+HAS_CRC32
+HAS_DCPODP
+HAS_DCPOP
+HAS_E0PD
+HAS_EPAN
+HAS_GENERIC_AUTH
+HAS_GENERIC_AUTH_ARCH
+HAS_GENERIC_AUTH_IMP_DEF
+HAS_IRQ_PRIO_MASKING
+HAS_LDAPR
+HAS_LSE_ATOMICS
+HAS_NO_FPSIMD
+HAS_NO_HW_PREFETCH
+HAS_PAN
+HAS_RAS_EXTN
+HAS_RNG
+HAS_SB
+HAS_STAGE2_FWB
+HAS_SYSREG_GIC_CPUIF
+HAS_TLB_RANGE
+HAS_VIRT_HOST_EXTN
+HW_DBM
+KVM_PROTECTED_MODE
+MISMATCHED_CACHE_TYPE
+MTE
+SPECTRE_V2
+SPECTRE_V3A
+SPECTRE_V4
+SSBS
+SVE
+UNMAP_KERNEL_AT_EL0
+WORKAROUND_834220
+WORKAROUND_843419
+WORKAROUND_845719
+WORKAROUND_858921
+WORKAROUND_1418040
+WORKAROUND_1463225
+WORKAROUND_1508412
+WORKAROUND_1542419
+WORKAROUND_CAVIUM_23154
+WORKAROUND_CAVIUM_27456
+WORKAROUND_CAVIUM_30115
+WORKAROUND_CAVIUM_TX2_219_PRFM
+WORKAROUND_CAVIUM_TX2_219_TVM
+WORKAROUND_CLEAN_CACHE
+WORKAROUND_DEVICE_LOAD_ACQUIRE
+WORKAROUND_NVIDIA_CARMEL_CNP
+WORKAROUND_QCOM_FALKOR_E1003
+WORKAROUND_REPEAT_TLBI
+WORKAROUND_SPECULATIVE_AT
diff --git a/arch/arm64/tools/gen-cpucaps.awk b/arch/arm64/tools/gen-cpucaps.awk
new file mode 100755
index 000000000000..18737a1ce044
--- /dev/null
+++ b/arch/arm64/tools/gen-cpucaps.awk
@@ -0,0 +1,40 @@
+#!/bin/awk -f
+# SPDX-License-Identifier: GPL-2.0
+# gen-cpucaps.awk: arm64 cpucaps header generator
+#
+# Usage: awk -f gen-cpucaps.awk cpucaps.txt
+
+# Log an error and terminate
+function fatal(msg) {
+	print "Error at line " NR ": " msg > "/dev/stderr"
+	exit 1
+}
+
+# skip blank lines and comment lines
+/^$/ { next }
+/^#/ { next }
+
+BEGIN {
+	print "#ifndef __ASM_CPUCAPS_H"
+	print "#define __ASM_CPUCAPS_H"
+	print ""
+	print "/* Generated file - do not edit */"
+	cap_num = 0
+	print ""
+}
+
+/^[vA-Z0-9_]+$/ {
+	printf("#define ARM64_%-30s\t%d\n", $0, cap_num++)
+	next
+}
+
+END {
+	printf("#define ARM64_NCAPS\t\t\t\t%d\n", cap_num)
+	print ""
+	print "#endif"
+}
+
+# Any lines not handled by previous rules are unexpected
+{
+	fatal("unhandled statement")
+}
-- 
cgit v1.2.3


From a1bed090fc56e6e24517d96bc076595544fb5317 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Fri, 7 May 2021 17:25:42 +0100
Subject: kselftest/arm64: Add missing stddef.h include to BTI tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Explicitly include stddef.h when building the BTI tests so that we have
a definition of NULL, with at least some toolchains this is not done
implicitly by anything else:

test.c: In function ‘start’:
test.c:214:25: error: ‘NULL’ undeclared (first use in this function)
  214 |  sigaction(SIGILL, &sa, NULL);
      |                         ^~~~
test.c:20:1: note: ‘NULL’ is defined in header ‘<stddef.h>’; did you forget to ‘#include <stddef.h>’?

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210507162542.23149-1-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/bti/test.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/arm64/bti/test.c b/tools/testing/selftests/arm64/bti/test.c
index 656b04976ccc..67b77ab83c20 100644
--- a/tools/testing/selftests/arm64/bti/test.c
+++ b/tools/testing/selftests/arm64/bti/test.c
@@ -6,6 +6,7 @@
 
 #include "system.h"
 
+#include <stddef.h>
 #include <linux/errno.h>
 #include <linux/auxvec.h>
 #include <linux/signal.h>
-- 
cgit v1.2.3


From e5af36b2adb858e982d78d41d7363d05d951a19a Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 21 Apr 2021 19:40:56 +0200
Subject: cpufreq: intel_pstate: Use HWP if enabled by platform firmware

It turns out that there are systems where HWP is enabled during
initialization by the platform firmware (BIOS), but HWP EPP support
is not advertised.

After commit 7aa1031223bc ("cpufreq: intel_pstate: Avoid enabling HWP
if EPP is not supported") intel_pstate refuses to use HWP on those
systems, but the fallback PERF_CTL interface does not work on them
either because of enabled HWP, and once enabled, HWP cannot be
disabled.  Consequently, the users of those systems cannot control
CPU performance scaling.

Address this issue by making intel_pstate use HWP unconditionally if
it is enabled already when the driver starts.

Fixes: 7aa1031223bc ("cpufreq: intel_pstate: Avoid enabling HWP if EPP is not supported")
Reported-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Tested-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: 5.9+ <stable@vger.kernel.org> # 5.9+
---
 drivers/cpufreq/intel_pstate.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index f0401064d7aa..0e69dffd5a76 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -3033,6 +3033,14 @@ static const struct x86_cpu_id hwp_support_ids[] __initconst = {
 	{}
 };
 
+static bool intel_pstate_hwp_is_enabled(void)
+{
+	u64 value;
+
+	rdmsrl(MSR_PM_ENABLE, value);
+	return !!(value & 0x1);
+}
+
 static int __init intel_pstate_init(void)
 {
 	const struct x86_cpu_id *id;
@@ -3051,8 +3059,12 @@ static int __init intel_pstate_init(void)
 		 * Avoid enabling HWP for processors without EPP support,
 		 * because that means incomplete HWP implementation which is a
 		 * corner case and supporting it is generally problematic.
+		 *
+		 * If HWP is enabled already, though, there is no choice but to
+		 * deal with it.
 		 */
-		if (!no_hwp && boot_cpu_has(X86_FEATURE_HWP_EPP)) {
+		if ((!no_hwp && boot_cpu_has(X86_FEATURE_HWP_EPP)) ||
+		    intel_pstate_hwp_is_enabled()) {
 			hwp_active++;
 			hwp_mode_bdw = id->driver_data;
 			intel_pstate.attr = hwp_cpufreq_attrs;
-- 
cgit v1.2.3


From 680ec0549a055eb464dce6ffb4bfb736ef87236e Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 9 May 2021 21:12:27 +0200
Subject: spi: spi-fsl-dspi: Fix a resource leak in an error handling path

'dspi_request_dma()' should be undone by a 'dspi_release_dma()' call in the
error handling path of the probe function, as already done in the remove
function

Fixes: 90ba37033cb9 ("spi: spi-fsl-dspi: Add DMA support for Vybrid")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Link: https://lore.kernel.org/r/d51caaac747277a1099ba8dea07acd85435b857e.1620587472.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-fsl-dspi.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index 028736687488..fb45e6af6638 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -1375,11 +1375,13 @@ poll_mode:
 	ret = spi_register_controller(ctlr);
 	if (ret != 0) {
 		dev_err(&pdev->dev, "Problem registering DSPI ctlr\n");
-		goto out_free_irq;
+		goto out_release_dma;
 	}
 
 	return ret;
 
+out_release_dma:
+	dspi_release_dma(dspi);
 out_free_irq:
 	if (dspi->irq)
 		free_irq(dspi->irq, dspi);
-- 
cgit v1.2.3


From dc5fa590273890a8541ce6e999d606bfb2d73797 Mon Sep 17 00:00:00 2001
From: Leilk Liu <leilk.liu@mediatek.com>
Date: Sat, 8 May 2021 14:02:14 +0800
Subject: spi: take the SPI IO-mutex in the spi_set_cs_timing method

this patch takes the io_mutex to prevent an unprotected HW
register modification in the set_cs_timing callback.

Fixes: 4cea6b8cc34e ("spi: add power control when set_cs_timing")
Signed-off-by: Leilk Liu <leilk.liu@mediatek.com>
Link: https://lore.kernel.org/r/20210508060214.1485-1-leilk.liu@mediatek.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index f9885c096563..a565e7d6bf3b 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -3457,9 +3457,12 @@ int spi_set_cs_timing(struct spi_device *spi, struct spi_delay *setup,
 
 	if (spi->controller->set_cs_timing &&
 	    !(spi->cs_gpiod || gpio_is_valid(spi->cs_gpio))) {
+		mutex_lock(&spi->controller->io_mutex);
+
 		if (spi->controller->auto_runtime_pm) {
 			status = pm_runtime_get_sync(parent);
 			if (status < 0) {
+				mutex_unlock(&spi->controller->io_mutex);
 				pm_runtime_put_noidle(parent);
 				dev_err(&spi->controller->dev, "Failed to power device: %d\n",
 					status);
@@ -3470,11 +3473,13 @@ int spi_set_cs_timing(struct spi_device *spi, struct spi_delay *setup,
 								hold, inactive);
 			pm_runtime_mark_last_busy(parent);
 			pm_runtime_put_autosuspend(parent);
-			return status;
 		} else {
-			return spi->controller->set_cs_timing(spi, setup, hold,
+			status = spi->controller->set_cs_timing(spi, setup, hold,
 							      inactive);
 		}
+
+		mutex_unlock(&spi->controller->io_mutex);
+		return status;
 	}
 
 	if ((setup && setup->unit == SPI_DELAY_UNIT_SCK) ||
-- 
cgit v1.2.3


From a3bc4ffeedf4693262fe7c6d133dcfcacd3d18c2 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 3 May 2021 11:48:26 -0300
Subject: tools headers UAPI: Update tools's copy of drm.h headers

Picking the changes from:

  b603e810f740e76b ("drm/uapi: document kernel capabilities")

Doesn't result in any tooling changes:

  $ tools/perf/trace/beauty/drm_ioctl.sh  > before
  $ cp include/uapi/drm/drm.h tools/include/uapi/drm/drm.h
  $ tools/perf/trace/beauty/drm_ioctl.sh  > after
  $ diff -u before after

Silencing these perf build warnings:

  Warning: Kernel ABI header at 'tools/include/uapi/drm/drm.h' differs from latest version at 'include/uapi/drm/drm.h'
  diff -u tools/include/uapi/drm/drm.h include/uapi/drm/drm.h

Cc: Simon Ser <contact@emersion.fr>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/drm/drm.h | 125 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 121 insertions(+), 4 deletions(-)

diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
index 0827037c5484..67b94bc3c885 100644
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -625,30 +625,147 @@ struct drm_gem_open {
 	__u64 size;
 };
 
+/**
+ * DRM_CAP_DUMB_BUFFER
+ *
+ * If set to 1, the driver supports creating dumb buffers via the
+ * &DRM_IOCTL_MODE_CREATE_DUMB ioctl.
+ */
 #define DRM_CAP_DUMB_BUFFER		0x1
+/**
+ * DRM_CAP_VBLANK_HIGH_CRTC
+ *
+ * If set to 1, the kernel supports specifying a CRTC index in the high bits of
+ * &drm_wait_vblank_request.type.
+ *
+ * Starting kernel version 2.6.39, this capability is always set to 1.
+ */
 #define DRM_CAP_VBLANK_HIGH_CRTC	0x2
+/**
+ * DRM_CAP_DUMB_PREFERRED_DEPTH
+ *
+ * The preferred bit depth for dumb buffers.
+ *
+ * The bit depth is the number of bits used to indicate the color of a single
+ * pixel excluding any padding. This is different from the number of bits per
+ * pixel. For instance, XRGB8888 has a bit depth of 24 but has 32 bits per
+ * pixel.
+ *
+ * Note that this preference only applies to dumb buffers, it's irrelevant for
+ * other types of buffers.
+ */
 #define DRM_CAP_DUMB_PREFERRED_DEPTH	0x3
+/**
+ * DRM_CAP_DUMB_PREFER_SHADOW
+ *
+ * If set to 1, the driver prefers userspace to render to a shadow buffer
+ * instead of directly rendering to a dumb buffer. For best speed, userspace
+ * should do streaming ordered memory copies into the dumb buffer and never
+ * read from it.
+ *
+ * Note that this preference only applies to dumb buffers, it's irrelevant for
+ * other types of buffers.
+ */
 #define DRM_CAP_DUMB_PREFER_SHADOW	0x4
+/**
+ * DRM_CAP_PRIME
+ *
+ * Bitfield of supported PRIME sharing capabilities. See &DRM_PRIME_CAP_IMPORT
+ * and &DRM_PRIME_CAP_EXPORT.
+ *
+ * PRIME buffers are exposed as dma-buf file descriptors. See
+ * Documentation/gpu/drm-mm.rst, section "PRIME Buffer Sharing".
+ */
 #define DRM_CAP_PRIME			0x5
+/**
+ * DRM_PRIME_CAP_IMPORT
+ *
+ * If this bit is set in &DRM_CAP_PRIME, the driver supports importing PRIME
+ * buffers via the &DRM_IOCTL_PRIME_FD_TO_HANDLE ioctl.
+ */
 #define  DRM_PRIME_CAP_IMPORT		0x1
+/**
+ * DRM_PRIME_CAP_EXPORT
+ *
+ * If this bit is set in &DRM_CAP_PRIME, the driver supports exporting PRIME
+ * buffers via the &DRM_IOCTL_PRIME_HANDLE_TO_FD ioctl.
+ */
 #define  DRM_PRIME_CAP_EXPORT		0x2
+/**
+ * DRM_CAP_TIMESTAMP_MONOTONIC
+ *
+ * If set to 0, the kernel will report timestamps with ``CLOCK_REALTIME`` in
+ * struct drm_event_vblank. If set to 1, the kernel will report timestamps with
+ * ``CLOCK_MONOTONIC``. See ``clock_gettime(2)`` for the definition of these
+ * clocks.
+ *
+ * Starting from kernel version 2.6.39, the default value for this capability
+ * is 1. Starting kernel version 4.15, this capability is always set to 1.
+ */
 #define DRM_CAP_TIMESTAMP_MONOTONIC	0x6
+/**
+ * DRM_CAP_ASYNC_PAGE_FLIP
+ *
+ * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC.
+ */
 #define DRM_CAP_ASYNC_PAGE_FLIP		0x7
-/*
- * The CURSOR_WIDTH and CURSOR_HEIGHT capabilities return a valid widthxheight
- * combination for the hardware cursor. The intention is that a hardware
- * agnostic userspace can query a cursor plane size to use.
+/**
+ * DRM_CAP_CURSOR_WIDTH
+ *
+ * The ``CURSOR_WIDTH`` and ``CURSOR_HEIGHT`` capabilities return a valid
+ * width x height combination for the hardware cursor. The intention is that a
+ * hardware agnostic userspace can query a cursor plane size to use.
  *
  * Note that the cross-driver contract is to merely return a valid size;
  * drivers are free to attach another meaning on top, eg. i915 returns the
  * maximum plane size.
  */
 #define DRM_CAP_CURSOR_WIDTH		0x8
+/**
+ * DRM_CAP_CURSOR_HEIGHT
+ *
+ * See &DRM_CAP_CURSOR_WIDTH.
+ */
 #define DRM_CAP_CURSOR_HEIGHT		0x9
+/**
+ * DRM_CAP_ADDFB2_MODIFIERS
+ *
+ * If set to 1, the driver supports supplying modifiers in the
+ * &DRM_IOCTL_MODE_ADDFB2 ioctl.
+ */
 #define DRM_CAP_ADDFB2_MODIFIERS	0x10
+/**
+ * DRM_CAP_PAGE_FLIP_TARGET
+ *
+ * If set to 1, the driver supports the &DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE and
+ * &DRM_MODE_PAGE_FLIP_TARGET_RELATIVE flags in
+ * &drm_mode_crtc_page_flip_target.flags for the &DRM_IOCTL_MODE_PAGE_FLIP
+ * ioctl.
+ */
 #define DRM_CAP_PAGE_FLIP_TARGET	0x11
+/**
+ * DRM_CAP_CRTC_IN_VBLANK_EVENT
+ *
+ * If set to 1, the kernel supports reporting the CRTC ID in
+ * &drm_event_vblank.crtc_id for the &DRM_EVENT_VBLANK and
+ * &DRM_EVENT_FLIP_COMPLETE events.
+ *
+ * Starting kernel version 4.12, this capability is always set to 1.
+ */
 #define DRM_CAP_CRTC_IN_VBLANK_EVENT	0x12
+/**
+ * DRM_CAP_SYNCOBJ
+ *
+ * If set to 1, the driver supports sync objects. See
+ * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects".
+ */
 #define DRM_CAP_SYNCOBJ		0x13
+/**
+ * DRM_CAP_SYNCOBJ_TIMELINE
+ *
+ * If set to 1, the driver supports timeline operations on sync objects. See
+ * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects".
+ */
 #define DRM_CAP_SYNCOBJ_TIMELINE	0x14
 
 /* DRM_IOCTL_GET_CAP ioctl argument type */
-- 
cgit v1.2.3


From 0fdee797d60d71e5a6fd59aa573d84a858e715dd Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 3 May 2021 11:51:17 -0300
Subject: tools headers UAPI: Sync drm/i915_drm.h with the kernel sources

To pick the changes in:

  b5b6f6a610127b17 ("drm/i915/gem: Drop legacy execbuffer support (v2)")

That don't result in any change in tooling as this is just adding a
comment.

Only silences this perf build warning:

  Warning: Kernel ABI header at 'tools/include/uapi/drm/i915_drm.h' differs from latest version at 'include/uapi/drm/i915_drm.h'
  diff -u tools/include/uapi/drm/i915_drm.h include/uapi/drm/i915_drm.h

Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Jason Ekstrand <jason@jlekstrand.net>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/drm/i915_drm.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 1987e2ea79a3..ddc47bbf48b6 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -943,6 +943,7 @@ struct drm_i915_gem_exec_object {
 	__u64 offset;
 };
 
+/* DRM_IOCTL_I915_GEM_EXECBUFFER was removed in Linux 5.13 */
 struct drm_i915_gem_execbuffer {
 	/**
 	 * List of buffers to be validated with their relocations to be
-- 
cgit v1.2.3


From b3172585b13d7171c32cfabdf938eca7fdfe9b31 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 3 May 2021 11:53:37 -0300
Subject: tools arch x86: Sync the msr-index.h copy with the kernel sources

To pick up the changes from these csets:

  d0946a882e622022 ("perf/x86/intel: Hybrid PMU support for perf capabilities")

That cause no changes to tooling as it isn't adding any new MSR, just
some capabilities for a pre-existing one:

  $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > before
  $ cp arch/x86/include/asm/msr-index.h tools/arch/x86/include/asm/msr-index.h
  $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > after
  $ diff -u before after
  $

Just silences this perf build warning:

  Warning: Kernel ABI header at 'tools/arch/x86/include/asm/msr-index.h' differs from latest version at 'arch/x86/include/asm/msr-index.h'
  diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h

Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/x86/include/asm/msr-index.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 45029354e0a8..742d89a00721 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -185,6 +185,9 @@
 #define MSR_PEBS_DATA_CFG		0x000003f2
 #define MSR_IA32_DS_AREA		0x00000600
 #define MSR_IA32_PERF_CAPABILITIES	0x00000345
+#define PERF_CAP_METRICS_IDX		15
+#define PERF_CAP_PT_IDX			16
+
 #define MSR_PEBS_LD_LAT_THRESHOLD	0x000003f6
 
 #define MSR_IA32_RTIT_CTL		0x00000570
@@ -265,6 +268,7 @@
 #define DEBUGCTLMSR_LBR			(1UL <<  0) /* last branch recording */
 #define DEBUGCTLMSR_BTF_SHIFT		1
 #define DEBUGCTLMSR_BTF			(1UL <<  1) /* single-step on branches */
+#define DEBUGCTLMSR_BUS_LOCK_DETECT	(1UL <<  2)
 #define DEBUGCTLMSR_TR			(1UL <<  6)
 #define DEBUGCTLMSR_BTS			(1UL <<  7)
 #define DEBUGCTLMSR_BTINT		(1UL <<  8)
-- 
cgit v1.2.3


From e8c1167606c63fd8f9934d0b6ce80281463a4945 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Fri, 2 Apr 2021 18:40:20 +0900
Subject: perf record: Disallow -c and -F option at the same time

It's confusing which one is effective when the both options are given.
The current code happens to use -c in this case but users might not be
aware of it.  We can change it to complain about that instead of relying
on the implicit priority.

Before:

  $ perf record -c 111111 -F 99 true
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.031 MB perf.data (8 samples) ]

  $ perf evlist -F
  cycles: sample_period=111111
  $

After:
  $ perf record -c 111111 -F 99 true
  cannot set frequency and period at the same time
  $

So this change can break existing usages, but I think it's rare to have
both options and it'd be better changing them.

Suggested-by: Alexey Alexandrov <aalexand@google.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lore.kernel.org/lkml/20210402094020.28164-1-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/record.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index f99852d54b14..43e5b563dee8 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -157,9 +157,15 @@ static int get_max_rate(unsigned int *rate)
 static int record_opts__config_freq(struct record_opts *opts)
 {
 	bool user_freq = opts->user_freq != UINT_MAX;
+	bool user_interval = opts->user_interval != ULLONG_MAX;
 	unsigned int max_rate;
 
-	if (opts->user_interval != ULLONG_MAX)
+	if (user_interval && user_freq) {
+		pr_err("cannot set frequency and period at the same time\n");
+		return -1;
+	}
+
+	if (user_interval)
 		opts->default_interval = opts->user_interval;
 	if (user_freq)
 		opts->freq = opts->user_freq;
-- 
cgit v1.2.3


From 7aa3c9eabdf76017679e975e2ffd50cde3c010b8 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Thu, 6 May 2021 15:56:40 -0700
Subject: perf jevents: Silence warning for ArchStd files

JSON files in the level 1 directory are used for ArchStd events (see
preprocess_arch_std_files), as such they shouldn't be warned about.

Signed-off-by: Ian Rogers <irogers@google.com>
Reviewed-by: John Garry <john.garry@huawei.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Joakim Zhang <qiangqing.zhang@nxp.com>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Kim Phillips <kim.phillips@amd.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lore.kernel.org/lkml/20210506225640.1461000-1-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/pmu-events/jevents.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index ed4f0bd72e5a..7422b0ea8790 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -1123,8 +1123,10 @@ static int process_one_file(const char *fpath, const struct stat *sb,
 			mapfile = strdup(fpath);
 			return 0;
 		}
-
-		pr_info("%s: Ignoring file %s\n", prog, fpath);
+		if (is_json_file(bname))
+			pr_debug("%s: ArchStd json is preprocessed %s\n", prog, fpath);
+		else
+			pr_info("%s: Ignoring file %s\n", prog, fpath);
 		return 0;
 	}
 
-- 
cgit v1.2.3


From a11c9a6e472457cf9eeafb585fc5c912f51d1b23 Mon Sep 17 00:00:00 2001
From: Dmitry Koshelev <karaghiozis@gmail.com>
Date: Thu, 6 May 2021 13:11:49 +0000
Subject: perf session: Fix swapping of cpu_map and stat_config records

'data' field in perf_record_cpu_map_data struct is 16-bit
wide and so should be swapped using bswap_16().

'nr' field in perf_record_stat_config struct should be
swapped before being used for size calculation.

Signed-off-by: Dmitry Koshelev <karaghiozis@gmail.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lore.kernel.org/lkml/20210506131244.13328-1-karaghiozis@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/session.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index a12cf4f0e97a..106b3d60881a 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -904,7 +904,7 @@ static void perf_event__cpu_map_swap(union perf_event *event,
 	struct perf_record_record_cpu_map *mask;
 	unsigned i;
 
-	data->type = bswap_64(data->type);
+	data->type = bswap_16(data->type);
 
 	switch (data->type) {
 	case PERF_CPU_MAP__CPUS:
@@ -937,7 +937,7 @@ static void perf_event__stat_config_swap(union perf_event *event,
 {
 	u64 size;
 
-	size  = event->stat_config.nr * sizeof(event->stat_config.data[0]);
+	size  = bswap_64(event->stat_config.nr) * sizeof(event->stat_config.data[0]);
 	size += 1; /* nr item itself */
 	mem_bswap_64(&event->stat_config.nr, size);
 }
-- 
cgit v1.2.3


From ad1237c30d975535a669746496cbed136aa5a045 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sat, 8 May 2021 22:50:20 +0200
Subject: perf tools: Fix dynamic libbpf link

Justin reported broken build with LIBBPF_DYNAMIC=1.

When linking libbpf dynamically we need to use perf's
hashmap object, because it's not exported in libbpf.so
(only in libbpf.a).

Following build is now passing:

  $ make LIBBPF_DYNAMIC=1
    BUILD:   Doing 'make -j8' parallel build
    ...
  $ ldd perf | grep libbpf
        libbpf.so.0 => /lib64/libbpf.so.0 (0x00007fa7630db000)

Fixes: eee19501926d ("perf tools: Grab a copy of libbpf's hashmap")
Reported-by: Justin M. Forbes <jforbes@redhat.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lore.kernel.org/lkml/20210508205020.617984-1-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile.config | 1 +
 tools/perf/util/Build      | 7 +++++++
 2 files changed, 8 insertions(+)

diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 0d6619064a83..406a9519145e 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -540,6 +540,7 @@ ifndef NO_LIBELF
       ifdef LIBBPF_DYNAMIC
         ifeq ($(feature-libbpf), 1)
           EXTLIBS += -lbpf
+          $(call detected,CONFIG_LIBBPF_DYNAMIC)
         else
           dummy := $(error Error: No libbpf devel library found, please install libbpf-devel);
         endif
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 8c0d9f368ebc..b64bdc1a7026 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -145,7 +145,14 @@ perf-$(CONFIG_LIBELF) += symbol-elf.o
 perf-$(CONFIG_LIBELF) += probe-file.o
 perf-$(CONFIG_LIBELF) += probe-event.o
 
+ifdef CONFIG_LIBBPF_DYNAMIC
+  hashmap := 1
+endif
 ifndef CONFIG_LIBBPF
+  hashmap := 1
+endif
+
+ifdef hashmap
 perf-y += hashmap.o
 endif
 
-- 
cgit v1.2.3


From 0d943d5fde6070c2661a99618ea95b99655589ad Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 9 May 2021 09:39:02 -0300
Subject: tools headers UAPI: Sync linux/kvm.h with the kernel sources

To pick the changes in:

  15fb7de1a7f5af0d ("KVM: SVM: Add KVM_SEV_RECEIVE_UPDATE_DATA command")
  3bf725699bf62494 ("KVM: arm64: Add support for the KVM PTP service")
  4cfdd47d6d95aca4 ("KVM: SVM: Add KVM_SEV SEND_START command")
  54526d1fd59338fd ("KVM: x86: Support KVM VMs sharing SEV context")
  5569e2e7a650dfff ("KVM: SVM: Add support for KVM_SEV_SEND_CANCEL command")
  8b13c36493d8cb56 ("KVM: introduce KVM_CAP_SET_GUEST_DEBUG2")
  af43cbbf954b50ca ("KVM: SVM: Add support for KVM_SEV_RECEIVE_START command")
  d3d1af85e2c75bb5 ("KVM: SVM: Add KVM_SEND_UPDATE_DATA command")
  fe7e948837f312d8 ("KVM: x86: Add capability to grant VM access to privileged SGX attribute")

That don't cause any change in tooling as it doesn't introduce any new
ioctl.

  $ grep kvm tools/perf/trace/beauty/*.sh
  tools/perf/trace/beauty/kvm_ioctl.sh:printf "static const char *kvm_ioctl_cmds[] = {\n"
  tools/perf/trace/beauty/kvm_ioctl.sh:egrep $regex ${header_dir}/kvm.h	| \
  $
  $ tools/perf/trace/beauty/kvm_ioctl.sh > before
  $ cp include/uapi/linux/kvm.h tools/include/uapi/linux/kvm.h
  $ tools/perf/trace/beauty/kvm_ioctl.sh > after
  $ diff -u before after
  $

This silences this perf build warning:

  Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h'
  diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h

Cc: Brijesh Singh <brijesh.singh@amd.com>
Cc: Jianyong Wu <jianyong.wu@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Nathan Tempelman <natet@google.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Steve Rutherford <srutherford@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/linux/kvm.h | 45 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index f6afee209620..3fd9a7e9d90c 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -1078,6 +1078,10 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_DIRTY_LOG_RING 192
 #define KVM_CAP_X86_BUS_LOCK_EXIT 193
 #define KVM_CAP_PPC_DAWR1 194
+#define KVM_CAP_SET_GUEST_DEBUG2 195
+#define KVM_CAP_SGX_ATTRIBUTE 196
+#define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197
+#define KVM_CAP_PTP_KVM 198
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1671,6 +1675,8 @@ enum sev_cmd_id {
 	KVM_SEV_CERT_EXPORT,
 	/* Attestation report */
 	KVM_SEV_GET_ATTESTATION_REPORT,
+	/* Guest Migration Extension */
+	KVM_SEV_SEND_CANCEL,
 
 	KVM_SEV_NR_MAX,
 };
@@ -1729,6 +1735,45 @@ struct kvm_sev_attestation_report {
 	__u32 len;
 };
 
+struct kvm_sev_send_start {
+	__u32 policy;
+	__u64 pdh_cert_uaddr;
+	__u32 pdh_cert_len;
+	__u64 plat_certs_uaddr;
+	__u32 plat_certs_len;
+	__u64 amd_certs_uaddr;
+	__u32 amd_certs_len;
+	__u64 session_uaddr;
+	__u32 session_len;
+};
+
+struct kvm_sev_send_update_data {
+	__u64 hdr_uaddr;
+	__u32 hdr_len;
+	__u64 guest_uaddr;
+	__u32 guest_len;
+	__u64 trans_uaddr;
+	__u32 trans_len;
+};
+
+struct kvm_sev_receive_start {
+	__u32 handle;
+	__u32 policy;
+	__u64 pdh_uaddr;
+	__u32 pdh_len;
+	__u64 session_uaddr;
+	__u32 session_len;
+};
+
+struct kvm_sev_receive_update_data {
+	__u64 hdr_uaddr;
+	__u32 hdr_len;
+	__u64 guest_uaddr;
+	__u32 guest_len;
+	__u64 trans_uaddr;
+	__u32 trans_len;
+};
+
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)
 #define KVM_DEV_ASSIGN_MASK_INTX	(1 << 2)
-- 
cgit v1.2.3


From b35629bc2fd59691504debda99c320cf966c8e3a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 9 May 2021 09:45:25 -0300
Subject: tools headers kvm: Sync kvm headers with the kernel sources

To pick the changes in:

  3c0c2ad1ae75963c ("KVM: VMX: Add basic handling of VM-Exit from SGX enclave")

None of them trigger any changes in tooling, this time this is just to silence
these perf build warnings:

  Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/vmx.h' differs from latest version at 'arch/x86/include/uapi/asm/vmx.h'
  diff -u tools/arch/x86/include/uapi/asm/vmx.h arch/x86/include/uapi/asm/vmx.h

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/x86/include/uapi/asm/vmx.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h
index b8e650a985e3..946d761adbd3 100644
--- a/tools/arch/x86/include/uapi/asm/vmx.h
+++ b/tools/arch/x86/include/uapi/asm/vmx.h
@@ -27,6 +27,7 @@
 
 
 #define VMX_EXIT_REASONS_FAILED_VMENTRY         0x80000000
+#define VMX_EXIT_REASONS_SGX_ENCLAVE_MODE	0x08000000
 
 #define EXIT_REASON_EXCEPTION_NMI       0
 #define EXIT_REASON_EXTERNAL_INTERRUPT  1
-- 
cgit v1.2.3


From a00b7e39d6b56e6f49cdd51a9ebf92627a19d877 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Fri, 7 May 2021 17:54:35 +0900
Subject: perf tools: Fix a build error on arm64 with clang

Since clang's -Wmissing-field-initializers warns if a data
structure is initialized with a signle NULL as below,

 ----
 tools/perf $ make CC=clang LLVM=1
 ...
 arch/arm64/util/kvm-stat.c:74:9: error: missing field 'ops' initializer [-Werror,-Wmissing-field-initializers]
         { NULL },
                ^
 1 error generated.
 ----

add another field initializer expressly as same as other
arch's kvm-stat.c code.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Anders Roxell <anders.roxell@linaro.org>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Link: http://lore.kernel.org/lkml/162037767540.94840.15758657049033010518.stgit@devnote2
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/arm64/util/kvm-stat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/arch/arm64/util/kvm-stat.c b/tools/perf/arch/arm64/util/kvm-stat.c
index 2303256b7d05..73d18e0ed6f6 100644
--- a/tools/perf/arch/arm64/util/kvm-stat.c
+++ b/tools/perf/arch/arm64/util/kvm-stat.c
@@ -71,7 +71,7 @@ struct kvm_reg_events_ops kvm_reg_events_ops[] = {
 		.name	= "vmexit",
 		.ops	= &exit_events,
 	},
-	{ NULL },
+	{ NULL, NULL },
 };
 
 const char * const kvm_skip_events[] = {
-- 
cgit v1.2.3


From f8bcb061ea013a9b39a071b9dd9f6ea0aa2caf72 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 9 May 2021 09:55:30 -0300
Subject: tools headers UAPI: Sync files changed by landlock, quotactl_path and
 mount_settattr new syscalls
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To pick the changes in these csets:

  a49f4f81cb48925e ("arch: Wire up Landlock syscalls")
  2a1867219c7b27f9 ("fs: add mount_setattr()")
  fa8b90070a80bb1a ("quota: wire up quotactl_path")

That silences these perf build warnings and add support for those new
syscalls in tools such as 'perf trace'.

For instance, this is now possible:

  # ~acme/bin/perf trace -v -e landlock*
  event qualifier tracepoint filter: (common_pid != 129365 && common_pid != 3502) && (id == 444 || id == 445 || id == 446)
  ^C#

That is tha filter expression attached to the raw_syscalls:sys_{enter,exit}
tracepoints.

  $ grep landlock tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
  444	common	landlock_create_ruleset	sys_landlock_create_ruleset
  445	common	landlock_add_rule	sys_landlock_add_rule
  446	common	landlock_restrict_self	sys_landlock_restrict_self
  $

This addresses these perf build warnings:

  Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/unistd.h' differs from latest version at 'include/uapi/asm-generic/unistd.h'
  diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h
  Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl'
  diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl
  Warning: Kernel ABI header at 'tools/perf/arch/powerpc/entry/syscalls/syscall.tbl' differs from latest version at 'arch/powerpc/kernel/syscalls/syscall.tbl'
  diff -u tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl
  Warning: Kernel ABI header at 'tools/perf/arch/s390/entry/syscalls/syscall.tbl' differs from latest version at 'arch/s390/kernel/syscalls/syscall.tbl'
  diff -u tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl
  Warning: Kernel ABI header at 'tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl' differs from latest version at 'arch/mips/kernel/syscalls/syscall_n64.tbl'
  diff -u tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl arch/mips/kernel/syscalls/syscall_n64.tbl

Cc: Christian Brauner <christian.brauner@ubuntu.com>
Cc: James Morris <jamorris@linux.microsoft.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Mickaël Salaün <mic@linux.microsoft.com>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/asm-generic/unistd.h             | 11 ++++++++++-
 tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl |  5 +++++
 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl  |  4 ++++
 tools/perf/arch/s390/entry/syscalls/syscall.tbl     |  4 ++++
 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl   |  4 ++++
 5 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index ce58cff99b66..6de5a7fc066b 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -863,9 +863,18 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise)
 __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2)
 #define __NR_mount_setattr 442
 __SYSCALL(__NR_mount_setattr, sys_mount_setattr)
+#define __NR_quotactl_path 443
+__SYSCALL(__NR_quotactl_path, sys_quotactl_path)
+
+#define __NR_landlock_create_ruleset 444
+__SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)
+#define __NR_landlock_add_rule 445
+__SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule)
+#define __NR_landlock_restrict_self 446
+__SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self)
 
 #undef __NR_syscalls
-#define __NR_syscalls 443
+#define __NR_syscalls 447
 
 /*
  * 32 bit systems traditionally used different
diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
index 91649690b52f..9974f5f8e49b 100644
--- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
+++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
@@ -356,3 +356,8 @@
 439	n64	faccessat2			sys_faccessat2
 440	n64	process_madvise			sys_process_madvise
 441	n64	epoll_pwait2			sys_epoll_pwait2
+442	n64	mount_setattr			sys_mount_setattr
+443	n64	quotactl_path			sys_quotactl_path
+444	n64	landlock_create_ruleset		sys_landlock_create_ruleset
+445	n64	landlock_add_rule		sys_landlock_add_rule
+446	n64	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 0b2480cf3e47..2e68fbb57cc6 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -522,3 +522,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2		compat_sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
+443	common	quotactl_path			sys_quotactl_path
+444	common	landlock_create_ruleset		sys_landlock_create_ruleset
+445	common	landlock_add_rule		sys_landlock_add_rule
+446	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index 3abef2144dac..7e4a2aba366d 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -445,3 +445,7 @@
 440  common	process_madvise		sys_process_madvise		sys_process_madvise
 441  common	epoll_pwait2		sys_epoll_pwait2		compat_sys_epoll_pwait2
 442  common	mount_setattr		sys_mount_setattr		sys_mount_setattr
+443  common	quotactl_path		sys_quotactl_path		sys_quotactl_path
+444  common	landlock_create_ruleset	sys_landlock_create_ruleset	sys_landlock_create_ruleset
+445  common	landlock_add_rule	sys_landlock_add_rule		sys_landlock_add_rule
+446  common	landlock_restrict_self	sys_landlock_restrict_self	sys_landlock_restrict_self
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 7bf01cbe582f..ecd551b08d05 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -364,6 +364,10 @@
 440	common	process_madvise		sys_process_madvise
 441	common	epoll_pwait2		sys_epoll_pwait2
 442	common	mount_setattr		sys_mount_setattr
+443	common	quotactl_path		sys_quotactl_path
+444	common	landlock_create_ruleset	sys_landlock_create_ruleset
+445	common	landlock_add_rule	sys_landlock_add_rule
+446	common	landlock_restrict_self	sys_landlock_restrict_self
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
-- 
cgit v1.2.3


From 5a80ee4219a52194f0e815bbceec40eb32c523ec Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 9 May 2021 10:06:40 -0300
Subject: tools headers UAPI: Sync linux/prctl.h with the kernel sources

To pick a new prctl introduced in:

  201698626fbca1cf ("arm64: Introduce prctl(PR_PAC_{SET,GET}_ENABLED_KEYS)")

That results in

  $ grep prctl tools/perf/trace/beauty/*.sh
  tools/perf/trace/beauty/prctl_option.sh:printf "static const char *prctl_options[] = {\n"
  tools/perf/trace/beauty/prctl_option.sh:egrep $regex ${header_dir}/prctl.h | grep -v PR_SET_PTRACER | \
  tools/perf/trace/beauty/prctl_option.sh:printf "static const char *prctl_set_mm_options[] = {\n"
  tools/perf/trace/beauty/prctl_option.sh:egrep $regex ${header_dir}/prctl.h | \
  tools/perf/trace/beauty/x86_arch_prctl.sh:prctl_arch_header=${x86_header_dir}/prctl.h
  tools/perf/trace/beauty/x86_arch_prctl.sh:	printf "#define x86_arch_prctl_codes_%d_offset %s\n" $idx $first_entry
  tools/perf/trace/beauty/x86_arch_prctl.sh:	printf "static const char *x86_arch_prctl_codes_%d[] = {\n" $idx
  tools/perf/trace/beauty/x86_arch_prctl.sh:	egrep -q $regex ${prctl_arch_header} && \
  tools/perf/trace/beauty/x86_arch_prctl.sh:	(egrep $regex ${prctl_arch_header} | \
  $ tools/perf/trace/beauty/prctl_option.sh > before
  $ cp include/uapi/linux/prctl.h tools/include/uapi/linux/prctl.h
  $ tools/perf/trace/beauty/prctl_option.sh > after
  $ diff -u before after
  --- before	2021-05-09 10:06:10.064559675 -0300
  +++ after	2021-05-09 10:06:21.319791396 -0300
  @@ -54,6 +54,8 @@
   	[57] = "SET_IO_FLUSHER",
   	[58] = "GET_IO_FLUSHER",
   	[59] = "SET_SYSCALL_USER_DISPATCH",
  +	[60] = "PAC_SET_ENABLED_KEYS",
  +	[61] = "PAC_GET_ENABLED_KEYS",
   };
   static const char *prctl_set_mm_options[] = {
   	[1] = "START_CODE",
  $

Now users can do:

  # perf trace -e syscalls:sys_enter_prctl --filter "option==PAC_GET_ENABLED_KEYS"
^C#
  # trace -v -e syscalls:sys_enter_prctl --filter "option==PAC_GET_ENABLED_KEYS"
  New filter for syscalls:sys_enter_prctl: (option==0x3d) && (common_pid != 5519 && common_pid != 3404)
^C#

And also when prctl appears in a session, its options will be
translated to the string.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Peter Collingbourne <pcc@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/linux/prctl.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index 667f1aed091c..18a9f59dc067 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -255,4 +255,8 @@ struct prctl_mm_map {
 # define SYSCALL_DISPATCH_FILTER_ALLOW	0
 # define SYSCALL_DISPATCH_FILTER_BLOCK	1
 
+/* Set/get enabled arm64 pointer authentication keys */
+#define PR_PAC_SET_ENABLED_KEYS		60
+#define PR_PAC_GET_ENABLED_KEYS		61
+
 #endif /* _LINUX_PRCTL_H */
-- 
cgit v1.2.3


From fb24e308b6310541e70d11a3f19dc40742974b95 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 9 May 2021 10:19:37 -0300
Subject: tools arch: Update arch/x86/lib/mem{cpy,set}_64.S copies used in
 'perf bench mem memcpy'

To bring in the change made in this cset:

 5e21a3ecad1500e3 ("x86/alternative: Merge include files")

This just silences these perf tools build warnings, no change in the tools:

  Warning: Kernel ABI header at 'tools/arch/x86/lib/memcpy_64.S' differs from latest version at 'arch/x86/lib/memcpy_64.S'
  diff -u tools/arch/x86/lib/memcpy_64.S arch/x86/lib/memcpy_64.S
  Warning: Kernel ABI header at 'tools/arch/x86/lib/memset_64.S' differs from latest version at 'arch/x86/lib/memset_64.S'
  diff -u tools/arch/x86/lib/memset_64.S arch/x86/lib/memset_64.S

Cc: Borislav Petkov <bp@suse.de>
Cc: Juergen Gross <jgross@suse.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/x86/lib/memcpy_64.S      |  2 +-
 tools/arch/x86/lib/memset_64.S      |  2 +-
 tools/include/asm/alternative-asm.h | 10 ----------
 tools/include/asm/alternative.h     | 10 ++++++++++
 4 files changed, 12 insertions(+), 12 deletions(-)
 delete mode 100644 tools/include/asm/alternative-asm.h
 create mode 100644 tools/include/asm/alternative.h

diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
index 1e299ac73c86..1cc9da6e29c7 100644
--- a/tools/arch/x86/lib/memcpy_64.S
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -4,7 +4,7 @@
 #include <linux/linkage.h>
 #include <asm/errno.h>
 #include <asm/cpufeatures.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
 #include <asm/export.h>
 
 .pushsection .noinstr.text, "ax"
diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S
index 0bfd26e4ca9e..9827ae267f96 100644
--- a/tools/arch/x86/lib/memset_64.S
+++ b/tools/arch/x86/lib/memset_64.S
@@ -3,7 +3,7 @@
 
 #include <linux/linkage.h>
 #include <asm/cpufeatures.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
 #include <asm/export.h>
 
 /*
diff --git a/tools/include/asm/alternative-asm.h b/tools/include/asm/alternative-asm.h
deleted file mode 100644
index b54bd860dff6..000000000000
--- a/tools/include/asm/alternative-asm.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _TOOLS_ASM_ALTERNATIVE_ASM_H
-#define _TOOLS_ASM_ALTERNATIVE_ASM_H
-
-/* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */
-
-#define altinstruction_entry #
-#define ALTERNATIVE_2 #
-
-#endif
diff --git a/tools/include/asm/alternative.h b/tools/include/asm/alternative.h
new file mode 100644
index 000000000000..b54bd860dff6
--- /dev/null
+++ b/tools/include/asm/alternative.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_ASM_ALTERNATIVE_ASM_H
+#define _TOOLS_ASM_ALTERNATIVE_ASM_H
+
+/* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */
+
+#define altinstruction_entry #
+#define ALTERNATIVE_2 #
+
+#endif
-- 
cgit v1.2.3


From 3916329309eace19e8c32bc821064a119474c309 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 9 May 2021 10:21:33 -0300
Subject: tools include UAPI powerpc: Sync errno.h with the kernel headers

To pick the change in:

  7de21e679e6a789f ("powerpc: fix EDEADLOCK redefinition error in uapi/asm/errno.h")

That will make the errno number -> string tables to pick this change on powerpc.

Silencing this perf build warning:

  Warning: Kernel ABI header at 'tools/arch/powerpc/include/uapi/asm/errno.h' differs from latest version at 'arch/powerpc/include/uapi/asm/errno.h'
  diff -u tools/arch/powerpc/include/uapi/asm/errno.h arch/powerpc/include/uapi/asm/errno.h

Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Tony Ambardar <tony.ambardar@gmail.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/powerpc/include/uapi/asm/errno.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/arch/powerpc/include/uapi/asm/errno.h b/tools/arch/powerpc/include/uapi/asm/errno.h
index cc79856896a1..4ba87de32be0 100644
--- a/tools/arch/powerpc/include/uapi/asm/errno.h
+++ b/tools/arch/powerpc/include/uapi/asm/errno.h
@@ -2,6 +2,7 @@
 #ifndef _ASM_POWERPC_ERRNO_H
 #define _ASM_POWERPC_ERRNO_H
 
+#undef	EDEADLOCK
 #include <asm-generic/errno.h>
 
 #undef	EDEADLOCK
-- 
cgit v1.2.3


From 6faf64f5248166ecaf50107e883c383e0b66bb70 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 9 May 2021 10:24:29 -0300
Subject: tools headers cpufeatures: Sync with the kernel sources

To pick the changes from:

  4e6292114c741221 ("x86/paravirt: Add new features for paravirt patching")
  a161545ab53b174c ("x86/cpufeatures: Enumerate Intel Hybrid Technology feature bit")
  a89dfde3dc3c2dbf ("x86: Remove dynamic NOP selection")
  b8921dccf3b25798 ("x86/cpufeatures: Add SGX1 and SGX2 sub-features")
  f21d4d3b97a86035 ("x86/cpufeatures: Enumerate #DB for bus lock detection")
  f333374e108e7e4c ("x86/cpufeatures: Add the Virtual SPEC_CTRL feature")

This only causes these perf files to be rebuilt:

  CC       /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o
  CC       /tmp/build/perf/bench/mem-memset-x86-64-asm.o

And addresses this perf build warning:

  Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h'
  diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h

Cc: Babu Moger <babu.moger@amd.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/x86/include/asm/cpufeatures.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index cc96e26d69f7..ac37830ae941 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -84,7 +84,7 @@
 
 /* CPU types for specific tunings: */
 #define X86_FEATURE_K8			( 3*32+ 4) /* "" Opteron, Athlon64 */
-#define X86_FEATURE_K7			( 3*32+ 5) /* "" Athlon */
+/* FREE, was #define X86_FEATURE_K7			( 3*32+ 5) "" Athlon */
 #define X86_FEATURE_P3			( 3*32+ 6) /* "" P3 */
 #define X86_FEATURE_P4			( 3*32+ 7) /* "" P4 */
 #define X86_FEATURE_CONSTANT_TSC	( 3*32+ 8) /* TSC ticks at a constant rate */
@@ -236,6 +236,8 @@
 #define X86_FEATURE_EPT_AD		( 8*32+17) /* Intel Extended Page Table access-dirty bit */
 #define X86_FEATURE_VMCALL		( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */
 #define X86_FEATURE_VMW_VMMCALL		( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
+#define X86_FEATURE_PVUNLOCK		( 8*32+20) /* "" PV unlock function */
+#define X86_FEATURE_VCPUPREEMPT		( 8*32+21) /* "" PV vcpu_is_preempted function */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
 #define X86_FEATURE_FSGSBASE		( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
@@ -290,6 +292,8 @@
 #define X86_FEATURE_FENCE_SWAPGS_KERNEL	(11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
 #define X86_FEATURE_SPLIT_LOCK_DETECT	(11*32+ 6) /* #AC for split lock */
 #define X86_FEATURE_PER_THREAD_MBA	(11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
+#define X86_FEATURE_SGX1		(11*32+ 8) /* "" Basic SGX */
+#define X86_FEATURE_SGX2		(11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX_VNNI		(12*32+ 4) /* AVX VNNI instructions */
@@ -336,6 +340,7 @@
 #define X86_FEATURE_AVIC		(15*32+13) /* Virtual Interrupt Controller */
 #define X86_FEATURE_V_VMSAVE_VMLOAD	(15*32+15) /* Virtual VMSAVE VMLOAD */
 #define X86_FEATURE_VGIF		(15*32+16) /* Virtual GIF */
+#define X86_FEATURE_V_SPEC_CTRL		(15*32+20) /* Virtual SPEC_CTRL */
 #define X86_FEATURE_SVME_ADDR_CHK	(15*32+28) /* "" SVME addr check */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
@@ -354,6 +359,7 @@
 #define X86_FEATURE_AVX512_VPOPCNTDQ	(16*32+14) /* POPCNT for vectors of DW/QW */
 #define X86_FEATURE_LA57		(16*32+16) /* 5-level page tables */
 #define X86_FEATURE_RDPID		(16*32+22) /* RDPID instruction */
+#define X86_FEATURE_BUS_LOCK_DETECT	(16*32+24) /* Bus Lock detect */
 #define X86_FEATURE_CLDEMOTE		(16*32+25) /* CLDEMOTE instruction */
 #define X86_FEATURE_MOVDIRI		(16*32+27) /* MOVDIRI instruction */
 #define X86_FEATURE_MOVDIR64B		(16*32+28) /* MOVDIR64B instruction */
@@ -374,6 +380,7 @@
 #define X86_FEATURE_MD_CLEAR		(18*32+10) /* VERW clears CPU buffers */
 #define X86_FEATURE_TSX_FORCE_ABORT	(18*32+13) /* "" TSX_FORCE_ABORT */
 #define X86_FEATURE_SERIALIZE		(18*32+14) /* SERIALIZE instruction */
+#define X86_FEATURE_HYBRID_CPU		(18*32+15) /* "" This part has CPUs of more than one type */
 #define X86_FEATURE_TSXLDTRK		(18*32+16) /* TSX Suspend Load Address Tracking */
 #define X86_FEATURE_PCONFIG		(18*32+18) /* Intel PCONFIG */
 #define X86_FEATURE_ARCH_LBR		(18*32+19) /* Intel ARCH LBR */
-- 
cgit v1.2.3


From 71d7924b3e8acaca6a3b0fc3261170031ada3b70 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 9 May 2021 10:29:10 -0300
Subject: tools headers UAPI: Sync perf_event.h with the kernel sources

To pick up the changes in:

  2b26f0aa004995f4 ("perf: Support only inheriting events if cloned with CLONE_THREAD")
  2e498d0a74e5b88a ("perf: Add support for event removal on exec")
  547b60988e631f74 ("perf: aux: Add flags for the buffer format")
  55bcf6ef314ae8ba ("perf: Extend PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE")
  7dde51767ca5339e ("perf: aux: Add CoreSight PMU buffer formats")
  97ba62b278674293 ("perf: Add support for SIGTRAP on perf events")
  d0d1dd628527c77d ("perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event")

Also change the expected sizeof(struct perf_event_attr) from 120 to 128 due to
fields being added for the SIGTRAP changes.

Addressing this perf build warning:

  Warning: Kernel ABI header at 'tools/include/uapi/linux/perf_event.h' differs from latest version at 'include/uapi/linux/perf_event.h'
  diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h

Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Marco Elver <elver@google.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/linux/perf_event.h   | 26 +++++++++++++++++++++-----
 tools/perf/tests/attr/base-record       |  2 +-
 tools/perf/tests/attr/base-stat         |  2 +-
 tools/perf/tests/attr/system-wide-dummy |  2 +-
 4 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 14332f4cf816..bf8143505c49 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -127,6 +127,7 @@ enum perf_sw_ids {
 	PERF_COUNT_SW_EMULATION_FAULTS		= 8,
 	PERF_COUNT_SW_DUMMY			= 9,
 	PERF_COUNT_SW_BPF_OUTPUT		= 10,
+	PERF_COUNT_SW_CGROUP_SWITCHES		= 11,
 
 	PERF_COUNT_SW_MAX,			/* non-ABI */
 };
@@ -326,6 +327,7 @@ enum perf_event_read_format {
 #define PERF_ATTR_SIZE_VER4	104	/* add: sample_regs_intr */
 #define PERF_ATTR_SIZE_VER5	112	/* add: aux_watermark */
 #define PERF_ATTR_SIZE_VER6	120	/* add: aux_sample_size */
+#define PERF_ATTR_SIZE_VER7	128	/* add: sig_data */
 
 /*
  * Hardware event_id to monitor via a performance monitoring event:
@@ -404,7 +406,10 @@ struct perf_event_attr {
 				cgroup         :  1, /* include cgroup events */
 				text_poke      :  1, /* include text poke events */
 				build_id       :  1, /* use build id in mmap2 events */
-				__reserved_1   : 29;
+				inherit_thread :  1, /* children only inherit if cloned with CLONE_THREAD */
+				remove_on_exec :  1, /* event is removed from task on exec */
+				sigtrap        :  1, /* send synchronous SIGTRAP on event */
+				__reserved_1   : 26;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -456,6 +461,12 @@ struct perf_event_attr {
 	__u16	__reserved_2;
 	__u32	aux_sample_size;
 	__u32	__reserved_3;
+
+	/*
+	 * User provided data if sigtrap=1, passed back to user via
+	 * siginfo_t::si_perf, e.g. to permit user to identify the event.
+	 */
+	__u64	sig_data;
 };
 
 /*
@@ -1171,10 +1182,15 @@ enum perf_callchain_context {
 /**
  * PERF_RECORD_AUX::flags bits
  */
-#define PERF_AUX_FLAG_TRUNCATED		0x01	/* record was truncated to fit */
-#define PERF_AUX_FLAG_OVERWRITE		0x02	/* snapshot from overwrite mode */
-#define PERF_AUX_FLAG_PARTIAL		0x04	/* record contains gaps */
-#define PERF_AUX_FLAG_COLLISION		0x08	/* sample collided with another */
+#define PERF_AUX_FLAG_TRUNCATED			0x01	/* record was truncated to fit */
+#define PERF_AUX_FLAG_OVERWRITE			0x02	/* snapshot from overwrite mode */
+#define PERF_AUX_FLAG_PARTIAL			0x04	/* record contains gaps */
+#define PERF_AUX_FLAG_COLLISION			0x08	/* sample collided with another */
+#define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK	0xff00	/* PMU specific trace format type */
+
+/* CoreSight PMU AUX buffer formats */
+#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT	0x0000 /* Default for backward compatibility */
+#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW		0x0100 /* Raw format of the source */
 
 #define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
 #define PERF_FLAG_FD_OUTPUT		(1UL << 1)
diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record
index 645009c08b3c..4a7b8deef3fd 100644
--- a/tools/perf/tests/attr/base-record
+++ b/tools/perf/tests/attr/base-record
@@ -5,7 +5,7 @@ group_fd=-1
 flags=0|8
 cpu=*
 type=0|1
-size=120
+size=128
 config=0
 sample_period=*
 sample_type=263
diff --git a/tools/perf/tests/attr/base-stat b/tools/perf/tests/attr/base-stat
index b0f42c34882e..408164456530 100644
--- a/tools/perf/tests/attr/base-stat
+++ b/tools/perf/tests/attr/base-stat
@@ -5,7 +5,7 @@ group_fd=-1
 flags=0|8
 cpu=*
 type=0
-size=120
+size=128
 config=0
 sample_period=0
 sample_type=65536
diff --git a/tools/perf/tests/attr/system-wide-dummy b/tools/perf/tests/attr/system-wide-dummy
index eba723cc0d38..86a15dd359d9 100644
--- a/tools/perf/tests/attr/system-wide-dummy
+++ b/tools/perf/tests/attr/system-wide-dummy
@@ -7,7 +7,7 @@ cpu=*
 pid=-1
 flags=8
 type=1
-size=120
+size=128
 config=9
 sample_period=4000
 sample_type=455
-- 
cgit v1.2.3


From 29038ae2ae566d9441e81cda3539db17c20bf06a Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 10 May 2021 14:02:17 +0200
Subject: Revert "Revert "ACPI: scan: Turn off unused power resources during
 initialization""

Revert commit 5db91e9cb5b3 ("Revert "ACPI: scan: Turn off unused
power resources during initialization") which was not necessary.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/internal.h | 1 +
 drivers/acpi/power.c    | 2 +-
 drivers/acpi/scan.c     | 2 ++
 drivers/acpi/sleep.h    | 1 -
 4 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index e6a5d997241c..9fcefcdc1dbe 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -139,6 +139,7 @@ int acpi_device_sleep_wake(struct acpi_device *dev,
 int acpi_power_get_inferred_state(struct acpi_device *device, int *state);
 int acpi_power_on_resources(struct acpi_device *device, int state);
 int acpi_power_transition(struct acpi_device *device, int state);
+void acpi_turn_off_unused_power_resources(void);
 
 /* --------------------------------------------------------------------------
                               Device Power Management
diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
index 7e69931be828..bacae6d178ff 100644
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -996,6 +996,7 @@ void acpi_resume_power_resources(void)
 
 	mutex_unlock(&power_resource_list_lock);
 }
+#endif
 
 void acpi_turn_off_unused_power_resources(void)
 {
@@ -1016,4 +1017,3 @@ void acpi_turn_off_unused_power_resources(void)
 
 	mutex_unlock(&power_resource_list_lock);
 }
-#endif
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index a184529d8fa4..1584c9e463bd 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -2360,6 +2360,8 @@ int __init acpi_scan_init(void)
 		}
 	}
 
+	acpi_turn_off_unused_power_resources();
+
 	acpi_scan_initialized = true;
 
  out:
diff --git a/drivers/acpi/sleep.h b/drivers/acpi/sleep.h
index 1856f76ac83f..7fe41ee489d6 100644
--- a/drivers/acpi/sleep.h
+++ b/drivers/acpi/sleep.h
@@ -8,7 +8,6 @@ extern struct list_head acpi_wakeup_device_list;
 extern struct mutex acpi_device_lock;
 
 extern void acpi_resume_power_resources(void);
-extern void acpi_turn_off_unused_power_resources(void);
 
 static inline acpi_status acpi_set_waking_vector(u32 wakeup_address)
 {
-- 
cgit v1.2.3


From 14b6cff54edaca5740068e9ed070152727ed7718 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 22 Apr 2021 17:26:19 +0200
Subject: staging: rtl8723bs: avoid bogus gcc warning

gcc gets confused by some of the type casts and produces an
apparently senseless warning about an out-of-bound memcpy to
an unrelated array in the same structure:

drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c: In function 'rtw_cfg80211_ap_set_encryption':
cc1: error: writing 8 bytes into a region of size 0 [-Werror=stringop-overflow=]
In file included from drivers/staging/rtl8723bs/include/drv_types.h:32,
                 from drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c:10:
drivers/staging/rtl8723bs/include/rtw_security.h:98:15: note: at offset [184, 4264] into destination object 'dot11AuthAlgrthm' of size 4
   98 |         u32   dot11AuthAlgrthm;         /*  802.11 auth, could be open, shared, 8021x and authswitch */
      |               ^~~~~~~~~~~~~~~~
cc1: error: writing 8 bytes into a region of size 0 [-Werror=stringop-overflow=]
drivers/staging/rtl8723bs/include/rtw_security.h:98:15: note: at offset [264, 4344] into destination object 'dot11AuthAlgrthm' of size 4

This is a known gcc bug, and the patch here is only a workaround,
but the approach of using a temporary variable to hold a pointer
to the key also improves readability in addition to avoiding the
warning, so overall this should still help.

Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99673
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/20210422152648.2891996-1-arnd@kernel.org
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c | 23 +++++++++++++----------
 drivers/staging/rtl8723bs/os_dep/ioctl_linux.c    | 21 ++++++++++++---------
 2 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
index c1dac6eec59f..a6d731e959a2 100644
--- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
+++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
@@ -527,6 +527,9 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa
 	struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
 	struct security_priv *psecuritypriv =  &(padapter->securitypriv);
 	struct sta_priv *pstapriv = &padapter->stapriv;
+	char *grpkey = padapter->securitypriv.dot118021XGrpKey[param->u.crypt.idx].skey;
+	char *txkey = padapter->securitypriv.dot118021XGrptxmickey[param->u.crypt.idx].skey;
+	char *rxkey = padapter->securitypriv.dot118021XGrprxmickey[param->u.crypt.idx].skey;
 
 	param->u.crypt.err = 0;
 	param->u.crypt.alg[IEEE_CRYPT_ALG_NAME_LEN - 1] = '\0';
@@ -609,7 +612,7 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa
 		{
 			if (strcmp(param->u.crypt.alg, "WEP") == 0)
 			{
-				memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+				memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
 
 				psecuritypriv->dot118021XGrpPrivacy = _WEP40_;
 				if (param->u.crypt.key_len == 13)
@@ -622,12 +625,12 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa
 			{
 				psecuritypriv->dot118021XGrpPrivacy = _TKIP_;
 
-				memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+				memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
 
 				/* DEBUG_ERR("set key length :param->u.crypt.key_len =%d\n", param->u.crypt.key_len); */
 				/* set mic key */
-				memcpy(psecuritypriv->dot118021XGrptxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[16]), 8);
-				memcpy(psecuritypriv->dot118021XGrprxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[24]), 8);
+				memcpy(txkey, &(param->u.crypt.key[16]), 8);
+				memcpy(rxkey, &(param->u.crypt.key[24]), 8);
 
 				psecuritypriv->busetkipkey = true;
 
@@ -636,7 +639,7 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa
 			{
 				psecuritypriv->dot118021XGrpPrivacy = _AES_;
 
-				memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+				memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
 			}
 			else
 			{
@@ -713,7 +716,7 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa
 			{
 				if (strcmp(param->u.crypt.alg, "WEP") == 0)
 				{
-					memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+					memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
 
 					psecuritypriv->dot118021XGrpPrivacy = _WEP40_;
 					if (param->u.crypt.key_len == 13)
@@ -725,12 +728,12 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa
 				{
 					psecuritypriv->dot118021XGrpPrivacy = _TKIP_;
 
-					memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+					memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
 
 					/* DEBUG_ERR("set key length :param->u.crypt.key_len =%d\n", param->u.crypt.key_len); */
 					/* set mic key */
-					memcpy(psecuritypriv->dot118021XGrptxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[16]), 8);
-					memcpy(psecuritypriv->dot118021XGrprxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[24]), 8);
+					memcpy(txkey, &(param->u.crypt.key[16]), 8);
+					memcpy(rxkey, &(param->u.crypt.key[24]), 8);
 
 					psecuritypriv->busetkipkey = true;
 
@@ -739,7 +742,7 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa
 				{
 					psecuritypriv->dot118021XGrpPrivacy = _AES_;
 
-					memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+					memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
 				}
 				else
 				{
diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
index e98e5388d5c7..5088c3731b6d 100644
--- a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
+++ b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
@@ -2963,6 +2963,9 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param,
 	struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
 	struct security_priv *psecuritypriv = &(padapter->securitypriv);
 	struct sta_priv *pstapriv = &padapter->stapriv;
+	char *txkey = padapter->securitypriv.dot118021XGrptxmickey[param->u.crypt.idx].skey;
+	char *rxkey = padapter->securitypriv.dot118021XGrprxmickey[param->u.crypt.idx].skey;
+	char *grpkey = psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey;
 
 	param->u.crypt.err = 0;
 	param->u.crypt.alg[IEEE_CRYPT_ALG_NAME_LEN - 1] = '\0';
@@ -3064,7 +3067,7 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param,
 	if (!psta && check_fwstate(pmlmepriv, WIFI_AP_STATE)) { /*  group key */
 		if (param->u.crypt.set_tx == 1) {
 			if (strcmp(param->u.crypt.alg, "WEP") == 0) {
-				memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+				memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
 
 				psecuritypriv->dot118021XGrpPrivacy = _WEP40_;
 				if (param->u.crypt.key_len == 13)
@@ -3073,11 +3076,11 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param,
 			} else if (strcmp(param->u.crypt.alg, "TKIP") == 0) {
 				psecuritypriv->dot118021XGrpPrivacy = _TKIP_;
 
-				memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+				memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
 
 				/* DEBUG_ERR("set key length :param->u.crypt.key_len =%d\n", param->u.crypt.key_len); */
 				/* set mic key */
-				memcpy(psecuritypriv->dot118021XGrptxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[16]), 8);
+				memcpy(txkey, &(param->u.crypt.key[16]), 8);
 				memcpy(psecuritypriv->dot118021XGrprxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[24]), 8);
 
 				psecuritypriv->busetkipkey = true;
@@ -3086,7 +3089,7 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param,
 			else if (strcmp(param->u.crypt.alg, "CCMP") == 0) {
 				psecuritypriv->dot118021XGrpPrivacy = _AES_;
 
-				memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+				memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
 			} else {
 				psecuritypriv->dot118021XGrpPrivacy = _NO_PRIVACY_;
 			}
@@ -3142,7 +3145,7 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param,
 
 			} else { /* group key??? */
 				if (strcmp(param->u.crypt.alg, "WEP") == 0) {
-					memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+					memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
 
 					psecuritypriv->dot118021XGrpPrivacy = _WEP40_;
 					if (param->u.crypt.key_len == 13)
@@ -3150,19 +3153,19 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param,
 				} else if (strcmp(param->u.crypt.alg, "TKIP") == 0) {
 					psecuritypriv->dot118021XGrpPrivacy = _TKIP_;
 
-					memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+					memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
 
 					/* DEBUG_ERR("set key length :param->u.crypt.key_len =%d\n", param->u.crypt.key_len); */
 					/* set mic key */
-					memcpy(psecuritypriv->dot118021XGrptxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[16]), 8);
-					memcpy(psecuritypriv->dot118021XGrprxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[24]), 8);
+					memcpy(txkey, &(param->u.crypt.key[16]), 8);
+					memcpy(rxkey, &(param->u.crypt.key[24]), 8);
 
 					psecuritypriv->busetkipkey = true;
 
 				} else if (strcmp(param->u.crypt.alg, "CCMP") == 0) {
 					psecuritypriv->dot118021XGrpPrivacy = _AES_;
 
-					memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+					memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
 				} else {
 					psecuritypriv->dot118021XGrpPrivacy = _NO_PRIVACY_;
 				}
-- 
cgit v1.2.3


From cabb1bb60e88ccaaa122ba01862403cd44e8e8f8 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Mon, 26 Apr 2021 19:55:58 +0200
Subject: mmc: meson-gx: make replace WARN_ONCE with dev_warn_once about
 scatterlist offset alignment

Some drivers like ath10k can sometimg give an sg buffer with an offset whose alignment
is not compatible with the Amlogic DMA descriptor engine requirements.

Simply replace with dev_warn_once() to inform user this should be fixed to avoid
degraded performance.

This should be ultimately fixed in ath10k, but since it's only a performance issue
the warning should be removed.

Fixes: 79ed05e329c3 ("mmc: meson-gx: add support for descriptor chain mode")
Cc: stable@vger.kernel.org
Reported-by: Christian Hewitt <christianshewitt@gmail.com>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Acked-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Link: https://lore.kernel.org/r/20210426175559.3110575-1-narmstrong@baylibre.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/meson-gx-mmc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c
index b8b771b643cc..1c61f0f24c09 100644
--- a/drivers/mmc/host/meson-gx-mmc.c
+++ b/drivers/mmc/host/meson-gx-mmc.c
@@ -258,7 +258,9 @@ static void meson_mmc_get_transfer_mode(struct mmc_host *mmc,
 	for_each_sg(data->sg, sg, data->sg_len, i) {
 		/* check for 8 byte alignment */
 		if (sg->offset % 8) {
-			WARN_ONCE(1, "unaligned scatterlist buffer\n");
+			dev_warn_once(mmc_dev(mmc),
+				      "unaligned sg offset %u, disabling descriptor DMA for transfer\n",
+				      sg->offset);
 			return;
 		}
 	}
-- 
cgit v1.2.3


From 9b81354d7ebc1fd17f666a168dcabf27dae290bd Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Mon, 26 Apr 2021 19:55:59 +0200
Subject: mmc: meson-gx: also check SD_IO_RW_EXTENDED for scatterlist size
 alignment

The brcmfmac driver can generate a scatterlist from a skb with each packets
not aligned to the block size. This is not supported by the Amlogic Descriptor
dma engine where each descriptor must match a multiple of the block size.

The sg list is valid, since the sum of the sg buffers is a multiple of the
block size, but we must discard those when in SD_IO_RW_EXTENDED mode since
SDIO block mode can be used under the hood even with data->blocks == 1.

Those transfers are very rare, thus can be replaced by a bounce buffer
without real performance loss.

Fixes: 7412dee9f1fd ("mmc: meson-gx: replace WARN_ONCE with dev_warn_once about scatterlist size alignment in block mode")
Cc: stable@vger.kernel.org
Reported-by: Christian Hewitt <christianshewitt@gmail.com>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Link: https://lore.kernel.org/r/20210426175559.3110575-2-narmstrong@baylibre.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/meson-gx-mmc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c
index 1c61f0f24c09..016a6106151a 100644
--- a/drivers/mmc/host/meson-gx-mmc.c
+++ b/drivers/mmc/host/meson-gx-mmc.c
@@ -236,7 +236,8 @@ static void meson_mmc_get_transfer_mode(struct mmc_host *mmc,
 	if (host->dram_access_quirk)
 		return;
 
-	if (data->blocks > 1) {
+	/* SD_IO_RW_EXTENDED (CMD53) can also use block mode under the hood */
+	if (data->blocks > 1 || mrq->cmd->opcode == SD_IO_RW_EXTENDED) {
 		/*
 		 * In block mode DMA descriptor format, "length" field indicates
 		 * number of blocks and there is no way to pass DMA size that
-- 
cgit v1.2.3


From a1149a6c06ee094a6e62886b0c0e8e66967a728a Mon Sep 17 00:00:00 2001
From: Daniel Beer <dlbeer@gmail.com>
Date: Sat, 24 Apr 2021 20:16:52 +1200
Subject: mmc: sdhci-pci-gli: increase 1.8V regulator wait

Inserting an SD-card on an Intel NUC10i3FNK4 (which contains a GL9755)
results in the message:

    mmc0: 1.8V regulator output did not become stable

Following this message, some cards work (sometimes), but most cards fail
with EILSEQ. This behaviour is observed on Debian 10 running kernel
4.19.188, but also with 5.8.18 and 5.11.15.

The driver currently waits 5ms after switching on the 1.8V regulator for
it to become stable. Increasing this to 10ms gets rid of the warning
about stability, but most cards still fail. Increasing it to 20ms gets
some cards working (a 32GB Samsung micro SD works, a 128GB ADATA
doesn't). At 50ms, the ADATA works most of the time, and at 100ms both
cards work reliably.

Signed-off-by: Daniel Beer <dlbeer@gmail.com>
Acked-by: Ben Chuang <benchuanggli@gmail.com>
Fixes: e51df6ce668a ("mmc: host: sdhci-pci: Add Genesys Logic GL975x support")
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210424081652.GA16047@nyquist.nev
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-pci-gli.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c
index 592d79082f58..061618aa247f 100644
--- a/drivers/mmc/host/sdhci-pci-gli.c
+++ b/drivers/mmc/host/sdhci-pci-gli.c
@@ -627,8 +627,13 @@ static void sdhci_gli_voltage_switch(struct sdhci_host *host)
 	 *
 	 * Wait 5ms after set 1.8V signal enable in Host Control 2 register
 	 * to ensure 1.8V signal enable bit is set by GL9750/GL9755.
+	 *
+	 * ...however, the controller in the NUC10i3FNK4 (a 9755) requires
+	 * slightly longer than 5ms before the control register reports that
+	 * 1.8V is ready, and far longer still before the card will actually
+	 * work reliably.
 	 */
-	usleep_range(5000, 5500);
+	usleep_range(100000, 110000);
 }
 
 static void sdhci_gl9750_reset(struct sdhci_host *host, u8 mask)
-- 
cgit v1.2.3


From 18abf874367456540846319574864e6ff32752e2 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Mon, 26 Apr 2021 11:26:22 +0200
Subject: cdc-wdm: untangle a circular dependency between callback and softint

We have a cycle of callbacks scheduling works which submit
URBs with those callbacks. This needs to be blocked, stopped
and unblocked to untangle the circle.

Signed-off-by: Oliver Neukum <oneukum@suse.com>
Link: https://lore.kernel.org/r/20210426092622.20433-1-oneukum@suse.com
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/class/cdc-wdm.c | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
index 508b1c3f8b73..d1e4a7379beb 100644
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -321,12 +321,23 @@ exit:
 
 }
 
-static void kill_urbs(struct wdm_device *desc)
+static void poison_urbs(struct wdm_device *desc)
 {
 	/* the order here is essential */
-	usb_kill_urb(desc->command);
-	usb_kill_urb(desc->validity);
-	usb_kill_urb(desc->response);
+	usb_poison_urb(desc->command);
+	usb_poison_urb(desc->validity);
+	usb_poison_urb(desc->response);
+}
+
+static void unpoison_urbs(struct wdm_device *desc)
+{
+	/*
+	 *  the order here is not essential
+	 *  it is symmetrical just to be nice
+	 */
+	usb_unpoison_urb(desc->response);
+	usb_unpoison_urb(desc->validity);
+	usb_unpoison_urb(desc->command);
 }
 
 static void free_urbs(struct wdm_device *desc)
@@ -741,11 +752,12 @@ static int wdm_release(struct inode *inode, struct file *file)
 	if (!desc->count) {
 		if (!test_bit(WDM_DISCONNECTING, &desc->flags)) {
 			dev_dbg(&desc->intf->dev, "wdm_release: cleanup\n");
-			kill_urbs(desc);
+			poison_urbs(desc);
 			spin_lock_irq(&desc->iuspin);
 			desc->resp_count = 0;
 			spin_unlock_irq(&desc->iuspin);
 			desc->manage_power(desc->intf, 0);
+			unpoison_urbs(desc);
 		} else {
 			/* must avoid dev_printk here as desc->intf is invalid */
 			pr_debug(KBUILD_MODNAME " %s: device gone - cleaning up\n", __func__);
@@ -1037,9 +1049,9 @@ static void wdm_disconnect(struct usb_interface *intf)
 	wake_up_all(&desc->wait);
 	mutex_lock(&desc->rlock);
 	mutex_lock(&desc->wlock);
+	poison_urbs(desc);
 	cancel_work_sync(&desc->rxwork);
 	cancel_work_sync(&desc->service_outs_intr);
-	kill_urbs(desc);
 	mutex_unlock(&desc->wlock);
 	mutex_unlock(&desc->rlock);
 
@@ -1080,9 +1092,10 @@ static int wdm_suspend(struct usb_interface *intf, pm_message_t message)
 		set_bit(WDM_SUSPENDING, &desc->flags);
 		spin_unlock_irq(&desc->iuspin);
 		/* callback submits work - order is essential */
-		kill_urbs(desc);
+		poison_urbs(desc);
 		cancel_work_sync(&desc->rxwork);
 		cancel_work_sync(&desc->service_outs_intr);
+		unpoison_urbs(desc);
 	}
 	if (!PMSG_IS_AUTO(message)) {
 		mutex_unlock(&desc->wlock);
@@ -1140,7 +1153,7 @@ static int wdm_pre_reset(struct usb_interface *intf)
 	wake_up_all(&desc->wait);
 	mutex_lock(&desc->rlock);
 	mutex_lock(&desc->wlock);
-	kill_urbs(desc);
+	poison_urbs(desc);
 	cancel_work_sync(&desc->rxwork);
 	cancel_work_sync(&desc->service_outs_intr);
 	return 0;
@@ -1151,6 +1164,7 @@ static int wdm_post_reset(struct usb_interface *intf)
 	struct wdm_device *desc = wdm_find_device(intf);
 	int rv;
 
+	unpoison_urbs(desc);
 	clear_bit(WDM_OVERFLOW, &desc->flags);
 	clear_bit(WDM_RESETTING, &desc->flags);
 	rv = recover_from_urb_loss(desc);
-- 
cgit v1.2.3


From 04357fafea9c7ed34525eb9680c760245c3bb958 Mon Sep 17 00:00:00 2001
From: Ferry Toth <ftoth@exalondelft.nl>
Date: Sun, 25 Apr 2021 17:09:47 +0200
Subject: usb: dwc3: pci: Enable usb2-gadget-lpm-disable for Intel Merrifield

On Intel Merrifield LPM is causing host to reset port after a timeout.
By disabling LPM entirely this is prevented.

Fixes: 066c09593454 ("usb: dwc3: pci: Enable extcon driver for Intel Merrifield")
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Ferry Toth <ftoth@exalondelft.nl>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210425150947.5862-1-ftoth@exalondelft.nl
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/dwc3-pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index e7b932dcbf82..1e51460938b8 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -123,6 +123,7 @@ static const struct property_entry dwc3_pci_mrfld_properties[] = {
 	PROPERTY_ENTRY_STRING("linux,extcon-name", "mrfld_bcove_pwrsrc"),
 	PROPERTY_ENTRY_BOOL("snps,dis_u3_susphy_quirk"),
 	PROPERTY_ENTRY_BOOL("snps,dis_u2_susphy_quirk"),
+	PROPERTY_ENTRY_BOOL("snps,usb2-gadget-lpm-disable"),
 	PROPERTY_ENTRY_BOOL("linux,sysdev_is_parent"),
 	{}
 };
-- 
cgit v1.2.3


From 9cbc7eb17cdf6d1adaa2aebfe0079077d31d39a9 Mon Sep 17 00:00:00 2001
From: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
Date: Mon, 26 Apr 2021 14:08:40 -0700
Subject: usb: dwc3: core: Add missing GHWPARAMS9 doc

Add missing documentation for struct dwc3_hwparams new field hwparams9
to avoid kernel doc build warning.

Fixes: 16710380d3aa ("usb: dwc3: Capture new capability register GHWPARAMS9")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Acked-by: Felipe Balbi <balbi@kernel.org>
Signed-off-by: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
Link: https://lore.kernel.org/r/f4c491f7614e623755fafe640b7e690e7c5634e2.1619471127.git.Thinh.Nguyen@synopsys.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/core.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index b1e875c58f20..3859d8cad3cb 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -850,6 +850,7 @@ struct dwc3_trb {
  * @hwparams6: GHWPARAMS6
  * @hwparams7: GHWPARAMS7
  * @hwparams8: GHWPARAMS8
+ * @hwparams9: GHWPARAMS9
  */
 struct dwc3_hwparams {
 	u32	hwparams0;
-- 
cgit v1.2.3


From 6c05cdbb9ef1de0264cac9135f6e90dad1e8763f Mon Sep 17 00:00:00 2001
From: Fabio Estevam <festevam@gmail.com>
Date: Sun, 25 Apr 2021 12:32:53 -0300
Subject: usb: Restore the reference to ch9.h

Keep the textual reference to ch9.h as it was prior to commit
caa93d9bd2d7 ("usb: Fix up movement of USB core kerneldoc location").

As linux/usb/ch9.h does not contain comments anymore, explain
that drivers/usb/common/common.c includes such header and provides
declarations of a few utilities routines for manipulating the data types
from ch9.h. Also mention that drivers/usb/common/debug.c contains
some functions for creating debug output.

Fixes: caa93d9bd2d7 ("usb: Fix up movement of USB core kerneldoc location")
Reported-by: Alan Stern <stern@rowland.harvard.edu>
Suggested-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Fabio Estevam <festevam@gmail.com>
Link: https://lore.kernel.org/r/20210425153253.2542816-1-festevam@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/driver-api/usb/usb.rst | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/Documentation/driver-api/usb/usb.rst b/Documentation/driver-api/usb/usb.rst
index 543e70434da2..820e867af45a 100644
--- a/Documentation/driver-api/usb/usb.rst
+++ b/Documentation/driver-api/usb/usb.rst
@@ -109,16 +109,19 @@ well as to make sure they aren't relying on some HCD-specific behavior.
 USB-Standard Types
 ==================
 
-In ``drivers/usb/common/common.c`` and ``drivers/usb/common/debug.c`` you
-will find the USB data types defined in chapter 9 of the USB specification.
-These data types are used throughout USB, and in APIs including this host
-side API, gadget APIs, usb character devices and debugfs interfaces.
+In ``include/uapi/linux/usb/ch9.h`` you will find the USB data types defined
+in chapter 9 of the USB specification. These data types are used throughout
+USB, and in APIs including this host side API, gadget APIs, usb character
+devices and debugfs interfaces. That file is itself included by
+``include/linux/usb/ch9.h``, which also contains declarations of a few
+utility routines for manipulating these data types; the implementations
+are in ``drivers/usb/common/common.c``.
 
 .. kernel-doc:: drivers/usb/common/common.c
    :export:
 
-.. kernel-doc:: drivers/usb/common/debug.c
-   :export:
+In addition, some functions useful for creating debugging output are
+defined in ``drivers/usb/common/debug.c``.
 
 Host-Side Data Types and Macros
 ===============================
-- 
cgit v1.2.3


From d1d90dd27254c44d087ad3f8b5b3e4fff0571f45 Mon Sep 17 00:00:00 2001
From: Jack Pham <jackp@codeaurora.org>
Date: Wed, 28 Apr 2021 02:01:10 -0700
Subject: usb: dwc3: gadget: Enable suspend events

commit 72704f876f50 ("dwc3: gadget: Implement the suspend entry event
handler") introduced (nearly 5 years ago!) an interrupt handler for
U3/L1-L2 suspend events.  The problem is that these events aren't
currently enabled in the DEVTEN register so the handler is never
even invoked.  Fix this simply by enabling the corresponding bit
in dwc3_gadget_enable_irq() using the same revision check as found
in the handler.

Fixes: 72704f876f50 ("dwc3: gadget: Implement the suspend entry event handler")
Acked-by: Felipe Balbi <balbi@kernel.org>
Signed-off-by: Jack Pham <jackp@codeaurora.org>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210428090111.3370-1-jackp@codeaurora.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/gadget.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index dd80e5ca8c78..cab3a9184068 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2323,6 +2323,10 @@ static void dwc3_gadget_enable_irq(struct dwc3 *dwc)
 	if (DWC3_VER_IS_PRIOR(DWC3, 250A))
 		reg |= DWC3_DEVTEN_ULSTCNGEN;
 
+	/* On 2.30a and above this bit enables U3/L2-L1 Suspend Events */
+	if (!DWC3_VER_IS_PRIOR(DWC3, 230A))
+		reg |= DWC3_DEVTEN_EOPFEN;
+
 	dwc3_writel(dwc->regs, DWC3_DEVTEN, reg);
 }
 
-- 
cgit v1.2.3


From 6f26ebb79a84bcad211cb2d8a2ef74dfc427322d Mon Sep 17 00:00:00 2001
From: Jack Pham <jackp@codeaurora.org>
Date: Wed, 28 Apr 2021 02:01:11 -0700
Subject: usb: dwc3: gadget: Rename EOPF event macros to Suspend

The device event corresponding to End of Periodic Frame is only
found on older IP revisions (2.10a and prior, according to a
cursory SNPS databook search).  On revisions 2.30a and newer,
including DWC3.1, the same event value and corresponding DEVTEN
bit were repurposed to indicate that the link has gone into
suspend state (U3 or L2/L1).

EOPF events had never been enabled before in this driver, and
going forward we expect current and future DWC3-based devices
won't likely to be using such old DWC3 IP revisions either.
Hence rather than keeping the deprecated EOPF macro names let's
rename them to indicate their usage for suspend events.

Acked-by: Felipe Balbi <balbi@kernel.org>
Signed-off-by: Jack Pham <jackp@codeaurora.org>
Link: https://lore.kernel.org/r/20210428090111.3370-2-jackp@codeaurora.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/core.h   | 6 +++---
 drivers/usb/dwc3/debug.h  | 8 ++++----
 drivers/usb/dwc3/gadget.c | 4 ++--
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index 3859d8cad3cb..c5d5760cdf53 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -57,7 +57,7 @@
 #define DWC3_DEVICE_EVENT_LINK_STATUS_CHANGE	3
 #define DWC3_DEVICE_EVENT_WAKEUP		4
 #define DWC3_DEVICE_EVENT_HIBER_REQ		5
-#define DWC3_DEVICE_EVENT_EOPF			6
+#define DWC3_DEVICE_EVENT_SUSPEND		6
 #define DWC3_DEVICE_EVENT_SOF			7
 #define DWC3_DEVICE_EVENT_ERRATIC_ERROR		9
 #define DWC3_DEVICE_EVENT_CMD_CMPL		10
@@ -460,7 +460,7 @@
 #define DWC3_DEVTEN_CMDCMPLTEN		BIT(10)
 #define DWC3_DEVTEN_ERRTICERREN		BIT(9)
 #define DWC3_DEVTEN_SOFEN		BIT(7)
-#define DWC3_DEVTEN_EOPFEN		BIT(6)
+#define DWC3_DEVTEN_U3L2L1SUSPEN	BIT(6)
 #define DWC3_DEVTEN_HIBERNATIONREQEVTEN	BIT(5)
 #define DWC3_DEVTEN_WKUPEVTEN		BIT(4)
 #define DWC3_DEVTEN_ULSTCNGEN		BIT(3)
@@ -1375,7 +1375,7 @@ struct dwc3_event_depevt {
  *	3	- ULStChng
  *	4	- WkUpEvt
  *	5	- Reserved
- *	6	- EOPF
+ *	6	- Suspend (EOPF on revisions 2.10a and prior)
  *	7	- SOF
  *	8	- Reserved
  *	9	- ErrticErr
diff --git a/drivers/usb/dwc3/debug.h b/drivers/usb/dwc3/debug.h
index db231de46bb3..d0ac89c5b317 100644
--- a/drivers/usb/dwc3/debug.h
+++ b/drivers/usb/dwc3/debug.h
@@ -221,8 +221,8 @@ static inline const char *dwc3_gadget_event_string(char *str, size_t size,
 		snprintf(str, size, "WakeUp [%s]",
 				dwc3_gadget_link_string(state));
 		break;
-	case DWC3_DEVICE_EVENT_EOPF:
-		snprintf(str, size, "End-Of-Frame [%s]",
+	case DWC3_DEVICE_EVENT_SUSPEND:
+		snprintf(str, size, "Suspend [%s]",
 				dwc3_gadget_link_string(state));
 		break;
 	case DWC3_DEVICE_EVENT_SOF:
@@ -353,8 +353,8 @@ static inline const char *dwc3_gadget_event_type_string(u8 event)
 		return "Wake-Up";
 	case DWC3_DEVICE_EVENT_HIBER_REQ:
 		return "Hibernation";
-	case DWC3_DEVICE_EVENT_EOPF:
-		return "End of Periodic Frame";
+	case DWC3_DEVICE_EVENT_SUSPEND:
+		return "Suspend";
 	case DWC3_DEVICE_EVENT_SOF:
 		return "Start of Frame";
 	case DWC3_DEVICE_EVENT_ERRATIC_ERROR:
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index cab3a9184068..6eab78f8a1a7 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2325,7 +2325,7 @@ static void dwc3_gadget_enable_irq(struct dwc3 *dwc)
 
 	/* On 2.30a and above this bit enables U3/L2-L1 Suspend Events */
 	if (!DWC3_VER_IS_PRIOR(DWC3, 230A))
-		reg |= DWC3_DEVTEN_EOPFEN;
+		reg |= DWC3_DEVTEN_U3L2L1SUSPEN;
 
 	dwc3_writel(dwc->regs, DWC3_DEVTEN, reg);
 }
@@ -3744,7 +3744,7 @@ static void dwc3_gadget_interrupt(struct dwc3 *dwc,
 	case DWC3_DEVICE_EVENT_LINK_STATUS_CHANGE:
 		dwc3_gadget_linksts_change_interrupt(dwc, event->event_info);
 		break;
-	case DWC3_DEVICE_EVENT_EOPF:
+	case DWC3_DEVICE_EVENT_SUSPEND:
 		/* It changed to be suspend event for version 2.30a and above */
 		if (!DWC3_VER_IS_PRIOR(DWC3, 230A)) {
 			/*
-- 
cgit v1.2.3


From 75a41ce46bae6cbe7d3bb2584eb844291d642874 Mon Sep 17 00:00:00 2001
From: Phil Elwell <phil@raspberrypi.com>
Date: Thu, 6 May 2021 12:22:00 +0100
Subject: usb: dwc2: Fix gadget DMA unmap direction

The dwc2 gadget support maps and unmaps DMA buffers as necessary. When
mapping and unmapping it uses the direction of the endpoint to select
the direction of the DMA transfer, but this fails for Control OUT
transfers because the unmap occurs after the endpoint direction has
been reversed for the status phase.

A possible solution would be to unmap the buffer before the direction
is changed, but a safer, less invasive fix is to remember the buffer
direction independently of the endpoint direction.

Fixes: fe0b94abcdf6 ("usb: dwc2: gadget: manage ep0 state in software")
Acked-by: Minas Harutyunyan <Minas.Harutyunyan@synopsys.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Phil Elwell <phil@raspberrypi.com>
Link: https://lore.kernel.org/r/20210506112200.2893922-1-phil@raspberrypi.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc2/core.h   | 2 ++
 drivers/usb/dwc2/gadget.c | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h
index da5ac4a4595b..ab6b815e0089 100644
--- a/drivers/usb/dwc2/core.h
+++ b/drivers/usb/dwc2/core.h
@@ -113,6 +113,7 @@ struct dwc2_hsotg_req;
  * @debugfs: File entry for debugfs file for this endpoint.
  * @dir_in: Set to true if this endpoint is of the IN direction, which
  *          means that it is sending data to the Host.
+ * @map_dir: Set to the value of dir_in when the DMA buffer is mapped.
  * @index: The index for the endpoint registers.
  * @mc: Multi Count - number of transactions per microframe
  * @interval: Interval for periodic endpoints, in frames or microframes.
@@ -162,6 +163,7 @@ struct dwc2_hsotg_ep {
 	unsigned short		fifo_index;
 
 	unsigned char           dir_in;
+	unsigned char           map_dir;
 	unsigned char           index;
 	unsigned char           mc;
 	u16                     interval;
diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
index e6bb1bdb2760..184964174dc0 100644
--- a/drivers/usb/dwc2/gadget.c
+++ b/drivers/usb/dwc2/gadget.c
@@ -422,7 +422,7 @@ static void dwc2_hsotg_unmap_dma(struct dwc2_hsotg *hsotg,
 {
 	struct usb_request *req = &hs_req->req;
 
-	usb_gadget_unmap_request(&hsotg->gadget, req, hs_ep->dir_in);
+	usb_gadget_unmap_request(&hsotg->gadget, req, hs_ep->map_dir);
 }
 
 /*
@@ -1242,6 +1242,7 @@ static int dwc2_hsotg_map_dma(struct dwc2_hsotg *hsotg,
 {
 	int ret;
 
+	hs_ep->map_dir = hs_ep->dir_in;
 	ret = usb_gadget_map_request(&hsotg->gadget, req, hs_ep->dir_in);
 	if (ret)
 		goto dma_error;
-- 
cgit v1.2.3


From bb9c74a5bd1462499fe5ccb1e3c5ac40dcfa9139 Mon Sep 17 00:00:00 2001
From: Jack Pham <jackp@codeaurora.org>
Date: Sat, 1 May 2021 02:35:58 -0700
Subject: usb: dwc3: gadget: Free gadget structure only after freeing endpoints

As part of commit e81a7018d93a ("usb: dwc3: allocate gadget structure
dynamically") the dwc3_gadget_release() was added which will free
the dwc->gadget structure upon the device's removal when
usb_del_gadget_udc() is called in dwc3_gadget_exit().

However, simply freeing the gadget results a dangling pointer
situation: the endpoints created in dwc3_gadget_init_endpoints()
have their dep->endpoint.ep_list members chained off the list_head
anchored at dwc->gadget->ep_list.  Thus when dwc->gadget is freed,
the first dwc3_ep in the list now has a dangling prev pointer and
likewise for the next pointer of the dwc3_ep at the tail of the list.
The dwc3_gadget_free_endpoints() that follows will result in a
use-after-free when it calls list_del().

This was caught by enabling KASAN and performing a driver unbind.
The recent commit 568262bf5492 ("usb: dwc3: core: Add shutdown
callback for dwc3") also exposes this as a panic during shutdown.

There are a few possibilities to fix this.  One could be to perform
a list_del() of the gadget->ep_list itself which removes it from
the rest of the dwc3_ep chain.

Another approach is what this patch does, by splitting up the
usb_del_gadget_udc() call into its separate "del" and "put"
components.  This allows dwc3_gadget_free_endpoints() to be
called before the gadget is finally freed with usb_put_gadget().

Fixes: e81a7018d93a ("usb: dwc3: allocate gadget structure dynamically")
Reviewed-by: Peter Chen <peter.chen@kernel.org>
Signed-off-by: Jack Pham <jackp@codeaurora.org>
Link: https://lore.kernel.org/r/20210501093558.7375-1-jackp@codeaurora.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/gadget.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 6eab78f8a1a7..dd1342403bb2 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -4062,8 +4062,9 @@ err0:
 
 void dwc3_gadget_exit(struct dwc3 *dwc)
 {
-	usb_del_gadget_udc(dwc->gadget);
+	usb_del_gadget(dwc->gadget);
 	dwc3_gadget_free_endpoints(dwc);
+	usb_put_gadget(dwc->gadget);
 	dma_free_coherent(dwc->sysdev, DWC3_BOUNCE_SIZE, dwc->bounce,
 			  dwc->bounce_addr);
 	kfree(dwc->setup_buf);
-- 
cgit v1.2.3


From 18ffa988dbae69cc6e9949cddd9606f6fe533894 Mon Sep 17 00:00:00 2001
From: Wesley Cheng <wcheng@codeaurora.org>
Date: Fri, 7 May 2021 10:55:19 -0700
Subject: usb: dwc3: gadget: Return success always for kick transfer in ep
 queue

If an error is received when issuing a start or update transfer
command, the error handler will stop all active requests (including
the current USB request), and call dwc3_gadget_giveback() to notify
function drivers of the requests which have been stopped.  Avoid
returning an error for kick transfer during EP queue, to remove
duplicate cleanup operations on the request being queued.

Fixes: 8d99087c2db8 ("usb: dwc3: gadget: Properly handle failed kick_transfer")
cc: stable@vger.kernel.org
Signed-off-by: Wesley Cheng <wcheng@codeaurora.org>
Link: https://lore.kernel.org/r/1620410119-24971-1-git-send-email-wcheng@codeaurora.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/gadget.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index dd1342403bb2..49ca5da5e279 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1684,7 +1684,9 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req)
 		}
 	}
 
-	return __dwc3_gadget_kick_transfer(dep);
+	__dwc3_gadget_kick_transfer(dep);
+
+	return 0;
 }
 
 static int dwc3_gadget_ep_queue(struct usb_ep *ep, struct usb_request *request,
-- 
cgit v1.2.3


From b96992081fde19806b5beb5b25f9327820ead77b Mon Sep 17 00:00:00 2001
From: Li Jun <jun.li@nxp.com>
Date: Fri, 30 Apr 2021 14:57:16 +0800
Subject: usb: dwc3: imx8mp: detect dwc3 core node via compatible string

New schema of usb controller DT-node should be named with prefix
"^usb(@.*)?", dt changed the node name, but missed counter part
change in driver, fix it by switching to use compatible string as
the dwc3 core compatible string keeps "snps,dwc3" in all dt.

Fixes: d1689cd3c0f4 ("arm64: dts: imx8mp: Use the correct name for child node "snps, dwc3"")
Acked-by: Felipe Balbi <balbi@kernel.org>
Signed-off-by: Li Jun <jun.li@nxp.com>
Link: https://lore.kernel.org/r/1619765836-20387-1-git-send-email-jun.li@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/dwc3-imx8mp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/dwc3/dwc3-imx8mp.c b/drivers/usb/dwc3/dwc3-imx8mp.c
index b13cfab89d53..e9fced6f7a7c 100644
--- a/drivers/usb/dwc3/dwc3-imx8mp.c
+++ b/drivers/usb/dwc3/dwc3-imx8mp.c
@@ -165,7 +165,7 @@ static int dwc3_imx8mp_probe(struct platform_device *pdev)
 	if (err < 0)
 		goto disable_rpm;
 
-	dwc3_np = of_get_child_by_name(node, "dwc3");
+	dwc3_np = of_get_compatible_child(node, "snps,dwc3");
 	if (!dwc3_np) {
 		dev_err(dev, "failed to find dwc3 core child\n");
 		goto disable_rpm;
-- 
cgit v1.2.3


From 0b2b149e918f6dddb4ea53615551bf7bc131f875 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Sat, 8 May 2021 09:53:10 +0800
Subject: usb: dwc3: imx8mp: fix error return code in dwc3_imx8mp_probe()

Fix to return a negative error code from the error handling case instead
of 0, as done elsewhere in this function.

Fixes: 6dd2565989b4 ("usb: dwc3: add imx8mp dwc3 glue layer driver")
Reported-by: Hulk Robot <hulkci@huawei.com>
Acked-by: Felipe Balbi <balbi@kernel.org>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210508015310.1627-1-thunder.leizhen@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/dwc3-imx8mp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/dwc3/dwc3-imx8mp.c b/drivers/usb/dwc3/dwc3-imx8mp.c
index e9fced6f7a7c..756faa46d33a 100644
--- a/drivers/usb/dwc3/dwc3-imx8mp.c
+++ b/drivers/usb/dwc3/dwc3-imx8mp.c
@@ -167,6 +167,7 @@ static int dwc3_imx8mp_probe(struct platform_device *pdev)
 
 	dwc3_np = of_get_compatible_child(node, "snps,dwc3");
 	if (!dwc3_np) {
+		err = -ENODEV;
 		dev_err(dev, "failed to find dwc3 core child\n");
 		goto disable_rpm;
 	}
-- 
cgit v1.2.3


From e89baeba4f64bab679618b3330cdcda5929fb8d5 Mon Sep 17 00:00:00 2001
From: Matthijs Kooijman <matthijs@stdin.nl>
Date: Mon, 3 May 2021 20:05:38 +0200
Subject: usb: dwc2: Remove obsolete MODULE_ constants from platform.c

Originally, the core and platform drivers were separate modules, so each
had its own module info. Since commit 2d1165a4b95e (usb: dwc2: remove
dwc2_platform.ko) platform.c is included in the core module, which now
contains duplicate module info (from core.c and platform.c).

Due to the linking order and modinfo implementation, running `modinfo`
on the resulting dwc2.ko shows just the info from platform.c, rather
than that from core.c, suggesting that I am the author of the entire
dwc2 module. Since platform.c is just a minor part of the entire module,
this removes its module info in favor of the info from core.c.

Acked-by: Minas Harutyunyan <Minas.Harutyunyan@synopsys.com>
Signed-off-by: Matthijs Kooijman <matthijs@stdin.nl>
Link: https://lore.kernel.org/r/20210503180538.64423-1-matthijs@stdin.nl
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc2/platform.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c
index 3024785d84cb..520a0beef77c 100644
--- a/drivers/usb/dwc2/platform.c
+++ b/drivers/usb/dwc2/platform.c
@@ -776,7 +776,3 @@ static struct platform_driver dwc2_platform_driver = {
 };
 
 module_platform_driver(dwc2_platform_driver);
-
-MODULE_DESCRIPTION("DESIGNWARE HS OTG Platform Glue");
-MODULE_AUTHOR("Matthijs Kooijman <matthijs@stdin.nl>");
-MODULE_LICENSE("Dual BSD/GPL");
-- 
cgit v1.2.3


From 2e2b8d15adc2f6ab2d4aa0550e241b9742a436a0 Mon Sep 17 00:00:00 2001
From: Kyle Tso <kyletso@google.com>
Date: Tue, 4 May 2021 01:18:49 +0800
Subject: usb: typec: tcpm: Fix wrong handling in GET_SINK_CAP

After receiving Sink Capabilities Message in GET_SINK_CAP AMS, it is
incorrect to call tcpm_pd_handle_state because the Message is expected
and the current state is not Ready states. The result of this incorrect
operation ends in Soft Reset which is definitely wrong. Simply
forwarding to Ready States is enough to finish the AMS.

Fixes: 8dea75e11380 ("usb: typec: tcpm: Protocol Error handling")
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Kyle Tso <kyletso@google.com>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210503171849.2605302-1-kyletso@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm/tcpm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index c4fdc00a3bc8..68e04e397e92 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -2390,7 +2390,7 @@ static void tcpm_pd_data_request(struct tcpm_port *port,
 		port->nr_sink_caps = cnt;
 		port->sink_cap_done = true;
 		if (port->ams == GET_SINK_CAPABILITIES)
-			tcpm_pd_handle_state(port, ready_state(port), NONE_AMS, 0);
+			tcpm_set_state(port, ready_state(port), 0);
 		/* Unexpected Sink Capabilities */
 		else
 			tcpm_pd_handle_msg(port,
-- 
cgit v1.2.3


From 8edb79af88efc6e49e735f9baf61d9f0748b881f Mon Sep 17 00:00:00 2001
From: Dinghao Liu <dinghao.liu@zju.edu.cn>
Date: Wed, 7 Apr 2021 11:49:27 +0800
Subject: iio: light: gp2ap002: Fix rumtime PM imbalance on error

When devm_request_threaded_irq() fails, we should decrease the
runtime PM counter to keep the counter balanced. But when
iio_device_register() fails, we need not to decrease it because
we have already decreased it before.

Signed-off-by: Dinghao Liu <dinghao.liu@zju.edu.cn>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Fixes: 97d642e23037 ("iio: light: Add a driver for Sharp GP2AP002x00F")
Link: https://lore.kernel.org/r/20210407034927.16882-1-dinghao.liu@zju.edu.cn
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/gp2ap002.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/iio/light/gp2ap002.c b/drivers/iio/light/gp2ap002.c
index d048ae257c51..f960be7d4001 100644
--- a/drivers/iio/light/gp2ap002.c
+++ b/drivers/iio/light/gp2ap002.c
@@ -582,7 +582,7 @@ static int gp2ap002_probe(struct i2c_client *client,
 					"gp2ap002", indio_dev);
 	if (ret) {
 		dev_err(dev, "unable to request IRQ\n");
-		goto out_disable_vio;
+		goto out_put_pm;
 	}
 	gp2ap002->irq = client->irq;
 
@@ -612,8 +612,9 @@ static int gp2ap002_probe(struct i2c_client *client,
 
 	return 0;
 
-out_disable_pm:
+out_put_pm:
 	pm_runtime_put_noidle(dev);
+out_disable_pm:
 	pm_runtime_disable(dev);
 out_disable_vio:
 	regulator_disable(gp2ap002->vio);
-- 
cgit v1.2.3


From a2fa9242e89f27696515699fe0f0296bf1ac1815 Mon Sep 17 00:00:00 2001
From: Dinghao Liu <dinghao.liu@zju.edu.cn>
Date: Mon, 12 Apr 2021 13:32:02 +0800
Subject: iio: proximity: pulsedlight: Fix rumtime PM imbalance on error

When lidar_write_control() fails, a pairing PM usage counter
decrement is needed to keep the counter balanced.

Fixes: 4ac4e086fd8c5 ("iio: pulsedlight-lidar-lite: add runtime PM")
Signed-off-by: Dinghao Liu <dinghao.liu@zju.edu.cn>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20210412053204.4889-1-dinghao.liu@zju.edu.cn
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/proximity/pulsedlight-lidar-lite-v2.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c b/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c
index c685f10b5ae4..cc206bfa09c7 100644
--- a/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c
+++ b/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c
@@ -160,6 +160,7 @@ static int lidar_get_measurement(struct lidar_data *data, u16 *reg)
 	ret = lidar_write_control(data, LIDAR_REG_CONTROL_ACQUIRE);
 	if (ret < 0) {
 		dev_err(&client->dev, "cannot send start measurement command");
+		pm_runtime_put_noidle(&client->dev);
 		return ret;
 	}
 
-- 
cgit v1.2.3


From 7061803522ee7876df1ca18cdd1e1551f761352d Mon Sep 17 00:00:00 2001
From: Alexandru Ardelean <aardelean@deviqon.com>
Date: Wed, 14 Apr 2021 11:49:55 +0300
Subject: iio: hid-sensors: select IIO_TRIGGERED_BUFFER under
 HID_SENSOR_IIO_TRIGGER

During commit 067fda1c065ff ("iio: hid-sensors: move triggered buffer
setup into hid_sensor_setup_trigger"), the
iio_triggered_buffer_{setup,cleanup}() functions got moved under the
hid-sensor-trigger module.

The above change works fine, if any of the sensors get built. However, when
only the common hid-sensor-trigger module gets built (and none of the
drivers), then the IIO_TRIGGERED_BUFFER symbol isn't selected/enforced.

Previously, each driver would enforce/select the IIO_TRIGGERED_BUFFER
symbol. With this change the HID_SENSOR_IIO_TRIGGER (for the
hid-sensor-trigger module) will enforce that IIO_TRIGGERED_BUFFER gets
selected.

All HID sensor drivers select the HID_SENSOR_IIO_TRIGGER symbol. So, this
change removes the IIO_TRIGGERED_BUFFER enforcement from each driver.

Fixes: 067fda1c065ff ("iio: hid-sensors: move triggered buffer setup into hid_sensor_setup_trigger")
Reported-by: Thomas Deutschmann <whissi@gentoo.org>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Alexandru Ardelean <aardelean@deviqon.com>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Link: https://lore.kernel.org/r/20210414084955.260117-1-aardelean@deviqon.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/Kconfig              | 1 -
 drivers/iio/common/hid-sensors/Kconfig | 1 +
 drivers/iio/gyro/Kconfig               | 1 -
 drivers/iio/humidity/Kconfig           | 1 -
 drivers/iio/light/Kconfig              | 2 --
 drivers/iio/magnetometer/Kconfig       | 1 -
 drivers/iio/orientation/Kconfig        | 2 --
 drivers/iio/pressure/Kconfig           | 1 -
 drivers/iio/temperature/Kconfig        | 1 -
 9 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig
index cceda3cecbcf..8b1723635cce 100644
--- a/drivers/iio/accel/Kconfig
+++ b/drivers/iio/accel/Kconfig
@@ -229,7 +229,6 @@ config DMARD10
 config HID_SENSOR_ACCEL_3D
 	depends on HID_SENSOR_HUB
 	select IIO_BUFFER
-	select IIO_TRIGGERED_BUFFER
 	select HID_SENSOR_IIO_COMMON
 	select HID_SENSOR_IIO_TRIGGER
 	tristate "HID Accelerometers 3D"
diff --git a/drivers/iio/common/hid-sensors/Kconfig b/drivers/iio/common/hid-sensors/Kconfig
index 24d492567336..2a3dd3b907be 100644
--- a/drivers/iio/common/hid-sensors/Kconfig
+++ b/drivers/iio/common/hid-sensors/Kconfig
@@ -19,6 +19,7 @@ config HID_SENSOR_IIO_TRIGGER
 	tristate "Common module (trigger) for all HID Sensor IIO drivers"
 	depends on HID_SENSOR_HUB && HID_SENSOR_IIO_COMMON && IIO_BUFFER
 	select IIO_TRIGGER
+	select IIO_TRIGGERED_BUFFER
 	help
 	  Say yes here to build trigger support for HID sensors.
 	  Triggers will be send if all requested attributes were read.
diff --git a/drivers/iio/gyro/Kconfig b/drivers/iio/gyro/Kconfig
index 5824f2edf975..20b5ac7ab66a 100644
--- a/drivers/iio/gyro/Kconfig
+++ b/drivers/iio/gyro/Kconfig
@@ -111,7 +111,6 @@ config FXAS21002C_SPI
 config HID_SENSOR_GYRO_3D
 	depends on HID_SENSOR_HUB
 	select IIO_BUFFER
-	select IIO_TRIGGERED_BUFFER
 	select HID_SENSOR_IIO_COMMON
 	select HID_SENSOR_IIO_TRIGGER
 	tristate "HID Gyroscope 3D"
diff --git a/drivers/iio/humidity/Kconfig b/drivers/iio/humidity/Kconfig
index 6549fcf6db69..2de5494e7c22 100644
--- a/drivers/iio/humidity/Kconfig
+++ b/drivers/iio/humidity/Kconfig
@@ -52,7 +52,6 @@ config HID_SENSOR_HUMIDITY
 	tristate "HID Environmental humidity sensor"
 	depends on HID_SENSOR_HUB
 	select IIO_BUFFER
-	select IIO_TRIGGERED_BUFFER
 	select HID_SENSOR_IIO_COMMON
 	select HID_SENSOR_IIO_TRIGGER
 	help
diff --git a/drivers/iio/light/Kconfig b/drivers/iio/light/Kconfig
index 33ad4dd0b5c7..917f9becf9c7 100644
--- a/drivers/iio/light/Kconfig
+++ b/drivers/iio/light/Kconfig
@@ -256,7 +256,6 @@ config ISL29125
 config HID_SENSOR_ALS
 	depends on HID_SENSOR_HUB
 	select IIO_BUFFER
-	select IIO_TRIGGERED_BUFFER
 	select HID_SENSOR_IIO_COMMON
 	select HID_SENSOR_IIO_TRIGGER
 	tristate "HID ALS"
@@ -270,7 +269,6 @@ config HID_SENSOR_ALS
 config HID_SENSOR_PROX
 	depends on HID_SENSOR_HUB
 	select IIO_BUFFER
-	select IIO_TRIGGERED_BUFFER
 	select HID_SENSOR_IIO_COMMON
 	select HID_SENSOR_IIO_TRIGGER
 	tristate "HID PROX"
diff --git a/drivers/iio/magnetometer/Kconfig b/drivers/iio/magnetometer/Kconfig
index 5d4ffd66032e..74ad5701c6c2 100644
--- a/drivers/iio/magnetometer/Kconfig
+++ b/drivers/iio/magnetometer/Kconfig
@@ -95,7 +95,6 @@ config MAG3110
 config HID_SENSOR_MAGNETOMETER_3D
 	depends on HID_SENSOR_HUB
 	select IIO_BUFFER
-	select IIO_TRIGGERED_BUFFER
 	select HID_SENSOR_IIO_COMMON
 	select HID_SENSOR_IIO_TRIGGER
 	tristate "HID Magenetometer 3D"
diff --git a/drivers/iio/orientation/Kconfig b/drivers/iio/orientation/Kconfig
index a505583cc2fd..396cbbb867f4 100644
--- a/drivers/iio/orientation/Kconfig
+++ b/drivers/iio/orientation/Kconfig
@@ -9,7 +9,6 @@ menu "Inclinometer sensors"
 config HID_SENSOR_INCLINOMETER_3D
 	depends on HID_SENSOR_HUB
 	select IIO_BUFFER
-	select IIO_TRIGGERED_BUFFER
 	select HID_SENSOR_IIO_COMMON
 	select HID_SENSOR_IIO_TRIGGER
 	tristate "HID Inclinometer 3D"
@@ -20,7 +19,6 @@ config HID_SENSOR_INCLINOMETER_3D
 config HID_SENSOR_DEVICE_ROTATION
 	depends on HID_SENSOR_HUB
 	select IIO_BUFFER
-	select IIO_TRIGGERED_BUFFER
 	select HID_SENSOR_IIO_COMMON
 	select HID_SENSOR_IIO_TRIGGER
 	tristate "HID Device Rotation"
diff --git a/drivers/iio/pressure/Kconfig b/drivers/iio/pressure/Kconfig
index 689b978db4f9..fc0d3cfca418 100644
--- a/drivers/iio/pressure/Kconfig
+++ b/drivers/iio/pressure/Kconfig
@@ -79,7 +79,6 @@ config DPS310
 config HID_SENSOR_PRESS
 	depends on HID_SENSOR_HUB
 	select IIO_BUFFER
-	select IIO_TRIGGERED_BUFFER
 	select HID_SENSOR_IIO_COMMON
 	select HID_SENSOR_IIO_TRIGGER
 	tristate "HID PRESS"
diff --git a/drivers/iio/temperature/Kconfig b/drivers/iio/temperature/Kconfig
index f1f2a1499c9e..4df60082c1fa 100644
--- a/drivers/iio/temperature/Kconfig
+++ b/drivers/iio/temperature/Kconfig
@@ -45,7 +45,6 @@ config HID_SENSOR_TEMP
 	tristate "HID Environmental temperature sensor"
 	depends on HID_SENSOR_HUB
 	select IIO_BUFFER
-	select IIO_TRIGGERED_BUFFER
 	select HID_SENSOR_IIO_COMMON
 	select HID_SENSOR_IIO_TRIGGER
 	help
-- 
cgit v1.2.3


From f73c730774d88a14d7b60feee6d0e13570f99499 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Fri, 23 Apr 2021 05:09:59 +0300
Subject: iio: gyro: mpu3050: Fix reported temperature value

The raw temperature value is a 16-bit signed integer. The sign casting
is missing in the code, which results in a wrong temperature reported
by userspace tools, fix it.

Cc: stable@vger.kernel.org
Fixes: 3904b28efb2c ("iio: gyro: Add driver for the MPU-3050 gyroscope")
Datasheet: https://www.cdiweb.com/datasheets/invensense/mpu-3000a.pdf
Tested-by: Maxim Schwalm <maxim.schwalm@gmail.com> # Asus TF700T
Tested-by: Svyatoslav Ryhel <clamor95@gmail.com> # Asus TF201
Reported-by: Svyatoslav Ryhel <clamor95@gmail.com>
Reviewed-by: Andy Shevchenko <Andy.Shevchenko@gmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Acked-by: Jean-Baptiste Maneyrol <jmaneyrol@invensense.com>
Link: https://lore.kernel.org/r/20210423020959.5023-1-digetx@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/gyro/mpu3050-core.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/iio/gyro/mpu3050-core.c b/drivers/iio/gyro/mpu3050-core.c
index ac90be03332a..f17a93519535 100644
--- a/drivers/iio/gyro/mpu3050-core.c
+++ b/drivers/iio/gyro/mpu3050-core.c
@@ -272,7 +272,16 @@ static int mpu3050_read_raw(struct iio_dev *indio_dev,
 	case IIO_CHAN_INFO_OFFSET:
 		switch (chan->type) {
 		case IIO_TEMP:
-			/* The temperature scaling is (x+23000)/280 Celsius */
+			/*
+			 * The temperature scaling is (x+23000)/280 Celsius
+			 * for the "best fit straight line" temperature range
+			 * of -30C..85C.  The 23000 includes room temperature
+			 * offset of +35C, 280 is the precision scale and x is
+			 * the 16-bit signed integer reported by hardware.
+			 *
+			 * Temperature value itself represents temperature of
+			 * the sensor die.
+			 */
 			*val = 23000;
 			return IIO_VAL_INT;
 		default:
@@ -329,7 +338,7 @@ static int mpu3050_read_raw(struct iio_dev *indio_dev,
 				goto out_read_raw_unlock;
 			}
 
-			*val = be16_to_cpu(raw_val);
+			*val = (s16)be16_to_cpu(raw_val);
 			ret = IIO_VAL_INT;
 
 			goto out_read_raw_unlock;
-- 
cgit v1.2.3


From 901f84de0e16bde10a72d7eb2f2eb73fcde8fa1a Mon Sep 17 00:00:00 2001
From: Tomasz Duszynski <tomasz.duszynski@octakon.com>
Date: Fri, 23 Apr 2021 10:02:44 +0200
Subject: iio: core: fix ioctl handlers removal

Currently ioctl handlers are removed twice. For the first time during
iio_device_unregister() then later on inside
iio_device_unregister_eventset() and iio_buffers_free_sysfs_and_mask().
Double free leads to kernel panic.

Fix this by not touching ioctl handlers list directly but rather
letting code responsible for registration call the matching cleanup
routine itself.

Fixes: 8dedcc3eee3ac ("iio: core: centralize ioctl() calls to the main chardev")
Signed-off-by: Tomasz Duszynski <tomasz.duszynski@octakon.com>
Acked-by: Alexandru Ardelean <ardeleanalex@gmail.com>
Cc: <Stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210423080244.2790-1-tomasz.duszynski@octakon.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/industrialio-core.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index d92c58a94fe4..9e59f5da3d28 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -1926,9 +1926,6 @@ EXPORT_SYMBOL(__iio_device_register);
  **/
 void iio_device_unregister(struct iio_dev *indio_dev)
 {
-	struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev);
-	struct iio_ioctl_handler *h, *t;
-
 	cdev_device_del(&indio_dev->chrdev, &indio_dev->dev);
 
 	mutex_lock(&indio_dev->info_exist_lock);
@@ -1939,9 +1936,6 @@ void iio_device_unregister(struct iio_dev *indio_dev)
 
 	indio_dev->info = NULL;
 
-	list_for_each_entry_safe(h, t, &iio_dev_opaque->ioctl_handlers, entry)
-		list_del(&h->entry);
-
 	iio_device_wakeup_eventset(indio_dev);
 	iio_buffer_wakeup_poll(indio_dev);
 
-- 
cgit v1.2.3


From af0670b0bf1b116fd729b1b1011cf814bc34e12e Mon Sep 17 00:00:00 2001
From: Alexandru Ardelean <aardelean@deviqon.com>
Date: Mon, 3 May 2021 17:43:50 +0300
Subject: iio: core: return ENODEV if ioctl is unknown
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the ioctl() mechanism was introduced in IIO core to centralize the
registration of all ioctls in one place via commit 8dedcc3eee3ac ("iio:
core: centralize ioctl() calls to the main chardev"), the return code was
changed from ENODEV to EINVAL, when the ioctl code isn't known.

This was done by accident.

This change reverts back to the old behavior, where if the ioctl() code
isn't known, ENODEV is returned (vs EINVAL).

This was brought into perspective by this patch:
  https://lore.kernel.org/linux-iio/20210428150815.136150-1-paul@crapouillou.net/

Fixes: 8dedcc3eee3ac ("iio: core: centralize ioctl() calls to the main chardev")
Signed-off-by: Alexandru Ardelean <aardelean@deviqon.com>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Tested-by: Paul Cercueil <paul@crapouillou.net>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/industrialio-core.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 9e59f5da3d28..59efb36db2c7 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -1778,7 +1778,6 @@ static long iio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	if (!indio_dev->info)
 		goto out_unlock;
 
-	ret = -EINVAL;
 	list_for_each_entry(h, &iio_dev_opaque->ioctl_handlers, entry) {
 		ret = h->ioctl(indio_dev, filp, cmd, arg);
 		if (ret != IIO_IOCTL_UNHANDLED)
@@ -1786,7 +1785,7 @@ static long iio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	}
 
 	if (ret == IIO_IOCTL_UNHANDLED)
-		ret = -EINVAL;
+		ret = -ENODEV;
 
 out_unlock:
 	mutex_unlock(&indio_dev->info_exist_lock);
-- 
cgit v1.2.3


From af0e1871d79cfbb91f732d2c6fa7558e45c31038 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 7 May 2021 19:30:41 +0100
Subject: iio: tsl2583: Fix division by a zero lux_val

The lux_val returned from tsl2583_get_lux can potentially be zero,
so check for this to avoid a division by zero and an overflowed
gain_trim_val.

Fixes clang scan-build warning:

drivers/iio/light/tsl2583.c:345:40: warning: Either the
condition 'lux_val<0' is redundant or there is division
by zero at line 345. [zerodivcond]

Fixes: ac4f6eee8fe8 ("staging: iio: TAOS tsl258x: Device driver")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/tsl2583.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/iio/light/tsl2583.c b/drivers/iio/light/tsl2583.c
index 0f787bfc88fc..c9d8f07a6fcd 100644
--- a/drivers/iio/light/tsl2583.c
+++ b/drivers/iio/light/tsl2583.c
@@ -341,6 +341,14 @@ static int tsl2583_als_calibrate(struct iio_dev *indio_dev)
 		return lux_val;
 	}
 
+	/* Avoid division by zero of lux_value later on */
+	if (lux_val == 0) {
+		dev_err(&chip->client->dev,
+			"%s: lux_val of 0 will produce out of range trim_value\n",
+			__func__);
+		return -ENODATA;
+	}
+
 	gain_trim_val = (unsigned int)(((chip->als_settings.als_cal_target)
 			* chip->als_settings.als_gain_trim) / lux_val);
 	if ((gain_trim_val < 250) || (gain_trim_val > 4000)) {
-- 
cgit v1.2.3


From b9a0866a5bdf6a4643a52872ada6be6184c6f4f2 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andy.shevchenko@gmail.com>
Date: Wed, 5 May 2021 01:23:37 +0300
Subject: usb: typec: ucsi: Put fwnode in any case during ->probe()

device_for_each_child_node() bumps a reference counting of a returned variable.
We have to balance it whenever we return to the caller.

Fixes: c1b0bc2dabfa ("usb: typec: Add support for UCSI interface")
Cc: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20210504222337.3151726-1-andy.shevchenko@gmail.com
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/ucsi/ucsi.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c
index 282c3c825c13..0e1cec346e0f 100644
--- a/drivers/usb/typec/ucsi/ucsi.c
+++ b/drivers/usb/typec/ucsi/ucsi.c
@@ -999,6 +999,7 @@ static const struct typec_operations ucsi_ops = {
 	.pr_set = ucsi_pr_swap
 };
 
+/* Caller must call fwnode_handle_put() after use */
 static struct fwnode_handle *ucsi_find_fwnode(struct ucsi_connector *con)
 {
 	struct fwnode_handle *fwnode;
@@ -1033,7 +1034,7 @@ static int ucsi_register_port(struct ucsi *ucsi, int index)
 	command |= UCSI_CONNECTOR_NUMBER(con->num);
 	ret = ucsi_send_command(ucsi, command, &con->cap, sizeof(con->cap));
 	if (ret < 0)
-		goto out;
+		goto out_unlock;
 
 	if (con->cap.op_mode & UCSI_CONCAP_OPMODE_DRP)
 		cap->data = TYPEC_PORT_DRD;
@@ -1151,6 +1152,8 @@ static int ucsi_register_port(struct ucsi *ucsi, int index)
 	trace_ucsi_register_port(con->num, &con->status);
 
 out:
+	fwnode_handle_put(cap->fwnode);
+out_unlock:
 	mutex_unlock(&con->lock);
 	return ret;
 }
-- 
cgit v1.2.3


From e17b02d4970913233d543c79c9c66e72cac05bdd Mon Sep 17 00:00:00 2001
From: Marcel Hamer <marcel@solidxs.se>
Date: Tue, 27 Apr 2021 14:21:18 +0200
Subject: usb: dwc3: omap: improve extcon initialization

When extcon is used in combination with dwc3, it is assumed that the dwc3
registers are untouched and as such are only configured if VBUS is valid
or ID is tied to ground.

In case VBUS is not valid or ID is floating, the registers are not
configured as such during driver initialization, causing a wrong
default state during boot.

If the registers are not in a default state, because they are for
instance touched by a boot loader, this can cause for a kernel error.

Signed-off-by: Marcel Hamer <marcel@solidxs.se>
Link: https://lore.kernel.org/r/20210427122118.1948340-1-marcel@solidxs.se
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/dwc3-omap.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c
index 3db17806e92e..e196673f5c64 100644
--- a/drivers/usb/dwc3/dwc3-omap.c
+++ b/drivers/usb/dwc3/dwc3-omap.c
@@ -437,8 +437,13 @@ static int dwc3_omap_extcon_register(struct dwc3_omap *omap)
 
 		if (extcon_get_state(edev, EXTCON_USB) == true)
 			dwc3_omap_set_mailbox(omap, OMAP_DWC3_VBUS_VALID);
+		else
+			dwc3_omap_set_mailbox(omap, OMAP_DWC3_VBUS_OFF);
+
 		if (extcon_get_state(edev, EXTCON_USB_HOST) == true)
 			dwc3_omap_set_mailbox(omap, OMAP_DWC3_ID_GROUND);
+		else
+			dwc3_omap_set_mailbox(omap, OMAP_DWC3_ID_FLOAT);
 
 		omap->edev = edev;
 	}
-- 
cgit v1.2.3


From f75297853470627c4ee4e2b80eed40af7441c96b Mon Sep 17 00:00:00 2001
From: Wei Ming Chen <jj251510319013@gmail.com>
Date: Thu, 6 May 2021 20:20:20 +0800
Subject: docs: usb: function: Modify path name

Original path does not exists, so changed to
"Documentation/ABI/testing/configfs-usb-gadget"

Signed-off-by: Wei Ming Chen <jj251510319013@gmail.com>
Link: https://lore.kernel.org/r/20210506122020.7117-1-jj251510319013@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/usb/gadget_configfs.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/usb/gadget_configfs.rst b/Documentation/usb/gadget_configfs.rst
index 158e48dab586..e4566ffb223f 100644
--- a/Documentation/usb/gadget_configfs.rst
+++ b/Documentation/usb/gadget_configfs.rst
@@ -140,7 +140,7 @@ is an arbitrary string allowed in a filesystem, e.g.::
 Each function provides its specific set of attributes, with either read-only
 or read-write access. Where applicable they need to be written to as
 appropriate.
-Please refer to Documentation/ABI/*/configfs-usb-gadget* for more information.
+Please refer to Documentation/ABI/testing/configfs-usb-gadget for more information.
 
 4. Associating the functions with their configurations
 ------------------------------------------------------
-- 
cgit v1.2.3


From a60a34366e0d09ca002c966dd7c43a68c28b1f82 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Thu, 6 May 2021 22:39:10 +0200
Subject: usb: fotg210-hcd: Fix an error message

'retval' is known to be -ENODEV here.
This is a hard-coded default error code which is not useful in the error
message. Moreover, another error message is printed at the end of the
error handling path. The corresponding error code (-ENOMEM) is more
informative.

So remove simplify the first error message.

While at it, also remove the useless initialization of 'retval'.

Fixes: 7d50195f6c50 ("usb: host: Faraday fotg210-hcd driver")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/94531bcff98e46d4f9c20183a90b7f47f699126c.1620333419.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/fotg210-hcd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/host/fotg210-hcd.c b/drivers/usb/host/fotg210-hcd.c
index 6cac642520fc..9c2eda0918e1 100644
--- a/drivers/usb/host/fotg210-hcd.c
+++ b/drivers/usb/host/fotg210-hcd.c
@@ -5568,7 +5568,7 @@ static int fotg210_hcd_probe(struct platform_device *pdev)
 	struct usb_hcd *hcd;
 	struct resource *res;
 	int irq;
-	int retval = -ENODEV;
+	int retval;
 	struct fotg210_hcd *fotg210;
 
 	if (usb_disabled())
@@ -5588,7 +5588,7 @@ static int fotg210_hcd_probe(struct platform_device *pdev)
 	hcd = usb_create_hcd(&fotg210_fotg210_hc_driver, dev,
 			dev_name(dev));
 	if (!hcd) {
-		dev_err(dev, "failed to create hcd with err %d\n", retval);
+		dev_err(dev, "failed to create hcd\n");
 		retval = -ENOMEM;
 		goto fail_create_hcd;
 	}
-- 
cgit v1.2.3


From be1c2bb3ba5a39c20b1d54e01ffbcb2b1ca7e46c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 27 Apr 2021 09:00:28 +0100
Subject: ARM: PXA: Fix cplds irqdesc allocation when using legacy mode

The Mainstone PXA platform uses CONFIG_SPARSE_IRQ, and thus we
cannot rely on the irq descriptors to be readilly allocated
before creating the irqdomain in legacy mode. The kernel then
complains loudly about not being able to associate the interrupt
in the domain -- can't blame it.

Fix it by allocating the irqdescs upfront in the legacy case.

Fixes: b68761da0111 ("ARM: PXA: Kill use of irq_create_strict_mappings()")
Reported-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210426223942.GA213931@roeck-us.net
---
 arch/arm/mach-pxa/pxa_cplds_irqs.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mach-pxa/pxa_cplds_irqs.c b/arch/arm/mach-pxa/pxa_cplds_irqs.c
index ec0d9b094744..bddfc7cd5d40 100644
--- a/arch/arm/mach-pxa/pxa_cplds_irqs.c
+++ b/arch/arm/mach-pxa/pxa_cplds_irqs.c
@@ -121,8 +121,13 @@ static int cplds_probe(struct platform_device *pdev)
 		return fpga->irq;
 
 	base_irq = platform_get_irq(pdev, 1);
-	if (base_irq < 0)
+	if (base_irq < 0) {
 		base_irq = 0;
+	} else {
+		ret = devm_irq_alloc_descs(&pdev->dev, base_irq, base_irq, CPLDS_NB_IRQ, 0);
+		if (ret < 0)
+			return ret;
+	}
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	fpga->base = devm_ioremap_resource(&pdev->dev, res);
-- 
cgit v1.2.3


From 5b44955dc19808fa209444ccb192343050e95ab0 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 13 Apr 2021 14:21:58 +0200
Subject: irqchip/apple-aic: APPLE_AIC should depend on ARCH_APPLE

The Apple Interrupt Controller is only present on Apple Silicon SoCs.
Hence add a dependency on ARCH_APPLE, to prevent asking the user about
this driver when configuring a kernel without Apple Silicon SoC support.

Drop the default, as ARCH_APPLE already selects APPLE_AIC.

Fixes: 76cde26394114f6a ("irqchip/apple-aic: Add support for the Apple Interrupt Controller")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Hector Martin <marcan@marcan.st>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/f37e8daea37d50651d2164b0b3dad90780188548.1618316398.git.geert+renesas@glider.be
---
 drivers/irqchip/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index b90e825df7e1..62543a4eccc0 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -596,7 +596,7 @@ config IRQ_IDT3243X
 config APPLE_AIC
 	bool "Apple Interrupt Controller (AIC)"
 	depends on ARM64
-	default ARCH_APPLE
+	depends on ARCH_APPLE || COMPILE_TEST
 	help
 	  Support for the Apple Interrupt Controller found on Apple Silicon SoCs,
 	  such as the M1.
-- 
cgit v1.2.3


From 726c945ab2ebd104631b6105ab455a5bc604a3f1 Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Tue, 27 Apr 2021 12:42:19 +0800
Subject: hwmon: (corsair-psu) Remove unneeded semicolons

Fix the following coccicheck warning:

./drivers/hwmon/corsair-psu.c:379:2-3: Unneeded semicolon

Remove unneeded semicolons.

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Link: https://lore.kernel.org/r/20210427044219.7799-1-wanjiabing@vivo.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/corsair-psu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/hwmon/corsair-psu.c b/drivers/hwmon/corsair-psu.c
index 3a5807e4a2ef..02298b86b57b 100644
--- a/drivers/hwmon/corsair-psu.c
+++ b/drivers/hwmon/corsair-psu.c
@@ -355,7 +355,7 @@ static umode_t corsairpsu_hwmon_power_is_visible(const struct corsairpsu_data *p
 		return 0444;
 	default:
 		return 0;
-	};
+	}
 }
 
 static umode_t corsairpsu_hwmon_in_is_visible(const struct corsairpsu_data *priv, u32 attr,
@@ -376,7 +376,7 @@ static umode_t corsairpsu_hwmon_in_is_visible(const struct corsairpsu_data *priv
 		break;
 	default:
 		break;
-	};
+	}
 
 	return res;
 }
-- 
cgit v1.2.3


From 5216dff22dc2bbbbe6f00335f9fd2879670e753b Mon Sep 17 00:00:00 2001
From: Eddie James <eajames@linux.ibm.com>
Date: Thu, 29 Apr 2021 10:13:36 -0500
Subject: hwmon: (occ) Fix poll rate limiting

The poll rate limiter time was initialized at zero. This breaks the
comparison in time_after if jiffies is large. Switch to storing the
next update time rather than the previous time, and initialize the
time when the device is probed.

Fixes: c10e753d43eb ("hwmon (occ): Add sensor types and versions")
Signed-off-by: Eddie James <eajames@linux.ibm.com>
Link: https://lore.kernel.org/r/20210429151336.18980-1-eajames@linux.ibm.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/occ/common.c | 5 +++--
 drivers/hwmon/occ/common.h | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c
index f1ac153d0b56..967532afb1c0 100644
--- a/drivers/hwmon/occ/common.c
+++ b/drivers/hwmon/occ/common.c
@@ -217,9 +217,9 @@ int occ_update_response(struct occ *occ)
 		return rc;
 
 	/* limit the maximum rate of polling the OCC */
-	if (time_after(jiffies, occ->last_update + OCC_UPDATE_FREQUENCY)) {
+	if (time_after(jiffies, occ->next_update)) {
 		rc = occ_poll(occ);
-		occ->last_update = jiffies;
+		occ->next_update = jiffies + OCC_UPDATE_FREQUENCY;
 	} else {
 		rc = occ->last_error;
 	}
@@ -1165,6 +1165,7 @@ int occ_setup(struct occ *occ, const char *name)
 		return rc;
 	}
 
+	occ->next_update = jiffies + OCC_UPDATE_FREQUENCY;
 	occ_parse_poll_response(occ);
 
 	rc = occ_setup_sensor_attrs(occ);
diff --git a/drivers/hwmon/occ/common.h b/drivers/hwmon/occ/common.h
index 67e6968b8978..e6df719770e8 100644
--- a/drivers/hwmon/occ/common.h
+++ b/drivers/hwmon/occ/common.h
@@ -99,7 +99,7 @@ struct occ {
 	u8 poll_cmd_data;		/* to perform OCC poll command */
 	int (*send_cmd)(struct occ *occ, u8 *cmd);
 
-	unsigned long last_update;
+	unsigned long next_update;
 	struct mutex lock;		/* lock OCC access */
 
 	struct device *hwmon;
-- 
cgit v1.2.3


From 2d101db3e5be3bbee6001d4227705cec70ecb82e Mon Sep 17 00:00:00 2001
From: Václav Kubernát <kubernat@cesnet.cz>
Date: Thu, 29 Apr 2021 09:53:38 +0200
Subject: hwmon: (pmbus/fsp-3y) Fix FSP-3Y YH-5151E non-compliant vout encoding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I didn't properly test the driver for YH-5151E, so it was completely
broken. Firstly, the log/real mapping was incorrect in one case.
Secondly, PMBus specifies that output voltages should be in the linear16
encoding. However, the YH-5151E is non-compliant and uses linear11.
YM-2151E isn't affected by this. Fix this by converting the values
inside the read functions. linear16 gets the exponent from the VOUT_MODE
command. The device doesn't support it, so I have to manually supply the
value for it.

Both supported devices have now been tested to report correct vout
values.

Fixes: 1734b4135a62 ("hwmon: Add driver for fsp-3y PSUs and PDUs")
Signed-off-by: Václav Kubernát <kubernat@cesnet.cz>
Link: https://lore.kernel.org/r/20210429075337.110502-1-kubernat@cesnet.cz
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/fsp-3y.c | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/drivers/hwmon/pmbus/fsp-3y.c b/drivers/hwmon/pmbus/fsp-3y.c
index b177987286ae..e24842475254 100644
--- a/drivers/hwmon/pmbus/fsp-3y.c
+++ b/drivers/hwmon/pmbus/fsp-3y.c
@@ -57,7 +57,7 @@ static int page_log_to_page_real(int page_log, enum chips chip)
 		case YH5151E_PAGE_12V_LOG:
 			return YH5151E_PAGE_12V_REAL;
 		case YH5151E_PAGE_5V_LOG:
-			return YH5151E_PAGE_5V_LOG;
+			return YH5151E_PAGE_5V_REAL;
 		case YH5151E_PAGE_3V3_LOG:
 			return YH5151E_PAGE_3V3_REAL;
 		}
@@ -103,8 +103,18 @@ static int set_page(struct i2c_client *client, int page_log)
 
 static int fsp3y_read_byte_data(struct i2c_client *client, int page, int reg)
 {
+	const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
+	struct fsp3y_data *data = to_fsp3y_data(info);
 	int rv;
 
+	/*
+	 * YH5151-E outputs vout in linear11. The conversion is done when
+	 * reading. Here, we have to inject pmbus_core with the correct
+	 * exponent (it is -6).
+	 */
+	if (data->chip == yh5151e && reg == PMBUS_VOUT_MODE)
+		return 0x1A;
+
 	rv = set_page(client, page);
 	if (rv < 0)
 		return rv;
@@ -114,6 +124,8 @@ static int fsp3y_read_byte_data(struct i2c_client *client, int page, int reg)
 
 static int fsp3y_read_word_data(struct i2c_client *client, int page, int phase, int reg)
 {
+	const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
+	struct fsp3y_data *data = to_fsp3y_data(info);
 	int rv;
 
 	/*
@@ -144,7 +156,18 @@ static int fsp3y_read_word_data(struct i2c_client *client, int page, int phase,
 	if (rv < 0)
 		return rv;
 
-	return i2c_smbus_read_word_data(client, reg);
+	rv = i2c_smbus_read_word_data(client, reg);
+	if (rv < 0)
+		return rv;
+
+	/*
+	 * YH-5151E is non-compliant and outputs output voltages in linear11
+	 * instead of linear16.
+	 */
+	if (data->chip == yh5151e && reg == PMBUS_READ_VOUT)
+		rv = sign_extend32(rv, 10) & 0xffff;
+
+	return rv;
 }
 
 static struct pmbus_driver_info fsp3y_info[] = {
-- 
cgit v1.2.3


From 1f4642b72be79757f050924a9b9673b6a02034bc Mon Sep 17 00:00:00 2001
From: Jack Pham <jackp@codeaurora.org>
Date: Mon, 3 May 2021 00:46:11 -0700
Subject: usb: typec: ucsi: Retrieve all the PDOs instead of just the first 4

commit 4dbc6a4ef06d ("usb: typec: ucsi: save power data objects
in PD mode") introduced retrieval of the PDOs when connected to a
PD-capable source. But only the first 4 PDOs are received since
that is the maximum number that can be fetched at a time given the
MESSAGE_IN length limitation (16 bytes). However, as per the PD spec
a connected source may advertise up to a maximum of 7 PDOs.

If such a source is connected it's possible the PPM could have
negotiated a power contract with one of the PDOs at index greater
than 4, and would be reflected in the request data object's (RDO)
object position field. This would result in an out-of-bounds access
when the rdo_index() is used to index into the src_pdos array in
ucsi_psy_get_voltage_now().

With the help of the UBSAN -fsanitize=array-bounds checker enabled
this exact issue is revealed when connecting to a PD source adapter
that advertise 5 PDOs and the PPM enters a contract having selected
the 5th one.

[  151.545106][   T70] Unexpected kernel BRK exception at EL1
[  151.545112][   T70] Internal error: BRK handler: f2005512 [#1] PREEMPT SMP
...
[  151.545499][   T70] pc : ucsi_psy_get_prop+0x208/0x20c
[  151.545507][   T70] lr : power_supply_show_property+0xc0/0x328
...
[  151.545542][   T70] Call trace:
[  151.545544][   T70]  ucsi_psy_get_prop+0x208/0x20c
[  151.545546][   T70]  power_supply_uevent+0x1a4/0x2f0
[  151.545550][   T70]  dev_uevent+0x200/0x384
[  151.545555][   T70]  kobject_uevent_env+0x1d4/0x7e8
[  151.545557][   T70]  power_supply_changed_work+0x174/0x31c
[  151.545562][   T70]  process_one_work+0x244/0x6f0
[  151.545564][   T70]  worker_thread+0x3e0/0xa64

We can resolve this by instead retrieving and storing up to the
maximum of 7 PDOs in the con->src_pdos array. This would involve
two calls to the GET_PDOS command.

Fixes: 992a60ed0d5e ("usb: typec: ucsi: register with power_supply class")
Fixes: 4dbc6a4ef06d ("usb: typec: ucsi: save power data objects in PD mode")
Cc: stable@vger.kernel.org
Reported-and-tested-by: Subbaraman Narayanamurthy <subbaram@codeaurora.org>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Jack Pham <jackp@codeaurora.org>
Link: https://lore.kernel.org/r/20210503074611.30973-1-jackp@codeaurora.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/ucsi/ucsi.c | 41 ++++++++++++++++++++++++++++++++---------
 drivers/usb/typec/ucsi/ucsi.h |  6 ++++--
 2 files changed, 36 insertions(+), 11 deletions(-)

diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c
index 0e1cec346e0f..1d8b7df59ff4 100644
--- a/drivers/usb/typec/ucsi/ucsi.c
+++ b/drivers/usb/typec/ucsi/ucsi.c
@@ -495,7 +495,8 @@ static void ucsi_unregister_altmodes(struct ucsi_connector *con, u8 recipient)
 	}
 }
 
-static void ucsi_get_pdos(struct ucsi_connector *con, int is_partner)
+static int ucsi_get_pdos(struct ucsi_connector *con, int is_partner,
+			 u32 *pdos, int offset, int num_pdos)
 {
 	struct ucsi *ucsi = con->ucsi;
 	u64 command;
@@ -503,17 +504,39 @@ static void ucsi_get_pdos(struct ucsi_connector *con, int is_partner)
 
 	command = UCSI_COMMAND(UCSI_GET_PDOS) | UCSI_CONNECTOR_NUMBER(con->num);
 	command |= UCSI_GET_PDOS_PARTNER_PDO(is_partner);
-	command |= UCSI_GET_PDOS_NUM_PDOS(UCSI_MAX_PDOS - 1);
+	command |= UCSI_GET_PDOS_PDO_OFFSET(offset);
+	command |= UCSI_GET_PDOS_NUM_PDOS(num_pdos - 1);
 	command |= UCSI_GET_PDOS_SRC_PDOS;
-	ret = ucsi_send_command(ucsi, command, con->src_pdos,
-			       sizeof(con->src_pdos));
-	if (ret < 0) {
+	ret = ucsi_send_command(ucsi, command, pdos + offset,
+				num_pdos * sizeof(u32));
+	if (ret < 0)
 		dev_err(ucsi->dev, "UCSI_GET_PDOS failed (%d)\n", ret);
+	if (ret == 0 && offset == 0)
+		dev_warn(ucsi->dev, "UCSI_GET_PDOS returned 0 bytes\n");
+
+	return ret;
+}
+
+static void ucsi_get_src_pdos(struct ucsi_connector *con, int is_partner)
+{
+	int ret;
+
+	/* UCSI max payload means only getting at most 4 PDOs at a time */
+	ret = ucsi_get_pdos(con, 1, con->src_pdos, 0, UCSI_MAX_PDOS);
+	if (ret < 0)
 		return;
-	}
+
 	con->num_pdos = ret / sizeof(u32); /* number of bytes to 32-bit PDOs */
-	if (ret == 0)
-		dev_warn(ucsi->dev, "UCSI_GET_PDOS returned 0 bytes\n");
+	if (con->num_pdos < UCSI_MAX_PDOS)
+		return;
+
+	/* get the remaining PDOs, if any */
+	ret = ucsi_get_pdos(con, 1, con->src_pdos, UCSI_MAX_PDOS,
+			    PDO_MAX_OBJECTS - UCSI_MAX_PDOS);
+	if (ret < 0)
+		return;
+
+	con->num_pdos += ret / sizeof(u32);
 }
 
 static void ucsi_pwr_opmode_change(struct ucsi_connector *con)
@@ -522,7 +545,7 @@ static void ucsi_pwr_opmode_change(struct ucsi_connector *con)
 	case UCSI_CONSTAT_PWR_OPMODE_PD:
 		con->rdo = con->status.request_data_obj;
 		typec_set_pwr_opmode(con->port, TYPEC_PWR_MODE_PD);
-		ucsi_get_pdos(con, 1);
+		ucsi_get_src_pdos(con, 1);
 		break;
 	case UCSI_CONSTAT_PWR_OPMODE_TYPEC1_5:
 		con->rdo = 0;
diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h
index 3920e20a9e9e..cee666790907 100644
--- a/drivers/usb/typec/ucsi/ucsi.h
+++ b/drivers/usb/typec/ucsi/ucsi.h
@@ -8,6 +8,7 @@
 #include <linux/power_supply.h>
 #include <linux/types.h>
 #include <linux/usb/typec.h>
+#include <linux/usb/pd.h>
 #include <linux/usb/role.h>
 
 /* -------------------------------------------------------------------------- */
@@ -134,7 +135,9 @@ void ucsi_connector_change(struct ucsi *ucsi, u8 num);
 
 /* GET_PDOS command bits */
 #define UCSI_GET_PDOS_PARTNER_PDO(_r_)		((u64)(_r_) << 23)
+#define UCSI_GET_PDOS_PDO_OFFSET(_r_)		((u64)(_r_) << 24)
 #define UCSI_GET_PDOS_NUM_PDOS(_r_)		((u64)(_r_) << 32)
+#define UCSI_MAX_PDOS				(4)
 #define UCSI_GET_PDOS_SRC_PDOS			((u64)1 << 34)
 
 /* -------------------------------------------------------------------------- */
@@ -302,7 +305,6 @@ struct ucsi {
 
 #define UCSI_MAX_SVID		5
 #define UCSI_MAX_ALTMODES	(UCSI_MAX_SVID * 6)
-#define UCSI_MAX_PDOS		(4)
 
 #define UCSI_TYPEC_VSAFE5V	5000
 #define UCSI_TYPEC_1_5_CURRENT	1500
@@ -330,7 +332,7 @@ struct ucsi_connector {
 	struct power_supply *psy;
 	struct power_supply_desc psy_desc;
 	u32 rdo;
-	u32 src_pdos[UCSI_MAX_PDOS];
+	u32 src_pdos[PDO_MAX_OBJECTS];
 	int num_pdos;
 
 	struct usb_role_switch *usb_role_sw;
-- 
cgit v1.2.3


From c34e85fa69b9f4568f19da3af06c3870dd8fcc50 Mon Sep 17 00:00:00 2001
From: Kyle Tso <kyletso@google.com>
Date: Fri, 7 May 2021 14:22:59 +0800
Subject: usb: typec: tcpm: Send DISCOVER_IDENTITY from dedicated work

In current design, DISCOVER_IDENTITY is queued to VDM state machine
immediately in Ready states and never retries if it fails in the AMS.
Move the process to a delayed work so that when it fails for some
reasons (e.g. Sink Tx No Go), it can be retried by queueing the work
again. Also fix a problem that the vdm_state is not set to a proper
state if it is blocked by Collision Avoidance mechanism.

Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Kyle Tso <kyletso@google.com>
Link: https://lore.kernel.org/r/20210507062300.1945009-2-kyletso@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm/tcpm.c | 85 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 75 insertions(+), 10 deletions(-)

diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 68e04e397e92..ae1e84252d38 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -259,6 +259,7 @@ enum frs_typec_current {
 #define ALTMODE_DISCOVERY_MAX	(SVID_DISCOVERY_MAX * MODE_DISCOVERY_MAX)
 
 #define GET_SINK_CAP_RETRY_MS	100
+#define SEND_DISCOVER_RETRY_MS	100
 
 struct pd_mode_data {
 	int svid_index;		/* current SVID index		*/
@@ -366,6 +367,8 @@ struct tcpm_port {
 	struct kthread_work vdm_state_machine;
 	struct hrtimer enable_frs_timer;
 	struct kthread_work enable_frs;
+	struct hrtimer send_discover_timer;
+	struct kthread_work send_discover_work;
 	bool state_machine_running;
 	bool vdm_sm_running;
 
@@ -1178,6 +1181,16 @@ static void mod_enable_frs_delayed_work(struct tcpm_port *port, unsigned int del
 	}
 }
 
+static void mod_send_discover_delayed_work(struct tcpm_port *port, unsigned int delay_ms)
+{
+	if (delay_ms) {
+		hrtimer_start(&port->send_discover_timer, ms_to_ktime(delay_ms), HRTIMER_MODE_REL);
+	} else {
+		hrtimer_cancel(&port->send_discover_timer);
+		kthread_queue_work(port->wq, &port->send_discover_work);
+	}
+}
+
 static void tcpm_set_state(struct tcpm_port *port, enum tcpm_state state,
 			   unsigned int delay_ms)
 {
@@ -1855,6 +1868,9 @@ static void vdm_run_state_machine(struct tcpm_port *port)
 				res = tcpm_ams_start(port, DISCOVER_IDENTITY);
 				if (res == 0)
 					port->send_discover = false;
+				else if (res == -EAGAIN)
+					mod_send_discover_delayed_work(port,
+								       SEND_DISCOVER_RETRY_MS);
 				break;
 			case CMD_DISCOVER_SVID:
 				res = tcpm_ams_start(port, DISCOVER_SVIDS);
@@ -1880,6 +1896,7 @@ static void vdm_run_state_machine(struct tcpm_port *port)
 			}
 
 			if (res < 0) {
+				port->vdm_state = VDM_STATE_ERR_BUSY;
 				port->vdm_sm_running = false;
 				return;
 			}
@@ -3682,14 +3699,6 @@ static inline enum tcpm_state unattached_state(struct tcpm_port *port)
 	return SNK_UNATTACHED;
 }
 
-static void tcpm_check_send_discover(struct tcpm_port *port)
-{
-	if ((port->data_role == TYPEC_HOST || port->negotiated_rev > PD_REV20) &&
-	    port->send_discover && port->pd_capable)
-		tcpm_send_vdm(port, USB_SID_PD, CMD_DISCOVER_IDENT, NULL, 0);
-	port->send_discover = false;
-}
-
 static void tcpm_swap_complete(struct tcpm_port *port, int result)
 {
 	if (port->swap_pending) {
@@ -3926,7 +3935,18 @@ static void run_state_machine(struct tcpm_port *port)
 			break;
 		}
 
-		tcpm_check_send_discover(port);
+		/*
+		 * 6.4.4.3.1 Discover Identity
+		 * "The Discover Identity Command Shall only be sent to SOP when there is an
+		 * Explicit Contract."
+		 * For now, this driver only supports SOP for DISCOVER_IDENTITY, thus using
+		 * port->explicit_contract to decide whether to send the command.
+		 */
+		if (port->explicit_contract)
+			mod_send_discover_delayed_work(port, 0);
+		else
+			port->send_discover = false;
+
 		/*
 		 * 6.3.5
 		 * Sending ping messages is not necessary if
@@ -4194,7 +4214,18 @@ static void run_state_machine(struct tcpm_port *port)
 			break;
 		}
 
-		tcpm_check_send_discover(port);
+		/*
+		 * 6.4.4.3.1 Discover Identity
+		 * "The Discover Identity Command Shall only be sent to SOP when there is an
+		 * Explicit Contract."
+		 * For now, this driver only supports SOP for DISCOVER_IDENTITY, thus using
+		 * port->explicit_contract.
+		 */
+		if (port->explicit_contract)
+			mod_send_discover_delayed_work(port, 0);
+		else
+			port->send_discover = false;
+
 		power_supply_changed(port->psy);
 		break;
 
@@ -5288,6 +5319,29 @@ unlock:
 	mutex_unlock(&port->lock);
 }
 
+static void tcpm_send_discover_work(struct kthread_work *work)
+{
+	struct tcpm_port *port = container_of(work, struct tcpm_port, send_discover_work);
+
+	mutex_lock(&port->lock);
+	/* No need to send DISCOVER_IDENTITY anymore */
+	if (!port->send_discover)
+		goto unlock;
+
+	/* Retry if the port is not idle */
+	if ((port->state != SRC_READY && port->state != SNK_READY) || port->vdm_sm_running) {
+		mod_send_discover_delayed_work(port, SEND_DISCOVER_RETRY_MS);
+		goto unlock;
+	}
+
+	/* Only send the Message if the port is host for PD rev2.0 */
+	if (port->data_role == TYPEC_HOST || port->negotiated_rev > PD_REV20)
+		tcpm_send_vdm(port, USB_SID_PD, CMD_DISCOVER_IDENT, NULL, 0);
+
+unlock:
+	mutex_unlock(&port->lock);
+}
+
 static int tcpm_dr_set(struct typec_port *p, enum typec_data_role data)
 {
 	struct tcpm_port *port = typec_get_drvdata(p);
@@ -6093,6 +6147,14 @@ static enum hrtimer_restart enable_frs_timer_handler(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
+static enum hrtimer_restart send_discover_timer_handler(struct hrtimer *timer)
+{
+	struct tcpm_port *port = container_of(timer, struct tcpm_port, send_discover_timer);
+
+	kthread_queue_work(port->wq, &port->send_discover_work);
+	return HRTIMER_NORESTART;
+}
+
 struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc)
 {
 	struct tcpm_port *port;
@@ -6123,12 +6185,15 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc)
 	kthread_init_work(&port->vdm_state_machine, vdm_state_machine_work);
 	kthread_init_work(&port->event_work, tcpm_pd_event_handler);
 	kthread_init_work(&port->enable_frs, tcpm_enable_frs_work);
+	kthread_init_work(&port->send_discover_work, tcpm_send_discover_work);
 	hrtimer_init(&port->state_machine_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	port->state_machine_timer.function = state_machine_timer_handler;
 	hrtimer_init(&port->vdm_state_machine_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	port->vdm_state_machine_timer.function = vdm_state_machine_timer_handler;
 	hrtimer_init(&port->enable_frs_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	port->enable_frs_timer.function = enable_frs_timer_handler;
+	hrtimer_init(&port->send_discover_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	port->send_discover_timer.function = send_discover_timer_handler;
 
 	spin_lock_init(&port->pd_event_lock);
 
-- 
cgit v1.2.3


From f1fbd950b59b67bc5c202216c8e1c6ca8c99a3b4 Mon Sep 17 00:00:00 2001
From: Kyle Tso <kyletso@google.com>
Date: Fri, 7 May 2021 14:23:00 +0800
Subject: usb: typec: tcpm: Fix wrong handling for Not_Supported in VDM AMS

Not_Supported Message is acceptable in VDM AMS. Redirect the VDM state
machine to VDM_STATE_DONE when receiving Not_Supported and finish the
VDM AMS.

Also, after the loop in vdm_state_machine_work, add more conditions of
VDM states to clear the vdm_sm_running flag because those are all
stopping states when leaving the loop.

In addition, finish the VDM AMS if the port partner responds BUSY.

Fixes: 8dea75e11380 ("usb: typec: tcpm: Protocol Error handling")
Fixes: 8d3a0578ad1a ("usb: typec: tcpm: Respond Wait if VDM state machine is running")
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Kyle Tso <kyletso@google.com>
Link: https://lore.kernel.org/r/20210507062300.1945009-3-kyletso@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm/tcpm.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index ae1e84252d38..db567e6fde92 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -1897,7 +1897,6 @@ static void vdm_run_state_machine(struct tcpm_port *port)
 
 			if (res < 0) {
 				port->vdm_state = VDM_STATE_ERR_BUSY;
-				port->vdm_sm_running = false;
 				return;
 			}
 		}
@@ -1913,6 +1912,7 @@ static void vdm_run_state_machine(struct tcpm_port *port)
 		port->vdo_data[0] = port->vdo_retry;
 		port->vdo_count = 1;
 		port->vdm_state = VDM_STATE_READY;
+		tcpm_ams_finish(port);
 		break;
 	case VDM_STATE_BUSY:
 		port->vdm_state = VDM_STATE_ERR_TMOUT;
@@ -1978,7 +1978,7 @@ static void vdm_state_machine_work(struct kthread_work *work)
 		 port->vdm_state != VDM_STATE_BUSY &&
 		 port->vdm_state != VDM_STATE_SEND_MESSAGE);
 
-	if (port->vdm_state == VDM_STATE_ERR_TMOUT)
+	if (port->vdm_state < VDM_STATE_READY)
 		port->vdm_sm_running = false;
 
 	mutex_unlock(&port->lock);
@@ -2569,6 +2569,16 @@ static void tcpm_pd_ctrl_request(struct tcpm_port *port,
 			port->sink_cap_done = true;
 			tcpm_set_state(port, ready_state(port), 0);
 			break;
+		case SRC_READY:
+		case SNK_READY:
+			if (port->vdm_state > VDM_STATE_READY) {
+				port->vdm_state = VDM_STATE_DONE;
+				if (tcpm_vdm_ams(port))
+					tcpm_ams_finish(port);
+				mod_vdm_delayed_work(port, 0);
+				break;
+			}
+			fallthrough;
 		default:
 			tcpm_pd_handle_state(port,
 					     port->pwr_role == TYPEC_SOURCE ?
-- 
cgit v1.2.3


From d9ff1096a840dddea3d5cfa2149ff7da9f499fb2 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Tue, 4 May 2021 22:26:29 +0200
Subject: usb: musb: Fix an error message

'ret' is known to be 0 here.
Initialize 'ret' with the expected error code before using it.

Fixes: 0990366bab3c ("usb: musb: Add support for MediaTek musb controller")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/69f514dc7134e3c917cad208e73cc650cb9e2bd6.1620159879.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/musb/mediatek.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/musb/mediatek.c b/drivers/usb/musb/mediatek.c
index eebeadd26946..6b92d037d8fc 100644
--- a/drivers/usb/musb/mediatek.c
+++ b/drivers/usb/musb/mediatek.c
@@ -518,8 +518,8 @@ static int mtk_musb_probe(struct platform_device *pdev)
 
 	glue->xceiv = devm_usb_get_phy(dev, USB_PHY_TYPE_USB2);
 	if (IS_ERR(glue->xceiv)) {
-		dev_err(dev, "fail to getting usb-phy %d\n", ret);
 		ret = PTR_ERR(glue->xceiv);
+		dev_err(dev, "fail to getting usb-phy %d\n", ret);
 		goto err_unregister_usb_phy;
 	}
 
-- 
cgit v1.2.3


From 28ec344bb8911bb0d4910456b22ba0dd4f662521 Mon Sep 17 00:00:00 2001
From: Saravana Kannan <saravanak@google.com>
Date: Wed, 5 May 2021 17:44:22 -0700
Subject: usb: typec: tcpm: Don't block probing of consumers of "connector"
 nodes

fw_devlink expects DT device nodes with "compatible" property to have
struct devices created for them. Since the connector node might not be
populated as a device, mark it as such so that fw_devlink knows not to
wait on this fwnode being populated as a struct device.

Without this patch, USB functionality can be broken on some boards.

Fixes: f7514a663016 ("of: property: fw_devlink: Add support for remote-endpoint")
Reported-by: John Stultz <john.stultz@linaro.org>
Tested-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Saravana Kannan <saravanak@google.com>
Link: https://lore.kernel.org/r/20210506004423.345199-1-saravanak@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c           | 3 ++-
 drivers/usb/typec/tcpm/tcpm.c | 9 +++++++++
 include/linux/fwnode.h        | 1 +
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 4a8bf8cda52b..628e33939aca 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -150,7 +150,7 @@ void fwnode_links_purge(struct fwnode_handle *fwnode)
 	fwnode_links_purge_consumers(fwnode);
 }
 
-static void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode)
+void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode)
 {
 	struct fwnode_handle *child;
 
@@ -164,6 +164,7 @@ static void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode)
 	fwnode_for_each_available_child_node(fwnode, child)
 		fw_devlink_purge_absent_suppliers(child);
 }
+EXPORT_SYMBOL_GPL(fw_devlink_purge_absent_suppliers);
 
 #ifdef CONFIG_SRCU
 static DEFINE_MUTEX(device_links_lock);
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index c4fdc00a3bc8..bffa342d4e38 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -5754,6 +5754,15 @@ static int tcpm_fw_get_caps(struct tcpm_port *port,
 	if (!fwnode)
 		return -EINVAL;
 
+	/*
+	 * This fwnode has a "compatible" property, but is never populated as a
+	 * struct device. Instead we simply parse it to read the properties.
+	 * This it breaks fw_devlink=on. To maintain backward compatibility
+	 * with existing DT files, we work around this by deleting any
+	 * fwnode_links to/from this fwnode.
+	 */
+	fw_devlink_purge_absent_suppliers(fwnode);
+
 	/* USB data support is optional */
 	ret = fwnode_property_read_string(fwnode, "data-role", &cap_str);
 	if (ret == 0) {
diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h
index ed4e67a7ff1c..59828516ebaf 100644
--- a/include/linux/fwnode.h
+++ b/include/linux/fwnode.h
@@ -187,5 +187,6 @@ extern u32 fw_devlink_get_flags(void);
 extern bool fw_devlink_is_strict(void);
 int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup);
 void fwnode_links_purge(struct fwnode_handle *fwnode);
+void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode);
 
 #endif
-- 
cgit v1.2.3


From 8c721cb0f742f9a01f2f1985b274b544f89904f4 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Wed, 28 Apr 2021 10:44:19 +0200
Subject: quota: Use 'hlist_for_each_entry' to simplify code

Use 'hlist_for_each_entry' instead of hand writing it.
This saves a few lines of code.

Link: https://lore.kernel.org/r/f82d3e33964dcbd2aac19866735e0a8381c8a735.1619599407.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 4f1373463766..22d904bde6ab 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -288,14 +288,12 @@ static inline void remove_dquot_hash(struct dquot *dquot)
 static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb,
 				struct kqid qid)
 {
-	struct hlist_node *node;
 	struct dquot *dquot;
 
-	hlist_for_each (node, dquot_hash+hashent) {
-		dquot = hlist_entry(node, struct dquot, dq_hash);
+	hlist_for_each_entry(dquot, dquot_hash+hashent, dq_hash)
 		if (dquot->dq_sb == sb && qid_eq(dquot->dq_id, qid))
 			return dquot;
-	}
+
 	return NULL;
 }
 
-- 
cgit v1.2.3


From 8370e5b093080c03cf89f7ebf0bef6984545429e Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andy.shevchenko@gmail.com>
Date: Mon, 10 May 2021 13:01:36 +0300
Subject: hwmon: (ltc2992) Put fwnode in error case during ->probe()

In each iteration fwnode_for_each_available_child_node() bumps a reference
counting of a loop variable followed by dropping in on a next iteration,

Since in error case the loop is broken, we have to drop a reference count
by ourselves. Do it for port_fwnode in error case during ->probe().

Fixes: b0bd407e94b0 ("hwmon: (ltc2992) Add support")
Cc: Alexandru Tachici <alexandru.tachici@analog.com>
Signed-off-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20210510100136.3303142-1-andy.shevchenko@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/ltc2992.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/hwmon/ltc2992.c b/drivers/hwmon/ltc2992.c
index 4382105bf142..2a4bed0ab226 100644
--- a/drivers/hwmon/ltc2992.c
+++ b/drivers/hwmon/ltc2992.c
@@ -900,11 +900,15 @@ static int ltc2992_parse_dt(struct ltc2992_state *st)
 
 	fwnode_for_each_available_child_node(fwnode, child) {
 		ret = fwnode_property_read_u32(child, "reg", &addr);
-		if (ret < 0)
+		if (ret < 0) {
+			fwnode_handle_put(child);
 			return ret;
+		}
 
-		if (addr > 1)
+		if (addr > 1) {
+			fwnode_handle_put(child);
 			return -EINVAL;
+		}
 
 		ret = fwnode_property_read_u32(child, "shunt-resistor-micro-ohms", &val);
 		if (!ret)
-- 
cgit v1.2.3


From e84749a78dc82bc545f12ce009e3dbcc2c5a8a91 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 10 May 2021 17:06:59 +0200
Subject: ALSA: usb-audio: Validate MS endpoint descriptors

snd_usbmidi_get_ms_info() may access beyond the border when a
malformed descriptor is passed.  This patch adds the sanity checks of
the given MS endpoint descriptors, and skips invalid ones.

Reported-by: syzbot+6bb23a5d5548b93c94aa@syzkaller.appspotmail.com
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210510150659.17710-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/midi.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sound/usb/midi.c b/sound/usb/midi.c
index a10ac75969a8..649eb8d1ab7d 100644
--- a/sound/usb/midi.c
+++ b/sound/usb/midi.c
@@ -1956,8 +1956,12 @@ static int snd_usbmidi_get_ms_info(struct snd_usb_midi *umidi,
 		ms_ep = find_usb_ms_endpoint_descriptor(hostep);
 		if (!ms_ep)
 			continue;
+		if (ms_ep->bLength <= sizeof(*ms_ep))
+			continue;
 		if (ms_ep->bNumEmbMIDIJack > 0x10)
 			continue;
+		if (ms_ep->bLength < sizeof(*ms_ep) + ms_ep->bNumEmbMIDIJack)
+			continue;
 		if (usb_endpoint_dir_out(ep)) {
 			if (endpoints[epidx].out_ep) {
 				if (++epidx >= MIDI_MAX_ENDPOINTS) {
-- 
cgit v1.2.3


From 7ee06ddc4038f936b0d4459d37a7d4d844fb03db Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 7 May 2021 11:38:10 -0400
Subject: dm snapshot: fix a crash when an origin has no snapshots

If an origin target has no snapshots, o->split_boundary is set to 0.
This causes BUG_ON(sectors <= 0) in block/bio.c:bio_split().

Fix this by initializing chunk_size, and in turn split_boundary, to
rounddown_pow_of_two(UINT_MAX) -- the largest power of two that fits
into "unsigned" type.

Reported-by: Michael Tokarev <mjt@tls.msk.ru>
Tested-by: Michael Tokarev <mjt@tls.msk.ru>
Cc: stable@vger.kernel.org
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-snap.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index a2acb014c13a..2a51ddd840b4 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -855,12 +855,11 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new)
 static uint32_t __minimum_chunk_size(struct origin *o)
 {
 	struct dm_snapshot *snap;
-	unsigned chunk_size = 0;
+	unsigned chunk_size = rounddown_pow_of_two(UINT_MAX);
 
 	if (o)
 		list_for_each_entry(snap, &o->snapshots, list)
-			chunk_size = min_not_zero(chunk_size,
-						  snap->store->chunk_size);
+			chunk_size = min(chunk_size, snap->store->chunk_size);
 
 	return (uint32_t) chunk_size;
 }
-- 
cgit v1.2.3


From 63c8af5687f6b1b70e9458cac1ffb25e86db1695 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Mon, 10 May 2021 08:48:06 +0900
Subject: block: uapi: fix comment about block device ioctl

Fix the comment mentioning ioctl command range used for zoned block
devices to reflect the range of commands actually implemented.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Link: https://lore.kernel.org/r/20210509234806.3000-1-damien.lemoal@wdc.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index f44eb0a04afd..4c32e97dcdf0 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -185,7 +185,7 @@ struct fsxattr {
 #define BLKROTATIONAL _IO(0x12,126)
 #define BLKZEROOUT _IO(0x12,127)
 /*
- * A jump here: 130-131 are reserved for zoned block devices
+ * A jump here: 130-136 are reserved for zoned block devices
  * (see uapi/linux/blkzoned.h)
  */
 
-- 
cgit v1.2.3


From 56a8d3fd1f342d10ee7b27e9ac0f4d00b5fbb91c Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Tue, 13 Apr 2021 18:18:34 +0200
Subject: mtd: rawnand: cs553x: Fix external use of SW Hamming ECC helper

Since the Hamming software ECC engine has been updated to become a
proper and independent ECC engine, it is now mandatory to either
initialize the engine before using any one of his functions or use one
of the bare helpers which only perform the calculations. As there is no
actual need for a proper ECC initialization, let's just use the bare
helper instead of the rawnand one.

Fixes: 90ccf0a0192f ("mtd: nand: ecc-hamming: Rename the exported functions")
Cc: stable@vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://lore.kernel.org/linux-mtd/20210413161840.345208-2-miquel.raynal@bootlin.com
---
 drivers/mtd/nand/raw/cs553x_nand.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/raw/cs553x_nand.c b/drivers/mtd/nand/raw/cs553x_nand.c
index 6edf78c16fc8..df40927e5678 100644
--- a/drivers/mtd/nand/raw/cs553x_nand.c
+++ b/drivers/mtd/nand/raw/cs553x_nand.c
@@ -18,6 +18,7 @@
 #include <linux/module.h>
 #include <linux/delay.h>
 #include <linux/mtd/mtd.h>
+#include <linux/mtd/nand-ecc-sw-hamming.h>
 #include <linux/mtd/rawnand.h>
 #include <linux/mtd/partitions.h>
 #include <linux/iopoll.h>
@@ -240,6 +241,15 @@ static int cs_calculate_ecc(struct nand_chip *this, const u_char *dat,
 	return 0;
 }
 
+static int cs553x_ecc_correct(struct nand_chip *chip,
+			      unsigned char *buf,
+			      unsigned char *read_ecc,
+			      unsigned char *calc_ecc)
+{
+	return ecc_sw_hamming_correct(buf, read_ecc, calc_ecc,
+				      chip->ecc.size, false);
+}
+
 static struct cs553x_nand_controller *controllers[4];
 
 static int cs553x_attach_chip(struct nand_chip *chip)
@@ -251,7 +261,7 @@ static int cs553x_attach_chip(struct nand_chip *chip)
 	chip->ecc.bytes = 3;
 	chip->ecc.hwctl  = cs_enable_hwecc;
 	chip->ecc.calculate = cs_calculate_ecc;
-	chip->ecc.correct  = rawnand_sw_hamming_correct;
+	chip->ecc.correct  = cs553x_ecc_correct;
 	chip->ecc.strength = 1;
 
 	return 0;
-- 
cgit v1.2.3


From ad9ffdce453934cdc22fac0a0268119bd630260f Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Tue, 13 Apr 2021 18:18:35 +0200
Subject: mtd: rawnand: fsmc: Fix external use of SW Hamming ECC helper

Since the Hamming software ECC engine has been updated to become a
proper and independent ECC engine, it is now mandatory to either
initialize the engine before using any one of his functions or use one
of the bare helpers which only perform the calculations. As there is no
actual need for a proper ECC initialization, let's just use the bare
helper instead of the rawnand one.

Fixes: 90ccf0a0192f ("mtd: nand: ecc-hamming: Rename the exported functions")
Cc: stable@vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://lore.kernel.org/linux-mtd/20210413161840.345208-3-miquel.raynal@bootlin.com
---
 drivers/mtd/nand/raw/fsmc_nand.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/raw/fsmc_nand.c b/drivers/mtd/nand/raw/fsmc_nand.c
index bf695255b43a..a3e66155ae40 100644
--- a/drivers/mtd/nand/raw/fsmc_nand.c
+++ b/drivers/mtd/nand/raw/fsmc_nand.c
@@ -25,6 +25,7 @@
 #include <linux/sched.h>
 #include <linux/types.h>
 #include <linux/mtd/mtd.h>
+#include <linux/mtd/nand-ecc-sw-hamming.h>
 #include <linux/mtd/rawnand.h>
 #include <linux/platform_device.h>
 #include <linux/of.h>
@@ -432,6 +433,15 @@ static int fsmc_read_hwecc_ecc1(struct nand_chip *chip, const u8 *data,
 	return 0;
 }
 
+static int fsmc_correct_ecc1(struct nand_chip *chip,
+			     unsigned char *buf,
+			     unsigned char *read_ecc,
+			     unsigned char *calc_ecc)
+{
+	return ecc_sw_hamming_correct(buf, read_ecc, calc_ecc,
+				      chip->ecc.size, false);
+}
+
 /* Count the number of 0's in buff upto a max of max_bits */
 static int count_written_bits(u8 *buff, int size, int max_bits)
 {
@@ -917,7 +927,7 @@ static int fsmc_nand_attach_chip(struct nand_chip *nand)
 	case NAND_ECC_ENGINE_TYPE_ON_HOST:
 		dev_info(host->dev, "Using 1-bit HW ECC scheme\n");
 		nand->ecc.calculate = fsmc_read_hwecc_ecc1;
-		nand->ecc.correct = rawnand_sw_hamming_correct;
+		nand->ecc.correct = fsmc_correct_ecc1;
 		nand->ecc.hwctl = fsmc_enable_hwecc;
 		nand->ecc.bytes = 3;
 		nand->ecc.strength = 1;
-- 
cgit v1.2.3


From c4b7d7c480d607e4f52d310d9d16b194868d0917 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Tue, 13 Apr 2021 18:18:36 +0200
Subject: mtd: rawnand: lpc32xx_slc: Fix external use of SW Hamming ECC helper

Since the Hamming software ECC engine has been updated to become a
proper and independent ECC engine, it is now mandatory to either
initialize the engine before using any one of his functions or use one
of the bare helpers which only perform the calculations. As there is no
actual need for a proper ECC initialization, let's just use the bare
helper instead of the rawnand one.

Fixes: 90ccf0a0192f ("mtd: nand: ecc-hamming: Rename the exported functions")
Cc: stable@vger.kernel.org
Cc: Vladimir Zapolskiy <vz@mleia.com>
Reported-by: Trevor Woerner <twoerner@gmail.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Tested-by: Trevor Woerner <twoerner@gmail.com>
Acked-by: Vladimir Zapolskiy <vz@mleia.com>
Link: https://lore.kernel.org/linux-mtd/20210413161840.345208-4-miquel.raynal@bootlin.com
---
 drivers/mtd/nand/raw/lpc32xx_slc.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/raw/lpc32xx_slc.c b/drivers/mtd/nand/raw/lpc32xx_slc.c
index 6b7269cfb7d8..d7dfc6fd85ca 100644
--- a/drivers/mtd/nand/raw/lpc32xx_slc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_slc.c
@@ -27,6 +27,7 @@
 #include <linux/of.h>
 #include <linux/of_gpio.h>
 #include <linux/mtd/lpc32xx_slc.h>
+#include <linux/mtd/nand-ecc-sw-hamming.h>
 
 #define LPC32XX_MODNAME		"lpc32xx-nand"
 
@@ -344,6 +345,18 @@ static int lpc32xx_nand_ecc_calculate(struct nand_chip *chip,
 	return 0;
 }
 
+/*
+ * Corrects the data
+ */
+static int lpc32xx_nand_ecc_correct(struct nand_chip *chip,
+				    unsigned char *buf,
+				    unsigned char *read_ecc,
+				    unsigned char *calc_ecc)
+{
+	return ecc_sw_hamming_correct(buf, read_ecc, calc_ecc,
+				      chip->ecc.size, false);
+}
+
 /*
  * Read a single byte from NAND device
  */
@@ -802,7 +815,7 @@ static int lpc32xx_nand_attach_chip(struct nand_chip *chip)
 	chip->ecc.write_oob = lpc32xx_nand_write_oob_syndrome;
 	chip->ecc.read_oob = lpc32xx_nand_read_oob_syndrome;
 	chip->ecc.calculate = lpc32xx_nand_ecc_calculate;
-	chip->ecc.correct = rawnand_sw_hamming_correct;
+	chip->ecc.correct = lpc32xx_nand_ecc_correct;
 	chip->ecc.hwctl = lpc32xx_nand_ecc_enable;
 
 	/*
-- 
cgit v1.2.3


From 3e09c0252501829b14b10f14e1982aaab77d0b80 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Tue, 13 Apr 2021 18:18:37 +0200
Subject: mtd: rawnand: ndfc: Fix external use of SW Hamming ECC helper

Since the Hamming software ECC engine has been updated to become a
proper and independent ECC engine, it is now mandatory to either
initialize the engine before using any one of his functions or use one
of the bare helpers which only perform the calculations. As there is no
actual need for a proper ECC initialization, let's just use the bare
helper instead of the rawnand one.

Fixes: 90ccf0a0192f ("mtd: nand: ecc-hamming: Rename the exported functions")
Cc: stable@vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://lore.kernel.org/linux-mtd/20210413161840.345208-5-miquel.raynal@bootlin.com
---
 drivers/mtd/nand/raw/ndfc.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/raw/ndfc.c b/drivers/mtd/nand/raw/ndfc.c
index 338d6b1a189e..98d5a94c3a24 100644
--- a/drivers/mtd/nand/raw/ndfc.c
+++ b/drivers/mtd/nand/raw/ndfc.c
@@ -22,6 +22,7 @@
 #include <linux/mtd/ndfc.h>
 #include <linux/slab.h>
 #include <linux/mtd/mtd.h>
+#include <linux/mtd/nand-ecc-sw-hamming.h>
 #include <linux/of_address.h>
 #include <linux/of_platform.h>
 #include <asm/io.h>
@@ -100,6 +101,15 @@ static int ndfc_calculate_ecc(struct nand_chip *chip,
 	return 0;
 }
 
+static int ndfc_correct_ecc(struct nand_chip *chip,
+			    unsigned char *buf,
+			    unsigned char *read_ecc,
+			    unsigned char *calc_ecc)
+{
+	return ecc_sw_hamming_correct(buf, read_ecc, calc_ecc,
+				      chip->ecc.size, false);
+}
+
 /*
  * Speedups for buffer read/write/verify
  *
@@ -145,7 +155,7 @@ static int ndfc_chip_init(struct ndfc_controller *ndfc,
 	chip->controller = &ndfc->ndfc_control;
 	chip->legacy.read_buf = ndfc_read_buf;
 	chip->legacy.write_buf = ndfc_write_buf;
-	chip->ecc.correct = rawnand_sw_hamming_correct;
+	chip->ecc.correct = ndfc_correct_ecc;
 	chip->ecc.hwctl = ndfc_enable_hwecc;
 	chip->ecc.calculate = ndfc_calculate_ecc;
 	chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
-- 
cgit v1.2.3


From 46fcb57e6b7283533ebf8ba17a6bd30fa88bdc9f Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Tue, 13 Apr 2021 18:18:38 +0200
Subject: mtd: rawnand: sharpsl: Fix external use of SW Hamming ECC helper

Since the Hamming software ECC engine has been updated to become a
proper and independent ECC engine, it is now mandatory to either
initialize the engine before using any one of his functions or use one
of the bare helpers which only perform the calculations. As there is no
actual need for a proper ECC initialization, let's just use the bare
helper instead of the rawnand one.

Fixes: 90ccf0a0192f ("mtd: nand: ecc-hamming: Rename the exported functions")
Cc: stable@vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://lore.kernel.org/linux-mtd/20210413161840.345208-6-miquel.raynal@bootlin.com
---
 drivers/mtd/nand/raw/sharpsl.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/raw/sharpsl.c b/drivers/mtd/nand/raw/sharpsl.c
index 5612ee628425..2f1fe464e663 100644
--- a/drivers/mtd/nand/raw/sharpsl.c
+++ b/drivers/mtd/nand/raw/sharpsl.c
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/delay.h>
 #include <linux/mtd/mtd.h>
+#include <linux/mtd/nand-ecc-sw-hamming.h>
 #include <linux/mtd/rawnand.h>
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/sharpsl.h>
@@ -96,6 +97,15 @@ static int sharpsl_nand_calculate_ecc(struct nand_chip *chip,
 	return readb(sharpsl->io + ECCCNTR) != 0;
 }
 
+static int sharpsl_nand_correct_ecc(struct nand_chip *chip,
+				    unsigned char *buf,
+				    unsigned char *read_ecc,
+				    unsigned char *calc_ecc)
+{
+	return ecc_sw_hamming_correct(buf, read_ecc, calc_ecc,
+				      chip->ecc.size, false);
+}
+
 static int sharpsl_attach_chip(struct nand_chip *chip)
 {
 	if (chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_HOST)
@@ -106,7 +116,7 @@ static int sharpsl_attach_chip(struct nand_chip *chip)
 	chip->ecc.strength = 1;
 	chip->ecc.hwctl = sharpsl_nand_enable_hwecc;
 	chip->ecc.calculate = sharpsl_nand_calculate_ecc;
-	chip->ecc.correct = rawnand_sw_hamming_correct;
+	chip->ecc.correct = sharpsl_nand_correct_ecc;
 
 	return 0;
 }
-- 
cgit v1.2.3


From 6a4c5ada577467a5f79e06f2c5e69c09983c22fb Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Tue, 13 Apr 2021 18:18:39 +0200
Subject: mtd: rawnand: tmio: Fix external use of SW Hamming ECC helper

Since the Hamming software ECC engine has been updated to become a
proper and independent ECC engine, it is now mandatory to either
initialize the engine before using any one of his functions or use one
of the bare helpers which only perform the calculations. As there is no
actual need for a proper ECC initialization, let's just use the bare
helper instead of the rawnand one.

Fixes: 90ccf0a0192f ("mtd: nand: ecc-hamming: Rename the exported functions")
Cc: stable@vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://lore.kernel.org/linux-mtd/20210413161840.345208-7-miquel.raynal@bootlin.com
---
 drivers/mtd/nand/raw/tmio_nand.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/mtd/nand/raw/tmio_nand.c b/drivers/mtd/nand/raw/tmio_nand.c
index de8e919d0ebe..6d93dd31969b 100644
--- a/drivers/mtd/nand/raw/tmio_nand.c
+++ b/drivers/mtd/nand/raw/tmio_nand.c
@@ -34,6 +34,7 @@
 #include <linux/interrupt.h>
 #include <linux/ioport.h>
 #include <linux/mtd/mtd.h>
+#include <linux/mtd/nand-ecc-sw-hamming.h>
 #include <linux/mtd/rawnand.h>
 #include <linux/mtd/partitions.h>
 #include <linux/slab.h>
@@ -292,11 +293,12 @@ static int tmio_nand_correct_data(struct nand_chip *chip, unsigned char *buf,
 	int r0, r1;
 
 	/* assume ecc.size = 512 and ecc.bytes = 6 */
-	r0 = rawnand_sw_hamming_correct(chip, buf, read_ecc, calc_ecc);
+	r0 = ecc_sw_hamming_correct(buf, read_ecc, calc_ecc,
+				    chip->ecc.size, false);
 	if (r0 < 0)
 		return r0;
-	r1 = rawnand_sw_hamming_correct(chip, buf + 256, read_ecc + 3,
-					calc_ecc + 3);
+	r1 = ecc_sw_hamming_correct(buf + 256, read_ecc + 3, calc_ecc + 3,
+				    chip->ecc.size, false);
 	if (r1 < 0)
 		return r1;
 	return r0 + r1;
-- 
cgit v1.2.3


From 3d227a0b0ce319edbff6fd0d8af4d66689e477cc Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Tue, 13 Apr 2021 18:18:40 +0200
Subject: mtd: rawnand: txx9ndfmc: Fix external use of SW Hamming ECC helper

Since the Hamming software ECC engine has been updated to become a
proper and independent ECC engine, it is now mandatory to either
initialize the engine before using any one of his functions or use one
of the bare helpers which only perform the calculations. As there is no
actual need for a proper ECC initialization, let's just use the bare
helper instead of the rawnand one.

Fixes: 90ccf0a0192f ("mtd: nand: ecc-hamming: Rename the exported functions")
Cc: stable@vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://lore.kernel.org/linux-mtd/20210413161840.345208-8-miquel.raynal@bootlin.com
---
 drivers/mtd/nand/raw/txx9ndfmc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/mtd/nand/raw/txx9ndfmc.c b/drivers/mtd/nand/raw/txx9ndfmc.c
index 1a9449e53bf9..b8894ac27073 100644
--- a/drivers/mtd/nand/raw/txx9ndfmc.c
+++ b/drivers/mtd/nand/raw/txx9ndfmc.c
@@ -13,6 +13,7 @@
 #include <linux/platform_device.h>
 #include <linux/delay.h>
 #include <linux/mtd/mtd.h>
+#include <linux/mtd/nand-ecc-sw-hamming.h>
 #include <linux/mtd/rawnand.h>
 #include <linux/mtd/partitions.h>
 #include <linux/io.h>
@@ -193,8 +194,8 @@ static int txx9ndfmc_correct_data(struct nand_chip *chip, unsigned char *buf,
 	int stat;
 
 	for (eccsize = chip->ecc.size; eccsize > 0; eccsize -= 256) {
-		stat = rawnand_sw_hamming_correct(chip, buf, read_ecc,
-						  calc_ecc);
+		stat = ecc_sw_hamming_correct(buf, read_ecc, calc_ecc,
+					      chip->ecc.size, false);
 		if (stat < 0)
 			return stat;
 		corrected += stat;
-- 
cgit v1.2.3


From 562b4e91d3b221f737f84ff78ee7d348c8a6891f Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <rafal@milecki.pl>
Date: Sat, 8 May 2021 19:32:14 +0200
Subject: mtd: parsers: ofpart: fix parsing subpartitions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ofpart was recently patched to not scan random partition nodes as
subpartitions. That change unfortunately broke scanning valid
subpartitions like:

partitions {
	compatible = "fixed-partitions";
	#address-cells = <1>;
	#size-cells = <1>;

	partition@0 {
		compatible = "fixed-partitions";
		label = "bootloader";
		reg = <0x0 0x100000>;

		partition@0 {
			label = "config";
			reg = <0x80000 0x80000>;
		};
	};
};

Fix that regression by adding 1 more code path. We actually need 3
conditional blocks to support 3 possible cases. This change also makes
code easier to understand & follow.

Reported-by: David Bauer <mail@david-bauer.net>
Fixes: 2d751203aacf ("mtd: parsers: ofpart: limit parsing of deprecated DT syntax
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Tested-by: Andrew Cameron <apcameron@softhome.net>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://lore.kernel.org/linux-mtd/20210508173214.28365-1-zajec5@gmail.com
---
 drivers/mtd/parsers/ofpart_core.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/drivers/mtd/parsers/ofpart_core.c b/drivers/mtd/parsers/ofpart_core.c
index 0fd8d2a0db97..192190c42fc8 100644
--- a/drivers/mtd/parsers/ofpart_core.c
+++ b/drivers/mtd/parsers/ofpart_core.c
@@ -57,20 +57,22 @@ static int parse_fixed_partitions(struct mtd_info *master,
 	if (!mtd_node)
 		return 0;
 
-	ofpart_node = of_get_child_by_name(mtd_node, "partitions");
-	if (!ofpart_node && !master->parent) {
-		/*
-		 * We might get here even when ofpart isn't used at all (e.g.,
-		 * when using another parser), so don't be louder than
-		 * KERN_DEBUG
-		 */
-		pr_debug("%s: 'partitions' subnode not found on %pOF. Trying to parse direct subnodes as partitions.\n",
-			 master->name, mtd_node);
+	if (!master->parent) { /* Master */
+		ofpart_node = of_get_child_by_name(mtd_node, "partitions");
+		if (!ofpart_node) {
+			/*
+			 * We might get here even when ofpart isn't used at all (e.g.,
+			 * when using another parser), so don't be louder than
+			 * KERN_DEBUG
+			 */
+			pr_debug("%s: 'partitions' subnode not found on %pOF. Trying to parse direct subnodes as partitions.\n",
+				master->name, mtd_node);
+			ofpart_node = mtd_node;
+			dedicated = false;
+		}
+	} else { /* Partition */
 		ofpart_node = mtd_node;
-		dedicated = false;
 	}
-	if (!ofpart_node)
-		return 0;
 
 	of_id = of_match_node(parse_ofpart_match_table, ofpart_node);
 	if (dedicated && !of_id) {
-- 
cgit v1.2.3


From 5311221304fa60e357aada75efdf2f2da8c30a57 Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Thu, 6 May 2021 19:49:39 +0800
Subject: dt-bindings: phy: cadence-torrent: update reference file of docs

In commit fd7abc3c5b87 ("phy: cadence-torrent: Use a common header
file for Cadence SERDES"), phy-cadence-torrent.h was renamed to
phy-cadence.h. Fix it of the Documentation.

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Link: https://lore.kernel.org/r/20210506114940.22215-1-wanjiabing@vivo.com
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml b/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml
index 01dcd14e7b2a..320a232c7208 100644
--- a/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml
+++ b/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml
@@ -118,7 +118,7 @@ patternProperties:
         description:
           Specifies the Spread Spectrum Clocking mode used. It can be NO_SSC,
           EXTERNAL_SSC or INTERNAL_SSC.
-          Refer include/dt-bindings/phy/phy-cadence-torrent.h for the constants to be used.
+          Refer include/dt-bindings/phy/phy-cadence.h for the constants to be used.
         $ref: /schemas/types.yaml#/definitions/uint32
         enum: [0, 1, 2]
         default: 0
-- 
cgit v1.2.3


From 0c8bd174f0fc131bc9dfab35cd8784f59045da87 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sat, 8 May 2021 09:23:09 +0200
Subject: ACPI: scan: Fix a memory leak in an error handling path

If 'acpi_device_set_name()' fails, we must free
'acpi_device_bus_id->bus_id' or there is a (potential) memory leak.

Fixes: eb50aaf960e3 ("ACPI: scan: Use unique number for instance_no")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/scan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index a22778e880c2..651a431e2bbf 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -700,6 +700,7 @@ int acpi_device_add(struct acpi_device *device,
 
 		result = acpi_device_set_name(device, acpi_device_bus_id);
 		if (result) {
+			kfree_const(acpi_device_bus_id->bus_id);
 			kfree(acpi_device_bus_id);
 			goto err_unlock;
 		}
-- 
cgit v1.2.3


From a568814a55a0e82bbc7c7b51333d0c38e8fb5520 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Sun, 9 May 2021 14:39:21 +0300
Subject: RDMA/siw: Properly check send and receive CQ pointers

The check for the NULL of pointer received from container_of() is
incorrect by definition as it points to some offset from NULL.

Change such check with proper NULL check of SIW QP attributes.

Fixes: 303ae1cdfdf7 ("rdma/siw: application interface")
Link: https://lore.kernel.org/r/a7535a82925f6f4c1f062abaa294f3ae6e54bdd2.1620560310.git.leonro@nvidia.com
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Reviewed-by: Bernard Metzler <bmt@zurich.ibm.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/siw/siw_verbs.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index d2313efb26db..917c8a919f38 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -300,7 +300,6 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
 	struct siw_ucontext *uctx =
 		rdma_udata_to_drv_context(udata, struct siw_ucontext,
 					  base_ucontext);
-	struct siw_cq *scq = NULL, *rcq = NULL;
 	unsigned long flags;
 	int num_sqe, num_rqe, rv = 0;
 	size_t length;
@@ -343,10 +342,8 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
 		rv = -EINVAL;
 		goto err_out;
 	}
-	scq = to_siw_cq(attrs->send_cq);
-	rcq = to_siw_cq(attrs->recv_cq);
 
-	if (!scq || (!rcq && !attrs->srq)) {
+	if (!attrs->send_cq || (!attrs->recv_cq && !attrs->srq)) {
 		siw_dbg(base_dev, "send CQ or receive CQ invalid\n");
 		rv = -EINVAL;
 		goto err_out;
@@ -401,8 +398,8 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
 		}
 	}
 	qp->pd = pd;
-	qp->scq = scq;
-	qp->rcq = rcq;
+	qp->scq = to_siw_cq(attrs->send_cq);
+	qp->rcq = to_siw_cq(attrs->recv_cq);
 
 	if (attrs->srq) {
 		/*
-- 
cgit v1.2.3


From a3d83276d98886879b5bf7b30b7c29882754e4df Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Sun, 9 May 2021 14:41:38 +0300
Subject: RDMA/siw: Release xarray entry

The xarray entry is allocated in siw_qp_add(), but release was
missed in case zero-sized SQ was discovered.

Fixes: 661f385961f0 ("RDMA/siw: Fix handling of zero-sized Read and Receive Queues.")
Link: https://lore.kernel.org/r/f070b59d5a1114d5a4e830346755c2b3f141cde5.1620560472.git.leonro@nvidia.com
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Reviewed-by: Bernard Metzler <bmt@zurich.ibm.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/siw/siw_verbs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 917c8a919f38..3f175f220a22 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -375,7 +375,7 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
 	else {
 		/* Zero sized SQ is not supported */
 		rv = -EINVAL;
-		goto err_out;
+		goto err_out_xa;
 	}
 	if (num_rqe)
 		num_rqe = roundup_pow_of_two(num_rqe);
-- 
cgit v1.2.3


From 54d87913f147a983589923c7f651f97de9af5be1 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Mon, 10 May 2021 17:46:00 +0300
Subject: RDMA/core: Prevent divide-by-zero error triggered by the user

The user_entry_size is supplied by the user and later used as a
denominator to calculate number of entries. The zero supplied by the user
will trigger the following divide-by-zero error:

 divide error: 0000 [#1] SMP KASAN PTI
 CPU: 4 PID: 497 Comm: c_repro Not tainted 5.13.0-rc1+ #281
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
 RIP: 0010:ib_uverbs_handler_UVERBS_METHOD_QUERY_GID_TABLE+0x1b1/0x510
 Code: 87 59 03 00 00 e8 9f ab 1e ff 48 8d bd a8 00 00 00 e8 d3 70 41 ff 44 0f b7 b5 a8 00 00 00 e8 86 ab 1e ff 31 d2 4c 89 f0 31 ff <49> f7 f5 48 89 d6 48 89 54 24 10 48 89 04 24 e8 1b ad 1e ff 48 8b
 RSP: 0018:ffff88810416f828 EFLAGS: 00010246
 RAX: 0000000000000008 RBX: 1ffff1102082df09 RCX: ffffffff82183f3d
 RDX: 0000000000000000 RSI: ffff888105f2da00 RDI: 0000000000000000
 RBP: ffff88810416fa98 R08: 0000000000000001 R09: ffffed102082df5f
 R10: ffff88810416faf7 R11: ffffed102082df5e R12: 0000000000000000
 R13: 0000000000000000 R14: 0000000000000008 R15: ffff88810416faf0
 FS:  00007f5715efa740(0000) GS:ffff88811a700000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000000020000840 CR3: 000000010c2e0001 CR4: 0000000000370ea0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
 Call Trace:
  ? ib_uverbs_handler_UVERBS_METHOD_INFO_HANDLES+0x4b0/0x4b0
  ib_uverbs_cmd_verbs+0x1546/0x1940
  ib_uverbs_ioctl+0x186/0x240
  __x64_sys_ioctl+0x38a/0x1220
  do_syscall_64+0x3f/0x80
  entry_SYSCALL_64_after_hwframe+0x44/0xae

Fixes: 9f85cbe50aa0 ("RDMA/uverbs: Expose the new GID query API to user space")
Link: https://lore.kernel.org/r/b971cc70a8b240a8b5eda33c99fa0558a0071be2.1620657876.git.leonro@nvidia.com
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/core/uverbs_std_types_device.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c
index 9ec6971056fa..a03021d94e11 100644
--- a/drivers/infiniband/core/uverbs_std_types_device.c
+++ b/drivers/infiniband/core/uverbs_std_types_device.c
@@ -331,6 +331,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_TABLE)(
 	if (ret)
 		return ret;
 
+	if (!user_entry_size)
+		return -EINVAL;
+
 	max_entries = uverbs_attr_ptr_get_array_size(
 		attrs, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES,
 		user_entry_size);
-- 
cgit v1.2.3


From c745253e2a691a40c66790defe85c104a887e14a Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Wed, 5 May 2021 14:09:15 +0300
Subject: PM: runtime: Fix unpaired parent child_count for force_resume

As pm_runtime_need_not_resume() relies also on usage_count, it can return
a different value in pm_runtime_force_suspend() compared to when called in
pm_runtime_force_resume(). Different return values can happen if anything
calls PM runtime functions in between, and causes the parent child_count
to increase on every resume.

So far I've seen the issue only for omapdrm that does complicated things
with PM runtime calls during system suspend for legacy reasons:

omap_atomic_commit_tail() for omapdrm.0
 dispc_runtime_get()
  wakes up 58000000.dss as it's the dispc parent
   dispc_runtime_resume()
    rpm_resume() increases parent child_count
 dispc_runtime_put() won't idle, PM runtime suspend blocked
pm_runtime_force_suspend() for 58000000.dss, !pm_runtime_need_not_resume()
 __update_runtime_status()
system suspended
pm_runtime_force_resume() for 58000000.dss, pm_runtime_need_not_resume()
 pm_runtime_enable() only called because of pm_runtime_need_not_resume()
omap_atomic_commit_tail() for omapdrm.0
 dispc_runtime_get()
  wakes up 58000000.dss as it's the dispc parent
   dispc_runtime_resume()
    rpm_resume() increases parent child_count
 dispc_runtime_put() won't idle, PM runtime suspend blocked
...
rpm_suspend for 58000000.dss but parent child_count is now unbalanced

Let's fix the issue by adding a flag for needs_force_resume and use it in
pm_runtime_force_resume() instead of pm_runtime_need_not_resume().

Additionally omapdrm system suspend could be simplified later on to avoid
lots of unnecessary PM runtime calls and the complexity it adds. The
driver can just use internal functions that are shared between the PM
runtime and system suspend related functions.

Fixes: 4918e1f87c5f ("PM / runtime: Rework pm_runtime_force_suspend/resume()")
Signed-off-by: Tony Lindgren <tony@atomide.com>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Tested-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Cc: 4.16+ <stable@vger.kernel.org> # 4.16+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/runtime.c | 10 +++++++---
 include/linux/pm.h           |  1 +
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 1fc1a992f90c..b570848d23e0 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1637,6 +1637,7 @@ void pm_runtime_init(struct device *dev)
 	dev->power.request_pending = false;
 	dev->power.request = RPM_REQ_NONE;
 	dev->power.deferred_resume = false;
+	dev->power.needs_force_resume = 0;
 	INIT_WORK(&dev->power.work, pm_runtime_work);
 
 	dev->power.timer_expires = 0;
@@ -1804,10 +1805,12 @@ int pm_runtime_force_suspend(struct device *dev)
 	 * its parent, but set its status to RPM_SUSPENDED anyway in case this
 	 * function will be called again for it in the meantime.
 	 */
-	if (pm_runtime_need_not_resume(dev))
+	if (pm_runtime_need_not_resume(dev)) {
 		pm_runtime_set_suspended(dev);
-	else
+	} else {
 		__update_runtime_status(dev, RPM_SUSPENDED);
+		dev->power.needs_force_resume = 1;
+	}
 
 	return 0;
 
@@ -1834,7 +1837,7 @@ int pm_runtime_force_resume(struct device *dev)
 	int (*callback)(struct device *);
 	int ret = 0;
 
-	if (!pm_runtime_status_suspended(dev) || pm_runtime_need_not_resume(dev))
+	if (!pm_runtime_status_suspended(dev) || !dev->power.needs_force_resume)
 		goto out;
 
 	/*
@@ -1853,6 +1856,7 @@ int pm_runtime_force_resume(struct device *dev)
 
 	pm_runtime_mark_last_busy(dev);
 out:
+	dev->power.needs_force_resume = 0;
 	pm_runtime_enable(dev);
 	return ret;
 }
diff --git a/include/linux/pm.h b/include/linux/pm.h
index c9657408fee1..1d8209c09686 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -601,6 +601,7 @@ struct dev_pm_info {
 	unsigned int		idle_notification:1;
 	unsigned int		request_pending:1;
 	unsigned int		deferred_resume:1;
+	unsigned int		needs_force_resume:1;
 	unsigned int		runtime_auto:1;
 	bool			ignore_children:1;
 	unsigned int		no_callbacks:1;
-- 
cgit v1.2.3


From 37a8024d265564eba680575df6421f19db21dfce Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Fri, 7 May 2021 11:59:05 -0700
Subject: arm64: mte: initialize RGSR_EL1.SEED in __cpu_setup

A valid implementation choice for the ChooseRandomNonExcludedTag()
pseudocode function used by IRG is to behave in the same way as with
GCR_EL1.RRND=0. This would mean that RGSR_EL1.SEED is used as an LFSR
which must have a non-zero value in order for IRG to properly produce
pseudorandom numbers. However, RGSR_EL1 is reset to an UNKNOWN value
on soft reset and thus may reset to 0. Therefore we must initialize
RGSR_EL1.SEED to a non-zero value in order to ensure that IRG behaves
as expected.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Fixes: 3b714d24ef17 ("arm64: mte: CPU feature detection and initial sysreg configuration")
Cc: <stable@vger.kernel.org> # 5.10
Link: https://linux-review.googlesource.com/id/I2b089b6c7d6f17ee37e2f0db7df5ad5bcc04526c
Acked-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210507185905.1745402-1-pcc@google.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/proc.S | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 0a48191534ff..97d7bcd8d4f2 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -447,6 +447,18 @@ SYM_FUNC_START(__cpu_setup)
 	mov	x10, #(SYS_GCR_EL1_RRND | SYS_GCR_EL1_EXCL_MASK)
 	msr_s	SYS_GCR_EL1, x10
 
+	/*
+	 * If GCR_EL1.RRND=1 is implemented the same way as RRND=0, then
+	 * RGSR_EL1.SEED must be non-zero for IRG to produce
+	 * pseudorandom numbers. As RGSR_EL1 is UNKNOWN out of reset, we
+	 * must initialize it.
+	 */
+	mrs	x10, CNTVCT_EL0
+	ands	x10, x10, #SYS_RGSR_EL1_SEED_MASK
+	csinc	x10, x10, xzr, ne
+	lsl	x10, x10, #SYS_RGSR_EL1_SEED_SHIFT
+	msr_s	SYS_RGSR_EL1, x10
+
 	/* clear any pending tag check faults in TFSR*_EL1 */
 	msr_s	SYS_TFSR_EL1, xzr
 	msr_s	SYS_TFSRE0_EL1, xzr
-- 
cgit v1.2.3


From f79f7a2d96769d2a3e663a3e673066be77c30cc3 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Mon, 22 Mar 2021 17:58:19 +0530
Subject: arc: Fix typos/spellos

s/commiting/committing/
s/defintion/definition/
s/gaurantees/guarantees/
s/interrpted/interrupted/
s/interrutps/interrupts/
s/succeded/succeeded/
s/unconditonally/unconditionally/

Reviewed-by: Christian Brauner <christian.brauner@ubuntu.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/Makefile              | 2 +-
 arch/arc/include/asm/cmpxchg.h | 4 ++--
 arch/arc/kernel/process.c      | 8 ++++----
 arch/arc/kernel/signal.c       | 4 ++--
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index 4392c9c189c4..e47adc97a89b 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -31,7 +31,7 @@ endif
 
 
 ifdef CONFIG_ARC_CURR_IN_REG
-# For a global register defintion, make sure it gets passed to every file
+# For a global register definition, make sure it gets passed to every file
 # We had a customer reported bug where some code built in kernel was NOT using
 # any kernel headers, and missing the r25 global register
 # Can't do unconditionally because of recursive include issues
diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h
index 9b87e162e539..dfeffa25499b 100644
--- a/arch/arc/include/asm/cmpxchg.h
+++ b/arch/arc/include/asm/cmpxchg.h
@@ -116,7 +116,7 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
  *
  * Technically the lock is also needed for UP (boils down to irq save/restore)
  * but we can cheat a bit since cmpxchg() atomic_ops_lock() would cause irqs to
- * be disabled thus can't possibly be interrpted/preempted/clobbered by xchg()
+ * be disabled thus can't possibly be interrupted/preempted/clobbered by xchg()
  * Other way around, xchg is one instruction anyways, so can't be interrupted
  * as such
  */
@@ -143,7 +143,7 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
 /*
  * "atomic" variant of xchg()
  * REQ: It needs to follow the same serialization rules as other atomic_xxx()
- * Since xchg() doesn't always do that, it would seem that following defintion
+ * Since xchg() doesn't always do that, it would seem that following definition
  * is incorrect. But here's the rationale:
  *   SMP : Even xchg() takes the atomic_ops_lock, so OK.
  *   LLSC: atomic_ops_lock are not relevant at all (even if SMP, since LLSC
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index d838d0d57696..3793876f42d9 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -50,14 +50,14 @@ SYSCALL_DEFINE3(arc_usr_cmpxchg, int *, uaddr, int, expected, int, new)
 	int ret;
 
 	/*
-	 * This is only for old cores lacking LLOCK/SCOND, which by defintion
+	 * This is only for old cores lacking LLOCK/SCOND, which by definition
 	 * can't possibly be SMP. Thus doesn't need to be SMP safe.
 	 * And this also helps reduce the overhead for serializing in
 	 * the UP case
 	 */
 	WARN_ON_ONCE(IS_ENABLED(CONFIG_SMP));
 
-	/* Z indicates to userspace if operation succeded */
+	/* Z indicates to userspace if operation succeeded */
 	regs->status32 &= ~STATUS_Z_MASK;
 
 	ret = access_ok(uaddr, sizeof(*uaddr));
@@ -107,7 +107,7 @@ fail:
 
 void arch_cpu_idle(void)
 {
-	/* Re-enable interrupts <= default irq priority before commiting SLEEP */
+	/* Re-enable interrupts <= default irq priority before committing SLEEP */
 	const unsigned int arg = 0x10 | ARCV2_IRQ_DEF_PRIO;
 
 	__asm__ __volatile__(
@@ -120,7 +120,7 @@ void arch_cpu_idle(void)
 
 void arch_cpu_idle(void)
 {
-	/* sleep, but enable both set E1/E2 (levels of interrutps) before committing */
+	/* sleep, but enable both set E1/E2 (levels of interrupts) before committing */
 	__asm__ __volatile__("sleep 0x3	\n");
 }
 
diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c
index fdbe06c98895..b3ccb9e5ffe4 100644
--- a/arch/arc/kernel/signal.c
+++ b/arch/arc/kernel/signal.c
@@ -259,7 +259,7 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
 		regs->r2 = (unsigned long)&sf->uc;
 
 		/*
-		 * small optim to avoid unconditonally calling do_sigaltstack
+		 * small optim to avoid unconditionally calling do_sigaltstack
 		 * in sigreturn path, now that we only have rt_sigreturn
 		 */
 		magic = MAGIC_SIGALTSTK;
@@ -391,7 +391,7 @@ void do_signal(struct pt_regs *regs)
 void do_notify_resume(struct pt_regs *regs)
 {
 	/*
-	 * ASM glue gaurantees that this is only called when returning to
+	 * ASM glue guarantees that this is only called when returning to
 	 * user mode
 	 */
 	if (test_thread_flag(TIF_NOTIFY_RESUME))
-- 
cgit v1.2.3


From 8e97bf39fa0361af3e64739b3766992b9dafa11d Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 21 Apr 2021 22:16:53 -0700
Subject: ARC: kgdb: add 'fallthrough' to prevent a warning

Use the 'fallthrough' macro to document that this switch case
does indeed fall through to the next case.

../arch/arc/kernel/kgdb.c: In function 'kgdb_arch_handle_exception':
../arch/arc/kernel/kgdb.c:141:6: warning: this statement may fall through [-Wimplicit-fallthrough=]
  141 |   if (kgdb_hex2long(&ptr, &addr))
      |      ^
../arch/arc/kernel/kgdb.c:144:2: note: here
  144 |  case 'D':
      |  ^~~~

Cc: linux-snps-arc@lists.infradead.org
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/kgdb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c
index ecfbc42d3a40..345a0000554c 100644
--- a/arch/arc/kernel/kgdb.c
+++ b/arch/arc/kernel/kgdb.c
@@ -140,6 +140,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
 		ptr = &remcomInBuffer[1];
 		if (kgdb_hex2long(&ptr, &addr))
 			regs->ret = addr;
+		fallthrough;
 
 	case 'D':
 	case 'k':
-- 
cgit v1.2.3


From 3433adc8bd09fc9f29b8baddf33b4ecd1ecd2cdc Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Fri, 23 Apr 2021 12:16:25 -0700
Subject: ARC: entry: fix off-by-one error in syscall number validation

We have NR_syscall syscalls from [0 .. NR_syscall-1].
However the check for invalid syscall number is "> NR_syscall" as
opposed to >=. This off-by-one error erronesously allows "NR_syscall"
to be treated as valid syscall causeing out-of-bounds access into
syscall-call table ensuing a crash (holes within syscall table have a
invalid-entry handler but this is beyond the array implementing the
table).

This problem showed up on v5.6 kernel when testing glibc 2.33 (v5.10
kernel capable, includng faccessat2 syscall 439). The v5.6 kernel has
NR_syscalls=439 (0 to 438). Due to the bug, 439 passed by glibc was
not handled as -ENOSYS but processed leading to a crash.

Link: https://github.com/foss-for-synopsys-dwc-arc-processors/linux/issues/48
Reported-by: Shahab Vahedi <shahab@synopsys.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/entry.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index 1743506081da..2cb8dfe866b6 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -177,7 +177,7 @@ tracesys:
 
 	; Do the Sys Call as we normally would.
 	; Validate the Sys Call number
-	cmp     r8,  NR_syscalls
+	cmp     r8,  NR_syscalls - 1
 	mov.hi  r0, -ENOSYS
 	bhi     tracesys_exit
 
@@ -255,7 +255,7 @@ ENTRY(EV_Trap)
 	;============ Normal syscall case
 
 	; syscall num shd not exceed the total system calls avail
-	cmp     r8,  NR_syscalls
+	cmp     r8,  NR_syscalls - 1
 	mov.hi  r0, -ENOSYS
 	bhi     .Lret_from_system_call
 
-- 
cgit v1.2.3


From c5f756d8c6265ebb1736a7787231f010a3b782e5 Mon Sep 17 00:00:00 2001
From: Vladimir Isaev <isaev@synopsys.com>
Date: Tue, 27 Apr 2021 15:12:37 +0300
Subject: ARC: mm: PAE: use 40-bit physical page mask

32-bit PAGE_MASK can not be used as a mask for physical addresses
when PAE is enabled. PAGE_MASK_PHYS must be used for physical
addresses instead of PAGE_MASK.

Without this, init gets SIGSEGV if pte_modify was called:

| potentially unexpected fatal signal 11.
| Path: /bin/busybox
| CPU: 0 PID: 1 Comm: init Not tainted 5.12.0-rc5-00003-g1e43c377a79f-dirty
| Insn could not be fetched
|     @No matching VMA found
|  ECR: 0x00040000 EFA: 0x00000000 ERET: 0x00000000
| STAT: 0x80080082 [IE U     ]   BTA: 0x00000000
|  SP: 0x5f9ffe44  FP: 0x00000000 BLK: 0xaf3d4
| LPS: 0x000d093e LPE: 0x000d0950 LPC: 0x00000000
| r00: 0x00000002 r01: 0x5f9fff14 r02: 0x5f9fff20
| ...
| Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b

Signed-off-by: Vladimir Isaev <isaev@synopsys.com>
Reported-by: kernel test robot <lkp@intel.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: stable@vger.kernel.org
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/page.h      | 12 ++++++++++++
 arch/arc/include/asm/pgtable.h   | 12 +++---------
 arch/arc/include/uapi/asm/page.h |  1 -
 arch/arc/mm/ioremap.c            |  5 +++--
 arch/arc/mm/tlb.c                |  2 +-
 5 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h
index ad9b7fe4dba3..4a9d33372fe2 100644
--- a/arch/arc/include/asm/page.h
+++ b/arch/arc/include/asm/page.h
@@ -7,6 +7,18 @@
 
 #include <uapi/asm/page.h>
 
+#ifdef CONFIG_ARC_HAS_PAE40
+
+#define MAX_POSSIBLE_PHYSMEM_BITS	40
+#define PAGE_MASK_PHYS			(0xff00000000ull | PAGE_MASK)
+
+#else /* CONFIG_ARC_HAS_PAE40 */
+
+#define MAX_POSSIBLE_PHYSMEM_BITS	32
+#define PAGE_MASK_PHYS			PAGE_MASK
+
+#endif /* CONFIG_ARC_HAS_PAE40 */
+
 #ifndef __ASSEMBLY__
 
 #define clear_page(paddr)		memset((paddr), 0, PAGE_SIZE)
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 163641726a2b..5878846f00cf 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -107,8 +107,8 @@
 #define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE)
 
 /* Set of bits not changed in pte_modify */
-#define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_SPECIAL)
-
+#define _PAGE_CHG_MASK	(PAGE_MASK_PHYS | _PAGE_ACCESSED | _PAGE_DIRTY | \
+							   _PAGE_SPECIAL)
 /* More Abbrevaited helpers */
 #define PAGE_U_NONE     __pgprot(___DEF)
 #define PAGE_U_R        __pgprot(___DEF | _PAGE_READ)
@@ -132,13 +132,7 @@
 #define PTE_BITS_IN_PD0		(_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ)
 #define PTE_BITS_RWX		(_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ)
 
-#ifdef CONFIG_ARC_HAS_PAE40
-#define PTE_BITS_NON_RWX_IN_PD1	(0xff00000000 | PAGE_MASK | _PAGE_CACHEABLE)
-#define MAX_POSSIBLE_PHYSMEM_BITS 40
-#else
-#define PTE_BITS_NON_RWX_IN_PD1	(PAGE_MASK | _PAGE_CACHEABLE)
-#define MAX_POSSIBLE_PHYSMEM_BITS 32
-#endif
+#define PTE_BITS_NON_RWX_IN_PD1	(PAGE_MASK_PHYS | _PAGE_CACHEABLE)
 
 /**************************************************************************
  * Mapping of vm_flags (Generic VM) to PTE flags (arch specific)
diff --git a/arch/arc/include/uapi/asm/page.h b/arch/arc/include/uapi/asm/page.h
index 2a97e2718a21..2a4ad619abfb 100644
--- a/arch/arc/include/uapi/asm/page.h
+++ b/arch/arc/include/uapi/asm/page.h
@@ -33,5 +33,4 @@
 
 #define PAGE_MASK	(~(PAGE_SIZE-1))
 
-
 #endif /* _UAPI__ASM_ARC_PAGE_H */
diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c
index fac4adc90204..95c649fbc95a 100644
--- a/arch/arc/mm/ioremap.c
+++ b/arch/arc/mm/ioremap.c
@@ -53,9 +53,10 @@ EXPORT_SYMBOL(ioremap);
 void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size,
 			   unsigned long flags)
 {
+	unsigned int off;
 	unsigned long vaddr;
 	struct vm_struct *area;
-	phys_addr_t off, end;
+	phys_addr_t end;
 	pgprot_t prot = __pgprot(flags);
 
 	/* Don't allow wraparound, zero size */
@@ -72,7 +73,7 @@ void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size,
 
 	/* Mappings have to be page-aligned */
 	off = paddr & ~PAGE_MASK;
-	paddr &= PAGE_MASK;
+	paddr &= PAGE_MASK_PHYS;
 	size = PAGE_ALIGN(end + 1) - paddr;
 
 	/*
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 9bb3c24f3677..9c7c68247289 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -576,7 +576,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,
 		      pte_t *ptep)
 {
 	unsigned long vaddr = vaddr_unaligned & PAGE_MASK;
-	phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK;
+	phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK_PHYS;
 	struct page *page = pfn_to_page(pte_pfn(*ptep));
 
 	create_tlb(vma, vaddr, ptep);
-- 
cgit v1.2.3


From 1d5e4640e5df15252398c1b621f6bd432f2d7f17 Mon Sep 17 00:00:00 2001
From: Vladimir Isaev <isaev@synopsys.com>
Date: Tue, 27 Apr 2021 15:13:54 +0300
Subject: ARC: mm: Use max_high_pfn as a HIGHMEM zone border

Commit 4af22ded0ecf ("arc: fix memory initialization for systems
with two memory banks") fixed highmem, but for the PAE case it causes
bug messages:

| BUG: Bad page state in process swapper  pfn:80000
| page:(ptrval) refcount:0 mapcount:1 mapping:00000000 index:0x0 pfn:0x80000 flags: 0x0()
| raw: 00000000 00000100 00000122 00000000 00000000 00000000 00000000 00000000
| raw: 00000000
| page dumped because: nonzero mapcount
| Modules linked in:
| CPU: 0 PID: 0 Comm: swapper Not tainted 5.12.0-rc5-00003-g1e43c377a79f #1

This is because the fix expects highmem to be always less than
lowmem and uses min_low_pfn as an upper zone border for highmem.

max_high_pfn should be ok for both highmem and highmem+PAE cases.

Fixes: 4af22ded0ecf ("arc: fix memory initialization for systems with two memory banks")
Signed-off-by: Vladimir Isaev <isaev@synopsys.com>
Cc: Mike Rapoport <rppt@linux.ibm.com>
Cc: stable@vger.kernel.org  #5.8 onwards
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/mm/init.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index 33832e36bdb7..e2ed355438c9 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -157,7 +157,16 @@ void __init setup_arch_memory(void)
 	min_high_pfn = PFN_DOWN(high_mem_start);
 	max_high_pfn = PFN_DOWN(high_mem_start + high_mem_sz);
 
-	max_zone_pfn[ZONE_HIGHMEM] = min_low_pfn;
+	/*
+	 * max_high_pfn should be ok here for both HIGHMEM and HIGHMEM+PAE.
+	 * For HIGHMEM without PAE max_high_pfn should be less than
+	 * min_low_pfn to guarantee that these two regions don't overlap.
+	 * For PAE case highmem is greater than lowmem, so it is natural
+	 * to use max_high_pfn.
+	 *
+	 * In both cases, holes should be handled by pfn_valid().
+	 */
+	max_zone_pfn[ZONE_HIGHMEM] = max_high_pfn;
 
 	high_memory = (void *)(min_high_pfn << PAGE_SHIFT);
 
-- 
cgit v1.2.3


From bf9e262fcfa6350269f00a95658f701f2595db13 Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Sat, 8 May 2021 11:07:33 +0800
Subject: docs/zh_CN: Remove obsolete translation file

This translation file was replaced by
   Documentation/translations/zh_CN/admin-guide/security-bugs.rst
which was created in commit 2d153571003b ("docs/zh_CN: Add
zh_CN/admin-guide/security-bugs.rst").
This is a translation left over from history. Remove it.

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Acked-by: Wu XiangCheng <bobwxc@email.cn>
Link: https://lore.kernel.org/r/20210508030741.82655-1-wanjiabing@vivo.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/translations/zh_CN/SecurityBugs | 50 ---------------------------
 1 file changed, 50 deletions(-)
 delete mode 100644 Documentation/translations/zh_CN/SecurityBugs

diff --git a/Documentation/translations/zh_CN/SecurityBugs b/Documentation/translations/zh_CN/SecurityBugs
deleted file mode 100644
index 2d0fffd122ce..000000000000
--- a/Documentation/translations/zh_CN/SecurityBugs
+++ /dev/null
@@ -1,50 +0,0 @@
-Chinese translated version of Documentation/admin-guide/security-bugs.rst
-
-If you have any comment or update to the content, please contact the
-original document maintainer directly.  However, if you have a problem
-communicating in English you can also ask the Chinese maintainer for
-help.  Contact the Chinese maintainer if this translation is outdated
-or if there is a problem with the translation.
-
-Chinese maintainer: Harry Wei <harryxiyou@gmail.com>
----------------------------------------------------------------------
-Documentation/admin-guide/security-bugs.rst 的中文翻译
-
-如果想评论或更新本文的内容，请直接联系原文档的维护者。如果你使用英文
-交流有困难的话，也可以向中文版维护者求助。如果本翻译更新不及时或者翻
-译存在问题，请联系中文版维护者。
-
-中文版维护者： 贾威威 Harry Wei <harryxiyou@gmail.com>
-中文版翻译者： 贾威威 Harry Wei <harryxiyou@gmail.com>
-中文版校译者： 贾威威 Harry Wei <harryxiyou@gmail.com>
-
-
-以下为正文
----------------------------------------------------------------------
-Linux内核开发者认为安全非常重要。因此，我们想要知道当一个有关于
-安全的漏洞被发现的时候，并且它可能会被尽快的修复或者公开。请把这个安全
-漏洞报告给Linux内核安全团队。
-
-1) 联系
-
-linux内核安全团队可以通过email<security@kernel.org>来联系。这是
-一组独立的安全工作人员，可以帮助改善漏洞报告并且公布和取消一个修复。安
-全团队有可能会从部分的维护者那里引进额外的帮助来了解并且修复安全漏洞。
-当遇到任何漏洞，所能提供的信息越多就越能诊断和修复。如果你不清楚什么
-是有帮助的信息，那就请重温一下admin-guide/reporting-bugs.rst文件中的概述过程。任
-何攻击性的代码都是非常有用的，未经报告者的同意不会被取消，除非它已经
-被公布于众。
-
-2) 公开
-
-Linux内核安全团队的宗旨就是和漏洞提交者一起处理漏洞的解决方案直
-到公开。我们喜欢尽快地完全公开漏洞。当一个漏洞或者修复还没有被完全地理
-解，解决方案没有通过测试或者供应商协调，可以合理地延迟公开。然而，我们
-期望这些延迟尽可能的短些，是可数的几天，而不是几个星期或者几个月。公开
-日期是通过安全团队和漏洞提供者以及供应商洽谈后的结果。公开时间表是从很
-短（特殊的，它已经被公众所知道）到几个星期。作为一个基本的默认政策，我
-们所期望通知公众的日期是7天的安排。
-
-3) 保密协议
-
-Linux内核安全团队不是一个正式的团体，因此不能加入任何的保密协议。
-- 
cgit v1.2.3


From 9e255e2b9afe948fb795cbaa854acc3904d4212c Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Thu, 6 May 2021 16:19:07 -0700
Subject: Documentation: drop optional BOMs

A few of the Documentation .rst files begin with a Unicode
byte order mark (BOM). The BOM may signify endianess for
16-bit or 32-bit encodings or indicate that the text stream
is indeed Unicode. We don't need it for either of those uses.
It may also interfere with (confuse) some software.

Since we don't need it and its use is optional, just delete
the uses of it in Documentation/.

https://en.wikipedia.org/wiki/Byte_order_mark

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Link: https://lore.kernel.org/r/20210506231907.14359-1-rdunlap@infradead.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/block/data-integrity.rst                 | 2 +-
 Documentation/process/kernel-enforcement-statement.rst | 2 +-
 Documentation/security/tpm/xen-tpmfront.rst            | 2 +-
 Documentation/timers/no_hz.rst                         | 2 +-
 Documentation/usb/mtouchusb.rst                        | 2 +-
 Documentation/usb/usb-serial.rst                       | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/Documentation/block/data-integrity.rst b/Documentation/block/data-integrity.rst
index 4f2452a95c43..07a97aa26668 100644
--- a/Documentation/block/data-integrity.rst
+++ b/Documentation/block/data-integrity.rst
@@ -1,4 +1,4 @@
-﻿==============
+==============
 Data Integrity
 ==============
 
diff --git a/Documentation/process/kernel-enforcement-statement.rst b/Documentation/process/kernel-enforcement-statement.rst
index e5a1be476047..dc2d813b2e79 100644
--- a/Documentation/process/kernel-enforcement-statement.rst
+++ b/Documentation/process/kernel-enforcement-statement.rst
@@ -1,4 +1,4 @@
-﻿.. _process_statement_kernel:
+.. _process_statement_kernel:
 
 Linux Kernel Enforcement Statement
 ----------------------------------
diff --git a/Documentation/security/tpm/xen-tpmfront.rst b/Documentation/security/tpm/xen-tpmfront.rst
index 00d5b1db227d..31c67522f2ad 100644
--- a/Documentation/security/tpm/xen-tpmfront.rst
+++ b/Documentation/security/tpm/xen-tpmfront.rst
@@ -1,4 +1,4 @@
-﻿=============================
+=============================
 Virtual TPM interface for Xen
 =============================
 
diff --git a/Documentation/timers/no_hz.rst b/Documentation/timers/no_hz.rst
index c4c70e1aada3..6cadad7c3aad 100644
--- a/Documentation/timers/no_hz.rst
+++ b/Documentation/timers/no_hz.rst
@@ -1,4 +1,4 @@
-﻿======================================
+======================================
 NO_HZ: Reducing Scheduling-Clock Ticks
 ======================================
 
diff --git a/Documentation/usb/mtouchusb.rst b/Documentation/usb/mtouchusb.rst
index d1111b74bf75..5ae1f74fe74b 100644
--- a/Documentation/usb/mtouchusb.rst
+++ b/Documentation/usb/mtouchusb.rst
@@ -1,4 +1,4 @@
-﻿================
+================
 mtouchusb driver
 ================
 
diff --git a/Documentation/usb/usb-serial.rst b/Documentation/usb/usb-serial.rst
index 8fa7dbd3da9a..69586aeb60bb 100644
--- a/Documentation/usb/usb-serial.rst
+++ b/Documentation/usb/usb-serial.rst
@@ -1,4 +1,4 @@
-﻿==========
+==========
 USB serial
 ==========
 
-- 
cgit v1.2.3


From 8ab78863e9eff11910e1ac8bcf478060c29b379e Mon Sep 17 00:00:00 2001
From: Jeimon <jjjinmeng.zhou@gmail.com>
Date: Sat, 8 May 2021 11:52:30 +0800
Subject: net/nfc/rawsock.c: fix a permission check bug

The function rawsock_create() calls a privileged function sk_alloc(), which requires a ns-aware check to check net->user_ns, i.e., ns_capable(). However, the original code checks the init_user_ns using capable(). So we replace the capable() with ns_capable().

Signed-off-by: Jeimon <jjjinmeng.zhou@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/nfc/rawsock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index 9c7eb8455ba8..5f1d438a0a23 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -329,7 +329,7 @@ static int rawsock_create(struct net *net, struct socket *sock,
 		return -ESOCKTNOSUPPORT;
 
 	if (sock->type == SOCK_RAW) {
-		if (!capable(CAP_NET_RAW))
+		if (!ns_capable(net->user_ns, CAP_NET_RAW))
 			return -EPERM;
 		sock->ops = &rawsock_raw_ops;
 	} else {
-- 
cgit v1.2.3


From ddb6e00f8413e885ff826e32521cff7924661de0 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sat, 8 May 2021 07:38:22 +0200
Subject: net: netcp: Fix an error message

'ret' is known to be 0 here.
The expected error code is stored in 'tx_pipe->dma_queue', so use it
instead.

While at it, switch from %d to %pe which is more user friendly.

Fixes: 84640e27f230 ("net: netcp: Add Keystone NetCP core ethernet driver")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/netcp_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index 9030e619e543..97942b0e3897 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -1350,8 +1350,8 @@ int netcp_txpipe_open(struct netcp_tx_pipe *tx_pipe)
 	tx_pipe->dma_queue = knav_queue_open(name, tx_pipe->dma_queue_id,
 					     KNAV_QUEUE_SHARED);
 	if (IS_ERR(tx_pipe->dma_queue)) {
-		dev_err(dev, "Could not open DMA queue for channel \"%s\": %d\n",
-			name, ret);
+		dev_err(dev, "Could not open DMA queue for channel \"%s\": %pe\n",
+			name, tx_pipe->dma_queue);
 		ret = PTR_ERR(tx_pipe->dma_queue);
 		goto err;
 	}
-- 
cgit v1.2.3


From 0d3ae948741ac6d80e39ab27b45297367ee477de Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 14 Apr 2021 10:05:17 -0700
Subject: sh: Remove unused variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Removes this annoying warning:

arch/sh/kernel/traps.c: In function ‘nmi_trap_handler’:
arch/sh/kernel/traps.c:183:15: warning: unused variable ‘cpu’ [-Wunused-variable]
  183 |  unsigned int cpu = smp_processor_id();

Fixes: fe3f1d5d7cd3 ("sh: Get rid of nmi_count()")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210414170517.1205430-1-eric.dumazet@gmail.com
---
 arch/sh/kernel/traps.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c
index f5beecdac693..e76b22157099 100644
--- a/arch/sh/kernel/traps.c
+++ b/arch/sh/kernel/traps.c
@@ -180,7 +180,6 @@ static inline void arch_ftrace_nmi_exit(void) { }
 
 BUILD_TRAP_HANDLER(nmi)
 {
-	unsigned int cpu = smp_processor_id();
 	TRAP_HANDLER_DECL;
 
 	arch_ftrace_nmi_enter();
-- 
cgit v1.2.3


From a269333fa5c0c8e53c92b5a28a6076a28cde3e83 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 8 May 2021 16:30:35 +0300
Subject: net: dsa: fix a crash if ->get_sset_count() fails

If ds->ops->get_sset_count() fails then it "count" is a negative error
code such as -EOPNOTSUPP.  Because "i" is an unsigned int, the negative
error code is type promoted to a very high value and the loop will
corrupt memory until the system crashes.

Fix this by checking for error codes and changing the type of "i" to
just int.

Fixes: badf3ada60ab ("net: dsa: Provide CPU port statistics to master netdev")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/master.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/dsa/master.c b/net/dsa/master.c
index 052a977914a6..63adbc21a735 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -147,8 +147,7 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
 	struct dsa_switch *ds = cpu_dp->ds;
 	int port = cpu_dp->index;
 	int len = ETH_GSTRING_LEN;
-	int mcount = 0, count;
-	unsigned int i;
+	int mcount = 0, count, i;
 	uint8_t pfx[4];
 	uint8_t *ndata;
 
@@ -178,6 +177,8 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
 		 */
 		ds->ops->get_strings(ds, port, stringset, ndata);
 		count = ds->ops->get_sset_count(ds, port, stringset);
+		if (count < 0)
+			return;
 		for (i = 0; i < count; i++) {
 			memmove(ndata + (i * len + sizeof(pfx)),
 				ndata + i * len, len - sizeof(pfx));
-- 
cgit v1.2.3


From db825feefc6868896fed5e361787ba3bee2fd906 Mon Sep 17 00:00:00 2001
From: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
Date: Sun, 9 May 2021 09:43:18 +0300
Subject: net/mlx4: Fix EEPROM dump support

Fix SFP and QSFP* EEPROM queries by setting i2c_address, offset and page
number correctly. For SFP set the following params:
- I2C address for offsets 0-255 is 0x50. For 256-511 - 0x51.
- Page number is zero.
- Offset is 0-255.

At the same time, QSFP* parameters are different:
- I2C address is always 0x50.
- Page number is not limited to zero.
- Offset is 0-255 for page zero and 128-255 for others.

To set parameters accordingly to cable used, implement function to query
module ID and implement respective helper functions to set parameters
correctly.

Fixes: 135dd9594f12 ("net/mlx4_en: ethtool, Remove unsupported SFP EEPROM high pages query")
Signed-off-by: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_ethtool.c |   4 +-
 drivers/net/ethernet/mellanox/mlx4/port.c       | 107 +++++++++++++++++++++++-
 2 files changed, 104 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index 1434df66fcf2..3616b77caa0a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -2027,8 +2027,6 @@ static int mlx4_en_set_tunable(struct net_device *dev,
 	return ret;
 }
 
-#define MLX4_EEPROM_PAGE_LEN 256
-
 static int mlx4_en_get_module_info(struct net_device *dev,
 				   struct ethtool_modinfo *modinfo)
 {
@@ -2063,7 +2061,7 @@ static int mlx4_en_get_module_info(struct net_device *dev,
 		break;
 	case MLX4_MODULE_ID_SFP:
 		modinfo->type = ETH_MODULE_SFF_8472;
-		modinfo->eeprom_len = MLX4_EEPROM_PAGE_LEN;
+		modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
 		break;
 	default:
 		return -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index ba6ac31a339d..256a06b3c096 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -1973,6 +1973,7 @@ EXPORT_SYMBOL(mlx4_get_roce_gid_from_slave);
 #define I2C_ADDR_LOW  0x50
 #define I2C_ADDR_HIGH 0x51
 #define I2C_PAGE_SIZE 256
+#define I2C_HIGH_PAGE_SIZE 128
 
 /* Module Info Data */
 struct mlx4_cable_info {
@@ -2026,6 +2027,88 @@ static inline const char *cable_info_mad_err_str(u16 mad_status)
 	return "Unknown Error";
 }
 
+static int mlx4_get_module_id(struct mlx4_dev *dev, u8 port, u8 *module_id)
+{
+	struct mlx4_cmd_mailbox *inbox, *outbox;
+	struct mlx4_mad_ifc *inmad, *outmad;
+	struct mlx4_cable_info *cable_info;
+	int ret;
+
+	inbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(inbox))
+		return PTR_ERR(inbox);
+
+	outbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(outbox)) {
+		mlx4_free_cmd_mailbox(dev, inbox);
+		return PTR_ERR(outbox);
+	}
+
+	inmad = (struct mlx4_mad_ifc *)(inbox->buf);
+	outmad = (struct mlx4_mad_ifc *)(outbox->buf);
+
+	inmad->method = 0x1; /* Get */
+	inmad->class_version = 0x1;
+	inmad->mgmt_class = 0x1;
+	inmad->base_version = 0x1;
+	inmad->attr_id = cpu_to_be16(0xFF60); /* Module Info */
+
+	cable_info = (struct mlx4_cable_info *)inmad->data;
+	cable_info->dev_mem_address = 0;
+	cable_info->page_num = 0;
+	cable_info->i2c_addr = I2C_ADDR_LOW;
+	cable_info->size = cpu_to_be16(1);
+
+	ret = mlx4_cmd_box(dev, inbox->dma, outbox->dma, port, 3,
+			   MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
+			   MLX4_CMD_NATIVE);
+	if (ret)
+		goto out;
+
+	if (be16_to_cpu(outmad->status)) {
+		/* Mad returned with bad status */
+		ret = be16_to_cpu(outmad->status);
+		mlx4_warn(dev,
+			  "MLX4_CMD_MAD_IFC Get Module ID attr(%x) port(%d) i2c_addr(%x) offset(%d) size(%d): Response Mad Status(%x) - %s\n",
+			  0xFF60, port, I2C_ADDR_LOW, 0, 1, ret,
+			  cable_info_mad_err_str(ret));
+		ret = -ret;
+		goto out;
+	}
+	cable_info = (struct mlx4_cable_info *)outmad->data;
+	*module_id = cable_info->data[0];
+out:
+	mlx4_free_cmd_mailbox(dev, inbox);
+	mlx4_free_cmd_mailbox(dev, outbox);
+	return ret;
+}
+
+static void mlx4_sfp_eeprom_params_set(u8 *i2c_addr, u8 *page_num, u16 *offset)
+{
+	*i2c_addr = I2C_ADDR_LOW;
+	*page_num = 0;
+
+	if (*offset < I2C_PAGE_SIZE)
+		return;
+
+	*i2c_addr = I2C_ADDR_HIGH;
+	*offset -= I2C_PAGE_SIZE;
+}
+
+static void mlx4_qsfp_eeprom_params_set(u8 *i2c_addr, u8 *page_num, u16 *offset)
+{
+	/* Offsets 0-255 belong to page 0.
+	 * Offsets 256-639 belong to pages 01, 02, 03.
+	 * For example, offset 400 is page 02: 1 + (400 - 256) / 128 = 2
+	 */
+	if (*offset < I2C_PAGE_SIZE)
+		*page_num = 0;
+	else
+		*page_num = 1 + (*offset - I2C_PAGE_SIZE) / I2C_HIGH_PAGE_SIZE;
+	*i2c_addr = I2C_ADDR_LOW;
+	*offset -= *page_num * I2C_HIGH_PAGE_SIZE;
+}
+
 /**
  * mlx4_get_module_info - Read cable module eeprom data
  * @dev: mlx4_dev.
@@ -2045,12 +2128,30 @@ int mlx4_get_module_info(struct mlx4_dev *dev, u8 port,
 	struct mlx4_cmd_mailbox *inbox, *outbox;
 	struct mlx4_mad_ifc *inmad, *outmad;
 	struct mlx4_cable_info *cable_info;
-	u16 i2c_addr;
+	u8 module_id, i2c_addr, page_num;
 	int ret;
 
 	if (size > MODULE_INFO_MAX_READ)
 		size = MODULE_INFO_MAX_READ;
 
+	ret = mlx4_get_module_id(dev, port, &module_id);
+	if (ret)
+		return ret;
+
+	switch (module_id) {
+	case MLX4_MODULE_ID_SFP:
+		mlx4_sfp_eeprom_params_set(&i2c_addr, &page_num, &offset);
+		break;
+	case MLX4_MODULE_ID_QSFP:
+	case MLX4_MODULE_ID_QSFP_PLUS:
+	case MLX4_MODULE_ID_QSFP28:
+		mlx4_qsfp_eeprom_params_set(&i2c_addr, &page_num, &offset);
+		break;
+	default:
+		mlx4_err(dev, "Module ID not recognized: %#x\n", module_id);
+		return -EINVAL;
+	}
+
 	inbox = mlx4_alloc_cmd_mailbox(dev);
 	if (IS_ERR(inbox))
 		return PTR_ERR(inbox);
@@ -2076,11 +2177,9 @@ int mlx4_get_module_info(struct mlx4_dev *dev, u8 port,
 		 */
 		size -= offset + size - I2C_PAGE_SIZE;
 
-	i2c_addr = I2C_ADDR_LOW;
-
 	cable_info = (struct mlx4_cable_info *)inmad->data;
 	cable_info->dev_mem_address = cpu_to_be16(offset);
-	cable_info->page_num = 0;
+	cable_info->page_num = page_num;
 	cable_info->i2c_addr = i2c_addr;
 	cable_info->size = cpu_to_be16(size);
 
-- 
cgit v1.2.3


From b94cbc909f1d80378a1f541968309e5c1178c98b Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sun, 9 May 2021 22:33:38 +0300
Subject: net: dsa: fix error code getting shifted with 4 in
 dsa_slave_get_sset_count

DSA implements a bunch of 'standardized' ethtool statistics counters,
namely tx_packets, tx_bytes, rx_packets, rx_bytes. So whatever the
hardware driver returns in .get_sset_count(), we need to add 4 to that.

That is ok, except that .get_sset_count() can return a negative error
code, for example:

b53_get_sset_count
-> phy_ethtool_get_sset_count
   -> return -EIO

-EIO is -5, and with 4 added to it, it becomes -1, aka -EPERM. One can
imagine that certain error codes may even become positive, although
based on code inspection I did not see instances of that.

Check the error code first, if it is negative return it as-is.

Based on a similar patch for dsa_master_get_strings from Dan Carpenter:
https://patchwork.kernel.org/project/netdevbpf/patch/YJaSe3RPgn7gKxZv@mwanda/

Fixes: 91da11f870f0 ("net: Distributed Switch Architecture protocol support")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/slave.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 8c0f3c6ab365..d4756b920108 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -776,13 +776,15 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
 	struct dsa_switch *ds = dp->ds;
 
 	if (sset == ETH_SS_STATS) {
-		int count;
+		int count = 0;
 
-		count = 4;
-		if (ds->ops->get_sset_count)
-			count += ds->ops->get_sset_count(ds, dp->index, sset);
+		if (ds->ops->get_sset_count) {
+			count = ds->ops->get_sset_count(ds, dp->index, sset);
+			if (count < 0)
+				return count;
+		}
 
-		return count;
+		return count + 4;
 	} else if (sset ==  ETH_SS_TEST) {
 		return net_selftest_get_count();
 	}
-- 
cgit v1.2.3


From 3058e01d31bbdbe50e02cafece2b22817a6a0eae Mon Sep 17 00:00:00 2001
From: Hoang Le <hoang.h.le@dektech.com.au>
Date: Mon, 10 May 2021 09:57:38 +0700
Subject: tipc: make node link identity publish thread safe

The using of the node address and node link identity are not thread safe,
meaning that two publications may be published the same values, as result
one of them will get failure because of already existing in the name table.
To avoid this we have to use the node address and node link identity values
from inside the node item's write lock protection.

Fixes: 50a3499ab853 ("tipc: simplify signature of tipc_namtbl_publish()")
Acked-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: Hoang Le <hoang.h.le@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/node.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/tipc/node.c b/net/tipc/node.c
index 8217905348f4..81af92954c6c 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -423,18 +423,18 @@ static void tipc_node_write_unlock(struct tipc_node *n)
 	write_unlock_bh(&n->lock);
 
 	if (flags & TIPC_NOTIFY_NODE_DOWN)
-		tipc_publ_notify(net, publ_list, n->addr, n->capabilities);
+		tipc_publ_notify(net, publ_list, sk.node, n->capabilities);
 
 	if (flags & TIPC_NOTIFY_NODE_UP)
-		tipc_named_node_up(net, n->addr, n->capabilities);
+		tipc_named_node_up(net, sk.node, n->capabilities);
 
 	if (flags & TIPC_NOTIFY_LINK_UP) {
-		tipc_mon_peer_up(net, n->addr, bearer_id);
-		tipc_nametbl_publish(net, &ua, &sk, n->link_id);
+		tipc_mon_peer_up(net, sk.node, bearer_id);
+		tipc_nametbl_publish(net, &ua, &sk, sk.ref);
 	}
 	if (flags & TIPC_NOTIFY_LINK_DOWN) {
-		tipc_mon_peer_down(net, n->addr, bearer_id);
-		tipc_nametbl_withdraw(net, &ua, &sk, n->link_id);
+		tipc_mon_peer_down(net, sk.node, bearer_id);
+		tipc_nametbl_withdraw(net, &ua, &sk, sk.ref);
 	}
 }
 
-- 
cgit v1.2.3


From 297c4de6f780b63b6d2af75a730720483bf1904a Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Mon, 10 May 2021 13:07:08 +0200
Subject: net: dsa: felix: re-enable TAS guard band mode

Commit 316bcffe4479 ("net: dsa: felix: disable always guard band bit for
TAS config") disabled the guard band and broke 802.3Qbv compliance.

There are two issues here:
 (1) Without the guard band the end of the scheduling window could be
     overrun by a frame in transit.
 (2) Frames that don't fit into a configured window will still be sent.

The reason for both issues is that the switch will schedule the _start_
of a frame transmission inside the predefined window without taking the
length of the frame into account. Thus, we'll need the guard band which
will close the gate early, so that a complete frame can still be sent.
Revert the commit and add a note.

For a lengthy discussion see [1].

[1] https://lore.kernel.org/netdev/c7618025da6723418c56a54fe4683bd7@walle.cc/

Fixes: 316bcffe4479 ("net: dsa: felix: disable always guard band bit for TAS config")
Signed-off-by: Michael Walle <michael@walle.cc>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/ocelot/felix_vsc9959.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
index 2473bebe48e6..f966a253d1c7 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -1227,12 +1227,17 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
 	if (taprio->num_entries > VSC9959_TAS_GCL_ENTRY_MAX)
 		return -ERANGE;
 
-	/* Set port num and disable ALWAYS_GUARD_BAND_SCH_Q, which means set
-	 * guard band to be implemented for nonschedule queues to schedule
-	 * queues transition.
+	/* Enable guard band. The switch will schedule frames without taking
+	 * their length into account. Thus we'll always need to enable the
+	 * guard band which reserves the time of a maximum sized frame at the
+	 * end of the time window.
+	 *
+	 * Although the ALWAYS_GUARD_BAND_SCH_Q bit is global for all ports, we
+	 * need to set PORT_NUM, because subsequent writes to PARAM_CFG_REG_n
+	 * operate on the port number.
 	 */
-	ocelot_rmw(ocelot,
-		   QSYS_TAS_PARAM_CFG_CTRL_PORT_NUM(port),
+	ocelot_rmw(ocelot, QSYS_TAS_PARAM_CFG_CTRL_PORT_NUM(port) |
+		   QSYS_TAS_PARAM_CFG_CTRL_ALWAYS_GUARD_BAND_SCH_Q,
 		   QSYS_TAS_PARAM_CFG_CTRL_PORT_NUM_M |
 		   QSYS_TAS_PARAM_CFG_CTRL_ALWAYS_GUARD_BAND_SCH_Q,
 		   QSYS_TAS_PARAM_CFG_CTRL);
-- 
cgit v1.2.3


From bb4031b8af804244a7e4349d38f6624f68664bd6 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Mon, 26 Apr 2021 09:56:18 +0300
Subject: clk: Skip clk provider registration when np is NULL

commit 6579c8d97ad7 ("clk: Mark fwnodes when their clock provider is added")
revealed that clk/bcm/clk-raspberrypi.c driver calls
devm_of_clk_add_hw_provider(), with a NULL dev->of_node, which resulted in a
NULL pointer dereference in of_clk_add_hw_provider() when calling
fwnode_dev_initialized().

Returning 0 is reducing the if conditions in driver code and is being
consistent with the CONFIG_OF=n inline stub that returns 0 when CONFIG_OF
is disabled. The downside is that drivers will maybe register clkdev lookups
when they don't need to and waste some memory.

Fixes: 6579c8d97ad7 ("clk: Mark fwnodes when their clock provider is added")
Fixes: 3c9ea42802a1 ("clk: Mark fwnodes when their clock provider is added/removed")
Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Stephen Boyd <sboyd@kernel.org>
Reviewed-by: Saravana Kannan <saravanak@google.com>
Reviewed-by: Nicolas Saenz Julienne <nsaenz@kernel.org>
Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Link: https://lore.kernel.org/r/20210426065618.588144-1-tudor.ambarus@microchip.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/clk.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index e2ec1b745243..65508eb89ec9 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -4540,6 +4540,9 @@ int of_clk_add_provider(struct device_node *np,
 	struct of_clk_provider *cp;
 	int ret;
 
+	if (!np)
+		return 0;
+
 	cp = kzalloc(sizeof(*cp), GFP_KERNEL);
 	if (!cp)
 		return -ENOMEM;
@@ -4579,6 +4582,9 @@ int of_clk_add_hw_provider(struct device_node *np,
 	struct of_clk_provider *cp;
 	int ret;
 
+	if (!np)
+		return 0;
+
 	cp = kzalloc(sizeof(*cp), GFP_KERNEL);
 	if (!cp)
 		return -ENOMEM;
@@ -4676,6 +4682,9 @@ void of_clk_del_provider(struct device_node *np)
 {
 	struct of_clk_provider *cp;
 
+	if (!np)
+		return;
+
 	mutex_lock(&of_clk_mutex);
 	list_for_each_entry(cp, &of_clk_providers, link) {
 		if (cp->node == np) {
-- 
cgit v1.2.3


From 049c4e13714ecbca567b4d5f6d563f05d431c80e Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 10 May 2021 13:10:44 +0000
Subject: bpf: Fix alu32 const subreg bound tracking on bitwise operations

Fix a bug in the verifier's scalar32_min_max_*() functions which leads to
incorrect tracking of 32 bit bounds for the simulation of and/or/xor bitops.
When both the src & dst subreg is a known constant, then the assumption is
that scalar_min_max_*() will take care to update bounds correctly. However,
this is not the case, for example, consider a register R2 which has a tnum
of 0xffffffff00000000, meaning, lower 32 bits are known constant and in this
case of value 0x00000001. R2 is then and'ed with a register R3 which is a
64 bit known constant, here, 0x100000002.

What can be seen in line '10:' is that 32 bit bounds reach an invalid state
where {u,s}32_min_value > {u,s}32_max_value. The reason is scalar32_min_max_*()
delegates 32 bit bounds updates to scalar_min_max_*(), however, that really
only takes place when both the 64 bit src & dst register is a known constant.
Given scalar32_min_max_*() is intended to be designed as closely as possible
to scalar_min_max_*(), update the 32 bit bounds in this situation through
__mark_reg32_known() which will set all {u,s}32_{min,max}_value to the correct
constant, which is 0x00000000 after the fix (given 0x00000001 & 0x00000002 in
32 bit space). This is possible given var32_off already holds the final value
as dst_reg->var_off is updated before calling scalar32_min_max_*().

Before fix, invalid tracking of R2:

  [...]
  9: R0_w=inv1337 R1=ctx(id=0,off=0,imm=0) R2_w=inv(id=0,smin_value=-9223372036854775807 (0x8000000000000001),smax_value=9223372032559808513 (0x7fffffff00000001),umin_value=1,umax_value=0xffffffff00000001,var_off=(0x1; 0xffffffff00000000),s32_min_value=1,s32_max_value=1,u32_min_value=1,u32_max_value=1) R3_w=inv4294967298 R10=fp0
  9: (5f) r2 &= r3
  10: R0_w=inv1337 R1=ctx(id=0,off=0,imm=0) R2_w=inv(id=0,smin_value=0,smax_value=4294967296 (0x100000000),umin_value=0,umax_value=0x100000000,var_off=(0x0; 0x100000000),s32_min_value=1,s32_max_value=0,u32_min_value=1,u32_max_value=0) R3_w=inv4294967298 R10=fp0
  [...]

After fix, correct tracking of R2:

  [...]
  9: R0_w=inv1337 R1=ctx(id=0,off=0,imm=0) R2_w=inv(id=0,smin_value=-9223372036854775807 (0x8000000000000001),smax_value=9223372032559808513 (0x7fffffff00000001),umin_value=1,umax_value=0xffffffff00000001,var_off=(0x1; 0xffffffff00000000),s32_min_value=1,s32_max_value=1,u32_min_value=1,u32_max_value=1) R3_w=inv4294967298 R10=fp0
  9: (5f) r2 &= r3
  10: R0_w=inv1337 R1=ctx(id=0,off=0,imm=0) R2_w=inv(id=0,smin_value=0,smax_value=4294967296 (0x100000000),umin_value=0,umax_value=0x100000000,var_off=(0x0; 0x100000000),s32_min_value=0,s32_max_value=0,u32_min_value=0,u32_max_value=0) R3_w=inv4294967298 R10=fp0
  [...]

Fixes: 3f50f132d840 ("bpf: Verifier, do explicit ALU32 bounds tracking")
Fixes: 2921c90d4718 ("bpf: Fix a verifier failure with xor")
Reported-by: Manfred Paul (@_manfp)
Reported-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 757476c91c98..9352a1b7de2d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7084,11 +7084,10 @@ static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
 	s32 smin_val = src_reg->s32_min_value;
 	u32 umax_val = src_reg->u32_max_value;
 
-	/* Assuming scalar64_min_max_and will be called so its safe
-	 * to skip updating register for known 32-bit case.
-	 */
-	if (src_known && dst_known)
+	if (src_known && dst_known) {
+		__mark_reg32_known(dst_reg, var32_off.value);
 		return;
+	}
 
 	/* We get our minimum from the var_off, since that's inherently
 	 * bitwise.  Our maximum is the minimum of the operands' maxima.
@@ -7108,7 +7107,6 @@ static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
 		dst_reg->s32_min_value = dst_reg->u32_min_value;
 		dst_reg->s32_max_value = dst_reg->u32_max_value;
 	}
-
 }
 
 static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
@@ -7155,11 +7153,10 @@ static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
 	s32 smin_val = src_reg->s32_min_value;
 	u32 umin_val = src_reg->u32_min_value;
 
-	/* Assuming scalar64_min_max_or will be called so it is safe
-	 * to skip updating register for known case.
-	 */
-	if (src_known && dst_known)
+	if (src_known && dst_known) {
+		__mark_reg32_known(dst_reg, var32_off.value);
 		return;
+	}
 
 	/* We get our maximum from the var_off, and our minimum is the
 	 * maximum of the operands' minima
@@ -7224,11 +7221,10 @@ static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
 	s32 smin_val = src_reg->s32_min_value;
 
-	/* Assuming scalar64_min_max_xor will be called so it is safe
-	 * to skip updating register for known case.
-	 */
-	if (src_known && dst_known)
+	if (src_known && dst_known) {
+		__mark_reg32_known(dst_reg, var32_off.value);
 		return;
+	}
 
 	/* We get both minimum and maximum from the var32_off. */
 	dst_reg->u32_min_value = var32_off.value;
-- 
cgit v1.2.3


From 2515dd6ce8e545b0b2eece84920048ef9ed846c4 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Mon, 19 Apr 2021 16:17:41 -0700
Subject: stack: Replace "o" output with "r" input constraint

"o" isn't a common asm() constraint to use; it triggers an assertion in
assert-enabled builds of LLVM that it's not recognized when targeting
aarch64 (though it appears to fall back to "m"). It's fixed in LLVM 13 now,
but there isn't really a good reason to use "o" in particular here. To
avoid causing build issues for those using assert-enabled builds of earlier
LLVM versions, the constraint needs changing.

Instead, if the point is to retain the __builtin_alloca(), make ptr appear
to "escape" via being an input to an empty inline asm block. This is
preferable anyways, since otherwise this looks like a dead store.

While the use of "r" was considered in

  https://lore.kernel.org/lkml/202104011447.2E7F543@keescook/

it was only tested as an output (which looks like a dead store, and wasn't
sufficient).

Use "r" as an input constraint instead, which behaves correctly across
compilers and architectures.

Fixes: 39218ff4c625 ("stack: Optionally randomize kernel stack offset each syscall")
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Kees Cook <keescook@chromium.org>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Link: https://reviews.llvm.org/D100412
Link: https://bugs.llvm.org/show_bug.cgi?id=49956
Link: https://lore.kernel.org/r/20210419231741.4084415-1-keescook@chromium.org
---
 include/linux/randomize_kstack.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h
index fd80fab663a9..bebc911161b6 100644
--- a/include/linux/randomize_kstack.h
+++ b/include/linux/randomize_kstack.h
@@ -38,7 +38,7 @@ void *__builtin_alloca(size_t size);
 		u32 offset = raw_cpu_read(kstack_offset);		\
 		u8 *ptr = __builtin_alloca(KSTACK_OFFSET_MAX(offset));	\
 		/* Keep allocation even after "ptr" loses scope. */	\
-		asm volatile("" : "=o"(*ptr) :: "memory");		\
+		asm volatile("" :: "r"(ptr) : "memory");		\
 	}								\
 } while (0)
 
-- 
cgit v1.2.3


From cc2520909c2df9ad51d642bf09b3da26a9f56393 Mon Sep 17 00:00:00 2001
From: Jernej Skrabec <jernej.skrabec@gmail.com>
Date: Wed, 5 May 2021 19:33:35 +0200
Subject: MAINTAINERS: Update my e-mail

Old e-mail address doesn't work anymore, update it to new one.

Link: https://lore.kernel.org/r/20210505173335.1483575-1-jernej.skrabec@gmail.com
Signed-off-by: Jernej Skrabec <jernej.skrabec@gmail.com>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
---
 .mailmap    |  1 +
 MAINTAINERS | 10 +++++-----
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/.mailmap b/.mailmap
index 2d93232ed72b..ca235ef4755f 100644
--- a/.mailmap
+++ b/.mailmap
@@ -159,6 +159,7 @@ Jeff Layton <jlayton@kernel.org> <jlayton@primarydata.com>
 Jeff Layton <jlayton@kernel.org> <jlayton@redhat.com>
 Jens Axboe <axboe@suse.de>
 Jens Osterkamp <Jens.Osterkamp@de.ibm.com>
+Jernej Skrabec <jernej.skrabec@gmail.com> <jernej.skrabec@siol.net>
 Jiri Slaby <jirislaby@kernel.org> <jirislaby@gmail.com>
 Jiri Slaby <jirislaby@kernel.org> <jslaby@novell.com>
 Jiri Slaby <jirislaby@kernel.org> <jslaby@suse.com>
diff --git a/MAINTAINERS b/MAINTAINERS
index 7fdc513392f4..2e9063d018d0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1572,7 +1572,7 @@ F:	drivers/clk/sunxi/
 ARM/Allwinner sunXi SoC support
 M:	Maxime Ripard <mripard@kernel.org>
 M:	Chen-Yu Tsai <wens@csie.org>
-R:	Jernej Skrabec <jernej.skrabec@siol.net>
+R:	Jernej Skrabec <jernej.skrabec@gmail.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/sunxi/linux.git
@@ -5003,7 +5003,7 @@ S:	Maintained
 F:	drivers/net/fddi/defza.*
 
 DEINTERLACE DRIVERS FOR ALLWINNER H3
-M:	Jernej Skrabec <jernej.skrabec@siol.net>
+M:	Jernej Skrabec <jernej.skrabec@gmail.com>
 L:	linux-media@vger.kernel.org
 S:	Maintained
 T:	git git://linuxtv.org/media_tree.git
@@ -5527,7 +5527,7 @@ F:	include/linux/power/smartreflex.h
 DRM DRIVER FOR ALLWINNER DE2 AND DE3 ENGINE
 M:	Maxime Ripard <mripard@kernel.org>
 M:	Chen-Yu Tsai <wens@csie.org>
-R:	Jernej Skrabec <jernej.skrabec@siol.net>
+R:	Jernej Skrabec <jernej.skrabec@gmail.com>
 L:	dri-devel@lists.freedesktop.org
 S:	Supported
 T:	git git://anongit.freedesktop.org/drm/drm-misc
@@ -5903,7 +5903,7 @@ M:	Andrzej Hajda <a.hajda@samsung.com>
 M:	Neil Armstrong <narmstrong@baylibre.com>
 R:	Laurent Pinchart <Laurent.pinchart@ideasonboard.com>
 R:	Jonas Karlman <jonas@kwiboo.se>
-R:	Jernej Skrabec <jernej.skrabec@siol.net>
+R:	Jernej Skrabec <jernej.skrabec@gmail.com>
 S:	Maintained
 T:	git git://anongit.freedesktop.org/drm/drm-misc
 F:	drivers/gpu/drm/bridge/
@@ -15490,7 +15490,7 @@ F:	include/uapi/linux/rose.h
 F:	net/rose/
 
 ROTATION DRIVER FOR ALLWINNER A83T
-M:	Jernej Skrabec <jernej.skrabec@siol.net>
+M:	Jernej Skrabec <jernej.skrabec@gmail.com>
 L:	linux-media@vger.kernel.org
 S:	Maintained
 T:	git git://linuxtv.org/media_tree.git
-- 
cgit v1.2.3


From 1b55767dfdd93c42712e67e986ac14f0c4debd0c Mon Sep 17 00:00:00 2001
From: Gao Xiang <xiang@kernel.org>
Date: Tue, 11 May 2021 00:25:05 +0800
Subject: erofs: fix broken illustration in documentation

Illustration was broken after ReST conversion by accident.
(checked by 'make SPHINXDIRS="filesystems" htmldocs')

Link: https://lore.kernel.org/r/20210510162506.28637-1-xiang@kernel.org
Fixes: e66d8631ddb3 ("docs: filesystems: convert erofs.txt to ReST")
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Gao Xiang <xiang@kernel.org>
---
 Documentation/filesystems/erofs.rst | 119 ++++++++++++++++++------------------
 1 file changed, 59 insertions(+), 60 deletions(-)

diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst
index bf145171c2bf..869b183ff215 100644
--- a/Documentation/filesystems/erofs.rst
+++ b/Documentation/filesystems/erofs.rst
@@ -113,31 +113,31 @@ may not. All metadatas can be now observed in two different spaces (views):
 
     ::
 
-				    |-> aligned with 8B
-					    |-> followed closely
-	+ meta_blkaddr blocks                                      |-> another slot
-	_____________________________________________________________________
-	|  ...   | inode |  xattrs  | extents  | data inline | ... | inode ...
-	|________|_______|(optional)|(optional)|__(optional)_|_____|__________
-		|-> aligned with the inode slot size
-		    .                   .
-		    .                         .
-		.                              .
-		.                                    .
-	    .                                         .
-	    .                                              .
-	.____________________________________________________|-> aligned with 4B
-	| xattr_ibody_header | shared xattrs | inline xattrs |
-	|____________________|_______________|_______________|
-	|->    12 bytes    <-|->x * 4 bytes<-|               .
-			    .                .                 .
-			.                      .                   .
-		.                           .                     .
-	    ._______________________________.______________________.
-	    | id | id | id | id |  ... | id | ent | ... | ent| ... |
-	    |____|____|____|____|______|____|_____|_____|____|_____|
-					    |-> aligned with 4B
-							|-> aligned with 4B
+                                 |-> aligned with 8B
+                                            |-> followed closely
+     + meta_blkaddr blocks                                      |-> another slot
+       _____________________________________________________________________
+     |  ...   | inode |  xattrs  | extents  | data inline | ... | inode ...
+     |________|_______|(optional)|(optional)|__(optional)_|_____|__________
+              |-> aligned with the inode slot size
+                   .                   .
+                 .                         .
+               .                              .
+             .                                    .
+           .                                         .
+         .                                              .
+       .____________________________________________________|-> aligned with 4B
+       | xattr_ibody_header | shared xattrs | inline xattrs |
+       |____________________|_______________|_______________|
+       |->    12 bytes    <-|->x * 4 bytes<-|               .
+                           .                .                 .
+                     .                      .                   .
+                .                           .                     .
+            ._______________________________.______________________.
+            | id | id | id | id |  ... | id | ent | ... | ent| ... |
+            |____|____|____|____|______|____|_____|_____|____|_____|
+                                            |-> aligned with 4B
+                                                        |-> aligned with 4B
 
     Inode could be 32 or 64 bytes, which can be distinguished from a common
     field which all inode versions have -- i_format::
@@ -175,13 +175,13 @@ may not. All metadatas can be now observed in two different spaces (views):
     Each share xattr can also be directly found by the following formula:
          xattr offset = xattr_blkaddr * block_size + 4 * xattr_id
 
-    ::
+::
 
-			    |-> aligned by  4 bytes
-	+ xattr_blkaddr blocks                     |-> aligned with 4 bytes
-	_________________________________________________________________________
-	|  ...   | xattr_entry |  xattr data | ... |  xattr_entry | xattr data  ...
-	|________|_____________|_____________|_____|______________|_______________
+                           |-> aligned by  4 bytes
+    + xattr_blkaddr blocks                     |-> aligned with 4 bytes
+     _________________________________________________________________________
+    |  ...   | xattr_entry |  xattr data | ... |  xattr_entry | xattr data  ...
+    |________|_____________|_____________|_____|______________|_______________
 
 Directories
 -----------
@@ -193,19 +193,18 @@ algorithm (could refer to the related source code).
 
 ::
 
-		    ___________________________
-		    /                           |
-		/              ______________|________________
-		/              /              | nameoff1       | nameoffN-1
-    ____________.______________._______________v________________v__________
-    | dirent | dirent | ... | dirent | filename | filename | ... | filename |
-    |___.0___|____1___|_____|___N-1__|____0_____|____1_____|_____|___N-1____|
-	\                           ^
-	\                          |                           * could have
-	\                         |                             trailing '\0'
-	    \________________________| nameoff0
-
-				Directory block
+                  ___________________________
+                 /                           |
+                /              ______________|________________
+               /              /              | nameoff1       | nameoffN-1
+  ____________.______________._______________v________________v__________
+ | dirent | dirent | ... | dirent | filename | filename | ... | filename |
+ |___.0___|____1___|_____|___N-1__|____0_____|____1_____|_____|___N-1____|
+      \                           ^
+       \                          |                           * could have
+        \                         |                             trailing '\0'
+         \________________________| nameoff0
+                             Directory block
 
 Note that apart from the offset of the first filename, nameoff0 also indicates
 the total number of directory entries in this block since it is no need to
@@ -216,22 +215,22 @@ Compression
 Currently, EROFS supports 4KB fixed-sized output transparent file compression,
 as illustrated below::
 
-	    |---- Variant-Length Extent ----|-------- VLE --------|----- VLE -----
-	    clusterofs                      clusterofs            clusterofs
-	    |                               |                     |   logical data
-    _________v_______________________________v_____________________v_______________
-    ... |    .        |             |        .    |             |  .          | ...
-    ____|____.________|_____________|________.____|_____________|__.__________|____
-	|-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-|
-	    size          size          size          size          size
-	    .                             .                .                   .
-	    .                       .               .                  .
-		.                  .              .                .
-	_______._____________._____________._____________._____________________
-	    ... |             |             |             | ... physical data
-	_______|_____________|_____________|_____________|_____________________
-		|-> cluster <-|-> cluster <-|-> cluster <-|
-		    size          size          size
+          |<-    variable-sized extent    ->|<-       VLE         ->|
+        clusterofs                        clusterofs              clusterofs
+          |                                 |                       |
+ _________v_________________________________v_______________________v________
+ ... |    .         |              |        .     |              |  .   ...
+ ____|____._________|______________|________.___ _|______________|__.________
+     |-> lcluster <-|-> lcluster <-|-> lcluster <-|-> lcluster <-|
+          size           size           size           size   .             .
+           .                            .                .              .
+            .                       .               .               .
+             .                   .              .               .
+       _______.______________.______________.______________._________________
+          ... |              |              |              | ...
+       _______|______________|______________|______________|_________________
+              |-> pcluster <-|-> pcluster <-|-> pcluster <-|
+                    size           size           size
 
 Currently each on-disk physical cluster can contain 4KB (un)compressed data
 at most. For each logical cluster, there is a corresponding on-disk index to
-- 
cgit v1.2.3


From 46f2e04484aee056c97f79162da83ac7d2d621bb Mon Sep 17 00:00:00 2001
From: Gao Xiang <xiang@kernel.org>
Date: Tue, 11 May 2021 16:44:14 +0800
Subject: erofs: update documentation about data compression

Add more description about (NON)HEAD lclusters, and the new big
pcluster feature.

Link: https://lore.kernel.org/r/20210511084414.21305-1-xiang@kernel.org
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Gao Xiang <xiang@kernel.org>
---
 Documentation/filesystems/erofs.rst | 68 ++++++++++++++++++++++++++-----------
 1 file changed, 49 insertions(+), 19 deletions(-)

diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst
index 869b183ff215..832839fcf4c3 100644
--- a/Documentation/filesystems/erofs.rst
+++ b/Documentation/filesystems/erofs.rst
@@ -50,8 +50,8 @@ Here is the main features of EROFS:
 
  - Support POSIX.1e ACLs by using xattrs;
 
- - Support transparent file compression as an option:
-   LZ4 algorithm with 4 KB fixed-sized output compression for high performance.
+ - Support transparent data compression as an option:
+   LZ4 algorithm with the fixed-sized output compression for high performance.
 
 The following git tree provides the file system user-space tools under
 development (ex, formatting tool mkfs.erofs):
@@ -210,10 +210,21 @@ Note that apart from the offset of the first filename, nameoff0 also indicates
 the total number of directory entries in this block since it is no need to
 introduce another on-disk field at all.
 
-Compression
------------
-Currently, EROFS supports 4KB fixed-sized output transparent file compression,
-as illustrated below::
+Data compression
+----------------
+EROFS implements LZ4 fixed-sized output compression which generates fixed-sized
+compressed data blocks from variable-sized input in contrast to other existing
+fixed-sized input solutions. Relatively higher compression ratios can be gotten
+by using fixed-sized output compression since nowadays popular data compression
+algorithms are mostly LZ77-based and such fixed-sized output approach can be
+benefited from the historical dictionary (aka. sliding window).
+
+In details, original (uncompressed) data is turned into several variable-sized
+extents and in the meanwhile, compressed into physical clusters (pclusters).
+In order to record each variable-sized extent, logical clusters (lclusters) are
+introduced as the basic unit of compress indexes to indicate whether a new
+extent is generated within the range (HEAD) or not (NONHEAD). Lclusters are now
+fixed in block size, as illustrated below::
 
           |<-    variable-sized extent    ->|<-       VLE         ->|
         clusterofs                        clusterofs              clusterofs
@@ -222,18 +233,37 @@ as illustrated below::
  ... |    .         |              |        .     |              |  .   ...
  ____|____._________|______________|________.___ _|______________|__.________
      |-> lcluster <-|-> lcluster <-|-> lcluster <-|-> lcluster <-|
-          size           size           size           size   .             .
-           .                            .                .              .
-            .                       .               .               .
-             .                   .              .               .
-       _______.______________.______________.______________._________________
+          (HEAD)        (NONHEAD)       (HEAD)        (NONHEAD)    .
+           .             CBLKCNT            .                    .
+            .                               .                  .
+             .                              .                .
+       _______._____________________________.______________._________________
           ... |              |              |              | ...
        _______|______________|______________|______________|_________________
-              |-> pcluster <-|-> pcluster <-|-> pcluster <-|
-                    size           size           size
-
-Currently each on-disk physical cluster can contain 4KB (un)compressed data
-at most. For each logical cluster, there is a corresponding on-disk index to
-describe its cluster type, physical cluster address, etc.
-
-See "struct z_erofs_vle_decompressed_index" in erofs_fs.h for more details.
+              |->      big pcluster       <-|-> pcluster <-|
+
+A physical cluster can be seen as a container of physical compressed blocks
+which contains compressed data. Previously, only lcluster-sized (4KB) pclusters
+were supported. After big pcluster feature is introduced (available since
+Linux v5.13), pcluster can be a multiple of lcluster size.
+
+For each HEAD lcluster, clusterofs is recorded to indicate where a new extent
+starts and blkaddr is used to seek the compressed data. For each NONHEAD
+lcluster, delta0 and delta1 are available instead of blkaddr to indicate the
+distance to its HEAD lcluster and the next HEAD lcluster. A PLAIN lcluster is
+also a HEAD lcluster except that its data is uncompressed. See the comments
+around "struct z_erofs_vle_decompressed_index" in erofs_fs.h for more details.
+
+If big pcluster is enabled, pcluster size in lclusters needs to be recorded as
+well. Let the delta0 of the first NONHEAD lcluster store the compressed block
+count with a special flag as a new called CBLKCNT NONHEAD lcluster. It's easy
+to understand its delta0 is constantly 1, as illustrated below::
+
+   __________________________________________________________
+  | HEAD |  NONHEAD  | NONHEAD | ... | NONHEAD | HEAD | HEAD |
+  |__:___|_(CBLKCNT)_|_________|_____|_________|__:___|____:_|
+     |<----- a big pcluster (with CBLKCNT) ------>|<--  -->|
+           a lcluster-sized pcluster (without CBLKCNT) ^
+
+If another HEAD follows a HEAD lcluster, there is no room to record CBLKCNT,
+but it's easy to know the size of such pcluster is 1 lcluster as well.
-- 
cgit v1.2.3


From 35f3f8504c3b60a1ae5576e178b27fc0ddd6157d Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 16:12:42 +0300
Subject: spi: Switch to signed types for *_native_cs SPI controller fields

While fixing undefined behaviour the commit f60d7270c8a3 ("spi: Avoid
undefined behaviour when counting unused native CSs") missed the case
when all CSs are GPIOs and thus unused_native_cs will be evaluated to
-1 in unsigned representation. This will falsely trigger a condition
in the spi_get_gpio_descs().

Switch to signed types for *_native_cs SPI controller fields to fix above.

Fixes: f60d7270c8a3 ("spi: Avoid undefined behaviour when counting unused native CSs")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210510131242.49455-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 360a3bc767ca..74239d65c7fd 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -644,8 +644,8 @@ struct spi_controller {
 	int			*cs_gpios;
 	struct gpio_desc	**cs_gpiods;
 	bool			use_gpio_descriptors;
-	u8			unused_native_cs;
-	u8			max_native_cs;
+	s8			unused_native_cs;
+	s8			max_native_cs;
 
 	/* statistics */
 	struct spi_statistics	statistics;
-- 
cgit v1.2.3


From 91e02557f377b6837d4f82b14229d92cae231001 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 11 May 2021 11:05:00 +0200
Subject: ALSA: usb-audio: Fix potential out-of-bounce access in MIDI EP parser

The recently introduced MIDI endpoint parser code has an access to the
field without the size validation, hence it might lead to
out-of-bounce access.  Add the sanity checks for the descriptor
sizes.

Fixes: eb596e0fd13c ("ALSA: usb-audio: generate midi streaming substream names from jack names")
Link: https://lore.kernel.org/r/20210511090500.2637-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/midi.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/sound/usb/midi.c b/sound/usb/midi.c
index 649eb8d1ab7d..2c01649c70f6 100644
--- a/sound/usb/midi.c
+++ b/sound/usb/midi.c
@@ -1750,7 +1750,7 @@ static struct usb_midi_in_jack_descriptor *find_usb_in_jack_descriptor(
 		struct usb_midi_in_jack_descriptor *injd =
 				(struct usb_midi_in_jack_descriptor *)extra;
 
-		if (injd->bLength > 4 &&
+		if (injd->bLength >= sizeof(*injd) &&
 		    injd->bDescriptorType == USB_DT_CS_INTERFACE &&
 		    injd->bDescriptorSubtype == UAC_MIDI_IN_JACK &&
 				injd->bJackID == jack_id)
@@ -1773,7 +1773,7 @@ static struct usb_midi_out_jack_descriptor *find_usb_out_jack_descriptor(
 		struct usb_midi_out_jack_descriptor *outjd =
 				(struct usb_midi_out_jack_descriptor *)extra;
 
-		if (outjd->bLength > 4 &&
+		if (outjd->bLength >= sizeof(*outjd) &&
 		    outjd->bDescriptorType == USB_DT_CS_INTERFACE &&
 		    outjd->bDescriptorSubtype == UAC_MIDI_OUT_JACK &&
 				outjd->bJackID == jack_id)
@@ -1820,7 +1820,8 @@ static void snd_usbmidi_init_substream(struct snd_usb_midi *umidi,
 			outjd = find_usb_out_jack_descriptor(hostif, jack_id);
 			if (outjd) {
 				sz = USB_DT_MIDI_OUT_SIZE(outjd->bNrInputPins);
-				iJack = *(((uint8_t *) outjd) + sz - sizeof(uint8_t));
+				if (outjd->bLength >= sz)
+					iJack = *(((uint8_t *) outjd) + sz - sizeof(uint8_t));
 			}
 		} else {
 			/* and out jacks connect to ins */
-- 
cgit v1.2.3


From 4b81ccebaeee885ab1aa1438133f2991e3a2b6ea Mon Sep 17 00:00:00 2001
From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Date: Tue, 27 Apr 2021 10:12:12 -0300
Subject: bpf, ringbuf: Deny reserve of buffers larger than ringbuf

A BPF program might try to reserve a buffer larger than the ringbuf size.
If the consumer pointer is way ahead of the producer, that would be
successfully reserved, allowing the BPF program to read or write out of
the ringbuf allocated area.

Reported-by: Ryota Shiga (Flatt Security)
Fixes: 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it")
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/ringbuf.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index f25b719ac786..b86d80c9cd59 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -315,6 +315,9 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
 		return NULL;
 
 	len = round_up(size + BPF_RINGBUF_HDR_SZ, 8);
+	if (len > rb->mask + 1)
+		return NULL;
+
 	cons_pos = smp_load_acquire(&rb->consumer_pos);
 
 	if (in_nmi()) {
-- 
cgit v1.2.3


From 04ea3086c4d73da7009de1e84962a904139af219 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Tue, 4 May 2021 16:38:00 -0700
Subject: bpf: Prevent writable memory-mapping of read-only ringbuf pages

Only the very first page of BPF ringbuf that contains consumer position
counter is supposed to be mapped as writeable by user-space. Producer
position is read-only and can be modified only by the kernel code. BPF ringbuf
data pages are read-only as well and are not meant to be modified by
user-code to maintain integrity of per-record headers.

This patch allows to map only consumer position page as writeable and
everything else is restricted to be read-only. remap_vmalloc_range()
internally adds VM_DONTEXPAND, so all the established memory mappings can't be
extended, which prevents any future violations through mremap()'ing.

Fixes: 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it")
Reported-by: Ryota Shiga (Flatt Security)
Reported-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/ringbuf.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index b86d80c9cd59..84b3b35fc0d0 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -221,25 +221,20 @@ static int ringbuf_map_get_next_key(struct bpf_map *map, void *key,
 	return -ENOTSUPP;
 }
 
-static size_t bpf_ringbuf_mmap_page_cnt(const struct bpf_ringbuf *rb)
-{
-	size_t data_pages = (rb->mask + 1) >> PAGE_SHIFT;
-
-	/* consumer page + producer page + 2 x data pages */
-	return RINGBUF_POS_PAGES + 2 * data_pages;
-}
-
 static int ringbuf_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
 {
 	struct bpf_ringbuf_map *rb_map;
-	size_t mmap_sz;
 
 	rb_map = container_of(map, struct bpf_ringbuf_map, map);
-	mmap_sz = bpf_ringbuf_mmap_page_cnt(rb_map->rb) << PAGE_SHIFT;
-
-	if (vma->vm_pgoff * PAGE_SIZE + (vma->vm_end - vma->vm_start) > mmap_sz)
-		return -EINVAL;
 
+	if (vma->vm_flags & VM_WRITE) {
+		/* allow writable mapping for the consumer_pos only */
+		if (vma->vm_pgoff != 0 || vma->vm_end - vma->vm_start != PAGE_SIZE)
+			return -EPERM;
+	} else {
+		vma->vm_flags &= ~VM_MAYWRITE;
+	}
+	/* remap_vmalloc_range() checks size and offset constraints */
 	return remap_vmalloc_range(vma, rb_map->rb,
 				   vma->vm_pgoff + RINGBUF_PGOFF);
 }
-- 
cgit v1.2.3


From ff67dbd554b2aaa22be933eced32610ff90209dd Mon Sep 17 00:00:00 2001
From: Qiu Wenbo <qiuwenbo@kylinos.com.cn>
Date: Wed, 28 Apr 2021 13:06:36 +0800
Subject: platform/x86: ideapad-laptop: fix a NULL pointer dereference

The third parameter of dytc_cql_command should not be NULL since it will
be dereferenced immediately.

Fixes: ff36b0d953dc4 ("platform/x86: ideapad-laptop: rework and create new ACPI helpers")
Signed-off-by: Qiu Wenbo <qiuwenbo@kylinos.com.cn>
Acked-by: Ike Panhc <ike.pan@canonical.com>
Link: https://lore.kernel.org/r/20210428050636.8003-1-qiuwenbo@kylinos.com.cn
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/ideapad-laptop.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index 6cb5ad4be231..8f871151f0cc 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -809,6 +809,7 @@ static int dytc_profile_set(struct platform_profile_handler *pprof,
 {
 	struct ideapad_dytc_priv *dytc = container_of(pprof, struct ideapad_dytc_priv, pprof);
 	struct ideapad_private *priv = dytc->priv;
+	unsigned long output;
 	int err;
 
 	err = mutex_lock_interruptible(&dytc->mutex);
@@ -829,7 +830,7 @@ static int dytc_profile_set(struct platform_profile_handler *pprof,
 
 		/* Determine if we are in CQL mode. This alters the commands we do */
 		err = dytc_cql_command(priv, DYTC_SET_COMMAND(DYTC_FUNCTION_MMC, perfmode, 1),
-				       NULL);
+				       &output);
 		if (err)
 			goto unlock;
 	}
-- 
cgit v1.2.3


From b09aaa3f2c0edeeed670cd29961a0e35bddc78cf Mon Sep 17 00:00:00 2001
From: Barnabás Pőcze <pobrn@protonmail.com>
Date: Fri, 7 May 2021 23:53:44 +0000
Subject: platform/x86: ideapad-laptop: fix method name typo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

"smbc" should be "sbmc". `eval_smbc()` incorrectly called
the SMBC ACPI method instead of SBMC. This resulted in
partial loss of functionality. Rectify that by calling
the correct ACPI method (SBMC), and also rename
methods and constants.

BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=212985
Fixes: 0b765671cb80 ("platform/x86: ideapad-laptop: group and separate (un)related constants into enums")
Fixes: ff36b0d953dc ("platform/x86: ideapad-laptop: rework and create new ACPI helpers")
Cc: stable@vger.kernel.org # 5.12
Signed-off-by: Barnabás Pőcze <pobrn@protonmail.com>
Link: https://lore.kernel.org/r/20210507235333.286505-1-pobrn@protonmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/ideapad-laptop.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index 8f871151f0cc..387817290921 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -57,8 +57,8 @@ enum {
 };
 
 enum {
-	SMBC_CONSERVATION_ON  = 3,
-	SMBC_CONSERVATION_OFF = 5,
+	SBMC_CONSERVATION_ON  = 3,
+	SBMC_CONSERVATION_OFF = 5,
 };
 
 enum {
@@ -182,9 +182,9 @@ static int eval_gbmd(acpi_handle handle, unsigned long *res)
 	return eval_int(handle, "GBMD", res);
 }
 
-static int exec_smbc(acpi_handle handle, unsigned long arg)
+static int exec_sbmc(acpi_handle handle, unsigned long arg)
 {
-	return exec_simple_method(handle, "SMBC", arg);
+	return exec_simple_method(handle, "SBMC", arg);
 }
 
 static int eval_hals(acpi_handle handle, unsigned long *res)
@@ -477,7 +477,7 @@ static ssize_t conservation_mode_store(struct device *dev,
 	if (err)
 		return err;
 
-	err = exec_smbc(priv->adev->handle, state ? SMBC_CONSERVATION_ON : SMBC_CONSERVATION_OFF);
+	err = exec_sbmc(priv->adev->handle, state ? SBMC_CONSERVATION_ON : SBMC_CONSERVATION_OFF);
 	if (err)
 		return err;
 
-- 
cgit v1.2.3


From 79d341e26ebcdbc622348aaaab6f8f89b6fdb25f Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Fri, 30 Apr 2021 14:07:35 +0800
Subject: platform/x86: hp_accel: Avoid invoking _INI to speed up resume
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

hp_accel can take almost two seconds to resume on some HP laptops.

The bottleneck is on evaluating _INI, which is only needed to run once.

Resolve the issue by only invoking _INI when it's necessary. Namely, on
probe and on hibernation restore.

Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Acked-by: Éric Piel <eric.piel@trempplin-utc.net>
Link: https://lore.kernel.org/r/20210430060736.590321-1-kai.heng.feng@canonical.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/misc/lis3lv02d/lis3lv02d.h |  1 +
 drivers/platform/x86/hp_accel.c    | 22 +++++++++++++++++++++-
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/lis3lv02d/lis3lv02d.h b/drivers/misc/lis3lv02d/lis3lv02d.h
index c394c0b08519..7ac788fae1b8 100644
--- a/drivers/misc/lis3lv02d/lis3lv02d.h
+++ b/drivers/misc/lis3lv02d/lis3lv02d.h
@@ -271,6 +271,7 @@ struct lis3lv02d {
 	int			regs_size;
 	u8                      *reg_cache;
 	bool			regs_stored;
+	bool			init_required;
 	u8                      odr_mask;  /* ODR bit mask */
 	u8			whoami;    /* indicates measurement precision */
 	s16 (*read_data) (struct lis3lv02d *lis3, int reg);
diff --git a/drivers/platform/x86/hp_accel.c b/drivers/platform/x86/hp_accel.c
index 799cbe2ffcf3..8c0867bda828 100644
--- a/drivers/platform/x86/hp_accel.c
+++ b/drivers/platform/x86/hp_accel.c
@@ -88,6 +88,9 @@ MODULE_DEVICE_TABLE(acpi, lis3lv02d_device_ids);
 static int lis3lv02d_acpi_init(struct lis3lv02d *lis3)
 {
 	struct acpi_device *dev = lis3->bus_priv;
+	if (!lis3->init_required)
+		return 0;
+
 	if (acpi_evaluate_object(dev->handle, METHOD_NAME__INI,
 				 NULL, NULL) != AE_OK)
 		return -EINVAL;
@@ -356,6 +359,7 @@ static int lis3lv02d_add(struct acpi_device *device)
 	}
 
 	/* call the core layer do its init */
+	lis3_dev.init_required = true;
 	ret = lis3lv02d_init_device(&lis3_dev);
 	if (ret)
 		return ret;
@@ -403,11 +407,27 @@ static int lis3lv02d_suspend(struct device *dev)
 
 static int lis3lv02d_resume(struct device *dev)
 {
+	lis3_dev.init_required = false;
+	lis3lv02d_poweron(&lis3_dev);
+	return 0;
+}
+
+static int lis3lv02d_restore(struct device *dev)
+{
+	lis3_dev.init_required = true;
 	lis3lv02d_poweron(&lis3_dev);
 	return 0;
 }
 
-static SIMPLE_DEV_PM_OPS(hp_accel_pm, lis3lv02d_suspend, lis3lv02d_resume);
+static const struct dev_pm_ops hp_accel_pm = {
+	.suspend = lis3lv02d_suspend,
+	.resume = lis3lv02d_resume,
+	.freeze = lis3lv02d_suspend,
+	.thaw = lis3lv02d_resume,
+	.poweroff = lis3lv02d_suspend,
+	.restore = lis3lv02d_restore,
+};
+
 #define HP_ACCEL_PM (&hp_accel_pm)
 #else
 #define HP_ACCEL_PM NULL
-- 
cgit v1.2.3


From a5c936add6a23c15c6ae538ab7a12f80751fdf0f Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Wed, 21 Apr 2021 13:20:31 +0800
Subject: drm/i915/dp: Use slow and wide link training for everything
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Screen flickers on Innolux eDP 1.3 panel when clock rate 540000 is in use.

According to the panel vendor, though clock rate 540000 is advertised,
but the max clock rate it really supports is 270000.

Ville Syrjälä mentioned that fast and narrow also breaks some eDP 1.4
panel, so use slow and wide training for all panels to resolve the
issue.

User also confirmed that the new strategy doesn't introduce any
regression on XPS 9380.

v2:
 - Use slow and wide for everything.

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/3384
References: https://gitlab.freedesktop.org/drm/intel/-/issues/272
Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210421052054.1434718-1-kai.heng.feng@canonical.com
(cherry picked from commit acca7762eb71bc05a8f28d29320d193150051f79)
Fixes: 2bbd6dba84d4 ("drm/i915: Try to use fast+narrow link on eDP again and fall back to the old max strategy on failure")
Cc: <stable@vger.kernel.org> # v5.12+
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_dp.c | 59 +++------------------------------
 1 file changed, 5 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 6a2dee8cef1f..1e026177ed1b 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -1095,44 +1095,6 @@ intel_dp_compute_link_config_wide(struct intel_dp *intel_dp,
 	return -EINVAL;
 }
 
-/* Optimize link config in order: max bpp, min lanes, min clock */
-static int
-intel_dp_compute_link_config_fast(struct intel_dp *intel_dp,
-				  struct intel_crtc_state *pipe_config,
-				  const struct link_config_limits *limits)
-{
-	const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
-	int bpp, clock, lane_count;
-	int mode_rate, link_clock, link_avail;
-
-	for (bpp = limits->max_bpp; bpp >= limits->min_bpp; bpp -= 2 * 3) {
-		int output_bpp = intel_dp_output_bpp(pipe_config->output_format, bpp);
-
-		mode_rate = intel_dp_link_required(adjusted_mode->crtc_clock,
-						   output_bpp);
-
-		for (lane_count = limits->min_lane_count;
-		     lane_count <= limits->max_lane_count;
-		     lane_count <<= 1) {
-			for (clock = limits->min_clock; clock <= limits->max_clock; clock++) {
-				link_clock = intel_dp->common_rates[clock];
-				link_avail = intel_dp_max_data_rate(link_clock,
-								    lane_count);
-
-				if (mode_rate <= link_avail) {
-					pipe_config->lane_count = lane_count;
-					pipe_config->pipe_bpp = bpp;
-					pipe_config->port_clock = link_clock;
-
-					return 0;
-				}
-			}
-		}
-	}
-
-	return -EINVAL;
-}
-
 static int intel_dp_dsc_compute_bpp(struct intel_dp *intel_dp, u8 dsc_max_bpc)
 {
 	int i, num_bpc;
@@ -1382,22 +1344,11 @@ intel_dp_compute_link_config(struct intel_encoder *encoder,
 	    intel_dp_can_bigjoiner(intel_dp))
 		pipe_config->bigjoiner = true;
 
-	if (intel_dp_is_edp(intel_dp))
-		/*
-		 * Optimize for fast and narrow. eDP 1.3 section 3.3 and eDP 1.4
-		 * section A.1: "It is recommended that the minimum number of
-		 * lanes be used, using the minimum link rate allowed for that
-		 * lane configuration."
-		 *
-		 * Note that we fall back to the max clock and lane count for eDP
-		 * panels that fail with the fast optimal settings (see
-		 * intel_dp->use_max_params), in which case the fast vs. wide
-		 * choice doesn't matter.
-		 */
-		ret = intel_dp_compute_link_config_fast(intel_dp, pipe_config, &limits);
-	else
-		/* Optimize for slow and wide. */
-		ret = intel_dp_compute_link_config_wide(intel_dp, pipe_config, &limits);
+	/*
+	 * Optimize for slow and wide for everything, because there are some
+	 * eDP 1.3 and 1.4 panels don't work well with fast and narrow.
+	 */
+	ret = intel_dp_compute_link_config_wide(intel_dp, pipe_config, &limits);
 
 	/* enable compression if the mode doesn't fit available BW */
 	drm_dbg_kms(&i915->drm, "Force DSC en = %d\n", intel_dp->force_dsc_en);
-- 
cgit v1.2.3


From 9b8a233bc294dd71d3c7d30692a78ab32f246a0f Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@linux.ibm.com>
Date: Fri, 30 Apr 2021 21:30:55 +0530
Subject: btrfs: handle transaction start error in btrfs_fileattr_set

Add error handling in btrfs_fileattr_set in case of an error while
starting a transaction. This fixes btrfs/232 which otherwise used to
fail with below signature on Power.

  btrfs/232 [ 1119.474650] run fstests btrfs/232 at 2021-04-21 02:21:22
  <...>
  [ 1366.638585] BUG: Unable to handle kernel data access on read at 0xffffffffffffff86
  [ 1366.638768] Faulting instruction address: 0xc0000000009a5c88
  cpu 0x0: Vector: 380 (Data SLB Access) at [c000000014f177b0]
      pc: c0000000009a5c88: btrfs_update_root_times+0x58/0xc0
      lr: c0000000009a5c84: btrfs_update_root_times+0x54/0xc0
      <...>
      pid   = 24881, comm = fsstress
	   btrfs_update_inode+0xa0/0x140
	   btrfs_fileattr_set+0x5d0/0x6f0
	   vfs_fileattr_set+0x2a8/0x390
	   do_vfs_ioctl+0x1290/0x1ac0
	   sys_ioctl+0x6c/0x120
	   system_call_exception+0x3d4/0x410
	   system_call_common+0xec/0x278

Fixes: 97fc29775487 ("btrfs: convert to fileattr")
Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ioctl.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index ee1dbabb5d3c..98ecb70466bf 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -259,6 +259,8 @@ int btrfs_fileattr_set(struct user_namespace *mnt_userns,
 	if (!fa->flags_valid) {
 		/* 1 item for the inode */
 		trans = btrfs_start_transaction(root, 1);
+		if (IS_ERR(trans))
+			return PTR_ERR(trans);
 		goto update_flags;
 	}
 
-- 
cgit v1.2.3


From f2be77fee648ddd6d0d259d3527344ba0120e314 Mon Sep 17 00:00:00 2001
From: Elia Devito <eliadevito@gmail.com>
Date: Tue, 11 May 2021 14:46:49 +0200
Subject: ALSA: hda/realtek: Add fixup for HP Spectre x360 15-df0xxx

Fixup to enable all 4 speaker on HP Spectre x360 15-df0xxx and probably
on similar models.

0x14 pin config override is required to enable all speakers and
alc285-speaker2-to-dac1 fixup to enable volume adjustment.

BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=189331
Signed-off-by: Elia Devito <eliadevito@gmail.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210511124651.4802-1-eliadevito@gmail.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index b4b71609dff1..3e269de84079 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6542,6 +6542,7 @@ enum {
 	ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST,
 	ALC295_FIXUP_ASUS_DACS,
 	ALC295_FIXUP_HP_OMEN,
+	ALC285_FIXUP_HP_SPECTRE_X360,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -8099,6 +8100,15 @@ static const struct hda_fixup alc269_fixups[] = {
 		.chained = true,
 		.chain_id = ALC269_FIXUP_HP_LINE1_MIC1_LED,
 	},
+	[ALC285_FIXUP_HP_SPECTRE_X360] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{ 0x14, 0x90170110 }, /* enable top speaker */
+			{}
+		},
+		.chained = true,
+		.chain_id = ALC285_FIXUP_SPEAKER2_TO_DAC1,
+	},
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -8259,6 +8269,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x8497, "HP Envy x360", ALC269_FIXUP_HP_MUTE_LED_MIC3),
 	SND_PCI_QUIRK(0x103c, 0x84da, "HP OMEN dc0019-ur", ALC295_FIXUP_HP_OMEN),
 	SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3),
+	SND_PCI_QUIRK(0x103c, 0x8519, "HP Spectre x360 15-df0xxx", ALC285_FIXUP_HP_SPECTRE_X360),
 	SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED),
 	SND_PCI_QUIRK(0x103c, 0x86c7, "HP Envy AiO 32", ALC274_FIXUP_HP_ENVY_GPIO),
 	SND_PCI_QUIRK(0x103c, 0x8724, "HP EliteBook 850 G7", ALC285_FIXUP_HP_GPIO_LED),
@@ -8665,6 +8676,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
 	{.id = ALC274_FIXUP_HP_MIC, .name = "alc274-hp-mic-detect"},
 	{.id = ALC245_FIXUP_HP_X360_AMP, .name = "alc245-hp-x360-amp"},
 	{.id = ALC295_FIXUP_HP_OMEN, .name = "alc295-hp-omen"},
+	{.id = ALC285_FIXUP_HP_SPECTRE_X360, .name = "alc285-hp-spectre-x360"},
 	{}
 };
 #define ALC225_STANDARD_PINS \
-- 
cgit v1.2.3


From efed9a3337e341bd0989161b97453b52567bc59d Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Mon, 10 May 2021 17:05:35 -0700
Subject: kyber: fix out of bounds access when preempted

__blk_mq_sched_bio_merge() gets the ctx and hctx for the current CPU and
passes the hctx to ->bio_merge(). kyber_bio_merge() then gets the ctx
for the current CPU again and uses that to get the corresponding Kyber
context in the passed hctx. However, the thread may be preempted between
the two calls to blk_mq_get_ctx(), and the ctx returned the second time
may no longer correspond to the passed hctx. This "works" accidentally
most of the time, but it can cause us to read garbage if the second ctx
came from an hctx with more ctx's than the first one (i.e., if
ctx->index_hw[hctx->type] > hctx->nr_ctx).

This manifested as this UBSAN array index out of bounds error reported
by Jakub:

UBSAN: array-index-out-of-bounds in ../kernel/locking/qspinlock.c:130:9
index 13106 is out of range for type 'long unsigned int [128]'
Call Trace:
 dump_stack+0xa4/0xe5
 ubsan_epilogue+0x5/0x40
 __ubsan_handle_out_of_bounds.cold.13+0x2a/0x34
 queued_spin_lock_slowpath+0x476/0x480
 do_raw_spin_lock+0x1c2/0x1d0
 kyber_bio_merge+0x112/0x180
 blk_mq_submit_bio+0x1f5/0x1100
 submit_bio_noacct+0x7b0/0x870
 submit_bio+0xc2/0x3a0
 btrfs_map_bio+0x4f0/0x9d0
 btrfs_submit_data_bio+0x24e/0x310
 submit_one_bio+0x7f/0xb0
 submit_extent_page+0xc4/0x440
 __extent_writepage_io+0x2b8/0x5e0
 __extent_writepage+0x28d/0x6e0
 extent_write_cache_pages+0x4d7/0x7a0
 extent_writepages+0xa2/0x110
 do_writepages+0x8f/0x180
 __writeback_single_inode+0x99/0x7f0
 writeback_sb_inodes+0x34e/0x790
 __writeback_inodes_wb+0x9e/0x120
 wb_writeback+0x4d2/0x660
 wb_workfn+0x64d/0xa10
 process_one_work+0x53a/0xa80
 worker_thread+0x69/0x5b0
 kthread+0x20b/0x240
 ret_from_fork+0x1f/0x30

Only Kyber uses the hctx, so fix it by passing the request_queue to
->bio_merge() instead. BFQ and mq-deadline just use that, and Kyber can
map the queues itself to avoid the mismatch.

Fixes: a6088845c2bf ("block: kyber: make kyber more friendly with merging")
Reported-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Omar Sandoval <osandov@fb.com>
Link: https://lore.kernel.org/r/c7598605401a48d5cfeadebb678abd10af22b83f.1620691329.git.osandov@fb.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-iosched.c      | 3 +--
 block/blk-mq-sched.c     | 8 +++++---
 block/kyber-iosched.c    | 5 +++--
 block/mq-deadline.c      | 3 +--
 include/linux/elevator.h | 2 +-
 5 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 0270cd7ca165..59b2499d3f8b 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2263,10 +2263,9 @@ static void bfq_remove_request(struct request_queue *q,
 
 }
 
-static bool bfq_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio,
+static bool bfq_bio_merge(struct request_queue *q, struct bio *bio,
 		unsigned int nr_segs)
 {
-	struct request_queue *q = hctx->queue;
 	struct bfq_data *bfqd = q->elevator->elevator_data;
 	struct request *free = NULL;
 	/*
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 42a365b1b9c0..996a4b2f73aa 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -358,14 +358,16 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
 		unsigned int nr_segs)
 {
 	struct elevator_queue *e = q->elevator;
-	struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
-	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
+	struct blk_mq_ctx *ctx;
+	struct blk_mq_hw_ctx *hctx;
 	bool ret = false;
 	enum hctx_type type;
 
 	if (e && e->type->ops.bio_merge)
-		return e->type->ops.bio_merge(hctx, bio, nr_segs);
+		return e->type->ops.bio_merge(q, bio, nr_segs);
 
+	ctx = blk_mq_get_ctx(q);
+	hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
 	type = hctx->type;
 	if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE) ||
 	    list_empty_careful(&ctx->rq_lists[type]))
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index 8969e122f081..81e3279ecd57 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -561,11 +561,12 @@ static void kyber_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
 	}
 }
 
-static bool kyber_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio,
+static bool kyber_bio_merge(struct request_queue *q, struct bio *bio,
 		unsigned int nr_segs)
 {
+	struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
+	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
 	struct kyber_hctx_data *khd = hctx->sched_data;
-	struct blk_mq_ctx *ctx = blk_mq_get_ctx(hctx->queue);
 	struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw[hctx->type]];
 	unsigned int sched_domain = kyber_sched_domain(bio->bi_opf);
 	struct list_head *rq_list = &kcq->rq_list[sched_domain];
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 04aded71ead2..8eea2cbf2bf4 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -461,10 +461,9 @@ static int dd_request_merge(struct request_queue *q, struct request **rq,
 	return ELEVATOR_NO_MERGE;
 }
 
-static bool dd_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio,
+static bool dd_bio_merge(struct request_queue *q, struct bio *bio,
 		unsigned int nr_segs)
 {
-	struct request_queue *q = hctx->queue;
 	struct deadline_data *dd = q->elevator->elevator_data;
 	struct request *free = NULL;
 	bool ret;
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 1fe8e105b83b..dcb2f9022c1d 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -34,7 +34,7 @@ struct elevator_mq_ops {
 	void (*depth_updated)(struct blk_mq_hw_ctx *);
 
 	bool (*allow_merge)(struct request_queue *, struct request *, struct bio *);
-	bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *, unsigned int);
+	bool (*bio_merge)(struct request_queue *, struct bio *, unsigned int);
 	int (*request_merge)(struct request_queue *q, struct request **, struct bio *);
 	void (*request_merged)(struct request_queue *, struct request *, enum elv_merge);
 	void (*requests_merged)(struct request_queue *, struct request *, struct request *);
-- 
cgit v1.2.3


From 5e1f689913a4498e3081093670ef9d85b2c60920 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 29 Apr 2021 14:18:53 +0200
Subject: nvme-multipath: fix double initialization of ANA state

nvme_init_identify and thus nvme_mpath_init can be called multiple
times and thus must not overwrite potentially initialized or in-use
fields.  Split out a helper for the basic initialization when the
controller is initialized and make sure the init_identify path does
not blindly change in-use data structures.

Fixes: 0d0b660f214d ("nvme: add ANA support")
Reported-by: Martin Wilck <mwilck@suse.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Hannes Reinecke <hare@suse.de>
---
 drivers/nvme/host/core.c      |  3 ++-
 drivers/nvme/host/multipath.c | 55 +++++++++++++++++++++++--------------------
 drivers/nvme/host/nvme.h      |  8 +++++--
 3 files changed, 37 insertions(+), 29 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 522c9b229f80..762125f2905f 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2901,7 +2901,7 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
 		ctrl->hmmaxd = le16_to_cpu(id->hmmaxd);
 	}
 
-	ret = nvme_mpath_init(ctrl, id);
+	ret = nvme_mpath_init_identify(ctrl, id);
 	if (ret < 0)
 		goto out_free;
 
@@ -4364,6 +4364,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 		min(default_ps_max_latency_us, (unsigned long)S32_MAX));
 
 	nvme_fault_inject_init(&ctrl->fault_inject, dev_name(ctrl->device));
+	nvme_mpath_init_ctrl(ctrl);
 
 	return 0;
 out_free_name:
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 0551796517e6..deb14562c96a 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -781,9 +781,18 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
 	put_disk(head->disk);
 }
 
-int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
+void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl)
 {
-	int error;
+	mutex_init(&ctrl->ana_lock);
+	timer_setup(&ctrl->anatt_timer, nvme_anatt_timeout, 0);
+	INIT_WORK(&ctrl->ana_work, nvme_ana_work);
+}
+
+int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
+{
+	size_t max_transfer_size = ctrl->max_hw_sectors << SECTOR_SHIFT;
+	size_t ana_log_size;
+	int error = 0;
 
 	/* check if multipath is enabled and we have the capability */
 	if (!multipath || !ctrl->subsys ||
@@ -795,37 +804,31 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
 	ctrl->nanagrpid = le32_to_cpu(id->nanagrpid);
 	ctrl->anagrpmax = le32_to_cpu(id->anagrpmax);
 
-	mutex_init(&ctrl->ana_lock);
-	timer_setup(&ctrl->anatt_timer, nvme_anatt_timeout, 0);
-	ctrl->ana_log_size = sizeof(struct nvme_ana_rsp_hdr) +
-		ctrl->nanagrpid * sizeof(struct nvme_ana_group_desc);
-	ctrl->ana_log_size += ctrl->max_namespaces * sizeof(__le32);
-
-	if (ctrl->ana_log_size > ctrl->max_hw_sectors << SECTOR_SHIFT) {
+	ana_log_size = sizeof(struct nvme_ana_rsp_hdr) +
+		ctrl->nanagrpid * sizeof(struct nvme_ana_group_desc) +
+		ctrl->max_namespaces * sizeof(__le32);
+	if (ana_log_size > max_transfer_size) {
 		dev_err(ctrl->device,
-			"ANA log page size (%zd) larger than MDTS (%d).\n",
-			ctrl->ana_log_size,
-			ctrl->max_hw_sectors << SECTOR_SHIFT);
+			"ANA log page size (%zd) larger than MDTS (%zd).\n",
+			ana_log_size, max_transfer_size);
 		dev_err(ctrl->device, "disabling ANA support.\n");
-		return 0;
+		goto out_uninit;
 	}
-
-	INIT_WORK(&ctrl->ana_work, nvme_ana_work);
-	kfree(ctrl->ana_log_buf);
-	ctrl->ana_log_buf = kmalloc(ctrl->ana_log_size, GFP_KERNEL);
-	if (!ctrl->ana_log_buf) {
-		error = -ENOMEM;
-		goto out;
+	if (ana_log_size > ctrl->ana_log_size) {
+		nvme_mpath_stop(ctrl);
+		kfree(ctrl->ana_log_buf);
+		ctrl->ana_log_buf = kmalloc(ctrl->ana_log_size, GFP_KERNEL);
+		if (!ctrl->ana_log_buf)
+			return -ENOMEM;
 	}
-
+	ctrl->ana_log_size = ana_log_size;
 	error = nvme_read_ana_log(ctrl);
 	if (error)
-		goto out_free_ana_log_buf;
+		goto out_uninit;
 	return 0;
-out_free_ana_log_buf:
-	kfree(ctrl->ana_log_buf);
-	ctrl->ana_log_buf = NULL;
-out:
+
+out_uninit:
+	nvme_mpath_uninit(ctrl);
 	return error;
 }
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 05f31a2c64bb..0015860ec12b 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -712,7 +712,8 @@ void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
 int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
 void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id);
 void nvme_mpath_remove_disk(struct nvme_ns_head *head);
-int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
+int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
+void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl);
 void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
 void nvme_mpath_stop(struct nvme_ctrl *ctrl);
 bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
@@ -780,7 +781,10 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
 static inline void nvme_trace_bio_complete(struct request *req)
 {
 }
-static inline int nvme_mpath_init(struct nvme_ctrl *ctrl,
+static inline void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl)
+{
+}
+static inline int nvme_mpath_init_identify(struct nvme_ctrl *ctrl,
 		struct nvme_id_ctrl *id)
 {
 	if (ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA)
-- 
cgit v1.2.3


From 608a969046e6e0567d05a166be66c77d2dd8220b Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Thu, 6 May 2021 18:51:35 -0700
Subject: nvmet: fix inline bio check for bdev-ns

When handling rw commands, for inline bio case we only consider
transfer size. This works well when req->sg_cnt fits into the
req->inline_bvec, but it will result in the warning in
__bio_add_page() when req->sg_cnt > NVMET_MAX_INLINE_BVEC.

Consider an I/O size 32768 and first page is not aligned to the page
boundary, then I/O is split in following manner :-

[ 2206.256140] nvmet: sg->length 3440 sg->offset 656
[ 2206.256144] nvmet: sg->length 4096 sg->offset 0
[ 2206.256148] nvmet: sg->length 4096 sg->offset 0
[ 2206.256152] nvmet: sg->length 4096 sg->offset 0
[ 2206.256155] nvmet: sg->length 4096 sg->offset 0
[ 2206.256159] nvmet: sg->length 4096 sg->offset 0
[ 2206.256163] nvmet: sg->length 4096 sg->offset 0
[ 2206.256166] nvmet: sg->length 4096 sg->offset 0
[ 2206.256170] nvmet: sg->length 656 sg->offset 0

Now the req->transfer_size == NVMET_MAX_INLINE_DATA_LEN i.e. 32768, but
the req->sg_cnt is (9) > NVMET_MAX_INLINE_BIOVEC which is (8).
This will result in the following warning message :-

nvmet_bdev_execute_rw()
	bio_add_page()
		__bio_add_page()
			WARN_ON_ONCE(bio_full(bio, len));

This scenario is very hard to reproduce on the nvme-loop transport only
with rw commands issued with the passthru IOCTL interface from the host
application and the data buffer is allocated with the malloc() and not
the posix_memalign().

Fixes: 73383adfad24 ("nvmet: don't split large I/Os unconditionally")
Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/io-cmd-bdev.c | 2 +-
 drivers/nvme/target/nvmet.h       | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 9a8b3726a37c..429263ca9b97 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -258,7 +258,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
 
 	sector = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba);
 
-	if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) {
+	if (nvmet_use_inline_bvec(req)) {
 		bio = &req->b.inline_bio;
 		bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
 	} else {
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 5566ed403576..d69a409515d6 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -616,4 +616,10 @@ static inline sector_t nvmet_lba_to_sect(struct nvmet_ns *ns, __le64 lba)
 	return le64_to_cpu(lba) << (ns->blksize_shift - SECTOR_SHIFT);
 }
 
+static inline bool nvmet_use_inline_bvec(struct nvmet_req *req)
+{
+	return req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN &&
+	       req->sg_cnt <= NVMET_MAX_INLINE_BIOVEC;
+}
+
 #endif /* _NVMET_H */
-- 
cgit v1.2.3


From ab96de5def854d8fc51280b6a20597e64b14ac31 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Thu, 6 May 2021 18:51:36 -0700
Subject: nvmet: fix inline bio check for passthru

When handling passthru commands, for inline bio allocation we only
consider the transfer size. This works well when req->sg_cnt fits into
the req->inline_bvec, but it will result in the early return from
bio_add_hw_page() when req->sg_cnt > NVMET_MAX_INLINE_BVEC.

Consider an I/O of size 32768 and first buffer is not aligned to the
page boundary, then I/O is split in following manner :-

[ 2206.256140] nvmet: sg->length 3440 sg->offset 656
[ 2206.256144] nvmet: sg->length 4096 sg->offset 0
[ 2206.256148] nvmet: sg->length 4096 sg->offset 0
[ 2206.256152] nvmet: sg->length 4096 sg->offset 0
[ 2206.256155] nvmet: sg->length 4096 sg->offset 0
[ 2206.256159] nvmet: sg->length 4096 sg->offset 0
[ 2206.256163] nvmet: sg->length 4096 sg->offset 0
[ 2206.256166] nvmet: sg->length 4096 sg->offset 0
[ 2206.256170] nvmet: sg->length 656 sg->offset 0

Now the req->transfer_size == NVMET_MAX_INLINE_DATA_LEN i.e. 32768, but
the req->sg_cnt is (9) > NVMET_MAX_INLINE_BIOVEC which is (8).
This will result in early return in the following code path :-

nvmet_bdev_execute_rw()
	bio_add_pc_page()
		bio_add_hw_page()
			if (bio_full(bio, len))
				return 0;

Use previously introduced helper nvmet_use_inline_bvec() to consider
req->sg_cnt when using inline bio. This only affects nvme-loop
transport.

Fixes: dab3902b19a0 ("nvmet: use inline bio for passthru fast path")
Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/passthru.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
index 2798944899b7..39b1473f7204 100644
--- a/drivers/nvme/target/passthru.c
+++ b/drivers/nvme/target/passthru.c
@@ -194,7 +194,7 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
 	if (req->sg_cnt > BIO_MAX_VECS)
 		return -EINVAL;
 
-	if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) {
+	if (nvmet_use_inline_bvec(req)) {
 		bio = &req->p.inline_bio;
 		bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
 	} else {
-- 
cgit v1.2.3


From 8cc365f9559b86802afc0208389f5c8d46b4ad61 Mon Sep 17 00:00:00 2001
From: Michal Kalderon <michal.kalderon@marvell.com>
Date: Thu, 6 May 2021 10:08:19 +0300
Subject: nvmet-rdma: Fix NULL deref when SEND is completed with error

When running some traffic and taking down the link on peer, a
retry counter exceeded error is received. This leads to
nvmet_rdma_error_comp which tried accessing the cq_context to
obtain the queue. The cq_context is no longer valid after the
fix to use shared CQ mechanism and should be obtained similar
to how it is obtained in other functions from the wc->qp.

[ 905.786331] nvmet_rdma: SEND for CQE 0x00000000e3337f90 failed with status transport retry counter exceeded (12).
[ 905.832048] BUG: unable to handle kernel NULL pointer dereference at 0000000000000048
[ 905.839919] PGD 0 P4D 0
[ 905.842464] Oops: 0000 1 SMP NOPTI
[ 905.846144] CPU: 13 PID: 1557 Comm: kworker/13:1H Kdump: loaded Tainted: G OE --------- - - 4.18.0-304.el8.x86_64 #1
[ 905.872135] RIP: 0010:nvmet_rdma_error_comp+0x5/0x1b [nvmet_rdma]
[ 905.878259] Code: 19 4f c0 e8 89 b3 a5 f6 e9 5b e0 ff ff 0f b7 75 14 4c 89 ea 48 c7 c7 08 1a 4f c0 e8 71 b3 a5 f6 e9 4b e0 ff ff 0f 1f 44 00 00 <48> 8b 47 48 48 85 c0 74 08 48 89 c7 e9 98 bf 49 00 e9 c3 e3 ff ff
[ 905.897135] RSP: 0018:ffffab601c45fe28 EFLAGS: 00010246
[ 905.902387] RAX: 0000000000000065 RBX: ffff9e729ea2f800 RCX: 0000000000000000
[ 905.909558] RDX: 0000000000000000 RSI: ffff9e72df9567c8 RDI: 0000000000000000
[ 905.916731] RBP: ffff9e729ea2b400 R08: 000000000000074d R09: 0000000000000074
[ 905.923903] R10: 0000000000000000 R11: ffffab601c45fcc0 R12: 0000000000000010
[ 905.931074] R13: 0000000000000000 R14: 0000000000000010 R15: ffff9e729ea2f400
[ 905.938247] FS: 0000000000000000(0000) GS:ffff9e72df940000(0000) knlGS:0000000000000000
[ 905.938249] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 905.950067] nvmet_rdma: SEND for CQE 0x00000000c7356cca failed with status transport retry counter exceeded (12).
[ 905.961855] CR2: 0000000000000048 CR3: 000000678d010004 CR4: 00000000007706e0
[ 905.961855] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 905.961856] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 905.961857] PKRU: 55555554
[ 906.010315] Call Trace:
[ 906.012778] __ib_process_cq+0x89/0x170 [ib_core]
[ 906.017509] ib_cq_poll_work+0x26/0x80 [ib_core]
[ 906.022152] process_one_work+0x1a7/0x360
[ 906.026182] ? create_worker+0x1a0/0x1a0
[ 906.030123] worker_thread+0x30/0x390
[ 906.033802] ? create_worker+0x1a0/0x1a0
[ 906.037744] kthread+0x116/0x130
[ 906.040988] ? kthread_flush_work_fn+0x10/0x10
[ 906.045456] ret_from_fork+0x1f/0x40

Fixes: ca0f1a8055be2 ("nvmet-rdma: use new shared CQ mechanism")
Signed-off-by: Shai Malin <smalin@marvell.com>
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/rdma.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 6c1f3ab7649c..7d607f435e36 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -700,7 +700,7 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
 {
 	struct nvmet_rdma_rsp *rsp =
 		container_of(wc->wr_cqe, struct nvmet_rdma_rsp, send_cqe);
-	struct nvmet_rdma_queue *queue = cq->cq_context;
+	struct nvmet_rdma_queue *queue = wc->qp->qp_context;
 
 	nvmet_rdma_release_rsp(rsp);
 
@@ -786,7 +786,7 @@ static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc)
 {
 	struct nvmet_rdma_rsp *rsp =
 		container_of(wc->wr_cqe, struct nvmet_rdma_rsp, write_cqe);
-	struct nvmet_rdma_queue *queue = cq->cq_context;
+	struct nvmet_rdma_queue *queue = wc->qp->qp_context;
 	struct rdma_cm_id *cm_id = rsp->queue->cm_id;
 	u16 status;
 
-- 
cgit v1.2.3


From 3651aaacd10b2f8cee3780c490fc2df55bd4f543 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Mon, 10 May 2021 12:15:36 -0700
Subject: nvmet: demote discovery cmd parse err msg to debug
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Host can send invalid commands and flood the target with error messages
for the discovery controller. Demote the error message from pr_err() to
pr_debug( in nvmet_parse_discovery_cmd(). 

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/discovery.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 4845d12e374a..fc3645fc2c24 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -379,7 +379,7 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req)
 		req->execute = nvmet_execute_disc_identify;
 		return 0;
 	default:
-		pr_err("unhandled cmd %d\n", cmd->common.opcode);
+		pr_debug("unhandled cmd %d\n", cmd->common.opcode);
 		req->error_loc = offsetof(struct nvme_common_command, opcode);
 		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
 	}
-- 
cgit v1.2.3


From 4c2dab2bf5ace0ddc07ca7f04a7ba32fc3b23492 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Mon, 10 May 2021 12:15:37 -0700
Subject: nvmet: use helper to remove the duplicate code

Use the helper nvmet_report_invalid_opcode() to report invalid opcode
so we can remove the duplicate code.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/admin-cmd.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index e7a367cf6d36..dcd49a72f2f3 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -975,10 +975,7 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
 	case nvme_admin_keep_alive:
 		req->execute = nvmet_execute_keep_alive;
 		return 0;
+	default:
+		return nvmet_report_invalid_opcode(req);
 	}
-
-	pr_debug("unhandled cmd %d on qid %d\n", cmd->common.opcode,
-	       req->sq->qid);
-	req->error_loc = offsetof(struct nvme_common_command, opcode);
-	return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
 }
-- 
cgit v1.2.3


From 7a4ffd20ec6d31dfde2cc5608851e5109ffed7c9 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Mon, 10 May 2021 12:15:38 -0700
Subject: nvmet: demote fabrics cmd parse err msg to debug

Host can send invalid commands and flood the target with error messages.
Demote the error message from pr_err() to pr_debug() in
nvmet_parse_fabrics_cmd() and nvmet_parse_connect_cmd().

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/fabrics-cmd.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index 1420a8e3e0b1..7d0f3523fdab 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -94,7 +94,7 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req)
 		req->execute = nvmet_execute_prop_get;
 		break;
 	default:
-		pr_err("received unknown capsule type 0x%x\n",
+		pr_debug("received unknown capsule type 0x%x\n",
 			cmd->fabrics.fctype);
 		req->error_loc = offsetof(struct nvmf_common_command, fctype);
 		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
@@ -284,13 +284,13 @@ u16 nvmet_parse_connect_cmd(struct nvmet_req *req)
 	struct nvme_command *cmd = req->cmd;
 
 	if (!nvme_is_fabrics(cmd)) {
-		pr_err("invalid command 0x%x on unconnected queue.\n",
+		pr_debug("invalid command 0x%x on unconnected queue.\n",
 			cmd->fabrics.opcode);
 		req->error_loc = offsetof(struct nvme_common_command, opcode);
 		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
 	}
 	if (cmd->fabrics.fctype != nvme_fabrics_type_connect) {
-		pr_err("invalid capsule type 0x%x on unconnected queue.\n",
+		pr_debug("invalid capsule type 0x%x on unconnected queue.\n",
 			cmd->fabrics.fctype);
 		req->error_loc = offsetof(struct nvmf_common_command, fctype);
 		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
-- 
cgit v1.2.3


From 918d9c77791cc8267b5b5ab556c868dfa57e0d93 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 11 May 2021 17:01:28 +0200
Subject: docs: cdrom-standard.rst: get rid of uneeded UTF-8 chars

This file was converted from a LaTeX one. The conversion used
some UTF-8 characters at the literal blocks. Replace them
by normal ASCII characters.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Jens Axboe <axboe@kernel.dk>
Link: https://lore.kernel.org/r/79c3f482da17ea48d69b6e6ad1b7fb102b9dd7bf.1620744606.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/cdrom/cdrom-standard.rst | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/Documentation/cdrom/cdrom-standard.rst b/Documentation/cdrom/cdrom-standard.rst
index 70500b189cc8..5845960ca382 100644
--- a/Documentation/cdrom/cdrom-standard.rst
+++ b/Documentation/cdrom/cdrom-standard.rst
@@ -146,18 +146,18 @@ with the kernel as a block device by registering the following general
 *struct file_operations*::
 
 	struct file_operations cdrom_fops = {
-		NULL,			/∗ lseek ∗/
-		block _read ,		/∗ read—general block-dev read ∗/
-		block _write,		/∗ write—general block-dev write ∗/
-		NULL,			/∗ readdir ∗/
-		NULL,			/∗ select ∗/
-		cdrom_ioctl,		/∗ ioctl ∗/
-		NULL,			/∗ mmap ∗/
-		cdrom_open,		/∗ open ∗/
-		cdrom_release,		/∗ release ∗/
-		NULL,			/∗ fsync ∗/
-		NULL,			/∗ fasync ∗/
-		NULL			/∗ revalidate ∗/
+		NULL,			/* lseek */
+		block _read ,		/* read--general block-dev read */
+		block _write,		/* write--general block-dev write */
+		NULL,			/* readdir */
+		NULL,			/* select */
+		cdrom_ioctl,		/* ioctl */
+		NULL,			/* mmap */
+		cdrom_open,		/* open */
+		cdrom_release,		/* release */
+		NULL,			/* fsync */
+		NULL,			/* fasync */
+		NULL			/* revalidate */
 	};
 
 Every active CD-ROM device shares this *struct*. The routines
@@ -250,12 +250,12 @@ The drive-specific, minor-like information that is registered with
 `cdrom.c`, currently contains the following fields::
 
   struct cdrom_device_info {
-	const struct cdrom_device_ops * ops; 	/* device operations for this major */
+	const struct cdrom_device_ops * ops;	/* device operations for this major */
 	struct list_head list;			/* linked list of all device_info */
 	struct gendisk * disk;			/* matching block layer disk */
 	void *  handle;				/* driver-dependent data */
 
-	int mask; 				/* mask of capability: disables them */
+	int mask;				/* mask of capability: disables them */
 	int speed;				/* maximum speed for reading data */
 	int capacity;				/* number of discs in a jukebox */
 
@@ -569,7 +569,7 @@ the *CDC_CLOSE_TRAY* bit in *mask*.
 
 In the file `cdrom.c` you will encounter many constructions of the type::
 
-	if (cdo->capability & ∼cdi->mask & CDC _⟨capability⟩) ...
+	if (cdo->capability & ~cdi->mask & CDC _<capability>) ...
 
 There is no *ioctl* to set the mask... The reason is that
 I think it is better to control the **behavior** rather than the
-- 
cgit v1.2.3


From 8d3926c09e043448d4d26896b8225943f12d0933 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 11 May 2021 17:01:29 +0200
Subject: docs: ABI: remove a meaningless UTF-8 character
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Those two files have this character:
	- U+00ac ('¬'): NOT SIGN

at the end of the first line, apparently for no reason. Drop them.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/6cd3f0b47568fecb7889fd18d1d744c3aaf73866.1620744606.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/ABI/obsolete/sysfs-kernel-fadump_registered  | 2 +-
 Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered b/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered
index 0360be39c98e..dae880b1a5d5 100644
--- a/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered
+++ b/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered
@@ -1,4 +1,4 @@
-This ABI is renamed and moved to a new location /sys/kernel/fadump/registered.¬
+This ABI is renamed and moved to a new location /sys/kernel/fadump/registered.
 
 What:		/sys/kernel/fadump_registered
 Date:		Feb 2012
diff --git a/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem b/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem
index 6ce0b129ab12..ca2396edb5f1 100644
--- a/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem
+++ b/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem
@@ -1,4 +1,4 @@
-This ABI is renamed and moved to a new location /sys/kernel/fadump/release_mem.¬
+This ABI is renamed and moved to a new location /sys/kernel/fadump/release_mem.
 
 What:		/sys/kernel/fadump_release_mem
 Date:		Feb 2012
-- 
cgit v1.2.3


From 6f3bceba03b4f18e0b83261e2fb761e0ad5da625 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 11 May 2021 17:01:30 +0200
Subject: docs: ABI: remove some spurious characters

The KernelVersion tag contains some spurious UTF-8 characters
for no reason. Drop them.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/6d774ad6cb3795a177309503a39f8f1b5e309d64.1620744606.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/ABI/testing/sysfs-module | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-module b/Documentation/ABI/testing/sysfs-module
index a485434d2a0f..88bddf192ceb 100644
--- a/Documentation/ABI/testing/sysfs-module
+++ b/Documentation/ABI/testing/sysfs-module
@@ -37,13 +37,13 @@ Description:	Maximum time allowed for periodic transfers per microframe (μs)
 
 What:		/sys/module/*/{coresize,initsize}
 Date:		Jan 2012
-KernelVersion:»·3.3
+KernelVersion:	3.3
 Contact:	Kay Sievers <kay.sievers@vrfy.org>
 Description:	Module size in bytes.
 
 What:		/sys/module/*/taint
 Date:		Jan 2012
-KernelVersion:»·3.3
+KernelVersion:	3.3
 Contact:	Kay Sievers <kay.sievers@vrfy.org>
 Description:	Module taint flags:
 			==  =====================
-- 
cgit v1.2.3


From d1f2722d5357d7a5138b1be8bd64946f0a14c81e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 11 May 2021 17:01:31 +0200
Subject: docs: hwmon: tmp103.rst: fix bad usage of UTF-8 chars
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While UTF-8 characters can be used at the Linux documentation,
the best is to use them only when ASCII doesn't offer a good replacement.
So, replace the occurences of the following UTF-8 characters:

	- U+2013 ('–'): EN DASH

In this specific case, EN DASH was used instead of a minus
sign. So, replace it by a single hyphen.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/73b3c7c1eef5c12ddc941624d23689313bd56529.1620744606.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/hwmon/tmp103.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/hwmon/tmp103.rst b/Documentation/hwmon/tmp103.rst
index e195a7d14309..b3ef81475cf8 100644
--- a/Documentation/hwmon/tmp103.rst
+++ b/Documentation/hwmon/tmp103.rst
@@ -21,10 +21,10 @@ Description
 The TMP103 is a digital output temperature sensor in a four-ball
 wafer chip-scale package (WCSP). The TMP103 is capable of reading
 temperatures to a resolution of 1°C. The TMP103 is specified for
-operation over a temperature range of –40°C to +125°C.
+operation over a temperature range of -40°C to +125°C.
 
 Resolution: 8 Bits
-Accuracy: ±1°C Typ (–10°C to +100°C)
+Accuracy: ±1°C Typ (-10°C to +100°C)
 
 The driver provides the common sysfs-interface for temperatures (see
 Documentation/hwmon/sysfs-interface.rst under Temperatures).
-- 
cgit v1.2.3


From 5e716ec68b4a75a84e28c0efa68db613deb64981 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 11 May 2021 17:01:32 +0200
Subject: docs: networking: device_drivers: fix bad usage of UTF-8 chars
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Probably because the original file was pre-processed by some
tool, both i40e.rst and iavf.rst files are using this character:

	- U+2013 ('–'): EN DASH

meaning an hyphen when calling a command line application, which
is obviously wrong. So, replace them by an hyphen, ensuring
that it will be properly displayed as literals when building
the documentation.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/95eb2a48d0ca3528780ce0dfce64359977fa8cb3.1620744606.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/networking/device_drivers/ethernet/intel/i40e.rst | 4 ++--
 Documentation/networking/device_drivers/ethernet/intel/iavf.rst | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/networking/device_drivers/ethernet/intel/i40e.rst b/Documentation/networking/device_drivers/ethernet/intel/i40e.rst
index 8a9b18573688..2d3f6bd969a2 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/i40e.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/i40e.rst
@@ -173,7 +173,7 @@ Director rule is added from ethtool (Sideband filter), ATR is turned off by the
 driver. To re-enable ATR, the sideband can be disabled with the ethtool -K
 option. For example::
 
-  ethtool –K [adapter] ntuple [off|on]
+  ethtool -K [adapter] ntuple [off|on]
 
 If sideband is re-enabled after ATR is re-enabled, ATR remains enabled until a
 TCP-IP flow is added. When all TCP-IP sideband rules are deleted, ATR is
@@ -688,7 +688,7 @@ shaper bw_rlimit: for each tc, sets minimum and maximum bandwidth rates.
 Totals must be equal or less than port speed.
 
 For example: min_rate 1Gbit 3Gbit: Verify bandwidth limit using network
-monitoring tools such as ifstat or sar –n DEV [interval] [number of samples]
+monitoring tools such as `ifstat` or `sar -n DEV [interval] [number of samples]`
 
 2. Enable HW TC offload on interface::
 
diff --git a/Documentation/networking/device_drivers/ethernet/intel/iavf.rst b/Documentation/networking/device_drivers/ethernet/intel/iavf.rst
index 52e037b11c97..25330b7b5168 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/iavf.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/iavf.rst
@@ -179,7 +179,7 @@ shaper bw_rlimit: for each tc, sets minimum and maximum bandwidth rates.
 Totals must be equal or less than port speed.
 
 For example: min_rate 1Gbit 3Gbit: Verify bandwidth limit using network
-monitoring tools such as ifstat or sar –n DEV [interval] [number of samples]
+monitoring tools such as ``ifstat`` or ``sar -n DEV [interval] [number of samples]``
 
 NOTE:
   Setting up channels via ethtool (ethtool -L) is not supported when the
-- 
cgit v1.2.3


From 7240cd200541543008a7ce4fcaf2ba5a5556128f Mon Sep 17 00:00:00 2001
From: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
Date: Tue, 11 May 2021 09:49:37 -0400
Subject: Remove link to nonexistent rocket driver docs

The rocket driver and documentation were removed in this commit, but
the corresponding entry in index.rst was not removed.

Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
Fixes: 3b00b6af7a5b ("tty: rocket, remove the driver")
Link: https://lore.kernel.org/r/20210511134937.2442291-1-desmondcheongzx@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/driver-api/serial/index.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Documentation/driver-api/serial/index.rst b/Documentation/driver-api/serial/index.rst
index 21351b8c95a4..8f7d7af3b90b 100644
--- a/Documentation/driver-api/serial/index.rst
+++ b/Documentation/driver-api/serial/index.rst
@@ -19,7 +19,6 @@ Serial drivers
 
     moxa-smartio
     n_gsm
-    rocket
     serial-iso7816
     serial-rs485
 
-- 
cgit v1.2.3


From 965a7d72e798eb7af0aa67210e37cf7ecd1c9cad Mon Sep 17 00:00:00 2001
From: Mathy Vanhoef <Mathy.Vanhoef@kuleuven.be>
Date: Tue, 11 May 2021 20:02:42 +0200
Subject: mac80211: assure all fragments are encrypted

Do not mix plaintext and encrypted fragments in protected Wi-Fi
networks. This fixes CVE-2020-26147.

Previously, an attacker was able to first forward a legitimate encrypted
fragment towards a victim, followed by a plaintext fragment. The
encrypted and plaintext fragment would then be reassembled. For further
details see Section 6.3 and Appendix D in the paper "Fragment and Forge:
Breaking Wi-Fi Through Frame Aggregation and Fragmentation".

Because of this change there are now two equivalent conditions in the
code to determine if a received fragment requires sequential PNs, so we
also move this test to a separate function to make the code easier to
maintain.

Cc: stable@vger.kernel.org
Signed-off-by: Mathy Vanhoef <Mathy.Vanhoef@kuleuven.be>
Link: https://lore.kernel.org/r/20210511200110.30c4394bb835.I5acfdb552cc1d20c339c262315950b3eac491397@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 62047e93e217..65fc674e27cc 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2194,6 +2194,16 @@ ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata,
 	return NULL;
 }
 
+static bool requires_sequential_pn(struct ieee80211_rx_data *rx, __le16 fc)
+{
+	return rx->key &&
+		(rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP ||
+		 rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256 ||
+		 rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP ||
+		 rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP_256) &&
+		ieee80211_has_protected(fc);
+}
+
 static ieee80211_rx_result debug_noinline
 ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 {
@@ -2238,12 +2248,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 		/* This is the first fragment of a new frame. */
 		entry = ieee80211_reassemble_add(rx->sdata, frag, seq,
 						 rx->seqno_idx, &(rx->skb));
-		if (rx->key &&
-		    (rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP ||
-		     rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256 ||
-		     rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP ||
-		     rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP_256) &&
-		    ieee80211_has_protected(fc)) {
+		if (requires_sequential_pn(rx, fc)) {
 			int queue = rx->security_idx;
 
 			/* Store CCMP/GCMP PN so that we can verify that the
@@ -2285,11 +2290,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 		u8 pn[IEEE80211_CCMP_PN_LEN], *rpn;
 		int queue;
 
-		if (!rx->key ||
-		    (rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP &&
-		     rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP_256 &&
-		     rx->key->conf.cipher != WLAN_CIPHER_SUITE_GCMP &&
-		     rx->key->conf.cipher != WLAN_CIPHER_SUITE_GCMP_256))
+		if (!requires_sequential_pn(rx, fc))
 			return RX_DROP_UNUSABLE;
 		memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN);
 		for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) {
-- 
cgit v1.2.3


From 94034c40ab4a3fcf581fbc7f8fdf4e29943c4a24 Mon Sep 17 00:00:00 2001
From: Mathy Vanhoef <Mathy.Vanhoef@kuleuven.be>
Date: Tue, 11 May 2021 20:02:43 +0200
Subject: mac80211: prevent mixed key and fragment cache attacks

Simultaneously prevent mixed key attacks (CVE-2020-24587) and fragment
cache attacks (CVE-2020-24586). This is accomplished by assigning a
unique color to every key (per interface) and using this to track which
key was used to decrypt a fragment. When reassembling frames, it is
now checked whether all fragments were decrypted using the same key.

To assure that fragment cache attacks are also prevented, the ID that is
assigned to keys is unique even over (re)associations and (re)connects.
This means fragments separated by a (re)association or (re)connect will
not be reassembled. Because mac80211 now also prevents the reassembly of
mixed encrypted and plaintext fragments, all cache attacks are prevented.

Cc: stable@vger.kernel.org
Signed-off-by: Mathy Vanhoef <Mathy.Vanhoef@kuleuven.be>
Link: https://lore.kernel.org/r/20210511200110.3f8290e59823.I622a67769ed39257327a362cfc09c812320eb979@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ieee80211_i.h | 1 +
 net/mac80211/key.c         | 7 +++++++
 net/mac80211/key.h         | 2 ++
 net/mac80211/rx.c          | 6 ++++++
 4 files changed, 16 insertions(+)

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 8fcbaa1eedf3..874ffe7819e5 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -97,6 +97,7 @@ struct ieee80211_fragment_entry {
 	u8 rx_queue;
 	bool check_sequential_pn; /* needed for CCMP/GCMP */
 	u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
+	unsigned int key_color;
 };
 
 
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 56c068cb49c4..f695fc80088b 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -799,6 +799,7 @@ int ieee80211_key_link(struct ieee80211_key *key,
 		       struct ieee80211_sub_if_data *sdata,
 		       struct sta_info *sta)
 {
+	static atomic_t key_color = ATOMIC_INIT(0);
 	struct ieee80211_key *old_key;
 	int idx = key->conf.keyidx;
 	bool pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE;
@@ -850,6 +851,12 @@ int ieee80211_key_link(struct ieee80211_key *key,
 	key->sdata = sdata;
 	key->sta = sta;
 
+	/*
+	 * Assign a unique ID to every key so we can easily prevent mixed
+	 * key and fragment cache attacks.
+	 */
+	key->color = atomic_inc_return(&key_color);
+
 	increment_tailroom_need_count(sdata);
 
 	ret = ieee80211_key_replace(sdata, sta, pairwise, old_key, key);
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index 7ad72e9b4991..1e326c89d721 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -128,6 +128,8 @@ struct ieee80211_key {
 	} debugfs;
 #endif
 
+	unsigned int color;
+
 	/*
 	 * key config, must be last because it contains key
 	 * material as variable length member
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 65fc674e27cc..531232b91bc4 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2255,6 +2255,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 			 * next fragment has a sequential PN value.
 			 */
 			entry->check_sequential_pn = true;
+			entry->key_color = rx->key->color;
 			memcpy(entry->last_pn,
 			       rx->key->u.ccmp.rx_pn[queue],
 			       IEEE80211_CCMP_PN_LEN);
@@ -2292,6 +2293,11 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 
 		if (!requires_sequential_pn(rx, fc))
 			return RX_DROP_UNUSABLE;
+
+		/* Prevent mixed key and fragment cache attacks */
+		if (entry->key_color != rx->key->color)
+			return RX_DROP_UNUSABLE;
+
 		memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN);
 		for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) {
 			pn[i]++;
-- 
cgit v1.2.3


From a1d5ff5651ea592c67054233b14b30bf4452999c Mon Sep 17 00:00:00 2001
From: Mathy Vanhoef <Mathy.Vanhoef@kuleuven.be>
Date: Tue, 11 May 2021 20:02:44 +0200
Subject: mac80211: properly handle A-MSDUs that start with an RFC 1042 header

Properly parse A-MSDUs whose first 6 bytes happen to equal a rfc1042
header. This can occur in practice when the destination MAC address
equals AA:AA:03:00:00:00. More importantly, this simplifies the next
patch to mitigate A-MSDU injection attacks.

Cc: stable@vger.kernel.org
Signed-off-by: Mathy Vanhoef <Mathy.Vanhoef@kuleuven.be>
Link: https://lore.kernel.org/r/20210511200110.0b2b886492f0.I23dd5d685fe16d3b0ec8106e8f01b59f499dffed@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 4 ++--
 net/mac80211/rx.c      | 2 +-
 net/wireless/util.c    | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 5224f885a99a..58c2cd417e89 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -5760,7 +5760,7 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr);
  */
 int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
 				  const u8 *addr, enum nl80211_iftype iftype,
-				  u8 data_offset);
+				  u8 data_offset, bool is_amsdu);
 
 /**
  * ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3
@@ -5772,7 +5772,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
 static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
 					 enum nl80211_iftype iftype)
 {
-	return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype, 0);
+	return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype, 0, false);
 }
 
 /**
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 531232b91bc4..f14d32a5001d 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2682,7 +2682,7 @@ __ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset)
 	if (ieee80211_data_to_8023_exthdr(skb, &ethhdr,
 					  rx->sdata->vif.addr,
 					  rx->sdata->vif.type,
-					  data_offset))
+					  data_offset, true))
 		return RX_DROP_UNUSABLE;
 
 	ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr,
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 382c5262d997..39966a873e40 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -542,7 +542,7 @@ EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen);
 
 int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
 				  const u8 *addr, enum nl80211_iftype iftype,
-				  u8 data_offset)
+				  u8 data_offset, bool is_amsdu)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	struct {
@@ -629,7 +629,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
 	skb_copy_bits(skb, hdrlen, &payload, sizeof(payload));
 	tmp.h_proto = payload.proto;
 
-	if (likely((ether_addr_equal(payload.hdr, rfc1042_header) &&
+	if (likely((!is_amsdu && ether_addr_equal(payload.hdr, rfc1042_header) &&
 		    tmp.h_proto != htons(ETH_P_AARP) &&
 		    tmp.h_proto != htons(ETH_P_IPX)) ||
 		   ether_addr_equal(payload.hdr, bridge_tunnel_header)))
-- 
cgit v1.2.3


From 2b8a1fee3488c602aca8bea004a087e60806a5cf Mon Sep 17 00:00:00 2001
From: Mathy Vanhoef <Mathy.Vanhoef@kuleuven.be>
Date: Tue, 11 May 2021 20:02:45 +0200
Subject: cfg80211: mitigate A-MSDU aggregation attacks

Mitigate A-MSDU injection attacks (CVE-2020-24588) by detecting if the
destination address of a subframe equals an RFC1042 (i.e., LLC/SNAP)
header, and if so dropping the complete A-MSDU frame. This mitigates
known attacks, although new (unknown) aggregation-based attacks may
remain possible.

This defense works because in A-MSDU aggregation injection attacks, a
normal encrypted Wi-Fi frame is turned into an A-MSDU frame. This means
the first 6 bytes of the first A-MSDU subframe correspond to an RFC1042
header. In other words, the destination MAC address of the first A-MSDU
subframe contains the start of an RFC1042 header during an aggregation
attack. We can detect this and thereby prevent this specific attack.
For details, see Section 7.2 of "Fragment and Forge: Breaking Wi-Fi
Through Frame Aggregation and Fragmentation".

Note that for kernel 4.9 and above this patch depends on "mac80211:
properly handle A-MSDUs that start with a rfc1042 header". Otherwise
this patch has no impact and attacks will remain possible.

Cc: stable@vger.kernel.org
Signed-off-by: Mathy Vanhoef <Mathy.Vanhoef@kuleuven.be>
Link: https://lore.kernel.org/r/20210511200110.25d93176ddaf.I9e265b597f2cd23eb44573f35b625947b386a9de@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/util.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/wireless/util.c b/net/wireless/util.c
index 39966a873e40..7ec021a610ae 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -771,6 +771,9 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
 		remaining = skb->len - offset;
 		if (subframe_len > remaining)
 			goto purge;
+		/* mitigate A-MSDU aggregation injection attacks */
+		if (ether_addr_equal(eth.h_dest, rfc1042_header))
+			goto purge;
 
 		offset += sizeof(struct ethhdr);
 		last = remaining <= subframe_len + padding;
-- 
cgit v1.2.3


From 270032a2a9c4535799736142e1e7c413ca7b836e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 11 May 2021 20:02:46 +0200
Subject: mac80211: drop A-MSDUs on old ciphers

With old ciphers (WEP and TKIP) we shouldn't be using A-MSDUs
since A-MSDUs are only supported if we know that they are, and
the only practical way for that is HT support which doesn't
support old ciphers.

However, we would normally accept them anyway. Since we check
the MMIC before deaggregating A-MSDUs, and the A-MSDU bit in
the QoS header is not protected in TKIP (or WEP), this enables
attacks similar to CVE-2020-24588. To prevent that, drop A-MSDUs
completely with old ciphers.

Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210511200110.076543300172.I548e6e71f1ee9cad4b9a37bf212ae7db723587aa@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index f14d32a5001d..8a72d48ad6e0 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -6,7 +6,7 @@
  * Copyright 2007-2010	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
  */
 
 #include <linux/jiffies.h>
@@ -2739,6 +2739,23 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
 	if (is_multicast_ether_addr(hdr->addr1))
 		return RX_DROP_UNUSABLE;
 
+	if (rx->key) {
+		/*
+		 * We should not receive A-MSDUs on pre-HT connections,
+		 * and HT connections cannot use old ciphers. Thus drop
+		 * them, as in those cases we couldn't even have SPP
+		 * A-MSDUs or such.
+		 */
+		switch (rx->key->conf.cipher) {
+		case WLAN_CIPHER_SUITE_WEP40:
+		case WLAN_CIPHER_SUITE_WEP104:
+		case WLAN_CIPHER_SUITE_TKIP:
+			return RX_DROP_UNUSABLE;
+		default:
+			break;
+		}
+	}
+
 	return __ieee80211_rx_h_amsdu(rx, 0);
 }
 
-- 
cgit v1.2.3


From 3a11ce08c45b50d69c891d71760b7c5b92074709 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 11 May 2021 20:02:47 +0200
Subject: mac80211: add fragment cache to sta_info

Prior patches protected against fragmentation cache attacks
by coloring keys, but this shows that it can lead to issues
when multiple stations use the same sequence number. Add a
fragment cache to struct sta_info (in addition to the one in
the interface) to separate fragments for different stations
properly.

This then automatically clear most of the fragment cache when a
station disconnects (or reassociates) from an AP, or when client
interfaces disconnect from the network, etc.

On the way, also fix the comment there since this brings us in line
with the recommendation in 802.11-2016 ("An AP should support ...").
Additionally, remove a useless condition (since there's no problem
purging an already empty list).

Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210511200110.fc35046b0d52.I1ef101e3784d13e8f6600d83de7ec9a3a45bcd52@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ieee80211_i.h | 26 ++++----------------------
 net/mac80211/iface.c       | 11 +++--------
 net/mac80211/rx.c          | 41 ++++++++++++++++++++++++++++++-----------
 net/mac80211/sta_info.c    |  6 +++++-
 net/mac80211/sta_info.h    | 32 +++++++++++++++++++++++++++++++-
 5 files changed, 73 insertions(+), 43 deletions(-)

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 874ffe7819e5..4c714375bad0 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -50,12 +50,6 @@ struct ieee80211_local;
 #define IEEE80211_ENCRYPT_HEADROOM 8
 #define IEEE80211_ENCRYPT_TAILROOM 18
 
-/* IEEE 802.11 (Ch. 9.5 Defragmentation) requires support for concurrent
- * reception of at least three fragmented frames. This limit can be increased
- * by changing this define, at the cost of slower frame reassembly and
- * increased memory use (about 2 kB of RAM per entry). */
-#define IEEE80211_FRAGMENT_MAX 4
-
 /* power level hasn't been configured (or set to automatic) */
 #define IEEE80211_UNSET_POWER_LEVEL	INT_MIN
 
@@ -88,19 +82,6 @@ extern const u8 ieee80211_ac_to_qos_mask[IEEE80211_NUM_ACS];
 
 #define IEEE80211_MAX_NAN_INSTANCE_ID 255
 
-struct ieee80211_fragment_entry {
-	struct sk_buff_head skb_list;
-	unsigned long first_frag_time;
-	u16 seq;
-	u16 extra_len;
-	u16 last_frag;
-	u8 rx_queue;
-	bool check_sequential_pn; /* needed for CCMP/GCMP */
-	u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
-	unsigned int key_color;
-};
-
-
 struct ieee80211_bss {
 	u32 device_ts_beacon, device_ts_presp;
 
@@ -903,9 +884,7 @@ struct ieee80211_sub_if_data {
 
 	char name[IFNAMSIZ];
 
-	/* Fragment table for host-based reassembly */
-	struct ieee80211_fragment_entry	fragments[IEEE80211_FRAGMENT_MAX];
-	unsigned int fragment_next;
+	struct ieee80211_fragment_cache frags;
 
 	/* TID bitmap for NoAck policy */
 	u16 noack_map;
@@ -2321,4 +2300,7 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw,
 #define debug_noinline
 #endif
 
+void ieee80211_init_frag_cache(struct ieee80211_fragment_cache *cache);
+void ieee80211_destroy_frag_cache(struct ieee80211_fragment_cache *cache);
+
 #endif /* IEEE80211_I_H */
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 7032a2b59249..2e2f73a4aa73 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -8,7 +8,7 @@
  * Copyright 2008, Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright (c) 2016        Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
  */
 #include <linux/slab.h>
 #include <linux/kernel.h>
@@ -677,16 +677,12 @@ static void ieee80211_set_multicast_list(struct net_device *dev)
  */
 static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata)
 {
-	int i;
-
 	/* free extra data */
 	ieee80211_free_keys(sdata, false);
 
 	ieee80211_debugfs_remove_netdev(sdata);
 
-	for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++)
-		__skb_queue_purge(&sdata->fragments[i].skb_list);
-	sdata->fragment_next = 0;
+	ieee80211_destroy_frag_cache(&sdata->frags);
 
 	if (ieee80211_vif_is_mesh(&sdata->vif))
 		ieee80211_mesh_teardown_sdata(sdata);
@@ -1930,8 +1926,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 	sdata->wdev.wiphy = local->hw.wiphy;
 	sdata->local = local;
 
-	for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++)
-		skb_queue_head_init(&sdata->fragments[i].skb_list);
+	ieee80211_init_frag_cache(&sdata->frags);
 
 	INIT_LIST_HEAD(&sdata->key_list);
 
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 8a72d48ad6e0..7212a1bebd0c 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2123,19 +2123,34 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
 	return result;
 }
 
+void ieee80211_init_frag_cache(struct ieee80211_fragment_cache *cache)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(cache->entries); i++)
+		skb_queue_head_init(&cache->entries[i].skb_list);
+}
+
+void ieee80211_destroy_frag_cache(struct ieee80211_fragment_cache *cache)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(cache->entries); i++)
+		__skb_queue_purge(&cache->entries[i].skb_list);
+}
+
 static inline struct ieee80211_fragment_entry *
-ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata,
+ieee80211_reassemble_add(struct ieee80211_fragment_cache *cache,
 			 unsigned int frag, unsigned int seq, int rx_queue,
 			 struct sk_buff **skb)
 {
 	struct ieee80211_fragment_entry *entry;
 
-	entry = &sdata->fragments[sdata->fragment_next++];
-	if (sdata->fragment_next >= IEEE80211_FRAGMENT_MAX)
-		sdata->fragment_next = 0;
+	entry = &cache->entries[cache->next++];
+	if (cache->next >= IEEE80211_FRAGMENT_MAX)
+		cache->next = 0;
 
-	if (!skb_queue_empty(&entry->skb_list))
-		__skb_queue_purge(&entry->skb_list);
+	__skb_queue_purge(&entry->skb_list);
 
 	__skb_queue_tail(&entry->skb_list, *skb); /* no need for locking */
 	*skb = NULL;
@@ -2150,14 +2165,14 @@ ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata,
 }
 
 static inline struct ieee80211_fragment_entry *
-ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata,
+ieee80211_reassemble_find(struct ieee80211_fragment_cache *cache,
 			  unsigned int frag, unsigned int seq,
 			  int rx_queue, struct ieee80211_hdr *hdr)
 {
 	struct ieee80211_fragment_entry *entry;
 	int i, idx;
 
-	idx = sdata->fragment_next;
+	idx = cache->next;
 	for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) {
 		struct ieee80211_hdr *f_hdr;
 		struct sk_buff *f_skb;
@@ -2166,7 +2181,7 @@ ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata,
 		if (idx < 0)
 			idx = IEEE80211_FRAGMENT_MAX - 1;
 
-		entry = &sdata->fragments[idx];
+		entry = &cache->entries[idx];
 		if (skb_queue_empty(&entry->skb_list) || entry->seq != seq ||
 		    entry->rx_queue != rx_queue ||
 		    entry->last_frag + 1 != frag)
@@ -2207,6 +2222,7 @@ static bool requires_sequential_pn(struct ieee80211_rx_data *rx, __le16 fc)
 static ieee80211_rx_result debug_noinline
 ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 {
+	struct ieee80211_fragment_cache *cache = &rx->sdata->frags;
 	struct ieee80211_hdr *hdr;
 	u16 sc;
 	__le16 fc;
@@ -2228,6 +2244,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 		goto out_no_led;
 	}
 
+	if (rx->sta)
+		cache = &rx->sta->frags;
+
 	if (likely(!ieee80211_has_morefrags(fc) && frag == 0))
 		goto out;
 
@@ -2246,7 +2265,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 
 	if (frag == 0) {
 		/* This is the first fragment of a new frame. */
-		entry = ieee80211_reassemble_add(rx->sdata, frag, seq,
+		entry = ieee80211_reassemble_add(cache, frag, seq,
 						 rx->seqno_idx, &(rx->skb));
 		if (requires_sequential_pn(rx, fc)) {
 			int queue = rx->security_idx;
@@ -2274,7 +2293,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 	/* This is a fragment for a frame that should already be pending in
 	 * fragment cache. Add this fragment to the end of the pending entry.
 	 */
-	entry = ieee80211_reassemble_find(rx->sdata, frag, seq,
+	entry = ieee80211_reassemble_find(cache, frag, seq,
 					  rx->seqno_idx, hdr);
 	if (!entry) {
 		I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index ec6973ee88ef..f2fb69da9b6e 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -4,7 +4,7 @@
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
  */
 
 #include <linux/module.h>
@@ -392,6 +392,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 
 	u64_stats_init(&sta->rx_stats.syncp);
 
+	ieee80211_init_frag_cache(&sta->frags);
+
 	sta->sta_state = IEEE80211_STA_NONE;
 
 	/* Mark TID as unreserved */
@@ -1102,6 +1104,8 @@ static void __sta_info_destroy_part2(struct sta_info *sta)
 
 	ieee80211_sta_debugfs_remove(sta);
 
+	ieee80211_destroy_frag_cache(&sta->frags);
+
 	cleanup_single_sta(sta);
 }
 
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 78b9d0c7cc58..5c56d29a619e 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -3,7 +3,7 @@
  * Copyright 2002-2005, Devicescape Software, Inc.
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright(c) 2015-2017 Intel Deutschland GmbH
- * Copyright(c) 2020 Intel Corporation
+ * Copyright(c) 2020-2021 Intel Corporation
  */
 
 #ifndef STA_INFO_H
@@ -438,6 +438,33 @@ struct ieee80211_sta_rx_stats {
 	u64 msdu[IEEE80211_NUM_TIDS + 1];
 };
 
+/*
+ * IEEE 802.11-2016 (10.6 "Defragmentation") recommends support for "concurrent
+ * reception of at least one MSDU per access category per associated STA"
+ * on APs, or "at least one MSDU per access category" on other interface types.
+ *
+ * This limit can be increased by changing this define, at the cost of slower
+ * frame reassembly and increased memory use while fragments are pending.
+ */
+#define IEEE80211_FRAGMENT_MAX 4
+
+struct ieee80211_fragment_entry {
+	struct sk_buff_head skb_list;
+	unsigned long first_frag_time;
+	u16 seq;
+	u16 extra_len;
+	u16 last_frag;
+	u8 rx_queue;
+	bool check_sequential_pn; /* needed for CCMP/GCMP */
+	u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
+	unsigned int key_color;
+};
+
+struct ieee80211_fragment_cache {
+	struct ieee80211_fragment_entry	entries[IEEE80211_FRAGMENT_MAX];
+	unsigned int next;
+};
+
 /*
  * The bandwidth threshold below which the per-station CoDel parameters will be
  * scaled to be more lenient (to prevent starvation of slow stations). This
@@ -531,6 +558,7 @@ struct ieee80211_sta_rx_stats {
  * @status_stats.last_ack_signal: last ACK signal
  * @status_stats.ack_signal_filled: last ACK signal validity
  * @status_stats.avg_ack_signal: average ACK signal
+ * @frags: fragment cache
  */
 struct sta_info {
 	/* General information, mostly static */
@@ -639,6 +667,8 @@ struct sta_info {
 
 	struct cfg80211_chan_def tdls_chandef;
 
+	struct ieee80211_fragment_cache frags;
+
 	/* keep last! */
 	struct ieee80211_sta sta;
 };
-- 
cgit v1.2.3


From bf30ca922a0c0176007e074b0acc77ed345e9990 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 11 May 2021 20:02:48 +0200
Subject: mac80211: check defrag PN against current frame

As pointed out by Mathy Vanhoef, we implement the RX PN check
on fragmented frames incorrectly - we check against the last
received PN prior to the new frame, rather than to the one in
this frame itself.

Prior patches addressed the security issue here, but in order
to be able to reason better about the code, fix it to really
compare against the current frame's PN, not the last stored
one.

Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210511200110.bfbc340ff071.Id0b690e581da7d03d76df90bb0e3fd55930bc8a0@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ieee80211_i.h | 11 +++++++++--
 net/mac80211/rx.c          |  5 ++---
 net/mac80211/wpa.c         | 13 +++++++++----
 3 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 4c714375bad0..214404a558fb 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -223,8 +223,15 @@ struct ieee80211_rx_data {
 	 */
 	int security_idx;
 
-	u32 tkip_iv32;
-	u16 tkip_iv16;
+	union {
+		struct {
+			u32 iv32;
+			u16 iv16;
+		} tkip;
+		struct {
+			u8 pn[IEEE80211_CCMP_PN_LEN];
+		} ccm_gcm;
+	};
 };
 
 struct ieee80211_csa_settings {
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 7212a1bebd0c..b619c47e1d12 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2308,7 +2308,6 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 	if (entry->check_sequential_pn) {
 		int i;
 		u8 pn[IEEE80211_CCMP_PN_LEN], *rpn;
-		int queue;
 
 		if (!requires_sequential_pn(rx, fc))
 			return RX_DROP_UNUSABLE;
@@ -2323,8 +2322,8 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 			if (pn[i])
 				break;
 		}
-		queue = rx->security_idx;
-		rpn = rx->key->u.ccmp.rx_pn[queue];
+
+		rpn = rx->ccm_gcm.pn;
 		if (memcmp(pn, rpn, IEEE80211_CCMP_PN_LEN))
 			return RX_DROP_UNUSABLE;
 		memcpy(entry->last_pn, pn, IEEE80211_CCMP_PN_LEN);
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 91bf32af55e9..bca47fad5a16 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -3,6 +3,7 @@
  * Copyright 2002-2004, Instant802 Networks, Inc.
  * Copyright 2008, Jouni Malinen <j@w1.fi>
  * Copyright (C) 2016-2017 Intel Deutschland GmbH
+ * Copyright (C) 2020-2021 Intel Corporation
  */
 
 #include <linux/netdevice.h>
@@ -167,8 +168,8 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
 
 update_iv:
 	/* update IV in key information to be able to detect replays */
-	rx->key->u.tkip.rx[rx->security_idx].iv32 = rx->tkip_iv32;
-	rx->key->u.tkip.rx[rx->security_idx].iv16 = rx->tkip_iv16;
+	rx->key->u.tkip.rx[rx->security_idx].iv32 = rx->tkip.iv32;
+	rx->key->u.tkip.rx[rx->security_idx].iv16 = rx->tkip.iv16;
 
 	return RX_CONTINUE;
 
@@ -294,8 +295,8 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
 					  key, skb->data + hdrlen,
 					  skb->len - hdrlen, rx->sta->sta.addr,
 					  hdr->addr1, hwaccel, rx->security_idx,
-					  &rx->tkip_iv32,
-					  &rx->tkip_iv16);
+					  &rx->tkip.iv32,
+					  &rx->tkip.iv16);
 	if (res != TKIP_DECRYPT_OK)
 		return RX_DROP_UNUSABLE;
 
@@ -553,6 +554,8 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
 		}
 
 		memcpy(key->u.ccmp.rx_pn[queue], pn, IEEE80211_CCMP_PN_LEN);
+		if (unlikely(ieee80211_is_frag(hdr)))
+			memcpy(rx->ccm_gcm.pn, pn, IEEE80211_CCMP_PN_LEN);
 	}
 
 	/* Remove CCMP header and MIC */
@@ -781,6 +784,8 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx)
 		}
 
 		memcpy(key->u.gcmp.rx_pn[queue], pn, IEEE80211_GCMP_PN_LEN);
+		if (unlikely(ieee80211_is_frag(hdr)))
+			memcpy(rx->ccm_gcm.pn, pn, IEEE80211_CCMP_PN_LEN);
 	}
 
 	/* Remove GCMP header and MIC */
-- 
cgit v1.2.3


From 7e44a0b597f04e67eee8cdcbe7ee706c6f5de38b Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 11 May 2021 20:02:49 +0200
Subject: mac80211: prevent attacks on TKIP/WEP as well

Similar to the issues fixed in previous patches, TKIP and WEP
should be protected even if for TKIP we have the Michael MIC
protecting it, and WEP is broken anyway.

However, this also somewhat protects potential other algorithms
that drivers might implement.

Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210511200110.430e8c202313.Ia37e4e5b6b3eaab1a5ae050e015f6c92859dbe27@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c       | 12 ++++++++++++
 net/mac80211/sta_info.h |  3 ++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index b619c47e1d12..4454ec47283f 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2274,6 +2274,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 			 * next fragment has a sequential PN value.
 			 */
 			entry->check_sequential_pn = true;
+			entry->is_protected = true;
 			entry->key_color = rx->key->color;
 			memcpy(entry->last_pn,
 			       rx->key->u.ccmp.rx_pn[queue],
@@ -2286,6 +2287,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 				     sizeof(rx->key->u.gcmp.rx_pn[queue]));
 			BUILD_BUG_ON(IEEE80211_CCMP_PN_LEN !=
 				     IEEE80211_GCMP_PN_LEN);
+		} else if (rx->key && ieee80211_has_protected(fc)) {
+			entry->is_protected = true;
+			entry->key_color = rx->key->color;
 		}
 		return RX_QUEUED;
 	}
@@ -2327,6 +2331,14 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 		if (memcmp(pn, rpn, IEEE80211_CCMP_PN_LEN))
 			return RX_DROP_UNUSABLE;
 		memcpy(entry->last_pn, pn, IEEE80211_CCMP_PN_LEN);
+	} else if (entry->is_protected &&
+		   (!rx->key || !ieee80211_has_protected(fc) ||
+		    rx->key->color != entry->key_color)) {
+		/* Drop this as a mixed key or fragment cache attack, even
+		 * if for TKIP Michael MIC should protect us, and WEP is a
+		 * lost cause anyway.
+		 */
+		return RX_DROP_UNUSABLE;
 	}
 
 	skb_pull(rx->skb, ieee80211_hdrlen(fc));
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 5c56d29a619e..0333072ebd98 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -455,7 +455,8 @@ struct ieee80211_fragment_entry {
 	u16 extra_len;
 	u16 last_frag;
 	u8 rx_queue;
-	bool check_sequential_pn; /* needed for CCMP/GCMP */
+	u8 check_sequential_pn:1, /* needed for CCMP/GCMP */
+	   is_protected:1;
 	u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
 	unsigned int key_color;
 };
-- 
cgit v1.2.3


From a8c4d76a8dd4fb9666fc8919a703d85fb8f44ed8 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 11 May 2021 20:02:50 +0200
Subject: mac80211: do not accept/forward invalid EAPOL frames

EAPOL frames are used for authentication and key management between the
AP and each individual STA associated in the BSS. Those frames are not
supposed to be sent by one associated STA to another associated STA
(either unicast for broadcast/multicast).

Similarly, in 802.11 they're supposed to be sent to the authenticator
(AP) address.

Since it is possible for unexpected EAPOL frames to result in misbehavior
in supplicant implementations, it is better for the AP to not allow such
cases to be forwarded to other clients either directly, or indirectly if
the AP interface is part of a bridge.

Accept EAPOL (control port) frames only if they're transmitted to the
own address, or, due to interoperability concerns, to the PAE group
address.

Disable forwarding of EAPOL (or well, the configured control port
protocol) frames back to wireless medium in all cases. Previously, these
frames were accepted from fully authenticated and authorized stations
and also from unauthenticated stations for one of the cases.

Additionally, to avoid forwarding by the bridge, rewrite the PAE group
address case to the local MAC address.

Cc: stable@vger.kernel.org
Co-developed-by: Jouni Malinen <jouni@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20210511200110.cb327ed0cabe.Ib7dcffa2a31f0913d660de65ba3c8aca75b1d10f@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c | 33 +++++++++++++++++++++++++++------
 1 file changed, 27 insertions(+), 6 deletions(-)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 4454ec47283f..22a925899a9e 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2531,13 +2531,13 @@ static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx, __le16 fc)
 	struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data;
 
 	/*
-	 * Allow EAPOL frames to us/the PAE group address regardless
-	 * of whether the frame was encrypted or not.
+	 * Allow EAPOL frames to us/the PAE group address regardless of
+	 * whether the frame was encrypted or not, and always disallow
+	 * all other destination addresses for them.
 	 */
-	if (ehdr->h_proto == rx->sdata->control_port_protocol &&
-	    (ether_addr_equal(ehdr->h_dest, rx->sdata->vif.addr) ||
-	     ether_addr_equal(ehdr->h_dest, pae_group_addr)))
-		return true;
+	if (unlikely(ehdr->h_proto == rx->sdata->control_port_protocol))
+		return ether_addr_equal(ehdr->h_dest, rx->sdata->vif.addr) ||
+		       ether_addr_equal(ehdr->h_dest, pae_group_addr);
 
 	if (ieee80211_802_1x_port_control(rx) ||
 	    ieee80211_drop_unencrypted(rx, fc))
@@ -2562,8 +2562,28 @@ static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb,
 		cfg80211_rx_control_port(dev, skb, noencrypt);
 		dev_kfree_skb(skb);
 	} else {
+		struct ethhdr *ehdr = (void *)skb_mac_header(skb);
+
 		memset(skb->cb, 0, sizeof(skb->cb));
 
+		/*
+		 * 802.1X over 802.11 requires that the authenticator address
+		 * be used for EAPOL frames. However, 802.1X allows the use of
+		 * the PAE group address instead. If the interface is part of
+		 * a bridge and we pass the frame with the PAE group address,
+		 * then the bridge will forward it to the network (even if the
+		 * client was not associated yet), which isn't supposed to
+		 * happen.
+		 * To avoid that, rewrite the destination address to our own
+		 * address, so that the authenticator (e.g. hostapd) will see
+		 * the frame, but bridge won't forward it anywhere else. Note
+		 * that due to earlier filtering, the only other address can
+		 * be the PAE group address.
+		 */
+		if (unlikely(skb->protocol == sdata->control_port_protocol &&
+			     !ether_addr_equal(ehdr->h_dest, sdata->vif.addr)))
+			ether_addr_copy(ehdr->h_dest, sdata->vif.addr);
+
 		/* deliver to local stack */
 		if (rx->list)
 			list_add_tail(&skb->list, rx->list);
@@ -2603,6 +2623,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
 	if ((sdata->vif.type == NL80211_IFTYPE_AP ||
 	     sdata->vif.type == NL80211_IFTYPE_AP_VLAN) &&
 	    !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) &&
+	    ehdr->h_proto != rx->sdata->control_port_protocol &&
 	    (sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta)) {
 		if (is_multicast_ether_addr(ehdr->h_dest) &&
 		    ieee80211_vif_get_num_mcast_if(sdata) != 0) {
-- 
cgit v1.2.3


From 3edc6b0d6c061a70d8ca3c3c72eb1f58ce29bfb1 Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Tue, 11 May 2021 20:02:51 +0200
Subject: mac80211: extend protection against mixed key and fragment cache
 attacks

For some chips/drivers, e.g., QCA6174 with ath10k, the decryption is
done by the hardware, and the Protected bit in the Frame Control field
is cleared in the lower level driver before the frame is passed to
mac80211. In such cases, the condition for ieee80211_has_protected() is
not met in ieee80211_rx_h_defragment() of mac80211 and the new security
validation steps are not executed.

Extend mac80211 to cover the case where the Protected bit has been
cleared, but the frame is indicated as having been decrypted by the
hardware. This extends protection against mixed key and fragment cache
attack for additional drivers/chips. This fixes CVE-2020-24586 and
CVE-2020-24587 for such cases.

Tested-on: QCA6174 hw3.2 PCI WLAN.RM.4.4.1-00110-QCARMSWP-1

Cc: stable@vger.kernel.org
Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20210511200110.037aa5ca0390.I7bb888e2965a0db02a67075fcb5deb50eb7408aa@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 22a925899a9e..1bb43edd47b6 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2229,6 +2229,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 	unsigned int frag, seq;
 	struct ieee80211_fragment_entry *entry;
 	struct sk_buff *skb;
+	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
 
 	hdr = (struct ieee80211_hdr *)rx->skb->data;
 	fc = hdr->frame_control;
@@ -2287,7 +2288,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 				     sizeof(rx->key->u.gcmp.rx_pn[queue]));
 			BUILD_BUG_ON(IEEE80211_CCMP_PN_LEN !=
 				     IEEE80211_GCMP_PN_LEN);
-		} else if (rx->key && ieee80211_has_protected(fc)) {
+		} else if (rx->key &&
+			   (ieee80211_has_protected(fc) ||
+			    (status->flag & RX_FLAG_DECRYPTED))) {
 			entry->is_protected = true;
 			entry->key_color = rx->key->color;
 		}
@@ -2332,13 +2335,19 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 			return RX_DROP_UNUSABLE;
 		memcpy(entry->last_pn, pn, IEEE80211_CCMP_PN_LEN);
 	} else if (entry->is_protected &&
-		   (!rx->key || !ieee80211_has_protected(fc) ||
+		   (!rx->key ||
+		    (!ieee80211_has_protected(fc) &&
+		     !(status->flag & RX_FLAG_DECRYPTED)) ||
 		    rx->key->color != entry->key_color)) {
 		/* Drop this as a mixed key or fragment cache attack, even
 		 * if for TKIP Michael MIC should protect us, and WEP is a
 		 * lost cause anyway.
 		 */
 		return RX_DROP_UNUSABLE;
+	} else if (entry->is_protected && rx->key &&
+		   entry->key_color != rx->key->color &&
+		   (status->flag & RX_FLAG_DECRYPTED)) {
+		return RX_DROP_UNUSABLE;
 	}
 
 	skb_pull(rx->skb, ieee80211_hdrlen(fc));
-- 
cgit v1.2.3


From a1166b2653db2f3de7338b9fb8a0f6e924b904ee Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Tue, 11 May 2021 20:02:52 +0200
Subject: ath10k: add CCMP PN replay protection for fragmented frames for PCIe

PN replay check for not fragmented frames is finished in the firmware,
but this was not done for fragmented frames when ath10k is used with
QCA6174/QCA6377 PCIe. mac80211 has the function
ieee80211_rx_h_defragment() for PN replay check for fragmented frames,
but this does not get checked with QCA6174 due to the
ieee80211_has_protected() condition not matching the cleared Protected
bit case.

Validate the PN of received fragmented frames within ath10k when CCMP is
used and drop the fragment if the PN is not correct (incremented by
exactly one from the previous fragment). This applies only for
QCA6174/QCA6377 PCIe.

Tested-on: QCA6174 hw3.2 PCI WLAN.RM.4.4.1-00110-QCARMSWP-1

Cc: stable@vger.kernel.org
Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20210511200110.9ba2664866a4.I756e47b67e210dba69966d989c4711ffc02dc6bc@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath10k/htt.h    |  1 +
 drivers/net/wireless/ath/ath10k/htt_rx.c | 99 ++++++++++++++++++++++++++++++--
 2 files changed, 96 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/htt.h b/drivers/net/wireless/ath/ath10k/htt.h
index 956157946106..dbc8aef82a65 100644
--- a/drivers/net/wireless/ath/ath10k/htt.h
+++ b/drivers/net/wireless/ath/ath10k/htt.h
@@ -845,6 +845,7 @@ enum htt_security_types {
 
 #define ATH10K_HTT_TXRX_PEER_SECURITY_MAX 2
 #define ATH10K_TXRX_NUM_EXT_TIDS 19
+#define ATH10K_TXRX_NON_QOS_TID 16
 
 enum htt_security_flags {
 #define HTT_SECURITY_TYPE_MASK 0x7F
diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index 1a08156d5011..f1e5bce8b14f 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -1746,16 +1746,87 @@ static void ath10k_htt_rx_h_csum_offload(struct sk_buff *msdu)
 	msdu->ip_summed = ath10k_htt_rx_get_csum_state(msdu);
 }
 
+static u64 ath10k_htt_rx_h_get_pn(struct ath10k *ar, struct sk_buff *skb,
+				  u16 offset,
+				  enum htt_rx_mpdu_encrypt_type enctype)
+{
+	struct ieee80211_hdr *hdr;
+	u64 pn = 0;
+	u8 *ehdr;
+
+	hdr = (struct ieee80211_hdr *)(skb->data + offset);
+	ehdr = skb->data + offset + ieee80211_hdrlen(hdr->frame_control);
+
+	if (enctype == HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2) {
+		pn = ehdr[0];
+		pn |= (u64)ehdr[1] << 8;
+		pn |= (u64)ehdr[4] << 16;
+		pn |= (u64)ehdr[5] << 24;
+		pn |= (u64)ehdr[6] << 32;
+		pn |= (u64)ehdr[7] << 40;
+	}
+	return pn;
+}
+
+static bool ath10k_htt_rx_h_frag_pn_check(struct ath10k *ar,
+					  struct sk_buff *skb,
+					  u16 peer_id,
+					  u16 offset,
+					  enum htt_rx_mpdu_encrypt_type enctype)
+{
+	struct ath10k_peer *peer;
+	union htt_rx_pn_t *last_pn, new_pn = {0};
+	struct ieee80211_hdr *hdr;
+	bool more_frags;
+	u8 tid, frag_number;
+	u32 seq;
+
+	peer = ath10k_peer_find_by_id(ar, peer_id);
+	if (!peer) {
+		ath10k_dbg(ar, ATH10K_DBG_HTT, "invalid peer for frag pn check\n");
+		return false;
+	}
+
+	hdr = (struct ieee80211_hdr *)(skb->data + offset);
+	if (ieee80211_is_data_qos(hdr->frame_control))
+		tid = ieee80211_get_tid(hdr);
+	else
+		tid = ATH10K_TXRX_NON_QOS_TID;
+
+	last_pn = &peer->frag_tids_last_pn[tid];
+	new_pn.pn48 = ath10k_htt_rx_h_get_pn(ar, skb, offset, enctype);
+	more_frags = ieee80211_has_morefrags(hdr->frame_control);
+	frag_number = le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_FRAG;
+	seq = (__le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_SEQ) >> 4;
+
+	if (frag_number == 0) {
+		last_pn->pn48 = new_pn.pn48;
+		peer->frag_tids_seq[tid] = seq;
+	} else {
+		if (seq != peer->frag_tids_seq[tid])
+			return false;
+
+		if (new_pn.pn48 != last_pn->pn48 + 1)
+			return false;
+
+		last_pn->pn48 = new_pn.pn48;
+	}
+
+	return true;
+}
+
 static void ath10k_htt_rx_h_mpdu(struct ath10k *ar,
 				 struct sk_buff_head *amsdu,
 				 struct ieee80211_rx_status *status,
 				 bool fill_crypt_header,
 				 u8 *rx_hdr,
-				 enum ath10k_pkt_rx_err *err)
+				 enum ath10k_pkt_rx_err *err,
+				 u16 peer_id,
+				 bool frag)
 {
 	struct sk_buff *first;
 	struct sk_buff *last;
-	struct sk_buff *msdu;
+	struct sk_buff *msdu, *temp;
 	struct htt_rx_desc *rxd;
 	struct ieee80211_hdr *hdr;
 	enum htt_rx_mpdu_encrypt_type enctype;
@@ -1768,6 +1839,7 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar,
 	bool is_decrypted;
 	bool is_mgmt;
 	u32 attention;
+	bool frag_pn_check = true;
 
 	if (skb_queue_empty(amsdu))
 		return;
@@ -1866,6 +1938,24 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar,
 	}
 
 	skb_queue_walk(amsdu, msdu) {
+		if (frag && !fill_crypt_header && is_decrypted &&
+		    enctype == HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2)
+			frag_pn_check = ath10k_htt_rx_h_frag_pn_check(ar,
+								      msdu,
+								      peer_id,
+								      0,
+								      enctype);
+
+		if (!frag_pn_check) {
+			/* Discard the fragment with invalid PN */
+			temp = msdu->prev;
+			__skb_unlink(msdu, amsdu);
+			dev_kfree_skb_any(msdu);
+			msdu = temp;
+			frag_pn_check = true;
+			continue;
+		}
+
 		ath10k_htt_rx_h_csum_offload(msdu);
 		ath10k_htt_rx_h_undecap(ar, msdu, status, first_hdr, enctype,
 					is_decrypted);
@@ -2071,7 +2161,8 @@ static int ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt)
 		ath10k_htt_rx_h_unchain(ar, &amsdu, &drop_cnt, &unchain_cnt);
 
 	ath10k_htt_rx_h_filter(ar, &amsdu, rx_status, &drop_cnt_filter);
-	ath10k_htt_rx_h_mpdu(ar, &amsdu, rx_status, true, first_hdr, &err);
+	ath10k_htt_rx_h_mpdu(ar, &amsdu, rx_status, true, first_hdr, &err, 0,
+			     false);
 	msdus_to_queue = skb_queue_len(&amsdu);
 	ath10k_htt_rx_h_enqueue(ar, &amsdu, rx_status);
 
@@ -3027,7 +3118,7 @@ static int ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb)
 			ath10k_htt_rx_h_ppdu(ar, &amsdu, status, vdev_id);
 			ath10k_htt_rx_h_filter(ar, &amsdu, status, NULL);
 			ath10k_htt_rx_h_mpdu(ar, &amsdu, status, false, NULL,
-					     NULL);
+					     NULL, peer_id, frag);
 			ath10k_htt_rx_h_enqueue(ar, &amsdu, status);
 			break;
 		case -EAGAIN:
-- 
cgit v1.2.3


From 65c415a144ad8132b6a6d97d4a1919ffc728e2d1 Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Tue, 11 May 2021 20:02:53 +0200
Subject: ath10k: drop fragments with multicast DA for PCIe

Fragmentation is not used with multicast frames. Discard unexpected
fragments with multicast DA. This fixes CVE-2020-26145.

Tested-on: QCA6174 hw3.2 PCI WLAN.RM.4.4.1-00110-QCARMSWP-1

Cc: stable@vger.kernel.org
Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20210511200110.5a0bd289bda8.Idd6ebea20038fb1cfee6de924aa595e5647c9eae@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath10k/htt_rx.c | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index f1e5bce8b14f..cb04848ed5cb 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -1768,6 +1768,16 @@ static u64 ath10k_htt_rx_h_get_pn(struct ath10k *ar, struct sk_buff *skb,
 	return pn;
 }
 
+static bool ath10k_htt_rx_h_frag_multicast_check(struct ath10k *ar,
+						 struct sk_buff *skb,
+						 u16 offset)
+{
+	struct ieee80211_hdr *hdr;
+
+	hdr = (struct ieee80211_hdr *)(skb->data + offset);
+	return !is_multicast_ether_addr(hdr->addr1);
+}
+
 static bool ath10k_htt_rx_h_frag_pn_check(struct ath10k *ar,
 					  struct sk_buff *skb,
 					  u16 peer_id,
@@ -1839,7 +1849,7 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar,
 	bool is_decrypted;
 	bool is_mgmt;
 	u32 attention;
-	bool frag_pn_check = true;
+	bool frag_pn_check = true, multicast_check = true;
 
 	if (skb_queue_empty(amsdu))
 		return;
@@ -1946,13 +1956,20 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar,
 								      0,
 								      enctype);
 
-		if (!frag_pn_check) {
-			/* Discard the fragment with invalid PN */
+		if (frag)
+			multicast_check = ath10k_htt_rx_h_frag_multicast_check(ar,
+									       msdu,
+									       0);
+
+		if (!frag_pn_check || !multicast_check) {
+			/* Discard the fragment with invalid PN or multicast DA
+			 */
 			temp = msdu->prev;
 			__skb_unlink(msdu, amsdu);
 			dev_kfree_skb_any(msdu);
 			msdu = temp;
 			frag_pn_check = true;
+			multicast_check = true;
 			continue;
 		}
 
-- 
cgit v1.2.3


From 40e7462dad6f3d06efdb17d26539e61ab6e34db1 Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Tue, 11 May 2021 20:02:54 +0200
Subject: ath10k: drop fragments with multicast DA for SDIO

Fragmentation is not used with multicast frames. Discard unexpected
fragments with multicast DA. This fixes CVE-2020-26145.

Tested-on: QCA6174 hw3.2 SDIO WLAN.RMH.4.4.1-00049

Cc: stable@vger.kernel.org
Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20210511200110.9ca6ca7945a9.I1e18b514590af17c155bda86699bc3a971a8dcf4@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath10k/htt_rx.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index cb04848ed5cb..b1d93ff5215a 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -2617,6 +2617,13 @@ static bool ath10k_htt_rx_proc_rx_frag_ind_hl(struct ath10k_htt *htt,
 	rx_desc = (struct htt_hl_rx_desc *)(skb->data + tot_hdr_len);
 	rx_desc_info = __le32_to_cpu(rx_desc->info);
 
+	hdr = (struct ieee80211_hdr *)((u8 *)rx_desc + rx_hl->fw_desc.len);
+
+	if (is_multicast_ether_addr(hdr->addr1)) {
+		/* Discard the fragment with multicast DA */
+		goto err;
+	}
+
 	if (!MS(rx_desc_info, HTT_RX_DESC_HL_INFO_ENCRYPTED)) {
 		spin_unlock_bh(&ar->data_lock);
 		return ath10k_htt_rx_proc_rx_ind_hl(htt, &resp->rx_ind_hl, skb,
@@ -2624,8 +2631,6 @@ static bool ath10k_htt_rx_proc_rx_frag_ind_hl(struct ath10k_htt *htt,
 						    HTT_RX_NON_TKIP_MIC);
 	}
 
-	hdr = (struct ieee80211_hdr *)((u8 *)rx_desc + rx_hl->fw_desc.len);
-
 	if (ieee80211_has_retry(hdr->frame_control))
 		goto err;
 
-- 
cgit v1.2.3


From 079a108feba474b4b32bd3471db03e11f2f83b81 Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Tue, 11 May 2021 20:02:55 +0200
Subject: ath10k: drop MPDU which has discard flag set by firmware for SDIO

When the discard flag is set by the firmware for an MPDU, it should be
dropped. This allows a mitigation for CVE-2020-24588 to be implemented
in the firmware.

Tested-on: QCA6174 hw3.2 SDIO WLAN.RMH.4.4.1-00049

Cc: stable@vger.kernel.org
Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20210511200110.11968c725b5c.Idd166365ebea2771c0c0a38c78b5060750f90e17@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath10k/htt_rx.c  |  5 +++++
 drivers/net/wireless/ath/ath10k/rx_desc.h | 14 +++++++++++++-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index b1d93ff5215a..12451ab66a19 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -2312,6 +2312,11 @@ static bool ath10k_htt_rx_proc_rx_ind_hl(struct ath10k_htt *htt,
 	fw_desc = &rx->fw_desc;
 	rx_desc_len = fw_desc->len;
 
+	if (fw_desc->u.bits.discard) {
+		ath10k_dbg(ar, ATH10K_DBG_HTT, "htt discard mpdu\n");
+		goto err;
+	}
+
 	/* I have not yet seen any case where num_mpdu_ranges > 1.
 	 * qcacld does not seem handle that case either, so we introduce the
 	 * same limitiation here as well.
diff --git a/drivers/net/wireless/ath/ath10k/rx_desc.h b/drivers/net/wireless/ath/ath10k/rx_desc.h
index f2b6bf8f0d60..705b6295e466 100644
--- a/drivers/net/wireless/ath/ath10k/rx_desc.h
+++ b/drivers/net/wireless/ath/ath10k/rx_desc.h
@@ -1282,7 +1282,19 @@ struct fw_rx_desc_base {
 #define FW_RX_DESC_UDP              (1 << 6)
 
 struct fw_rx_desc_hl {
-	u8 info0;
+	union {
+		struct {
+		u8 discard:1,
+		   forward:1,
+		   any_err:1,
+		   dup_err:1,
+		   reserved:1,
+		   inspect:1,
+		   extension:2;
+		} bits;
+		u8 info0;
+	} u;
+
 	u8 version;
 	u8 len;
 	u8 flags;
-- 
cgit v1.2.3


From 0dc267b13f3a7e8424a898815dd357211b737330 Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Tue, 11 May 2021 20:02:56 +0200
Subject: ath10k: Fix TKIP Michael MIC verification for PCIe

TKIP Michael MIC was not verified properly for PCIe cases since the
validation steps in ieee80211_rx_h_michael_mic_verify() in mac80211 did
not get fully executed due to unexpected flag values in
ieee80211_rx_status.

Fix this by setting the flags property to meet mac80211 expectations for
performing Michael MIC validation there. This fixes CVE-2020-26141. It
does the same as ath10k_htt_rx_proc_rx_ind_hl() for SDIO which passed
MIC verification case. This applies only to QCA6174/QCA9377 PCIe.

Tested-on: QCA6174 hw3.2 PCI WLAN.RM.4.4.1-00110-QCARMSWP-1

Cc: stable@vger.kernel.org
Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20210511200110.c3f1d42c6746.I795593fcaae941c471425b8c7d5f7bb185d29142@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath10k/htt_rx.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index 12451ab66a19..87196f9bbdea 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -1974,6 +1974,11 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar,
 		}
 
 		ath10k_htt_rx_h_csum_offload(msdu);
+
+		if (frag && !fill_crypt_header &&
+		    enctype == HTT_RX_MPDU_ENCRYPT_TKIP_WPA)
+			status->flag &= ~RX_FLAG_MMIC_STRIPPED;
+
 		ath10k_htt_rx_h_undecap(ar, msdu, status, first_hdr, enctype,
 					is_decrypted);
 
@@ -1991,6 +1996,11 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar,
 
 		hdr = (void *)msdu->data;
 		hdr->frame_control &= ~__cpu_to_le16(IEEE80211_FCTL_PROTECTED);
+
+		if (frag && !fill_crypt_header &&
+		    enctype == HTT_RX_MPDU_ENCRYPT_TKIP_WPA)
+			status->flag &= ~RX_FLAG_IV_STRIPPED &
+					~RX_FLAG_MMIC_STRIPPED;
 	}
 }
 
-- 
cgit v1.2.3


From 62a8ff67eba52dae9b107e1fb8827054ed00a265 Mon Sep 17 00:00:00 2001
From: Sriram R <srirrama@codeaurora.org>
Date: Tue, 11 May 2021 20:02:57 +0200
Subject: ath10k: Validate first subframe of A-MSDU before processing the list

In certain scenarios a normal MSDU can be received as an A-MSDU when
the A-MSDU present bit of a QoS header gets flipped during reception.
Since this bit is unauthenticated, the hardware crypto engine can pass
the frame to the driver without any error indication.

This could result in processing unintended subframes collected in the
A-MSDU list. Hence, validate A-MSDU list by checking if the first frame
has a valid subframe header.

Comparing the non-aggregated MSDU and an A-MSDU, the fields of the first
subframe DA matches the LLC/SNAP header fields of a normal MSDU.
In order to avoid processing such frames, add a validation to
filter such A-MSDU frames where the first subframe header DA matches
with the LLC/SNAP header pattern.

Tested-on: QCA9984 hw1.0 PCI 10.4-3.10-00047

Cc: stable@vger.kernel.org
Signed-off-by: Sriram R <srirrama@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20210511200110.e6f5eb7b9847.I38a77ae26096862527a5eab73caebd7346af8b66@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath10k/htt_rx.c | 61 +++++++++++++++++++++++++++++---
 1 file changed, 57 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index 87196f9bbdea..7ffb5d5b2a70 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -2108,14 +2108,62 @@ static void ath10k_htt_rx_h_unchain(struct ath10k *ar,
 	ath10k_unchain_msdu(amsdu, unchain_cnt);
 }
 
+static bool ath10k_htt_rx_validate_amsdu(struct ath10k *ar,
+					 struct sk_buff_head *amsdu)
+{
+	u8 *subframe_hdr;
+	struct sk_buff *first;
+	bool is_first, is_last;
+	struct htt_rx_desc *rxd;
+	struct ieee80211_hdr *hdr;
+	size_t hdr_len, crypto_len;
+	enum htt_rx_mpdu_encrypt_type enctype;
+	int bytes_aligned = ar->hw_params.decap_align_bytes;
+
+	first = skb_peek(amsdu);
+
+	rxd = (void *)first->data - sizeof(*rxd);
+	hdr = (void *)rxd->rx_hdr_status;
+
+	is_first = !!(rxd->msdu_end.common.info0 &
+		      __cpu_to_le32(RX_MSDU_END_INFO0_FIRST_MSDU));
+	is_last = !!(rxd->msdu_end.common.info0 &
+		     __cpu_to_le32(RX_MSDU_END_INFO0_LAST_MSDU));
+
+	/* Return in case of non-aggregated msdu */
+	if (is_first && is_last)
+		return true;
+
+	/* First msdu flag is not set for the first msdu of the list */
+	if (!is_first)
+		return false;
+
+	enctype = MS(__le32_to_cpu(rxd->mpdu_start.info0),
+		     RX_MPDU_START_INFO0_ENCRYPT_TYPE);
+
+	hdr_len = ieee80211_hdrlen(hdr->frame_control);
+	crypto_len = ath10k_htt_rx_crypto_param_len(ar, enctype);
+
+	subframe_hdr = (u8 *)hdr + round_up(hdr_len, bytes_aligned) +
+		       crypto_len;
+
+	/* Validate if the amsdu has a proper first subframe.
+	 * There are chances a single msdu can be received as amsdu when
+	 * the unauthenticated amsdu flag of a QoS header
+	 * gets flipped in non-SPP AMSDU's, in such cases the first
+	 * subframe has llc/snap header in place of a valid da.
+	 * return false if the da matches rfc1042 pattern
+	 */
+	if (ether_addr_equal(subframe_hdr, rfc1042_header))
+		return false;
+
+	return true;
+}
+
 static bool ath10k_htt_rx_amsdu_allowed(struct ath10k *ar,
 					struct sk_buff_head *amsdu,
 					struct ieee80211_rx_status *rx_status)
 {
-	/* FIXME: It might be a good idea to do some fuzzy-testing to drop
-	 * invalid/dangerous frames.
-	 */
-
 	if (!rx_status->freq) {
 		ath10k_dbg(ar, ATH10K_DBG_HTT, "no channel configured; ignoring frame(s)!\n");
 		return false;
@@ -2126,6 +2174,11 @@ static bool ath10k_htt_rx_amsdu_allowed(struct ath10k *ar,
 		return false;
 	}
 
+	if (!ath10k_htt_rx_validate_amsdu(ar, amsdu)) {
+		ath10k_dbg(ar, ATH10K_DBG_HTT, "invalid amsdu received\n");
+		return false;
+	}
+
 	return true;
 }
 
-- 
cgit v1.2.3


From c3944a5621026c176001493d48ee66ff94e1a39a Mon Sep 17 00:00:00 2001
From: Sriram R <srirrama@codeaurora.org>
Date: Tue, 11 May 2021 20:02:58 +0200
Subject: ath11k: Clear the fragment cache during key install

Currently the fragment cache setup during peer assoc is
cleared only during peer delete. In case a key reinstallation
happens with the same peer, the same fragment cache with old
fragments added before key installation could be clubbed
with fragments received after. This might be exploited
to mix fragments of different data resulting in a proper
unintended reassembled packet to be passed up the stack.

Hence flush the fragment cache on every key installation to prevent
potential attacks (CVE-2020-24587).

Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.4.0.1-01734-QCAHKSWPL_SILICONZ-1 v2

Cc: stable@vger.kernel.org
Signed-off-by: Sriram R <srirrama@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20210511200110.218dc777836f.I9af6fc76215a35936c4152552018afb5079c5d8c@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath11k/dp_rx.c | 18 ++++++++++++++++++
 drivers/net/wireless/ath/ath11k/dp_rx.h |  1 +
 drivers/net/wireless/ath/ath11k/mac.c   |  6 ++++++
 3 files changed, 25 insertions(+)

diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c
index 1d9aa1bb6b6e..3382f8bfcb48 100644
--- a/drivers/net/wireless/ath/ath11k/dp_rx.c
+++ b/drivers/net/wireless/ath/ath11k/dp_rx.c
@@ -852,6 +852,24 @@ static void ath11k_dp_rx_frags_cleanup(struct dp_rx_tid *rx_tid, bool rel_link_d
 	__skb_queue_purge(&rx_tid->rx_frags);
 }
 
+void ath11k_peer_frags_flush(struct ath11k *ar, struct ath11k_peer *peer)
+{
+	struct dp_rx_tid *rx_tid;
+	int i;
+
+	lockdep_assert_held(&ar->ab->base_lock);
+
+	for (i = 0; i <= IEEE80211_NUM_TIDS; i++) {
+		rx_tid = &peer->rx_tid[i];
+
+		spin_unlock_bh(&ar->ab->base_lock);
+		del_timer_sync(&rx_tid->frag_timer);
+		spin_lock_bh(&ar->ab->base_lock);
+
+		ath11k_dp_rx_frags_cleanup(rx_tid, true);
+	}
+}
+
 void ath11k_peer_rx_tid_cleanup(struct ath11k *ar, struct ath11k_peer *peer)
 {
 	struct dp_rx_tid *rx_tid;
diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.h b/drivers/net/wireless/ath/ath11k/dp_rx.h
index bf399312b5ff..623da3bf9dc8 100644
--- a/drivers/net/wireless/ath/ath11k/dp_rx.h
+++ b/drivers/net/wireless/ath/ath11k/dp_rx.h
@@ -49,6 +49,7 @@ int ath11k_dp_peer_rx_pn_replay_config(struct ath11k_vif *arvif,
 				       const u8 *peer_addr,
 				       enum set_key_cmd key_cmd,
 				       struct ieee80211_key_conf *key);
+void ath11k_peer_frags_flush(struct ath11k *ar, struct ath11k_peer *peer);
 void ath11k_peer_rx_tid_cleanup(struct ath11k *ar, struct ath11k_peer *peer);
 void ath11k_peer_rx_tid_delete(struct ath11k *ar,
 			       struct ath11k_peer *peer, u8 tid);
diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index 4df425dd31a2..9d0ff150ec30 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -2779,6 +2779,12 @@ static int ath11k_mac_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 	 */
 	spin_lock_bh(&ab->base_lock);
 	peer = ath11k_peer_find(ab, arvif->vdev_id, peer_addr);
+
+	/* flush the fragments cache during key (re)install to
+	 * ensure all frags in the new frag list belong to the same key.
+	 */
+	if (peer && cmd == SET_KEY)
+		ath11k_peer_frags_flush(ar, peer);
 	spin_unlock_bh(&ab->base_lock);
 
 	if (!peer) {
-- 
cgit v1.2.3


From 210f563b097997ce917e82feab356b298bfd12b0 Mon Sep 17 00:00:00 2001
From: Sriram R <srirrama@codeaurora.org>
Date: Tue, 11 May 2021 20:02:59 +0200
Subject: ath11k: Drop multicast fragments

Fragmentation is used only with unicast frames. Drop multicast fragments
to avoid any undesired behavior.

Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.4.0.1-01734-QCAHKSWPL_SILICONZ-1 v2

Cc: stable@vger.kernel.org
Signed-off-by: Sriram R <srirrama@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20210511200110.1d53bfd20a8b.Ibb63283051bb5e2c45951932c6e1f351d5a73dc3@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath11k/dp_rx.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c
index 3382f8bfcb48..603d2f93ac18 100644
--- a/drivers/net/wireless/ath/ath11k/dp_rx.c
+++ b/drivers/net/wireless/ath/ath11k/dp_rx.c
@@ -260,6 +260,16 @@ static void ath11k_dp_rxdesc_set_msdu_len(struct ath11k_base *ab,
 	ab->hw_params.hw_ops->rx_desc_set_msdu_len(desc, len);
 }
 
+static bool ath11k_dp_rx_h_attn_is_mcbc(struct ath11k_base *ab,
+					struct hal_rx_desc *desc)
+{
+	struct rx_attention *attn = ath11k_dp_rx_get_attention(ab, desc);
+
+	return ath11k_dp_rx_h_msdu_end_first_msdu(ab, desc) &&
+		(!!FIELD_GET(RX_ATTENTION_INFO1_MCAST_BCAST,
+		 __le32_to_cpu(attn->info1)));
+}
+
 static void ath11k_dp_service_mon_ring(struct timer_list *t)
 {
 	struct ath11k_base *ab = from_timer(ab, t, mon_reap_timer);
@@ -3468,6 +3478,7 @@ static int ath11k_dp_rx_frag_h_mpdu(struct ath11k *ar,
 	u8 tid;
 	int ret = 0;
 	bool more_frags;
+	bool is_mcbc;
 
 	rx_desc = (struct hal_rx_desc *)msdu->data;
 	peer_id = ath11k_dp_rx_h_mpdu_start_peer_id(ar->ab, rx_desc);
@@ -3475,6 +3486,11 @@ static int ath11k_dp_rx_frag_h_mpdu(struct ath11k *ar,
 	seqno = ath11k_dp_rx_h_mpdu_start_seq_no(ar->ab, rx_desc);
 	frag_no = ath11k_dp_rx_h_mpdu_start_frag_no(ar->ab, msdu);
 	more_frags = ath11k_dp_rx_h_mpdu_start_more_frags(ar->ab, msdu);
+	is_mcbc = ath11k_dp_rx_h_attn_is_mcbc(ar->ab, rx_desc);
+
+	/* Multicast/Broadcast fragments are not expected */
+	if (is_mcbc)
+		return -EINVAL;
 
 	if (!ath11k_dp_rx_h_mpdu_start_seq_ctrl_valid(ar->ab, rx_desc) ||
 	    !ath11k_dp_rx_h_mpdu_start_fc_valid(ar->ab, rx_desc) ||
-- 
cgit v1.2.3


From 875d598db60ac81e768fdfd2c589f6209038488b Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Tue, 11 May 2021 18:34:13 +0200
Subject: MAINTAINERS: Update address for Emma Anholt

Reviewed-by: Emma Anholt <emma@anholt.net>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
---
 MAINTAINERS | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index bd7aff0c120f..38a1e3bf5af0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5639,7 +5639,7 @@ T:	git git://anongit.freedesktop.org/drm/drm-misc
 F:	drivers/gpu/drm/sun4i/sun8i*
 
 DRM DRIVER FOR ARM PL111 CLCD
-M:	Eric Anholt <eric@anholt.net>
+M:	Emma Anholt <emma@anholt.net>
 S:	Supported
 T:	git git://anongit.freedesktop.org/drm/drm-misc
 F:	drivers/gpu/drm/pl111/
@@ -5719,7 +5719,7 @@ T:	git git://anongit.freedesktop.org/drm/drm-misc
 F:	drivers/gpu/drm/tiny/gm12u320.c
 
 DRM DRIVER FOR HX8357D PANELS
-M:	Eric Anholt <eric@anholt.net>
+M:	Emma Anholt <emma@anholt.net>
 S:	Maintained
 T:	git git://anongit.freedesktop.org/drm/drm-misc
 F:	Documentation/devicetree/bindings/display/himax,hx8357d.txt
@@ -6177,7 +6177,7 @@ F:	Documentation/devicetree/bindings/display/ti/
 F:	drivers/gpu/drm/omapdrm/
 
 DRM DRIVERS FOR V3D
-M:	Eric Anholt <eric@anholt.net>
+M:	Emma Anholt <emma@anholt.net>
 S:	Supported
 T:	git git://anongit.freedesktop.org/drm/drm-misc
 F:	Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml
@@ -6185,7 +6185,7 @@ F:	drivers/gpu/drm/v3d/
 F:	include/uapi/drm/v3d_drm.h
 
 DRM DRIVERS FOR VC4
-M:	Eric Anholt <eric@anholt.net>
+M:	Emma Anholt <emma@anholt.net>
 M:	Maxime Ripard <mripard@kernel.org>
 S:	Supported
 T:	git git://github.com/anholt/linux
-- 
cgit v1.2.3


From e09784a8a751e539dffc94d43bc917b0ac1e934a Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Tue, 11 May 2021 03:45:16 +0200
Subject: alarmtimer: Check RTC features instead of ops

RTC drivers used to leave .set_alarm() NULL in order to signal the RTC
device doesn't support alarms. The drivers are now clearing the
RTC_FEATURE_ALARM bit for that purpose in order to keep the rtc_class_ops
structure const. So now, .set_alarm() is set unconditionally and this
possibly causes the alarmtimer code to select an RTC device that doesn't
support alarms.

Test RTC_FEATURE_ALARM instead of relying on ops->set_alarm to determine
whether alarms are available.

Fixes: 7ae41220ef58 ("rtc: introduce features bitfield")
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210511014516.563031-1-alexandre.belloni@bootlin.com
---
 kernel/time/alarmtimer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index bea9d08b1698..5897828b9d7e 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -92,7 +92,7 @@ static int alarmtimer_rtc_add_device(struct device *dev,
 	if (rtcdev)
 		return -EBUSY;
 
-	if (!rtc->ops->set_alarm)
+	if (!test_bit(RTC_FEATURE_ALARM, rtc->features))
 		return -1;
 	if (!device_may_wakeup(rtc->dev.parent))
 		return -1;
-- 
cgit v1.2.3


From 0bd50826a40e012a35c58ed3576b3873643e7a7d Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Thu, 6 May 2021 15:08:24 +0800
Subject: leds: Fix reference file name of documentation

In commit 56b01acc1c79a ("dt-bindings: gpio: fairchild,74hc595:
Convert to json-schema"), gpio-74x164.txt was deleted and replaced
by fairchild,74hc595.yaml. Fix the reference file name.

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Link: https://lore.kernel.org/r/20210506070824.10965-1-wanjiabing@vivo.com
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/leds/leds-bcm6328.txt | 4 ++--
 Documentation/devicetree/bindings/leds/leds-bcm6358.txt | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/leds/leds-bcm6328.txt b/Documentation/devicetree/bindings/leds/leds-bcm6328.txt
index ccebce597f37..a555d94084b7 100644
--- a/Documentation/devicetree/bindings/leds/leds-bcm6328.txt
+++ b/Documentation/devicetree/bindings/leds/leds-bcm6328.txt
@@ -4,8 +4,8 @@ This controller is present on BCM6318, BCM6328, BCM6362 and BCM63268.
 In these SoCs it's possible to control LEDs both as GPIOs or by hardware.
 However, on some devices there are Serial LEDs (LEDs connected to a 74x164
 controller), which can either be controlled by software (exporting the 74x164
-as spi-gpio. See Documentation/devicetree/bindings/gpio/gpio-74x164.txt), or
-by hardware using this driver.
+as spi-gpio. See Documentation/devicetree/bindings/gpio/fairchild,74hc595.yaml),
+or by hardware using this driver.
 Some of these Serial LEDs are hardware controlled (e.g. ethernet LEDs) and
 exporting the 74x164 as spi-gpio prevents those LEDs to be hardware
 controlled, so the only chance to keep them working is by using this driver.
diff --git a/Documentation/devicetree/bindings/leds/leds-bcm6358.txt b/Documentation/devicetree/bindings/leds/leds-bcm6358.txt
index da5708e7b43b..6e51c6b91ee5 100644
--- a/Documentation/devicetree/bindings/leds/leds-bcm6358.txt
+++ b/Documentation/devicetree/bindings/leds/leds-bcm6358.txt
@@ -3,7 +3,7 @@ LEDs connected to Broadcom BCM6358 controller
 This controller is present on BCM6358 and BCM6368.
 In these SoCs there are Serial LEDs (LEDs connected to a 74x164 controller),
 which can either be controlled by software (exporting the 74x164 as spi-gpio.
-See Documentation/devicetree/bindings/gpio/gpio-74x164.txt), or
+See Documentation/devicetree/bindings/gpio/fairchild,74hc595.yaml), or
 by hardware using this driver.
 
 Required properties:
-- 
cgit v1.2.3


From 67f29896fdc83298eed5a6576ff8f9873f709228 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Tue, 11 May 2021 10:26:03 +0300
Subject: RDMA/rxe: Clear all QP fields if creation failed

rxe_qp_do_cleanup() relies on valid pointer values in QP for the properly
created ones, but in case rxe_qp_from_init() failed it was filled with
garbage and caused tot the following error.

  refcount_t: underflow; use-after-free.
  WARNING: CPU: 1 PID: 12560 at lib/refcount.c:28 refcount_warn_saturate+0x1d1/0x1e0 lib/refcount.c:28
  Modules linked in:
  CPU: 1 PID: 12560 Comm: syz-executor.4 Not tainted 5.12.0-syzkaller #0
  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
  RIP: 0010:refcount_warn_saturate+0x1d1/0x1e0 lib/refcount.c:28
  Code: e9 db fe ff ff 48 89 df e8 2c c2 ea fd e9 8a fe ff ff e8 72 6a a7 fd 48 c7 c7 e0 b2 c1 89 c6 05 dc 3a e6 09 01 e8 ee 74 fb 04 <0f> 0b e9 af fe ff ff 0f 1f 84 00 00 00 00 00 41 56 41 55 41 54 55
  RSP: 0018:ffffc900097ceba8 EFLAGS: 00010286
  RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
  RDX: 0000000000040000 RSI: ffffffff815bb075 RDI: fffff520012f9d67
  RBP: 0000000000000003 R08: 0000000000000000 R09: 0000000000000000
  R10: ffffffff815b4eae R11: 0000000000000000 R12: ffff8880322a4800
  R13: ffff8880322a4940 R14: ffff888033044e00 R15: 0000000000000000
  FS:  00007f6eb2be3700(0000) GS:ffff8880b9d00000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007fdbe5d41000 CR3: 000000001d181000 CR4: 00000000001506e0
  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  Call Trace:
   __refcount_sub_and_test include/linux/refcount.h:283 [inline]
   __refcount_dec_and_test include/linux/refcount.h:315 [inline]
   refcount_dec_and_test include/linux/refcount.h:333 [inline]
   kref_put include/linux/kref.h:64 [inline]
   rxe_qp_do_cleanup+0x96f/0xaf0 drivers/infiniband/sw/rxe/rxe_qp.c:805
   execute_in_process_context+0x37/0x150 kernel/workqueue.c:3327
   rxe_elem_release+0x9f/0x180 drivers/infiniband/sw/rxe/rxe_pool.c:391
   kref_put include/linux/kref.h:65 [inline]
   rxe_create_qp+0x2cd/0x310 drivers/infiniband/sw/rxe/rxe_verbs.c:425
   _ib_create_qp drivers/infiniband/core/core_priv.h:331 [inline]
   ib_create_named_qp+0x2ad/0x1370 drivers/infiniband/core/verbs.c:1231
   ib_create_qp include/rdma/ib_verbs.h:3644 [inline]
   create_mad_qp+0x177/0x2d0 drivers/infiniband/core/mad.c:2920
   ib_mad_port_open drivers/infiniband/core/mad.c:3001 [inline]
   ib_mad_init_device+0xd6f/0x1400 drivers/infiniband/core/mad.c:3092
   add_client_context+0x405/0x5e0 drivers/infiniband/core/device.c:717
   enable_device_and_get+0x1cd/0x3b0 drivers/infiniband/core/device.c:1331
   ib_register_device drivers/infiniband/core/device.c:1413 [inline]
   ib_register_device+0x7c7/0xa50 drivers/infiniband/core/device.c:1365
   rxe_register_device+0x3d5/0x4a0 drivers/infiniband/sw/rxe/rxe_verbs.c:1147
   rxe_add+0x12fe/0x16d0 drivers/infiniband/sw/rxe/rxe.c:247
   rxe_net_add+0x8c/0xe0 drivers/infiniband/sw/rxe/rxe_net.c:503
   rxe_newlink drivers/infiniband/sw/rxe/rxe.c:269 [inline]
   rxe_newlink+0xb7/0xe0 drivers/infiniband/sw/rxe/rxe.c:250
   nldev_newlink+0x30e/0x550 drivers/infiniband/core/nldev.c:1555
   rdma_nl_rcv_msg+0x36d/0x690 drivers/infiniband/core/netlink.c:195
   rdma_nl_rcv_skb drivers/infiniband/core/netlink.c:239 [inline]
   rdma_nl_rcv+0x2ee/0x430 drivers/infiniband/core/netlink.c:259
   netlink_unicast_kernel net/netlink/af_netlink.c:1312 [inline]
   netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1338
   netlink_sendmsg+0x856/0xd90 net/netlink/af_netlink.c:1927
   sock_sendmsg_nosec net/socket.c:654 [inline]
   sock_sendmsg+0xcf/0x120 net/socket.c:674
   ____sys_sendmsg+0x6e8/0x810 net/socket.c:2350
   ___sys_sendmsg+0xf3/0x170 net/socket.c:2404
   __sys_sendmsg+0xe5/0x1b0 net/socket.c:2433
   do_syscall_64+0x3a/0xb0 arch/x86/entry/common.c:47
   entry_SYSCALL_64_after_hwframe+0x44/0xae

Fixes: 8700e3e7c485 ("Soft RoCE driver")
Link: https://lore.kernel.org/r/7bf8d548764d406dbbbaf4b574960ebfd5af8387.1620717918.git.leonro@nvidia.com
Reported-by: syzbot+36a7f280de4e11c6f04e@syzkaller.appspotmail.com
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Reviewed-by: Zhu Yanjun <zyjzyj2000@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_qp.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index 34ae957a315c..b0f350d674fd 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -242,6 +242,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
 	if (err) {
 		vfree(qp->sq.queue->buf);
 		kfree(qp->sq.queue);
+		qp->sq.queue = NULL;
 		return err;
 	}
 
@@ -295,6 +296,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
 		if (err) {
 			vfree(qp->rq.queue->buf);
 			kfree(qp->rq.queue);
+			qp->rq.queue = NULL;
 			return err;
 		}
 	}
@@ -355,6 +357,11 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
 err2:
 	rxe_queue_cleanup(qp->sq.queue);
 err1:
+	qp->pd = NULL;
+	qp->rcq = NULL;
+	qp->scq = NULL;
+	qp->srq = NULL;
+
 	if (srq)
 		rxe_drop_ref(srq);
 	rxe_drop_ref(scq);
-- 
cgit v1.2.3


From b24abcff918a5cbf44b0c982bd3477a93e8e4911 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 11 May 2021 22:35:16 +0200
Subject: bpf, kconfig: Add consolidated menu entry for bpf with core options

Right now, all core BPF related options are scattered in different Kconfig
locations mainly due to historic reasons. Moving forward, lets add a proper
subsystem entry under ...

  General setup  --->
    BPF subsystem  --->

... in order to have all knobs in a single location and thus ease BPF related
configuration. Networking related bits such as sockmap are out of scope for
the general setup and therefore better suited to remain in net/Kconfig.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/f23f58765a4d59244ebd8037da7b6a6b2fb58446.1620765074.git.daniel@iogearbox.net
---
 init/Kconfig       | 41 +---------------------------
 kernel/bpf/Kconfig | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/Kconfig        | 27 -------------------
 3 files changed, 79 insertions(+), 67 deletions(-)
 create mode 100644 kernel/bpf/Kconfig

diff --git a/init/Kconfig b/init/Kconfig
index ca559ccdaa32..2282a6842dc6 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -439,6 +439,7 @@ config AUDITSYSCALL
 
 source "kernel/irq/Kconfig"
 source "kernel/time/Kconfig"
+source "kernel/bpf/Kconfig"
 source "kernel/Kconfig.preempt"
 
 menu "CPU/Task time and stats accounting"
@@ -1705,46 +1706,6 @@ config KALLSYMS_BASE_RELATIVE
 
 # syscall, maps, verifier
 
-config BPF_LSM
-	bool "LSM Instrumentation with BPF"
-	depends on BPF_EVENTS
-	depends on BPF_SYSCALL
-	depends on SECURITY
-	depends on BPF_JIT
-	help
-	  Enables instrumentation of the security hooks with eBPF programs for
-	  implementing dynamic MAC and Audit Policies.
-
-	  If you are unsure how to answer this question, answer N.
-
-config BPF_SYSCALL
-	bool "Enable bpf() system call"
-	select BPF
-	select IRQ_WORK
-	select TASKS_TRACE_RCU
-	select BINARY_PRINTF
-	select NET_SOCK_MSG if INET
-	default n
-	help
-	  Enable the bpf() system call that allows to manipulate eBPF
-	  programs and maps via file descriptors.
-
-config ARCH_WANT_DEFAULT_BPF_JIT
-	bool
-
-config BPF_JIT_ALWAYS_ON
-	bool "Permanently enable BPF JIT and remove BPF interpreter"
-	depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT
-	help
-	  Enables BPF JIT and removes BPF interpreter to avoid
-	  speculative execution of BPF instructions by the interpreter
-
-config BPF_JIT_DEFAULT_ON
-	def_bool ARCH_WANT_DEFAULT_BPF_JIT || BPF_JIT_ALWAYS_ON
-	depends on HAVE_EBPF_JIT && BPF_JIT
-
-source "kernel/bpf/preload/Kconfig"
-
 config USERFAULTFD
 	bool "Enable userfaultfd() system call"
 	depends on MMU
diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
new file mode 100644
index 000000000000..b4edaefc6255
--- /dev/null
+++ b/kernel/bpf/Kconfig
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+# BPF interpreter that, for example, classic socket filters depend on.
+config BPF
+	bool
+
+# Used by archs to tell that they support BPF JIT compiler plus which
+# flavour. Only one of the two can be selected for a specific arch since
+# eBPF JIT supersedes the cBPF JIT.
+
+# Classic BPF JIT (cBPF)
+config HAVE_CBPF_JIT
+	bool
+
+# Extended BPF JIT (eBPF)
+config HAVE_EBPF_JIT
+	bool
+
+# Used by archs to tell that they want the BPF JIT compiler enabled by
+# default for kernels that were compiled with BPF JIT support.
+config ARCH_WANT_DEFAULT_BPF_JIT
+	bool
+
+menu "BPF subsystem"
+
+config BPF_SYSCALL
+	bool "Enable bpf() system call"
+	select BPF
+	select IRQ_WORK
+	select TASKS_TRACE_RCU
+	select BINARY_PRINTF
+	select NET_SOCK_MSG if INET
+	default n
+	help
+	  Enable the bpf() system call that allows to manipulate BPF programs
+	  and maps via file descriptors.
+
+config BPF_JIT
+	bool "Enable BPF Just In Time compiler"
+	depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT
+	depends on MODULES
+	help
+	  BPF programs are normally handled by a BPF interpreter. This option
+	  allows the kernel to generate native code when a program is loaded
+	  into the kernel. This will significantly speed-up processing of BPF
+	  programs.
+
+	  Note, an admin should enable this feature changing:
+	  /proc/sys/net/core/bpf_jit_enable
+	  /proc/sys/net/core/bpf_jit_harden   (optional)
+	  /proc/sys/net/core/bpf_jit_kallsyms (optional)
+
+config BPF_JIT_ALWAYS_ON
+	bool "Permanently enable BPF JIT and remove BPF interpreter"
+	depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT
+	help
+	  Enables BPF JIT and removes BPF interpreter to avoid speculative
+	  execution of BPF instructions by the interpreter.
+
+config BPF_JIT_DEFAULT_ON
+	def_bool ARCH_WANT_DEFAULT_BPF_JIT || BPF_JIT_ALWAYS_ON
+	depends on HAVE_EBPF_JIT && BPF_JIT
+
+source "kernel/bpf/preload/Kconfig"
+
+config BPF_LSM
+	bool "Enable BPF LSM Instrumentation"
+	depends on BPF_EVENTS
+	depends on BPF_SYSCALL
+	depends on SECURITY
+	depends on BPF_JIT
+	help
+	  Enables instrumentation of the security hooks with BPF programs for
+	  implementing dynamic MAC and Audit Policies.
+
+	  If you are unsure how to answer this question, answer N.
+
+endmenu # "BPF subsystem"
diff --git a/net/Kconfig b/net/Kconfig
index f5ee7c65e6b4..c7392c449b25 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -302,21 +302,6 @@ config BQL
 	select DQL
 	default y
 
-config BPF_JIT
-	bool "enable BPF Just In Time compiler"
-	depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT
-	depends on MODULES
-	help
-	  Berkeley Packet Filter filtering capabilities are normally handled
-	  by an interpreter. This option allows kernel to generate a native
-	  code when filter is loaded in memory. This should speedup
-	  packet sniffing (libpcap/tcpdump).
-
-	  Note, admin should enable this feature changing:
-	  /proc/sys/net/core/bpf_jit_enable
-	  /proc/sys/net/core/bpf_jit_harden   (optional)
-	  /proc/sys/net/core/bpf_jit_kallsyms (optional)
-
 config BPF_STREAM_PARSER
 	bool "enable BPF STREAM_PARSER"
 	depends on INET
@@ -470,15 +455,3 @@ config ETHTOOL_NETLINK
 	  e.g. notification messages.
 
 endif   # if NET
-
-# Used by archs to tell that they support BPF JIT compiler plus which flavour.
-# Only one of the two can be selected for a specific arch since eBPF JIT supersedes
-# the cBPF JIT.
-
-# Classic BPF JIT (cBPF)
-config HAVE_CBPF_JIT
-	bool
-
-# Extended BPF JIT (eBPF)
-config HAVE_EBPF_JIT
-	bool
-- 
cgit v1.2.3


From 08389d888287c3823f80b0216766b71e17f0aba5 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 11 May 2021 22:35:17 +0200
Subject: bpf: Add kconfig knob for disabling unpriv bpf by default

Add a kconfig knob which allows for unprivileged bpf to be disabled by default.
If set, the knob sets /proc/sys/kernel/unprivileged_bpf_disabled to value of 2.

This still allows a transition of 2 -> {0,1} through an admin. Similarly,
this also still keeps 1 -> {1} behavior intact, so that once set to permanently
disabled, it cannot be undone aside from a reboot.

We've also added extra2 with max of 2 for the procfs handler, so that an admin
still has a chance to toggle between 0 <-> 2.

Either way, as an additional alternative, applications can make use of CAP_BPF
that we added a while ago.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/74ec548079189e4e4dffaeb42b8987bb3c852eee.1620765074.git.daniel@iogearbox.net
---
 Documentation/admin-guide/sysctl/kernel.rst | 17 ++++++++++++++---
 kernel/bpf/Kconfig                          | 10 ++++++++++
 kernel/bpf/syscall.c                        |  3 ++-
 kernel/sysctl.c                             | 29 ++++++++++++++++++++++++-----
 4 files changed, 50 insertions(+), 9 deletions(-)

diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 1d56a6b73a4e..24ab20d7a50a 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -1457,11 +1457,22 @@ unprivileged_bpf_disabled
 =========================
 
 Writing 1 to this entry will disable unprivileged calls to ``bpf()``;
-once disabled, calling ``bpf()`` without ``CAP_SYS_ADMIN`` will return
-``-EPERM``.
+once disabled, calling ``bpf()`` without ``CAP_SYS_ADMIN`` or ``CAP_BPF``
+will return ``-EPERM``. Once set to 1, this can't be cleared from the
+running kernel anymore.
 
-Once set, this can't be cleared.
+Writing 2 to this entry will also disable unprivileged calls to ``bpf()``,
+however, an admin can still change this setting later on, if needed, by
+writing 0 or 1 to this entry.
 
+If ``BPF_UNPRIV_DEFAULT_OFF`` is enabled in the kernel config, then this
+entry will default to 2 instead of 0.
+
+= =============================================================
+0 Unprivileged calls to ``bpf()`` are enabled
+1 Unprivileged calls to ``bpf()`` are disabled without recovery
+2 Unprivileged calls to ``bpf()`` are disabled
+= =============================================================
 
 watchdog
 ========
diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
index b4edaefc6255..26b591e23f16 100644
--- a/kernel/bpf/Kconfig
+++ b/kernel/bpf/Kconfig
@@ -61,6 +61,16 @@ config BPF_JIT_DEFAULT_ON
 	def_bool ARCH_WANT_DEFAULT_BPF_JIT || BPF_JIT_ALWAYS_ON
 	depends on HAVE_EBPF_JIT && BPF_JIT
 
+config BPF_UNPRIV_DEFAULT_OFF
+	bool "Disable unprivileged BPF by default"
+	depends on BPF_SYSCALL
+	help
+	  Disables unprivileged BPF by default by setting the corresponding
+	  /proc/sys/kernel/unprivileged_bpf_disabled knob to 2. An admin can
+	  still reenable it by setting it to 0 later on, or permanently
+	  disable it by setting it to 1 (from which no other transition to
+	  0 is possible anymore).
+
 source "kernel/bpf/preload/Kconfig"
 
 config BPF_LSM
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 941ca06d9dfa..ea04b0deb5ce 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -50,7 +50,8 @@ static DEFINE_SPINLOCK(map_idr_lock);
 static DEFINE_IDR(link_idr);
 static DEFINE_SPINLOCK(link_idr_lock);
 
-int sysctl_unprivileged_bpf_disabled __read_mostly;
+int sysctl_unprivileged_bpf_disabled __read_mostly =
+	IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0;
 
 static const struct bpf_map_ops * const bpf_map_types[] = {
 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index f91d327273c1..6df7c81f7cdd 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -225,7 +225,27 @@ static int bpf_stats_handler(struct ctl_table *table, int write,
 	mutex_unlock(&bpf_stats_enabled_mutex);
 	return ret;
 }
-#endif
+
+static int bpf_unpriv_handler(struct ctl_table *table, int write,
+			      void *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret, unpriv_enable = *(int *)table->data;
+	bool locked_state = unpriv_enable == 1;
+	struct ctl_table tmp = *table;
+
+	if (write && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	tmp.data = &unpriv_enable;
+	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+	if (write && !ret) {
+		if (locked_state && unpriv_enable != 1)
+			return -EPERM;
+		*(int *)table->data = unpriv_enable;
+	}
+	return ret;
+}
+#endif /* CONFIG_BPF_SYSCALL && CONFIG_SYSCTL */
 
 /*
  * /proc/sys support
@@ -2600,10 +2620,9 @@ static struct ctl_table kern_table[] = {
 		.data		= &sysctl_unprivileged_bpf_disabled,
 		.maxlen		= sizeof(sysctl_unprivileged_bpf_disabled),
 		.mode		= 0644,
-		/* only handle a transition from default "0" to "1" */
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ONE,
-		.extra2		= SYSCTL_ONE,
+		.proc_handler	= bpf_unpriv_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &two,
 	},
 	{
 		.procname	= "bpf_stats_enabled",
-- 
cgit v1.2.3


From 35e3815fa8102fab4dee75f3547472c66581125d Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Thu, 29 Apr 2021 13:47:12 +0200
Subject: bpf: Add deny list of btf ids check for tracing programs

The recursion check in __bpf_prog_enter and __bpf_prog_exit
leaves some (not inlined) functions unprotected:

In __bpf_prog_enter:
  - migrate_disable is called before prog->active is checked

In __bpf_prog_exit:
  - migrate_enable,rcu_read_unlock_strict are called after
    prog->active is decreased

When attaching trampoline to them we get panic like:

  traps: PANIC: double fault, error_code: 0x0
  double fault: 0000 [#1] SMP PTI
  RIP: 0010:__bpf_prog_enter+0x4/0x50
  ...
  Call Trace:
   <IRQ>
   bpf_trampoline_6442466513_0+0x18/0x1000
   migrate_disable+0x5/0x50
   __bpf_prog_enter+0x9/0x50
   bpf_trampoline_6442466513_0+0x18/0x1000
   migrate_disable+0x5/0x50
   __bpf_prog_enter+0x9/0x50
   bpf_trampoline_6442466513_0+0x18/0x1000
   migrate_disable+0x5/0x50
   __bpf_prog_enter+0x9/0x50
   bpf_trampoline_6442466513_0+0x18/0x1000
   migrate_disable+0x5/0x50
   ...

Fixing this by adding deny list of btf ids for tracing
programs and checking btf id during program verification.
Adding above functions to this list.

Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210429114712.43783-1-jolsa@kernel.org
---
 kernel/bpf/verifier.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9352a1b7de2d..c58598ef4b5b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -13196,6 +13196,17 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
 	return 0;
 }
 
+BTF_SET_START(btf_id_deny)
+BTF_ID_UNUSED
+#ifdef CONFIG_SMP
+BTF_ID(func, migrate_disable)
+BTF_ID(func, migrate_enable)
+#endif
+#if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
+BTF_ID(func, rcu_read_unlock_strict)
+#endif
+BTF_SET_END(btf_id_deny)
+
 static int check_attach_btf_id(struct bpf_verifier_env *env)
 {
 	struct bpf_prog *prog = env->prog;
@@ -13255,6 +13266,9 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
 		ret = bpf_lsm_verify_prog(&env->log, prog);
 		if (ret < 0)
 			return ret;
+	} else if (prog->type == BPF_PROG_TYPE_TRACING &&
+		   btf_id_set_contains(&btf_id_deny, btf_id)) {
+		return -EINVAL;
 	}
 
 	key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
-- 
cgit v1.2.3


From e2d5b2bb769fa5f500760caba76436ba3a10a895 Mon Sep 17 00:00:00 2001
From: Florent Revest <revest@chromium.org>
Date: Tue, 11 May 2021 10:10:54 +0200
Subject: bpf: Fix nested bpf_bprintf_prepare with more per-cpu buffers

The bpf_seq_printf, bpf_trace_printk and bpf_snprintf helpers share one
per-cpu buffer that they use to store temporary data (arguments to
bprintf). They "get" that buffer with try_get_fmt_tmp_buf and "put" it
by the end of their scope with bpf_bprintf_cleanup.

If one of these helpers gets called within the scope of one of these
helpers, for example: a first bpf program gets called, uses
bpf_trace_printk which calls raw_spin_lock_irqsave which is traced by
another bpf program that calls bpf_snprintf, then the second "get"
fails. Essentially, these helpers are not re-entrant. They would return
-EBUSY and print a warning message once.

This patch triples the number of bprintf buffers to allow three levels
of nesting. This is very similar to what was done for tracepoints in
"9594dc3c7e7 bpf: fix nested bpf tracepoints with per-cpu data"

Fixes: d9c9e4db186a ("bpf: Factorize bpf_trace_printk and bpf_seq_printf")
Reported-by: syzbot+63122d0bc347f18c1884@syzkaller.appspotmail.com
Signed-off-by: Florent Revest <revest@chromium.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210511081054.2125874-1-revest@chromium.org
---
 kernel/bpf/helpers.c | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 544773970dbc..ef658a9ea5c9 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -696,34 +696,35 @@ static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
  */
 #define MAX_PRINTF_BUF_LEN	512
 
-struct bpf_printf_buf {
-	char tmp_buf[MAX_PRINTF_BUF_LEN];
+/* Support executing three nested bprintf helper calls on a given CPU */
+struct bpf_bprintf_buffers {
+	char tmp_bufs[3][MAX_PRINTF_BUF_LEN];
 };
-static DEFINE_PER_CPU(struct bpf_printf_buf, bpf_printf_buf);
-static DEFINE_PER_CPU(int, bpf_printf_buf_used);
+static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs);
+static DEFINE_PER_CPU(int, bpf_bprintf_nest_level);
 
 static int try_get_fmt_tmp_buf(char **tmp_buf)
 {
-	struct bpf_printf_buf *bufs;
-	int used;
+	struct bpf_bprintf_buffers *bufs;
+	int nest_level;
 
 	preempt_disable();
-	used = this_cpu_inc_return(bpf_printf_buf_used);
-	if (WARN_ON_ONCE(used > 1)) {
-		this_cpu_dec(bpf_printf_buf_used);
+	nest_level = this_cpu_inc_return(bpf_bprintf_nest_level);
+	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bufs->tmp_bufs))) {
+		this_cpu_dec(bpf_bprintf_nest_level);
 		preempt_enable();
 		return -EBUSY;
 	}
-	bufs = this_cpu_ptr(&bpf_printf_buf);
-	*tmp_buf = bufs->tmp_buf;
+	bufs = this_cpu_ptr(&bpf_bprintf_bufs);
+	*tmp_buf = bufs->tmp_bufs[nest_level - 1];
 
 	return 0;
 }
 
 void bpf_bprintf_cleanup(void)
 {
-	if (this_cpu_read(bpf_printf_buf_used)) {
-		this_cpu_dec(bpf_printf_buf_used);
+	if (this_cpu_read(bpf_bprintf_nest_level)) {
+		this_cpu_dec(bpf_bprintf_nest_level);
 		preempt_enable();
 	}
 }
-- 
cgit v1.2.3


From 67e7ec0bd4535fc6e6d3f5d174f80e10a8a80c6e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@kernel.org>
Date: Sat, 8 May 2021 12:22:12 -0300
Subject: libbpf: Provide GELF_ST_VISIBILITY() define for older libelf

Where that macro isn't available.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/YJaspEh0qZr4LYOc@kernel.org
---
 tools/lib/bpf/libbpf_internal.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index ee426226928f..acbcf6c7bdf8 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -41,6 +41,11 @@
 #define ELF_C_READ_MMAP ELF_C_READ
 #endif
 
+/* Older libelf all end up in this expression, for both 32 and 64 bit */
+#ifndef GELF_ST_VISIBILITY
+#define GELF_ST_VISIBILITY(o) ((o) & 0x03)
+#endif
+
 #define BTF_INFO_ENC(kind, kind_flag, vlen) \
 	((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
 #define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type)
-- 
cgit v1.2.3


From 096eccdef0b32f47e9354231ddc3aaaf9527d51c Mon Sep 17 00:00:00 2001
From: Jussi Maki <joamaki@gmail.com>
Date: Wed, 5 May 2021 08:59:25 +0000
Subject: selftests/bpf: Rewrite test_tc_redirect.sh as
 prog_tests/tc_redirect.c

As discussed in [0], this ports test_tc_redirect.sh to the test_progs
framework and removes the old test.

This makes it more in line with rest of the tests and makes it possible
to run this test case with vmtest.sh and under the bpf CI.

The upcoming skb_change_head() helper fix in [0] is depending on it and
extending the test case to redirect a packet from L3 device to veth.

  [0] https://lore.kernel.org/bpf/20210427135550.807355-1-joamaki@gmail.com

Signed-off-by: Jussi Maki <joamaki@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210505085925.783985-1-joamaki@gmail.com
---
 tools/testing/selftests/bpf/network_helpers.c      |   2 +-
 tools/testing/selftests/bpf/network_helpers.h      |   1 +
 .../testing/selftests/bpf/prog_tests/tc_redirect.c | 589 +++++++++++++++++++++
 tools/testing/selftests/bpf/progs/test_tc_neigh.c  |  33 +-
 .../selftests/bpf/progs/test_tc_neigh_fib.c        |   9 +-
 tools/testing/selftests/bpf/progs/test_tc_peer.c   |  33 +-
 tools/testing/selftests/bpf/test_tc_redirect.sh    | 216 --------
 7 files changed, 617 insertions(+), 266 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/tc_redirect.c
 delete mode 100755 tools/testing/selftests/bpf/test_tc_redirect.sh

diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 12ee40284da0..2060bc122c53 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -40,7 +40,7 @@ struct ipv6_packet pkt_v6 = {
 	.tcp.doff = 5,
 };
 
-static int settimeo(int fd, int timeout_ms)
+int settimeo(int fd, int timeout_ms)
 {
 	struct timeval timeout = { .tv_sec = 3 };
 
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index 7205f8afdba1..5e0d51c07b63 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -33,6 +33,7 @@ struct ipv6_packet {
 } __packed;
 extern struct ipv6_packet pkt_v6;
 
+int settimeo(int fd, int timeout_ms);
 int start_server(int family, int type, const char *addr, __u16 port,
 		 int timeout_ms);
 int connect_to_fd(int server_fd, int timeout_ms);
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
new file mode 100644
index 000000000000..95ef9fcd31d8
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -0,0 +1,589 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
+ * between src and dst. The netns fwd has veth links to each src and dst. The
+ * client is in src and server in dst. The test installs a TC BPF program to each
+ * host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the
+ * neigh addr population and redirect or ii) bpf_redirect_peer() for namespace
+ * switch from ingress side; it also installs a checker prog on the egress side
+ * to drop unexpected traffic.
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <linux/limits.h>
+#include <linux/sysctl.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "test_tc_neigh_fib.skel.h"
+#include "test_tc_neigh.skel.h"
+#include "test_tc_peer.skel.h"
+
+#define NS_SRC "ns_src"
+#define NS_FWD "ns_fwd"
+#define NS_DST "ns_dst"
+
+#define IP4_SRC "172.16.1.100"
+#define IP4_DST "172.16.2.100"
+#define IP4_PORT 9004
+
+#define IP6_SRC "::1:dead:beef:cafe"
+#define IP6_DST "::2:dead:beef:cafe"
+#define IP6_PORT 9006
+
+#define IP4_SLL "169.254.0.1"
+#define IP4_DLL "169.254.0.2"
+#define IP4_NET "169.254.0.0"
+
+#define IFADDR_STR_LEN 18
+#define PING_ARGS "-c 3 -w 10 -q"
+
+#define SRC_PROG_PIN_FILE "/sys/fs/bpf/test_tc_src"
+#define DST_PROG_PIN_FILE "/sys/fs/bpf/test_tc_dst"
+#define CHK_PROG_PIN_FILE "/sys/fs/bpf/test_tc_chk"
+
+#define TIMEOUT_MILLIS 10000
+
+#define MAX_PROC_MODS 128
+#define MAX_PROC_VALUE_LEN 16
+
+#define log_err(MSG, ...) \
+	fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
+		__FILE__, __LINE__, strerror(errno), ##__VA_ARGS__)
+
+struct proc_mod {
+	char path[PATH_MAX];
+	char oldval[MAX_PROC_VALUE_LEN];
+	int oldlen;
+};
+
+static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL};
+static int root_netns_fd = -1;
+static int num_proc_mods;
+static struct proc_mod proc_mods[MAX_PROC_MODS];
+
+/**
+ * modify_proc() - Modify entry in /proc
+ *
+ * Modifies an entry in /proc and saves the original value for later
+ * restoration with restore_proc().
+ */
+static int modify_proc(const char *path, const char *newval)
+{
+	struct proc_mod *mod;
+	FILE *f;
+
+	if (num_proc_mods + 1 > MAX_PROC_MODS)
+		return -1;
+
+	f = fopen(path, "r+");
+	if (!f)
+		return -1;
+
+	mod = &proc_mods[num_proc_mods];
+	num_proc_mods++;
+
+	strncpy(mod->path, path, PATH_MAX);
+
+	if (!fread(mod->oldval, 1, MAX_PROC_VALUE_LEN, f)) {
+		log_err("reading from %s failed", path);
+		goto fail;
+	}
+	rewind(f);
+	if (fwrite(newval, strlen(newval), 1, f) != 1) {
+		log_err("writing to %s failed", path);
+		goto fail;
+	}
+
+	fclose(f);
+	return 0;
+
+fail:
+	fclose(f);
+	num_proc_mods--;
+	return -1;
+}
+
+/**
+ * restore_proc() - Restore all /proc modifications
+ */
+static void restore_proc(void)
+{
+	int i;
+
+	for (i = 0; i < num_proc_mods; i++) {
+		struct proc_mod *mod = &proc_mods[i];
+		FILE *f;
+
+		f = fopen(mod->path, "w");
+		if (!f) {
+			log_err("fopen of %s failed", mod->path);
+			continue;
+		}
+
+		if (fwrite(mod->oldval, mod->oldlen, 1, f) != 1)
+			log_err("fwrite to %s failed", mod->path);
+
+		fclose(f);
+	}
+	num_proc_mods = 0;
+}
+
+/**
+ * setns_by_name() - Set networks namespace by name
+ */
+static int setns_by_name(const char *name)
+{
+	int nsfd;
+	char nspath[PATH_MAX];
+	int err;
+
+	snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
+	nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
+	if (nsfd < 0)
+		return nsfd;
+
+	err = setns(nsfd, CLONE_NEWNET);
+	close(nsfd);
+
+	return err;
+}
+
+/**
+ * setns_root() - Set network namespace to original (root) namespace
+ *
+ * Not expected to ever fail, so error not returned, but failure logged
+ * and test marked as failed.
+ */
+static void setns_root(void)
+{
+	ASSERT_OK(setns(root_netns_fd, CLONE_NEWNET), "setns root");
+}
+
+static int netns_setup_namespaces(const char *verb)
+{
+	const char * const *ns = namespaces;
+	char cmd[128];
+
+	while (*ns) {
+		snprintf(cmd, sizeof(cmd), "ip netns %s %s", verb, *ns);
+		if (!ASSERT_OK(system(cmd), cmd))
+			return -1;
+		ns++;
+	}
+	return 0;
+}
+
+struct netns_setup_result {
+	int ifindex_veth_src_fwd;
+	int ifindex_veth_dst_fwd;
+};
+
+static int get_ifaddr(const char *name, char *ifaddr)
+{
+	char path[PATH_MAX];
+	FILE *f;
+	int ret;
+
+	snprintf(path, PATH_MAX, "/sys/class/net/%s/address", name);
+	f = fopen(path, "r");
+	if (!ASSERT_OK_PTR(f, path))
+		return -1;
+
+	ret = fread(ifaddr, 1, IFADDR_STR_LEN, f);
+	if (!ASSERT_EQ(ret, IFADDR_STR_LEN, "fread ifaddr")) {
+		fclose(f);
+		return -1;
+	}
+	fclose(f);
+	return 0;
+}
+
+static int get_ifindex(const char *name)
+{
+	char path[PATH_MAX];
+	char buf[32];
+	FILE *f;
+	int ret;
+
+	snprintf(path, PATH_MAX, "/sys/class/net/%s/ifindex", name);
+	f = fopen(path, "r");
+	if (!ASSERT_OK_PTR(f, path))
+		return -1;
+
+	ret = fread(buf, 1, sizeof(buf), f);
+	if (!ASSERT_GT(ret, 0, "fread ifindex")) {
+		fclose(f);
+		return -1;
+	}
+	fclose(f);
+	return atoi(buf);
+}
+
+#define SYS(fmt, ...)						\
+	({							\
+		char cmd[1024];					\
+		snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__);	\
+		if (!ASSERT_OK(system(cmd), cmd))		\
+			goto fail;				\
+	})
+
+static int netns_setup_links_and_routes(struct netns_setup_result *result)
+{
+	char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {};
+	char veth_dst_fwd_addr[IFADDR_STR_LEN+1] = {};
+
+	SYS("ip link add veth_src type veth peer name veth_src_fwd");
+	SYS("ip link add veth_dst type veth peer name veth_dst_fwd");
+	if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
+		goto fail;
+	if (get_ifaddr("veth_dst_fwd", veth_dst_fwd_addr))
+		goto fail;
+
+	result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd");
+	if (result->ifindex_veth_src_fwd < 0)
+		goto fail;
+	result->ifindex_veth_dst_fwd = get_ifindex("veth_dst_fwd");
+	if (result->ifindex_veth_dst_fwd < 0)
+		goto fail;
+
+	SYS("ip link set veth_src netns " NS_SRC);
+	SYS("ip link set veth_src_fwd netns " NS_FWD);
+	SYS("ip link set veth_dst_fwd netns " NS_FWD);
+	SYS("ip link set veth_dst netns " NS_DST);
+
+	/** setup in 'src' namespace */
+	if (!ASSERT_OK(setns_by_name(NS_SRC), "setns src"))
+		goto fail;
+
+	SYS("ip addr add " IP4_SRC "/32 dev veth_src");
+	SYS("ip addr add " IP6_SRC "/128 dev veth_src nodad");
+	SYS("ip link set dev veth_src up");
+
+	SYS("ip route add " IP4_DST "/32 dev veth_src scope global");
+	SYS("ip route add " IP4_NET "/16 dev veth_src scope global");
+	SYS("ip route add " IP6_DST "/128 dev veth_src scope global");
+
+	SYS("ip neigh add " IP4_DST " dev veth_src lladdr %s",
+	    veth_src_fwd_addr);
+	SYS("ip neigh add " IP6_DST " dev veth_src lladdr %s",
+	    veth_src_fwd_addr);
+
+	/** setup in 'fwd' namespace */
+	if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
+		goto fail;
+
+	/* The fwd netns automatically gets a v6 LL address / routes, but also
+	 * needs v4 one in order to start ARP probing. IP4_NET route is added
+	 * to the endpoints so that the ARP processing will reply.
+	 */
+	SYS("ip addr add " IP4_SLL "/32 dev veth_src_fwd");
+	SYS("ip addr add " IP4_DLL "/32 dev veth_dst_fwd");
+	SYS("ip link set dev veth_src_fwd up");
+	SYS("ip link set dev veth_dst_fwd up");
+
+	SYS("ip route add " IP4_SRC "/32 dev veth_src_fwd scope global");
+	SYS("ip route add " IP6_SRC "/128 dev veth_src_fwd scope global");
+	SYS("ip route add " IP4_DST "/32 dev veth_dst_fwd scope global");
+	SYS("ip route add " IP6_DST "/128 dev veth_dst_fwd scope global");
+
+	/** setup in 'dst' namespace */
+	if (!ASSERT_OK(setns_by_name(NS_DST), "setns dst"))
+		goto fail;
+
+	SYS("ip addr add " IP4_DST "/32 dev veth_dst");
+	SYS("ip addr add " IP6_DST "/128 dev veth_dst nodad");
+	SYS("ip link set dev veth_dst up");
+
+	SYS("ip route add " IP4_SRC "/32 dev veth_dst scope global");
+	SYS("ip route add " IP4_NET "/16 dev veth_dst scope global");
+	SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global");
+
+	SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr %s",
+	    veth_dst_fwd_addr);
+	SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr %s",
+	    veth_dst_fwd_addr);
+
+	setns_root();
+	return 0;
+fail:
+	setns_root();
+	return -1;
+}
+
+static int netns_load_bpf(void)
+{
+	if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
+		return -1;
+
+	SYS("tc qdisc add dev veth_src_fwd clsact");
+	SYS("tc filter add dev veth_src_fwd ingress bpf da object-pinned "
+	    SRC_PROG_PIN_FILE);
+	SYS("tc filter add dev veth_src_fwd egress bpf da object-pinned "
+	    CHK_PROG_PIN_FILE);
+
+	SYS("tc qdisc add dev veth_dst_fwd clsact");
+	SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned "
+	    DST_PROG_PIN_FILE);
+	SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
+	    CHK_PROG_PIN_FILE);
+
+	setns_root();
+	return -1;
+fail:
+	setns_root();
+	return -1;
+}
+
+static int netns_unload_bpf(void)
+{
+	if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
+		goto fail;
+	SYS("tc qdisc delete dev veth_src_fwd clsact");
+	SYS("tc qdisc delete dev veth_dst_fwd clsact");
+
+	setns_root();
+	return 0;
+fail:
+	setns_root();
+	return -1;
+}
+
+
+static void test_tcp(int family, const char *addr, __u16 port)
+{
+	int listen_fd = -1, accept_fd = -1, client_fd = -1;
+	char buf[] = "testing testing";
+	int n;
+
+	if (!ASSERT_OK(setns_by_name(NS_DST), "setns dst"))
+		return;
+
+	listen_fd = start_server(family, SOCK_STREAM, addr, port, 0);
+	if (!ASSERT_GE(listen_fd, 0, "listen"))
+		goto done;
+
+	if (!ASSERT_OK(setns_by_name(NS_SRC), "setns src"))
+		goto done;
+
+	client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS);
+	if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
+		goto done;
+
+	accept_fd = accept(listen_fd, NULL, NULL);
+	if (!ASSERT_GE(accept_fd, 0, "accept"))
+		goto done;
+
+	if (!ASSERT_OK(settimeo(accept_fd, TIMEOUT_MILLIS), "settimeo"))
+		goto done;
+
+	n = write(client_fd, buf, sizeof(buf));
+	if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
+		goto done;
+
+	n = read(accept_fd, buf, sizeof(buf));
+	ASSERT_EQ(n, sizeof(buf), "recv from server");
+
+done:
+	setns_root();
+	if (listen_fd >= 0)
+		close(listen_fd);
+	if (accept_fd >= 0)
+		close(accept_fd);
+	if (client_fd >= 0)
+		close(client_fd);
+}
+
+static int test_ping(int family, const char *addr)
+{
+	const char *ping = family == AF_INET6 ? "ping6" : "ping";
+
+	SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s", ping, addr);
+	return 0;
+fail:
+	return -1;
+}
+
+static void test_connectivity(void)
+{
+	test_tcp(AF_INET, IP4_DST, IP4_PORT);
+	test_ping(AF_INET, IP4_DST);
+	test_tcp(AF_INET6, IP6_DST, IP6_PORT);
+	test_ping(AF_INET6, IP6_DST);
+}
+
+static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
+{
+	struct test_tc_neigh_fib *skel;
+	int err;
+
+	skel = test_tc_neigh_fib__open();
+	if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open"))
+		return;
+
+	if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load")) {
+		test_tc_neigh_fib__destroy(skel);
+		return;
+	}
+
+	err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
+	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
+		goto done;
+
+	err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
+	if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
+		goto done;
+
+	err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
+	if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
+		goto done;
+
+	if (netns_load_bpf())
+		goto done;
+
+	/* bpf_fib_lookup() checks if forwarding is enabled */
+	if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
+		goto done;
+
+	err = modify_proc("/proc/sys/net/ipv4/ip_forward", "1");
+	if (!ASSERT_OK(err, "set ipv4.ip_forward"))
+		goto done;
+
+	err = modify_proc("/proc/sys/net/ipv6/conf/all/forwarding", "1");
+	if (!ASSERT_OK(err, "set ipv6.forwarding"))
+		goto done;
+	setns_root();
+
+	test_connectivity();
+done:
+	bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
+	bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
+	bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
+	test_tc_neigh_fib__destroy(skel);
+	netns_unload_bpf();
+	setns_root();
+	restore_proc();
+}
+
+static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
+{
+	struct test_tc_neigh *skel;
+	int err;
+
+	skel = test_tc_neigh__open();
+	if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
+		return;
+
+	skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
+	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+
+	err = test_tc_neigh__load(skel);
+	if (!ASSERT_OK(err, "test_tc_neigh__load")) {
+		test_tc_neigh__destroy(skel);
+		return;
+	}
+
+	err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
+	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
+		goto done;
+
+	err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
+	if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
+		goto done;
+
+	err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
+	if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
+		goto done;
+
+	if (netns_load_bpf())
+		goto done;
+
+	test_connectivity();
+
+done:
+	bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
+	bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
+	bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
+	test_tc_neigh__destroy(skel);
+	netns_unload_bpf();
+	setns_root();
+}
+
+static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
+{
+	struct test_tc_peer *skel;
+	int err;
+
+	skel = test_tc_peer__open();
+	if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
+		return;
+
+	skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
+	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+
+	err = test_tc_peer__load(skel);
+	if (!ASSERT_OK(err, "test_tc_peer__load")) {
+		test_tc_peer__destroy(skel);
+		return;
+	}
+
+	err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
+	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
+		goto done;
+
+	err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
+	if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
+		goto done;
+
+	err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
+	if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
+		goto done;
+
+	if (netns_load_bpf())
+		goto done;
+
+	test_connectivity();
+
+done:
+	bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
+	bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
+	bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
+	test_tc_peer__destroy(skel);
+	netns_unload_bpf();
+	setns_root();
+}
+
+void test_tc_redirect(void)
+{
+	struct netns_setup_result setup_result;
+
+	root_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+	if (!ASSERT_GE(root_netns_fd, 0, "open /proc/self/ns/net"))
+		return;
+
+	if (netns_setup_namespaces("add"))
+		goto done;
+
+	if (netns_setup_links_and_routes(&setup_result))
+		goto done;
+
+	if (test__start_subtest("tc_redirect_peer"))
+		test_tc_redirect_peer(&setup_result);
+
+	if (test__start_subtest("tc_redirect_neigh"))
+		test_tc_redirect_neigh(&setup_result);
+
+	if (test__start_subtest("tc_redirect_neigh_fib"))
+		test_tc_redirect_neigh_fib(&setup_result);
+
+done:
+	close(root_netns_fd);
+	netns_setup_namespaces("delete");
+}
diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
index b985ac4e7a81..90f64a85998f 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_neigh.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
@@ -33,17 +33,8 @@
 				 a.s6_addr32[3] == b.s6_addr32[3])
 #endif
 
-enum {
-	dev_src,
-	dev_dst,
-};
-
-struct bpf_map_def SEC("maps") ifindex_map = {
-	.type		= BPF_MAP_TYPE_ARRAY,
-	.key_size	= sizeof(int),
-	.value_size	= sizeof(int),
-	.max_entries	= 2,
-};
+static volatile const __u32 IFINDEX_SRC;
+static volatile const __u32 IFINDEX_DST;
 
 static __always_inline bool is_remote_ep_v4(struct __sk_buff *skb,
 					    __be32 addr)
@@ -79,14 +70,8 @@ static __always_inline bool is_remote_ep_v6(struct __sk_buff *skb,
 	return v6_equal(ip6h->daddr, addr);
 }
 
-static __always_inline int get_dev_ifindex(int which)
-{
-	int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which);
-
-	return ifindex ? *ifindex : 0;
-}
-
-SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
+SEC("classifier/chk_egress")
+int tc_chk(struct __sk_buff *skb)
 {
 	void *data_end = ctx_ptr(skb->data_end);
 	void *data = ctx_ptr(skb->data);
@@ -98,7 +83,8 @@ SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
 	return !raw[0] && !raw[1] && !raw[2] ? TC_ACT_SHOT : TC_ACT_OK;
 }
 
-SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
+SEC("classifier/dst_ingress")
+int tc_dst(struct __sk_buff *skb)
 {
 	__u8 zero[ETH_ALEN * 2];
 	bool redirect = false;
@@ -119,10 +105,11 @@ SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
 	if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
 		return TC_ACT_SHOT;
 
-	return bpf_redirect_neigh(get_dev_ifindex(dev_src), NULL, 0, 0);
+	return bpf_redirect_neigh(IFINDEX_SRC, NULL, 0, 0);
 }
 
-SEC("src_ingress") int tc_src(struct __sk_buff *skb)
+SEC("classifier/src_ingress")
+int tc_src(struct __sk_buff *skb)
 {
 	__u8 zero[ETH_ALEN * 2];
 	bool redirect = false;
@@ -143,7 +130,7 @@ SEC("src_ingress") int tc_src(struct __sk_buff *skb)
 	if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
 		return TC_ACT_SHOT;
 
-	return bpf_redirect_neigh(get_dev_ifindex(dev_dst), NULL, 0, 0);
+	return bpf_redirect_neigh(IFINDEX_DST, NULL, 0, 0);
 }
 
 char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c
index d82ed3457030..f7ab69cf018e 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c
@@ -75,7 +75,8 @@ static __always_inline int fill_fib_params_v6(struct __sk_buff *skb,
 	return 0;
 }
 
-SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
+SEC("classifier/chk_egress")
+int tc_chk(struct __sk_buff *skb)
 {
 	void *data_end = ctx_ptr(skb->data_end);
 	void *data = ctx_ptr(skb->data);
@@ -142,12 +143,14 @@ static __always_inline int tc_redir(struct __sk_buff *skb)
 /* these are identical, but keep them separate for compatibility with the
  * section names expected by test_tc_redirect.sh
  */
-SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
+SEC("classifier/dst_ingress")
+int tc_dst(struct __sk_buff *skb)
 {
 	return tc_redir(skb);
 }
 
-SEC("src_ingress") int tc_src(struct __sk_buff *skb)
+SEC("classifier/src_ingress")
+int tc_src(struct __sk_buff *skb)
 {
 	return tc_redir(skb);
 }
diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c
index fc84a7685aa2..72c72950c3bb 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_peer.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c
@@ -8,38 +8,25 @@
 
 #include <bpf/bpf_helpers.h>
 
-enum {
-	dev_src,
-	dev_dst,
-};
+static volatile const __u32 IFINDEX_SRC;
+static volatile const __u32 IFINDEX_DST;
 
-struct bpf_map_def SEC("maps") ifindex_map = {
-	.type		= BPF_MAP_TYPE_ARRAY,
-	.key_size	= sizeof(int),
-	.value_size	= sizeof(int),
-	.max_entries	= 2,
-};
-
-static __always_inline int get_dev_ifindex(int which)
-{
-	int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which);
-
-	return ifindex ? *ifindex : 0;
-}
-
-SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
+SEC("classifier/chk_egress")
+int tc_chk(struct __sk_buff *skb)
 {
 	return TC_ACT_SHOT;
 }
 
-SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
+SEC("classifier/dst_ingress")
+int tc_dst(struct __sk_buff *skb)
 {
-	return bpf_redirect_peer(get_dev_ifindex(dev_src), 0);
+	return bpf_redirect_peer(IFINDEX_SRC, 0);
 }
 
-SEC("src_ingress") int tc_src(struct __sk_buff *skb)
+SEC("classifier/src_ingress")
+int tc_src(struct __sk_buff *skb)
 {
-	return bpf_redirect_peer(get_dev_ifindex(dev_dst), 0);
+	return bpf_redirect_peer(IFINDEX_DST, 0);
 }
 
 char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_tc_redirect.sh b/tools/testing/selftests/bpf/test_tc_redirect.sh
deleted file mode 100755
index 8868aa1ca902..000000000000
--- a/tools/testing/selftests/bpf/test_tc_redirect.sh
+++ /dev/null
@@ -1,216 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
-# between src and dst. The netns fwd has veth links to each src and dst. The
-# client is in src and server in dst. The test installs a TC BPF program to each
-# host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the
-# neigh addr population and redirect or ii) bpf_redirect_peer() for namespace
-# switch from ingress side; it also installs a checker prog on the egress side
-# to drop unexpected traffic.
-
-if [[ $EUID -ne 0 ]]; then
-	echo "This script must be run as root"
-	echo "FAIL"
-	exit 1
-fi
-
-# check that needed tools are present
-command -v nc >/dev/null 2>&1 || \
-	{ echo >&2 "nc is not available"; exit 1; }
-command -v dd >/dev/null 2>&1 || \
-	{ echo >&2 "dd is not available"; exit 1; }
-command -v timeout >/dev/null 2>&1 || \
-	{ echo >&2 "timeout is not available"; exit 1; }
-command -v ping >/dev/null 2>&1 || \
-	{ echo >&2 "ping is not available"; exit 1; }
-if command -v ping6 >/dev/null 2>&1; then PING6=ping6; else PING6=ping; fi
-command -v perl >/dev/null 2>&1 || \
-	{ echo >&2 "perl is not available"; exit 1; }
-command -v jq >/dev/null 2>&1 || \
-	{ echo >&2 "jq is not available"; exit 1; }
-command -v bpftool >/dev/null 2>&1 || \
-	{ echo >&2 "bpftool is not available"; exit 1; }
-
-readonly GREEN='\033[0;92m'
-readonly RED='\033[0;31m'
-readonly NC='\033[0m' # No Color
-
-readonly PING_ARG="-c 3 -w 10 -q"
-
-readonly TIMEOUT=10
-
-readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)"
-readonly NS_FWD="ns-fwd-$(mktemp -u XXXXXX)"
-readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)"
-
-readonly IP4_SRC="172.16.1.100"
-readonly IP4_DST="172.16.2.100"
-
-readonly IP6_SRC="::1:dead:beef:cafe"
-readonly IP6_DST="::2:dead:beef:cafe"
-
-readonly IP4_SLL="169.254.0.1"
-readonly IP4_DLL="169.254.0.2"
-readonly IP4_NET="169.254.0.0"
-
-netns_cleanup()
-{
-	ip netns del ${NS_SRC}
-	ip netns del ${NS_FWD}
-	ip netns del ${NS_DST}
-}
-
-netns_setup()
-{
-	ip netns add "${NS_SRC}"
-	ip netns add "${NS_FWD}"
-	ip netns add "${NS_DST}"
-
-	ip link add veth_src type veth peer name veth_src_fwd
-	ip link add veth_dst type veth peer name veth_dst_fwd
-
-	ip link set veth_src netns ${NS_SRC}
-	ip link set veth_src_fwd netns ${NS_FWD}
-
-	ip link set veth_dst netns ${NS_DST}
-	ip link set veth_dst_fwd netns ${NS_FWD}
-
-	ip -netns ${NS_SRC} addr add ${IP4_SRC}/32 dev veth_src
-	ip -netns ${NS_DST} addr add ${IP4_DST}/32 dev veth_dst
-
-	# The fwd netns automatically get a v6 LL address / routes, but also
-	# needs v4 one in order to start ARP probing. IP4_NET route is added
-	# to the endpoints so that the ARP processing will reply.
-
-	ip -netns ${NS_FWD} addr add ${IP4_SLL}/32 dev veth_src_fwd
-	ip -netns ${NS_FWD} addr add ${IP4_DLL}/32 dev veth_dst_fwd
-
-	ip -netns ${NS_SRC} addr add ${IP6_SRC}/128 dev veth_src nodad
-	ip -netns ${NS_DST} addr add ${IP6_DST}/128 dev veth_dst nodad
-
-	ip -netns ${NS_SRC} link set dev veth_src up
-	ip -netns ${NS_FWD} link set dev veth_src_fwd up
-
-	ip -netns ${NS_DST} link set dev veth_dst up
-	ip -netns ${NS_FWD} link set dev veth_dst_fwd up
-
-	ip -netns ${NS_SRC} route add ${IP4_DST}/32 dev veth_src scope global
-	ip -netns ${NS_SRC} route add ${IP4_NET}/16 dev veth_src scope global
-	ip -netns ${NS_FWD} route add ${IP4_SRC}/32 dev veth_src_fwd scope global
-
-	ip -netns ${NS_SRC} route add ${IP6_DST}/128 dev veth_src scope global
-	ip -netns ${NS_FWD} route add ${IP6_SRC}/128 dev veth_src_fwd scope global
-
-	ip -netns ${NS_DST} route add ${IP4_SRC}/32 dev veth_dst scope global
-	ip -netns ${NS_DST} route add ${IP4_NET}/16 dev veth_dst scope global
-	ip -netns ${NS_FWD} route add ${IP4_DST}/32 dev veth_dst_fwd scope global
-
-	ip -netns ${NS_DST} route add ${IP6_SRC}/128 dev veth_dst scope global
-	ip -netns ${NS_FWD} route add ${IP6_DST}/128 dev veth_dst_fwd scope global
-
-	fmac_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/address)
-	fmac_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/address)
-
-	ip -netns ${NS_SRC} neigh add ${IP4_DST} dev veth_src lladdr $fmac_src
-	ip -netns ${NS_DST} neigh add ${IP4_SRC} dev veth_dst lladdr $fmac_dst
-
-	ip -netns ${NS_SRC} neigh add ${IP6_DST} dev veth_src lladdr $fmac_src
-	ip -netns ${NS_DST} neigh add ${IP6_SRC} dev veth_dst lladdr $fmac_dst
-}
-
-netns_test_connectivity()
-{
-	set +e
-
-	ip netns exec ${NS_DST} bash -c "nc -4 -l -p 9004 &"
-	ip netns exec ${NS_DST} bash -c "nc -6 -l -p 9006 &"
-
-	TEST="TCPv4 connectivity test"
-	ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP4_DST}/9004"
-	if [ $? -ne 0 ]; then
-		echo -e "${TEST}: ${RED}FAIL${NC}"
-		exit 1
-	fi
-	echo -e "${TEST}: ${GREEN}PASS${NC}"
-
-	TEST="TCPv6 connectivity test"
-	ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP6_DST}/9006"
-	if [ $? -ne 0 ]; then
-		echo -e "${TEST}: ${RED}FAIL${NC}"
-		exit 1
-	fi
-	echo -e "${TEST}: ${GREEN}PASS${NC}"
-
-	TEST="ICMPv4 connectivity test"
-	ip netns exec ${NS_SRC} ping  $PING_ARG ${IP4_DST}
-	if [ $? -ne 0 ]; then
-		echo -e "${TEST}: ${RED}FAIL${NC}"
-		exit 1
-	fi
-	echo -e "${TEST}: ${GREEN}PASS${NC}"
-
-	TEST="ICMPv6 connectivity test"
-	ip netns exec ${NS_SRC} $PING6 $PING_ARG ${IP6_DST}
-	if [ $? -ne 0 ]; then
-		echo -e "${TEST}: ${RED}FAIL${NC}"
-		exit 1
-	fi
-	echo -e "${TEST}: ${GREEN}PASS${NC}"
-
-	set -e
-}
-
-hex_mem_str()
-{
-	perl -e 'print join(" ", unpack("(H2)8", pack("L", @ARGV)))' $1
-}
-
-netns_setup_bpf()
-{
-	local obj=$1
-	local use_forwarding=${2:-0}
-
-	ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact
-	ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj $obj sec src_ingress
-	ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd egress  bpf da obj $obj sec chk_egress
-
-	ip netns exec ${NS_FWD} tc qdisc add dev veth_dst_fwd clsact
-	ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj $obj sec dst_ingress
-	ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress  bpf da obj $obj sec chk_egress
-
-	if [ "$use_forwarding" -eq "1" ]; then
-		# bpf_fib_lookup() checks if forwarding is enabled
-		ip netns exec ${NS_FWD} sysctl -w net.ipv4.ip_forward=1
-		ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_dst_fwd.forwarding=1
-		ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_src_fwd.forwarding=1
-		return 0
-	fi
-
-	veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex)
-	veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex)
-
-	progs=$(ip netns exec ${NS_FWD} bpftool net --json | jq -r '.[] | .tc | map(.id) | .[]')
-	for prog in $progs; do
-		map=$(bpftool prog show id $prog --json | jq -r '.map_ids | .? | .[]')
-		if [ ! -z "$map" ]; then
-			bpftool map update id $map key hex $(hex_mem_str 0) value hex $(hex_mem_str $veth_src)
-			bpftool map update id $map key hex $(hex_mem_str 1) value hex $(hex_mem_str $veth_dst)
-		fi
-	done
-}
-
-trap netns_cleanup EXIT
-set -e
-
-netns_setup
-netns_setup_bpf test_tc_neigh.o
-netns_test_connectivity
-netns_cleanup
-netns_setup
-netns_setup_bpf test_tc_neigh_fib.o 1
-netns_test_connectivity
-netns_cleanup
-netns_setup
-netns_setup_bpf test_tc_peer.o
-netns_test_connectivity
-- 
cgit v1.2.3


From 569c484f9995f489f2b80dd134269fe07d2b900d Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Fri, 7 May 2021 17:50:11 -0700
Subject: bpf: Limit static tcp-cc functions in the .BTF_ids list to x86
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

During the discussion in [0]. It was pointed out that static functions
in ppc64 is prefixed with ".". For example, the 'readelf -s vmlinux.ppc':

  89326: c000000001383280    24 NOTYPE  LOCAL  DEFAULT   31 cubictcp_init
  89327: c000000000c97c50   168 FUNC    LOCAL  DEFAULT    2 .cubictcp_init

The one with FUNC type is ".cubictcp_init" instead of "cubictcp_init".
The "." seems to be done by arch/powerpc/include/asm/ppc_asm.h.

This caused that pahole cannot generate the BTF for these tcp-cc kernel
functions because pahole only captures the FUNC type and "cubictcp_init"
is not. It then failed the kernel compilation in ppc64.

This behavior is only reported in ppc64 so far. I tried arm64, s390,
and sparc64 and did not observe this "." prefix and NOTYPE behavior.

Since the kfunc call is only supported in the x86_64 and x86_32 JIT,
this patch limits those tcp-cc functions to x86 only to avoid unnecessary
compilation issue in other ARCHs. In the future, we can examine if it
is better to change all those functions from static to extern.

  [0] https://lore.kernel.org/bpf/4e051459-8532-7b61-c815-f3435767f8a0@kernel.org/

Fixes: e78aea8b2170 ("bpf: tcp: Put some tcp cong functions in allowlist for bpf-tcp-cc")
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Michal Suchánek <msuchanek@suse.de>
Cc: Jiri Slaby <jslaby@suse.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: https://lore.kernel.org/bpf/20210508005011.3863757-1-kafai@fb.com
---
 net/ipv4/bpf_tcp_ca.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index dff4f0eb96b0..9e41eff4a685 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -185,6 +185,7 @@ BTF_ID(func, tcp_reno_cong_avoid)
 BTF_ID(func, tcp_reno_undo_cwnd)
 BTF_ID(func, tcp_slow_start)
 BTF_ID(func, tcp_cong_avoid_ai)
+#ifdef CONFIG_X86
 #ifdef CONFIG_DYNAMIC_FTRACE
 #if IS_BUILTIN(CONFIG_TCP_CONG_CUBIC)
 BTF_ID(func, cubictcp_init)
@@ -213,6 +214,7 @@ BTF_ID(func, bbr_min_tso_segs)
 BTF_ID(func, bbr_set_state)
 #endif
 #endif  /* CONFIG_DYNAMIC_FTRACE */
+#endif	/* CONFIG_X86 */
 BTF_SET_END(bpf_tcp_ca_kfunc_ids)
 
 static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id)
-- 
cgit v1.2.3


From 349c4d6c75d74b62d8e39913b40bd06117b85e4a Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Sun, 9 May 2021 21:53:03 -0700
Subject: f2fs: avoid null pointer access when handling IPU error

 Unable to handle kernel NULL pointer dereference at virtual address 000000000000001a
 pc : f2fs_inplace_write_data+0x144/0x208
 lr : f2fs_inplace_write_data+0x134/0x208
 Call trace:
  f2fs_inplace_write_data+0x144/0x208
  f2fs_do_write_data_page+0x270/0x770
  f2fs_write_single_data_page+0x47c/0x830
  __f2fs_write_data_pages+0x444/0x98c
  f2fs_write_data_pages.llvm.16514453770497736882+0x2c/0x38
  do_writepages+0x58/0x118
  __writeback_single_inode+0x44/0x300
  writeback_sb_inodes+0x4b8/0x9c8
  wb_writeback+0x148/0x42c
  wb_do_writeback+0xc8/0x390
  wb_workfn+0xb0/0x2f4
  process_one_work+0x1fc/0x444
  worker_thread+0x268/0x4b4
  kthread+0x13c/0x158
  ret_from_fork+0x10/0x18

Fixes: 955772787667 ("f2fs: drop inplace IO if fs status is abnormal")
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index c605415840b5..51dc79fad4fe 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -3574,12 +3574,12 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
 
 	return err;
 drop_bio:
-	if (fio->bio) {
+	if (fio->bio && *(fio->bio)) {
 		struct bio *bio = *(fio->bio);
 
 		bio->bi_status = BLK_STS_IOERR;
 		bio_endio(bio);
-		fio->bio = NULL;
+		*(fio->bio) = NULL;
 	}
 	return err;
 }
-- 
cgit v1.2.3


From a753103909a7e3d22147505d944da3d20759e1a5 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Thu, 6 May 2021 12:11:14 -0700
Subject: f2fs: support iflag change given the mask

In f2fs_fileattr_set(),

	if (!fa->flags_valid)
		mask &= FS_COMMON_FL;

In this case, we can set supported flags by mask only instead of BUG_ON.

/* Flags shared betwen flags/xflags */
	(FS_SYNC_FL | FS_IMMUTABLE_FL | FS_APPEND_FL | \
	 FS_NODUMP_FL |	FS_NOATIME_FL | FS_DAX_FL | \
	 FS_PROJINHERIT_FL)

Fixes: 9b1bb01c8ae7 ("f2fs: convert to fileattr")
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/file.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 44a4650aea7b..ceb575f99048 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1817,7 +1817,8 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
 	struct f2fs_inode_info *fi = F2FS_I(inode);
 	u32 masked_flags = fi->i_flags & mask;
 
-	f2fs_bug_on(F2FS_I_SB(inode), (iflags & ~mask));
+	/* mask can be shrunk by flags_valid selector */
+	iflags &= mask;
 
 	/* Is it quota file? Do not allow user to mess with it */
 	if (IS_NOQUOTA(inode))
-- 
cgit v1.2.3


From a12cc5b423d4f36dc1a1ea3911e49cf9dff43898 Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Thu, 6 May 2021 17:00:43 +0800
Subject: f2fs: compress: fix to free compress page correctly

In error path of f2fs_write_compressed_pages(), it needs to call
f2fs_compress_free_page() to release temporary page.

Fixes: 5e6bbde95982 ("f2fs: introduce mempool for {,de}compress intermediate page allocation")
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/compress.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 53b13787eb2c..2acaefa10036 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1372,7 +1372,8 @@ out_destroy_crypt:
 	for (i = 0; i < cc->nr_cpages; i++) {
 		if (!cc->cpages[i])
 			continue;
-		f2fs_put_page(cc->cpages[i], 1);
+		f2fs_compress_free_page(cc->cpages[i]);
+		cc->cpages[i] = NULL;
 	}
 out_put_cic:
 	kmem_cache_free(cic_entry_slab, cic);
-- 
cgit v1.2.3


From a949dc5f2c5cfe0c910b664650f45371254c0744 Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Mon, 10 May 2021 17:30:31 +0800
Subject: f2fs: compress: fix race condition of overwrite vs truncate

pos_fsstress testcase complains a panic as belew:

------------[ cut here ]------------
kernel BUG at fs/f2fs/compress.c:1082!
invalid opcode: 0000 [#1] SMP PTI
CPU: 4 PID: 2753477 Comm: kworker/u16:2 Tainted: G           OE     5.12.0-rc1-custom #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
Workqueue: writeback wb_workfn (flush-252:16)
RIP: 0010:prepare_compress_overwrite+0x4c0/0x760 [f2fs]
Call Trace:
 f2fs_prepare_compress_overwrite+0x5f/0x80 [f2fs]
 f2fs_write_cache_pages+0x468/0x8a0 [f2fs]
 f2fs_write_data_pages+0x2a4/0x2f0 [f2fs]
 do_writepages+0x38/0xc0
 __writeback_single_inode+0x44/0x2a0
 writeback_sb_inodes+0x223/0x4d0
 __writeback_inodes_wb+0x56/0xf0
 wb_writeback+0x1dd/0x290
 wb_workfn+0x309/0x500
 process_one_work+0x220/0x3c0
 worker_thread+0x53/0x420
 kthread+0x12f/0x150
 ret_from_fork+0x22/0x30

The root cause is truncate() may race with overwrite as below,
so that one reference count left in page can not guarantee the
page attaching in mapping tree all the time, after truncation,
later find_lock_page() may return NULL pointer.

- prepare_compress_overwrite
 - f2fs_pagecache_get_page
 - unlock_page
					- f2fs_setattr
					 - truncate_setsize
					  - truncate_inode_page
					   - delete_from_page_cache
 - find_lock_page

Fix this by avoiding referencing updated page.

Fixes: 4c8ff7095bef ("f2fs: support data compression")
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/compress.c | 35 ++++++++++++-----------------------
 1 file changed, 12 insertions(+), 23 deletions(-)

diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 2acaefa10036..79348bc56e35 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -117,19 +117,6 @@ static void f2fs_unlock_rpages(struct compress_ctx *cc, int len)
 	f2fs_drop_rpages(cc, len, true);
 }
 
-static void f2fs_put_rpages_mapping(struct address_space *mapping,
-				pgoff_t start, int len)
-{
-	int i;
-
-	for (i = 0; i < len; i++) {
-		struct page *page = find_get_page(mapping, start + i);
-
-		put_page(page);
-		put_page(page);
-	}
-}
-
 static void f2fs_put_rpages_wbc(struct compress_ctx *cc,
 		struct writeback_control *wbc, bool redirty, int unlock)
 {
@@ -1036,7 +1023,7 @@ retry:
 		}
 
 		if (PageUptodate(page))
-			unlock_page(page);
+			f2fs_put_page(page, 1);
 		else
 			f2fs_compress_ctx_add_page(cc, page);
 	}
@@ -1046,32 +1033,34 @@ retry:
 
 		ret = f2fs_read_multi_pages(cc, &bio, cc->cluster_size,
 					&last_block_in_bio, false, true);
+		f2fs_put_rpages(cc);
 		f2fs_destroy_compress_ctx(cc);
 		if (ret)
-			goto release_pages;
+			goto out;
 		if (bio)
 			f2fs_submit_bio(sbi, bio, DATA);
 
 		ret = f2fs_init_compress_ctx(cc);
 		if (ret)
-			goto release_pages;
+			goto out;
 	}
 
 	for (i = 0; i < cc->cluster_size; i++) {
 		f2fs_bug_on(sbi, cc->rpages[i]);
 
 		page = find_lock_page(mapping, start_idx + i);
-		f2fs_bug_on(sbi, !page);
+		if (!page) {
+			/* page can be truncated */
+			goto release_and_retry;
+		}
 
 		f2fs_wait_on_page_writeback(page, DATA, true, true);
-
 		f2fs_compress_ctx_add_page(cc, page);
-		f2fs_put_page(page, 0);
 
 		if (!PageUptodate(page)) {
+release_and_retry:
+			f2fs_put_rpages(cc);
 			f2fs_unlock_rpages(cc, i + 1);
-			f2fs_put_rpages_mapping(mapping, start_idx,
-					cc->cluster_size);
 			f2fs_destroy_compress_ctx(cc);
 			goto retry;
 		}
@@ -1103,10 +1092,10 @@ retry:
 	}
 
 unlock_pages:
+	f2fs_put_rpages(cc);
 	f2fs_unlock_rpages(cc, i);
-release_pages:
-	f2fs_put_rpages_mapping(mapping, start_idx, i);
 	f2fs_destroy_compress_ctx(cc);
+out:
 	return ret;
 }
 
-- 
cgit v1.2.3


From 8bfbfb0ddd706b1ce2e89259ecc45f192c0ec2bf Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Mon, 10 May 2021 17:30:32 +0800
Subject: f2fs: compress: fix to assign cc.cluster_idx correctly

In f2fs_destroy_compress_ctx(), after f2fs_destroy_compress_ctx(),
cc.cluster_idx will be cleared w/ NULL_CLUSTER, f2fs_cluster_blocks()
may check wrong cluster metadata, fix it.

Fixes: 4c8ff7095bef ("f2fs: support data compression")
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/compress.c | 17 +++++++++--------
 fs/f2fs/data.c     |  6 +++---
 fs/f2fs/f2fs.h     |  2 +-
 3 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 79348bc56e35..925a5ca3744a 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -145,13 +145,14 @@ int f2fs_init_compress_ctx(struct compress_ctx *cc)
 	return cc->rpages ? 0 : -ENOMEM;
 }
 
-void f2fs_destroy_compress_ctx(struct compress_ctx *cc)
+void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse)
 {
 	page_array_free(cc->inode, cc->rpages, cc->cluster_size);
 	cc->rpages = NULL;
 	cc->nr_rpages = 0;
 	cc->nr_cpages = 0;
-	cc->cluster_idx = NULL_CLUSTER;
+	if (!reuse)
+		cc->cluster_idx = NULL_CLUSTER;
 }
 
 void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page)
@@ -1034,7 +1035,7 @@ retry:
 		ret = f2fs_read_multi_pages(cc, &bio, cc->cluster_size,
 					&last_block_in_bio, false, true);
 		f2fs_put_rpages(cc);
-		f2fs_destroy_compress_ctx(cc);
+		f2fs_destroy_compress_ctx(cc, true);
 		if (ret)
 			goto out;
 		if (bio)
@@ -1061,7 +1062,7 @@ retry:
 release_and_retry:
 			f2fs_put_rpages(cc);
 			f2fs_unlock_rpages(cc, i + 1);
-			f2fs_destroy_compress_ctx(cc);
+			f2fs_destroy_compress_ctx(cc, true);
 			goto retry;
 		}
 	}
@@ -1094,7 +1095,7 @@ release_and_retry:
 unlock_pages:
 	f2fs_put_rpages(cc);
 	f2fs_unlock_rpages(cc, i);
-	f2fs_destroy_compress_ctx(cc);
+	f2fs_destroy_compress_ctx(cc, true);
 out:
 	return ret;
 }
@@ -1130,7 +1131,7 @@ bool f2fs_compress_write_end(struct inode *inode, void *fsdata,
 		set_cluster_dirty(&cc);
 
 	f2fs_put_rpages_wbc(&cc, NULL, false, 1);
-	f2fs_destroy_compress_ctx(&cc);
+	f2fs_destroy_compress_ctx(&cc, false);
 
 	return first_index;
 }
@@ -1350,7 +1351,7 @@ unlock_continue:
 	f2fs_put_rpages(cc);
 	page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
 	cc->cpages = NULL;
-	f2fs_destroy_compress_ctx(cc);
+	f2fs_destroy_compress_ctx(cc, false);
 	return 0;
 
 out_destroy_crypt:
@@ -1512,7 +1513,7 @@ write:
 	err = f2fs_write_raw_pages(cc, submitted, wbc, io_type);
 	f2fs_put_rpages_wbc(cc, wbc, false, 0);
 destroy_out:
-	f2fs_destroy_compress_ctx(cc);
+	f2fs_destroy_compress_ctx(cc, false);
 	return err;
 }
 
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 96f1a354f89f..33e56ae84e35 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2287,7 +2287,7 @@ static int f2fs_mpage_readpages(struct inode *inode,
 							max_nr_pages,
 							&last_block_in_bio,
 							rac != NULL, false);
-				f2fs_destroy_compress_ctx(&cc);
+				f2fs_destroy_compress_ctx(&cc, false);
 				if (ret)
 					goto set_error_page;
 			}
@@ -2332,7 +2332,7 @@ next_page:
 							max_nr_pages,
 							&last_block_in_bio,
 							rac != NULL, false);
-				f2fs_destroy_compress_ctx(&cc);
+				f2fs_destroy_compress_ctx(&cc, false);
 			}
 		}
 #endif
@@ -3033,7 +3033,7 @@ next:
 		}
 	}
 	if (f2fs_compressed_file(inode))
-		f2fs_destroy_compress_ctx(&cc);
+		f2fs_destroy_compress_ctx(&cc, false);
 #endif
 	if (retry) {
 		index = 0;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 044878866ca3..c83d90125ebd 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3956,7 +3956,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc);
 void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed);
 void f2fs_put_page_dic(struct page *page);
 int f2fs_init_compress_ctx(struct compress_ctx *cc);
-void f2fs_destroy_compress_ctx(struct compress_ctx *cc);
+void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse);
 void f2fs_init_compress_info(struct f2fs_sb_info *sbi);
 int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi);
 void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi);
-- 
cgit v1.2.3


From 576f9eacc680d2b1f37e8010cff62f7b227ea769 Mon Sep 17 00:00:00 2001
From: Joakim Zhang <qiangqing.zhang@nxp.com>
Date: Mon, 10 May 2021 14:55:09 +0800
Subject: net: stmmac: Fix MAC WoL not working if PHY does not support WoL

Both get and set WoL will check device_can_wakeup(), if MAC supports PMT, it
will set device wakeup capability. After commit 1d8e5b0f3f2c ("net: stmmac:
Support WOL with phy"), device wakeup capability will be overwrite in
stmmac_init_phy() according to phy's Wol feature. If phy doesn't support WoL,
then MAC will lose wakeup capability. To fix this issue, only overwrite device
wakeup capability when MAC doesn't support PMT.

For STMMAC now driver checks MAC's WoL capability if MAC supports PMT, if
not support, driver will check PHY's WoL capability.

Fixes: 1d8e5b0f3f2c ("net: stmmac: Support WOL with phy")
Reviewed-by: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 345b4c6d1fd4..fea3bf07ae89 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1196,7 +1196,6 @@ static void stmmac_check_pcs_mode(struct stmmac_priv *priv)
  */
 static int stmmac_init_phy(struct net_device *dev)
 {
-	struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL };
 	struct stmmac_priv *priv = netdev_priv(dev);
 	struct device_node *node;
 	int ret;
@@ -1222,8 +1221,12 @@ static int stmmac_init_phy(struct net_device *dev)
 		ret = phylink_connect_phy(priv->phylink, phydev);
 	}
 
-	phylink_ethtool_get_wol(priv->phylink, &wol);
-	device_set_wakeup_capable(priv->device, !!wol.supported);
+	if (!priv->plat->pmt) {
+		struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL };
+
+		phylink_ethtool_get_wol(priv->phylink, &wol);
+		device_set_wakeup_capable(priv->device, !!wol.supported);
+	}
 
 	return ret;
 }
-- 
cgit v1.2.3


From 29249eac5225429b898f278230a6ca2baa1ae154 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 11 May 2021 19:13:51 +0200
Subject: mptcp: fix data stream corruption

Maxim reported several issues when forcing a TCP transparent proxy
to use the MPTCP protocol for the inbound connections. He also
provided a clean reproducer.

The problem boils down to 'mptcp_frag_can_collapse_to()' assuming
that only MPTCP will use the given page_frag.

If others - e.g. the plain TCP protocol - allocate page fragments,
we can end-up re-using already allocated memory for mptcp_data_frag.

Fix the issue ensuring that the to-be-expanded data fragment is
located at the current page frag end.

v1 -> v2:
 - added missing fixes tag (Mat)

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/178
Reported-and-tested-by: Maxim Galaganov <max@internet.ru>
Fixes: 18b683bff89d ("mptcp: queue data for mptcp level retransmission")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 29a2d690d8d5..2d21a4793d9d 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -879,12 +879,18 @@ static bool mptcp_skb_can_collapse_to(u64 write_seq,
 	       !mpext->frozen;
 }
 
+/* we can append data to the given data frag if:
+ * - there is space available in the backing page_frag
+ * - the data frag tail matches the current page_frag free offset
+ * - the data frag end sequence number matches the current write seq
+ */
 static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk,
 				       const struct page_frag *pfrag,
 				       const struct mptcp_data_frag *df)
 {
 	return df && pfrag->page == df->page &&
 		pfrag->size - pfrag->offset > 0 &&
+		pfrag->offset == (df->offset + df->data_len) &&
 		df->data_seq + df->data_len == msk->write_seq;
 }
 
-- 
cgit v1.2.3


From bcbda3fc616272686208f9c4d5f6dccb65360bd8 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Tue, 11 May 2021 11:11:32 -0700
Subject: ionic: fix ptp support config breakage

When IONIC=y and PTP_1588_CLOCK=m were set in the .config file
the driver link failed with undefined references.

We add the dependancy
	depends on PTP_1588_CLOCK || !PTP_1588_CLOCK
to clear this up.

If PTP_1588_CLOCK=m, the depends limits IONIC to =m (or disabled).
If PTP_1588_CLOCK is disabled, IONIC can be any of y/m/n.

Fixes: 61db421da31b ("ionic: link in the new hw timestamp code")
Reported-by: kernel test robot <lkp@intel.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Allen Hubbe <allenbh@pensando.io>
Signed-off-by: Shannon Nelson <snelson@pensando.io>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/pensando/Kconfig b/drivers/net/ethernet/pensando/Kconfig
index 5f8b0bb3af6e..202973a82712 100644
--- a/drivers/net/ethernet/pensando/Kconfig
+++ b/drivers/net/ethernet/pensando/Kconfig
@@ -20,6 +20,7 @@ if NET_VENDOR_PENSANDO
 config IONIC
 	tristate "Pensando Ethernet IONIC Support"
 	depends on 64BIT && PCI
+	depends on PTP_1588_CLOCK || !PTP_1588_CLOCK
 	select NET_DEVLINK
 	select DIMLIB
 	help
-- 
cgit v1.2.3


From 440c3247cba3d9433ac435d371dd7927d68772a7 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Tue, 11 May 2021 14:42:04 -0500
Subject: net: ipa: memory region array is variable size

IPA configuration data includes an array of memory region
descriptors.  That was a fixed-size array at one time, but
at some point we started defining it such that it was only
as big as required for a given platform.  The actual number
of entries in the array is recorded in the configuration data
along with the array.

A loop in ipa_mem_config() still assumes the array has entries
for all defined memory region IDs.  As a result, this loop can
go past the end of the actual array and attempt to write
"canary" values based on nonsensical data.

Fix this, by stashing the number of entries in the array, and
using that rather than IPA_MEM_COUNT in the initialization loop
found in ipa_mem_config().

The only remaining use of IPA_MEM_COUNT is in a validation check
to ensure configuration data doesn't have too many entries.
That's fine for now.

Fixes: 3128aae8c439a ("net: ipa: redefine struct ipa_mem_data")
Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa.h     | 2 ++
 drivers/net/ipa/ipa_mem.c | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ipa/ipa.h b/drivers/net/ipa/ipa.h
index e7ff376cb5b7..744406832a77 100644
--- a/drivers/net/ipa/ipa.h
+++ b/drivers/net/ipa/ipa.h
@@ -58,6 +58,7 @@ enum ipa_flag {
  * @mem_virt:		Virtual address of IPA-local memory space
  * @mem_offset:		Offset from @mem_virt used for access to IPA memory
  * @mem_size:		Total size (bytes) of memory at @mem_virt
+ * @mem_count:		Number of entries in the mem array
  * @mem:		Array of IPA-local memory region descriptors
  * @imem_iova:		I/O virtual address of IPA region in IMEM
  * @imem_size:		Size of IMEM region
@@ -103,6 +104,7 @@ struct ipa {
 	void *mem_virt;
 	u32 mem_offset;
 	u32 mem_size;
+	u32 mem_count;
 	const struct ipa_mem *mem;
 
 	unsigned long imem_iova;
diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c
index c5c3b1b7e67d..1624125e7459 100644
--- a/drivers/net/ipa/ipa_mem.c
+++ b/drivers/net/ipa/ipa_mem.c
@@ -180,7 +180,7 @@ int ipa_mem_config(struct ipa *ipa)
 	 * for the region, write "canary" values in the space prior to
 	 * the region's base address.
 	 */
-	for (mem_id = 0; mem_id < IPA_MEM_COUNT; mem_id++) {
+	for (mem_id = 0; mem_id < ipa->mem_count; mem_id++) {
 		const struct ipa_mem *mem = &ipa->mem[mem_id];
 		u16 canary_count;
 		__le32 *canary;
@@ -487,6 +487,7 @@ int ipa_mem_init(struct ipa *ipa, const struct ipa_mem_data *mem_data)
 	ipa->mem_size = resource_size(res);
 
 	/* The ipa->mem[] array is indexed by enum ipa_mem_id values */
+	ipa->mem_count = mem_data->local_count;
 	ipa->mem = mem_data->local;
 
 	ret = ipa_imem_init(ipa, mem_data->imem_addr, mem_data->imem_size);
-- 
cgit v1.2.3


From a78339698ab1f43435fbe67fcd6de8f4f6eb9eec Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Thu, 6 May 2021 14:49:45 +0000
Subject: powerpc/interrupts: Fix kuep_unlock() call

Same as kuap_user_restore(), kuep_unlock() has to be called when
really returning to user, that is in interrupt_exit_user_prepare(),
not in interrupt_exit_prepare().

Fixes: b5efec00b671 ("powerpc/32s: Move KUEP locking/unlocking in C")
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/b831e54a2579db24fbef836ed415588ce2b3e825.1620312573.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/include/asm/interrupt.h | 2 --
 arch/powerpc/kernel/interrupt.c      | 1 +
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 44cde2e129b8..c77e8f57ff06 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -153,8 +153,6 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
  */
 static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state)
 {
-	if (user_mode(regs))
-		kuep_unlock();
 }
 
 static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index e4559f8914eb..ed6cebcb7847 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -427,6 +427,7 @@ again:
 
 	/* Restore user access locks last */
 	kuap_user_restore(regs);
+	kuep_unlock();
 
 	return ret;
 }
-- 
cgit v1.2.3


From 5d510ed78bcfcbbd3b3891cbe79cd7543bce1d05 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Thu, 6 May 2021 11:56:31 +0000
Subject: powerpc/syscall: Calling kuap_save_and_lock() is wrong

kuap_save_and_lock() is only for interrupts inside kernel.

system call are only from user, calling kuap_save_and_lock()
is wrong.

Fixes: c16728835eec ("powerpc/32: Manage KUAP in C")
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/332773775cf24a422105dee2d383fb8f04589045.1620302182.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/kernel/interrupt.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index ed6cebcb7847..e0938ba298f2 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -34,9 +34,6 @@ notrace long system_call_exception(long r3, long r4, long r5,
 	syscall_fn f;
 
 	kuep_lock();
-#ifdef CONFIG_PPC32
-	kuap_save_and_lock(regs);
-#endif
 
 	regs->orig_gpr3 = r3;
 
-- 
cgit v1.2.3


From 2c8c89b95831f46a2fb31a8d0fef4601694023ce Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 8 May 2021 20:14:52 +1000
Subject: powerpc/pseries: Fix hcall tracing recursion in pv queued spinlocks

The paravit queued spinlock slow path adds itself to the queue then
calls pv_wait to wait for the lock to become free. This is implemented
by calling H_CONFER to donate cycles.

When hcall tracing is enabled, this H_CONFER call can lead to a spin
lock being taken in the tracing code, which will result in the lock to
be taken again, which will also go to the slow path because it queues
behind itself and so won't ever make progress.

An example trace of a deadlock:

  __pv_queued_spin_lock_slowpath
  trace_clock_global
  ring_buffer_lock_reserve
  trace_event_buffer_lock_reserve
  trace_event_buffer_reserve
  trace_event_raw_event_hcall_exit
  __trace_hcall_exit
  plpar_hcall_norets_trace
  __pv_queued_spin_lock_slowpath
  trace_clock_global
  ring_buffer_lock_reserve
  trace_event_buffer_lock_reserve
  trace_event_buffer_reserve
  trace_event_raw_event_rcu_dyntick
  rcu_irq_exit
  irq_exit
  __do_irq
  call_do_irq
  do_IRQ
  hardware_interrupt_common_virt

Fix this by introducing plpar_hcall_norets_notrace(), and using that to
make SPLPAR virtual processor dispatching hcalls by the paravirt
spinlock code.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210508101455.1578318-2-npiggin@gmail.com
---
 arch/powerpc/include/asm/hvcall.h       |  3 +++
 arch/powerpc/include/asm/paravirt.h     | 22 +++++++++++++++++++---
 arch/powerpc/platforms/pseries/hvCall.S | 10 ++++++++++
 arch/powerpc/platforms/pseries/lpar.c   |  3 +--
 4 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 443050906018..e3b29eda8074 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -448,6 +448,9 @@
  */
 long plpar_hcall_norets(unsigned long opcode, ...);
 
+/* Variant which does not do hcall tracing */
+long plpar_hcall_norets_notrace(unsigned long opcode, ...);
+
 /**
  * plpar_hcall: - Make a pseries hypervisor call
  * @opcode: The hypervisor call to make.
diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h
index 5d1726bb28e7..bcb7b5f917be 100644
--- a/arch/powerpc/include/asm/paravirt.h
+++ b/arch/powerpc/include/asm/paravirt.h
@@ -28,19 +28,35 @@ static inline u32 yield_count_of(int cpu)
 	return be32_to_cpu(yield_count);
 }
 
+/*
+ * Spinlock code confers and prods, so don't trace the hcalls because the
+ * tracing code takes spinlocks which can cause recursion deadlocks.
+ *
+ * These calls are made while the lock is not held: the lock slowpath yields if
+ * it can not acquire the lock, and unlock slow path might prod if a waiter has
+ * yielded). So this may not be a problem for simple spin locks because the
+ * tracing does not technically recurse on the lock, but we avoid it anyway.
+ *
+ * However the queued spin lock contended path is more strictly ordered: the
+ * H_CONFER hcall is made after the task has queued itself on the lock, so then
+ * recursing on that lock will cause the task to then queue up again behind the
+ * first instance (or worse: queued spinlocks use tricks that assume a context
+ * never waits on more than one spinlock, so such recursion may cause random
+ * corruption in the lock code).
+ */
 static inline void yield_to_preempted(int cpu, u32 yield_count)
 {
-	plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(cpu), yield_count);
+	plpar_hcall_norets_notrace(H_CONFER, get_hard_smp_processor_id(cpu), yield_count);
 }
 
 static inline void prod_cpu(int cpu)
 {
-	plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu));
+	plpar_hcall_norets_notrace(H_PROD, get_hard_smp_processor_id(cpu));
 }
 
 static inline void yield_to_any(void)
 {
-	plpar_hcall_norets(H_CONFER, -1, 0);
+	plpar_hcall_norets_notrace(H_CONFER, -1, 0);
 }
 #else
 static inline bool is_shared_processor(void)
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index 2136e42833af..8a2b8d64265b 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -102,6 +102,16 @@ END_FTR_SECTION(0, 1);						\
 #define HCALL_BRANCH(LABEL)
 #endif
 
+_GLOBAL_TOC(plpar_hcall_norets_notrace)
+	HMT_MEDIUM
+
+	mfcr	r0
+	stw	r0,8(r1)
+	HVSC				/* invoke the hypervisor */
+	lwz	r0,8(r1)
+	mtcrf	0xff,r0
+	blr				/* return r3 = status */
+
 _GLOBAL_TOC(plpar_hcall_norets)
 	HMT_MEDIUM
 
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 1f3152ad7213..b619568a4d04 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -1830,8 +1830,7 @@ void hcall_tracepoint_unregfunc(void)
 
 /*
  * Since the tracing code might execute hcalls we need to guard against
- * recursion. One example of this are spinlocks calling H_YIELD on
- * shared processor partitions.
+ * recursion.
  */
 static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);
 
-- 
cgit v1.2.3


From a3f1a39a5643d5c5ed3eee4edd933e0ebfeeed6e Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 8 May 2021 20:14:53 +1000
Subject: powerpc/pseries: Don't trace hcall tracing wrapper

This doesn't seem very useful to trace before the recursion check, even
if the ftrace code has any recursion checks of its own. Be on the safe
side and don't trace the hcall trace wrappers.

Reported-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210508101455.1578318-3-npiggin@gmail.com
---
 arch/powerpc/platforms/pseries/lpar.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index b619568a4d04..d79d7410c320 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -1835,7 +1835,7 @@ void hcall_tracepoint_unregfunc(void)
 static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);
 
 
-void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
+notrace void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
 {
 	unsigned long flags;
 	unsigned int *depth;
@@ -1863,7 +1863,7 @@ out:
 	local_irq_restore(flags);
 }
 
-void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf)
+notrace void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf)
 {
 	unsigned long flags;
 	unsigned int *depth;
-- 
cgit v1.2.3


From 7058f4b13edd9dd2cb3c5b4fe340d8307dbe0208 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 8 May 2021 20:14:54 +1000
Subject: powerpc/pseries: use notrace hcall variant for H_CEDE idle

Rather than special-case H_CEDE in the hcall trace wrappers, make the
idle H_CEDE call use plpar_hcall_norets_notrace().

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210508101455.1578318-4-npiggin@gmail.com
---
 arch/powerpc/include/asm/plpar_wrappers.h |  6 +++++-
 arch/powerpc/platforms/pseries/lpar.c     | 10 ----------
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index ece84a430701..83e0f701ebc6 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -28,7 +28,11 @@ static inline void set_cede_latency_hint(u8 latency_hint)
 
 static inline long cede_processor(void)
 {
-	return plpar_hcall_norets(H_CEDE);
+	/*
+	 * We cannot call tracepoints inside RCU idle regions which
+	 * means we must not trace H_CEDE.
+	 */
+	return plpar_hcall_norets_notrace(H_CEDE);
 }
 
 static inline long extended_cede_processor(unsigned long latency_hint)
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index d79d7410c320..ad1cec80019b 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -1840,13 +1840,6 @@ notrace void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
 	unsigned long flags;
 	unsigned int *depth;
 
-	/*
-	 * We cannot call tracepoints inside RCU idle regions which
-	 * means we must not trace H_CEDE.
-	 */
-	if (opcode == H_CEDE)
-		return;
-
 	local_irq_save(flags);
 
 	depth = this_cpu_ptr(&hcall_trace_depth);
@@ -1868,9 +1861,6 @@ notrace void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf)
 	unsigned long flags;
 	unsigned int *depth;
 
-	if (opcode == H_CEDE)
-		return;
-
 	local_irq_save(flags);
 
 	depth = this_cpu_ptr(&hcall_trace_depth);
-- 
cgit v1.2.3


From 4f242fc5f2e24412b89e934dad025b10293b2712 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 8 May 2021 20:14:55 +1000
Subject: powerpc/pseries: warn if recursing into the hcall tracing code

The hcall tracing code has a recursion check built in, which skips
tracing if we are already tracing an hcall.

However if the tracing code has problems with recursion, this check
may not catch all cases because the tracing code could be invoked from
a different tracepoint first, then make an hcall that gets traced,
then recurse.

Add an explicit warning if recursion is detected here, which might help
to notice tracing code making hcalls. Really the core trace code should
have its own recursion checking and warnings though.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210508101455.1578318-5-npiggin@gmail.com
---
 arch/powerpc/platforms/pseries/lpar.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index ad1cec80019b..dab356e3ff87 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -1829,8 +1829,14 @@ void hcall_tracepoint_unregfunc(void)
 #endif
 
 /*
- * Since the tracing code might execute hcalls we need to guard against
- * recursion.
+ * Keep track of hcall tracing depth and prevent recursion. Warn if any is
+ * detected because it may indicate a problem. This will not catch all
+ * problems with tracing code making hcalls, because the tracing might have
+ * been invoked from a non-hcall, so the first hcall could recurse into it
+ * without warning here, but this better than nothing.
+ *
+ * Hcalls with specific problems being traced should use the _notrace
+ * plpar_hcall variants.
  */
 static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);
 
@@ -1844,7 +1850,7 @@ notrace void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
 
 	depth = this_cpu_ptr(&hcall_trace_depth);
 
-	if (*depth)
+	if (WARN_ON_ONCE(*depth))
 		goto out;
 
 	(*depth)++;
@@ -1865,7 +1871,7 @@ notrace void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf)
 
 	depth = this_cpu_ptr(&hcall_trace_depth);
 
-	if (*depth)
+	if (*depth) /* Don't warn again on the way out */
 		goto out;
 
 	(*depth)++;
-- 
cgit v1.2.3


From 7315e457d6bc342d06ba0b7ee498221c5237a547 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Sat, 8 May 2021 09:25:32 +0000
Subject: powerpc/uaccess: Fix __get_user() with CONFIG_CC_HAS_ASM_GOTO_OUTPUT

Building kernel mainline with GCC 11 leads to following failure
when starting 'init':

  init[1]: bad frame in sys_sigreturn: 7ff5a900 nip 001083cc lr 001083c4
  Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b

This is an issue due to a segfault happening in
__unsafe_restore_general_regs() in a loop copying registers from user
to kernel:

  10:	7d 09 03 a6 	mtctr   r8
  14:	80 ca 00 00 	lwz     r6,0(r10)
  18:	80 ea 00 04 	lwz     r7,4(r10)
  1c:	90 c9 00 08 	stw     r6,8(r9)
  20:	90 e9 00 0c 	stw     r7,12(r9)
  24:	39 0a 00 08 	addi    r8,r10,8
  28:	39 29 00 08 	addi    r9,r9,8
  2c:	81 4a 00 08 	lwz     r10,8(r10)  <== r10 is clobbered here
  30:	81 6a 00 0c 	lwz     r11,12(r10)
  34:	91 49 00 08 	stw     r10,8(r9)
  38:	91 69 00 0c 	stw     r11,12(r9)
  3c:	39 48 00 08 	addi    r10,r8,8
  40:	39 29 00 08 	addi    r9,r9,8
  44:	42 00 ff d0 	bdnz    14 <__unsafe_restore_general_regs+0x14>

As shown above, this is due to r10 being re-used by GCC. This didn't
happen with CLANG.

This is fixed by tagging 'x' output as an earlyclobber operand in
__get_user_asm2_goto().

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/cf0a050d124d4f426cdc7a74009d17b01d8d8969.1620465917.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/include/asm/uaccess.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index a09e4240c5b1..22c79ab40006 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -157,7 +157,7 @@ do {								\
 		"2:	lwz%X1 %L0, %L1\n"			\
 		EX_TABLE(1b, %l2)				\
 		EX_TABLE(2b, %l2)				\
-		: "=r" (x)					\
+		: "=&r" (x)					\
 		: "m" (*addr)					\
 		:						\
 		: label)
-- 
cgit v1.2.3


From bc581dbab26edf0b6acc98c76943b4a0c7d672a2 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Sat, 8 May 2021 09:25:44 +0000
Subject: powerpc/signal: Fix possible build failure with
 unsafe_copy_fpr_{to/from}_user

When neither CONFIG_VSX nor CONFIG_PPC_FPU_REGS are selected,
unsafe_copy_fpr_to_user() and unsafe_copy_fpr_from_user() are
doing nothing.

Then, unless the 'label' operand is used elsewhere, GCC complains
about it being defined but not used.

To fix that, add an impossible 'goto label'.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/cadc0a328bc8e6c5bf133193e7547d5c10ae7895.1620465920.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/kernel/signal.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index f4aafa337c2e..1f07317964e4 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -166,9 +166,9 @@ copy_ckfpr_from_user(struct task_struct *task, void __user *from)
 }
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 #else
-#define unsafe_copy_fpr_to_user(to, task, label) do { } while (0)
+#define unsafe_copy_fpr_to_user(to, task, label) do { if (0) goto label;} while (0)
 
-#define unsafe_copy_fpr_from_user(task, from, label) do { } while (0)
+#define unsafe_copy_fpr_from_user(task, from, label) do { if (0) goto label;} while (0)
 
 static inline unsigned long
 copy_fpr_to_user(void __user *to, struct task_struct *task)
-- 
cgit v1.2.3


From 63970f3c37e75997ed86dbdfdc83df35f2152bb1 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Sat, 8 May 2021 06:36:21 +0000
Subject: powerpc/legacy_serial: Fix UBSAN: array-index-out-of-bounds

UBSAN complains when a pointer is calculated with invalid
'legacy_serial_console' index, allthough the index is verified
before dereferencing the pointer.

Fix it by checking 'legacy_serial_console' validity before
calculating pointers.

Fixes: 0bd3f9e953bd ("powerpc/legacy_serial: Use early_ioremap()")
Reported-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210511010712.750096-1-mpe@ellerman.id.au
---
 arch/powerpc/kernel/legacy_serial.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 8b2c1a8553a0..cfc03e016ff2 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -356,13 +356,16 @@ static void __init setup_legacy_serial_console(int console)
 
 static int __init ioremap_legacy_serial_console(void)
 {
-	struct legacy_serial_info *info = &legacy_serial_infos[legacy_serial_console];
-	struct plat_serial8250_port *port = &legacy_serial_ports[legacy_serial_console];
+	struct plat_serial8250_port *port;
+	struct legacy_serial_info *info;
 	void __iomem *vaddr;
 
 	if (legacy_serial_console < 0)
 		return 0;
 
+	info = &legacy_serial_infos[legacy_serial_console];
+	port = &legacy_serial_ports[legacy_serial_console];
+
 	if (!info->early_addr)
 		return 0;
 
-- 
cgit v1.2.3


From da3bb206c9ceb0736d9e2897ea697acabad35833 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 11 May 2021 20:54:59 +1000
Subject: KVM: PPC: Book3S HV: Fix kvm_unmap_gfn_range_hv() for Hash MMU

Commit 32b48bf8514c ("KVM: PPC: Book3S HV: Fix conversion to gfn-based
MMU notifier callbacks") fixed kvm_unmap_gfn_range_hv() by adding a for
loop over each gfn in the range.

But for the Hash MMU it repeatedly calls kvm_unmap_rmapp() with the
first gfn of the range, rather than iterating through the range.

This exhibits as strange guest behaviour, sometimes crashing in firmare,
or booting and then guest userspace crashing unexpectedly.

Fix it by passing the iterator, gfn, to kvm_unmap_rmapp().

Fixes: 32b48bf8514c ("KVM: PPC: Book3S HV: Fix conversion to gfn-based MMU notifier callbacks")
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210511105459.800788-1-mpe@ellerman.id.au
---
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 2d9193cd73be..c63e263312a4 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -840,7 +840,7 @@ bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range)
 			kvm_unmap_radix(kvm, range->slot, gfn);
 	} else {
 		for (gfn = range->start; gfn < range->end; gfn++)
-			kvm_unmap_rmapp(kvm, range->slot, range->start);
+			kvm_unmap_rmapp(kvm, range->slot, gfn);
 	}
 
 	return false;
-- 
cgit v1.2.3


From e9f4eee9a0023ba22db9560d4cc6ee63f933dae8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 11 May 2021 21:38:36 -0400
Subject: blk-iocost: fix weight updates of inner active iocgs

When the weight of an active iocg is updated, weight_updated() is called
which in turn calls __propagate_weights() to update the active and inuse
weights so that the effective hierarchical weights are update accordingly.

The current implementation is incorrect for inner active nodes. For an
active leaf iocg, inuse can be any value between 1 and active and the
difference represents how much the iocg is donating. When weight is updated,
as long as inuse is clamped between 1 and the new weight, we're alright and
this is what __propagate_weights() currently implements.

However, that's not how an active inner node's inuse is set. An inner node's
inuse is solely determined by the ratio between the sums of inuse's and
active's of its children - ie. they're results of propagating the leaves'
active and inuse weights upwards. __propagate_weights() incorrectly applies
the same clamping as for a leaf when an active inner node's weight is
updated. Consider a hierarchy which looks like the following with saturating
workloads in AA and BB.

     R
   /   \
  A     B
  |     |
 AA     BB

1. For both A and B, active=100, inuse=100, hwa=0.5, hwi=0.5.

2. echo 200 > A/io.weight

3. __propagate_weights() update A's active to 200 and leave inuse at 100 as
   it's already between 1 and the new active, making A:active=200,
   A:inuse=100. As R's active_sum is updated along with A's active,
   A:hwa=2/3, B:hwa=1/3. However, because the inuses didn't change, the
   hwi's remain unchanged at 0.5.

4. The weight of A is now twice that of B but AA and BB still have the same
   hwi of 0.5 and thus are doing the same amount of IOs.

Fix it by making __propgate_weights() always calculate the inuse of an
active inner iocg based on the ratio of child_inuse_sum to child_active_sum.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Dan Schatzberg <dschatzberg@fb.com>
Fixes: 7caa47151ab2 ("blkcg: implement blk-iocost")
Cc: stable@vger.kernel.org # v5.4+
Link: https://lore.kernel.org/r/YJsxnLZV1MnBcqjj@slm.duckdns.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-iocost.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index e0c4baa01857..c2d6bc88d3f1 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -1069,7 +1069,17 @@ static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse,
 
 	lockdep_assert_held(&ioc->lock);
 
-	inuse = clamp_t(u32, inuse, 1, active);
+	/*
+	 * For an active leaf node, its inuse shouldn't be zero or exceed
+	 * @active. An active internal node's inuse is solely determined by the
+	 * inuse to active ratio of its children regardless of @inuse.
+	 */
+	if (list_empty(&iocg->active_list) && iocg->child_active_sum) {
+		inuse = DIV64_U64_ROUND_UP(active * iocg->child_inuse_sum,
+					   iocg->child_active_sum);
+	} else {
+		inuse = clamp_t(u32, inuse, 1, active);
+	}
 
 	iocg->last_inuse = iocg->inuse;
 	if (save)
@@ -1086,7 +1096,7 @@ static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse,
 		/* update the level sums */
 		parent->child_active_sum += (s32)(active - child->active);
 		parent->child_inuse_sum += (s32)(inuse - child->inuse);
-		/* apply the udpates */
+		/* apply the updates */
 		child->active = active;
 		child->inuse = inuse;
 
-- 
cgit v1.2.3


From ca298241bc229303ff683db7265a2c625a9c00fe Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Tue, 11 May 2021 14:38:47 -0700
Subject: f2fs: avoid swapon failure by giving a warning first

The final solution can be migrating blocks to form a section-aligned file
internally. Meanwhile, let's ask users to do that when preparing the swap
file initially like:
1) create()
2) ioctl(F2FS_IOC_SET_PIN_FILE)
3) fallocate()

Reported-by: kernel test robot <oliver.sang@intel.com>
Fixes: 36e4d95891ed ("f2fs: check if swapfile is section-alligned")
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/data.c | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 33e56ae84e35..41e260680b27 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -3801,6 +3801,7 @@ static int f2fs_is_file_aligned(struct inode *inode)
 	block_t pblock;
 	unsigned long nr_pblocks;
 	unsigned int blocks_per_sec = BLKS_PER_SEC(sbi);
+	unsigned int not_aligned = 0;
 	int ret = 0;
 
 	cur_lblock = 0;
@@ -3833,13 +3834,20 @@ static int f2fs_is_file_aligned(struct inode *inode)
 
 		if ((pblock - main_blkaddr) & (blocks_per_sec - 1) ||
 			nr_pblocks & (blocks_per_sec - 1)) {
-			f2fs_err(sbi, "Swapfile does not align to section");
-			ret = -EINVAL;
-			goto out;
+			if (f2fs_is_pinned_file(inode)) {
+				f2fs_err(sbi, "Swapfile does not align to section");
+				ret = -EINVAL;
+				goto out;
+			}
+			not_aligned++;
 		}
 
 		cur_lblock += nr_pblocks;
 	}
+	if (not_aligned)
+		f2fs_warn(sbi, "Swapfile (%u) is not align to section: \n"
+			"\t1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate()",
+			not_aligned);
 out:
 	return ret;
 }
@@ -3858,6 +3866,7 @@ static int check_swap_activate_fast(struct swap_info_struct *sis,
 	int nr_extents = 0;
 	unsigned long nr_pblocks;
 	unsigned int blocks_per_sec = BLKS_PER_SEC(sbi);
+	unsigned int not_aligned = 0;
 	int ret = 0;
 
 	/*
@@ -3896,9 +3905,12 @@ static int check_swap_activate_fast(struct swap_info_struct *sis,
 
 		if ((pblock - SM_I(sbi)->main_blkaddr) & (blocks_per_sec - 1) ||
 				nr_pblocks & (blocks_per_sec - 1)) {
-			f2fs_err(sbi, "Swapfile does not align to section");
-			ret = -EINVAL;
-			goto out;
+			if (f2fs_is_pinned_file(inode)) {
+				f2fs_err(sbi, "Swapfile does not align to section");
+				ret = -EINVAL;
+				goto out;
+			}
+			not_aligned++;
 		}
 
 		if (cur_lblock + nr_pblocks >= sis->max)
@@ -3927,6 +3939,11 @@ static int check_swap_activate_fast(struct swap_info_struct *sis,
 	sis->max = cur_lblock;
 	sis->pages = cur_lblock - 1;
 	sis->highest_bit = cur_lblock - 1;
+
+	if (not_aligned)
+		f2fs_warn(sbi, "Swapfile (%u) is not align to section: \n"
+			"\t1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate()",
+			not_aligned);
 out:
 	return ret;
 }
-- 
cgit v1.2.3


From 2b17c400aeb44daf041627722581ade527bb3c1d Mon Sep 17 00:00:00 2001
From: Norbert Slusarek <nslusarek@gmx.net>
Date: Wed, 12 May 2021 00:43:54 +0200
Subject: can: isotp: prevent race between isotp_bind() and isotp_setsockopt()

A race condition was found in isotp_setsockopt() which allows to
change socket options after the socket was bound.
For the specific case of SF_BROADCAST support, this might lead to possible
use-after-free because can_rx_unregister() is not called.

Checking for the flag under the socket lock in isotp_bind() and taking
the lock in isotp_setsockopt() fixes the issue.

Fixes: 921ca574cd38 ("can: isotp: add SF_BROADCAST support for functional addressing")
Link: https://lore.kernel.org/r/trinity-e6ae9efa-9afb-4326-84c0-f3609b9b8168-1620773528307@3c-app-gmx-bs06
Reported-by: Norbert Slusarek <nslusarek@gmx.net>
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Signed-off-by: Norbert Slusarek <nslusarek@gmx.net>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/isotp.c | 49 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 33 insertions(+), 16 deletions(-)

diff --git a/net/can/isotp.c b/net/can/isotp.c
index 9f94ad3caee9..253b24417c8e 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -1062,27 +1062,31 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
 	if (len < ISOTP_MIN_NAMELEN)
 		return -EINVAL;
 
+	if (addr->can_addr.tp.tx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG))
+		return -EADDRNOTAVAIL;
+
+	if (!addr->can_ifindex)
+		return -ENODEV;
+
+	lock_sock(sk);
+
 	/* do not register frame reception for functional addressing */
 	if (so->opt.flags & CAN_ISOTP_SF_BROADCAST)
 		do_rx_reg = 0;
 
 	/* do not validate rx address for functional addressing */
 	if (do_rx_reg) {
-		if (addr->can_addr.tp.rx_id == addr->can_addr.tp.tx_id)
-			return -EADDRNOTAVAIL;
+		if (addr->can_addr.tp.rx_id == addr->can_addr.tp.tx_id) {
+			err = -EADDRNOTAVAIL;
+			goto out;
+		}
 
-		if (addr->can_addr.tp.rx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG))
-			return -EADDRNOTAVAIL;
+		if (addr->can_addr.tp.rx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG)) {
+			err = -EADDRNOTAVAIL;
+			goto out;
+		}
 	}
 
-	if (addr->can_addr.tp.tx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG))
-		return -EADDRNOTAVAIL;
-
-	if (!addr->can_ifindex)
-		return -ENODEV;
-
-	lock_sock(sk);
-
 	if (so->bound && addr->can_ifindex == so->ifindex &&
 	    addr->can_addr.tp.rx_id == so->rxid &&
 	    addr->can_addr.tp.tx_id == so->txid)
@@ -1164,16 +1168,13 @@ static int isotp_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
 	return ISOTP_MIN_NAMELEN;
 }
 
-static int isotp_setsockopt(struct socket *sock, int level, int optname,
+static int isotp_setsockopt_locked(struct socket *sock, int level, int optname,
 			    sockptr_t optval, unsigned int optlen)
 {
 	struct sock *sk = sock->sk;
 	struct isotp_sock *so = isotp_sk(sk);
 	int ret = 0;
 
-	if (level != SOL_CAN_ISOTP)
-		return -EINVAL;
-
 	if (so->bound)
 		return -EISCONN;
 
@@ -1248,6 +1249,22 @@ static int isotp_setsockopt(struct socket *sock, int level, int optname,
 	return ret;
 }
 
+static int isotp_setsockopt(struct socket *sock, int level, int optname,
+			    sockptr_t optval, unsigned int optlen)
+
+{
+	struct sock *sk = sock->sk;
+	int ret;
+
+	if (level != SOL_CAN_ISOTP)
+		return -EINVAL;
+
+	lock_sock(sk);
+	ret = isotp_setsockopt_locked(sock, level, optname, optval, optlen);
+	release_sock(sk);
+	return ret;
+}
+
 static int isotp_getsockopt(struct socket *sock, int level, int optname,
 			    char __user *optval, int __user *optlen)
 {
-- 
cgit v1.2.3


From 02dbb7246c5bbbbe1607ebdc546ba5c454a664b1 Mon Sep 17 00:00:00 2001
From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
Date: Tue, 11 May 2021 20:46:09 +0530
Subject: sched/fair: Fix clearing of has_idle_cores flag in select_idle_cpu()

In commit:

  9fe1f127b913 ("sched/fair: Merge select_idle_core/cpu()")

in select_idle_cpu(), we check if an idle core is present in the LLC
of the target CPU via the flag "has_idle_cores". We look for the idle
core in select_idle_cores(). If select_idle_cores() isn't able to find
an idle core/CPU, we need to unset the has_idle_cores flag in the LLC
of the target to prevent other CPUs from going down this route.

However, the current code is unsetting it in the LLC of the current
CPU instead of the target CPU. This patch fixes this issue.

Fixes: 9fe1f127b913 ("sched/fair: Merge select_idle_core/cpu()")
Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Link: https://lore.kernel.org/r/1620746169-13996-1-git-send-email-ego@linux.vnet.ibm.com
---
 kernel/sched/fair.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 20aa234ffe04..3248e24a90b0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6217,7 +6217,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
 	}
 
 	if (has_idle_core)
-		set_idle_cores(this, false);
+		set_idle_cores(target, false);
 
 	if (sched_feat(SIS_PROP) && !has_idle_core) {
 		time = cpu_clock(this) - time;
-- 
cgit v1.2.3


From 1e948b1752b58c9c570989ab29ceef5b38fdccda Mon Sep 17 00:00:00 2001
From: Zou Wei <zou_wei@huawei.com>
Date: Wed, 12 May 2021 11:17:47 +0800
Subject: gpio: cadence: Add missing MODULE_DEVICE_TABLE

This patch adds missing MODULE_DEVICE_TABLE definition which generates
correct modalias for automatic loading of this driver when it is built
as an external module.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zou Wei <zou_wei@huawei.com>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 drivers/gpio/gpio-cadence.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpio/gpio-cadence.c b/drivers/gpio/gpio-cadence.c
index a4d3239d2594..4ab3fcd9b9ba 100644
--- a/drivers/gpio/gpio-cadence.c
+++ b/drivers/gpio/gpio-cadence.c
@@ -278,6 +278,7 @@ static const struct of_device_id cdns_of_ids[] = {
 	{ .compatible = "cdns,gpio-r1p02" },
 	{ /* sentinel */ },
 };
+MODULE_DEVICE_TABLE(of, cdns_of_ids);
 
 static struct platform_driver cdns_gpio_driver = {
 	.driver = {
-- 
cgit v1.2.3


From a0579474effff6a139768b300d8439c2327b3848 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 22:46:30 +0300
Subject: gpio: xilinx: Correct kernel doc for xgpio_probe()

Kernel doc validator complains:

.../gpio-xilinx.c:556: warning: expecting prototype for xgpio_of_probe(). Prototype was for xgpio_probe() instead

Correct as suggested by changing the name of the function in the doc..

Fixes: 749564ffd52d ("gpio/xilinx: Convert the driver to platform device interface")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Neeli Srinivas <sneeli@xilinx.com>
Reviewed-by: Michal Simek <michal.simek@xilinx.com>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 drivers/gpio/gpio-xilinx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-xilinx.c b/drivers/gpio/gpio-xilinx.c
index b411d3156e0b..136557e7dd3c 100644
--- a/drivers/gpio/gpio-xilinx.c
+++ b/drivers/gpio/gpio-xilinx.c
@@ -542,7 +542,7 @@ static void xgpio_irqhandler(struct irq_desc *desc)
 }
 
 /**
- * xgpio_of_probe - Probe method for the GPIO device.
+ * xgpio_probe - Probe method for the GPIO device.
  * @pdev: pointer to the platform device
  *
  * Return:
-- 
cgit v1.2.3


From bdbe871ef0caa660e16461a2a94579d9f9ef7ba4 Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Fri, 7 May 2021 11:34:11 +0100
Subject: gpio: tegra186: Don't set parent IRQ affinity

When hotplugging CPUs on Tegra186 and Tegra194 errors such as the
following are seen ...

 IRQ63: set affinity failed(-22).
 IRQ65: set affinity failed(-22).
 IRQ66: set affinity failed(-22).
 IRQ67: set affinity failed(-22).

Looking at the /proc/interrupts the above are all interrupts associated
with GPIOs. The reason why these error messages occur is because there
is no 'parent_data' associated with any of the GPIO interrupts and so
tegra186_irq_set_affinity() simply returns -EINVAL.

To understand why there is no 'parent_data' it is first necessary to
understand that in addition to the GPIO interrupts being routed to the
interrupt controller (GIC), the interrupts for some GPIOs are also
routed to the Tegra Power Management Controller (PMC) to wake up the
system from low power states. In order to configure GPIO events as
wake events in the PMC, the PMC is configured as IRQ parent domain
for the GPIO IRQ domain. Originally the GIC was the IRQ parent domain
of the PMC and although this was working, this started causing issues
once commit 64a267e9a41c ("irqchip/gic: Configure SGIs as standard
interrupts") was added, because technically, the GIC is not a parent
of the PMC. Commit c351ab7bf2a5 ("soc/tegra: pmc: Don't create fake
interrupt hierarchy levels") fixed this by severing the IRQ domain
hierarchy for the Tegra GPIOs and hence, there may be no IRQ parent
domain for the GPIOs.

The GPIO controllers on Tegra186 and Tegra194 have either one or six
interrupt lines to the interrupt controller. For GPIO controllers with
six interrupts, the mapping of the GPIO interrupt to the controller
interrupt is configurable within the GPIO controller. Currently a
default mapping is used, however, it could be possible to use the
set affinity callback for the Tegra186 GPIO driver to do something a
bit more interesting. Currently, because interrupts for all GPIOs are
have the same mapping and any attempts to configure the affinity for
a given GPIO can conflict with another that shares the same IRQ, for
now it is simpler to just remove set affinity support and this avoids
the above warnings being seen.

Cc: <stable@vger.kernel.org>
Fixes: c4e1f7d92cd6 ("gpio: tegra186: Set affinity callback to parent")
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 drivers/gpio/gpio-tegra186.c | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/drivers/gpio/gpio-tegra186.c b/drivers/gpio/gpio-tegra186.c
index 1bd9e44df718..05974b760796 100644
--- a/drivers/gpio/gpio-tegra186.c
+++ b/drivers/gpio/gpio-tegra186.c
@@ -444,16 +444,6 @@ static int tegra186_irq_set_wake(struct irq_data *data, unsigned int on)
 	return 0;
 }
 
-static int tegra186_irq_set_affinity(struct irq_data *data,
-				     const struct cpumask *dest,
-				     bool force)
-{
-	if (data->parent_data)
-		return irq_chip_set_affinity_parent(data, dest, force);
-
-	return -EINVAL;
-}
-
 static void tegra186_gpio_irq(struct irq_desc *desc)
 {
 	struct tegra_gpio *gpio = irq_desc_get_handler_data(desc);
@@ -700,7 +690,6 @@ static int tegra186_gpio_probe(struct platform_device *pdev)
 	gpio->intc.irq_unmask = tegra186_irq_unmask;
 	gpio->intc.irq_set_type = tegra186_irq_set_type;
 	gpio->intc.irq_set_wake = tegra186_irq_set_wake;
-	gpio->intc.irq_set_affinity = tegra186_irq_set_affinity;
 
 	irq = &gpio->gpio.irq;
 	irq->chip = &gpio->intc;
-- 
cgit v1.2.3


From 7907cad7d07e0055789ec0c534452f19dfe1fc80 Mon Sep 17 00:00:00 2001
From: Chunyan Zhang <chunyan.zhang@unisoc.com>
Date: Wed, 12 May 2021 17:35:34 +0800
Subject: spi: sprd: Add missing MODULE_DEVICE_TABLE

MODULE_DEVICE_TABLE is used to extract the device information out of the
driver and builds a table when being compiled. If using this macro,
kernel can find the driver if available when the device is plugged in,
and then loads that driver and initializes the device.

Signed-off-by: Chunyan Zhang <chunyan.zhang@unisoc.com>
Link: https://lore.kernel.org/r/20210512093534.243040-1-zhang.lyra@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-sprd.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/spi/spi-sprd.c b/drivers/spi/spi-sprd.c
index b41a75749b49..28e70db9bbba 100644
--- a/drivers/spi/spi-sprd.c
+++ b/drivers/spi/spi-sprd.c
@@ -1068,6 +1068,7 @@ static const struct of_device_id sprd_spi_of_match[] = {
 	{ .compatible = "sprd,sc9860-spi", },
 	{ /* sentinel */ }
 };
+MODULE_DEVICE_TABLE(of, sprd_spi_of_match);
 
 static struct platform_driver sprd_spi_driver = {
 	.driver = {
-- 
cgit v1.2.3


From 6b69546912a57ff8c31061f98e56383cc0beffd3 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 11 May 2021 17:09:12 +0300
Subject: spi: Assume GPIO CS active high in ACPI case

Currently GPIO CS handling, when descriptors are in use, doesn't
take into consideration that in ACPI case the default polarity
is Active High and can't be altered. Instead we have to use the
per-chip definition provided by SPISerialBus() resource.

Fixes: 766c6b63aa04 ("spi: fix client driver breakages when using GPIO descriptors")
Cc: Liguang Zhang <zhangliguang@linux.alibaba.com>
Cc: Jay Fang <f.fangjian@huawei.com>
Cc: Sven Van Asbroeck <thesven73@gmail.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Xin Hao <xhao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20210511140912.30757-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index a565e7d6bf3b..98048af04abf 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -820,15 +820,29 @@ static void spi_set_cs(struct spi_device *spi, bool enable, bool force)
 
 	if (spi->cs_gpiod || gpio_is_valid(spi->cs_gpio)) {
 		if (!(spi->mode & SPI_NO_CS)) {
-			if (spi->cs_gpiod)
-				/* polarity handled by gpiolib */
-				gpiod_set_value_cansleep(spi->cs_gpiod, activate);
-			else
+			if (spi->cs_gpiod) {
+				/*
+				 * Historically ACPI has no means of the GPIO polarity and
+				 * thus the SPISerialBus() resource defines it on the per-chip
+				 * basis. In order to avoid a chain of negations, the GPIO
+				 * polarity is considered being Active High. Even for the cases
+				 * when _DSD() is involved (in the updated versions of ACPI)
+				 * the GPIO CS polarity must be defined Active High to avoid
+				 * ambiguity. That's why we use enable, that takes SPI_CS_HIGH
+				 * into account.
+				 */
+				if (has_acpi_companion(&spi->dev))
+					gpiod_set_value_cansleep(spi->cs_gpiod, !enable);
+				else
+					/* Polarity handled by GPIO library */
+					gpiod_set_value_cansleep(spi->cs_gpiod, activate);
+			} else {
 				/*
 				 * invert the enable line, as active low is
 				 * default for SPI.
 				 */
 				gpio_set_value_cansleep(spi->cs_gpio, !enable);
+			}
 		}
 		/* Some SPI masters need both GPIO CS & slave_select */
 		if ((spi->controller->flags & SPI_MASTER_GPIO_SS) &&
-- 
cgit v1.2.3


From 2ca4dcc4909d787ee153272f7efc2bff3b498720 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner@ubuntu.com>
Date: Tue, 11 May 2021 16:30:15 +0200
Subject: fs/mount_setattr: tighten permission checks

We currently don't have any filesystems that support idmapped mounts
which are mountable inside a user namespace. That was a deliberate
decision for now as a userns root can just mount the filesystem
themselves. So enforce this restriction explicitly until there's a real
use-case for this. This way we can notice it and will have a chance to
adapt and audit our translation helpers and fstests appropriately if we
need to support such filesystems.

Cc: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: stable@vger.kernel.org
CC: linux-fsdevel@vger.kernel.org
Suggested-by: Seth Forshee <seth.forshee@canonical.com>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 fs/namespace.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index f63337828e1c..c3f1a78ba369 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3855,8 +3855,12 @@ static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
 	if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP))
 		return -EINVAL;
 
+	/* Don't yet support filesystem mountable in user namespaces. */
+	if (m->mnt_sb->s_user_ns != &init_user_ns)
+		return -EINVAL;
+
 	/* We're not controlling the superblock. */
-	if (!ns_capable(m->mnt_sb->s_user_ns, CAP_SYS_ADMIN))
+	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
 	/* Mount has already been visible in the filesystem hierarchy. */
-- 
cgit v1.2.3


From 7ea96eefb0097d243af62fc672be9f17b10338b3 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Wed, 12 May 2021 11:43:52 +0200
Subject: block, bfq: avoid circular stable merges

BFQ may merge a new bfq_queue, stably, with the last bfq_queue
created. In particular, BFQ first waits a little bit for some I/O to
flow inside the new queue, say Q2, if this is needed to understand
whether it is better or worse to merge Q2 with the last queue created,
say Q1. This delayed stable merge is performed by assigning
bic->stable_merge_bfqq = Q1, for the bic associated with Q1.

Yet, while waiting for some I/O to flow in Q2, a non-stable queue
merge of Q2 with Q1 may happen, causing the bic previously associated
with Q2 to be associated with exactly Q1 (bic->bfqq = Q1). After that,
Q2 and Q1 may happen to be split, and, in the split, Q1 may happen to
be recycled as a non-shared bfq_queue. In that case, Q1 may then
happen to undergo a stable merge with the bfq_queue pointed by
bic->stable_merge_bfqq. Yet bic->stable_merge_bfqq still points to
Q1. So Q1 would be merged with itself.

This commit fixes this error by intercepting this situation, and
canceling the schedule of the stable merge.

Fixes: 430a67f9d616 ("block, bfq: merge bursts of newly-created queues")
Signed-off-by: Pietro Pedroni <pedroni.pietro.96@gmail.com>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Link: https://lore.kernel.org/r/20210512094352.85545-2-paolo.valente@linaro.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-iosched.c | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 59b2499d3f8b..acd1f881273e 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -372,9 +372,38 @@ struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync)
 	return bic->bfqq[is_sync];
 }
 
+static void bfq_put_stable_ref(struct bfq_queue *bfqq);
+
 void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync)
 {
+	/*
+	 * If bfqq != NULL, then a non-stable queue merge between
+	 * bic->bfqq and bfqq is happening here. This causes troubles
+	 * in the following case: bic->bfqq has also been scheduled
+	 * for a possible stable merge with bic->stable_merge_bfqq,
+	 * and bic->stable_merge_bfqq == bfqq happens to
+	 * hold. Troubles occur because bfqq may then undergo a split,
+	 * thereby becoming eligible for a stable merge. Yet, if
+	 * bic->stable_merge_bfqq points exactly to bfqq, then bfqq
+	 * would be stably merged with itself. To avoid this anomaly,
+	 * we cancel the stable merge if
+	 * bic->stable_merge_bfqq == bfqq.
+	 */
 	bic->bfqq[is_sync] = bfqq;
+
+	if (bfqq && bic->stable_merge_bfqq == bfqq) {
+		/*
+		 * Actually, these same instructions are executed also
+		 * in bfq_setup_cooperator, in case of abort or actual
+		 * execution of a stable merge. We could avoid
+		 * repeating these instructions there too, but if we
+		 * did so, we would nest even more complexity in this
+		 * function.
+		 */
+		bfq_put_stable_ref(bic->stable_merge_bfqq);
+
+		bic->stable_merge_bfqq = NULL;
+	}
 }
 
 struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic)
@@ -2630,8 +2659,6 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq,
 static bool idling_boosts_thr_without_issues(struct bfq_data *bfqd,
 					     struct bfq_queue *bfqq);
 
-static void bfq_put_stable_ref(struct bfq_queue *bfqq);
-
 /*
  * Attempt to schedule a merge of bfqq with the currently in-service
  * queue or with a close queue among the scheduled queues.  Return
-- 
cgit v1.2.3


From 190515f610946db025cdedebde93958b725fb583 Mon Sep 17 00:00:00 2001
From: Lin Feng <linf@wangsu.com>
Date: Wed, 12 May 2021 18:01:24 +0800
Subject: blkdev.h: remove unused codes blk_account_rq

Last users of blk_account_rq gone with patch commit a1ce35fa49852db
("block: remove dead elevator code") and now it gets no caller, it can
be safely removed.

Signed-off-by: Lin Feng <linf@wangsu.com>
Link: https://lore.kernel.org/r/20210512100124.173769-1-linf@wangsu.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b91ba6207365..26c3e368656f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -677,11 +677,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
 extern void blk_set_pm_only(struct request_queue *q);
 extern void blk_clear_pm_only(struct request_queue *q);
 
-static inline bool blk_account_rq(struct request *rq)
-{
-	return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq);
-}
-
 #define list_entry_rq(ptr)	list_entry((ptr), struct request, queuelist)
 
 #define rq_data_dir(rq)		(op_is_write(req_op(rq)) ? WRITE : READ)
-- 
cgit v1.2.3


From 2404b8747019184002823dba7d2f0ecf89d802b7 Mon Sep 17 00:00:00 2001
From: Sumeet Pawnikar <sumeet.r.pawnikar@intel.com>
Date: Tue, 11 May 2021 23:31:42 +0530
Subject: ACPI: PM: Add ACPI ID of Alder Lake Fan

Add a new unique fan ACPI device ID for Alder Lake to
support it in acpi_dev_pm_attach() function.

Fixes: 38748bcb940e ("ACPI: DPTF: Support Alder Lake")
Signed-off-by: Sumeet Pawnikar <sumeet.r.pawnikar@intel.com>
Acked-by: Zhang Rui <rui.zhang@intel.com>
Cc: 5.10+ <stable@vger.kernel.org> # 5.10+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/device_pm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index 096153761ebc..58876248b192 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -1310,6 +1310,7 @@ int acpi_dev_pm_attach(struct device *dev, bool power_on)
 		{"PNP0C0B", }, /* Generic ACPI fan */
 		{"INT3404", }, /* Fan */
 		{"INTC1044", }, /* Fan for Tiger Lake generation */
+		{"INTC1048", }, /* Fan for Alder Lake generation */
 		{}
 	};
 	struct acpi_device *adev = ACPI_COMPANION(dev);
-- 
cgit v1.2.3


From f395183f9544ba2f56b25938d6ea7042bd873521 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Wed, 12 May 2021 07:38:00 -0700
Subject: f2fs: return EINVAL for hole cases in swap file

This tries to fix xfstests/generic/495.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/data.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 41e260680b27..009a09fb9d88 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -3896,7 +3896,7 @@ static int check_swap_activate_fast(struct swap_info_struct *sis,
 		/* hole */
 		if (!(map.m_flags & F2FS_MAP_FLAGS)) {
 			f2fs_err(sbi, "Swapfile has holes\n");
-			ret = -ENOENT;
+			ret = -EINVAL;
 			goto out;
 		}
 
@@ -4052,7 +4052,7 @@ out:
 	return ret;
 bad_bmap:
 	f2fs_err(sbi, "Swapfile has holes\n");
-	return -ENOENT;
+	return -EINVAL;
 }
 
 static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
-- 
cgit v1.2.3


From 79ebe9110fa458d58f1fceb078e2068d7ad37390 Mon Sep 17 00:00:00 2001
From: Sun Ke <sunke32@huawei.com>
Date: Wed, 12 May 2021 19:43:30 +0800
Subject: nbd: Fix NULL pointer in flush_workqueue

Open /dev/nbdX first, the config_refs will be 1 and
the pointers in nbd_device are still null. Disconnect
/dev/nbdX, then reference a null recv_workq. The
protection by config_refs in nbd_genl_disconnect is useless.

[  656.366194] BUG: kernel NULL pointer dereference, address: 0000000000000020
[  656.368943] #PF: supervisor write access in kernel mode
[  656.369844] #PF: error_code(0x0002) - not-present page
[  656.370717] PGD 10cc87067 P4D 10cc87067 PUD 1074b4067 PMD 0
[  656.371693] Oops: 0002 [#1] SMP
[  656.372242] CPU: 5 PID: 7977 Comm: nbd-client Not tainted 5.11.0-rc5-00040-g76c057c84d28 #1
[  656.373661] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-ppc64le-16.ppc.fedoraproject.org-3.fc31 04/01/2014
[  656.375904] RIP: 0010:mutex_lock+0x29/0x60
[  656.376627] Code: 00 0f 1f 44 00 00 55 48 89 fd 48 83 05 6f d7 fe 08 01 e8 7a c3 ff ff 48 83 05 6a d7 fe 08 01 31 c0 65 48 8b 14 25 00 6d 01 00 <f0> 48 0f b1 55 d
[  656.378934] RSP: 0018:ffffc900005eb9b0 EFLAGS: 00010246
[  656.379350] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
[  656.379915] RDX: ffff888104cf2600 RSI: ffffffffaae8f452 RDI: 0000000000000020
[  656.380473] RBP: 0000000000000020 R08: 0000000000000000 R09: ffff88813bd6b318
[  656.381039] R10: 00000000000000c7 R11: fefefefefefefeff R12: ffff888102710b40
[  656.381599] R13: ffffc900005eb9e0 R14: ffffffffb2930680 R15: ffff88810770ef00
[  656.382166] FS:  00007fdf117ebb40(0000) GS:ffff88813bd40000(0000) knlGS:0000000000000000
[  656.382806] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  656.383261] CR2: 0000000000000020 CR3: 0000000100c84000 CR4: 00000000000006e0
[  656.383819] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  656.384370] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  656.384927] Call Trace:
[  656.385111]  flush_workqueue+0x92/0x6c0
[  656.385395]  nbd_disconnect_and_put+0x81/0xd0
[  656.385716]  nbd_genl_disconnect+0x125/0x2a0
[  656.386034]  genl_family_rcv_msg_doit.isra.0+0x102/0x1b0
[  656.386422]  genl_rcv_msg+0xfc/0x2b0
[  656.386685]  ? nbd_ioctl+0x490/0x490
[  656.386954]  ? genl_family_rcv_msg_doit.isra.0+0x1b0/0x1b0
[  656.387354]  netlink_rcv_skb+0x62/0x180
[  656.387638]  genl_rcv+0x34/0x60
[  656.387874]  netlink_unicast+0x26d/0x590
[  656.388162]  netlink_sendmsg+0x398/0x6c0
[  656.388451]  ? netlink_rcv_skb+0x180/0x180
[  656.388750]  ____sys_sendmsg+0x1da/0x320
[  656.389038]  ? ____sys_recvmsg+0x130/0x220
[  656.389334]  ___sys_sendmsg+0x8e/0xf0
[  656.389605]  ? ___sys_recvmsg+0xa2/0xf0
[  656.389889]  ? handle_mm_fault+0x1671/0x21d0
[  656.390201]  __sys_sendmsg+0x6d/0xe0
[  656.390464]  __x64_sys_sendmsg+0x23/0x30
[  656.390751]  do_syscall_64+0x45/0x70
[  656.391017]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

To fix it, just add if (nbd->recv_workq) to nbd_disconnect_and_put().

Fixes: e9e006f5fcf2 ("nbd: fix max number of supported devs")
Signed-off-by: Sun Ke <sunke32@huawei.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Link: https://lore.kernel.org/r/20210512114331.1233964-2-sunke32@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/nbd.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 4ff71b579cfc..974da561b8e5 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1980,7 +1980,8 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd)
 	 * config ref and try to destroy the workqueue from inside the work
 	 * queue.
 	 */
-	flush_workqueue(nbd->recv_workq);
+	if (nbd->recv_workq)
+		flush_workqueue(nbd->recv_workq);
 	if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
 			       &nbd->config->runtime_flags))
 		nbd_config_put(nbd);
-- 
cgit v1.2.3


From bedf78c4cbbbb65e42ede5ca2bd21887ef5b7060 Mon Sep 17 00:00:00 2001
From: Sun Ke <sunke32@huawei.com>
Date: Wed, 12 May 2021 19:43:31 +0800
Subject: nbd: share nbd_put and return by goto put_nbd
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the following two statements by the statement “goto put_nbd;”

	nbd_put(nbd);
	return 0;

Signed-off-by: Sun Ke <sunke32@huawei.com>
Suggested-by: Markus Elfring <Markus.Elfring@web.de>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Link: https://lore.kernel.org/r/20210512114331.1233964-3-sunke32@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/nbd.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 974da561b8e5..45d2c28c8fc8 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -2015,12 +2015,11 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
 		return -EINVAL;
 	}
 	mutex_unlock(&nbd_index_mutex);
-	if (!refcount_inc_not_zero(&nbd->config_refs)) {
-		nbd_put(nbd);
-		return 0;
-	}
+	if (!refcount_inc_not_zero(&nbd->config_refs))
+		goto put_nbd;
 	nbd_disconnect_and_put(nbd);
 	nbd_config_put(nbd);
+put_nbd:
 	nbd_put(nbd);
 	return 0;
 }
-- 
cgit v1.2.3


From dbb5afad100a828c97e012c6106566d99f041db6 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Wed, 12 May 2021 15:33:08 +0200
Subject: ptrace: make ptrace() fail if the tracee changed its pid unexpectedly

Suppose we have 2 threads, the group-leader L and a sub-theread T,
both parked in ptrace_stop(). Debugger tries to resume both threads
and does

	ptrace(PTRACE_CONT, T);
	ptrace(PTRACE_CONT, L);

If the sub-thread T execs in between, the 2nd PTRACE_CONT doesn not
resume the old leader L, it resumes the post-exec thread T which was
actually now stopped in PTHREAD_EVENT_EXEC. In this case the
PTHREAD_EVENT_EXEC event is lost, and the tracer can't know that the
tracee changed its pid.

This patch makes ptrace() fail in this case until debugger does wait()
and consumes PTHREAD_EVENT_EXEC which reports old_pid. This affects all
ptrace requests except the "asynchronous" PTRACE_INTERRUPT/KILL.

The patch doesn't add the new PTRACE_ option to not complicate the API,
and I _hope_ this won't cause any noticeable regression:

	- If debugger uses PTRACE_O_TRACEEXEC and the thread did an exec
	  and the tracer does a ptrace request without having consumed
	  the exec event, it's 100% sure that the thread the ptracer
	  thinks it is targeting does not exist anymore, or isn't the
	  same as the one it thinks it is targeting.

	- To some degree this patch adds nothing new. In the scenario
	  above ptrace(L) can fail with -ESRCH if it is called after the
	  execing sub-thread wakes the leader up and before it "steals"
	  the leader's pid.

Test-case:

	#include <stdio.h>
	#include <unistd.h>
	#include <signal.h>
	#include <sys/ptrace.h>
	#include <sys/wait.h>
	#include <errno.h>
	#include <pthread.h>
	#include <assert.h>

	void *tf(void *arg)
	{
		execve("/usr/bin/true", NULL, NULL);
		assert(0);

		return NULL;
	}

	int main(void)
	{
		int leader = fork();
		if (!leader) {
			kill(getpid(), SIGSTOP);

			pthread_t th;
			pthread_create(&th, NULL, tf, NULL);
			for (;;)
				pause();

			return 0;
		}

		waitpid(leader, NULL, WSTOPPED);

		ptrace(PTRACE_SEIZE, leader, 0,
				PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXEC);
		waitpid(leader, NULL, 0);

		ptrace(PTRACE_CONT, leader, 0,0);
		waitpid(leader, NULL, 0);

		int status, thread = waitpid(-1, &status, 0);
		assert(thread > 0 && thread != leader);
		assert(status == 0x80137f);

		ptrace(PTRACE_CONT, thread, 0,0);
		/*
		 * waitid() because waitpid(leader, &status, WNOWAIT) does not
		 * report status. Why ????
		 *
		 * Why WEXITED? because we have another kernel problem connected
		 * to mt-exec.
		 */
		siginfo_t info;
		assert(waitid(P_PID, leader, &info, WSTOPPED|WEXITED|WNOWAIT) == 0);
		assert(info.si_pid == leader && info.si_status == 0x0405);

		/* OK, it sleeps in ptrace(PTRACE_EVENT_EXEC == 0x04) */
		assert(ptrace(PTRACE_CONT, leader, 0,0) == -1);
		assert(errno == ESRCH);

		assert(leader == waitpid(leader, &status, WNOHANG));
		assert(status == 0x04057f);

		assert(ptrace(PTRACE_CONT, leader, 0,0) == 0);

		return 0;
	}

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reported-by: Simon Marchi <simon.marchi@efficios.com>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Acked-by: Pedro Alves <palves@redhat.com>
Acked-by: Simon Marchi <simon.marchi@efficios.com>
Acked-by: Jan Kratochvil <jan.kratochvil@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/ptrace.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 76f09456ec4b..2997ca600d18 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -170,6 +170,21 @@ void __ptrace_unlink(struct task_struct *child)
 	spin_unlock(&child->sighand->siglock);
 }
 
+static bool looks_like_a_spurious_pid(struct task_struct *task)
+{
+	if (task->exit_code != ((PTRACE_EVENT_EXEC << 8) | SIGTRAP))
+		return false;
+
+	if (task_pid_vnr(task) == task->ptrace_message)
+		return false;
+	/*
+	 * The tracee changed its pid but the PTRACE_EVENT_EXEC event
+	 * was not wait()'ed, most probably debugger targets the old
+	 * leader which was destroyed in de_thread().
+	 */
+	return true;
+}
+
 /* Ensure that nothing can wake it up, even SIGKILL */
 static bool ptrace_freeze_traced(struct task_struct *task)
 {
@@ -180,7 +195,8 @@ static bool ptrace_freeze_traced(struct task_struct *task)
 		return ret;
 
 	spin_lock_irq(&task->sighand->siglock);
-	if (task_is_traced(task) && !__fatal_signal_pending(task)) {
+	if (task_is_traced(task) && !looks_like_a_spurious_pid(task) &&
+	    !__fatal_signal_pending(task)) {
 		task->state = __TASK_TRACED;
 		ret = true;
 	}
-- 
cgit v1.2.3


From 85428beac80dbcace5b146b218697c73e367dcf5 Mon Sep 17 00:00:00 2001
From: Daniel Wagner <dwagner@suse.de>
Date: Wed, 12 May 2021 16:50:05 +0200
Subject: nvmet: seset ns->file when open fails

Reset the ns->file value to NULL also in the error case in
nvmet_file_ns_enable().

The ns->file variable points either to file object or contains the
error code after the filp_open() call. This can lead to following
problem:

When the user first setups an invalid file backend and tries to enable
the ns, it will fail. Then the user switches over to a bdev backend
and enables successfully the ns. The first received I/O will crash the
system because the IO backend is chosen based on the ns->file value:

static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
{
	[...]

	if (req->ns->file)
		return nvmet_file_parse_io_cmd(req);

	return nvmet_bdev_parse_io_cmd(req);
}

Reported-by: Enzo Matsumiya <ematsumiya@suse.com>
Signed-off-by: Daniel Wagner <dwagner@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/io-cmd-file.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index 715d4376c997..7fdbdc496597 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -49,9 +49,11 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns)
 
 	ns->file = filp_open(ns->device_path, flags, 0);
 	if (IS_ERR(ns->file)) {
-		pr_err("failed to open file %s: (%ld)\n",
-				ns->device_path, PTR_ERR(ns->file));
-		return PTR_ERR(ns->file);
+		ret = PTR_ERR(ns->file);
+		pr_err("failed to open file %s: (%d)\n",
+			ns->device_path, ret);
+		ns->file = NULL;
+		return ret;
 	}
 
 	ret = nvmet_file_ns_revalidate(ns);
-- 
cgit v1.2.3


From 4819d16d91145966ce03818a95169df1fd56b299 Mon Sep 17 00:00:00 2001
From: Ville Syrjälä <ville.syrjala@linux.intel.com>
Date: Wed, 21 Apr 2021 18:33:58 +0300
Subject: drm/i915: Avoid div-by-zero on gen2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Gen2 tiles are 2KiB in size so i915_gem_object_get_tile_row_size()
can in fact return <4KiB, which leads to div-by-zero here.
Avoid that.

Not sure i915_gem_object_get_tile_row_size() is entirely
sane anyway since it doesn't account for the different tile
layouts on i8xx/i915...

I'm not able to hit this before commit 6846895fde05 ("drm/i915:
Replace PIN_NONFAULT with calls to PIN_NOEVICT") and it looks
like I also need to run recent version of Mesa. With those in
place xonotic trips on this quite easily on my 85x.

Cc: stable@vger.kernel.org
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210421153401.13847-2-ville.syrjala@linux.intel.com
(cherry picked from commit ed52c62d386f764194e0184fdb905d5f24194cae)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 23f6b00e08e2..f6fe5cb01438 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -189,7 +189,7 @@ compute_partial_view(const struct drm_i915_gem_object *obj,
 	struct i915_ggtt_view view;
 
 	if (i915_gem_object_is_tiled(obj))
-		chunk = roundup(chunk, tile_row_pages(obj));
+		chunk = roundup(chunk, tile_row_pages(obj) ?: 1);
 
 	view.type = I915_GGTT_VIEW_PARTIAL;
 	view.partial.offset = rounddown(page_offset, chunk);
-- 
cgit v1.2.3


From 04d019961fd15de92874575536310243a0d4c5c5 Mon Sep 17 00:00:00 2001
From: Ville Syrjälä <ville.syrjala@linux.intel.com>
Date: Wed, 21 Apr 2021 18:33:59 +0300
Subject: drm/i915: Read C0DRB3/C1DRB3 as 16 bits again
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We've defined C0DRB3/C1DRB3 as 16 bit registers, so access them
as such.

Fixes: 1c8242c3a4b2 ("drm/i915: Use unchecked writes for setting up the fences")
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210421153401.13847-3-ville.syrjala@linux.intel.com
(cherry picked from commit f765a5b48c667bdada5e49d5e0f23f8c0687b21b)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
index e72b7a0dc316..8a322594210c 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
@@ -653,8 +653,8 @@ static void detect_bit_6_swizzle(struct i915_ggtt *ggtt)
 		 * banks of memory are paired and unswizzled on the
 		 * uneven portion, so leave that as unknown.
 		 */
-		if (intel_uncore_read(uncore, C0DRB3) ==
-		    intel_uncore_read(uncore, C1DRB3)) {
+		if (intel_uncore_read16(uncore, C0DRB3) ==
+		    intel_uncore_read16(uncore, C1DRB3)) {
 			swizzle_x = I915_BIT_6_SWIZZLE_9_10;
 			swizzle_y = I915_BIT_6_SWIZZLE_9;
 		}
-- 
cgit v1.2.3


From ea995218dddba171fecd05496c69617c5ef3c5b8 Mon Sep 17 00:00:00 2001
From: Lv Yunlong <lyl2019@mail.ustc.edu.cn>
Date: Mon, 26 Apr 2021 05:43:40 -0700
Subject: drm/i915/gt: Fix a double free in gen8_preallocate_top_level_pdp

Our code analyzer reported a double free bug.

In gen8_preallocate_top_level_pdp, pde and pde->pt.base are allocated
via alloc_pd(vm) with one reference. If pin_pt_dma() failed, pde->pt.base
is freed by i915_gem_object_put() with a reference dropped. Then free_pd
calls free_px() defined in intel_ppgtt.c, which calls i915_gem_object_put()
to put pde->pt.base again.

As pde->pt.base is protected by refcount, so the second put will not free
pde->pt.base actually. But, maybe it is better to remove the first put?

Fixes: 82adf901138cc ("drm/i915/gt: Shrink i915_page_directory's slab bucket")
Signed-off-by: Lv Yunlong <lyl2019@mail.ustc.edu.cn>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210426124340.4238-1-lyl2019@mail.ustc.edu.cn
(cherry picked from commit ac69496fe65cca0611d5917b7d232730ff605bc7)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 176c19633412..74bf6fc8461f 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -641,7 +641,6 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
 
 		err = pin_pt_dma(vm, pde->pt.base);
 		if (err) {
-			i915_gem_object_put(pde->pt.base);
 			free_pd(vm, pde);
 			return err;
 		}
-- 
cgit v1.2.3


From 402be8a101190969fc7ff122d07e262df86e132b Mon Sep 17 00:00:00 2001
From: Stéphane Marchesin <marcheu@chromium.org>
Date: Thu, 29 Apr 2021 03:10:21 +0000
Subject: drm/i915: Fix crash in auto_retire
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The retire logic uses the 2 lower bits of the pointer to the retire
function to store flags. However, the auto_retire function is not
guaranteed to be aligned to a multiple of 4, which causes crashes as
we jump to the wrong address, for example like this:

2021-04-24T18:03:53.804300Z WARNING kernel: [  516.876901] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
2021-04-24T18:03:53.804310Z WARNING kernel: [  516.876906] CPU: 7 PID: 146 Comm: kworker/u16:6 Tainted: G     U            5.4.105-13595-g3cd84167b2df #1
2021-04-24T18:03:53.804311Z WARNING kernel: [  516.876907] Hardware name: Google Volteer2/Volteer2, BIOS Google_Volteer2.13672.76.0 02/22/2021
2021-04-24T18:03:53.804312Z WARNING kernel: [  516.876911] Workqueue: events_unbound active_work
2021-04-24T18:03:53.804313Z WARNING kernel: [  516.876914] RIP: 0010:auto_retire+0x1/0x20
2021-04-24T18:03:53.804314Z WARNING kernel: [  516.876916] Code: e8 01 f2 ff ff eb 02 31 db 48 89 d8 5b 5d c3 0f 1f 44 00 00 55 48 89 e5 f0 ff 87 c8 00 00 00 0f 88 ab 47 4a 00 31 c0 5d c3 0f <1f> 44 00 00 55 48 89 e5 f0 ff 8f c8 00 00 00 0f 88 9a 47 4a 00 74
2021-04-24T18:03:53.804319Z WARNING kernel: [  516.876918] RSP: 0018:ffff9b4d809fbe38 EFLAGS: 00010286
2021-04-24T18:03:53.804320Z WARNING kernel: [  516.876919] RAX: 0000000000000007 RBX: ffff927915079600 RCX: 0000000000000007
2021-04-24T18:03:53.804320Z WARNING kernel: [  516.876921] RDX: ffff9b4d809fbe40 RSI: 0000000000000286 RDI: ffff927915079600
2021-04-24T18:03:53.804321Z WARNING kernel: [  516.876922] RBP: ffff9b4d809fbe68 R08: 8080808080808080 R09: fefefefefefefeff
2021-04-24T18:03:53.804321Z WARNING kernel: [  516.876924] R10: 0000000000000010 R11: ffffffff92e44bd8 R12: ffff9279150796a0
2021-04-24T18:03:53.804322Z WARNING kernel: [  516.876925] R13: ffff92791c368180 R14: ffff927915079640 R15: 000000001c867605
2021-04-24T18:03:53.804323Z WARNING kernel: [  516.876926] FS:  0000000000000000(0000) GS:ffff92791ffc0000(0000) knlGS:0000000000000000
2021-04-24T18:03:53.804323Z WARNING kernel: [  516.876928] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
2021-04-24T18:03:53.804324Z WARNING kernel: [  516.876929] CR2: 0000239514955000 CR3: 00000007f82da001 CR4: 0000000000760ee0
2021-04-24T18:03:53.804325Z WARNING kernel: [  516.876930] PKRU: 55555554
2021-04-24T18:03:53.804325Z WARNING kernel: [  516.876931] Call Trace:
2021-04-24T18:03:53.804326Z WARNING kernel: [  516.876935]  __active_retire+0x77/0xcf
2021-04-24T18:03:53.804326Z WARNING kernel: [  516.876939]  process_one_work+0x1da/0x394
2021-04-24T18:03:53.804327Z WARNING kernel: [  516.876941]  worker_thread+0x216/0x375
2021-04-24T18:03:53.804327Z WARNING kernel: [  516.876944]  kthread+0x147/0x156
2021-04-24T18:03:53.804335Z WARNING kernel: [  516.876946]  ? pr_cont_work+0x58/0x58
2021-04-24T18:03:53.804335Z WARNING kernel: [  516.876948]  ? kthread_blkcg+0x2e/0x2e
2021-04-24T18:03:53.804336Z WARNING kernel: [  516.876950]  ret_from_fork+0x1f/0x40
2021-04-24T18:03:53.804336Z WARNING kernel: [  516.876952] Modules linked in: cdc_mbim cdc_ncm cdc_wdm xt_cgroup rfcomm cmac algif_hash algif_skcipher af_alg xt_MASQUERADE uinput snd_soc_rt5682_sdw snd_soc_rt5682 snd_soc_max98373_sdw snd_soc_max98373 snd_soc_rl6231 regmap_sdw snd_soc_sof_sdw snd_soc_hdac_hdmi snd_soc_dmic snd_hda_codec_hdmi snd_sof_pci snd_sof_intel_hda_common intel_ipu6_psys snd_sof_xtensa_dsp soundwire_intel soundwire_generic_allocation soundwire_cadence snd_sof_intel_hda snd_sof snd_soc_hdac_hda snd_soc_acpi_intel_match snd_soc_acpi snd_hda_ext_core soundwire_bus snd_hda_intel snd_intel_dspcfg snd_hda_codec snd_hwdep snd_hda_core intel_ipu6_isys videobuf2_dma_contig videobuf2_v4l2 videobuf2_common videobuf2_memops mei_hdcp intel_ipu6 ov2740 ov8856 at24 sx9310 dw9768 v4l2_fwnode cros_ec_typec intel_pmc_mux roles acpi_als typec fuse iio_trig_sysfs cros_ec_light_prox cros_ec_lid_angle cros_ec_sensors cros_ec_sensors_core industrialio_triggered_buffer cros_ec_sensors_ring kfifo_buf industrialio cros_ec_sensorhub
2021-04-24T18:03:53.804337Z WARNING kernel: [  516.876972]  cdc_ether usbnet iwlmvm lzo_rle lzo_compress iwl7000_mac80211 iwlwifi zram cfg80211 r8152 mii btusb btrtl btintel btbcm bluetooth ecdh_generic ecc joydev
2021-04-24T18:03:53.804337Z EMERG kernel: [  516.879169] gsmi: Log Shutdown Reason 0x03

This change fixes this by aligning the function.

Signed-off-by: Stéphane Marchesin <marcheu@chromium.org>
Fixes: 229007e02d69 ("drm/i915: Wrap i915_active in a simple kreffed struct")
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210429031021.1218091-1-marcheu@chromium.org
(cherry picked from commit ca419f407b43cc89942ebc297c7a63d94abbcae4)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_active.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index cf9a3d384971..aa573b078ae7 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -1156,7 +1156,8 @@ static int auto_active(struct i915_active *ref)
 	return 0;
 }
 
-static void auto_retire(struct i915_active *ref)
+__i915_active_call static void
+auto_retire(struct i915_active *ref)
 {
 	i915_active_put(ref);
 }
-- 
cgit v1.2.3


From a915fe5e9601c632417ef5261af70788d7d23a8a Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Date: Thu, 29 Apr 2021 09:35:29 +0100
Subject: drm/i915/overlay: Fix active retire callback alignment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

__i915_active_call annotation is required on the retire callback to ensure
correct function alignment.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Fixes: a21ce8ad12d2 ("drm/i915/overlay: Switch to using i915_active tracking")
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210429083530.849546-1-tvrtko.ursulin@linux.intel.com
(cherry picked from commit d8e44e4dd221ee283ea60a6fb87bca08807aa0ab)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_overlay.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index e5dadde422f7..bbaf05515e88 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -383,7 +383,7 @@ static void intel_overlay_off_tail(struct intel_overlay *overlay)
 		i830_overlay_clock_gating(dev_priv, true);
 }
 
-static void
+__i915_active_call static void
 intel_overlay_last_flip_retire(struct i915_active *active)
 {
 	struct intel_overlay *overlay =
-- 
cgit v1.2.3


From e4527420ed087f99c6aa2ac22c6d3458c7dc1a94 Mon Sep 17 00:00:00 2001
From: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Date: Tue, 11 May 2021 17:39:30 +0530
Subject: drm/i915: Use correct downstream caps for check Src-Ctl mode for PCON

Fix the typo in DPCD caps used for checking SRC CTL mode of
HDMI2.1 PCON

v2: Corrected Fixes tag (Jani Nikula).
v3: Rebased.

Fixes: 04b6603d13be ("drm/i915/display: Configure HDMI2.1 Pcon for FRL only if Src-Ctl mode is available")

Cc: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Cc: Uma Shankar <uma.shankar@intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: "Ville Syrj_l_" <ville.syrjala@linux.intel.com>
Cc: Imre Deak <imre.deak@intel.com>
Cc: Manasi Navare <manasi.d.navare@intel.com>
Cc: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Sean Paul <seanpaul@chromium.org>

Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Reviewed-by: Swati Sharma <swati2.sharma@intel.com>
Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210511120930.12218-1-ankit.k.nautiyal@intel.com
(cherry picked from commit 88a9c5485c48ab60c89612a17fc89f4162bbdb9d)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_dp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 1e026177ed1b..642c60f3d9b1 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -2111,7 +2111,7 @@ void intel_dp_check_frl_training(struct intel_dp *intel_dp)
 	 * -PCON supports SRC_CTL_MODE (VESA DP2.0-HDMI2.1 PCON Spec Draft-1 Sec-7)
 	 * -sink is HDMI2.1
 	 */
-	if (!(intel_dp->dpcd[2] & DP_PCON_SOURCE_CTL_MODE) ||
+	if (!(intel_dp->downstream_ports[2] & DP_PCON_SOURCE_CTL_MODE) ||
 	    !intel_dp_is_hdmi_2_1_sink(intel_dp) ||
 	    intel_dp->frl.is_trained)
 		return;
-- 
cgit v1.2.3


From 46c7405df7de8deb97229eacebcee96d61415f3f Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Wed, 12 May 2021 19:42:10 +0200
Subject: objtool: Fix elf_create_undef_symbol() endianness

Currently x86 cross-compilation fails on big endian system with:

  x86_64-cross-ld: init/main.o: invalid string offset 488112128 >= 6229 for section `.strtab'

Mark new ELF data in elf_create_undef_symbol() as symbol, so that libelf
does endianness handling correctly.

Fixes: 2f2f7e47f052 ("objtool: Add elf_create_undef_symbol()")
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: https://lore.kernel.org/r/patch-1.thread-6c9df9.git-d39264656387.your-ad-here.call-01620841104-ext-2554@work.hours
---
 tools/objtool/elf.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index d08f5f3670f8..743c2e9d0f56 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -762,6 +762,7 @@ struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name)
 	data->d_buf = &sym->sym;
 	data->d_size = sizeof(sym->sym);
 	data->d_align = 1;
+	data->d_type = ELF_T_SYM;
 
 	sym->idx = symtab->len / sizeof(sym->sym);
 
-- 
cgit v1.2.3


From f66c05d6baf36069c01a02f869bebb75586f2318 Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Wed, 12 May 2021 19:42:13 +0200
Subject: objtool/x86: Fix elf_add_alternative() endianness

Currently x86 kernel cross-compiled on big endian system fails at boot with:

  kernel BUG at arch/x86/kernel/alternative.c:258!

Corresponding bug condition look like the following:

  BUG_ON(feature >= (NCAPINTS + NBUGINTS) * 32);

Fix that by converting alternative feature/cpuid to target endianness.

Fixes: 9bc0bb50727c ("objtool/x86: Rewrite retpoline thunk calls")
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: https://lore.kernel.org/r/patch-2.thread-6c9df9.git-6c9df9a8098d.your-ad-here.call-01620841104-ext-2554@work.hours
---
 tools/objtool/arch/x86/decode.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index cedf3ede7545..24295d39713b 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -19,6 +19,7 @@
 #include <objtool/elf.h>
 #include <objtool/arch.h>
 #include <objtool/warn.h>
+#include <objtool/endianness.h>
 #include <arch/elf.h>
 
 static int is_x86_64(const struct elf *elf)
@@ -725,7 +726,7 @@ static int elf_add_alternative(struct elf *elf,
 		return -1;
 	}
 
-	alt->cpuid = cpuid;
+	alt->cpuid = bswap_if_needed(cpuid);
 	alt->instrlen = orig_len;
 	alt->replacementlen = repl_len;
 
-- 
cgit v1.2.3


From 83a775d5f9bfda95b1c295f95a3a041a40c7f321 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 30 Apr 2021 12:37:24 +0100
Subject: KEYS: trusted: Fix memory leak on object td

Two error return paths are neglecting to free allocated object td,
causing a memory leak. Fix this by returning via the error return
path that securely kfree's td.

Fixes clang scan-build warning:
security/keys/trusted-keys/trusted_tpm1.c:496:10: warning: Potential
memory leak [unix.Malloc]

Cc: stable@vger.kernel.org
Fixes: 5df16caada3f ("KEYS: trusted: Fix incorrect handling of tpm_get_random()")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 security/keys/trusted-keys/trusted_tpm1.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/security/keys/trusted-keys/trusted_tpm1.c b/security/keys/trusted-keys/trusted_tpm1.c
index 469394550801..aa108bea6739 100644
--- a/security/keys/trusted-keys/trusted_tpm1.c
+++ b/security/keys/trusted-keys/trusted_tpm1.c
@@ -493,10 +493,12 @@ static int tpm_seal(struct tpm_buf *tb, uint16_t keytype,
 
 	ret = tpm_get_random(chip, td->nonceodd, TPM_NONCE_SIZE);
 	if (ret < 0)
-		return ret;
+		goto out;
 
-	if (ret != TPM_NONCE_SIZE)
-		return -EIO;
+	if (ret != TPM_NONCE_SIZE) {
+		ret = -EIO;
+		goto out;
+	}
 
 	ordinal = htonl(TPM_ORD_SEAL);
 	datsize = htonl(datalen);
-- 
cgit v1.2.3


From b3ad7855b7ae3bed4242894d07bdb7f186652dbe Mon Sep 17 00:00:00 2001
From: Ben Boeckel <mathstuf@gmail.com>
Date: Thu, 29 Apr 2021 15:21:56 -0400
Subject: trusted-keys: match tpm_get_ops on all return paths

The `tpm_get_ops` call at the beginning of the function is not paired
with a `tpm_put_ops` on this return path.

Cc: stable@vger.kernel.org
Fixes: f2219745250f ("security: keys: trusted: use ASN.1 TPM2 key format for the blobs")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Ben Boeckel <mathstuf@gmail.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 security/keys/trusted-keys/trusted_tpm2.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/security/keys/trusted-keys/trusted_tpm2.c b/security/keys/trusted-keys/trusted_tpm2.c
index 617fabd4d913..0165da386289 100644
--- a/security/keys/trusted-keys/trusted_tpm2.c
+++ b/security/keys/trusted-keys/trusted_tpm2.c
@@ -336,9 +336,9 @@ out:
 			rc = -EPERM;
 	}
 	if (blob_len < 0)
-		return blob_len;
-
-	payload->blob_len = blob_len;
+		rc = blob_len;
+	else
+		payload->blob_len = blob_len;
 
 	tpm_put_ops(chip);
 	return rc;
-- 
cgit v1.2.3


From e630af7dfb450d1c00c30077314acf33032ff9e4 Mon Sep 17 00:00:00 2001
From: Jarkko Sakkinen <jarkko@kernel.org>
Date: Mon, 10 May 2021 15:28:30 +0300
Subject: tpm, tpm_tis: Extend locality handling to TPM2 in
 tpm_tis_gen_interrupt()

The earlier fix (linked) only partially fixed the locality handling bug
in tpm_tis_gen_interrupt(), i.e. only for TPM 1.x.

Extend the locality handling to cover TPM2.

Cc: Hans de Goede <hdegoede@redhat.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/linux-integrity/20210220125534.20707-1-jarkko@kernel.org/
Fixes: a3fbfae82b4c ("tpm: take TPM chip power gating out of tpm_transmit()")
Reported-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
Tested-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
---
 drivers/char/tpm/tpm_tis_core.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index a2e0395cbe61..6fa150a3b75e 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -709,16 +709,14 @@ static int tpm_tis_gen_interrupt(struct tpm_chip *chip)
 	cap_t cap;
 	int ret;
 
-	/* TPM 2.0 */
-	if (chip->flags & TPM_CHIP_FLAG_TPM2)
-		return tpm2_get_tpm_pt(chip, 0x100, &cap2, desc);
-
-	/* TPM 1.2 */
 	ret = request_locality(chip, 0);
 	if (ret < 0)
 		return ret;
 
-	ret = tpm1_getcap(chip, TPM_CAP_PROP_TIS_TIMEOUT, &cap, desc, 0);
+	if (chip->flags & TPM_CHIP_FLAG_TPM2)
+		ret = tpm2_get_tpm_pt(chip, 0x100, &cap2, desc);
+	else
+		ret = tpm1_getcap(chip, TPM_CAP_PROP_TIS_TIMEOUT, &cap, desc, 0);
 
 	release_locality(chip, 0);
 
-- 
cgit v1.2.3


From 8a2d296aaebadd68d9c1f6908667df1d1c84c051 Mon Sep 17 00:00:00 2001
From: Jarkko Sakkinen <jarkko@kernel.org>
Date: Mon, 10 May 2021 15:28:31 +0300
Subject: tpm, tpm_tis: Reserve locality in tpm_tis_resume()

Reserve locality in tpm_tis_resume(), as it could be unsert after waking
up from a sleep state.

Cc: stable@vger.kernel.org
Cc: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Reported-by: Hans de Goede <hdegoede@redhat.com>
Fixes: a3fbfae82b4c ("tpm: take TPM chip power gating out of tpm_transmit()")
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/tpm_tis_core.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index 6fa150a3b75e..55b9d3965ae1 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -1125,12 +1125,20 @@ int tpm_tis_resume(struct device *dev)
 	if (ret)
 		return ret;
 
-	/* TPM 1.2 requires self-test on resume. This function actually returns
+	/*
+	 * TPM 1.2 requires self-test on resume. This function actually returns
 	 * an error code but for unknown reason it isn't handled.
 	 */
-	if (!(chip->flags & TPM_CHIP_FLAG_TPM2))
+	if (!(chip->flags & TPM_CHIP_FLAG_TPM2)) {
+		ret = request_locality(chip, 0);
+		if (ret < 0)
+			return ret;
+
 		tpm1_do_selftest(chip);
 
+		release_locality(chip, 0);
+	}
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(tpm_tis_resume);
-- 
cgit v1.2.3


From 1df83992d977355177810c2b711afc30546c81ce Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Wed, 12 May 2021 21:39:26 +0800
Subject: tpm: fix error return code in tpm2_get_cc_attrs_tbl()

If the total number of commands queried through TPM2_CAP_COMMANDS is
different from that queried through TPM2_CC_GET_CAPABILITY, it indicates
an unknown error. In this case, an appropriate error code -EFAULT should
be returned. However, we currently do not explicitly assign this error
code to 'rc'. As a result, 0 was incorrectly returned.

Cc: stable@vger.kernel.org
Fixes: 58472f5cd4f6("tpm: validate TPM 2.0 commands")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/tpm2-cmd.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
index eff1f12d981a..c84d23951219 100644
--- a/drivers/char/tpm/tpm2-cmd.c
+++ b/drivers/char/tpm/tpm2-cmd.c
@@ -656,6 +656,7 @@ int tpm2_get_cc_attrs_tbl(struct tpm_chip *chip)
 
 	if (nr_commands !=
 	    be32_to_cpup((__be32 *)&buf.data[TPM_HEADER_SIZE + 5])) {
+		rc = -EFAULT;
 		tpm_buf_destroy(&buf);
 		goto out;
 	}
-- 
cgit v1.2.3


From 681865a03d3ec6ac3dda147044ed2a1a0f49f7bf Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Mon, 19 Apr 2021 19:27:25 +0800
Subject: libnvdimm: Remove duplicate struct declaration

struct device is declared at 133rd line. The second declaration is
unnecessary, remove it.

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Link: https://lore.kernel.org/r/20210419112725.42145-1-wanjiabing@vivo.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/libnvdimm.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 01f251b6e36c..89b69e645ac7 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -141,7 +141,6 @@ static inline void __iomem *devm_nvdimm_ioremap(struct device *dev,
 
 struct nvdimm_bus;
 struct module;
-struct device;
 struct nd_blk_region;
 struct nd_blk_region_desc {
 	int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
-- 
cgit v1.2.3


From 7ddb4cc2b885c740523e6ea54a1f4434acfa3368 Mon Sep 17 00:00:00 2001
From: Zou Wei <zou_wei@huawei.com>
Date: Tue, 20 Apr 2021 15:47:47 +0800
Subject: tools/testing/nvdimm: Make symbol '__nfit_test_ioremap' static

The sparse tool complains as follows:

tools/testing/nvdimm/test/iomap.c:65:14: warning:
 symbol '__nfit_test_ioremap' was not declared. Should it be static?

This symbol is not used outside of iomap.c, so this
commit marks it static.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zou Wei <zou_wei@huawei.com>
Link: https://lore.kernel.org/r/1618904867-25275-1-git-send-email-zou_wei@huawei.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 tools/testing/nvdimm/test/iomap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index c62d372d426f..ed563bdd88f3 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -62,7 +62,7 @@ struct nfit_test_resource *get_nfit_res(resource_size_t resource)
 }
 EXPORT_SYMBOL(get_nfit_res);
 
-void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size,
+static void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size,
 		void __iomem *(*fallback_fn)(resource_size_t, unsigned long))
 {
 	struct nfit_test_resource *nfit_res = get_nfit_res(offset);
-- 
cgit v1.2.3


From 3dd4fe4b4dfa34e7487edfe159ef787ba397cfa9 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 21 Apr 2021 00:05:28 -0700
Subject: MAINTAINERS: Move nvdimm mailing list

After seeing some users have subscription management trouble, more spam
than other Linux development lists, and considering some of the benefits
of kernel.org hosted lists, nvdimm and persistent memory development is
moving to nvdimm@lists.linux.dev.

The old list will remain up until v5.14-rc1 and shutdown thereafter.

Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Oliver O'Halloran <oohall@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Link: https://lore.kernel.org/r/161898872871.3406469.4054282559340528393.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 Documentation/ABI/obsolete/sysfs-class-dax    |  2 +-
 Documentation/ABI/removed/sysfs-bus-nfit      |  2 +-
 Documentation/ABI/testing/sysfs-bus-nfit      | 40 +++++++++++++--------------
 Documentation/ABI/testing/sysfs-bus-papr-pmem |  4 +--
 Documentation/driver-api/nvdimm/nvdimm.rst    |  2 +-
 MAINTAINERS                                   | 14 +++++-----
 6 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/Documentation/ABI/obsolete/sysfs-class-dax b/Documentation/ABI/obsolete/sysfs-class-dax
index 0faf1354cd05..5bcce27458e3 100644
--- a/Documentation/ABI/obsolete/sysfs-class-dax
+++ b/Documentation/ABI/obsolete/sysfs-class-dax
@@ -1,7 +1,7 @@
 What:           /sys/class/dax/
 Date:           May, 2016
 KernelVersion:  v4.7
-Contact:        linux-nvdimm@lists.01.org
+Contact:        nvdimm@lists.linux.dev
 Description:	Device DAX is the device-centric analogue of Filesystem
 		DAX (CONFIG_FS_DAX).  It allows memory ranges to be
 		allocated and mapped without need of an intervening file
diff --git a/Documentation/ABI/removed/sysfs-bus-nfit b/Documentation/ABI/removed/sysfs-bus-nfit
index ae8c1ca53828..277437005def 100644
--- a/Documentation/ABI/removed/sysfs-bus-nfit
+++ b/Documentation/ABI/removed/sysfs-bus-nfit
@@ -1,7 +1,7 @@
 What:		/sys/bus/nd/devices/regionX/nfit/ecc_unit_size
 Date:		Aug, 2017
 KernelVersion:	v4.14 (Removed v4.18)
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) Size of a write request to a DIMM that will not incur a
 		read-modify-write cycle at the memory controller.
diff --git a/Documentation/ABI/testing/sysfs-bus-nfit b/Documentation/ABI/testing/sysfs-bus-nfit
index 63ef0b9ecce7..e7282d184a74 100644
--- a/Documentation/ABI/testing/sysfs-bus-nfit
+++ b/Documentation/ABI/testing/sysfs-bus-nfit
@@ -5,7 +5,7 @@ Interface Table (NFIT)' section in the ACPI specification
 What:		/sys/bus/nd/devices/nmemX/nfit/serial
 Date:		Jun, 2015
 KernelVersion:	v4.2
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) Serial number of the NVDIMM (non-volatile dual in-line
 		memory module), assigned by the module vendor.
@@ -14,7 +14,7 @@ Description:
 What:		/sys/bus/nd/devices/nmemX/nfit/handle
 Date:		Apr, 2015
 KernelVersion:	v4.2
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) The address (given by the _ADR object) of the device on its
 		parent bus of the NVDIMM device containing the NVDIMM region.
@@ -23,7 +23,7 @@ Description:
 What:		/sys/bus/nd/devices/nmemX/nfit/device
 Date:		Apr, 2015
 KernelVersion:	v4.1
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) Device id for the NVDIMM, assigned by the module vendor.
 
@@ -31,7 +31,7 @@ Description:
 What:		/sys/bus/nd/devices/nmemX/nfit/rev_id
 Date:		Jun, 2015
 KernelVersion:	v4.2
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) Revision of the NVDIMM, assigned by the module vendor.
 
@@ -39,7 +39,7 @@ Description:
 What:		/sys/bus/nd/devices/nmemX/nfit/phys_id
 Date:		Apr, 2015
 KernelVersion:	v4.2
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) Handle (i.e., instance number) for the SMBIOS (system
 		management BIOS) Memory Device structure describing the NVDIMM
@@ -49,7 +49,7 @@ Description:
 What:		/sys/bus/nd/devices/nmemX/nfit/flags
 Date:		Jun, 2015
 KernelVersion:	v4.2
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) The flags in the NFIT memory device sub-structure indicate
 		the state of the data on the nvdimm relative to its energy
@@ -68,7 +68,7 @@ What:		/sys/bus/nd/devices/nmemX/nfit/format1
 What:		/sys/bus/nd/devices/nmemX/nfit/formats
 Date:		Apr, 2016
 KernelVersion:	v4.7
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) The interface codes indicate support for persistent memory
 		mapped directly into system physical address space and / or a
@@ -84,7 +84,7 @@ Description:
 What:		/sys/bus/nd/devices/nmemX/nfit/vendor
 Date:		Apr, 2016
 KernelVersion:	v4.7
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) Vendor id of the NVDIMM.
 
@@ -92,7 +92,7 @@ Description:
 What:		/sys/bus/nd/devices/nmemX/nfit/dsm_mask
 Date:		May, 2016
 KernelVersion:	v4.7
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) The bitmask indicates the supported device specific control
 		functions relative to the NVDIMM command family supported by the
@@ -102,7 +102,7 @@ Description:
 What:		/sys/bus/nd/devices/nmemX/nfit/family
 Date:		Apr, 2016
 KernelVersion:	v4.7
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) Displays the NVDIMM family command sets. Values
 		0, 1, 2 and 3 correspond to NVDIMM_FAMILY_INTEL,
@@ -118,7 +118,7 @@ Description:
 What:		/sys/bus/nd/devices/nmemX/nfit/id
 Date:		Apr, 2016
 KernelVersion:	v4.7
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) ACPI specification 6.2 section 5.2.25.9, defines an
 		identifier for an NVDIMM, which refelects the id attribute.
@@ -127,7 +127,7 @@ Description:
 What:		/sys/bus/nd/devices/nmemX/nfit/subsystem_vendor
 Date:		Apr, 2016
 KernelVersion:	v4.7
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) Sub-system vendor id of the NVDIMM non-volatile memory
 		subsystem controller.
@@ -136,7 +136,7 @@ Description:
 What:		/sys/bus/nd/devices/nmemX/nfit/subsystem_rev_id
 Date:		Apr, 2016
 KernelVersion:	v4.7
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) Sub-system revision id of the NVDIMM non-volatile memory subsystem
 		controller, assigned by the non-volatile memory subsystem
@@ -146,7 +146,7 @@ Description:
 What:		/sys/bus/nd/devices/nmemX/nfit/subsystem_device
 Date:		Apr, 2016
 KernelVersion:	v4.7
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) Sub-system device id for the NVDIMM non-volatile memory
 		subsystem controller, assigned by the non-volatile memory
@@ -156,7 +156,7 @@ Description:
 What:		/sys/bus/nd/devices/ndbusX/nfit/revision
 Date:		Jun, 2015
 KernelVersion:	v4.2
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) ACPI NFIT table revision number.
 
@@ -164,7 +164,7 @@ Description:
 What:		/sys/bus/nd/devices/ndbusX/nfit/scrub
 Date:		Sep, 2016
 KernelVersion:	v4.9
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RW) This shows the number of full Address Range Scrubs (ARS)
 		that have been completed since driver load time. Userspace can
@@ -177,7 +177,7 @@ Description:
 What:		/sys/bus/nd/devices/ndbusX/nfit/hw_error_scrub
 Date:		Sep, 2016
 KernelVersion:	v4.9
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RW) Provides a way to toggle the behavior between just adding
 		the address (cache line) where the MCE happened to the poison
@@ -196,7 +196,7 @@ Description:
 What:		/sys/bus/nd/devices/ndbusX/nfit/dsm_mask
 Date:		Jun, 2017
 KernelVersion:	v4.13
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) The bitmask indicates the supported bus specific control
 		functions. See the section named 'NVDIMM Root Device _DSMs' in
@@ -205,7 +205,7 @@ Description:
 What:		/sys/bus/nd/devices/ndbusX/nfit/firmware_activate_noidle
 Date:		Apr, 2020
 KernelVersion:	v5.8
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RW) The Intel platform implementation of firmware activate
 		support exposes an option let the platform force idle devices in
@@ -225,7 +225,7 @@ Description:
 What:		/sys/bus/nd/devices/regionX/nfit/range_index
 Date:		Jun, 2015
 KernelVersion:	v4.2
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) A unique number provided by the BIOS to identify an address
 		range. Used by NVDIMM Region Mapping Structure to uniquely refer
diff --git a/Documentation/ABI/testing/sysfs-bus-papr-pmem b/Documentation/ABI/testing/sysfs-bus-papr-pmem
index 8316c33862a0..92e2db0e2d3d 100644
--- a/Documentation/ABI/testing/sysfs-bus-papr-pmem
+++ b/Documentation/ABI/testing/sysfs-bus-papr-pmem
@@ -1,7 +1,7 @@
 What:		/sys/bus/nd/devices/nmemX/papr/flags
 Date:		Apr, 2020
 KernelVersion:	v5.8
-Contact:	linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, linux-nvdimm@lists.01.org,
+Contact:	linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, nvdimm@lists.linux.dev,
 Description:
 		(RO) Report flags indicating various states of a
 		papr-pmem NVDIMM device. Each flag maps to a one or
@@ -36,7 +36,7 @@ Description:
 What:		/sys/bus/nd/devices/nmemX/papr/perf_stats
 Date:		May, 2020
 KernelVersion:	v5.9
-Contact:	linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, linux-nvdimm@lists.01.org,
+Contact:	linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, nvdimm@lists.linux.dev,
 Description:
 		(RO) Report various performance stats related to papr-scm NVDIMM
 		device.  Each stat is reported on a new line with each line
diff --git a/Documentation/driver-api/nvdimm/nvdimm.rst b/Documentation/driver-api/nvdimm/nvdimm.rst
index ef6d59e0978e..1d8302b89bd4 100644
--- a/Documentation/driver-api/nvdimm/nvdimm.rst
+++ b/Documentation/driver-api/nvdimm/nvdimm.rst
@@ -4,7 +4,7 @@ LIBNVDIMM: Non-Volatile Devices
 
 libnvdimm - kernel / libndctl - userspace helper library
 
-linux-nvdimm@lists.01.org
+nvdimm@lists.linux.dev
 
 Version 13
 
diff --git a/MAINTAINERS b/MAINTAINERS
index bd7aff0c120f..6b5d489022ea 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5237,7 +5237,7 @@ DEVICE DIRECT ACCESS (DAX)
 M:	Dan Williams <dan.j.williams@intel.com>
 M:	Vishal Verma <vishal.l.verma@intel.com>
 M:	Dave Jiang <dave.jiang@intel.com>
-L:	linux-nvdimm@lists.01.org
+L:	nvdimm@lists.linux.dev
 S:	Supported
 F:	drivers/dax/
 
@@ -7006,7 +7006,7 @@ M:	Dan Williams <dan.j.williams@intel.com>
 R:	Matthew Wilcox <willy@infradead.org>
 R:	Jan Kara <jack@suse.cz>
 L:	linux-fsdevel@vger.kernel.org
-L:	linux-nvdimm@lists.01.org
+L:	nvdimm@lists.linux.dev
 S:	Supported
 F:	fs/dax.c
 F:	include/linux/dax.h
@@ -10378,7 +10378,7 @@ LIBNVDIMM BLK: MMIO-APERTURE DRIVER
 M:	Dan Williams <dan.j.williams@intel.com>
 M:	Vishal Verma <vishal.l.verma@intel.com>
 M:	Dave Jiang <dave.jiang@intel.com>
-L:	linux-nvdimm@lists.01.org
+L:	nvdimm@lists.linux.dev
 S:	Supported
 Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
 P:	Documentation/nvdimm/maintainer-entry-profile.rst
@@ -10389,7 +10389,7 @@ LIBNVDIMM BTT: BLOCK TRANSLATION TABLE
 M:	Vishal Verma <vishal.l.verma@intel.com>
 M:	Dan Williams <dan.j.williams@intel.com>
 M:	Dave Jiang <dave.jiang@intel.com>
-L:	linux-nvdimm@lists.01.org
+L:	nvdimm@lists.linux.dev
 S:	Supported
 Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
 P:	Documentation/nvdimm/maintainer-entry-profile.rst
@@ -10399,7 +10399,7 @@ LIBNVDIMM PMEM: PERSISTENT MEMORY DRIVER
 M:	Dan Williams <dan.j.williams@intel.com>
 M:	Vishal Verma <vishal.l.verma@intel.com>
 M:	Dave Jiang <dave.jiang@intel.com>
-L:	linux-nvdimm@lists.01.org
+L:	nvdimm@lists.linux.dev
 S:	Supported
 Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
 P:	Documentation/nvdimm/maintainer-entry-profile.rst
@@ -10407,7 +10407,7 @@ F:	drivers/nvdimm/pmem*
 
 LIBNVDIMM: DEVICETREE BINDINGS
 M:	Oliver O'Halloran <oohall@gmail.com>
-L:	linux-nvdimm@lists.01.org
+L:	nvdimm@lists.linux.dev
 S:	Supported
 Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
 F:	Documentation/devicetree/bindings/pmem/pmem-region.txt
@@ -10418,7 +10418,7 @@ M:	Dan Williams <dan.j.williams@intel.com>
 M:	Vishal Verma <vishal.l.verma@intel.com>
 M:	Dave Jiang <dave.jiang@intel.com>
 M:	Ira Weiny <ira.weiny@intel.com>
-L:	linux-nvdimm@lists.01.org
+L:	nvdimm@lists.linux.dev
 S:	Supported
 Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
 P:	Documentation/nvdimm/maintainer-entry-profile.rst
-- 
cgit v1.2.3


From e9cfd259c6d386f6235395a13bd4f357a979b2d0 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 7 May 2021 00:33:50 -0700
Subject: ACPI: NFIT: Fix support for variable 'SPA' structure size

ACPI 6.4 introduced the "SpaLocationCookie" to the NFIT "System Physical
Address (SPA) Range Structure". The presence of that new field is
indicated by the ACPI_NFIT_LOCATION_COOKIE_VALID flag. Pre-ACPI-6.4
firmware implementations omit the flag and maintain the original size of
the structure.

Update the implementation to check that flag to determine the size
rather than the ACPI 6.4 compliant definition of 'struct
acpi_nfit_system_address' from the Linux ACPICA definitions.

Update the test infrastructure for the new expectations as well, i.e.
continue to emulate the ACPI 6.3 definition of that structure.

Without this fix the kernel fails to validate 'SPA' structures and this
leads to a crash in nfit_get_smbios_id() since that routine assumes that
SPAs are valid if it finds valid SMBIOS tables.

    BUG: unable to handle page fault for address: ffffffffffffffa8
    [..]
    Call Trace:
     skx_get_nvdimm_info+0x56/0x130 [skx_edac]
     skx_get_dimm_config+0x1f5/0x213 [skx_edac]
     skx_register_mci+0x132/0x1c0 [skx_edac]

Cc: Bob Moore <robert.moore@intel.com>
Cc: Erik Kaneda <erik.kaneda@intel.com>
Fixes: cf16b05c607b ("ACPICA: ACPI 6.4: NFIT: add Location Cookie field")
Reported-by: Yi Zhang <yi.zhang@redhat.com>
Tested-by: Yi Zhang <yi.zhang@redhat.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://lore.kernel.org/r/162037273007.1195827.10907249070709169329.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit/core.c         | 15 ++++++++++----
 tools/testing/nvdimm/test/nfit.c | 42 ++++++++++++++++++++++++----------------
 2 files changed, 36 insertions(+), 21 deletions(-)

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 958aaac869e8..23d9a09d7060 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -686,6 +686,13 @@ int nfit_spa_type(struct acpi_nfit_system_address *spa)
 	return -1;
 }
 
+static size_t sizeof_spa(struct acpi_nfit_system_address *spa)
+{
+	if (spa->flags & ACPI_NFIT_LOCATION_COOKIE_VALID)
+		return sizeof(*spa);
+	return sizeof(*spa) - 8;
+}
+
 static bool add_spa(struct acpi_nfit_desc *acpi_desc,
 		struct nfit_table_prev *prev,
 		struct acpi_nfit_system_address *spa)
@@ -693,22 +700,22 @@ static bool add_spa(struct acpi_nfit_desc *acpi_desc,
 	struct device *dev = acpi_desc->dev;
 	struct nfit_spa *nfit_spa;
 
-	if (spa->header.length != sizeof(*spa))
+	if (spa->header.length != sizeof_spa(spa))
 		return false;
 
 	list_for_each_entry(nfit_spa, &prev->spas, list) {
-		if (memcmp(nfit_spa->spa, spa, sizeof(*spa)) == 0) {
+		if (memcmp(nfit_spa->spa, spa, sizeof_spa(spa)) == 0) {
 			list_move_tail(&nfit_spa->list, &acpi_desc->spas);
 			return true;
 		}
 	}
 
-	nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa) + sizeof(*spa),
+	nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa) + sizeof_spa(spa),
 			GFP_KERNEL);
 	if (!nfit_spa)
 		return false;
 	INIT_LIST_HEAD(&nfit_spa->list);
-	memcpy(nfit_spa->spa, spa, sizeof(*spa));
+	memcpy(nfit_spa->spa, spa, sizeof_spa(spa));
 	list_add_tail(&nfit_spa->list, &acpi_desc->spas);
 	dev_dbg(dev, "spa index: %d type: %s\n",
 			spa->range_index,
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 9b185bf82da8..54f367cbadae 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -1871,9 +1871,16 @@ static void smart_init(struct nfit_test *t)
 	}
 }
 
+static size_t sizeof_spa(struct acpi_nfit_system_address *spa)
+{
+	/* until spa location cookie support is added... */
+	return sizeof(*spa) - 8;
+}
+
 static int nfit_test0_alloc(struct nfit_test *t)
 {
-	size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA
+	struct acpi_nfit_system_address *spa = NULL;
+	size_t nfit_size = sizeof_spa(spa) * NUM_SPA
 			+ sizeof(struct acpi_nfit_memory_map) * NUM_MEM
 			+ sizeof(struct acpi_nfit_control_region) * NUM_DCR
 			+ offsetof(struct acpi_nfit_control_region,
@@ -1937,7 +1944,8 @@ static int nfit_test0_alloc(struct nfit_test *t)
 
 static int nfit_test1_alloc(struct nfit_test *t)
 {
-	size_t nfit_size = sizeof(struct acpi_nfit_system_address) * 2
+	struct acpi_nfit_system_address *spa = NULL;
+	size_t nfit_size = sizeof_spa(spa) * 2
 		+ sizeof(struct acpi_nfit_memory_map) * 2
 		+ offsetof(struct acpi_nfit_control_region, window_size) * 2;
 	int i;
@@ -2000,7 +2008,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	 */
 	spa = nfit_buf;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-	spa->header.length = sizeof(*spa);
+	spa->header.length = sizeof_spa(spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
 	spa->range_index = 0+1;
 	spa->address = t->spa_set_dma[0];
@@ -2014,7 +2022,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	 */
 	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-	spa->header.length = sizeof(*spa);
+	spa->header.length = sizeof_spa(spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
 	spa->range_index = 1+1;
 	spa->address = t->spa_set_dma[1];
@@ -2024,7 +2032,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	/* spa2 (dcr0) dimm0 */
 	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-	spa->header.length = sizeof(*spa);
+	spa->header.length = sizeof_spa(spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
 	spa->range_index = 2+1;
 	spa->address = t->dcr_dma[0];
@@ -2034,7 +2042,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	/* spa3 (dcr1) dimm1 */
 	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-	spa->header.length = sizeof(*spa);
+	spa->header.length = sizeof_spa(spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
 	spa->range_index = 3+1;
 	spa->address = t->dcr_dma[1];
@@ -2044,7 +2052,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	/* spa4 (dcr2) dimm2 */
 	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-	spa->header.length = sizeof(*spa);
+	spa->header.length = sizeof_spa(spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
 	spa->range_index = 4+1;
 	spa->address = t->dcr_dma[2];
@@ -2054,7 +2062,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	/* spa5 (dcr3) dimm3 */
 	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-	spa->header.length = sizeof(*spa);
+	spa->header.length = sizeof_spa(spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
 	spa->range_index = 5+1;
 	spa->address = t->dcr_dma[3];
@@ -2064,7 +2072,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	/* spa6 (bdw for dcr0) dimm0 */
 	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-	spa->header.length = sizeof(*spa);
+	spa->header.length = sizeof_spa(spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
 	spa->range_index = 6+1;
 	spa->address = t->dimm_dma[0];
@@ -2074,7 +2082,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	/* spa7 (bdw for dcr1) dimm1 */
 	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-	spa->header.length = sizeof(*spa);
+	spa->header.length = sizeof_spa(spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
 	spa->range_index = 7+1;
 	spa->address = t->dimm_dma[1];
@@ -2084,7 +2092,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	/* spa8 (bdw for dcr2) dimm2 */
 	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-	spa->header.length = sizeof(*spa);
+	spa->header.length = sizeof_spa(spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
 	spa->range_index = 8+1;
 	spa->address = t->dimm_dma[2];
@@ -2094,7 +2102,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	/* spa9 (bdw for dcr3) dimm3 */
 	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-	spa->header.length = sizeof(*spa);
+	spa->header.length = sizeof_spa(spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
 	spa->range_index = 9+1;
 	spa->address = t->dimm_dma[3];
@@ -2581,7 +2589,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 		/* spa10 (dcr4) dimm4 */
 		spa = nfit_buf + offset;
 		spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-		spa->header.length = sizeof(*spa);
+		spa->header.length = sizeof_spa(spa);
 		memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
 		spa->range_index = 10+1;
 		spa->address = t->dcr_dma[4];
@@ -2595,7 +2603,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 		 */
 		spa = nfit_buf + offset;
 		spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-		spa->header.length = sizeof(*spa);
+		spa->header.length = sizeof_spa(spa);
 		memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
 		spa->range_index = 11+1;
 		spa->address = t->spa_set_dma[2];
@@ -2605,7 +2613,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 		/* spa12 (bdw for dcr4) dimm4 */
 		spa = nfit_buf + offset;
 		spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-		spa->header.length = sizeof(*spa);
+		spa->header.length = sizeof_spa(spa);
 		memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
 		spa->range_index = 12+1;
 		spa->address = t->dimm_dma[4];
@@ -2739,7 +2747,7 @@ static void nfit_test1_setup(struct nfit_test *t)
 	/* spa0 (flat range with no bdw aliasing) */
 	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-	spa->header.length = sizeof(*spa);
+	spa->header.length = sizeof_spa(spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
 	spa->range_index = 0+1;
 	spa->address = t->spa_set_dma[0];
@@ -2749,7 +2757,7 @@ static void nfit_test1_setup(struct nfit_test *t)
 	/* virtual cd region */
 	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-	spa->header.length = sizeof(*spa);
+	spa->header.length = sizeof_spa(spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_VCD), 16);
 	spa->range_index = 0;
 	spa->address = t->spa_set_dma[1];
-- 
cgit v1.2.3


From a554e740b66a83c7560b30e6b50bece37555ced3 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Thu, 22 Apr 2021 12:04:42 -0700
Subject: x86/boot/compressed: Enable -Wundef

A discussion around -Wundef showed that there were still a few boolean
Kconfigs where #if was used rather than #ifdef to guard different code.
Kconfig doesn't define boolean configs, which can result in -Wundef
warnings.

arch/x86/boot/compressed/Makefile resets the CFLAGS used for this
directory, and doesn't re-enable -Wundef as the top level Makefile does.
If re-added, with RANDOMIZE_BASE and X86_NEED_RELOCS disabled, the
following warnings are visible.

  arch/x86/boot/compressed/misc.h:82:5: warning: 'CONFIG_RANDOMIZE_BASE'
  is not defined, evaluates to 0 [-Wundef]
      ^
  arch/x86/boot/compressed/misc.c:175:5: warning: 'CONFIG_X86_NEED_RELOCS'
  is not defined, evaluates to 0 [-Wundef]
      ^

Simply fix these and re-enable this warning for this directory.

Suggested-by: Nathan Chancellor <nathan@kernel.org>
Suggested-by: Joe Perches <joe@perches.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Link: https://lore.kernel.org/r/20210422190450.3903999-1-ndesaulniers@google.com
---
 arch/x86/boot/compressed/Makefile | 1 +
 arch/x86/boot/compressed/misc.c   | 2 +-
 arch/x86/boot/compressed/misc.h   | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 2a2975236c9e..431bf7f846c3 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -30,6 +30,7 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
 
 KBUILD_CFLAGS := -m$(BITS) -O2
 KBUILD_CFLAGS += -fno-strict-aliasing -fPIE
+KBUILD_CFLAGS += -Wundef
 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
 cflags-$(CONFIG_X86_32) := -march=i386
 cflags-$(CONFIG_X86_64) := -mcmodel=small -mno-red-zone
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index dde042f64cca..743f13ea25c1 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -172,7 +172,7 @@ void __puthex(unsigned long value)
 	}
 }
 
-#if CONFIG_X86_NEED_RELOCS
+#ifdef CONFIG_X86_NEED_RELOCS
 static void handle_relocations(void *output, unsigned long output_len,
 			       unsigned long virt_addr)
 {
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index e5612f035498..31139256859f 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -79,7 +79,7 @@ struct mem_vector {
 	u64 size;
 };
 
-#if CONFIG_RANDOMIZE_BASE
+#ifdef CONFIG_RANDOMIZE_BASE
 /* kaslr.c */
 void choose_random_location(unsigned long input,
 			    unsigned long input_size,
-- 
cgit v1.2.3


From 098116e7e640ba677d9e345cbee83d253c13d556 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 11 May 2021 10:35:21 +0200
Subject: net: really orphan skbs tied to closing sk

If the owing socket is shutting down - e.g. the sock reference
count already dropped to 0 and only sk_wmem_alloc is keeping
the sock alive, skb_orphan_partial() becomes a no-op.

When forwarding packets over veth with GRO enabled, the above
causes refcount errors.

This change addresses the issue with a plain skb_orphan() call
in the critical scenario.

Fixes: 9adc89af724f ("net: let skb_orphan_partial wake-up waiters.")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 4 +++-
 net/core/sock.c    | 8 ++++----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 42bc5e1a627f..0e962d8bc73b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2231,13 +2231,15 @@ static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
 	sk_mem_charge(sk, skb->truesize);
 }
 
-static inline void skb_set_owner_sk_safe(struct sk_buff *skb, struct sock *sk)
+static inline __must_check bool skb_set_owner_sk_safe(struct sk_buff *skb, struct sock *sk)
 {
 	if (sk && refcount_inc_not_zero(&sk->sk_refcnt)) {
 		skb_orphan(skb);
 		skb->destructor = sock_efree;
 		skb->sk = sk;
+		return true;
 	}
+	return false;
 }
 
 void sk_reset_timer(struct sock *sk, struct timer_list *timer,
diff --git a/net/core/sock.c b/net/core/sock.c
index c761c4a0b66b..958614ea16ed 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2132,10 +2132,10 @@ void skb_orphan_partial(struct sk_buff *skb)
 	if (skb_is_tcp_pure_ack(skb))
 		return;
 
-	if (can_skb_orphan_partial(skb))
-		skb_set_owner_sk_safe(skb, skb->sk);
-	else
-		skb_orphan(skb);
+	if (can_skb_orphan_partial(skb) && skb_set_owner_sk_safe(skb, skb->sk))
+		return;
+
+	skb_orphan(skb);
 }
 EXPORT_SYMBOL(skb_orphan_partial);
 
-- 
cgit v1.2.3


From aa473d6ceb821d7c568c64cca7fff3e86ba9d789 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Tue, 11 May 2021 19:10:50 -0400
Subject: bnxt_en: Fix and improve .ndo_features_check().

Jakub Kicinski pointed out that we need to handle ipv6 extension headers
and to explicitly check for supported tunnel types in
.ndo_features_check().

For ipv6 extension headers, the hardware supports up to 2 ext. headers
and each must be <= 64 bytes.  For tunneled packets, the supported
packets are UDP with supported VXLAN and Geneve ports, GRE, and IPIP.

v3: More improvements based on Alexander Duyck's valuable feedback -
    Remove the jump lable in bnxt_features_check() and restructure it
    so that the TCP/UDP is check is consolidated in bnxt_exthdr_check().

v2: Add missing step to check inner ipv6 header for UDP and GRE tunnels.
    Check TCP/UDP next header after skipping ipv6 ext headers for
    non-tunneled packets and for inner ipv6.
    (Both feedback from Alexander Duyck)

Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Fixes: 1698d600b361 ("bnxt_en: Implement .ndo_features_check().")
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 126 +++++++++++++++++++++++++-----
 1 file changed, 107 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 2985844634c8..46be4046ee51 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -10785,37 +10785,125 @@ static int bnxt_set_features(struct net_device *dev, netdev_features_t features)
 	return rc;
 }
 
+static bool bnxt_exthdr_check(struct bnxt *bp, struct sk_buff *skb, int nw_off,
+			      u8 **nextp)
+{
+	struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + nw_off);
+	int hdr_count = 0;
+	u8 *nexthdr;
+	int start;
+
+	/* Check that there are at most 2 IPv6 extension headers, no
+	 * fragment header, and each is <= 64 bytes.
+	 */
+	start = nw_off + sizeof(*ip6h);
+	nexthdr = &ip6h->nexthdr;
+	while (ipv6_ext_hdr(*nexthdr)) {
+		struct ipv6_opt_hdr *hp;
+		int hdrlen;
+
+		if (hdr_count >= 3 || *nexthdr == NEXTHDR_NONE ||
+		    *nexthdr == NEXTHDR_FRAGMENT)
+			return false;
+		hp = __skb_header_pointer(NULL, start, sizeof(*hp), skb->data,
+					  skb_headlen(skb), NULL);
+		if (!hp)
+			return false;
+		if (*nexthdr == NEXTHDR_AUTH)
+			hdrlen = ipv6_authlen(hp);
+		else
+			hdrlen = ipv6_optlen(hp);
+
+		if (hdrlen > 64)
+			return false;
+		nexthdr = &hp->nexthdr;
+		start += hdrlen;
+		hdr_count++;
+	}
+	if (nextp) {
+		/* Caller will check inner protocol */
+		if (skb->encapsulation) {
+			*nextp = nexthdr;
+			return true;
+		}
+		*nextp = NULL;
+	}
+	/* Only support TCP/UDP for non-tunneled ipv6 and inner ipv6 */
+	return *nexthdr == IPPROTO_TCP || *nexthdr == IPPROTO_UDP;
+}
+
+/* For UDP, we can only handle 1 Vxlan port and 1 Geneve port. */
+static bool bnxt_udp_tunl_check(struct bnxt *bp, struct sk_buff *skb)
+{
+	struct udphdr *uh = udp_hdr(skb);
+	__be16 udp_port = uh->dest;
+
+	if (udp_port != bp->vxlan_port && udp_port != bp->nge_port)
+		return false;
+	if (skb->inner_protocol_type == ENCAP_TYPE_ETHER) {
+		struct ethhdr *eh = inner_eth_hdr(skb);
+
+		switch (eh->h_proto) {
+		case htons(ETH_P_IP):
+			return true;
+		case htons(ETH_P_IPV6):
+			return bnxt_exthdr_check(bp, skb,
+						 skb_inner_network_offset(skb),
+						 NULL);
+		}
+	}
+	return false;
+}
+
+static bool bnxt_tunl_check(struct bnxt *bp, struct sk_buff *skb, u8 l4_proto)
+{
+	switch (l4_proto) {
+	case IPPROTO_UDP:
+		return bnxt_udp_tunl_check(bp, skb);
+	case IPPROTO_IPIP:
+		return true;
+	case IPPROTO_GRE: {
+		switch (skb->inner_protocol) {
+		default:
+			return false;
+		case htons(ETH_P_IP):
+			return true;
+		case htons(ETH_P_IPV6):
+			fallthrough;
+		}
+	}
+	case IPPROTO_IPV6:
+		/* Check ext headers of inner ipv6 */
+		return bnxt_exthdr_check(bp, skb, skb_inner_network_offset(skb),
+					 NULL);
+	}
+	return false;
+}
+
 static netdev_features_t bnxt_features_check(struct sk_buff *skb,
 					     struct net_device *dev,
 					     netdev_features_t features)
 {
-	struct bnxt *bp;
-	__be16 udp_port;
-	u8 l4_proto = 0;
+	struct bnxt *bp = netdev_priv(dev);
+	u8 *l4_proto;
 
 	features = vlan_features_check(skb, features);
-	if (!skb->encapsulation)
-		return features;
-
 	switch (vlan_get_protocol(skb)) {
 	case htons(ETH_P_IP):
-		l4_proto = ip_hdr(skb)->protocol;
+		if (!skb->encapsulation)
+			return features;
+		l4_proto = &ip_hdr(skb)->protocol;
+		if (bnxt_tunl_check(bp, skb, *l4_proto))
+			return features;
 		break;
 	case htons(ETH_P_IPV6):
-		l4_proto = ipv6_hdr(skb)->nexthdr;
+		if (!bnxt_exthdr_check(bp, skb, skb_network_offset(skb),
+				       &l4_proto))
+			break;
+		if (!l4_proto || bnxt_tunl_check(bp, skb, *l4_proto))
+			return features;
 		break;
-	default:
-		return features;
 	}
-
-	if (l4_proto != IPPROTO_UDP)
-		return features;
-
-	bp = netdev_priv(dev);
-	/* For UDP, we can only handle 1 Vxlan port and 1 Geneve port. */
-	udp_port = udp_hdr(skb)->dest;
-	if (udp_port == bp->vxlan_port || udp_port == bp->nge_port)
-		return features;
 	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
 }
 
-- 
cgit v1.2.3


From 171c3b151118a2fe0fc1e2a9d1b5a1570cfe82d2 Mon Sep 17 00:00:00 2001
From: Richard Sanger <rsanger@wand.net.nz>
Date: Wed, 12 May 2021 13:31:22 +1200
Subject: net: packetmmap: fix only tx timestamp on request

The packetmmap tx ring should only return timestamps if requested via
setsockopt PACKET_TIMESTAMP, as documented. This allows compatibility
with non-timestamp aware user-space code which checks
tp_status == TP_STATUS_AVAILABLE; not expecting additional timestamp
flags to be set in tp_status.

Fixes: b9c32fb27170 ("packet: if hw/sw ts enabled in rx/tx ring, report which ts we got")
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
Signed-off-by: Richard Sanger <rsanger@wand.net.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ba96db1880ea..ae906eb4b269 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -422,7 +422,8 @@ static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec64 *ts,
 	    ktime_to_timespec64_cond(shhwtstamps->hwtstamp, ts))
 		return TP_STATUS_TS_RAW_HARDWARE;
 
-	if (ktime_to_timespec64_cond(skb->tstamp, ts))
+	if ((flags & SOF_TIMESTAMPING_SOFTWARE) &&
+	    ktime_to_timespec64_cond(skb->tstamp, ts))
 		return TP_STATUS_TS_SOFTWARE;
 
 	return 0;
@@ -2340,7 +2341,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 
 	skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
 
-	if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
+	/* Always timestamp; prefer an existing software timestamp taken
+	 * closer to the time of capture.
+	 */
+	ts_status = tpacket_get_timestamp(skb, &ts,
+					  po->tp_tstamp | SOF_TIMESTAMPING_SOFTWARE);
+	if (!ts_status)
 		ktime_get_real_ts64(&ts);
 
 	status |= ts_status;
-- 
cgit v1.2.3


From 619fee9eb13b5d29e4267cb394645608088c28a8 Mon Sep 17 00:00:00 2001
From: Fugang Duan <fugang.duan@nxp.com>
Date: Wed, 12 May 2021 10:43:59 +0800
Subject: net: fec: fix the potential memory leak in fec_enet_init()

If the memory allocated for cbd_base is failed, it should
free the memory allocated for the queues, otherwise it causes
memory leak.

And if the memory allocated for the queues is failed, it can
return error directly.

Fixes: 59d0f7465644 ("net: fec: init multi queue date structure")
Signed-off-by: Fugang Duan <fugang.duan@nxp.com>
Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/fec_main.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index f2065f9d02e6..a2ada39c22d7 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -3290,7 +3290,9 @@ static int fec_enet_init(struct net_device *ndev)
 		return ret;
 	}
 
-	fec_enet_alloc_queue(ndev);
+	ret = fec_enet_alloc_queue(ndev);
+	if (ret)
+		return ret;
 
 	bd_size = (fep->total_tx_ring_size + fep->total_rx_ring_size) * dsize;
 
@@ -3298,7 +3300,8 @@ static int fec_enet_init(struct net_device *ndev)
 	cbd_base = dmam_alloc_coherent(&fep->pdev->dev, bd_size, &bd_dma,
 				       GFP_KERNEL);
 	if (!cbd_base) {
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto free_queue_mem;
 	}
 
 	/* Get the Ethernet address */
@@ -3376,6 +3379,10 @@ static int fec_enet_init(struct net_device *ndev)
 		fec_enet_update_ethtool_stats(ndev);
 
 	return 0;
+
+free_queue_mem:
+	fec_enet_free_queue(ndev);
+	return ret;
 }
 
 #ifdef CONFIG_OF
-- 
cgit v1.2.3


From 052fcc4531824c38f8e0ad88213c1be102a0b124 Mon Sep 17 00:00:00 2001
From: Fugang Duan <fugang.duan@nxp.com>
Date: Wed, 12 May 2021 10:44:00 +0800
Subject: net: fec: add defer probe for of_get_mac_address

If MAC address read from nvmem efuse by calling .of_get_mac_address(),
but nvmem efuse is registered later than the driver, then it
return -EPROBE_DEFER value. So modify the driver to support
defer probe when read MAC address from nvmem efuse.

Signed-off-by: Fugang Duan <fugang.duan@nxp.com>
Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/fec_main.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index a2ada39c22d7..ad82cffc6f3f 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1662,7 +1662,7 @@ static int fec_enet_rx_napi(struct napi_struct *napi, int budget)
 }
 
 /* ------------------------------------------------------------------------- */
-static void fec_get_mac(struct net_device *ndev)
+static int fec_get_mac(struct net_device *ndev)
 {
 	struct fec_enet_private *fep = netdev_priv(ndev);
 	unsigned char *iap, tmpaddr[ETH_ALEN];
@@ -1685,6 +1685,8 @@ static void fec_get_mac(struct net_device *ndev)
 			ret = of_get_mac_address(np, tmpaddr);
 			if (!ret)
 				iap = tmpaddr;
+			else if (ret == -EPROBE_DEFER)
+				return ret;
 		}
 	}
 
@@ -1723,7 +1725,7 @@ static void fec_get_mac(struct net_device *ndev)
 		eth_hw_addr_random(ndev);
 		dev_info(&fep->pdev->dev, "Using random MAC address: %pM\n",
 			 ndev->dev_addr);
-		return;
+		return 0;
 	}
 
 	memcpy(ndev->dev_addr, iap, ETH_ALEN);
@@ -1731,6 +1733,8 @@ static void fec_get_mac(struct net_device *ndev)
 	/* Adjust MAC if using macaddr */
 	if (iap == macaddr)
 		 ndev->dev_addr[ETH_ALEN-1] = macaddr[ETH_ALEN-1] + fep->dev_id;
+
+	return 0;
 }
 
 /* ------------------------------------------------------------------------- */
@@ -3305,7 +3309,10 @@ static int fec_enet_init(struct net_device *ndev)
 	}
 
 	/* Get the Ethernet address */
-	fec_get_mac(ndev);
+	ret = fec_get_mac(ndev);
+	if (ret)
+		goto free_queue_mem;
+
 	/* make sure MAC we just acquired is programmed into the hw */
 	fec_set_mac_address(ndev, NULL);
 
-- 
cgit v1.2.3


From e5cc361e21648b75f935f9571d4003aaee480214 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 12 May 2021 13:11:43 +0300
Subject: octeontx2-pf: fix a buffer overflow in otx2_set_rxfh_context()

This function is called from ethtool_set_rxfh() and "*rss_context"
comes from the user.  Add some bounds checking to prevent memory
corruption.

Fixes: 81a4362016e7 ("octeontx2-pf: Add RSS multi group support")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Sunil Goutham <sgoutham@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
index f4962a97a075..9d9a2e438acf 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
@@ -786,6 +786,10 @@ static int otx2_set_rxfh_context(struct net_device *dev, const u32 *indir,
 	if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
 		return -EOPNOTSUPP;
 
+	if (*rss_context != ETH_RXFH_CONTEXT_ALLOC &&
+	    *rss_context >= MAX_RSS_GROUPS)
+		return -EINVAL;
+
 	rss = &pfvf->hw.rss_info;
 
 	if (!rss->enable) {
-- 
cgit v1.2.3


From 9c1bb37f8cad5e2ee1933fa1da9a6baa7876a8e4 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Wed, 12 May 2021 13:15:29 +0200
Subject: ptp: ocp: Fix a resource leak in an error handling path

If an error occurs after a successful 'pci_ioremap_bar()' call, it must be
undone by a corresponding 'pci_iounmap()' call, as already done in the
remove function.

Fixes: a7e1abad13f3 ("ptp: Add clock driver for the OpenCompute TimeCard.")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_ocp.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c
index 530e5f90095e..0d1034e3ed0f 100644
--- a/drivers/ptp/ptp_ocp.c
+++ b/drivers/ptp/ptp_ocp.c
@@ -324,7 +324,7 @@ ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (!bp->base) {
 		dev_err(&pdev->dev, "io_remap bar0\n");
 		err = -ENOMEM;
-		goto out;
+		goto out_release_regions;
 	}
 	bp->reg = bp->base + OCP_REGISTER_OFFSET;
 	bp->tod = bp->base + TOD_REGISTER_OFFSET;
@@ -347,6 +347,8 @@ ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	return 0;
 
 out:
+	pci_iounmap(pdev, bp->base);
+out_release_regions:
 	pci_release_regions(pdev);
 out_disable:
 	pci_disable_device(pdev);
-- 
cgit v1.2.3


From ca14f9597f4fdb3679453aec7bb2807f0b8b7363 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Wed, 12 May 2021 10:00:46 -0400
Subject: MAINTAINERS: nfc: drop Clément Perrochaud from NXP-NCI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Emails to Clément Perrochaud bounce with permanent error "user does not
exist", so remove Clément Perrochaud from NXP-NCI driver maintainers
entry.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Acked-by: Mark Greer <mgreer@animalcreek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index bd7aff0c120f..ec723b48769e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13205,7 +13205,6 @@ F:	Documentation/devicetree/bindings/sound/tfa9879.txt
 F:	sound/soc/codecs/tfa9879*
 
 NXP-NCI NFC DRIVER
-M:	Clément Perrochaud <clement.perrochaud@effinnov.com>
 R:	Charles Gorand <charles.gorand@effinnov.com>
 L:	linux-nfc@lists.01.org (moderated for non-subscribers)
 S:	Supported
-- 
cgit v1.2.3


From 8aa5713d8b2ce1ea67bdf212eb61bfcff3c52202 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Wed, 12 May 2021 10:43:18 -0400
Subject: MAINTAINERS: nfc: add Krzysztof Kozlowski as maintainer

The NFC subsystem is orphaned.  I am happy to spend some cycles to
review the patches, send pull requests and in general keep the NFC
subsystem running.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Acked-by: Mark Greer <mgreer@animalcreek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index ec723b48769e..7020293a1347 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12896,8 +12896,9 @@ F:	include/uapi/linux/nexthop.h
 F:	net/ipv4/nexthop.c
 
 NFC SUBSYSTEM
+M:	Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
 L:	netdev@vger.kernel.org
-S:	Orphan
+S:	Maintained
 F:	Documentation/devicetree/bindings/net/nfc/
 F:	drivers/nfc/
 F:	include/linux/platform_data/nfcmrvl.h
-- 
cgit v1.2.3


From 4a64541f2cebef54ea8d9f53ac5067328b8e02d8 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Wed, 12 May 2021 10:43:19 -0400
Subject: MAINTAINERS: nfc: include linux-nfc mailing list

Keep all NFC related patches in existing linux-nfc@lists.01.org mailing
list.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Acked-by: Mark Greer <mgreer@animalcreek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 7020293a1347..1d834bebf469 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12897,6 +12897,7 @@ F:	net/ipv4/nexthop.c
 
 NFC SUBSYSTEM
 M:	Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+L:	linux-nfc@lists.01.org (moderated for non-subscribers)
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	Documentation/devicetree/bindings/net/nfc/
-- 
cgit v1.2.3


From 832ce924b1a14e139e184a6da9f5a69a5e47b256 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 12 May 2021 13:02:48 +0300
Subject: chelsio/chtls: unlock on error in chtls_pt_recvmsg()

This error path needs to release some memory and call release_sock(sk);
before returning.

Fixes: 6919a8264a32 ("Crypto/chtls: add/delete TLS header in driver")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c
index 188d871f6b8c..c320cc8ca68d 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c
@@ -1564,8 +1564,10 @@ found_ok_skb:
 			cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
 					sizeof(thdr->type), &thdr->type);
 
-			if (cerr && thdr->type != TLS_RECORD_TYPE_DATA)
-				return -EIO;
+			if (cerr && thdr->type != TLS_RECORD_TYPE_DATA) {
+				copied = -EIO;
+				break;
+			}
 			/*  don't send tls header, skip copy */
 			goto skip_copy;
 		}
-- 
cgit v1.2.3


From 3b5169c2eb81e822445469a077223f8eb0729a59 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Wed, 12 May 2021 15:48:09 -0700
Subject: hwmon: (adm9240) Fix writes into inX_max attributes

When converting the driver to use the devm_hwmon_device_register_with_info
API, the wrong register was selected when writing into inX_max attributes.
Fix it.

Fixes: 124b7e34a5a6 ("hwmon: (adm9240) Convert to devm_hwmon_device_register_with_info API")
Reported-by: Chris Packham <Chris.Packham@alliedtelesis.co.nz>
Tested-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/adm9240.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/adm9240.c b/drivers/hwmon/adm9240.c
index 5677263bcf0d..483cd757abd3 100644
--- a/drivers/hwmon/adm9240.c
+++ b/drivers/hwmon/adm9240.c
@@ -485,7 +485,7 @@ static int adm9240_in_write(struct device *dev, u32 attr, int channel, long val)
 		reg = ADM9240_REG_IN_MIN(channel);
 		break;
 	case hwmon_in_max:
-		reg = ADM9240_REG_IN(channel);
+		reg = ADM9240_REG_IN_MAX(channel);
 		break;
 	default:
 		return -EOPNOTSUPP;
-- 
cgit v1.2.3


From 0852b6ca941ef3ff75076e85738877bd3271e1cd Mon Sep 17 00:00:00 2001
From: Gao Xiang <xiang@kernel.org>
Date: Mon, 10 May 2021 14:47:15 +0800
Subject: erofs: fix 1 lcluster-sized pcluster for big pcluster

If the 1st NONHEAD lcluster of a pcluster isn't CBLKCNT lcluster type
rather than a HEAD or PLAIN type instead, which means its pclustersize
_must_ be 1 lcluster (since its uncompressed size < 2 lclusters),
as illustrated below:

       HEAD     HEAD / PLAIN    lcluster type
   ____________ ____________
  |_:__________|_________:__|   file data (uncompressed)
   .                .
  .____________.
  |____________|                pcluster data (compressed)

Such on-disk case was explained before [1] but missed to be handled
properly in the runtime implementation.

It can be observed if manually generating 1 lcluster-sized pcluster
with 2 lclusters (thus CBLKCNT doesn't exist.) Let's fix it now.

[1] https://lore.kernel.org/r/20210407043927.10623-1-xiang@kernel.org

Link: https://lore.kernel.org/r/20210510064715.29123-1-xiang@kernel.org
Fixes: cec6e93beadf ("erofs: support parsing big pcluster compress indexes")
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Gao Xiang <xiang@kernel.org>
---
 fs/erofs/zmap.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index e62d813756f2..efaf32596b97 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -450,14 +450,31 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
 	lcn = m->lcn + 1;
 	if (m->compressedlcs)
 		goto out;
-	if (lcn == initial_lcn)
-		goto err_bonus_cblkcnt;
 
 	err = z_erofs_load_cluster_from_disk(m, lcn);
 	if (err)
 		return err;
 
+	/*
+	 * If the 1st NONHEAD lcluster has already been handled initially w/o
+	 * valid compressedlcs, which means at least it mustn't be CBLKCNT, or
+	 * an internal implemenatation error is detected.
+	 *
+	 * The following code can also handle it properly anyway, but let's
+	 * BUG_ON in the debugging mode only for developers to notice that.
+	 */
+	DBG_BUGON(lcn == initial_lcn &&
+		  m->type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD);
+
 	switch (m->type) {
+	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
+	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+		/*
+		 * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type
+		 * rather than CBLKCNT, it's a 1 lcluster-sized pcluster.
+		 */
+		m->compressedlcs = 1;
+		break;
 	case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
 		if (m->delta[0] != 1)
 			goto err_bonus_cblkcnt;
-- 
cgit v1.2.3


From 3743d55b289c203d8f77b7cd47c24926b9d186ae Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Sun, 25 Apr 2021 15:34:51 +0800
Subject: x86, sched: Fix the AMD CPPC maximum performance value on certain AMD
 Ryzen generations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some AMD Ryzen generations has different calculation method on maximum
performance. 255 is not for all ASICs, some specific generations should use 166
as the maximum performance. Otherwise, it will report incorrect frequency value
like below:

  ~ → lscpu | grep MHz
  CPU MHz:                         3400.000
  CPU max MHz:                     7228.3198
  CPU min MHz:                     2200.0000

[ mingo: Tidied up whitespace use. ]
[ Alexander Monakov <amonakov@ispras.ru>: fix 225 -> 255 typo. ]

Fixes: 41ea667227ba ("x86, sched: Calculate frequency invariance for AMD systems")
Fixes: 3c55e94c0ade ("cpufreq: ACPI: Extend frequency tables to cover boost frequencies")
Reported-by: Jason Bagavatsingham <jason.bagavatsingham@gmail.com>
Fixed-by: Alexander Monakov <amonakov@ispras.ru>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Tested-by: Jason Bagavatsingham <jason.bagavatsingham@gmail.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210425073451.2557394-1-ray.huang@amd.com
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=211791
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/processor.h |  2 ++
 arch/x86/kernel/cpu/amd.c        | 16 ++++++++++++++++
 arch/x86/kernel/smpboot.c        |  2 +-
 drivers/cpufreq/acpi-cpufreq.c   |  6 +++++-
 4 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 154321d29050..556b2b17c3e2 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -787,8 +787,10 @@ DECLARE_PER_CPU(u64, msr_misc_features_shadow);
 
 #ifdef CONFIG_CPU_SUP_AMD
 extern u32 amd_get_nodes_per_socket(void);
+extern u32 amd_get_highest_perf(void);
 #else
 static inline u32 amd_get_nodes_per_socket(void)	{ return 0; }
+static inline u32 amd_get_highest_perf(void)		{ return 0; }
 #endif
 
 static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 2d11384dc9ab..6d7b3b3ea80b 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1165,3 +1165,19 @@ void set_dr_addr_mask(unsigned long mask, int dr)
 		break;
 	}
 }
+
+u32 amd_get_highest_perf(void)
+{
+	struct cpuinfo_x86 *c = &boot_cpu_data;
+
+	if (c->x86 == 0x17 && ((c->x86_model >= 0x30 && c->x86_model < 0x40) ||
+			       (c->x86_model >= 0x70 && c->x86_model < 0x80)))
+		return 166;
+
+	if (c->x86 == 0x19 && ((c->x86_model >= 0x20 && c->x86_model < 0x30) ||
+			       (c->x86_model >= 0x40 && c->x86_model < 0x70)))
+		return 166;
+
+	return 255;
+}
+EXPORT_SYMBOL_GPL(amd_get_highest_perf);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0ad5214f598a..7770245cc7fa 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -2043,7 +2043,7 @@ static bool amd_set_max_freq_ratio(void)
 		return false;
 	}
 
-	highest_perf = perf_caps.highest_perf;
+	highest_perf = amd_get_highest_perf();
 	nominal_perf = perf_caps.nominal_perf;
 
 	if (!highest_perf || !nominal_perf) {
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index d1bbc16fba4b..7e7450453714 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -646,7 +646,11 @@ static u64 get_max_boost_ratio(unsigned int cpu)
 		return 0;
 	}
 
-	highest_perf = perf_caps.highest_perf;
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+		highest_perf = amd_get_highest_perf();
+	else
+		highest_perf = perf_caps.highest_perf;
+
 	nominal_perf = perf_caps.nominal_perf;
 
 	if (!highest_perf || !nominal_perf) {
-- 
cgit v1.2.3


From b813511135e8b84fa741afdfbab4937919100bef Mon Sep 17 00:00:00 2001
From: Abhijeet Rao <abhijeet.rao@intel.com>
Date: Wed, 12 May 2021 11:08:12 +0300
Subject: xhci-pci: Allow host runtime PM as default for Intel Alder Lake xHCI

In the same way as Intel Tiger Lake TCSS (Type-C Subsystem) the Alder Lake
TCSS xHCI needs to be runtime suspended whenever possible to allow the
TCSS hardware block to enter D3cold and thus save energy.

Cc: stable@vger.kernel.org
Signed-off-by: Abhijeet Rao <abhijeet.rao@intel.com>
Signed-off-by: Nikunj A. Dadhania <nikunj.dadhania@intel.com>
Signed-off-by: Azhar Shaikh <azhar.shaikh@intel.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Link: https://lore.kernel.org/r/20210512080816.866037-2-mathias.nyman@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-pci.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 5bbccc9a0179..a858add8436c 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -57,6 +57,7 @@
 #define PCI_DEVICE_ID_INTEL_CML_XHCI			0xa3af
 #define PCI_DEVICE_ID_INTEL_TIGER_LAKE_XHCI		0x9a13
 #define PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI		0x1138
+#define PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI		0x461e
 
 #define PCI_DEVICE_ID_AMD_PROMONTORYA_4			0x43b9
 #define PCI_DEVICE_ID_AMD_PROMONTORYA_3			0x43ba
@@ -243,7 +244,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 	     pdev->device == PCI_DEVICE_ID_INTEL_TITAN_RIDGE_DD_XHCI ||
 	     pdev->device == PCI_DEVICE_ID_INTEL_ICE_LAKE_XHCI ||
 	     pdev->device == PCI_DEVICE_ID_INTEL_TIGER_LAKE_XHCI ||
-	     pdev->device == PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI))
+	     pdev->device == PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI ||
+	     pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI))
 		xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW;
 
 	if (pdev->vendor == PCI_VENDOR_ID_ETRON &&
-- 
cgit v1.2.3


From 9b6a126ae58d9edfdde2d5f2e87f7615ea5e0155 Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Wed, 12 May 2021 11:08:13 +0300
Subject: xhci: Fix giving back cancelled URBs even if halted endpoint can't
 reset

Commit 9ebf30007858 ("xhci: Fix halted endpoint at stop endpoint command
completion") in 5.12 changes how cancelled URBs are given back.

To cancel a URB xhci driver needs to stop the endpoint first.
To clear a halted endpoint xhci driver needs to reset the endpoint.

In rare cases when an endpoint halt (error) races with a endpoint stop we
need to clear the reset before removing, and giving back the cancelled URB.

The above change in 5.12 takes care of this, but it also relies on the
reset endpoint completion handler to give back the cancelled URBs.

There are cases when driver refuses to queue reset endpoint commands,
for example when a link suddenly goes to an inactive error state.
In this case the cancelled URB is never given back.

Fix this by giving back the URB in the stop endpoint if queuing a reset
endpoint command fails.

Fixes: 9ebf30007858 ("xhci: Fix halted endpoint at stop endpoint command completion")
CC: <stable@vger.kernel.org> # 5.12
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Link: https://lore.kernel.org/r/20210512080816.866037-3-mathias.nyman@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-ring.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 05c38dd3ee36..a8e4189277da 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -862,7 +862,7 @@ done:
 	return ret;
 }
 
-static void xhci_handle_halted_endpoint(struct xhci_hcd *xhci,
+static int xhci_handle_halted_endpoint(struct xhci_hcd *xhci,
 				struct xhci_virt_ep *ep, unsigned int stream_id,
 				struct xhci_td *td,
 				enum xhci_ep_reset_type reset_type)
@@ -875,7 +875,7 @@ static void xhci_handle_halted_endpoint(struct xhci_hcd *xhci,
 	 * Device will be reset soon to recover the link so don't do anything
 	 */
 	if (ep->vdev->flags & VDEV_PORT_ERROR)
-		return;
+		return -ENODEV;
 
 	/* add td to cancelled list and let reset ep handler take care of it */
 	if (reset_type == EP_HARD_RESET) {
@@ -888,16 +888,18 @@ static void xhci_handle_halted_endpoint(struct xhci_hcd *xhci,
 
 	if (ep->ep_state & EP_HALTED) {
 		xhci_dbg(xhci, "Reset ep command already pending\n");
-		return;
+		return 0;
 	}
 
 	err = xhci_reset_halted_ep(xhci, slot_id, ep->ep_index, reset_type);
 	if (err)
-		return;
+		return err;
 
 	ep->ep_state |= EP_HALTED;
 
 	xhci_ring_cmd_db(xhci);
+
+	return 0;
 }
 
 /*
@@ -1014,6 +1016,7 @@ static void xhci_handle_cmd_stop_ep(struct xhci_hcd *xhci, int slot_id,
 	struct xhci_td *td = NULL;
 	enum xhci_ep_reset_type reset_type;
 	struct xhci_command *command;
+	int err;
 
 	if (unlikely(TRB_TO_SUSPEND_PORT(le32_to_cpu(trb->generic.field[3])))) {
 		if (!xhci->devs[slot_id])
@@ -1058,7 +1061,10 @@ static void xhci_handle_cmd_stop_ep(struct xhci_hcd *xhci, int slot_id,
 					td->status = -EPROTO;
 			}
 			/* reset ep, reset handler cleans up cancelled tds */
-			xhci_handle_halted_endpoint(xhci, ep, 0, td, reset_type);
+			err = xhci_handle_halted_endpoint(xhci, ep, 0, td,
+							  reset_type);
+			if (err)
+				break;
 			xhci_stop_watchdog_timer_in_irq(xhci, ep);
 			return;
 		case EP_STATE_RUNNING:
-- 
cgit v1.2.3


From dda32c00c9a0fa103b5d54ef72c477b7aa993679 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Wed, 12 May 2021 11:08:14 +0300
Subject: xhci: Do not use GFP_KERNEL in (potentially) atomic context

'xhci_urb_enqueue()' is passed a 'mem_flags' argument, because "URBs may be
submitted in interrupt context" (see comment related to 'usb_submit_urb()'
in 'drivers/usb/core/urb.c')

So this flag should be used in all the calling chain.
Up to now, 'xhci_check_maxpacket()' which is only called from
'xhci_urb_enqueue()', uses GFP_KERNEL.

Be safe and pass the mem_flags to this function as well.

Fixes: ddba5cd0aeff ("xhci: Use command structures when queuing commands on the command ring")
Cc: <stable@vger.kernel.org>
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Link: https://lore.kernel.org/r/20210512080816.866037-4-mathias.nyman@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index ca9385d22f68..27283654ca08 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1514,7 +1514,7 @@ static int xhci_configure_endpoint(struct xhci_hcd *xhci,
  * we need to issue an evaluate context command and wait on it.
  */
 static int xhci_check_maxpacket(struct xhci_hcd *xhci, unsigned int slot_id,
-		unsigned int ep_index, struct urb *urb)
+		unsigned int ep_index, struct urb *urb, gfp_t mem_flags)
 {
 	struct xhci_container_ctx *out_ctx;
 	struct xhci_input_control_ctx *ctrl_ctx;
@@ -1545,7 +1545,7 @@ static int xhci_check_maxpacket(struct xhci_hcd *xhci, unsigned int slot_id,
 		 * changes max packet sizes.
 		 */
 
-		command = xhci_alloc_command(xhci, true, GFP_KERNEL);
+		command = xhci_alloc_command(xhci, true, mem_flags);
 		if (!command)
 			return -ENOMEM;
 
@@ -1639,7 +1639,7 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag
 		 */
 		if (urb->dev->speed == USB_SPEED_FULL) {
 			ret = xhci_check_maxpacket(xhci, slot_id,
-					ep_index, urb);
+					ep_index, urb, mem_flags);
 			if (ret < 0) {
 				xhci_urb_free_priv(urb_priv);
 				urb->hcpriv = NULL;
-- 
cgit v1.2.3


From ca09b1bea63ab83f4cca3a2ae8bc4f597ec28851 Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Wed, 12 May 2021 11:08:15 +0300
Subject: usb: xhci: Increase timeout for HC halt

On some devices (specifically the SC8180x based Surface Pro X with
QCOM04A6) HC halt / xhci_halt() times out during boot. Manually binding
the xhci-hcd driver at some point later does not exhibit this behavior.
To work around this, double XHCI_MAX_HALT_USEC, which also resolves this
issue.

Cc: <stable@vger.kernel.org>
Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Link: https://lore.kernel.org/r/20210512080816.866037-5-mathias.nyman@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-ext-caps.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/host/xhci-ext-caps.h b/drivers/usb/host/xhci-ext-caps.h
index fa59b242cd51..e8af0a125f84 100644
--- a/drivers/usb/host/xhci-ext-caps.h
+++ b/drivers/usb/host/xhci-ext-caps.h
@@ -7,8 +7,9 @@
  * Author: Sarah Sharp
  * Some code borrowed from the Linux EHCI driver.
  */
-/* Up to 16 ms to halt an HC */
-#define XHCI_MAX_HALT_USEC	(16*1000)
+
+/* HC should halt within 16 ms, but use 32 ms as some hosts take longer */
+#define XHCI_MAX_HALT_USEC	(32 * 1000)
 /* HC not running - set to 1 when run/stop bit is cleared. */
 #define XHCI_STS_HALT		(1<<0)
 
-- 
cgit v1.2.3


From 3c128781d8da463761495aaf8898c9ecb4e71528 Mon Sep 17 00:00:00 2001
From: Sandeep Singh <sandeep.singh@amd.com>
Date: Wed, 12 May 2021 11:08:16 +0300
Subject: xhci: Add reset resume quirk for AMD xhci controller.

One of AMD xhci controller require reset on resume.
Occasionally AMD xhci controller does not respond to
Stop endpoint command.
Once the issue happens controller goes into bad state
and in that case controller needs to be reset.

Cc: <stable@vger.kernel.org>
Signed-off-by: Sandeep Singh <sandeep.singh@amd.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Link: https://lore.kernel.org/r/20210512080816.866037-6-mathias.nyman@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-pci.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index a858add8436c..7bc18cf8042c 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -167,8 +167,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 	    (pdev->device == 0x15e0 || pdev->device == 0x15e1))
 		xhci->quirks |= XHCI_SNPS_BROKEN_SUSPEND;
 
-	if (pdev->vendor == PCI_VENDOR_ID_AMD && pdev->device == 0x15e5)
+	if (pdev->vendor == PCI_VENDOR_ID_AMD && pdev->device == 0x15e5) {
 		xhci->quirks |= XHCI_DISABLE_SPARSE;
+		xhci->quirks |= XHCI_RESET_ON_RESUME;
+	}
 
 	if (pdev->vendor == PCI_VENDOR_ID_AMD)
 		xhci->quirks |= XHCI_TRUST_TX_LENGTH;
-- 
cgit v1.2.3


From 12701ce524bc9b7c6345a2425208501fd2c62aad Mon Sep 17 00:00:00 2001
From: Badhri Jagan Sridharan <badhri@google.com>
Date: Mon, 10 May 2021 14:17:56 -0700
Subject: usb: typec: tcpm: Fix SINK_DISCOVERY current limit for Rp-default
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is a regression introduced by 1373fefc6243 ("usb: typec: tcpm:
Allow slow charging loops to comply to pSnkStby")

When Source advertises Rp-default, tcpm would request 500mA when in
SINK_DISCOVERY, Type-C spec advises the sink to follow BC1.2 current
limits when Rp-default is advertised.
[12750.503381] Requesting mux state 1, usb-role 2, orientation 1
[12750.503837] state change SNK_ATTACHED -> SNK_STARTUP [rev3 NONE_AMS]
[12751.003891] state change SNK_STARTUP -> SNK_DISCOVERY
[12751.003900] Setting voltage/current limit 5000 mV 500 mA

This patch restores the behavior where the tcpm would request 0mA when
Rp-default is advertised by the source.
[   73.174252] Requesting mux state 1, usb-role 2, orientation 1
[   73.174749] state change SNK_ATTACHED -> SNK_STARTUP [rev3 NONE_AMS]
[   73.674800] state change SNK_STARTUP -> SNK_DISCOVERY
[   73.674808] Setting voltage/current limit 5000 mV 0 mA

During SNK_DISCOVERY, Cap the current limit to PD_P_SNK_STDBY_MW / 5 only
for slow_charger_loop case.

Fixes: 1373fefc6243 ("usb: typec: tcpm: Allow slow charging loops to comply to pSnkStby")
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Badhri Jagan Sridharan <badhri@google.com>
Link: https://lore.kernel.org/r/20210510211756.3346954-1-badhri@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm/tcpm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index db567e6fde92..72e4d63a2366 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -4085,7 +4085,7 @@ static void run_state_machine(struct tcpm_port *port)
 		if (port->vbus_present) {
 			u32 current_lim = tcpm_get_current_limit(port);
 
-			if (port->slow_charger_loop || (current_lim > PD_P_SNK_STDBY_MW / 5))
+			if (port->slow_charger_loop && (current_lim > PD_P_SNK_STDBY_MW / 5))
 				current_lim = PD_P_SNK_STDBY_MW / 5;
 			tcpm_set_current_limit(port, current_lim, 5000);
 			tcpm_set_charge(port, true);
-- 
cgit v1.2.3


From 975f94c7d6c306b833628baa9aec3f79db1eb3a1 Mon Sep 17 00:00:00 2001
From: Chunfeng Yun <chunfeng.yun@mediatek.com>
Date: Wed, 12 May 2021 10:07:38 +0800
Subject: usb: core: hub: fix race condition about TRSMRCY of resume

This may happen if the port becomes resume status exactly
when usb_port_resume() gets port status, it still need provide
a TRSMCRY time before access the device.

CC: <stable@vger.kernel.org>
Reported-by: Tianping Fang <tianping.fang@mediatek.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Chunfeng Yun <chunfeng.yun@mediatek.com>
Link: https://lore.kernel.org/r/20210512020738.52961-1-chunfeng.yun@mediatek.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index b2bc4b7c4289..fc7d6cdacf16 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -3642,9 +3642,6 @@ int usb_port_resume(struct usb_device *udev, pm_message_t msg)
 		 * sequence.
 		 */
 		status = hub_port_status(hub, port1, &portstatus, &portchange);
-
-		/* TRSMRCY = 10 msec */
-		msleep(10);
 	}
 
  SuspendCleared:
@@ -3659,6 +3656,9 @@ int usb_port_resume(struct usb_device *udev, pm_message_t msg)
 				usb_clear_port_feature(hub->hdev, port1,
 						USB_PORT_FEAT_C_SUSPEND);
 		}
+
+		/* TRSMRCY = 10 msec */
+		msleep(10);
 	}
 
 	if (udev->persist_enabled)
-- 
cgit v1.2.3


From e181811bd04d874fe48bbfa1165a82068b58144d Mon Sep 17 00:00:00 2001
From: Hou Pu <houpu.main@gmail.com>
Date: Thu, 13 May 2021 21:04:10 +0800
Subject: nvmet: use new ana_log_size instead the old one

The new ana_log_size should be used instead of the old one.
Or kernel NULL pointer dereference will happen like below:

[   38.957849][   T69] BUG: kernel NULL pointer dereference, address: 000000000000003c
[   38.975550][   T69] #PF: supervisor write access in kernel mode
[   38.975955][   T69] #PF: error_code(0x0002) - not-present page
[   38.976905][   T69] PGD 0 P4D 0
[   38.979388][   T69] Oops: 0002 [#1] SMP NOPTI
[   38.980488][   T69] CPU: 0 PID: 69 Comm: kworker/0:2 Not tainted 5.12.0+ #54
[   38.981254][   T69] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014
[   38.982502][   T69] Workqueue: events nvme_loop_execute_work
[   38.985219][   T69] RIP: 0010:memcpy_orig+0x68/0x10f
[   38.986203][   T69] Code: 83 c2 20 eb 44 48 01 d6 48 01 d7 48 83 ea 20 0f 1f 00 48 83 ea 20 4c 8b 46 f8 4c 8b 4e f0 4c 8b 56 e8 4c 8b 5e e0 48 8d 76 e0 <4c> 89 47 f8 4c 89 4f f0 4c 89 57 e8 4c 89 5f e0 48 8d 7f e0 73 d2
[   38.987677][   T69] RSP: 0018:ffffc900001b7d48 EFLAGS: 00000287
[   38.987996][   T69] RAX: 0000000000000020 RBX: 0000000000000024 RCX: 0000000000000010
[   38.988327][   T69] RDX: ffffffffffffffe4 RSI: ffff8881084bc004 RDI: 0000000000000044
[   38.988620][   T69] RBP: 0000000000000024 R08: 0000000100000000 R09: 0000000000000000
[   38.988991][   T69] R10: 0000000100000000 R11: 0000000000000001 R12: 0000000000000024
[   38.989289][   T69] R13: ffff8881084bc000 R14: 0000000000000000 R15: 0000000000000024
[   38.989845][   T69] FS:  0000000000000000(0000) GS:ffff888237c00000(0000) knlGS:0000000000000000
[   38.990234][   T69] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   38.990490][   T69] CR2: 000000000000003c CR3: 00000001085b2000 CR4: 00000000000006f0
[   38.991105][   T69] Call Trace:
[   38.994157][   T69]  sg_copy_buffer+0xb8/0xf0
[   38.995357][   T69]  nvmet_copy_to_sgl+0x48/0x6d
[   38.995565][   T69]  nvmet_execute_get_log_page_ana+0xd4/0x1cb
[   38.995792][   T69]  nvmet_execute_get_log_page+0xc9/0x146
[   38.995992][   T69]  nvme_loop_execute_work+0x3e/0x44
[   38.996181][   T69]  process_one_work+0x1c3/0x3c0
[   38.996393][   T69]  worker_thread+0x44/0x3d0
[   38.996600][   T69]  ? cancel_delayed_work+0x90/0x90
[   38.996804][   T69]  kthread+0xf7/0x130
[   38.996961][   T69]  ? kthread_create_worker_on_cpu+0x70/0x70
[   38.997171][   T69]  ret_from_fork+0x22/0x30
[   38.997705][   T69] Modules linked in:
[   38.998741][   T69] CR2: 000000000000003c
[   39.000104][   T69] ---[ end trace e719927b609d0fa0 ]---

Fixes: 5e1f689913a4 ("nvme-multipath: fix double initialization of ANA state")
Signed-off-by: Hou Pu <houpu.main@gmail.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/multipath.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index deb14562c96a..f81871c7128a 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -817,7 +817,7 @@ int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
 	if (ana_log_size > ctrl->ana_log_size) {
 		nvme_mpath_stop(ctrl);
 		kfree(ctrl->ana_log_buf);
-		ctrl->ana_log_buf = kmalloc(ctrl->ana_log_size, GFP_KERNEL);
+		ctrl->ana_log_buf = kmalloc(ana_log_size, GFP_KERNEL);
 		if (!ctrl->ana_log_buf)
 			return -ENOMEM;
 	}
-- 
cgit v1.2.3


From 5d31950a483381b5444494dfb7fa5ed764193b92 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Sun, 9 May 2021 17:49:26 -0500
Subject: drm/radeon/ni_dpm: Fix booting bug

Create new structure NISLANDS_SMC_SWSTATE_SINGLE, as initialState.levels
and ACPIState.levels are never actually used as flexible arrays. Those
arrays can be used as simple objects of type
NISLANDS_SMC_HW_PERFORMANCE_LEVEL, instead.

Currently, the code fails because flexible array _levels_ in
struct NISLANDS_SMC_SWSTATE doesn't allow for code that access
the first element of initialState.levels and ACPIState.levels
arrays:

drivers/gpu/drm/radeon/ni_dpm.c:
1690         table->initialState.levels[0].mclk.vMPLL_AD_FUNC_CNTL =
1691                 cpu_to_be32(ni_pi->clock_registers.mpll_ad_func_cntl);
...
1903:   table->ACPIState.levels[0].mclk.vMPLL_AD_FUNC_CNTL = cpu_to_be32(mpll_ad_func_cntl);
1904:   table->ACPIState.levels[0].mclk.vMPLL_AD_FUNC_CNTL_2 = cpu_to_be32(mpll_ad_func_cntl_2);

because such element cannot exist without previously allocating
any dynamic memory for it (which never actually happens).

That's why struct NISLANDS_SMC_SWSTATE should only be used as type
for object driverState and new struct SISLANDS_SMC_SWSTATE_SINGLE is
created as type for objects initialState, ACPIState and ULVState.

Also, with the change from one-element array to flexible-array member
in commit 434fb1e7444a ("drm/radeon/nislands_smc.h: Replace one-element
array with flexible-array member in struct NISLANDS_SMC_SWSTATE"), the
size of dpmLevels in struct NISLANDS_SMC_STATETABLE should be fixed to
be NISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE instead of
NISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE - 1.

Bug: https://lore.kernel.org/dri-devel/3eedbe78-1fbd-4763-a7f3-ac5665e76a4a@xenosoft.de/
Fixes: 434fb1e7444a ("drm/radeon/nislands_smc.h: Replace one-element array with flexible-array member in struct NISLANDS_SMC_SWSTATE")
Cc: stable@vger.kernel.org
Reported-by: Christian Zigotzky <chzigotzky@xenosoft.de>
Tested-by: Christian Zigotzky <chzigotzky@xenosoft.de>
Link: https://lore.kernel.org/dri-devel/9bb5fcbd-daf5-1669-b3e7-b8624b3c36f9@xenosoft.de/
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/ni_dpm.c       | 144 +++++++++++++++++-----------------
 drivers/gpu/drm/radeon/nislands_smc.h |  34 +++++---
 2 files changed, 94 insertions(+), 84 deletions(-)

diff --git a/drivers/gpu/drm/radeon/ni_dpm.c b/drivers/gpu/drm/radeon/ni_dpm.c
index dd5ef6493723..769f666335ac 100644
--- a/drivers/gpu/drm/radeon/ni_dpm.c
+++ b/drivers/gpu/drm/radeon/ni_dpm.c
@@ -1687,102 +1687,102 @@ static int ni_populate_smc_initial_state(struct radeon_device *rdev,
 	u32 reg;
 	int ret;
 
-	table->initialState.levels[0].mclk.vMPLL_AD_FUNC_CNTL =
+	table->initialState.level.mclk.vMPLL_AD_FUNC_CNTL =
 		cpu_to_be32(ni_pi->clock_registers.mpll_ad_func_cntl);
-	table->initialState.levels[0].mclk.vMPLL_AD_FUNC_CNTL_2 =
+	table->initialState.level.mclk.vMPLL_AD_FUNC_CNTL_2 =
 		cpu_to_be32(ni_pi->clock_registers.mpll_ad_func_cntl_2);
-	table->initialState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL =
+	table->initialState.level.mclk.vMPLL_DQ_FUNC_CNTL =
 		cpu_to_be32(ni_pi->clock_registers.mpll_dq_func_cntl);
-	table->initialState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL_2 =
+	table->initialState.level.mclk.vMPLL_DQ_FUNC_CNTL_2 =
 		cpu_to_be32(ni_pi->clock_registers.mpll_dq_func_cntl_2);
-	table->initialState.levels[0].mclk.vMCLK_PWRMGT_CNTL =
+	table->initialState.level.mclk.vMCLK_PWRMGT_CNTL =
 		cpu_to_be32(ni_pi->clock_registers.mclk_pwrmgt_cntl);
-	table->initialState.levels[0].mclk.vDLL_CNTL =
+	table->initialState.level.mclk.vDLL_CNTL =
 		cpu_to_be32(ni_pi->clock_registers.dll_cntl);
-	table->initialState.levels[0].mclk.vMPLL_SS =
+	table->initialState.level.mclk.vMPLL_SS =
 		cpu_to_be32(ni_pi->clock_registers.mpll_ss1);
-	table->initialState.levels[0].mclk.vMPLL_SS2 =
+	table->initialState.level.mclk.vMPLL_SS2 =
 		cpu_to_be32(ni_pi->clock_registers.mpll_ss2);
-	table->initialState.levels[0].mclk.mclk_value =
+	table->initialState.level.mclk.mclk_value =
 		cpu_to_be32(initial_state->performance_levels[0].mclk);
 
-	table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL =
+	table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL =
 		cpu_to_be32(ni_pi->clock_registers.cg_spll_func_cntl);
-	table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_2 =
+	table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_2 =
 		cpu_to_be32(ni_pi->clock_registers.cg_spll_func_cntl_2);
-	table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_3 =
+	table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_3 =
 		cpu_to_be32(ni_pi->clock_registers.cg_spll_func_cntl_3);
-	table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_4 =
+	table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_4 =
 		cpu_to_be32(ni_pi->clock_registers.cg_spll_func_cntl_4);
-	table->initialState.levels[0].sclk.vCG_SPLL_SPREAD_SPECTRUM =
+	table->initialState.level.sclk.vCG_SPLL_SPREAD_SPECTRUM =
 		cpu_to_be32(ni_pi->clock_registers.cg_spll_spread_spectrum);
-	table->initialState.levels[0].sclk.vCG_SPLL_SPREAD_SPECTRUM_2 =
+	table->initialState.level.sclk.vCG_SPLL_SPREAD_SPECTRUM_2 =
 		cpu_to_be32(ni_pi->clock_registers.cg_spll_spread_spectrum_2);
-	table->initialState.levels[0].sclk.sclk_value =
+	table->initialState.level.sclk.sclk_value =
 		cpu_to_be32(initial_state->performance_levels[0].sclk);
-	table->initialState.levels[0].arbRefreshState =
+	table->initialState.level.arbRefreshState =
 		NISLANDS_INITIAL_STATE_ARB_INDEX;
 
-	table->initialState.levels[0].ACIndex = 0;
+	table->initialState.level.ACIndex = 0;
 
 	ret = ni_populate_voltage_value(rdev, &eg_pi->vddc_voltage_table,
 					initial_state->performance_levels[0].vddc,
-					&table->initialState.levels[0].vddc);
+					&table->initialState.level.vddc);
 	if (!ret) {
 		u16 std_vddc;
 
 		ret = ni_get_std_voltage_value(rdev,
-					       &table->initialState.levels[0].vddc,
+					       &table->initialState.level.vddc,
 					       &std_vddc);
 		if (!ret)
 			ni_populate_std_voltage_value(rdev, std_vddc,
-						      table->initialState.levels[0].vddc.index,
-						      &table->initialState.levels[0].std_vddc);
+						      table->initialState.level.vddc.index,
+						      &table->initialState.level.std_vddc);
 	}
 
 	if (eg_pi->vddci_control)
 		ni_populate_voltage_value(rdev,
 					  &eg_pi->vddci_voltage_table,
 					  initial_state->performance_levels[0].vddci,
-					  &table->initialState.levels[0].vddci);
+					  &table->initialState.level.vddci);
 
-	ni_populate_initial_mvdd_value(rdev, &table->initialState.levels[0].mvdd);
+	ni_populate_initial_mvdd_value(rdev, &table->initialState.level.mvdd);
 
 	reg = CG_R(0xffff) | CG_L(0);
-	table->initialState.levels[0].aT = cpu_to_be32(reg);
+	table->initialState.level.aT = cpu_to_be32(reg);
 
-	table->initialState.levels[0].bSP = cpu_to_be32(pi->dsp);
+	table->initialState.level.bSP = cpu_to_be32(pi->dsp);
 
 	if (pi->boot_in_gen2)
-		table->initialState.levels[0].gen2PCIE = 1;
+		table->initialState.level.gen2PCIE = 1;
 	else
-		table->initialState.levels[0].gen2PCIE = 0;
+		table->initialState.level.gen2PCIE = 0;
 
 	if (pi->mem_gddr5) {
-		table->initialState.levels[0].strobeMode =
+		table->initialState.level.strobeMode =
 			cypress_get_strobe_mode_settings(rdev,
 							 initial_state->performance_levels[0].mclk);
 
 		if (initial_state->performance_levels[0].mclk > pi->mclk_edc_enable_threshold)
-			table->initialState.levels[0].mcFlags = NISLANDS_SMC_MC_EDC_RD_FLAG | NISLANDS_SMC_MC_EDC_WR_FLAG;
+			table->initialState.level.mcFlags = NISLANDS_SMC_MC_EDC_RD_FLAG | NISLANDS_SMC_MC_EDC_WR_FLAG;
 		else
-			table->initialState.levels[0].mcFlags =  0;
+			table->initialState.level.mcFlags =  0;
 	}
 
 	table->initialState.levelCount = 1;
 
 	table->initialState.flags |= PPSMC_SWSTATE_FLAG_DC;
 
-	table->initialState.levels[0].dpm2.MaxPS = 0;
-	table->initialState.levels[0].dpm2.NearTDPDec = 0;
-	table->initialState.levels[0].dpm2.AboveSafeInc = 0;
-	table->initialState.levels[0].dpm2.BelowSafeInc = 0;
+	table->initialState.level.dpm2.MaxPS = 0;
+	table->initialState.level.dpm2.NearTDPDec = 0;
+	table->initialState.level.dpm2.AboveSafeInc = 0;
+	table->initialState.level.dpm2.BelowSafeInc = 0;
 
 	reg = MIN_POWER_MASK | MAX_POWER_MASK;
-	table->initialState.levels[0].SQPowerThrottle = cpu_to_be32(reg);
+	table->initialState.level.SQPowerThrottle = cpu_to_be32(reg);
 
 	reg = MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK;
-	table->initialState.levels[0].SQPowerThrottle_2 = cpu_to_be32(reg);
+	table->initialState.level.SQPowerThrottle_2 = cpu_to_be32(reg);
 
 	return 0;
 }
@@ -1813,43 +1813,43 @@ static int ni_populate_smc_acpi_state(struct radeon_device *rdev,
 	if (pi->acpi_vddc) {
 		ret = ni_populate_voltage_value(rdev,
 						&eg_pi->vddc_voltage_table,
-						pi->acpi_vddc, &table->ACPIState.levels[0].vddc);
+						pi->acpi_vddc, &table->ACPIState.level.vddc);
 		if (!ret) {
 			u16 std_vddc;
 
 			ret = ni_get_std_voltage_value(rdev,
-						       &table->ACPIState.levels[0].vddc, &std_vddc);
+						       &table->ACPIState.level.vddc, &std_vddc);
 			if (!ret)
 				ni_populate_std_voltage_value(rdev, std_vddc,
-							      table->ACPIState.levels[0].vddc.index,
-							      &table->ACPIState.levels[0].std_vddc);
+							      table->ACPIState.level.vddc.index,
+							      &table->ACPIState.level.std_vddc);
 		}
 
 		if (pi->pcie_gen2) {
 			if (pi->acpi_pcie_gen2)
-				table->ACPIState.levels[0].gen2PCIE = 1;
+				table->ACPIState.level.gen2PCIE = 1;
 			else
-				table->ACPIState.levels[0].gen2PCIE = 0;
+				table->ACPIState.level.gen2PCIE = 0;
 		} else {
-			table->ACPIState.levels[0].gen2PCIE = 0;
+			table->ACPIState.level.gen2PCIE = 0;
 		}
 	} else {
 		ret = ni_populate_voltage_value(rdev,
 						&eg_pi->vddc_voltage_table,
 						pi->min_vddc_in_table,
-						&table->ACPIState.levels[0].vddc);
+						&table->ACPIState.level.vddc);
 		if (!ret) {
 			u16 std_vddc;
 
 			ret = ni_get_std_voltage_value(rdev,
-						       &table->ACPIState.levels[0].vddc,
+						       &table->ACPIState.level.vddc,
 						       &std_vddc);
 			if (!ret)
 				ni_populate_std_voltage_value(rdev, std_vddc,
-							      table->ACPIState.levels[0].vddc.index,
-							      &table->ACPIState.levels[0].std_vddc);
+							      table->ACPIState.level.vddc.index,
+							      &table->ACPIState.level.std_vddc);
 		}
-		table->ACPIState.levels[0].gen2PCIE = 0;
+		table->ACPIState.level.gen2PCIE = 0;
 	}
 
 	if (eg_pi->acpi_vddci) {
@@ -1857,7 +1857,7 @@ static int ni_populate_smc_acpi_state(struct radeon_device *rdev,
 			ni_populate_voltage_value(rdev,
 						  &eg_pi->vddci_voltage_table,
 						  eg_pi->acpi_vddci,
-						  &table->ACPIState.levels[0].vddci);
+						  &table->ACPIState.level.vddci);
 	}
 
 
@@ -1900,37 +1900,37 @@ static int ni_populate_smc_acpi_state(struct radeon_device *rdev,
 	spll_func_cntl_2 &= ~SCLK_MUX_SEL_MASK;
 	spll_func_cntl_2 |= SCLK_MUX_SEL(4);
 
-	table->ACPIState.levels[0].mclk.vMPLL_AD_FUNC_CNTL = cpu_to_be32(mpll_ad_func_cntl);
-	table->ACPIState.levels[0].mclk.vMPLL_AD_FUNC_CNTL_2 = cpu_to_be32(mpll_ad_func_cntl_2);
-	table->ACPIState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL = cpu_to_be32(mpll_dq_func_cntl);
-	table->ACPIState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL_2 = cpu_to_be32(mpll_dq_func_cntl_2);
-	table->ACPIState.levels[0].mclk.vMCLK_PWRMGT_CNTL = cpu_to_be32(mclk_pwrmgt_cntl);
-	table->ACPIState.levels[0].mclk.vDLL_CNTL = cpu_to_be32(dll_cntl);
+	table->ACPIState.level.mclk.vMPLL_AD_FUNC_CNTL = cpu_to_be32(mpll_ad_func_cntl);
+	table->ACPIState.level.mclk.vMPLL_AD_FUNC_CNTL_2 = cpu_to_be32(mpll_ad_func_cntl_2);
+	table->ACPIState.level.mclk.vMPLL_DQ_FUNC_CNTL = cpu_to_be32(mpll_dq_func_cntl);
+	table->ACPIState.level.mclk.vMPLL_DQ_FUNC_CNTL_2 = cpu_to_be32(mpll_dq_func_cntl_2);
+	table->ACPIState.level.mclk.vMCLK_PWRMGT_CNTL = cpu_to_be32(mclk_pwrmgt_cntl);
+	table->ACPIState.level.mclk.vDLL_CNTL = cpu_to_be32(dll_cntl);
 
-	table->ACPIState.levels[0].mclk.mclk_value = 0;
+	table->ACPIState.level.mclk.mclk_value = 0;
 
-	table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL = cpu_to_be32(spll_func_cntl);
-	table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_2 = cpu_to_be32(spll_func_cntl_2);
-	table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_3 = cpu_to_be32(spll_func_cntl_3);
-	table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_4 = cpu_to_be32(spll_func_cntl_4);
+	table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL = cpu_to_be32(spll_func_cntl);
+	table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_2 = cpu_to_be32(spll_func_cntl_2);
+	table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_3 = cpu_to_be32(spll_func_cntl_3);
+	table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_4 = cpu_to_be32(spll_func_cntl_4);
 
-	table->ACPIState.levels[0].sclk.sclk_value = 0;
+	table->ACPIState.level.sclk.sclk_value = 0;
 
-	ni_populate_mvdd_value(rdev, 0, &table->ACPIState.levels[0].mvdd);
+	ni_populate_mvdd_value(rdev, 0, &table->ACPIState.level.mvdd);
 
 	if (eg_pi->dynamic_ac_timing)
-		table->ACPIState.levels[0].ACIndex = 1;
+		table->ACPIState.level.ACIndex = 1;
 
-	table->ACPIState.levels[0].dpm2.MaxPS = 0;
-	table->ACPIState.levels[0].dpm2.NearTDPDec = 0;
-	table->ACPIState.levels[0].dpm2.AboveSafeInc = 0;
-	table->ACPIState.levels[0].dpm2.BelowSafeInc = 0;
+	table->ACPIState.level.dpm2.MaxPS = 0;
+	table->ACPIState.level.dpm2.NearTDPDec = 0;
+	table->ACPIState.level.dpm2.AboveSafeInc = 0;
+	table->ACPIState.level.dpm2.BelowSafeInc = 0;
 
 	reg = MIN_POWER_MASK | MAX_POWER_MASK;
-	table->ACPIState.levels[0].SQPowerThrottle = cpu_to_be32(reg);
+	table->ACPIState.level.SQPowerThrottle = cpu_to_be32(reg);
 
 	reg = MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK;
-	table->ACPIState.levels[0].SQPowerThrottle_2 = cpu_to_be32(reg);
+	table->ACPIState.level.SQPowerThrottle_2 = cpu_to_be32(reg);
 
 	return 0;
 }
@@ -1980,7 +1980,9 @@ static int ni_init_smc_table(struct radeon_device *rdev)
 	if (ret)
 		return ret;
 
-	table->driverState = table->initialState;
+	table->driverState.flags = table->initialState.flags;
+	table->driverState.levelCount = table->initialState.levelCount;
+	table->driverState.levels[0] = table->initialState.level;
 
 	table->ULVState = table->initialState;
 
diff --git a/drivers/gpu/drm/radeon/nislands_smc.h b/drivers/gpu/drm/radeon/nislands_smc.h
index 7395cb6b3cac..42f3bab0f9ee 100644
--- a/drivers/gpu/drm/radeon/nislands_smc.h
+++ b/drivers/gpu/drm/radeon/nislands_smc.h
@@ -143,6 +143,14 @@ struct NISLANDS_SMC_SWSTATE
 
 typedef struct NISLANDS_SMC_SWSTATE NISLANDS_SMC_SWSTATE;
 
+struct NISLANDS_SMC_SWSTATE_SINGLE {
+	uint8_t                             flags;
+	uint8_t                             levelCount;
+	uint8_t                             padding2;
+	uint8_t                             padding3;
+	NISLANDS_SMC_HW_PERFORMANCE_LEVEL   level;
+};
+
 #define NISLANDS_SMC_VOLTAGEMASK_VDDC  0
 #define NISLANDS_SMC_VOLTAGEMASK_MVDD  1
 #define NISLANDS_SMC_VOLTAGEMASK_VDDCI 2
@@ -160,19 +168,19 @@ typedef struct NISLANDS_SMC_VOLTAGEMASKTABLE NISLANDS_SMC_VOLTAGEMASKTABLE;
 
 struct NISLANDS_SMC_STATETABLE
 {
-    uint8_t                             thermalProtectType;
-    uint8_t                             systemFlags;
-    uint8_t                             maxVDDCIndexInPPTable;
-    uint8_t                             extraFlags;
-    uint8_t                             highSMIO[NISLANDS_MAX_NO_VREG_STEPS];
-    uint32_t                            lowSMIO[NISLANDS_MAX_NO_VREG_STEPS];
-    NISLANDS_SMC_VOLTAGEMASKTABLE       voltageMaskTable;
-    PP_NIslands_DPM2Parameters          dpm2Params;
-    NISLANDS_SMC_SWSTATE                initialState;
-    NISLANDS_SMC_SWSTATE                ACPIState;
-    NISLANDS_SMC_SWSTATE                ULVState;
-    NISLANDS_SMC_SWSTATE                driverState;
-    NISLANDS_SMC_HW_PERFORMANCE_LEVEL   dpmLevels[NISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE - 1];
+	uint8_t                             thermalProtectType;
+	uint8_t                             systemFlags;
+	uint8_t                             maxVDDCIndexInPPTable;
+	uint8_t                             extraFlags;
+	uint8_t                             highSMIO[NISLANDS_MAX_NO_VREG_STEPS];
+	uint32_t                            lowSMIO[NISLANDS_MAX_NO_VREG_STEPS];
+	NISLANDS_SMC_VOLTAGEMASKTABLE       voltageMaskTable;
+	PP_NIslands_DPM2Parameters          dpm2Params;
+	struct NISLANDS_SMC_SWSTATE_SINGLE  initialState;
+	struct NISLANDS_SMC_SWSTATE_SINGLE  ACPIState;
+	struct NISLANDS_SMC_SWSTATE_SINGLE  ULVState;
+	NISLANDS_SMC_SWSTATE                driverState;
+	NISLANDS_SMC_HW_PERFORMANCE_LEVEL   dpmLevels[NISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE];
 };
 
 typedef struct NISLANDS_SMC_STATETABLE NISLANDS_SMC_STATETABLE;
-- 
cgit v1.2.3


From 1ddeedaa28e14c4e40c95e3d8026d69eef47eaba Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Sun, 9 May 2021 17:55:25 -0500
Subject: drm/radeon/si_dpm: Fix SMU power state load

Create new structure SISLANDS_SMC_SWSTATE_SINGLE, as initialState.levels
and ACPIState.levels are never actually used as flexible arrays. Those
arrays can be used as simple objects of type
SISLANDS_SMC_HW_PERFORMANCE_LEVEL, instead.

Currently, the code fails because flexible array _levels_ in
struct SISLANDS_SMC_SWSTATE doesn't allow for code that access
the first element of initialState.levels and ACPIState.levels
arrays:

4353         table->initialState.levels[0].mclk.vDLL_CNTL =
4354                 cpu_to_be32(si_pi->clock_registers.dll_cntl);
...
4555         table->ACPIState.levels[0].mclk.vDLL_CNTL =
4556                 cpu_to_be32(dll_cntl);

because such element cannot exist without previously allocating
any dynamic memory for it (which never actually happens).

That's why struct SISLANDS_SMC_SWSTATE should only be used as type
for object driverState and new struct SISLANDS_SMC_SWSTATE_SINGLE is
created as type for objects initialState, ACPIState and ULVState.

Also, with the change from one-element array to flexible-array member
in commit 96e27e8d919e ("drm/radeon/si_dpm: Replace one-element array
with flexible-array in struct SISLANDS_SMC_SWSTATE"), the size of
dpmLevels in struct SISLANDS_SMC_STATETABLE should be fixed to be
SISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE instead of
SISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE - 1.

Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1583
Fixes: 96e27e8d919e ("drm/radeon/si_dpm: Replace one-element array with flexible-array in struct SISLANDS_SMC_SWSTATE")
Cc: stable@vger.kernel.org
Reported-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/si_dpm.c       | 174 +++++++++++++++++-----------------
 drivers/gpu/drm/radeon/sislands_smc.h |  34 ++++---
 2 files changed, 109 insertions(+), 99 deletions(-)

diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index 918609551804..2c54c0d7ca5b 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -4350,70 +4350,70 @@ static int si_populate_smc_initial_state(struct radeon_device *rdev,
 	u32 reg;
 	int ret;
 
-	table->initialState.levels[0].mclk.vDLL_CNTL =
+	table->initialState.level.mclk.vDLL_CNTL =
 		cpu_to_be32(si_pi->clock_registers.dll_cntl);
-	table->initialState.levels[0].mclk.vMCLK_PWRMGT_CNTL =
+	table->initialState.level.mclk.vMCLK_PWRMGT_CNTL =
 		cpu_to_be32(si_pi->clock_registers.mclk_pwrmgt_cntl);
-	table->initialState.levels[0].mclk.vMPLL_AD_FUNC_CNTL =
+	table->initialState.level.mclk.vMPLL_AD_FUNC_CNTL =
 		cpu_to_be32(si_pi->clock_registers.mpll_ad_func_cntl);
-	table->initialState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL =
+	table->initialState.level.mclk.vMPLL_DQ_FUNC_CNTL =
 		cpu_to_be32(si_pi->clock_registers.mpll_dq_func_cntl);
-	table->initialState.levels[0].mclk.vMPLL_FUNC_CNTL =
+	table->initialState.level.mclk.vMPLL_FUNC_CNTL =
 		cpu_to_be32(si_pi->clock_registers.mpll_func_cntl);
-	table->initialState.levels[0].mclk.vMPLL_FUNC_CNTL_1 =
+	table->initialState.level.mclk.vMPLL_FUNC_CNTL_1 =
 		cpu_to_be32(si_pi->clock_registers.mpll_func_cntl_1);
-	table->initialState.levels[0].mclk.vMPLL_FUNC_CNTL_2 =
+	table->initialState.level.mclk.vMPLL_FUNC_CNTL_2 =
 		cpu_to_be32(si_pi->clock_registers.mpll_func_cntl_2);
-	table->initialState.levels[0].mclk.vMPLL_SS =
+	table->initialState.level.mclk.vMPLL_SS =
 		cpu_to_be32(si_pi->clock_registers.mpll_ss1);
-	table->initialState.levels[0].mclk.vMPLL_SS2 =
+	table->initialState.level.mclk.vMPLL_SS2 =
 		cpu_to_be32(si_pi->clock_registers.mpll_ss2);
 
-	table->initialState.levels[0].mclk.mclk_value =
+	table->initialState.level.mclk.mclk_value =
 		cpu_to_be32(initial_state->performance_levels[0].mclk);
 
-	table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL =
+	table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL =
 		cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl);
-	table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_2 =
+	table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_2 =
 		cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl_2);
-	table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_3 =
+	table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_3 =
 		cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl_3);
-	table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_4 =
+	table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_4 =
 		cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl_4);
-	table->initialState.levels[0].sclk.vCG_SPLL_SPREAD_SPECTRUM =
+	table->initialState.level.sclk.vCG_SPLL_SPREAD_SPECTRUM =
 		cpu_to_be32(si_pi->clock_registers.cg_spll_spread_spectrum);
-	table->initialState.levels[0].sclk.vCG_SPLL_SPREAD_SPECTRUM_2  =
+	table->initialState.level.sclk.vCG_SPLL_SPREAD_SPECTRUM_2  =
 		cpu_to_be32(si_pi->clock_registers.cg_spll_spread_spectrum_2);
 
-	table->initialState.levels[0].sclk.sclk_value =
+	table->initialState.level.sclk.sclk_value =
 		cpu_to_be32(initial_state->performance_levels[0].sclk);
 
-	table->initialState.levels[0].arbRefreshState =
+	table->initialState.level.arbRefreshState =
 		SISLANDS_INITIAL_STATE_ARB_INDEX;
 
-	table->initialState.levels[0].ACIndex = 0;
+	table->initialState.level.ACIndex = 0;
 
 	ret = si_populate_voltage_value(rdev, &eg_pi->vddc_voltage_table,
 					initial_state->performance_levels[0].vddc,
-					&table->initialState.levels[0].vddc);
+					&table->initialState.level.vddc);
 
 	if (!ret) {
 		u16 std_vddc;
 
 		ret = si_get_std_voltage_value(rdev,
-					       &table->initialState.levels[0].vddc,
+					       &table->initialState.level.vddc,
 					       &std_vddc);
 		if (!ret)
 			si_populate_std_voltage_value(rdev, std_vddc,
-						      table->initialState.levels[0].vddc.index,
-						      &table->initialState.levels[0].std_vddc);
+						      table->initialState.level.vddc.index,
+						      &table->initialState.level.std_vddc);
 	}
 
 	if (eg_pi->vddci_control)
 		si_populate_voltage_value(rdev,
 					  &eg_pi->vddci_voltage_table,
 					  initial_state->performance_levels[0].vddci,
-					  &table->initialState.levels[0].vddci);
+					  &table->initialState.level.vddci);
 
 	if (si_pi->vddc_phase_shed_control)
 		si_populate_phase_shedding_value(rdev,
@@ -4421,43 +4421,43 @@ static int si_populate_smc_initial_state(struct radeon_device *rdev,
 						 initial_state->performance_levels[0].vddc,
 						 initial_state->performance_levels[0].sclk,
 						 initial_state->performance_levels[0].mclk,
-						 &table->initialState.levels[0].vddc);
+						 &table->initialState.level.vddc);
 
-	si_populate_initial_mvdd_value(rdev, &table->initialState.levels[0].mvdd);
+	si_populate_initial_mvdd_value(rdev, &table->initialState.level.mvdd);
 
 	reg = CG_R(0xffff) | CG_L(0);
-	table->initialState.levels[0].aT = cpu_to_be32(reg);
+	table->initialState.level.aT = cpu_to_be32(reg);
 
-	table->initialState.levels[0].bSP = cpu_to_be32(pi->dsp);
+	table->initialState.level.bSP = cpu_to_be32(pi->dsp);
 
-	table->initialState.levels[0].gen2PCIE = (u8)si_pi->boot_pcie_gen;
+	table->initialState.level.gen2PCIE = (u8)si_pi->boot_pcie_gen;
 
 	if (pi->mem_gddr5) {
-		table->initialState.levels[0].strobeMode =
+		table->initialState.level.strobeMode =
 			si_get_strobe_mode_settings(rdev,
 						    initial_state->performance_levels[0].mclk);
 
 		if (initial_state->performance_levels[0].mclk > pi->mclk_edc_enable_threshold)
-			table->initialState.levels[0].mcFlags = SISLANDS_SMC_MC_EDC_RD_FLAG | SISLANDS_SMC_MC_EDC_WR_FLAG;
+			table->initialState.level.mcFlags = SISLANDS_SMC_MC_EDC_RD_FLAG | SISLANDS_SMC_MC_EDC_WR_FLAG;
 		else
-			table->initialState.levels[0].mcFlags =  0;
+			table->initialState.level.mcFlags =  0;
 	}
 
 	table->initialState.levelCount = 1;
 
 	table->initialState.flags |= PPSMC_SWSTATE_FLAG_DC;
 
-	table->initialState.levels[0].dpm2.MaxPS = 0;
-	table->initialState.levels[0].dpm2.NearTDPDec = 0;
-	table->initialState.levels[0].dpm2.AboveSafeInc = 0;
-	table->initialState.levels[0].dpm2.BelowSafeInc = 0;
-	table->initialState.levels[0].dpm2.PwrEfficiencyRatio = 0;
+	table->initialState.level.dpm2.MaxPS = 0;
+	table->initialState.level.dpm2.NearTDPDec = 0;
+	table->initialState.level.dpm2.AboveSafeInc = 0;
+	table->initialState.level.dpm2.BelowSafeInc = 0;
+	table->initialState.level.dpm2.PwrEfficiencyRatio = 0;
 
 	reg = MIN_POWER_MASK | MAX_POWER_MASK;
-	table->initialState.levels[0].SQPowerThrottle = cpu_to_be32(reg);
+	table->initialState.level.SQPowerThrottle = cpu_to_be32(reg);
 
 	reg = MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK;
-	table->initialState.levels[0].SQPowerThrottle_2 = cpu_to_be32(reg);
+	table->initialState.level.SQPowerThrottle_2 = cpu_to_be32(reg);
 
 	return 0;
 }
@@ -4488,18 +4488,18 @@ static int si_populate_smc_acpi_state(struct radeon_device *rdev,
 
 	if (pi->acpi_vddc) {
 		ret = si_populate_voltage_value(rdev, &eg_pi->vddc_voltage_table,
-						pi->acpi_vddc, &table->ACPIState.levels[0].vddc);
+						pi->acpi_vddc, &table->ACPIState.level.vddc);
 		if (!ret) {
 			u16 std_vddc;
 
 			ret = si_get_std_voltage_value(rdev,
-						       &table->ACPIState.levels[0].vddc, &std_vddc);
+						       &table->ACPIState.level.vddc, &std_vddc);
 			if (!ret)
 				si_populate_std_voltage_value(rdev, std_vddc,
-							      table->ACPIState.levels[0].vddc.index,
-							      &table->ACPIState.levels[0].std_vddc);
+							      table->ACPIState.level.vddc.index,
+							      &table->ACPIState.level.std_vddc);
 		}
-		table->ACPIState.levels[0].gen2PCIE = si_pi->acpi_pcie_gen;
+		table->ACPIState.level.gen2PCIE = si_pi->acpi_pcie_gen;
 
 		if (si_pi->vddc_phase_shed_control) {
 			si_populate_phase_shedding_value(rdev,
@@ -4507,23 +4507,23 @@ static int si_populate_smc_acpi_state(struct radeon_device *rdev,
 							 pi->acpi_vddc,
 							 0,
 							 0,
-							 &table->ACPIState.levels[0].vddc);
+							 &table->ACPIState.level.vddc);
 		}
 	} else {
 		ret = si_populate_voltage_value(rdev, &eg_pi->vddc_voltage_table,
-						pi->min_vddc_in_table, &table->ACPIState.levels[0].vddc);
+						pi->min_vddc_in_table, &table->ACPIState.level.vddc);
 		if (!ret) {
 			u16 std_vddc;
 
 			ret = si_get_std_voltage_value(rdev,
-						       &table->ACPIState.levels[0].vddc, &std_vddc);
+						       &table->ACPIState.level.vddc, &std_vddc);
 
 			if (!ret)
 				si_populate_std_voltage_value(rdev, std_vddc,
-							      table->ACPIState.levels[0].vddc.index,
-							      &table->ACPIState.levels[0].std_vddc);
+							      table->ACPIState.level.vddc.index,
+							      &table->ACPIState.level.std_vddc);
 		}
-		table->ACPIState.levels[0].gen2PCIE = (u8)r600_get_pcie_gen_support(rdev,
+		table->ACPIState.level.gen2PCIE = (u8)r600_get_pcie_gen_support(rdev,
 										    si_pi->sys_pcie_mask,
 										    si_pi->boot_pcie_gen,
 										    RADEON_PCIE_GEN1);
@@ -4534,14 +4534,14 @@ static int si_populate_smc_acpi_state(struct radeon_device *rdev,
 							 pi->min_vddc_in_table,
 							 0,
 							 0,
-							 &table->ACPIState.levels[0].vddc);
+							 &table->ACPIState.level.vddc);
 	}
 
 	if (pi->acpi_vddc) {
 		if (eg_pi->acpi_vddci)
 			si_populate_voltage_value(rdev, &eg_pi->vddci_voltage_table,
 						  eg_pi->acpi_vddci,
-						  &table->ACPIState.levels[0].vddci);
+						  &table->ACPIState.level.vddci);
 	}
 
 	mclk_pwrmgt_cntl |= MRDCK0_RESET | MRDCK1_RESET;
@@ -4552,59 +4552,59 @@ static int si_populate_smc_acpi_state(struct radeon_device *rdev,
 	spll_func_cntl_2 &= ~SCLK_MUX_SEL_MASK;
 	spll_func_cntl_2 |= SCLK_MUX_SEL(4);
 
-	table->ACPIState.levels[0].mclk.vDLL_CNTL =
+	table->ACPIState.level.mclk.vDLL_CNTL =
 		cpu_to_be32(dll_cntl);
-	table->ACPIState.levels[0].mclk.vMCLK_PWRMGT_CNTL =
+	table->ACPIState.level.mclk.vMCLK_PWRMGT_CNTL =
 		cpu_to_be32(mclk_pwrmgt_cntl);
-	table->ACPIState.levels[0].mclk.vMPLL_AD_FUNC_CNTL =
+	table->ACPIState.level.mclk.vMPLL_AD_FUNC_CNTL =
 		cpu_to_be32(mpll_ad_func_cntl);
-	table->ACPIState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL =
+	table->ACPIState.level.mclk.vMPLL_DQ_FUNC_CNTL =
 		cpu_to_be32(mpll_dq_func_cntl);
-	table->ACPIState.levels[0].mclk.vMPLL_FUNC_CNTL =
+	table->ACPIState.level.mclk.vMPLL_FUNC_CNTL =
 		cpu_to_be32(mpll_func_cntl);
-	table->ACPIState.levels[0].mclk.vMPLL_FUNC_CNTL_1 =
+	table->ACPIState.level.mclk.vMPLL_FUNC_CNTL_1 =
 		cpu_to_be32(mpll_func_cntl_1);
-	table->ACPIState.levels[0].mclk.vMPLL_FUNC_CNTL_2 =
+	table->ACPIState.level.mclk.vMPLL_FUNC_CNTL_2 =
 		cpu_to_be32(mpll_func_cntl_2);
-	table->ACPIState.levels[0].mclk.vMPLL_SS =
+	table->ACPIState.level.mclk.vMPLL_SS =
 		cpu_to_be32(si_pi->clock_registers.mpll_ss1);
-	table->ACPIState.levels[0].mclk.vMPLL_SS2 =
+	table->ACPIState.level.mclk.vMPLL_SS2 =
 		cpu_to_be32(si_pi->clock_registers.mpll_ss2);
 
-	table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL =
+	table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL =
 		cpu_to_be32(spll_func_cntl);
-	table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_2 =
+	table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_2 =
 		cpu_to_be32(spll_func_cntl_2);
-	table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_3 =
+	table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_3 =
 		cpu_to_be32(spll_func_cntl_3);
-	table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_4 =
+	table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_4 =
 		cpu_to_be32(spll_func_cntl_4);
 
-	table->ACPIState.levels[0].mclk.mclk_value = 0;
-	table->ACPIState.levels[0].sclk.sclk_value = 0;
+	table->ACPIState.level.mclk.mclk_value = 0;
+	table->ACPIState.level.sclk.sclk_value = 0;
 
-	si_populate_mvdd_value(rdev, 0, &table->ACPIState.levels[0].mvdd);
+	si_populate_mvdd_value(rdev, 0, &table->ACPIState.level.mvdd);
 
 	if (eg_pi->dynamic_ac_timing)
-		table->ACPIState.levels[0].ACIndex = 0;
+		table->ACPIState.level.ACIndex = 0;
 
-	table->ACPIState.levels[0].dpm2.MaxPS = 0;
-	table->ACPIState.levels[0].dpm2.NearTDPDec = 0;
-	table->ACPIState.levels[0].dpm2.AboveSafeInc = 0;
-	table->ACPIState.levels[0].dpm2.BelowSafeInc = 0;
-	table->ACPIState.levels[0].dpm2.PwrEfficiencyRatio = 0;
+	table->ACPIState.level.dpm2.MaxPS = 0;
+	table->ACPIState.level.dpm2.NearTDPDec = 0;
+	table->ACPIState.level.dpm2.AboveSafeInc = 0;
+	table->ACPIState.level.dpm2.BelowSafeInc = 0;
+	table->ACPIState.level.dpm2.PwrEfficiencyRatio = 0;
 
 	reg = MIN_POWER_MASK | MAX_POWER_MASK;
-	table->ACPIState.levels[0].SQPowerThrottle = cpu_to_be32(reg);
+	table->ACPIState.level.SQPowerThrottle = cpu_to_be32(reg);
 
 	reg = MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK;
-	table->ACPIState.levels[0].SQPowerThrottle_2 = cpu_to_be32(reg);
+	table->ACPIState.level.SQPowerThrottle_2 = cpu_to_be32(reg);
 
 	return 0;
 }
 
 static int si_populate_ulv_state(struct radeon_device *rdev,
-				 SISLANDS_SMC_SWSTATE *state)
+				 struct SISLANDS_SMC_SWSTATE_SINGLE *state)
 {
 	struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
 	struct si_power_info *si_pi = si_get_pi(rdev);
@@ -4613,19 +4613,19 @@ static int si_populate_ulv_state(struct radeon_device *rdev,
 	int ret;
 
 	ret = si_convert_power_level_to_smc(rdev, &ulv->pl,
-					    &state->levels[0]);
+					    &state->level);
 	if (!ret) {
 		if (eg_pi->sclk_deep_sleep) {
 			if (sclk_in_sr <= SCLK_MIN_DEEPSLEEP_FREQ)
-				state->levels[0].stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_BYPASS;
+				state->level.stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_BYPASS;
 			else
-				state->levels[0].stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_THROTTLE;
+				state->level.stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_THROTTLE;
 		}
 		if (ulv->one_pcie_lane_in_ulv)
 			state->flags |= PPSMC_SWSTATE_FLAG_PCIE_X1;
-		state->levels[0].arbRefreshState = (u8)(SISLANDS_ULV_STATE_ARB_INDEX);
-		state->levels[0].ACIndex = 1;
-		state->levels[0].std_vddc = state->levels[0].vddc;
+		state->level.arbRefreshState = (u8)(SISLANDS_ULV_STATE_ARB_INDEX);
+		state->level.ACIndex = 1;
+		state->level.std_vddc = state->level.vddc;
 		state->levelCount = 1;
 
 		state->flags |= PPSMC_SWSTATE_FLAG_DC;
@@ -4725,7 +4725,9 @@ static int si_init_smc_table(struct radeon_device *rdev)
 	if (ret)
 		return ret;
 
-	table->driverState = table->initialState;
+	table->driverState.flags = table->initialState.flags;
+	table->driverState.levelCount = table->initialState.levelCount;
+	table->driverState.levels[0] = table->initialState.level;
 
 	ret = si_do_program_memory_timing_parameters(rdev, radeon_boot_state,
 						     SISLANDS_INITIAL_STATE_ARB_INDEX);
@@ -5275,8 +5277,8 @@ static int si_upload_ulv_state(struct radeon_device *rdev)
 	if (ulv->supported && ulv->pl.vddc) {
 		u32 address = si_pi->state_table_start +
 			offsetof(SISLANDS_SMC_STATETABLE, ULVState);
-		SISLANDS_SMC_SWSTATE *smc_state = &si_pi->smc_statetable.ULVState;
-		u32 state_size = sizeof(SISLANDS_SMC_SWSTATE);
+		struct SISLANDS_SMC_SWSTATE_SINGLE *smc_state = &si_pi->smc_statetable.ULVState;
+		u32 state_size = sizeof(struct SISLANDS_SMC_SWSTATE_SINGLE);
 
 		memset(smc_state, 0, state_size);
 
diff --git a/drivers/gpu/drm/radeon/sislands_smc.h b/drivers/gpu/drm/radeon/sislands_smc.h
index fbd6589bdab9..4ea1cb2e45a3 100644
--- a/drivers/gpu/drm/radeon/sislands_smc.h
+++ b/drivers/gpu/drm/radeon/sislands_smc.h
@@ -191,6 +191,14 @@ struct SISLANDS_SMC_SWSTATE
 
 typedef struct SISLANDS_SMC_SWSTATE SISLANDS_SMC_SWSTATE;
 
+struct SISLANDS_SMC_SWSTATE_SINGLE {
+	uint8_t                             flags;
+	uint8_t                             levelCount;
+	uint8_t                             padding2;
+	uint8_t                             padding3;
+	SISLANDS_SMC_HW_PERFORMANCE_LEVEL   level;
+};
+
 #define SISLANDS_SMC_VOLTAGEMASK_VDDC  0
 #define SISLANDS_SMC_VOLTAGEMASK_MVDD  1
 #define SISLANDS_SMC_VOLTAGEMASK_VDDCI 2
@@ -208,19 +216,19 @@ typedef struct SISLANDS_SMC_VOLTAGEMASKTABLE SISLANDS_SMC_VOLTAGEMASKTABLE;
 
 struct SISLANDS_SMC_STATETABLE
 {
-    uint8_t                             thermalProtectType;
-    uint8_t                             systemFlags;
-    uint8_t                             maxVDDCIndexInPPTable;
-    uint8_t                             extraFlags;
-    uint32_t                            lowSMIO[SISLANDS_MAX_NO_VREG_STEPS];
-    SISLANDS_SMC_VOLTAGEMASKTABLE       voltageMaskTable;
-    SISLANDS_SMC_VOLTAGEMASKTABLE       phaseMaskTable;
-    PP_SIslands_DPM2Parameters          dpm2Params;
-    SISLANDS_SMC_SWSTATE                initialState;
-    SISLANDS_SMC_SWSTATE                ACPIState;
-    SISLANDS_SMC_SWSTATE                ULVState;
-    SISLANDS_SMC_SWSTATE                driverState;
-    SISLANDS_SMC_HW_PERFORMANCE_LEVEL   dpmLevels[SISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE - 1];
+	uint8_t					thermalProtectType;
+	uint8_t					systemFlags;
+	uint8_t					maxVDDCIndexInPPTable;
+	uint8_t					extraFlags;
+	uint32_t				lowSMIO[SISLANDS_MAX_NO_VREG_STEPS];
+	SISLANDS_SMC_VOLTAGEMASKTABLE		voltageMaskTable;
+	SISLANDS_SMC_VOLTAGEMASKTABLE		phaseMaskTable;
+	PP_SIslands_DPM2Parameters		dpm2Params;
+	struct SISLANDS_SMC_SWSTATE_SINGLE	initialState;
+	struct SISLANDS_SMC_SWSTATE_SINGLE      ACPIState;
+	struct SISLANDS_SMC_SWSTATE_SINGLE      ULVState;
+	SISLANDS_SMC_SWSTATE			driverState;
+	SISLANDS_SMC_HW_PERFORMANCE_LEVEL	dpmLevels[SISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE];
 };
 
 typedef struct SISLANDS_SMC_STATETABLE SISLANDS_SMC_STATETABLE;
-- 
cgit v1.2.3


From 939baec9e895e75149327c01b775f46c21e12be5 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Mon, 10 May 2021 15:46:18 -0500
Subject: drm/amd/pm: Fix out-of-bounds bug

Create new structure SISLANDS_SMC_SWSTATE_SINGLE, as initialState.levels
and ACPIState.levels are never actually used as flexible arrays. Those
arrays can be used as simple objects of type
SISLANDS_SMC_HW_PERFORMANCE_LEVEL, instead.

Currently, the code fails because flexible array _levels_ in
struct SISLANDS_SMC_SWSTATE doesn't allow for code that accesses
the first element of initialState.levels and ACPIState.levels
arrays:

drivers/gpu/drm/amd/pm/powerplay/si_dpm.c:
4820: table->initialState.levels[0].mclk.vDLL_CNTL =
4821:         cpu_to_be32(si_pi->clock_registers.dll_cntl);
...
5021: table->ACPIState.levels[0].mclk.vDLL_CNTL =
5022:         cpu_to_be32(dll_cntl);

because such element cannot be accessed without previously allocating
enough dynamic memory for it to exist (which never actually happens).
So, there is an out-of-bounds bug in this case.

That's why struct SISLANDS_SMC_SWSTATE should only be used as type
for object driverState and new struct SISLANDS_SMC_SWSTATE_SINGLE is
created as type for objects initialState, ACPIState and ULVState.

Also, with the change from one-element array to flexible-array member
in commit 0e1aa13ca3ff ("drm/amd/pm: Replace one-element array with
flexible-array in struct SISLANDS_SMC_SWSTATE"), the size of
dpmLevels in struct SISLANDS_SMC_STATETABLE should be fixed to be
SISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE instead of
SISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE - 1.

Fixes: 0e1aa13ca3ff ("drm/amd/pm: Replace one-element array with flexible-array in struct SISLANDS_SMC_SWSTATE")
Cc: stable@vger.kernel.org
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/powerplay/si_dpm.c       | 174 ++++++++++++------------
 drivers/gpu/drm/amd/pm/powerplay/sislands_smc.h |  34 +++--
 2 files changed, 109 insertions(+), 99 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c b/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c
index 26a5321e621b..15c0b8af376f 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c
@@ -4817,70 +4817,70 @@ static int si_populate_smc_initial_state(struct amdgpu_device *adev,
 	u32 reg;
 	int ret;
 
-	table->initialState.levels[0].mclk.vDLL_CNTL =
+	table->initialState.level.mclk.vDLL_CNTL =
 		cpu_to_be32(si_pi->clock_registers.dll_cntl);
-	table->initialState.levels[0].mclk.vMCLK_PWRMGT_CNTL =
+	table->initialState.level.mclk.vMCLK_PWRMGT_CNTL =
 		cpu_to_be32(si_pi->clock_registers.mclk_pwrmgt_cntl);
-	table->initialState.levels[0].mclk.vMPLL_AD_FUNC_CNTL =
+	table->initialState.level.mclk.vMPLL_AD_FUNC_CNTL =
 		cpu_to_be32(si_pi->clock_registers.mpll_ad_func_cntl);
-	table->initialState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL =
+	table->initialState.level.mclk.vMPLL_DQ_FUNC_CNTL =
 		cpu_to_be32(si_pi->clock_registers.mpll_dq_func_cntl);
-	table->initialState.levels[0].mclk.vMPLL_FUNC_CNTL =
+	table->initialState.level.mclk.vMPLL_FUNC_CNTL =
 		cpu_to_be32(si_pi->clock_registers.mpll_func_cntl);
-	table->initialState.levels[0].mclk.vMPLL_FUNC_CNTL_1 =
+	table->initialState.level.mclk.vMPLL_FUNC_CNTL_1 =
 		cpu_to_be32(si_pi->clock_registers.mpll_func_cntl_1);
-	table->initialState.levels[0].mclk.vMPLL_FUNC_CNTL_2 =
+	table->initialState.level.mclk.vMPLL_FUNC_CNTL_2 =
 		cpu_to_be32(si_pi->clock_registers.mpll_func_cntl_2);
-	table->initialState.levels[0].mclk.vMPLL_SS =
+	table->initialState.level.mclk.vMPLL_SS =
 		cpu_to_be32(si_pi->clock_registers.mpll_ss1);
-	table->initialState.levels[0].mclk.vMPLL_SS2 =
+	table->initialState.level.mclk.vMPLL_SS2 =
 		cpu_to_be32(si_pi->clock_registers.mpll_ss2);
 
-	table->initialState.levels[0].mclk.mclk_value =
+	table->initialState.level.mclk.mclk_value =
 		cpu_to_be32(initial_state->performance_levels[0].mclk);
 
-	table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL =
+	table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL =
 		cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl);
-	table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_2 =
+	table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_2 =
 		cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl_2);
-	table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_3 =
+	table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_3 =
 		cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl_3);
-	table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_4 =
+	table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_4 =
 		cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl_4);
-	table->initialState.levels[0].sclk.vCG_SPLL_SPREAD_SPECTRUM =
+	table->initialState.level.sclk.vCG_SPLL_SPREAD_SPECTRUM =
 		cpu_to_be32(si_pi->clock_registers.cg_spll_spread_spectrum);
-	table->initialState.levels[0].sclk.vCG_SPLL_SPREAD_SPECTRUM_2  =
+	table->initialState.level.sclk.vCG_SPLL_SPREAD_SPECTRUM_2  =
 		cpu_to_be32(si_pi->clock_registers.cg_spll_spread_spectrum_2);
 
-	table->initialState.levels[0].sclk.sclk_value =
+	table->initialState.level.sclk.sclk_value =
 		cpu_to_be32(initial_state->performance_levels[0].sclk);
 
-	table->initialState.levels[0].arbRefreshState =
+	table->initialState.level.arbRefreshState =
 		SISLANDS_INITIAL_STATE_ARB_INDEX;
 
-	table->initialState.levels[0].ACIndex = 0;
+	table->initialState.level.ACIndex = 0;
 
 	ret = si_populate_voltage_value(adev, &eg_pi->vddc_voltage_table,
 					initial_state->performance_levels[0].vddc,
-					&table->initialState.levels[0].vddc);
+					&table->initialState.level.vddc);
 
 	if (!ret) {
 		u16 std_vddc;
 
 		ret = si_get_std_voltage_value(adev,
-					       &table->initialState.levels[0].vddc,
+					       &table->initialState.level.vddc,
 					       &std_vddc);
 		if (!ret)
 			si_populate_std_voltage_value(adev, std_vddc,
-						      table->initialState.levels[0].vddc.index,
-						      &table->initialState.levels[0].std_vddc);
+						      table->initialState.level.vddc.index,
+						      &table->initialState.level.std_vddc);
 	}
 
 	if (eg_pi->vddci_control)
 		si_populate_voltage_value(adev,
 					  &eg_pi->vddci_voltage_table,
 					  initial_state->performance_levels[0].vddci,
-					  &table->initialState.levels[0].vddci);
+					  &table->initialState.level.vddci);
 
 	if (si_pi->vddc_phase_shed_control)
 		si_populate_phase_shedding_value(adev,
@@ -4888,41 +4888,41 @@ static int si_populate_smc_initial_state(struct amdgpu_device *adev,
 						 initial_state->performance_levels[0].vddc,
 						 initial_state->performance_levels[0].sclk,
 						 initial_state->performance_levels[0].mclk,
-						 &table->initialState.levels[0].vddc);
+						 &table->initialState.level.vddc);
 
-	si_populate_initial_mvdd_value(adev, &table->initialState.levels[0].mvdd);
+	si_populate_initial_mvdd_value(adev, &table->initialState.level.mvdd);
 
 	reg = CG_R(0xffff) | CG_L(0);
-	table->initialState.levels[0].aT = cpu_to_be32(reg);
-	table->initialState.levels[0].bSP = cpu_to_be32(pi->dsp);
-	table->initialState.levels[0].gen2PCIE = (u8)si_pi->boot_pcie_gen;
+	table->initialState.level.aT = cpu_to_be32(reg);
+	table->initialState.level.bSP = cpu_to_be32(pi->dsp);
+	table->initialState.level.gen2PCIE = (u8)si_pi->boot_pcie_gen;
 
 	if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) {
-		table->initialState.levels[0].strobeMode =
+		table->initialState.level.strobeMode =
 			si_get_strobe_mode_settings(adev,
 						    initial_state->performance_levels[0].mclk);
 
 		if (initial_state->performance_levels[0].mclk > pi->mclk_edc_enable_threshold)
-			table->initialState.levels[0].mcFlags = SISLANDS_SMC_MC_EDC_RD_FLAG | SISLANDS_SMC_MC_EDC_WR_FLAG;
+			table->initialState.level.mcFlags = SISLANDS_SMC_MC_EDC_RD_FLAG | SISLANDS_SMC_MC_EDC_WR_FLAG;
 		else
-			table->initialState.levels[0].mcFlags =  0;
+			table->initialState.level.mcFlags =  0;
 	}
 
 	table->initialState.levelCount = 1;
 
 	table->initialState.flags |= PPSMC_SWSTATE_FLAG_DC;
 
-	table->initialState.levels[0].dpm2.MaxPS = 0;
-	table->initialState.levels[0].dpm2.NearTDPDec = 0;
-	table->initialState.levels[0].dpm2.AboveSafeInc = 0;
-	table->initialState.levels[0].dpm2.BelowSafeInc = 0;
-	table->initialState.levels[0].dpm2.PwrEfficiencyRatio = 0;
+	table->initialState.level.dpm2.MaxPS = 0;
+	table->initialState.level.dpm2.NearTDPDec = 0;
+	table->initialState.level.dpm2.AboveSafeInc = 0;
+	table->initialState.level.dpm2.BelowSafeInc = 0;
+	table->initialState.level.dpm2.PwrEfficiencyRatio = 0;
 
 	reg = MIN_POWER_MASK | MAX_POWER_MASK;
-	table->initialState.levels[0].SQPowerThrottle = cpu_to_be32(reg);
+	table->initialState.level.SQPowerThrottle = cpu_to_be32(reg);
 
 	reg = MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK;
-	table->initialState.levels[0].SQPowerThrottle_2 = cpu_to_be32(reg);
+	table->initialState.level.SQPowerThrottle_2 = cpu_to_be32(reg);
 
 	return 0;
 }
@@ -4953,18 +4953,18 @@ static int si_populate_smc_acpi_state(struct amdgpu_device *adev,
 
 	if (pi->acpi_vddc) {
 		ret = si_populate_voltage_value(adev, &eg_pi->vddc_voltage_table,
-						pi->acpi_vddc, &table->ACPIState.levels[0].vddc);
+						pi->acpi_vddc, &table->ACPIState.level.vddc);
 		if (!ret) {
 			u16 std_vddc;
 
 			ret = si_get_std_voltage_value(adev,
-						       &table->ACPIState.levels[0].vddc, &std_vddc);
+						       &table->ACPIState.level.vddc, &std_vddc);
 			if (!ret)
 				si_populate_std_voltage_value(adev, std_vddc,
-							      table->ACPIState.levels[0].vddc.index,
-							      &table->ACPIState.levels[0].std_vddc);
+							      table->ACPIState.level.vddc.index,
+							      &table->ACPIState.level.std_vddc);
 		}
-		table->ACPIState.levels[0].gen2PCIE = si_pi->acpi_pcie_gen;
+		table->ACPIState.level.gen2PCIE = si_pi->acpi_pcie_gen;
 
 		if (si_pi->vddc_phase_shed_control) {
 			si_populate_phase_shedding_value(adev,
@@ -4972,23 +4972,23 @@ static int si_populate_smc_acpi_state(struct amdgpu_device *adev,
 							 pi->acpi_vddc,
 							 0,
 							 0,
-							 &table->ACPIState.levels[0].vddc);
+							 &table->ACPIState.level.vddc);
 		}
 	} else {
 		ret = si_populate_voltage_value(adev, &eg_pi->vddc_voltage_table,
-						pi->min_vddc_in_table, &table->ACPIState.levels[0].vddc);
+						pi->min_vddc_in_table, &table->ACPIState.level.vddc);
 		if (!ret) {
 			u16 std_vddc;
 
 			ret = si_get_std_voltage_value(adev,
-						       &table->ACPIState.levels[0].vddc, &std_vddc);
+						       &table->ACPIState.level.vddc, &std_vddc);
 
 			if (!ret)
 				si_populate_std_voltage_value(adev, std_vddc,
-							      table->ACPIState.levels[0].vddc.index,
-							      &table->ACPIState.levels[0].std_vddc);
+							      table->ACPIState.level.vddc.index,
+							      &table->ACPIState.level.std_vddc);
 		}
-		table->ACPIState.levels[0].gen2PCIE =
+		table->ACPIState.level.gen2PCIE =
 			(u8)amdgpu_get_pcie_gen_support(adev,
 							si_pi->sys_pcie_mask,
 							si_pi->boot_pcie_gen,
@@ -5000,14 +5000,14 @@ static int si_populate_smc_acpi_state(struct amdgpu_device *adev,
 							 pi->min_vddc_in_table,
 							 0,
 							 0,
-							 &table->ACPIState.levels[0].vddc);
+							 &table->ACPIState.level.vddc);
 	}
 
 	if (pi->acpi_vddc) {
 		if (eg_pi->acpi_vddci)
 			si_populate_voltage_value(adev, &eg_pi->vddci_voltage_table,
 						  eg_pi->acpi_vddci,
-						  &table->ACPIState.levels[0].vddci);
+						  &table->ACPIState.level.vddci);
 	}
 
 	mclk_pwrmgt_cntl |= MRDCK0_RESET | MRDCK1_RESET;
@@ -5018,59 +5018,59 @@ static int si_populate_smc_acpi_state(struct amdgpu_device *adev,
 	spll_func_cntl_2 &= ~SCLK_MUX_SEL_MASK;
 	spll_func_cntl_2 |= SCLK_MUX_SEL(4);
 
-	table->ACPIState.levels[0].mclk.vDLL_CNTL =
+	table->ACPIState.level.mclk.vDLL_CNTL =
 		cpu_to_be32(dll_cntl);
-	table->ACPIState.levels[0].mclk.vMCLK_PWRMGT_CNTL =
+	table->ACPIState.level.mclk.vMCLK_PWRMGT_CNTL =
 		cpu_to_be32(mclk_pwrmgt_cntl);
-	table->ACPIState.levels[0].mclk.vMPLL_AD_FUNC_CNTL =
+	table->ACPIState.level.mclk.vMPLL_AD_FUNC_CNTL =
 		cpu_to_be32(mpll_ad_func_cntl);
-	table->ACPIState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL =
+	table->ACPIState.level.mclk.vMPLL_DQ_FUNC_CNTL =
 		cpu_to_be32(mpll_dq_func_cntl);
-	table->ACPIState.levels[0].mclk.vMPLL_FUNC_CNTL =
+	table->ACPIState.level.mclk.vMPLL_FUNC_CNTL =
 		cpu_to_be32(mpll_func_cntl);
-	table->ACPIState.levels[0].mclk.vMPLL_FUNC_CNTL_1 =
+	table->ACPIState.level.mclk.vMPLL_FUNC_CNTL_1 =
 		cpu_to_be32(mpll_func_cntl_1);
-	table->ACPIState.levels[0].mclk.vMPLL_FUNC_CNTL_2 =
+	table->ACPIState.level.mclk.vMPLL_FUNC_CNTL_2 =
 		cpu_to_be32(mpll_func_cntl_2);
-	table->ACPIState.levels[0].mclk.vMPLL_SS =
+	table->ACPIState.level.mclk.vMPLL_SS =
 		cpu_to_be32(si_pi->clock_registers.mpll_ss1);
-	table->ACPIState.levels[0].mclk.vMPLL_SS2 =
+	table->ACPIState.level.mclk.vMPLL_SS2 =
 		cpu_to_be32(si_pi->clock_registers.mpll_ss2);
 
-	table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL =
+	table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL =
 		cpu_to_be32(spll_func_cntl);
-	table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_2 =
+	table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_2 =
 		cpu_to_be32(spll_func_cntl_2);
-	table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_3 =
+	table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_3 =
 		cpu_to_be32(spll_func_cntl_3);
-	table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_4 =
+	table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_4 =
 		cpu_to_be32(spll_func_cntl_4);
 
-	table->ACPIState.levels[0].mclk.mclk_value = 0;
-	table->ACPIState.levels[0].sclk.sclk_value = 0;
+	table->ACPIState.level.mclk.mclk_value = 0;
+	table->ACPIState.level.sclk.sclk_value = 0;
 
-	si_populate_mvdd_value(adev, 0, &table->ACPIState.levels[0].mvdd);
+	si_populate_mvdd_value(adev, 0, &table->ACPIState.level.mvdd);
 
 	if (eg_pi->dynamic_ac_timing)
-		table->ACPIState.levels[0].ACIndex = 0;
+		table->ACPIState.level.ACIndex = 0;
 
-	table->ACPIState.levels[0].dpm2.MaxPS = 0;
-	table->ACPIState.levels[0].dpm2.NearTDPDec = 0;
-	table->ACPIState.levels[0].dpm2.AboveSafeInc = 0;
-	table->ACPIState.levels[0].dpm2.BelowSafeInc = 0;
-	table->ACPIState.levels[0].dpm2.PwrEfficiencyRatio = 0;
+	table->ACPIState.level.dpm2.MaxPS = 0;
+	table->ACPIState.level.dpm2.NearTDPDec = 0;
+	table->ACPIState.level.dpm2.AboveSafeInc = 0;
+	table->ACPIState.level.dpm2.BelowSafeInc = 0;
+	table->ACPIState.level.dpm2.PwrEfficiencyRatio = 0;
 
 	reg = MIN_POWER_MASK | MAX_POWER_MASK;
-	table->ACPIState.levels[0].SQPowerThrottle = cpu_to_be32(reg);
+	table->ACPIState.level.SQPowerThrottle = cpu_to_be32(reg);
 
 	reg = MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK;
-	table->ACPIState.levels[0].SQPowerThrottle_2 = cpu_to_be32(reg);
+	table->ACPIState.level.SQPowerThrottle_2 = cpu_to_be32(reg);
 
 	return 0;
 }
 
 static int si_populate_ulv_state(struct amdgpu_device *adev,
-				 SISLANDS_SMC_SWSTATE *state)
+				 struct SISLANDS_SMC_SWSTATE_SINGLE *state)
 {
 	struct evergreen_power_info *eg_pi = evergreen_get_pi(adev);
 	struct si_power_info *si_pi = si_get_pi(adev);
@@ -5079,19 +5079,19 @@ static int si_populate_ulv_state(struct amdgpu_device *adev,
 	int ret;
 
 	ret = si_convert_power_level_to_smc(adev, &ulv->pl,
-					    &state->levels[0]);
+					    &state->level);
 	if (!ret) {
 		if (eg_pi->sclk_deep_sleep) {
 			if (sclk_in_sr <= SCLK_MIN_DEEPSLEEP_FREQ)
-				state->levels[0].stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_BYPASS;
+				state->level.stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_BYPASS;
 			else
-				state->levels[0].stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_THROTTLE;
+				state->level.stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_THROTTLE;
 		}
 		if (ulv->one_pcie_lane_in_ulv)
 			state->flags |= PPSMC_SWSTATE_FLAG_PCIE_X1;
-		state->levels[0].arbRefreshState = (u8)(SISLANDS_ULV_STATE_ARB_INDEX);
-		state->levels[0].ACIndex = 1;
-		state->levels[0].std_vddc = state->levels[0].vddc;
+		state->level.arbRefreshState = (u8)(SISLANDS_ULV_STATE_ARB_INDEX);
+		state->level.ACIndex = 1;
+		state->level.std_vddc = state->level.vddc;
 		state->levelCount = 1;
 
 		state->flags |= PPSMC_SWSTATE_FLAG_DC;
@@ -5190,7 +5190,9 @@ static int si_init_smc_table(struct amdgpu_device *adev)
 	if (ret)
 		return ret;
 
-	table->driverState = table->initialState;
+	table->driverState.flags = table->initialState.flags;
+	table->driverState.levelCount = table->initialState.levelCount;
+	table->driverState.levels[0] = table->initialState.level;
 
 	ret = si_do_program_memory_timing_parameters(adev, amdgpu_boot_state,
 						     SISLANDS_INITIAL_STATE_ARB_INDEX);
@@ -5737,8 +5739,8 @@ static int si_upload_ulv_state(struct amdgpu_device *adev)
 	if (ulv->supported && ulv->pl.vddc) {
 		u32 address = si_pi->state_table_start +
 			offsetof(SISLANDS_SMC_STATETABLE, ULVState);
-		SISLANDS_SMC_SWSTATE *smc_state = &si_pi->smc_statetable.ULVState;
-		u32 state_size = sizeof(SISLANDS_SMC_SWSTATE);
+		struct SISLANDS_SMC_SWSTATE_SINGLE *smc_state = &si_pi->smc_statetable.ULVState;
+		u32 state_size = sizeof(struct SISLANDS_SMC_SWSTATE_SINGLE);
 
 		memset(smc_state, 0, state_size);
 
diff --git a/drivers/gpu/drm/amd/pm/powerplay/sislands_smc.h b/drivers/gpu/drm/amd/pm/powerplay/sislands_smc.h
index 0f7554052c90..c7dc117a688c 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/sislands_smc.h
+++ b/drivers/gpu/drm/amd/pm/powerplay/sislands_smc.h
@@ -191,6 +191,14 @@ struct SISLANDS_SMC_SWSTATE
 
 typedef struct SISLANDS_SMC_SWSTATE SISLANDS_SMC_SWSTATE;
 
+struct SISLANDS_SMC_SWSTATE_SINGLE {
+	uint8_t                             flags;
+	uint8_t                             levelCount;
+	uint8_t                             padding2;
+	uint8_t                             padding3;
+	SISLANDS_SMC_HW_PERFORMANCE_LEVEL   level;
+};
+
 #define SISLANDS_SMC_VOLTAGEMASK_VDDC  0
 #define SISLANDS_SMC_VOLTAGEMASK_MVDD  1
 #define SISLANDS_SMC_VOLTAGEMASK_VDDCI 2
@@ -208,19 +216,19 @@ typedef struct SISLANDS_SMC_VOLTAGEMASKTABLE SISLANDS_SMC_VOLTAGEMASKTABLE;
 
 struct SISLANDS_SMC_STATETABLE
 {
-    uint8_t                             thermalProtectType;
-    uint8_t                             systemFlags;
-    uint8_t                             maxVDDCIndexInPPTable;
-    uint8_t                             extraFlags;
-    uint32_t                            lowSMIO[SISLANDS_MAX_NO_VREG_STEPS];
-    SISLANDS_SMC_VOLTAGEMASKTABLE       voltageMaskTable;
-    SISLANDS_SMC_VOLTAGEMASKTABLE       phaseMaskTable;
-    PP_SIslands_DPM2Parameters          dpm2Params;
-    SISLANDS_SMC_SWSTATE                initialState;
-    SISLANDS_SMC_SWSTATE                ACPIState;
-    SISLANDS_SMC_SWSTATE                ULVState;
-    SISLANDS_SMC_SWSTATE                driverState;
-    SISLANDS_SMC_HW_PERFORMANCE_LEVEL   dpmLevels[SISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE - 1];
+	uint8_t					thermalProtectType;
+	uint8_t					systemFlags;
+	uint8_t					maxVDDCIndexInPPTable;
+	uint8_t					extraFlags;
+	uint32_t				lowSMIO[SISLANDS_MAX_NO_VREG_STEPS];
+	SISLANDS_SMC_VOLTAGEMASKTABLE		voltageMaskTable;
+	SISLANDS_SMC_VOLTAGEMASKTABLE		phaseMaskTable;
+	PP_SIslands_DPM2Parameters		dpm2Params;
+	struct SISLANDS_SMC_SWSTATE_SINGLE	initialState;
+	struct SISLANDS_SMC_SWSTATE_SINGLE	ACPIState;
+	struct SISLANDS_SMC_SWSTATE_SINGLE	ULVState;
+	SISLANDS_SMC_SWSTATE			driverState;
+	SISLANDS_SMC_HW_PERFORMANCE_LEVEL	dpmLevels[SISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE];
 };
 
 typedef struct SISLANDS_SMC_STATETABLE SISLANDS_SMC_STATETABLE;
-- 
cgit v1.2.3


From fe1c97d008f86f672f0e9265f180c22451ca3b9f Mon Sep 17 00:00:00 2001
From: David Ward <david.ward@gatech.edu>
Date: Mon, 10 May 2021 05:30:39 -0400
Subject: drm/amd/display: Initialize attribute for hdcp_srm sysfs file

It is stored in dynamically allocated memory, so sysfs_bin_attr_init() must
be called to initialize it. (Note: "initialization" only sets the .attr.key
member in this struct; it does not change the value of any other members.)

Otherwise, when CONFIG_DEBUG_LOCK_ALLOC=y this message appears during boot:

    BUG: key ffff9248900cd148 has not been registered!

Fixes: 9037246bb2da ("drm/amd/display: Add sysfs interface for set/get srm")
Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1586
Reported-by: Mikhail Gavrilov <mikhail.v.gavrilov@gmail.com>
Signed-off-by: David Ward <david.ward@gatech.edu>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
index 616f5b1ea3a8..666796a0067c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
@@ -650,6 +650,7 @@ struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, struct
 
 	/* File created at /sys/class/drm/card0/device/hdcp_srm*/
 	hdcp_work[0].attr = data_attr;
+	sysfs_bin_attr_init(&hdcp_work[0].attr);
 
 	if (sysfs_create_bin_file(&adev->dev->kobj, &hdcp_work[0].attr))
 		DRM_WARN("Failed to create device file hdcp_srm");
-- 
cgit v1.2.3


From 83a0b8639185f40ab7fc9dd291a057150eb9d238 Mon Sep 17 00:00:00 2001
From: Likun GAO <Likun.Gao@amd.com>
Date: Thu, 29 Apr 2021 14:08:13 +0800
Subject: drm/amdgpu: add judgement when add ip blocks (v2)

Judgement whether to add an sw ip according to the harvest info.

v2: fix indentation (Alex)

Signed-off-by: Likun Gao <Likun.Gao@amd.com>
Reviewed-by: Guchun Chen <guchun.chen@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h           |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    | 15 +++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 28 +++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h |  1 +
 drivers/gpu/drm/amd/amdgpu/nv.c               |  8 +++++++-
 drivers/gpu/drm/amd/include/amd_shared.h      |  6 ++++++
 6 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index dc3a69296321..264176a01e16 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1006,6 +1006,7 @@ struct amdgpu_device {
 	struct amdgpu_df                df;
 
 	struct amdgpu_ip_block          ip_blocks[AMDGPU_MAX_IP_NUM];
+	uint32_t		        harvest_ip_mask;
 	int				num_ip_blocks;
 	struct mutex	mn_lock;
 	DECLARE_HASHTABLE(mn_hash, 7);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 7d3b54615147..8b2a37bf2adf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1683,6 +1683,19 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
 	if (!ip_block_version)
 		return -EINVAL;
 
+	switch (ip_block_version->type) {
+	case AMD_IP_BLOCK_TYPE_VCN:
+		if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
+			return 0;
+		break;
+	case AMD_IP_BLOCK_TYPE_JPEG:
+		if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
+			return 0;
+		break;
+	default:
+		break;
+	}
+
 	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
 		  ip_block_version->funcs->name);
 
@@ -3111,7 +3124,6 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
 	return amdgpu_device_asic_has_dc_support(adev->asic_type);
 }
 
-
 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
 {
 	struct amdgpu_device *adev =
@@ -3276,6 +3288,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	adev->vm_manager.vm_pte_funcs = NULL;
 	adev->vm_manager.vm_pte_num_scheds = 0;
 	adev->gmc.gmc_funcs = NULL;
+	adev->harvest_ip_mask = 0x0;
 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index b2dbcb4df020..e1b6f5891759 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -373,6 +373,34 @@ int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
 	return -EINVAL;
 }
 
+void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
+{
+	struct binary_header *bhdr;
+	struct harvest_table *harvest_info;
+	int i;
+
+	bhdr = (struct binary_header *)adev->mman.discovery_bin;
+	harvest_info = (struct harvest_table *)(adev->mman.discovery_bin +
+			le16_to_cpu(bhdr->table_list[HARVEST_INFO].offset));
+
+	for (i = 0; i < 32; i++) {
+		if (le32_to_cpu(harvest_info->list[i].hw_id) == 0)
+			break;
+
+		switch (le32_to_cpu(harvest_info->list[i].hw_id)) {
+		case VCN_HWID:
+			adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK;
+			adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK;
+			break;
+		case DMU_HWID:
+			adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK;
+			break;
+		default:
+			break;
+		}
+	}
+}
+
 int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
 {
 	struct binary_header *bhdr;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
index 8f6183801cb3..1b1ae21b1037 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
@@ -29,6 +29,7 @@
 
 void amdgpu_discovery_fini(struct amdgpu_device *adev);
 int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev);
+void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev);
 int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
                                     int *major, int *minor, int *revision);
 int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index d54af7f8801b..428413c860c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -635,6 +635,8 @@ static int nv_reg_base_init(struct amdgpu_device *adev)
 			goto legacy_init;
 		}
 
+		amdgpu_discovery_harvest_ip(adev);
+
 		return 0;
 	}
 
@@ -777,7 +779,6 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block);
 		if (!amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block);
-
 		if (adev->enable_mes)
 			amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block);
 		break;
@@ -1149,6 +1150,11 @@ static int nv_common_early_init(void *handle)
 		return -EINVAL;
 	}
 
+	if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
+		adev->pg_flags &= ~(AMD_PG_SUPPORT_VCN |
+				    AMD_PG_SUPPORT_VCN_DPG |
+				    AMD_PG_SUPPORT_JPEG);
+
 	if (amdgpu_sriov_vf(adev)) {
 		amdgpu_virt_init_setting(adev);
 		xgpu_nv_mailbox_set_irq_funcs(adev);
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index 43ed6291b2b8..9ab706cd07ff 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -216,6 +216,12 @@ enum PP_FEATURE_MASK {
 	PP_GFX_DCS_MASK = 0x80000,
 };
 
+enum amd_harvest_ip_mask {
+    AMD_HARVEST_IP_VCN_MASK = 0x1,
+    AMD_HARVEST_IP_JPEG_MASK = 0x2,
+    AMD_HARVEST_IP_DMU_MASK = 0x4,
+};
+
 enum DC_FEATURE_MASK {
 	DC_FBC_MASK = 0x1,
 	DC_MULTI_MON_PP_MCLK_SWITCH_MASK = 0x2,
-- 
cgit v1.2.3


From 5c1a376823c408efd7de30fc300e687c78627f27 Mon Sep 17 00:00:00 2001
From: Likun Gao <Likun.Gao@amd.com>
Date: Fri, 7 May 2021 13:56:46 +0800
Subject: drm/amdgpu: update the method for harvest IP for specific SKU

Update the method of disabling VCN IP for specific SKU for navi1x ASIC,
it will judge whether should add the related IP at the function of
amdgpu_device_ip_block_add().

Signed-off-by: Likun Gao <Likun.Gao@amd.com>
Reviewed-by: Guchun Chen <guchun.chen@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/nv.c | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 428413c860c7..d290ca0b06da 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -623,6 +623,16 @@ static const struct amdgpu_ip_block_version nv_common_ip_block =
 	.funcs = &nv_common_ip_funcs,
 };
 
+static bool nv_is_headless_sku(struct pci_dev *pdev)
+{
+	if ((pdev->device == 0x731E &&
+	    (pdev->revision == 0xC6 || pdev->revision == 0xC7)) ||
+	    (pdev->device == 0x7340 && pdev->revision == 0xC9)  ||
+	    (pdev->device == 0x7360 && pdev->revision == 0xC7))
+		return true;
+	return false;
+}
+
 static int nv_reg_base_init(struct amdgpu_device *adev)
 {
 	int r;
@@ -636,6 +646,10 @@ static int nv_reg_base_init(struct amdgpu_device *adev)
 		}
 
 		amdgpu_discovery_harvest_ip(adev);
+		if (nv_is_headless_sku(adev->pdev)) {
+			adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK;
+			adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK;
+		}
 
 		return 0;
 	}
@@ -673,16 +687,6 @@ void nv_set_virt_ops(struct amdgpu_device *adev)
 	adev->virt.ops = &xgpu_nv_virt_ops;
 }
 
-static bool nv_is_headless_sku(struct pci_dev *pdev)
-{
-	if ((pdev->device == 0x731E &&
-	    (pdev->revision == 0xC6 || pdev->revision == 0xC7)) ||
-	    (pdev->device == 0x7340 && pdev->revision == 0xC9)  ||
-	    (pdev->device == 0x7360 && pdev->revision == 0xC7))
-		return true;
-	return false;
-}
-
 int nv_set_ip_blocks(struct amdgpu_device *adev)
 {
 	int r;
@@ -730,8 +734,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
 		    !amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
-		if (!nv_is_headless_sku(adev->pdev))
-			amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
 		if (adev->enable_mes)
 			amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block);
@@ -754,8 +757,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
 		    !amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
-		if (!nv_is_headless_sku(adev->pdev))
-		        amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
 		if (!amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
 		break;
-- 
cgit v1.2.3


From 227545b9a08c68778ddd89428f99c351fc9315ac Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Fri, 30 Apr 2021 12:56:56 +0800
Subject: drm/radeon/dpm: Disable sclk switching on Oland when two 4K 60Hz
 monitors are connected

Screen flickers rapidly when two 4K 60Hz monitors are in use. This issue
doesn't happen when one monitor is 4K 60Hz (pixelclock 594MHz) and
another one is 4K 30Hz (pixelclock 297MHz).

The issue is gone after setting "power_dpm_force_performance_level" to
"high". Following the indication, we found that the issue occurs when
sclk is too low.

So resolve the issue by disabling sclk switching when there are two
monitors requires high pixelclock (> 297MHz).

v2:
 - Only apply the fix to Oland.
Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/radeon/radeon.h    | 1 +
 drivers/gpu/drm/radeon/radeon_pm.c | 8 ++++++++
 drivers/gpu/drm/radeon/si_dpm.c    | 3 +++
 3 files changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 42281fce552e..56ed5634cebe 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1549,6 +1549,7 @@ struct radeon_dpm {
 	void                    *priv;
 	u32			new_active_crtcs;
 	int			new_active_crtc_count;
+	int			high_pixelclock_count;
 	u32			current_active_crtcs;
 	int			current_active_crtc_count;
 	bool single_display;
diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index 0c1950f4e146..3861c0b98fcf 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@@ -1767,6 +1767,7 @@ static void radeon_pm_compute_clocks_dpm(struct radeon_device *rdev)
 	struct drm_device *ddev = rdev->ddev;
 	struct drm_crtc *crtc;
 	struct radeon_crtc *radeon_crtc;
+	struct radeon_connector *radeon_connector;
 
 	if (!rdev->pm.dpm_enabled)
 		return;
@@ -1776,6 +1777,7 @@ static void radeon_pm_compute_clocks_dpm(struct radeon_device *rdev)
 	/* update active crtc counts */
 	rdev->pm.dpm.new_active_crtcs = 0;
 	rdev->pm.dpm.new_active_crtc_count = 0;
+	rdev->pm.dpm.high_pixelclock_count = 0;
 	if (rdev->num_crtc && rdev->mode_info.mode_config_initialized) {
 		list_for_each_entry(crtc,
 				    &ddev->mode_config.crtc_list, head) {
@@ -1783,6 +1785,12 @@ static void radeon_pm_compute_clocks_dpm(struct radeon_device *rdev)
 			if (crtc->enabled) {
 				rdev->pm.dpm.new_active_crtcs |= (1 << radeon_crtc->crtc_id);
 				rdev->pm.dpm.new_active_crtc_count++;
+				if (!radeon_crtc->connector)
+					continue;
+
+				radeon_connector = to_radeon_connector(radeon_crtc->connector);
+				if (radeon_connector->pixelclock_for_modeset > 297000)
+					rdev->pm.dpm.high_pixelclock_count++;
 			}
 		}
 	}
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index 2c54c0d7ca5b..3add39c1a689 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -2979,6 +2979,9 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
 		    (rdev->pdev->device == 0x6605)) {
 			max_sclk = 75000;
 		}
+
+		if (rdev->pm.dpm.high_pixelclock_count > 1)
+			disable_sclk_switching = true;
 	}
 
 	if (rps->vce_active) {
-- 
cgit v1.2.3


From 3666f83a11293fd3cbeb3c9e0c3c53a33a48c28b Mon Sep 17 00:00:00 2001
From: Sathishkumar S <sathishkumar.sundararaju@amd.com>
Date: Mon, 3 May 2021 12:34:10 +0530
Subject: drm/amdgpu: set vcn mgcg flag for picasso

enable vcn mgcg flag for picasso.

Signed-off-by: Sathishkumar S <sathishkumar.sundararaju@amd.com>
Reviewed-by: Leo Liu <leo.liu@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index d80e12b80c7e..8e1b9a40839f 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -1401,7 +1401,8 @@ static int soc15_common_early_init(void *handle)
 				AMD_CG_SUPPORT_MC_MGCG |
 				AMD_CG_SUPPORT_MC_LS |
 				AMD_CG_SUPPORT_SDMA_MGCG |
-				AMD_CG_SUPPORT_SDMA_LS;
+				AMD_CG_SUPPORT_SDMA_LS |
+				AMD_CG_SUPPORT_VCN_MGCG;
 
 			adev->pg_flags = AMD_PG_SUPPORT_SDMA |
 				AMD_PG_SUPPORT_MMHUB |
-- 
cgit v1.2.3


From 5c1efb5f7682e2072ca5ce12cd616d432604ecc0 Mon Sep 17 00:00:00 2001
From: Sathishkumar S <sathishkumar.sundararaju@amd.com>
Date: Mon, 3 May 2021 23:57:31 +0530
Subject: drm/amdgpu: update vcn1.0 Non-DPG suspend sequence

update suspend register settings in Non-DPG mode.

Signed-off-by: Sathishkumar S <sathishkumar.sundararaju@amd.com>
Reviewed-by: Leo Liu <leo.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 51a773a37a35..0c1beefa3e49 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -1119,10 +1119,10 @@ static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev)
 		UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
 	SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_LMI_STATUS, tmp, tmp);
 
-	/* put VCPU into reset */
-	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
-		UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
-		~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+	/* stall UMC channel */
+	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
+		UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
+		~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
 
 	tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
 		UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
@@ -1141,6 +1141,11 @@ static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev)
 		UVD_SOFT_RESET__LMI_SOFT_RESET_MASK,
 		~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK);
 
+	/* put VCPU into reset */
+	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
+		UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
+		~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+
 	WREG32_SOC15(UVD, 0, mmUVD_STATUS, 0);
 
 	vcn_v1_0_enable_clock_gating(adev);
-- 
cgit v1.2.3


From a73b6a3b4109ce2ed01dbc51a6c1551a6431b53c Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Wed, 28 Apr 2021 15:25:34 -0700
Subject: ics932s401: fix broken handling of errors when word reading fails

In commit b05ae01fdb89, someone tried to make the driver handle i2c read
errors by simply zeroing out the register contents, but for some reason
left unaltered the code that sets the cached register value the function
call return value.

The original patch was authored by a member of the Underhanded
Mangle-happy Nerds, I'm not terribly surprised.  I don't have the
hardware anymore so I can't test this, but it seems like a pretty
obvious API usage fix to me...

Fixes: b05ae01fdb89 ("misc/ics932s401: Add a missing check to i2c_smbus_read_word_data")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Link: https://lore.kernel.org/r/20210428222534.GJ3122264@magnolia
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/ics932s401.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/ics932s401.c b/drivers/misc/ics932s401.c
index 2bdf560ee681..0f9ea75b0b18 100644
--- a/drivers/misc/ics932s401.c
+++ b/drivers/misc/ics932s401.c
@@ -134,7 +134,7 @@ static struct ics932s401_data *ics932s401_update_device(struct device *dev)
 	for (i = 0; i < NUM_MIRRORED_REGS; i++) {
 		temp = i2c_smbus_read_word_data(client, regs_to_copy[i]);
 		if (temp < 0)
-			data->regs[regs_to_copy[i]] = 0;
+			temp = 0;
 		data->regs[regs_to_copy[i]] = temp >> 8;
 	}
 
-- 
cgit v1.2.3


From 6a3239a738d86c5e9b5aad17fefe2c2bfd6ced83 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 28 Apr 2021 09:49:31 +0200
Subject: Revert "crypto: cavium/nitrox - add an error message to explain the
 failure of pci_request_mem_regions"

This reverts commit 9fcddaf2e28d779cb946d23838ba6d50f299aa80 as it was
submitted under a fake name and we can not knowingly accept anonymous
contributions to the repository.

This commit was part of a submission "test" to the Linux kernel
community by some "researchers" at umn.edu.  As outlined at:
	https://www-users.cs.umn.edu/%7Ekjlu/papers/full-disclosure.pdf
it was done so as an attempt to submit a known-buggy patch to see if it
could get by our review.  However, the submission turned out to actually
be correct, and not have a bug in it as the author did not understand
how the PCI driver model works at all, and so the submission was
accepted.

As this change is of useless consequence, there is no loss of
functionality in reverting it.

Cc: "David S. Miller" <davem@davemloft.net>
Cc: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Cc: linux-crypto@vger.kernel.org
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Email: Herbert Xu <herbert@gondor.apana.org.au>
Link: https://lore.kernel.org/r/YIkTi9a3nnL50wMq@kroah.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/crypto/cavium/nitrox/nitrox_main.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/crypto/cavium/nitrox/nitrox_main.c b/drivers/crypto/cavium/nitrox/nitrox_main.c
index facc8e6bc580..d385daf2c71c 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_main.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_main.c
@@ -442,7 +442,6 @@ static int nitrox_probe(struct pci_dev *pdev,
 	err = pci_request_mem_regions(pdev, nitrox_driver_name);
 	if (err) {
 		pci_disable_device(pdev);
-		dev_err(&pdev->dev, "Failed to request mem regions!\n");
 		return err;
 	}
 	pci_set_master(pdev);
-- 
cgit v1.2.3


From 3e465fc3846734e9489273d889f19cc17b4cf4bd Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:30 +0200
Subject: Revert "media: rcar_drif: fix a memory disclosure"

This reverts commit d39083234c60519724c6ed59509a2129fd2aed41.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, it was determined that this commit is not needed at all as
the media core already prevents memory disclosure on this codepath, so
just drop the extra memset happening here.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Fixes: d39083234c60 ("media: rcar_drif: fix a memory disclosure")
Cc: stable <stable@vger.kernel.org>
Reviewed-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
Link: https://lore.kernel.org/r/20210503115736.2104747-4-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/platform/rcar_drif.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/media/platform/rcar_drif.c b/drivers/media/platform/rcar_drif.c
index 83bd9a412a56..1e3b68a8743a 100644
--- a/drivers/media/platform/rcar_drif.c
+++ b/drivers/media/platform/rcar_drif.c
@@ -915,7 +915,6 @@ static int rcar_drif_g_fmt_sdr_cap(struct file *file, void *priv,
 {
 	struct rcar_drif_sdr *sdr = video_drvdata(file);
 
-	memset(f->fmt.sdr.reserved, 0, sizeof(f->fmt.sdr.reserved));
 	f->fmt.sdr.pixelformat = sdr->fmt->pixelformat;
 	f->fmt.sdr.buffersize = sdr->fmt->buffersize;
 
-- 
cgit v1.2.3


From 99ae3417672a6d4a3bf68d4fc43d7c6ca074d477 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:31 +0200
Subject: Revert "hwmon: (lm80) fix a missing check of bus read in lm80 probe"

This reverts commit 9aa3aa15f4c2f74f47afd6c5db4b420fadf3f315.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, it was determined that this commit is not needed at all so
just revert it.  Also, the call to lm80_init_client() was not properly
handled, so if error handling is needed in the lm80_probe() function,
then it should be done properly, not half-baked like the commit being
reverted here did.

Cc: Kangjie Lu <kjlu@umn.edu>
Fixes: 9aa3aa15f4c2 ("hwmon: (lm80) fix a missing check of bus read in lm80 probe")
Cc: stable <stable@vger.kernel.org>
Acked-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/r/20210503115736.2104747-5-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwmon/lm80.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/drivers/hwmon/lm80.c b/drivers/hwmon/lm80.c
index ac4adb44b224..97ab491d2922 100644
--- a/drivers/hwmon/lm80.c
+++ b/drivers/hwmon/lm80.c
@@ -596,7 +596,6 @@ static int lm80_probe(struct i2c_client *client)
 	struct device *dev = &client->dev;
 	struct device *hwmon_dev;
 	struct lm80_data *data;
-	int rv;
 
 	data = devm_kzalloc(dev, sizeof(struct lm80_data), GFP_KERNEL);
 	if (!data)
@@ -609,14 +608,8 @@ static int lm80_probe(struct i2c_client *client)
 	lm80_init_client(client);
 
 	/* A few vars need to be filled upon startup */
-	rv = lm80_read_value(client, LM80_REG_FAN_MIN(1));
-	if (rv < 0)
-		return rv;
-	data->fan[f_min][0] = rv;
-	rv = lm80_read_value(client, LM80_REG_FAN_MIN(2));
-	if (rv < 0)
-		return rv;
-	data->fan[f_min][1] = rv;
+	data->fan[f_min][0] = lm80_read_value(client, LM80_REG_FAN_MIN(1));
+	data->fan[f_min][1] = lm80_read_value(client, LM80_REG_FAN_MIN(2));
 
 	hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name,
 							   data, lm80_groups);
-- 
cgit v1.2.3


From 754f39158441f4c0d7a8255209dd9a939f08ce80 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:32 +0200
Subject: Revert "serial: mvebu-uart: Fix to avoid a potential NULL pointer
 dereference"

This reverts commit 32f47179833b63de72427131169809065db6745e.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be not be needed at all as the
change was useless because this function can only be called when
of_match_device matched on something.  So it should be reverted.

Cc: Aditya Pakki <pakki001@umn.edu>
Cc: stable <stable@vger.kernel.org>
Fixes: 32f47179833b ("serial: mvebu-uart: Fix to avoid a potential NULL pointer dereference")
Acked-by: Jiri Slaby <jirislaby@kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-6-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/mvebu-uart.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c
index e0c00a1b0763..51b0ecabf2ec 100644
--- a/drivers/tty/serial/mvebu-uart.c
+++ b/drivers/tty/serial/mvebu-uart.c
@@ -818,9 +818,6 @@ static int mvebu_uart_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	if (!match)
-		return -ENODEV;
-
 	/* Assume that all UART ports have a DT alias or none has */
 	id = of_alias_get_id(pdev->dev.of_node, "serial");
 	if (!pdev->dev.of_node || id < 0)
-- 
cgit v1.2.3


From fd013265e5b5576a74a033920d6c571e08d7c423 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:33 +0200
Subject: Revert "media: usb: gspca: add a missed check for goto_low_power"

This reverts commit 5b711870bec4dc9a6d705d41e127e73944fa3650.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to do does nothing useful as a user
can do nothing with this information and if an error did happen, the
code would continue on as before.  Because of this, just revert it.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-7-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/usb/gspca/cpia1.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/media/usb/gspca/cpia1.c b/drivers/media/usb/gspca/cpia1.c
index a4f7431486f3..d93d384286c1 100644
--- a/drivers/media/usb/gspca/cpia1.c
+++ b/drivers/media/usb/gspca/cpia1.c
@@ -1424,7 +1424,6 @@ static int sd_config(struct gspca_dev *gspca_dev,
 {
 	struct sd *sd = (struct sd *) gspca_dev;
 	struct cam *cam;
-	int ret;
 
 	sd->mainsFreq = FREQ_DEF == V4L2_CID_POWER_LINE_FREQUENCY_60HZ;
 	reset_camera_params(gspca_dev);
@@ -1436,10 +1435,7 @@ static int sd_config(struct gspca_dev *gspca_dev,
 	cam->cam_mode = mode;
 	cam->nmodes = ARRAY_SIZE(mode);
 
-	ret = goto_low_power(gspca_dev);
-	if (ret)
-		gspca_err(gspca_dev, "Cannot go to low power mode: %d\n",
-			  ret);
+	goto_low_power(gspca_dev);
 	/* Check the firmware version. */
 	sd->params.version.firmwareVersion = 0;
 	get_version_information(gspca_dev);
-- 
cgit v1.2.3


From 4b059ce1f4b368208c2310925f49be77f15e527b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:34 +0200
Subject: Revert "ALSA: sb: fix a missing check of snd_ctl_add"

This reverts commit beae77170c60aa786f3e4599c18ead2854d8694d.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It is safe to ignore this error as the
mixer element is optional, and the driver is very legacy.

Cc: Aditya Pakki <pakki001@umn.edu>
Reviewed-by: Takashi Iwai <tiwai@suse.de>
Link: https://lore.kernel.org/r/20210503115736.2104747-8-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/isa/sb/sb16_main.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/sound/isa/sb/sb16_main.c b/sound/isa/sb/sb16_main.c
index 38dc1fde25f3..aa4870531023 100644
--- a/sound/isa/sb/sb16_main.c
+++ b/sound/isa/sb/sb16_main.c
@@ -846,14 +846,10 @@ int snd_sb16dsp_pcm(struct snd_sb *chip, int device)
 	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &snd_sb16_playback_ops);
 	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &snd_sb16_capture_ops);
 
-	if (chip->dma16 >= 0 && chip->dma8 != chip->dma16) {
-		err = snd_ctl_add(card, snd_ctl_new1(
-					&snd_sb16_dma_control, chip));
-		if (err)
-			return err;
-	} else {
+	if (chip->dma16 >= 0 && chip->dma8 != chip->dma16)
+		snd_ctl_add(card, snd_ctl_new1(&snd_sb16_dma_control, chip));
+	else
 		pcm->info_flags = SNDRV_PCM_INFO_HALF_DUPLEX;
-	}
 
 	snd_pcm_set_managed_buffer_all(pcm, SNDRV_DMA_TYPE_DEV,
 				       card->dev, 64*1024, 128*1024);
-- 
cgit v1.2.3


From 8d1beda5f11953ffe135a5213287f0b25b4da41b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:35 +0200
Subject: Revert "leds: lp5523: fix a missing check of return value of
 lp55xx_read"

This reverts commit 248b57015f35c94d4eae2fdd8c6febf5cd703900.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original commit does not properly unwind if there is an error
condition so it needs to be reverted at this point in time.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Jacek Anaszewski <jacek.anaszewski@gmail.com>
Cc: stable <stable@vger.kernel.org>
Fixes: 248b57015f35 ("leds: lp5523: fix a missing check of return value of lp55xx_read")
Link: https://lore.kernel.org/r/20210503115736.2104747-9-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/leds/leds-lp5523.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/leds/leds-lp5523.c b/drivers/leds/leds-lp5523.c
index fc433e63b1dc..5036d7d5f3d4 100644
--- a/drivers/leds/leds-lp5523.c
+++ b/drivers/leds/leds-lp5523.c
@@ -305,9 +305,7 @@ static int lp5523_init_program_engine(struct lp55xx_chip *chip)
 
 	/* Let the programs run for couple of ms and check the engine status */
 	usleep_range(3000, 6000);
-	ret = lp55xx_read(chip, LP5523_REG_STATUS, &status);
-	if (ret)
-		return ret;
+	lp55xx_read(chip, LP5523_REG_STATUS, &status);
 	status &= LP5523_ENG_STATUS_MASK;
 
 	if (status != LP5523_ENG_STATUS_MASK) {
-- 
cgit v1.2.3


From 6647f7a06eb030a2384ec71f0bb2e78854afabfe Mon Sep 17 00:00:00 2001
From: Phillip Potter <phil@philpotter.co.uk>
Date: Mon, 3 May 2021 13:56:36 +0200
Subject: leds: lp5523: check return value of lp5xx_read and jump to cleanup
 code

Check return value of lp5xx_read and if non-zero, jump to code at end of
the function, causing lp5523_stop_all_engines to be executed before
returning the error value up the call chain. This fixes the original
commit (248b57015f35) which was reverted due to the University of Minnesota
problems.

Cc: stable <stable@vger.kernel.org>
Acked-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
Signed-off-by: Phillip Potter <phil@philpotter.co.uk>
Link: https://lore.kernel.org/r/20210503115736.2104747-10-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/leds/leds-lp5523.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/leds/leds-lp5523.c b/drivers/leds/leds-lp5523.c
index 5036d7d5f3d4..b1590cb4a188 100644
--- a/drivers/leds/leds-lp5523.c
+++ b/drivers/leds/leds-lp5523.c
@@ -305,7 +305,9 @@ static int lp5523_init_program_engine(struct lp55xx_chip *chip)
 
 	/* Let the programs run for couple of ms and check the engine status */
 	usleep_range(3000, 6000);
-	lp55xx_read(chip, LP5523_REG_STATUS, &status);
+	ret = lp55xx_read(chip, LP5523_REG_STATUS, &status);
+	if (ret)
+		goto out;
 	status &= LP5523_ENG_STATUS_MASK;
 
 	if (status != LP5523_ENG_STATUS_MASK) {
-- 
cgit v1.2.3


From b0a85abbe92e1a6f3e8580a4590fa7245de7090b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:37 +0200
Subject: Revert "serial: max310x: pass return value of spi_register_driver"

This reverts commit 51f689cc11333944c7a457f25ec75fcb41e99410.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

This change did not properly unwind from the error condition, so it was
not correct.

Cc: Kangjie Lu <kjlu@umn.edu>
Acked-by: Jiri Slaby <jirislaby@kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-11-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/max310x.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
index 8534d6e45a1d..a3ba0e6520a1 100644
--- a/drivers/tty/serial/max310x.c
+++ b/drivers/tty/serial/max310x.c
@@ -1518,10 +1518,10 @@ static int __init max310x_uart_init(void)
 		return ret;
 
 #ifdef CONFIG_SPI_MASTER
-	ret = spi_register_driver(&max310x_spi_driver);
+	spi_register_driver(&max310x_spi_driver);
 #endif
 
-	return ret;
+	return 0;
 }
 module_init(max310x_uart_init);
 
-- 
cgit v1.2.3


From 3890e3dea315f1a257d1b940a2a4e2fa16a7b095 Mon Sep 17 00:00:00 2001
From: Atul Gopinathan <atulgopinathan@gmail.com>
Date: Mon, 3 May 2021 13:56:38 +0200
Subject: serial: max310x: unregister uart driver in case of failure and abort

The macro "spi_register_driver" invokes the function
"__spi_register_driver()" which has a return type of int and can fail,
returning a negative value in such a case. This is currently ignored and
the init() function yields success even if the spi driver failed to
register.

Fix this by collecting the return value of "__spi_register_driver()" and
also unregister the uart driver in case of failure.

Cc: Jiri Slaby <jirislaby@kernel.org>
Signed-off-by: Atul Gopinathan <atulgopinathan@gmail.com>
Link: https://lore.kernel.org/r/20210503115736.2104747-12-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/max310x.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
index a3ba0e6520a1..3cbc757d7be7 100644
--- a/drivers/tty/serial/max310x.c
+++ b/drivers/tty/serial/max310x.c
@@ -1518,10 +1518,12 @@ static int __init max310x_uart_init(void)
 		return ret;
 
 #ifdef CONFIG_SPI_MASTER
-	spi_register_driver(&max310x_spi_driver);
+	ret = spi_register_driver(&max310x_spi_driver);
+	if (ret)
+		uart_unregister_driver(&max310x_uart);
 #endif
 
-	return 0;
+	return ret;
 }
 module_init(max310x_uart_init);
 
-- 
cgit v1.2.3


From 68c5634c4a7278672a3bed00eb5646884257c413 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:39 +0200
Subject: Revert "rtlwifi: fix a potential NULL pointer dereference"

This reverts commit 765976285a8c8db3f0eb7f033829a899d0c2786e.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

This commit is not correct, it should not have used unlikely() and is
not propagating the error properly to the calling function, so it should
be reverted at this point in time.  Also, if the check failed, the
work queue was still assumed to be allocated, so further accesses would
have continued to fail, meaning this patch does nothing to solve the
root issues at all.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Kalle Valo <kvalo@codeaurora.org>
Cc: Bryan Brattlof <hello@bryanbrattlof.com>
Fixes: 765976285a8c ("rtlwifi: fix a potential NULL pointer dereference")
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-13-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/realtek/rtlwifi/base.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c
index 2a7ee90a3f54..4136d7c63254 100644
--- a/drivers/net/wireless/realtek/rtlwifi/base.c
+++ b/drivers/net/wireless/realtek/rtlwifi/base.c
@@ -452,11 +452,6 @@ static void _rtl_init_deferred_work(struct ieee80211_hw *hw)
 	/* <2> work queue */
 	rtlpriv->works.hw = hw;
 	rtlpriv->works.rtl_wq = alloc_workqueue("%s", 0, 0, rtlpriv->cfg->name);
-	if (unlikely(!rtlpriv->works.rtl_wq)) {
-		pr_err("Failed to allocate work queue\n");
-		return;
-	}
-
 	INIT_DELAYED_WORK(&rtlpriv->works.watchdog_wq,
 			  rtl_watchdog_wq_callback);
 	INIT_DELAYED_WORK(&rtlpriv->works.ips_nic_off_wq,
-- 
cgit v1.2.3


From 30b0e0ee9d02b97b68705c46b41444786effc40c Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:40 +0200
Subject: net: rtlwifi: properly check for alloc_workqueue() failure

If alloc_workqueue() fails, properly catch this and propagate the error
to the calling functions, so that the devuce initialization will
properly error out.

Cc: Kalle Valo <kvalo@codeaurora.org>
Cc: Bryan Brattlof <hello@bryanbrattlof.com>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-14-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/realtek/rtlwifi/base.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c
index 4136d7c63254..ffd150ec181f 100644
--- a/drivers/net/wireless/realtek/rtlwifi/base.c
+++ b/drivers/net/wireless/realtek/rtlwifi/base.c
@@ -440,9 +440,14 @@ static void rtl_watchdog_wq_callback(struct work_struct *work);
 static void rtl_fwevt_wq_callback(struct work_struct *work);
 static void rtl_c2hcmd_wq_callback(struct work_struct *work);
 
-static void _rtl_init_deferred_work(struct ieee80211_hw *hw)
+static int _rtl_init_deferred_work(struct ieee80211_hw *hw)
 {
 	struct rtl_priv *rtlpriv = rtl_priv(hw);
+	struct workqueue_struct *wq;
+
+	wq = alloc_workqueue("%s", 0, 0, rtlpriv->cfg->name);
+	if (!wq)
+		return -ENOMEM;
 
 	/* <1> timer */
 	timer_setup(&rtlpriv->works.watchdog_timer,
@@ -451,7 +456,8 @@ static void _rtl_init_deferred_work(struct ieee80211_hw *hw)
 		    rtl_easy_concurrent_retrytimer_callback, 0);
 	/* <2> work queue */
 	rtlpriv->works.hw = hw;
-	rtlpriv->works.rtl_wq = alloc_workqueue("%s", 0, 0, rtlpriv->cfg->name);
+	rtlpriv->works.rtl_wq = wq;
+
 	INIT_DELAYED_WORK(&rtlpriv->works.watchdog_wq,
 			  rtl_watchdog_wq_callback);
 	INIT_DELAYED_WORK(&rtlpriv->works.ips_nic_off_wq,
@@ -461,6 +467,7 @@ static void _rtl_init_deferred_work(struct ieee80211_hw *hw)
 			  rtl_swlps_rfon_wq_callback);
 	INIT_DELAYED_WORK(&rtlpriv->works.fwevt_wq, rtl_fwevt_wq_callback);
 	INIT_DELAYED_WORK(&rtlpriv->works.c2hcmd_wq, rtl_c2hcmd_wq_callback);
+	return 0;
 }
 
 void rtl_deinit_deferred_work(struct ieee80211_hw *hw, bool ips_wq)
@@ -559,9 +566,7 @@ int rtl_init_core(struct ieee80211_hw *hw)
 	rtlmac->link_state = MAC80211_NOLINK;
 
 	/* <6> init deferred work */
-	_rtl_init_deferred_work(hw);
-
-	return 0;
+	return _rtl_init_deferred_work(hw);
 }
 EXPORT_SYMBOL_GPL(rtl_init_core);
 
-- 
cgit v1.2.3


From 5f94eaa4ee23e80841fa359a372f84cfe25daee1 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:41 +0200
Subject: Revert "net: fujitsu: fix a potential NULL pointer dereference"

This reverts commit 9f4d6358e11bbc7b839f9419636188e4151fb6e4.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original change does not change any behavior as the caller of this
function onlyu checks for "== -1" as an error condition so this error is
not handled properly.  Remove this change and it will be fixed up
properly in a later commit.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: David S. Miller <davem@davemloft.net>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Link: https://lore.kernel.org/r/20210503115736.2104747-15-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/fujitsu/fmvj18x_cs.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
index a7b7a4aace79..dc90c61fc827 100644
--- a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
+++ b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
@@ -547,11 +547,6 @@ static int fmvj18x_get_hwinfo(struct pcmcia_device *link, u_char *node_id)
 	return -1;
 
     base = ioremap(link->resource[2]->start, resource_size(link->resource[2]));
-    if (!base) {
-	    pcmcia_release_window(link, link->resource[2]);
-	    return -ENOMEM;
-    }
-
     pcmcia_map_mem_page(link, link->resource[2], 0);
 
     /*
-- 
cgit v1.2.3


From 52202be1cd996cde6e8969a128dc27ee45a7cb5e Mon Sep 17 00:00:00 2001
From: Anirudh Rayabharam <mail@anirudhrb.com>
Date: Mon, 3 May 2021 13:56:42 +0200
Subject: net: fujitsu: fix potential null-ptr-deref

In fmvj18x_get_hwinfo(), if ioremap fails there will be NULL pointer
deref. To fix this, check the return value of ioremap and return -1
to the caller in case of failure.

Cc: "David S. Miller" <davem@davemloft.net>
Acked-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Anirudh Rayabharam <mail@anirudhrb.com>
Link: https://lore.kernel.org/r/20210503115736.2104747-16-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/fujitsu/fmvj18x_cs.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
index dc90c61fc827..b0c0504950d8 100644
--- a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
+++ b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
@@ -547,6 +547,11 @@ static int fmvj18x_get_hwinfo(struct pcmcia_device *link, u_char *node_id)
 	return -1;
 
     base = ioremap(link->resource[2]->start, resource_size(link->resource[2]));
+    if (!base) {
+	pcmcia_release_window(link, link->resource[2]);
+	return -1;
+    }
+
     pcmcia_map_mem_page(link, link->resource[2], 0);
 
     /*
-- 
cgit v1.2.3


From 5369ead83f5aff223b6418c99cb1fe9a8f007363 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:43 +0200
Subject: Revert "net/smc: fix a NULL pointer dereference"

This reverts commit e183d4e414b64711baf7a04e214b61969ca08dfa.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original commit causes a memory leak and does not properly fix the
issue it claims to fix.  I will send a follow-on patch to resolve this
properly.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Ursula Braun <ubraun@linux.ibm.com>
Cc: David S. Miller <davem@davemloft.net>
Link: https://lore.kernel.org/r/20210503115736.2104747-17-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/smc/smc_ism.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index 9c6e95882553..6558cf7643a7 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -417,11 +417,6 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
 	init_waitqueue_head(&smcd->lgrs_deleted);
 	smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)",
 						 WQ_MEM_RECLAIM, name);
-	if (!smcd->event_wq) {
-		kfree(smcd->conn);
-		kfree(smcd);
-		return NULL;
-	}
 	return smcd;
 }
 EXPORT_SYMBOL_GPL(smcd_alloc_dev);
-- 
cgit v1.2.3


From bbeb18f27a44ce6adb00d2316968bc59dc640b9b Mon Sep 17 00:00:00 2001
From: Anirudh Rayabharam <mail@anirudhrb.com>
Date: Mon, 3 May 2021 13:56:44 +0200
Subject: net/smc: properly handle workqueue allocation failure

In smcd_alloc_dev(), if alloc_ordered_workqueue() fails, properly catch
it, clean up and return NULL to let the caller know there was a failure.
Move the call to alloc_ordered_workqueue higher in the function in order
to abort earlier without needing to unwind the call to device_initialize().

Cc: Ursula Braun <ubraun@linux.ibm.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Anirudh Rayabharam <mail@anirudhrb.com>
Link: https://lore.kernel.org/r/20210503115736.2104747-18-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/smc/smc_ism.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index 6558cf7643a7..94b31f2551bc 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -402,6 +402,14 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
 		return NULL;
 	}
 
+	smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)",
+						 WQ_MEM_RECLAIM, name);
+	if (!smcd->event_wq) {
+		kfree(smcd->conn);
+		kfree(smcd);
+		return NULL;
+	}
+
 	smcd->dev.parent = parent;
 	smcd->dev.release = smcd_release;
 	device_initialize(&smcd->dev);
@@ -415,8 +423,6 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
 	INIT_LIST_HEAD(&smcd->vlan);
 	INIT_LIST_HEAD(&smcd->lgr_list);
 	init_waitqueue_head(&smcd->lgrs_deleted);
-	smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)",
-						 WQ_MEM_RECLAIM, name);
 	return smcd;
 }
 EXPORT_SYMBOL_GPL(smcd_alloc_dev);
-- 
cgit v1.2.3


From 4df07045fcfd684379a394d0f2aa0cc4067bda2a Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:45 +0200
Subject: Revert "net: caif: replace BUG_ON with recovery code"

This reverts commit c5dea815834c7d2e9fc633785455bc428b7a1956.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original change here was pointless as dev can never be NULL in this
function so the claim in the changelog that this "fixes" anything is
incorrect (also the developer forgot about panic_on_warn).  A follow-up
change will resolve this issue properly.

Cc: Aditya Pakki <pakki001@umn.edu>
Cc: David S. Miller <davem@davemloft.net>
Link: https://lore.kernel.org/r/20210503115736.2104747-19-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/caif/caif_serial.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c
index da6fffb4d5a8..a7f51eb58915 100644
--- a/drivers/net/caif/caif_serial.c
+++ b/drivers/net/caif/caif_serial.c
@@ -269,9 +269,7 @@ static netdev_tx_t caif_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ser_device *ser;
 
-	if (WARN_ON(!dev))
-		return -EINVAL;
-
+	BUG_ON(dev == NULL);
 	ser = netdev_priv(dev);
 
 	/* Send flow off once, on high water mark */
-- 
cgit v1.2.3


From 65a67792e3416f7c5d7daa47d99334cbb19a7449 Mon Sep 17 00:00:00 2001
From: Du Cheng <ducheng2@gmail.com>
Date: Mon, 3 May 2021 13:56:46 +0200
Subject: net: caif: remove BUG_ON(dev == NULL) in caif_xmit

The condition of dev == NULL is impossible in caif_xmit(), hence it is
for the removal.

Explanation:
The static caif_xmit() is only called upon via a function pointer
`ndo_start_xmit` defined in include/linux/netdevice.h:
```
struct net_device_ops {
    ...
    netdev_tx_t     (*ndo_start_xmit)(struct sk_buff *skb, struct net_device *dev);
    ...
}
```

The exhausive list of call points are:
```
drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
    dev->netdev_ops->ndo_start_xmit(skb, dev);
    ^                                    ^

drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
    struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
			     ^                       ^
    return adapter->rn_ops->ndo_start_xmit(skb, netdev); // adapter would crash first
	   ^                                    ^

drivers/usb/gadget/function/f_ncm.c
    ncm->netdev->netdev_ops->ndo_start_xmit(NULL, ncm->netdev);
	      ^                                   ^

include/linux/netdevice.h
static inline netdev_tx_t __netdev_start_xmit(...
{
    return ops->ndo_start_xmit(skb, dev);
				    ^
}

    const struct net_device_ops *ops = dev->netdev_ops;
				       ^
    rc = __netdev_start_xmit(ops, skb, dev, more);
				       ^
```

In each of the enumerated scenarios, it is impossible for the NULL-valued dev to
reach the caif_xmit() without crashing the kernel earlier, therefore `BUG_ON(dev ==
NULL)` is rather useless, hence the removal.

Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Du Cheng <ducheng2@gmail.com>
Link: https://lore.kernel.org/r/20210503115736.2104747-20-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/caif/caif_serial.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c
index a7f51eb58915..d17482395a4d 100644
--- a/drivers/net/caif/caif_serial.c
+++ b/drivers/net/caif/caif_serial.c
@@ -269,7 +269,6 @@ static netdev_tx_t caif_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ser_device *ser;
 
-	BUG_ON(dev == NULL);
 	ser = netdev_priv(dev);
 
 	/* Send flow off once, on high water mark */
-- 
cgit v1.2.3


From bee1b0511844c8c79fccf1f2b13472393b6b91f7 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:47 +0200
Subject: Revert "net: stmicro: fix a missing check of clk_prepare"

This reverts commit f86a3b83833e7cfe558ca4d70b64ebc48903efec.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original commit causes a memory leak when it is trying to claim it
is properly handling errors.  Revert this change and fix it up properly
in a follow-on commit.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: David S. Miller <davem@davemloft.net>
Fixes: f86a3b83833e ("net: stmicro: fix a missing check of clk_prepare")
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-21-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
index 527077c98ebc..fc68e90acbea 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
@@ -50,9 +50,7 @@ static int sun7i_gmac_init(struct platform_device *pdev, void *priv)
 		gmac->clk_enabled = 1;
 	} else {
 		clk_set_rate(gmac->tx_clk, SUN7I_GMAC_MII_RATE);
-		ret = clk_prepare(gmac->tx_clk);
-		if (ret)
-			return ret;
+		clk_prepare(gmac->tx_clk);
 	}
 
 	return 0;
-- 
cgit v1.2.3


From 0c32a96d000f260b5ebfabb4145a86ae1cd71847 Mon Sep 17 00:00:00 2001
From: Anirudh Rayabharam <mail@anirudhrb.com>
Date: Mon, 3 May 2021 13:56:48 +0200
Subject: net: stmicro: handle clk_prepare() failure during init

In case clk_prepare() fails, capture and propagate the error code up the
stack. If regulator_enable() was called earlier, properly unwind it by
calling regulator_disable().

Signed-off-by: Anirudh Rayabharam <mail@anirudhrb.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-22-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
index fc68e90acbea..fc3b0acc8f99 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
@@ -30,7 +30,7 @@ struct sunxi_priv_data {
 static int sun7i_gmac_init(struct platform_device *pdev, void *priv)
 {
 	struct sunxi_priv_data *gmac = priv;
-	int ret;
+	int ret = 0;
 
 	if (gmac->regulator) {
 		ret = regulator_enable(gmac->regulator);
@@ -50,10 +50,12 @@ static int sun7i_gmac_init(struct platform_device *pdev, void *priv)
 		gmac->clk_enabled = 1;
 	} else {
 		clk_set_rate(gmac->tx_clk, SUN7I_GMAC_MII_RATE);
-		clk_prepare(gmac->tx_clk);
+		ret = clk_prepare(gmac->tx_clk);
+		if (ret && gmac->regulator)
+			regulator_disable(gmac->regulator);
 	}
 
-	return 0;
+	return ret;
 }
 
 static void sun7i_gmac_exit(struct platform_device *pdev, void *priv)
-- 
cgit v1.2.3


From 7930742d6a0ff091c85b92ef4e076432d8d8cb79 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:49 +0200
Subject: Revert "niu: fix missing checks of niu_pci_eeprom_read"

This reverts commit 26fd962bde0b15e54234fe762d86bc0349df1de4.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The change here was incorrect.  While it is nice to check if
niu_pci_eeprom_read() succeeded or not when using the data, any error
that might have happened was not propagated upwards properly, causing
the kernel to assume that these reads were successful, which results in
invalid data in the buffer that was to contain the successfully read
data.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Shannon Nelson <shannon.lee.nelson@gmail.com>
Cc: David S. Miller <davem@davemloft.net>
Fixes: 26fd962bde0b ("niu: fix missing checks of niu_pci_eeprom_read")
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-23-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/sun/niu.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index 707ccdd03b19..d70cdea756d1 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -8097,8 +8097,6 @@ static int niu_pci_vpd_scan_props(struct niu *np, u32 start, u32 end)
 		start += 3;
 
 		prop_len = niu_pci_eeprom_read(np, start + 4);
-		if (prop_len < 0)
-			return prop_len;
 		err = niu_pci_vpd_get_propname(np, start + 5, namebuf, 64);
 		if (err < 0)
 			return err;
@@ -8143,12 +8141,8 @@ static int niu_pci_vpd_scan_props(struct niu *np, u32 start, u32 end)
 			netif_printk(np, probe, KERN_DEBUG, np->dev,
 				     "VPD_SCAN: Reading in property [%s] len[%d]\n",
 				     namebuf, prop_len);
-			for (i = 0; i < prop_len; i++) {
-				err = niu_pci_eeprom_read(np, off + i);
-				if (err >= 0)
-					*prop_buf = err;
-				++prop_buf;
-			}
+			for (i = 0; i < prop_len; i++)
+				*prop_buf++ = niu_pci_eeprom_read(np, off + i);
 		}
 
 		start += len;
-- 
cgit v1.2.3


From e6e337708c22f80824b82d4af645f20715730ad0 Mon Sep 17 00:00:00 2001
From: Du Cheng <ducheng2@gmail.com>
Date: Mon, 3 May 2021 13:56:50 +0200
Subject: ethernet: sun: niu: fix missing checks of niu_pci_eeprom_read()

niu_pci_eeprom_read() may fail, so add checks to its return value and
propagate the error up the callstack.

An examination of the callstack up to niu_pci_eeprom_read shows that:

niu_pci_eeprom_read() // returns int
    niu_pci_vpd_scan_props() // returns int
        niu_pci_vpd_fetch() // returns *void*
            niu_get_invariants() // returns int

since niu_pci_vpd_fetch() returns void which breaks the bubbling up,
change its return type to int so that error is propagated upwards.

Signed-off-by: Du Cheng <ducheng2@gmail.com>
Cc: Shannon Nelson <shannon.lee.nelson@gmail.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-24-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/sun/niu.c | 34 ++++++++++++++++++++++++----------
 1 file changed, 24 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index d70cdea756d1..74e748662ec0 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -8097,6 +8097,8 @@ static int niu_pci_vpd_scan_props(struct niu *np, u32 start, u32 end)
 		start += 3;
 
 		prop_len = niu_pci_eeprom_read(np, start + 4);
+		if (prop_len < 0)
+			return prop_len;
 		err = niu_pci_vpd_get_propname(np, start + 5, namebuf, 64);
 		if (err < 0)
 			return err;
@@ -8141,8 +8143,12 @@ static int niu_pci_vpd_scan_props(struct niu *np, u32 start, u32 end)
 			netif_printk(np, probe, KERN_DEBUG, np->dev,
 				     "VPD_SCAN: Reading in property [%s] len[%d]\n",
 				     namebuf, prop_len);
-			for (i = 0; i < prop_len; i++)
-				*prop_buf++ = niu_pci_eeprom_read(np, off + i);
+			for (i = 0; i < prop_len; i++) {
+				err =  niu_pci_eeprom_read(np, off + i);
+				if (err < 0)
+					return err;
+				*prop_buf++ = err;
+			}
 		}
 
 		start += len;
@@ -8152,14 +8158,14 @@ static int niu_pci_vpd_scan_props(struct niu *np, u32 start, u32 end)
 }
 
 /* ESPC_PIO_EN_ENABLE must be set */
-static void niu_pci_vpd_fetch(struct niu *np, u32 start)
+static int niu_pci_vpd_fetch(struct niu *np, u32 start)
 {
 	u32 offset;
 	int err;
 
 	err = niu_pci_eeprom_read16_swp(np, start + 1);
 	if (err < 0)
-		return;
+		return err;
 
 	offset = err + 3;
 
@@ -8168,12 +8174,14 @@ static void niu_pci_vpd_fetch(struct niu *np, u32 start)
 		u32 end;
 
 		err = niu_pci_eeprom_read(np, here);
+		if (err < 0)
+			return err;
 		if (err != 0x90)
-			return;
+			return -EINVAL;
 
 		err = niu_pci_eeprom_read16_swp(np, here + 1);
 		if (err < 0)
-			return;
+			return err;
 
 		here = start + offset + 3;
 		end = start + offset + err;
@@ -8181,9 +8189,12 @@ static void niu_pci_vpd_fetch(struct niu *np, u32 start)
 		offset += err;
 
 		err = niu_pci_vpd_scan_props(np, here, end);
-		if (err < 0 || err == 1)
-			return;
+		if (err < 0)
+			return err;
+		if (err == 1)
+			return -EINVAL;
 	}
+	return 0;
 }
 
 /* ESPC_PIO_EN_ENABLE must be set */
@@ -9274,8 +9285,11 @@ static int niu_get_invariants(struct niu *np)
 		offset = niu_pci_vpd_offset(np);
 		netif_printk(np, probe, KERN_DEBUG, np->dev,
 			     "%s() VPD offset [%08x]\n", __func__, offset);
-		if (offset)
-			niu_pci_vpd_fetch(np, offset);
+		if (offset) {
+			err = niu_pci_vpd_fetch(np, offset);
+			if (err < 0)
+				return err;
+		}
 		nw64(ESPC_PIO_EN, 0);
 
 		if (np->flags & NIU_FLAGS_VPD_VALID) {
-- 
cgit v1.2.3


From b95b57dfe7a142bf2446548eb7f49340fd73e78b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:51 +0200
Subject: Revert "qlcnic: Avoid potential NULL pointer dereference"

This reverts commit 5bf7295fe34a5251b1d241b9736af4697b590670.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

This commit does not properly detect if an error happens because the
logic after this loop will not detect that there was a failed
allocation.

Cc: Aditya Pakki <pakki001@umn.edu>
Cc: David S. Miller <davem@davemloft.net>
Fixes: 5bf7295fe34a ("qlcnic: Avoid potential NULL pointer dereference")
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-25-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
index d8a3ecaed3fc..985cf8cb2ec0 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
@@ -1047,8 +1047,6 @@ int qlcnic_do_lb_test(struct qlcnic_adapter *adapter, u8 mode)
 
 	for (i = 0; i < QLCNIC_NUM_ILB_PKT; i++) {
 		skb = netdev_alloc_skb(adapter->netdev, QLCNIC_ILB_PKT_SIZE);
-		if (!skb)
-			break;
 		qlcnic_create_loopback_buff(skb->data, adapter->mac_addr);
 		skb_put(skb, QLCNIC_ILB_PKT_SIZE);
 		adapter->ahw->diag_cnt = 0;
-- 
cgit v1.2.3


From 84460f01cba382553199bc1361f69a872d5abed4 Mon Sep 17 00:00:00 2001
From: Tom Seewald <tseewald@gmail.com>
Date: Mon, 3 May 2021 13:56:52 +0200
Subject: qlcnic: Add null check after calling netdev_alloc_skb

The function qlcnic_dl_lb_test() currently calls netdev_alloc_skb()
without checking afterwards that the allocation succeeded. Fix this by
checking if the skb is NULL and returning an error in such a case.
Breaking out of the loop if the skb is NULL is not correct as no error
would be reported to the caller and no message would be printed for the
user.

Cc: David S. Miller <davem@davemloft.net>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Tom Seewald <tseewald@gmail.com>
Link: https://lore.kernel.org/r/20210503115736.2104747-26-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
index 985cf8cb2ec0..d8f0863b3934 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
@@ -1047,6 +1047,8 @@ int qlcnic_do_lb_test(struct qlcnic_adapter *adapter, u8 mode)
 
 	for (i = 0; i < QLCNIC_NUM_ILB_PKT; i++) {
 		skb = netdev_alloc_skb(adapter->netdev, QLCNIC_ILB_PKT_SIZE);
+		if (!skb)
+			goto error;
 		qlcnic_create_loopback_buff(skb->data, adapter->mac_addr);
 		skb_put(skb, QLCNIC_ILB_PKT_SIZE);
 		adapter->ahw->diag_cnt = 0;
@@ -1070,6 +1072,7 @@ int qlcnic_do_lb_test(struct qlcnic_adapter *adapter, u8 mode)
 			cnt++;
 	}
 	if (cnt != i) {
+error:
 		dev_err(&adapter->pdev->dev,
 			"LB Test: failed, TX[%d], RX[%d]\n", i, cnt);
 		if (mode != QLCNIC_ILB_MODE)
-- 
cgit v1.2.3


From 257343d3ed557f11d580d0b7c515dc154f64a42b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:53 +0200
Subject: Revert "gdrom: fix a memory leak bug"

This reverts commit 093c48213ee37c3c3ff1cf5ac1aa2a9d8bc66017.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

Because of this, all submissions from this group must be reverted from
the kernel tree and will need to be re-reviewed again to determine if
they actually are a valid fix.  Until that work is complete, remove this
change to ensure that no problems are being introduced into the
codebase.

Cc: Wenwen Wang <wang6495@umn.edu>
Cc: Peter Rosin <peda@axentia.se>
Cc: Jens Axboe <axboe@kernel.dk>
Fixes: 093c48213ee3 ("gdrom: fix a memory leak bug")
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-27-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/cdrom/gdrom.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 742b4a0932e3..7f681320c7d3 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -862,7 +862,6 @@ static void __exit exit_gdrom(void)
 	platform_device_unregister(pd);
 	platform_driver_unregister(&gdrom_driver);
 	kfree(gd.toc);
-	kfree(gd.cd_info);
 }
 
 module_init(init_gdrom);
-- 
cgit v1.2.3


From d03d1021da6fe7f46efe9f2a7335564e7c9db5ab Mon Sep 17 00:00:00 2001
From: Atul Gopinathan <atulgopinathan@gmail.com>
Date: Mon, 3 May 2021 13:56:54 +0200
Subject: cdrom: gdrom: deallocate struct gdrom_unit fields in remove_gdrom

The fields, "toc" and "cd_info", of "struct gdrom_unit gd" are allocated
in "probe_gdrom()". Prevent a memory leak by making sure "gd.cd_info" is
deallocated in the "remove_gdrom()" function.

Also prevent double free of the field "gd.toc" by moving it from the
module's exit function to "remove_gdrom()". This is because, in
"probe_gdrom()", the function makes sure to deallocate "gd.toc" in case
of any errors, so the exit function invoked later would again free
"gd.toc".

The patch also maintains consistency by deallocating the above mentioned
fields in "remove_gdrom()" along with another memory allocated field
"gd.disk".

Suggested-by: Jens Axboe <axboe@kernel.dk>
Cc: Peter Rosin <peda@axentia.se>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Atul Gopinathan <atulgopinathan@gmail.com>
Link: https://lore.kernel.org/r/20210503115736.2104747-28-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/cdrom/gdrom.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 7f681320c7d3..6c4f6139f853 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -830,6 +830,8 @@ static int remove_gdrom(struct platform_device *devptr)
 	if (gdrom_major)
 		unregister_blkdev(gdrom_major, GDROM_DEV_NAME);
 	unregister_cdrom(gd.cd_info);
+	kfree(gd.cd_info);
+	kfree(gd.toc);
 
 	return 0;
 }
@@ -861,7 +863,6 @@ static void __exit exit_gdrom(void)
 {
 	platform_device_unregister(pd);
 	platform_driver_unregister(&gdrom_driver);
-	kfree(gd.toc);
 }
 
 module_init(init_gdrom);
-- 
cgit v1.2.3


From 566f53238da74801b48e985788e5f7c9159e5940 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:55 +0200
Subject: Revert "char: hpet: fix a missing check of ioremap"

This reverts commit 13bd14a41ce3105d5b1f3cd8b4d1e249d17b6d9b.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

While this is technically correct, it is only fixing ONE of these errors
in this function, so the patch is not fully correct.  I'll leave this
revert and provide a fix for this later that resolves this same
"problem" everywhere in this function.

Cc: Kangjie Lu <kjlu@umn.edu>
Link: https://lore.kernel.org/r/20210503115736.2104747-29-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/hpet.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index ed3b7dab678d..6f13def6c172 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -969,8 +969,6 @@ static acpi_status hpet_resources(struct acpi_resource *res, void *data)
 	if (ACPI_SUCCESS(status)) {
 		hdp->hd_phys_address = addr.address.minimum;
 		hdp->hd_address = ioremap(addr.address.minimum, addr.address.address_length);
-		if (!hdp->hd_address)
-			return AE_ERROR;
 
 		if (hpet_is_known(hdp)) {
 			iounmap(hdp->hd_address);
-- 
cgit v1.2.3


From b11701c933112d49b808dee01cb7ff854ba6a77a Mon Sep 17 00:00:00 2001
From: Tom Seewald <tseewald@gmail.com>
Date: Mon, 3 May 2021 13:56:56 +0200
Subject: char: hpet: add checks after calling ioremap

The function hpet_resources() calls ioremap() two times, but in both
cases it does not check if ioremap() returned a null pointer. Fix this
by adding null pointer checks and returning an appropriate error.

Signed-off-by: Tom Seewald <tseewald@gmail.com>
Link: https://lore.kernel.org/r/20210503115736.2104747-30-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/hpet.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index 6f13def6c172..8b55085650ad 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -969,6 +969,8 @@ static acpi_status hpet_resources(struct acpi_resource *res, void *data)
 	if (ACPI_SUCCESS(status)) {
 		hdp->hd_phys_address = addr.address.minimum;
 		hdp->hd_address = ioremap(addr.address.minimum, addr.address.address_length);
+		if (!hdp->hd_address)
+			return AE_ERROR;
 
 		if (hpet_is_known(hdp)) {
 			iounmap(hdp->hd_address);
@@ -982,6 +984,8 @@ static acpi_status hpet_resources(struct acpi_resource *res, void *data)
 		hdp->hd_phys_address = fixmem32->address;
 		hdp->hd_address = ioremap(fixmem32->address,
 						HPET_RANGE_SIZE);
+		if (!hdp->hd_address)
+			return AE_ERROR;
 
 		if (hpet_is_known(hdp)) {
 			iounmap(hdp->hd_address);
-- 
cgit v1.2.3


From 4d427b408c4c2ff1676966c72119a3a559f8e39b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:57 +0200
Subject: Revert "scsi: ufs: fix a missing check of devm_reset_control_get"

This reverts commit 63a06181d7ce169d09843645c50fea1901bc9f0a.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original commit is incorrect, it does not properly clean up on the
error path, so I'll keep the revert and fix it up properly with a
follow-on patch.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Avri Altman <avri.altman@wdc.com>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Fixes: 63a06181d7ce ("scsi: ufs: fix a missing check of devm_reset_control_get")
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-31-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/ufs/ufs-hisi.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/scsi/ufs/ufs-hisi.c b/drivers/scsi/ufs/ufs-hisi.c
index 0aa58131e791..7d1e07a9d9dd 100644
--- a/drivers/scsi/ufs/ufs-hisi.c
+++ b/drivers/scsi/ufs/ufs-hisi.c
@@ -468,10 +468,6 @@ static int ufs_hisi_init_common(struct ufs_hba *hba)
 	ufshcd_set_variant(hba, host);
 
 	host->rst  = devm_reset_control_get(dev, "rst");
-	if (IS_ERR(host->rst)) {
-		dev_err(dev, "%s: failed to get reset control\n", __func__);
-		return PTR_ERR(host->rst);
-	}
 
 	ufs_hisi_set_pm_lvl(hba);
 
-- 
cgit v1.2.3


From 2f4a784f40f8d337d6590e2e93f46429052e15ac Mon Sep 17 00:00:00 2001
From: Phillip Potter <phil@philpotter.co.uk>
Date: Mon, 3 May 2021 13:56:58 +0200
Subject: scsi: ufs: handle cleanup correctly on devm_reset_control_get error

Move ufshcd_set_variant call in ufs_hisi_init_common to common error
section at end of the function, and then jump to this from the error
checking statements for both devm_reset_control_get and
ufs_hisi_get_resource. This fixes the original commit (63a06181d7ce)
which was reverted due to the University of Minnesota problems.

Suggested-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Avri Altman <avri.altman@wdc.com>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Phillip Potter <phil@philpotter.co.uk>
Link: https://lore.kernel.org/r/20210503115736.2104747-32-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/ufs/ufs-hisi.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/ufs/ufs-hisi.c b/drivers/scsi/ufs/ufs-hisi.c
index 7d1e07a9d9dd..d0626773eb38 100644
--- a/drivers/scsi/ufs/ufs-hisi.c
+++ b/drivers/scsi/ufs/ufs-hisi.c
@@ -467,17 +467,24 @@ static int ufs_hisi_init_common(struct ufs_hba *hba)
 	host->hba = hba;
 	ufshcd_set_variant(hba, host);
 
-	host->rst  = devm_reset_control_get(dev, "rst");
+	host->rst = devm_reset_control_get(dev, "rst");
+	if (IS_ERR(host->rst)) {
+		dev_err(dev, "%s: failed to get reset control\n", __func__);
+		err = PTR_ERR(host->rst);
+		goto error;
+	}
 
 	ufs_hisi_set_pm_lvl(hba);
 
 	err = ufs_hisi_get_resource(host);
-	if (err) {
-		ufshcd_set_variant(hba, NULL);
-		return err;
-	}
+	if (err)
+		goto error;
 
 	return 0;
+
+error:
+	ufshcd_set_variant(hba, NULL);
+	return err;
 }
 
 static int ufs_hi3660_init(struct ufs_hba *hba)
-- 
cgit v1.2.3


From 1dacca7fa1ebea47d38d20cd2df37094805d2649 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:59 +0200
Subject: Revert "ALSA: gus: add a check of the status of snd_ctl_add"

This reverts commit 0f25e000cb4398081748e54f62a902098aa79ec1.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original commit did nothing if there was an error, except to print
out a message, which is pointless.  So remove the commit as it gives a
"false sense of doing something".

Cc: Kangjie Lu <kjlu@umn.edu>
Reviewed-by: Takashi Iwai <tiwai@suse.de>
Link: https://lore.kernel.org/r/20210503115736.2104747-33-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/isa/gus/gus_main.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/sound/isa/gus/gus_main.c b/sound/isa/gus/gus_main.c
index afc088f0377c..b7518122a10d 100644
--- a/sound/isa/gus/gus_main.c
+++ b/sound/isa/gus/gus_main.c
@@ -77,17 +77,8 @@ static const struct snd_kcontrol_new snd_gus_joystick_control = {
 
 static void snd_gus_init_control(struct snd_gus_card *gus)
 {
-	int ret;
-
-	if (!gus->ace_flag) {
-		ret =
-			snd_ctl_add(gus->card,
-					snd_ctl_new1(&snd_gus_joystick_control,
-						gus));
-		if (ret)
-			snd_printk(KERN_ERR "gus: snd_ctl_add failed: %d\n",
-					ret);
-	}
+	if (!gus->ace_flag)
+		snd_ctl_add(gus->card, snd_ctl_new1(&snd_gus_joystick_control, gus));
 }
 
 /*
-- 
cgit v1.2.3


From 94f88309f201821073f57ae6005caefa61bf7b7e Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:01 +0200
Subject: Revert "ALSA: sb8: add a check for request_region"

This reverts commit dcd0feac9bab901d5739de51b3f69840851f8919.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original commit message for this change was incorrect as the code
path can never result in a NULL dereference, alluding to the fact that
whatever tool was used to "find this" is broken.  It's just an optional
resource reservation, so removing this check is fine.

Cc: Kangjie Lu <kjlu@umn.edu>
Acked-by: Takashi Iwai <tiwai@suse.de>
Fixes: dcd0feac9bab ("ALSA: sb8: add a check for request_region")
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-35-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/isa/sb/sb8.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/sound/isa/sb/sb8.c b/sound/isa/sb/sb8.c
index 6c9d534ce8b6..95290ffe5c6e 100644
--- a/sound/isa/sb/sb8.c
+++ b/sound/isa/sb/sb8.c
@@ -95,10 +95,6 @@ static int snd_sb8_probe(struct device *pdev, unsigned int dev)
 
 	/* block the 0x388 port to avoid PnP conflicts */
 	acard->fm_res = request_region(0x388, 4, "SoundBlaster FM");
-	if (!acard->fm_res) {
-		err = -EBUSY;
-		goto _err;
-	}
 
 	if (port[dev] != SNDRV_AUTO_PORT) {
 		if ((err = snd_sbdsp_create(card, port[dev], irq[dev],
-- 
cgit v1.2.3


From a28591f61b60fac820c6de59826ffa710e5e314e Mon Sep 17 00:00:00 2001
From: Atul Gopinathan <atulgopinathan@gmail.com>
Date: Mon, 3 May 2021 13:57:02 +0200
Subject: ALSA: sb8: Add a comment note regarding an unused pointer

The field "fm_res" of "struct snd_sb8" is never used/dereferenced
throughout the sb8.c code. Therefore there is no need for any null value
check after the "request_region()".

Add a comment note to make developers know about this and prevent any
"NULL check" patches on this part of code.

Cc: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Atul Gopinathan <atulgopinathan@gmail.com>
Link: https://lore.kernel.org/r/20210503115736.2104747-36-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/isa/sb/sb8.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/sound/isa/sb/sb8.c b/sound/isa/sb/sb8.c
index 95290ffe5c6e..ed3a87ebe3f4 100644
--- a/sound/isa/sb/sb8.c
+++ b/sound/isa/sb/sb8.c
@@ -93,7 +93,11 @@ static int snd_sb8_probe(struct device *pdev, unsigned int dev)
 	acard = card->private_data;
 	card->private_free = snd_sb8_free;
 
-	/* block the 0x388 port to avoid PnP conflicts */
+	/*
+	 * Block the 0x388 port to avoid PnP conflicts.
+	 * No need to check this value after request_region,
+	 * as we never do anything with it.
+	 */
 	acard->fm_res = request_region(0x388, 4, "SoundBlaster FM");
 
 	if (port[dev] != SNDRV_AUTO_PORT) {
-- 
cgit v1.2.3


From 4667a6fc1777ce071504bab570d3599107f4790f Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:03 +0200
Subject: Revert "ALSA: usx2y: Fix potential NULL pointer dereference"

This reverts commit a2c6433ee5a35a8de6d563f6512a26f87835ea0f.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original patch was incorrect, and would leak memory if the error
path the patch added was hit.

Cc: Aditya Pakki <pakki001@umn.edu>
Reviewed-by: Takashi Iwai <tiwai@suse.de>
Link: https://lore.kernel.org/r/20210503115736.2104747-37-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/usb/usx2y/usb_stream.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/sound/usb/usx2y/usb_stream.c b/sound/usb/usx2y/usb_stream.c
index 091c071b270a..6bba17bf689a 100644
--- a/sound/usb/usx2y/usb_stream.c
+++ b/sound/usb/usx2y/usb_stream.c
@@ -91,12 +91,7 @@ static int init_urbs(struct usb_stream_kernel *sk, unsigned use_packsize,
 
 	for (u = 0; u < USB_STREAM_NURBS; ++u) {
 		sk->inurb[u] = usb_alloc_urb(sk->n_o_ps, GFP_KERNEL);
-		if (!sk->inurb[u])
-			return -ENOMEM;
-
 		sk->outurb[u] = usb_alloc_urb(sk->n_o_ps, GFP_KERNEL);
-		if (!sk->outurb[u])
-			return -ENOMEM;
 	}
 
 	if (init_pipe_urbs(sk, use_packsize, sk->inurb, indata, dev, in_pipe) ||
-- 
cgit v1.2.3


From 58c0cc2d90f1e37c4eb63ae7f164c83830833f78 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:05 +0200
Subject: Revert "video: hgafb: fix potential NULL pointer dereference"

This reverts commit ec7f6aad57ad29e4e66cc2e18e1e1599ddb02542.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

This patch "looks" correct, but the driver keeps on running and will
fail horribly right afterward if this error condition ever trips.

So points for trying to resolve an issue, but a huge NEGATIVE value for
providing a "fake" fix for the problem as nothing actually got resolved
at all.  I'll go fix this up properly...

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Aditya Pakki <pakki001@umn.edu>
Cc: Ferenc Bakonyi <fero@drama.obuda.kando.hu>
Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Fixes: ec7f6aad57ad ("video: hgafb: fix potential NULL pointer dereference")
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-39-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/video/fbdev/hgafb.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/video/fbdev/hgafb.c b/drivers/video/fbdev/hgafb.c
index 8bbac7182ad3..fca29f219f8b 100644
--- a/drivers/video/fbdev/hgafb.c
+++ b/drivers/video/fbdev/hgafb.c
@@ -285,8 +285,6 @@ static int hga_card_detect(void)
 	hga_vram_len  = 0x08000;
 
 	hga_vram = ioremap(0xb0000, hga_vram_len);
-	if (!hga_vram)
-		goto error;
 
 	if (request_region(0x3b0, 12, "hgafb"))
 		release_io_ports = 1;
-- 
cgit v1.2.3


From dc13cac4862cc68ec74348a80b6942532b7735fa Mon Sep 17 00:00:00 2001
From: Igor Matheus Andrade Torrente <igormtorrente@gmail.com>
Date: Mon, 3 May 2021 13:57:06 +0200
Subject: video: hgafb: fix potential NULL pointer dereference

The return of ioremap if not checked, and can lead to a NULL to be
assigned to hga_vram. Potentially leading to a NULL pointer
dereference.

The fix adds code to deal with this case in the error label and
changes how the hgafb_probe handles the return of hga_card_detect.

Cc: Ferenc Bakonyi <fero@drama.obuda.kando.hu>
Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Igor Matheus Andrade Torrente <igormtorrente@gmail.com>
Link: https://lore.kernel.org/r/20210503115736.2104747-40-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/video/fbdev/hgafb.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/drivers/video/fbdev/hgafb.c b/drivers/video/fbdev/hgafb.c
index fca29f219f8b..cc8e62ae93f6 100644
--- a/drivers/video/fbdev/hgafb.c
+++ b/drivers/video/fbdev/hgafb.c
@@ -285,6 +285,8 @@ static int hga_card_detect(void)
 	hga_vram_len  = 0x08000;
 
 	hga_vram = ioremap(0xb0000, hga_vram_len);
+	if (!hga_vram)
+		return -ENOMEM;
 
 	if (request_region(0x3b0, 12, "hgafb"))
 		release_io_ports = 1;
@@ -344,13 +346,18 @@ static int hga_card_detect(void)
 			hga_type_name = "Hercules";
 			break;
 	}
-	return 1;
+	return 0;
 error:
 	if (release_io_ports)
 		release_region(0x3b0, 12);
 	if (release_io_port)
 		release_region(0x3bf, 1);
-	return 0;
+
+	iounmap(hga_vram);
+
+	pr_err("hgafb: HGA card not detected.\n");
+
+	return -EINVAL;
 }
 
 /**
@@ -548,13 +555,11 @@ static const struct fb_ops hgafb_ops = {
 static int hgafb_probe(struct platform_device *pdev)
 {
 	struct fb_info *info;
+	int ret;
 
-	if (! hga_card_detect()) {
-		printk(KERN_INFO "hgafb: HGA card not detected.\n");
-		if (hga_vram)
-			iounmap(hga_vram);
-		return -EINVAL;
-	}
+	ret = hga_card_detect();
+	if (!ret)
+		return ret;
 
 	printk(KERN_INFO "hgafb: %s with %ldK of memory detected.\n",
 		hga_type_name, hga_vram_len/1024);
-- 
cgit v1.2.3


From abd7bca23bd4247124265152d00ffd4b2b0d6877 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:07 +0200
Subject: Revert "isdn: mISDNinfineon: fix potential NULL pointer dereference"

This reverts commit d721fe99f6ada070ae8fc0ec3e01ce5a42def0d9.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original commit was incorrect, it should have never have used
"unlikely()" and if it ever does trigger, resources are left grabbed.

Given there are no users for this code around, I'll just revert this and
leave it "as is" as the odds that ioremap() will ever fail here is
horrendiously low.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: David S. Miller <davem@davemloft.net>
Link: https://lore.kernel.org/r/20210503115736.2104747-41-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/isdn/hardware/mISDN/mISDNinfineon.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/isdn/hardware/mISDN/mISDNinfineon.c b/drivers/isdn/hardware/mISDN/mISDNinfineon.c
index a16c7a2a7f3d..fa9c491f9c38 100644
--- a/drivers/isdn/hardware/mISDN/mISDNinfineon.c
+++ b/drivers/isdn/hardware/mISDN/mISDNinfineon.c
@@ -697,11 +697,8 @@ setup_io(struct inf_hw *hw)
 				(ulong)hw->addr.start, (ulong)hw->addr.size);
 			return err;
 		}
-		if (hw->ci->addr_mode == AM_MEMIO) {
+		if (hw->ci->addr_mode == AM_MEMIO)
 			hw->addr.p = ioremap(hw->addr.start, hw->addr.size);
-			if (unlikely(!hw->addr.p))
-				return -ENOMEM;
-		}
 		hw->addr.mode = hw->ci->addr_mode;
 		if (debug & DEBUG_HW)
 			pr_notice("%s: IO addr %lx (%lu bytes) mode%d\n",
-- 
cgit v1.2.3


From c446f0d4702d316e1c6bf621f70e79678d28830a Mon Sep 17 00:00:00 2001
From: Phillip Potter <phil@philpotter.co.uk>
Date: Mon, 3 May 2021 13:57:08 +0200
Subject: isdn: mISDNinfineon: check/cleanup ioremap failure correctly in
 setup_io

Move hw->cfg.mode and hw->addr.mode assignments from hw->ci->cfg_mode
and hw->ci->addr_mode respectively, to be before the subsequent checks
for memory IO mode (and possible ioremap calls in this case).

Also introduce ioremap error checks at both locations. This allows
resources to be properly freed on ioremap failure, as when the caller
of setup_io then subsequently calls release_io via its error path,
release_io can now correctly determine the mode as it has been set
before the ioremap call.

Finally, refactor release_io function so that it will call
release_mem_region in the memory IO case, regardless of whether or not
hw->cfg.p/hw->addr.p are NULL. This means resources are then properly
released on failure.

This properly implements the original reverted commit (d721fe99f6ad)
from the University of Minnesota, whilst also implementing the ioremap
check for the hw->ci->cfg_mode if block as well.

Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Phillip Potter <phil@philpotter.co.uk>
Link: https://lore.kernel.org/r/20210503115736.2104747-42-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/isdn/hardware/mISDN/mISDNinfineon.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/drivers/isdn/hardware/mISDN/mISDNinfineon.c b/drivers/isdn/hardware/mISDN/mISDNinfineon.c
index fa9c491f9c38..88d592bafdb0 100644
--- a/drivers/isdn/hardware/mISDN/mISDNinfineon.c
+++ b/drivers/isdn/hardware/mISDN/mISDNinfineon.c
@@ -630,17 +630,19 @@ static void
 release_io(struct inf_hw *hw)
 {
 	if (hw->cfg.mode) {
-		if (hw->cfg.p) {
+		if (hw->cfg.mode == AM_MEMIO) {
 			release_mem_region(hw->cfg.start, hw->cfg.size);
-			iounmap(hw->cfg.p);
+			if (hw->cfg.p)
+				iounmap(hw->cfg.p);
 		} else
 			release_region(hw->cfg.start, hw->cfg.size);
 		hw->cfg.mode = AM_NONE;
 	}
 	if (hw->addr.mode) {
-		if (hw->addr.p) {
+		if (hw->addr.mode == AM_MEMIO) {
 			release_mem_region(hw->addr.start, hw->addr.size);
-			iounmap(hw->addr.p);
+			if (hw->addr.p)
+				iounmap(hw->addr.p);
 		} else
 			release_region(hw->addr.start, hw->addr.size);
 		hw->addr.mode = AM_NONE;
@@ -670,9 +672,12 @@ setup_io(struct inf_hw *hw)
 				(ulong)hw->cfg.start, (ulong)hw->cfg.size);
 			return err;
 		}
-		if (hw->ci->cfg_mode == AM_MEMIO)
-			hw->cfg.p = ioremap(hw->cfg.start, hw->cfg.size);
 		hw->cfg.mode = hw->ci->cfg_mode;
+		if (hw->ci->cfg_mode == AM_MEMIO) {
+			hw->cfg.p = ioremap(hw->cfg.start, hw->cfg.size);
+			if (!hw->cfg.p)
+				return -ENOMEM;
+		}
 		if (debug & DEBUG_HW)
 			pr_notice("%s: IO cfg %lx (%lu bytes) mode%d\n",
 				  hw->name, (ulong)hw->cfg.start,
@@ -697,9 +702,12 @@ setup_io(struct inf_hw *hw)
 				(ulong)hw->addr.start, (ulong)hw->addr.size);
 			return err;
 		}
-		if (hw->ci->addr_mode == AM_MEMIO)
-			hw->addr.p = ioremap(hw->addr.start, hw->addr.size);
 		hw->addr.mode = hw->ci->addr_mode;
+		if (hw->ci->addr_mode == AM_MEMIO) {
+			hw->addr.p = ioremap(hw->addr.start, hw->addr.size);
+			if (!hw->addr.p)
+				return -ENOMEM;
+		}
 		if (debug & DEBUG_HW)
 			pr_notice("%s: IO addr %lx (%lu bytes) mode%d\n",
 				  hw->name, (ulong)hw->addr.start,
-- 
cgit v1.2.3


From efba106f89fc6848726716c101f4c84e88720a9c Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:09 +0200
Subject: Revert "ath6kl: return error code in ath6kl_wmi_set_roam_lrssi_cmd()"

This reverts commit fc6a6521556c8250e356ddc6a3f2391aa62dc976.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The change being reverted does NOTHING as the caller to this function
does not even look at the return value of the call.  So the "claim" that
this fixed an an issue is not true.  It will be fixed up properly in a
future patch by propagating the error up the stack correctly.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-43-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/ath6kl/wmi.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath6kl/wmi.c b/drivers/net/wireless/ath/ath6kl/wmi.c
index b137e7f34397..aca9732ec1ee 100644
--- a/drivers/net/wireless/ath/ath6kl/wmi.c
+++ b/drivers/net/wireless/ath/ath6kl/wmi.c
@@ -776,8 +776,10 @@ int ath6kl_wmi_set_roam_lrssi_cmd(struct wmi *wmi, u8 lrssi)
 	cmd->info.params.roam_rssi_floor = DEF_LRSSI_ROAM_FLOOR;
 	cmd->roam_ctrl = WMI_SET_LRSSI_SCAN_PARAMS;
 
-	return ath6kl_wmi_cmd_send(wmi, 0, skb, WMI_SET_ROAM_CTRL_CMDID,
+	ath6kl_wmi_cmd_send(wmi, 0, skb, WMI_SET_ROAM_CTRL_CMDID,
 			    NO_SYNC_WMIFLAG);
+
+	return 0;
 }
 
 int ath6kl_wmi_force_roam_cmd(struct wmi *wmi, const u8 *bssid)
-- 
cgit v1.2.3


From 54433367840b46a1555c8ed36c4c0cfc5dbf1358 Mon Sep 17 00:00:00 2001
From: Anirudh Rayabharam <mail@anirudhrb.com>
Date: Mon, 3 May 2021 13:57:10 +0200
Subject: ath6kl: return error code in ath6kl_wmi_set_roam_lrssi_cmd()

Propagate error code from failure of ath6kl_wmi_cmd_send() to the
caller.

Signed-off-by: Anirudh Rayabharam <mail@anirudhrb.com>
Cc: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-44-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/ath6kl/debug.c | 5 ++++-
 drivers/net/wireless/ath/ath6kl/wmi.c   | 4 +---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/ath/ath6kl/debug.c b/drivers/net/wireless/ath/ath6kl/debug.c
index 7506cea46f58..433a047f3747 100644
--- a/drivers/net/wireless/ath/ath6kl/debug.c
+++ b/drivers/net/wireless/ath/ath6kl/debug.c
@@ -1027,14 +1027,17 @@ static ssize_t ath6kl_lrssi_roam_write(struct file *file,
 {
 	struct ath6kl *ar = file->private_data;
 	unsigned long lrssi_roam_threshold;
+	int ret;
 
 	if (kstrtoul_from_user(user_buf, count, 0, &lrssi_roam_threshold))
 		return -EINVAL;
 
 	ar->lrssi_roam_threshold = lrssi_roam_threshold;
 
-	ath6kl_wmi_set_roam_lrssi_cmd(ar->wmi, ar->lrssi_roam_threshold);
+	ret = ath6kl_wmi_set_roam_lrssi_cmd(ar->wmi, ar->lrssi_roam_threshold);
 
+	if (ret)
+		return ret;
 	return count;
 }
 
diff --git a/drivers/net/wireless/ath/ath6kl/wmi.c b/drivers/net/wireless/ath/ath6kl/wmi.c
index aca9732ec1ee..b137e7f34397 100644
--- a/drivers/net/wireless/ath/ath6kl/wmi.c
+++ b/drivers/net/wireless/ath/ath6kl/wmi.c
@@ -776,10 +776,8 @@ int ath6kl_wmi_set_roam_lrssi_cmd(struct wmi *wmi, u8 lrssi)
 	cmd->info.params.roam_rssi_floor = DEF_LRSSI_ROAM_FLOOR;
 	cmd->roam_ctrl = WMI_SET_LRSSI_SCAN_PARAMS;
 
-	ath6kl_wmi_cmd_send(wmi, 0, skb, WMI_SET_ROAM_CTRL_CMDID,
+	return ath6kl_wmi_cmd_send(wmi, 0, skb, WMI_SET_ROAM_CTRL_CMDID,
 			    NO_SYNC_WMIFLAG);
-
-	return 0;
 }
 
 int ath6kl_wmi_force_roam_cmd(struct wmi *wmi, const u8 *bssid)
-- 
cgit v1.2.3


From 5e68b86c7b7c059c0f0ec4bf8adabe63f84a61eb Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:11 +0200
Subject: Revert "rapidio: fix a NULL pointer dereference when
 create_workqueue() fails"

This reverts commit 23015b22e47c5409620b1726a677d69e5cd032ba.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original commit has a memory leak on the error path here, it does
not clean up everything properly.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Alexandre Bounine <alex.bou9@gmail.com>
Cc: Matt Porter <mporter@kernel.crashing.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Fixes: 23015b22e47c ("rapidio: fix a NULL pointer dereference when create_workqueue() fails")
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-45-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/rapidio/rio_cm.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c
index 50ec53d67a4c..e6c16f04f2b4 100644
--- a/drivers/rapidio/rio_cm.c
+++ b/drivers/rapidio/rio_cm.c
@@ -2138,14 +2138,6 @@ static int riocm_add_mport(struct device *dev,
 	mutex_init(&cm->rx_lock);
 	riocm_rx_fill(cm, RIOCM_RX_RING_SIZE);
 	cm->rx_wq = create_workqueue(DRV_NAME "/rxq");
-	if (!cm->rx_wq) {
-		riocm_error("failed to allocate IBMBOX_%d on %s",
-			    cmbox, mport->name);
-		rio_release_outb_mbox(mport, cmbox);
-		kfree(cm);
-		return -ENOMEM;
-	}
-
 	INIT_WORK(&cm->rx_work, rio_ibmsg_handler);
 
 	cm->tx_slot = 0;
-- 
cgit v1.2.3


From 69ce3ae36dcb03cdf416b0862a45369ddbf50fdf Mon Sep 17 00:00:00 2001
From: Anirudh Rayabharam <mail@anirudhrb.com>
Date: Mon, 3 May 2021 13:57:12 +0200
Subject: rapidio: handle create_workqueue() failure

In case create_workqueue() fails, release all resources and return -ENOMEM
to caller to avoid potential NULL pointer deref later. Move up the
create_workequeue() call to return early and avoid unwinding the call to
riocm_rx_fill().

Cc: Alexandre Bounine <alex.bou9@gmail.com>
Cc: Matt Porter <mporter@kernel.crashing.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Anirudh Rayabharam <mail@anirudhrb.com>
Link: https://lore.kernel.org/r/20210503115736.2104747-46-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/rapidio/rio_cm.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c
index e6c16f04f2b4..db4c265287ae 100644
--- a/drivers/rapidio/rio_cm.c
+++ b/drivers/rapidio/rio_cm.c
@@ -2127,6 +2127,14 @@ static int riocm_add_mport(struct device *dev,
 		return -ENODEV;
 	}
 
+	cm->rx_wq = create_workqueue(DRV_NAME "/rxq");
+	if (!cm->rx_wq) {
+		rio_release_inb_mbox(mport, cmbox);
+		rio_release_outb_mbox(mport, cmbox);
+		kfree(cm);
+		return -ENOMEM;
+	}
+
 	/*
 	 * Allocate and register inbound messaging buffers to be ready
 	 * to receive channel and system management requests
@@ -2137,7 +2145,6 @@ static int riocm_add_mport(struct device *dev,
 	cm->rx_slots = RIOCM_RX_RING_SIZE;
 	mutex_init(&cm->rx_lock);
 	riocm_rx_fill(cm, RIOCM_RX_RING_SIZE);
-	cm->rx_wq = create_workqueue(DRV_NAME "/rxq");
 	INIT_WORK(&cm->rx_work, rio_ibmsg_handler);
 
 	cm->tx_slot = 0;
-- 
cgit v1.2.3


From 36a2c87f7ed9e305d05b9a5c044cc6c494771504 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:13 +0200
Subject: Revert "isdn: mISDN: Fix potential NULL pointer dereference of
 kzalloc"

This reverts commit 38d22659803a033b1b66cd2624c33570c0dde77d.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

While it looks like the original change is correct, it is not, as none
of the setup actually happens, and the error value is not propagated
upwards.

Cc: Aditya Pakki <pakki001@umn.edu>
Cc: David S. Miller <davem@davemloft.net>
Link: https://lore.kernel.org/r/20210503115736.2104747-47-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/isdn/hardware/mISDN/hfcsusb.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c
index 70061991915a..4bb470d3963d 100644
--- a/drivers/isdn/hardware/mISDN/hfcsusb.c
+++ b/drivers/isdn/hardware/mISDN/hfcsusb.c
@@ -249,9 +249,6 @@ hfcsusb_ph_info(struct hfcsusb *hw)
 	int i;
 
 	phi = kzalloc(struct_size(phi, bch, dch->dev.nrbchan), GFP_ATOMIC);
-	if (!phi)
-		return;
-
 	phi->dch.ch.protocol = hw->protocol;
 	phi->dch.ch.Flags = dch->Flags;
 	phi->dch.state = dch->state;
-- 
cgit v1.2.3


From 5265db2ccc735e2783b790d6c19fb5cee8c025ed Mon Sep 17 00:00:00 2001
From: Phillip Potter <phil@philpotter.co.uk>
Date: Mon, 3 May 2021 13:57:14 +0200
Subject: isdn: mISDN: correctly handle ph_info allocation failure in
 hfcsusb_ph_info

Modify return type of hfcusb_ph_info to int, so that we can pass error
value up the call stack when allocation of ph_info fails. Also change
three of four call sites to actually account for the memory failure.
The fourth, in ph_state_nt, is infeasible to change as it is in turn
called by ph_state which is used as a function pointer argument to
mISDN_initdchannel, which would necessitate changing its signature
and updating all the places where it is used (too many).

Fixes original flawed commit (38d22659803a) from the University of
Minnesota.

Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Phillip Potter <phil@philpotter.co.uk>
Link: https://lore.kernel.org/r/20210503115736.2104747-48-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/isdn/hardware/mISDN/hfcsusb.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c
index 4bb470d3963d..cd5642cef01f 100644
--- a/drivers/isdn/hardware/mISDN/hfcsusb.c
+++ b/drivers/isdn/hardware/mISDN/hfcsusb.c
@@ -46,7 +46,7 @@ static void hfcsusb_start_endpoint(struct hfcsusb *hw, int channel);
 static void hfcsusb_stop_endpoint(struct hfcsusb *hw, int channel);
 static int  hfcsusb_setup_bch(struct bchannel *bch, int protocol);
 static void deactivate_bchannel(struct bchannel *bch);
-static void hfcsusb_ph_info(struct hfcsusb *hw);
+static int  hfcsusb_ph_info(struct hfcsusb *hw);
 
 /* start next background transfer for control channel */
 static void
@@ -241,7 +241,7 @@ hfcusb_l2l1B(struct mISDNchannel *ch, struct sk_buff *skb)
  * send full D/B channel status information
  * as MPH_INFORMATION_IND
  */
-static void
+static int
 hfcsusb_ph_info(struct hfcsusb *hw)
 {
 	struct ph_info *phi;
@@ -249,6 +249,9 @@ hfcsusb_ph_info(struct hfcsusb *hw)
 	int i;
 
 	phi = kzalloc(struct_size(phi, bch, dch->dev.nrbchan), GFP_ATOMIC);
+	if (!phi)
+		return -ENOMEM;
+
 	phi->dch.ch.protocol = hw->protocol;
 	phi->dch.ch.Flags = dch->Flags;
 	phi->dch.state = dch->state;
@@ -260,6 +263,8 @@ hfcsusb_ph_info(struct hfcsusb *hw)
 	_queue_data(&dch->dev.D, MPH_INFORMATION_IND, MISDN_ID_ANY,
 		    struct_size(phi, bch, dch->dev.nrbchan), phi, GFP_ATOMIC);
 	kfree(phi);
+
+	return 0;
 }
 
 /*
@@ -344,8 +349,7 @@ hfcusb_l2l1D(struct mISDNchannel *ch, struct sk_buff *skb)
 			ret = l1_event(dch->l1, hh->prim);
 		break;
 	case MPH_INFORMATION_REQ:
-		hfcsusb_ph_info(hw);
-		ret = 0;
+		ret = hfcsusb_ph_info(hw);
 		break;
 	}
 
@@ -400,8 +404,7 @@ hfc_l1callback(struct dchannel *dch, u_int cmd)
 			       hw->name, __func__, cmd);
 		return -1;
 	}
-	hfcsusb_ph_info(hw);
-	return 0;
+	return hfcsusb_ph_info(hw);
 }
 
 static int
@@ -743,8 +746,7 @@ hfcsusb_setup_bch(struct bchannel *bch, int protocol)
 			handle_led(hw, (bch->nr == 1) ? LED_B1_OFF :
 				   LED_B2_OFF);
 	}
-	hfcsusb_ph_info(hw);
-	return 0;
+	return hfcsusb_ph_info(hw);
 }
 
 static void
-- 
cgit v1.2.3


From e1436df2f2550bc89d832ffd456373fdf5d5b5d7 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:15 +0200
Subject: Revert "ecryptfs: replace BUG_ON with error handling code"

This reverts commit 2c2a7552dd6465e8fde6bc9cccf8d66ed1c1eb72.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original commit log for this change was incorrect, no "error
handling code" was added, things will blow up just as badly as before if
any of these cases ever were true.  As this BUG_ON() never fired, and
most of these checks are "obviously" never going to be true, let's just
revert to the original code for now until this gets unwound to be done
correctly in the future.

Cc: Aditya Pakki <pakki001@umn.edu>
Fixes: 2c2a7552dd64 ("ecryptfs: replace BUG_ON with error handling code")
Cc: stable <stable@vger.kernel.org>
Acked-by: Tyler Hicks <code@tyhicks.com>
Link: https://lore.kernel.org/r/20210503115736.2104747-49-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ecryptfs/crypto.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 345f8061e3b4..b1aa993784f7 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -296,10 +296,8 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
 	struct extent_crypt_result ecr;
 	int rc = 0;
 
-	if (!crypt_stat || !crypt_stat->tfm
-	       || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED))
-		return -EINVAL;
-
+	BUG_ON(!crypt_stat || !crypt_stat->tfm
+	       || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED));
 	if (unlikely(ecryptfs_verbosity > 0)) {
 		ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n",
 				crypt_stat->key_size);
-- 
cgit v1.2.3


From c6052f09c14bf0ecdd582662e022eb716f9b8022 Mon Sep 17 00:00:00 2001
From: Phillip Potter <phil@philpotter.co.uk>
Date: Mon, 3 May 2021 13:57:16 +0200
Subject: fs: ecryptfs: remove BUG_ON from crypt_scatterlist

crypt_stat memory itself is allocated when inode is created, in
ecryptfs_alloc_inode, which returns NULL on failure and is handled
by callers, which would prevent us getting to this point. It then
calls ecryptfs_init_crypt_stat which allocates crypt_stat->tfm
checking for and likewise handling allocation failure. Finally,
crypt_stat->flags has ECRYPTFS_STRUCT_INITIALIZED merged into it
in ecryptfs_init_crypt_stat as well.

Simply put, the conditions that the BUG_ON checks for will never
be triggered, as to even get to this function, the relevant conditions
will have already been fulfilled (or the inode allocation would fail in
the first place and thus no call to this function or those above it).

Cc: Tyler Hicks <code@tyhicks.com>
Signed-off-by: Phillip Potter <phil@philpotter.co.uk>
Link: https://lore.kernel.org/r/20210503115736.2104747-50-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ecryptfs/crypto.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index b1aa993784f7..e3f5d7f3c8a0 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -296,8 +296,6 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
 	struct extent_crypt_result ecr;
 	int rc = 0;
 
-	BUG_ON(!crypt_stat || !crypt_stat->tfm
-	       || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED));
 	if (unlikely(ecryptfs_verbosity > 0)) {
 		ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n",
 				crypt_stat->key_size);
-- 
cgit v1.2.3


From 43ed0fcf613a87dd0221ec72d1ade4d6544f2ffc Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:17 +0200
Subject: Revert "dmaengine: qcom_hidma: Check for driver register failure"

This reverts commit a474b3f0428d6b02a538aa10b3c3b722751cb382.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original change is NOT correct, as it does not correctly unwind from
the resources that was allocated before the call to
platform_driver_register().

Cc: Aditya Pakki <pakki001@umn.edu>
Acked-By: Vinod Koul <vkoul@kernel.org>
Acked-By: Sinan Kaya <okaya@kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-51-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/dma/qcom/hidma_mgmt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/dma/qcom/hidma_mgmt.c b/drivers/dma/qcom/hidma_mgmt.c
index 806ca02c52d7..fe87b01f7a4e 100644
--- a/drivers/dma/qcom/hidma_mgmt.c
+++ b/drivers/dma/qcom/hidma_mgmt.c
@@ -418,8 +418,9 @@ static int __init hidma_mgmt_init(void)
 		hidma_mgmt_of_populate_channels(child);
 	}
 #endif
-	return platform_driver_register(&hidma_mgmt_driver);
+	platform_driver_register(&hidma_mgmt_driver);
 
+	return 0;
 }
 module_init(hidma_mgmt_init);
 MODULE_LICENSE("GPL v2");
-- 
cgit v1.2.3


From 4df2a8b0ad634d98a67e540a4e18a60f943e7d9f Mon Sep 17 00:00:00 2001
From: Phillip Potter <phil@philpotter.co.uk>
Date: Mon, 3 May 2021 13:57:18 +0200
Subject: dmaengine: qcom_hidma: comment platform_driver_register call

Place a comment in hidma_mgmt_init explaining why success must
currently be assumed, due to the cleanup issue that would need to
be considered were this module ever to be unloadable or were this
platform_driver_register call ever to fail.

Acked-By: Vinod Koul <vkoul@kernel.org>
Acked-By: Sinan Kaya <okaya@kernel.org>
Signed-off-by: Phillip Potter <phil@philpotter.co.uk>
Link: https://lore.kernel.org/r/20210503115736.2104747-52-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/dma/qcom/hidma_mgmt.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/dma/qcom/hidma_mgmt.c b/drivers/dma/qcom/hidma_mgmt.c
index fe87b01f7a4e..62026607f3f8 100644
--- a/drivers/dma/qcom/hidma_mgmt.c
+++ b/drivers/dma/qcom/hidma_mgmt.c
@@ -418,6 +418,20 @@ static int __init hidma_mgmt_init(void)
 		hidma_mgmt_of_populate_channels(child);
 	}
 #endif
+	/*
+	 * We do not check for return value here, as it is assumed that
+	 * platform_driver_register must not fail. The reason for this is that
+	 * the (potential) hidma_mgmt_of_populate_channels calls above are not
+	 * cleaned up if it does fail, and to do this work is quite
+	 * complicated. In particular, various calls of of_address_to_resource,
+	 * of_irq_to_resource, platform_device_register_full, of_dma_configure,
+	 * and of_msi_configure which then call other functions and so on, must
+	 * be cleaned up - this is not a trivial exercise.
+	 *
+	 * Currently, this module is not intended to be unloaded, and there is
+	 * no module_exit function defined which does the needed cleanup. For
+	 * this reason, we have to assume success here.
+	 */
 	platform_driver_register(&hidma_mgmt_driver);
 
 	return 0;
-- 
cgit v1.2.3


From 46651077765c80a0d6f87f3469129a72e49ce91b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:19 +0200
Subject: Revert "libertas: add checks for the return value of
 sysfs_create_group"

This reverts commit 434256833d8eb988cb7f3b8a41699e2fe48d9332.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original commit was incorrect, the error needs to be propagated back
to the caller AND if the second group call fails, the first needs to be
removed.  There are much better ways to solve this, the driver should
NOT be calling sysfs_create_group() on its own as it is racing userspace
and loosing.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-53-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/marvell/libertas/mesh.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/net/wireless/marvell/libertas/mesh.c b/drivers/net/wireless/marvell/libertas/mesh.c
index f5b78257d551..c611e6668b21 100644
--- a/drivers/net/wireless/marvell/libertas/mesh.c
+++ b/drivers/net/wireless/marvell/libertas/mesh.c
@@ -805,12 +805,7 @@ static void lbs_persist_config_init(struct net_device *dev)
 {
 	int ret;
 	ret = sysfs_create_group(&(dev->dev.kobj), &boot_opts_group);
-	if (ret)
-		pr_err("failed to create boot_opts_group.\n");
-
 	ret = sysfs_create_group(&(dev->dev.kobj), &mesh_ie_group);
-	if (ret)
-		pr_err("failed to create mesh_ie_group.\n");
 }
 
 static void lbs_persist_config_remove(struct net_device *dev)
-- 
cgit v1.2.3


From 7e79b38fe9a403b065ac5915465f620a8fb3de84 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:20 +0200
Subject: libertas: register sysfs groups properly

The libertas driver was trying to register sysfs groups "by hand" which
causes them to be created _after_ the device is initialized and
announced to userspace, which causes races and can prevent userspace
tools from seeing the sysfs files correctly.

Fix this up by using the built-in sysfs_groups pointers in struct
net_device which were created for this very reason, fixing the race
condition, and properly allowing for any error that might have occured
to be handled properly.

Cc: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-54-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/marvell/libertas/mesh.c | 28 ++++------------------------
 1 file changed, 4 insertions(+), 24 deletions(-)

diff --git a/drivers/net/wireless/marvell/libertas/mesh.c b/drivers/net/wireless/marvell/libertas/mesh.c
index c611e6668b21..c68814841583 100644
--- a/drivers/net/wireless/marvell/libertas/mesh.c
+++ b/drivers/net/wireless/marvell/libertas/mesh.c
@@ -801,19 +801,6 @@ static const struct attribute_group mesh_ie_group = {
 	.attrs = mesh_ie_attrs,
 };
 
-static void lbs_persist_config_init(struct net_device *dev)
-{
-	int ret;
-	ret = sysfs_create_group(&(dev->dev.kobj), &boot_opts_group);
-	ret = sysfs_create_group(&(dev->dev.kobj), &mesh_ie_group);
-}
-
-static void lbs_persist_config_remove(struct net_device *dev)
-{
-	sysfs_remove_group(&(dev->dev.kobj), &boot_opts_group);
-	sysfs_remove_group(&(dev->dev.kobj), &mesh_ie_group);
-}
-
 
 /***************************************************************************
  * Initializing and starting, stopping mesh
@@ -1009,6 +996,10 @@ static int lbs_add_mesh(struct lbs_private *priv)
 	SET_NETDEV_DEV(priv->mesh_dev, priv->dev->dev.parent);
 
 	mesh_dev->flags |= IFF_BROADCAST | IFF_MULTICAST;
+	mesh_dev->sysfs_groups[0] = &lbs_mesh_attr_group;
+	mesh_dev->sysfs_groups[1] = &boot_opts_group;
+	mesh_dev->sysfs_groups[2] = &mesh_ie_group;
+
 	/* Register virtual mesh interface */
 	ret = register_netdev(mesh_dev);
 	if (ret) {
@@ -1016,19 +1007,10 @@ static int lbs_add_mesh(struct lbs_private *priv)
 		goto err_free_netdev;
 	}
 
-	ret = sysfs_create_group(&(mesh_dev->dev.kobj), &lbs_mesh_attr_group);
-	if (ret)
-		goto err_unregister;
-
-	lbs_persist_config_init(mesh_dev);
-
 	/* Everything successful */
 	ret = 0;
 	goto done;
 
-err_unregister:
-	unregister_netdev(mesh_dev);
-
 err_free_netdev:
 	free_netdev(mesh_dev);
 
@@ -1049,8 +1031,6 @@ void lbs_remove_mesh(struct lbs_private *priv)
 
 	netif_stop_queue(mesh_dev);
 	netif_carrier_off(mesh_dev);
-	sysfs_remove_group(&(mesh_dev->dev.kobj), &lbs_mesh_attr_group);
-	lbs_persist_config_remove(mesh_dev);
 	unregister_netdev(mesh_dev);
 	priv->mesh_dev = NULL;
 	kfree(mesh_dev->ieee80211_ptr);
-- 
cgit v1.2.3


From 1e0ce84215dbfd6065872e5d3755352da34f198b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:21 +0200
Subject: Revert "ASoC: rt5645: fix a NULL pointer dereference"

This reverts commit 51dd97d1df5fb9ac58b9b358e63e67b530f6ae21.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

Lots of things seem to be still allocated here and must be properly
cleaned up if an error happens here.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-55-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/codecs/rt5645.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index 9408ee63cb26..7cb90975009a 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -3431,9 +3431,6 @@ static int rt5645_probe(struct snd_soc_component *component)
 		RT5645_HWEQ_NUM, sizeof(struct rt5645_eq_param_s),
 		GFP_KERNEL);
 
-	if (!rt5645->eq_param)
-		return -ENOMEM;
-
 	return 0;
 }
 
-- 
cgit v1.2.3


From 5e70b8e22b64eed13d5bbebcb5911dae65bf8c6b Mon Sep 17 00:00:00 2001
From: Phillip Potter <phil@philpotter.co.uk>
Date: Mon, 3 May 2021 13:57:22 +0200
Subject: ASoC: rt5645: add error checking to rt5645_probe function

Check for return value from various snd_soc_dapm_* calls, as many of
them can return errors and this should be handled. Also, reintroduce
the allocation failure check for rt5645->eq_param as well. Make all
areas where return values are checked lead to the end of the function
in the case of an error. Finally, introduce a comment explaining how
resources here are actually eventually cleaned up by the caller.

Cc: Mark Brown <broonie@kernel.org>
Signed-off-by: Phillip Potter <phil@philpotter.co.uk>
Link: https://lore.kernel.org/r/20210503115736.2104747-56-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/codecs/rt5645.c | 48 ++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 39 insertions(+), 9 deletions(-)

diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index 7cb90975009a..438fa18bcb55 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -3388,30 +3388,44 @@ static int rt5645_probe(struct snd_soc_component *component)
 {
 	struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(component);
 	struct rt5645_priv *rt5645 = snd_soc_component_get_drvdata(component);
+	int ret = 0;
 
 	rt5645->component = component;
 
 	switch (rt5645->codec_type) {
 	case CODEC_TYPE_RT5645:
-		snd_soc_dapm_new_controls(dapm,
+		ret = snd_soc_dapm_new_controls(dapm,
 			rt5645_specific_dapm_widgets,
 			ARRAY_SIZE(rt5645_specific_dapm_widgets));
-		snd_soc_dapm_add_routes(dapm,
+		if (ret < 0)
+			goto exit;
+
+		ret = snd_soc_dapm_add_routes(dapm,
 			rt5645_specific_dapm_routes,
 			ARRAY_SIZE(rt5645_specific_dapm_routes));
+		if (ret < 0)
+			goto exit;
+
 		if (rt5645->v_id < 3) {
-			snd_soc_dapm_add_routes(dapm,
+			ret = snd_soc_dapm_add_routes(dapm,
 				rt5645_old_dapm_routes,
 				ARRAY_SIZE(rt5645_old_dapm_routes));
+			if (ret < 0)
+				goto exit;
 		}
 		break;
 	case CODEC_TYPE_RT5650:
-		snd_soc_dapm_new_controls(dapm,
+		ret = snd_soc_dapm_new_controls(dapm,
 			rt5650_specific_dapm_widgets,
 			ARRAY_SIZE(rt5650_specific_dapm_widgets));
-		snd_soc_dapm_add_routes(dapm,
+		if (ret < 0)
+			goto exit;
+
+		ret = snd_soc_dapm_add_routes(dapm,
 			rt5650_specific_dapm_routes,
 			ARRAY_SIZE(rt5650_specific_dapm_routes));
+		if (ret < 0)
+			goto exit;
 		break;
 	}
 
@@ -3419,9 +3433,17 @@ static int rt5645_probe(struct snd_soc_component *component)
 
 	/* for JD function */
 	if (rt5645->pdata.jd_mode) {
-		snd_soc_dapm_force_enable_pin(dapm, "JD Power");
-		snd_soc_dapm_force_enable_pin(dapm, "LDO2");
-		snd_soc_dapm_sync(dapm);
+		ret = snd_soc_dapm_force_enable_pin(dapm, "JD Power");
+		if (ret < 0)
+			goto exit;
+
+		ret = snd_soc_dapm_force_enable_pin(dapm, "LDO2");
+		if (ret < 0)
+			goto exit;
+
+		ret = snd_soc_dapm_sync(dapm);
+		if (ret < 0)
+			goto exit;
 	}
 
 	if (rt5645->pdata.long_name)
@@ -3431,7 +3453,15 @@ static int rt5645_probe(struct snd_soc_component *component)
 		RT5645_HWEQ_NUM, sizeof(struct rt5645_eq_param_s),
 		GFP_KERNEL);
 
-	return 0;
+	if (!rt5645->eq_param)
+		ret = -ENOMEM;
+exit:
+	/*
+	 * If there was an error above, everything will be cleaned up by the
+	 * caller if we return an error here.  This will be done with a later
+	 * call to rt5645_remove().
+	 */
+	return ret;
 }
 
 static void rt5645_remove(struct snd_soc_component *component)
-- 
cgit v1.2.3


From fdda0dd2686ecd1f2e616c9e0366ea71b40c485d Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:23 +0200
Subject: Revert "ASoC: cs43130: fix a NULL pointer dereference"

This reverts commit a2be42f18d409213bb7e7a736e3ef6ba005115bb.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original patch here is not correct, sysfs files that were created
are not unwound.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-57-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/codecs/cs43130.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/sound/soc/codecs/cs43130.c b/sound/soc/codecs/cs43130.c
index 80bc7c10ed75..c2b6f0ae6d57 100644
--- a/sound/soc/codecs/cs43130.c
+++ b/sound/soc/codecs/cs43130.c
@@ -2319,8 +2319,6 @@ static int cs43130_probe(struct snd_soc_component *component)
 			return ret;
 
 		cs43130->wq = create_singlethread_workqueue("cs43130_hp");
-		if (!cs43130->wq)
-			return -ENOMEM;
 		INIT_WORK(&cs43130->work, cs43130_imp_meas);
 	}
 
-- 
cgit v1.2.3


From 2da441a6491d93eff8ffff523837fd621dc80389 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:24 +0200
Subject: ASoC: cs43130: handle errors in cs43130_probe() properly

cs43130_probe() does not do any valid error checking of things it
initializes, OR what it does, it does not unwind properly if there are
errors.

Fix this up by moving the sysfs files to an attribute group so the
driver core will correctly add/remove them all at once and handle errors
with them, and correctly check for creating a new workqueue and
unwinding if that fails.

Cc: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-58-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/codecs/cs43130.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/sound/soc/codecs/cs43130.c b/sound/soc/codecs/cs43130.c
index c2b6f0ae6d57..80cd3ea0c157 100644
--- a/sound/soc/codecs/cs43130.c
+++ b/sound/soc/codecs/cs43130.c
@@ -1735,6 +1735,14 @@ static DEVICE_ATTR(hpload_dc_r, 0444, cs43130_show_dc_r, NULL);
 static DEVICE_ATTR(hpload_ac_l, 0444, cs43130_show_ac_l, NULL);
 static DEVICE_ATTR(hpload_ac_r, 0444, cs43130_show_ac_r, NULL);
 
+static struct attribute *hpload_attrs[] = {
+	&dev_attr_hpload_dc_l.attr,
+	&dev_attr_hpload_dc_r.attr,
+	&dev_attr_hpload_ac_l.attr,
+	&dev_attr_hpload_ac_r.attr,
+};
+ATTRIBUTE_GROUPS(hpload);
+
 static struct reg_sequence hp_en_cal_seq[] = {
 	{CS43130_INT_MASK_4, CS43130_INT_MASK_ALL},
 	{CS43130_HP_MEAS_LOAD_1, 0},
@@ -2302,23 +2310,15 @@ static int cs43130_probe(struct snd_soc_component *component)
 
 	cs43130->hpload_done = false;
 	if (cs43130->dc_meas) {
-		ret = device_create_file(component->dev, &dev_attr_hpload_dc_l);
-		if (ret < 0)
-			return ret;
-
-		ret = device_create_file(component->dev, &dev_attr_hpload_dc_r);
-		if (ret < 0)
-			return ret;
-
-		ret = device_create_file(component->dev, &dev_attr_hpload_ac_l);
-		if (ret < 0)
-			return ret;
-
-		ret = device_create_file(component->dev, &dev_attr_hpload_ac_r);
-		if (ret < 0)
+		ret = sysfs_create_groups(&component->dev->kobj, hpload_groups);
+		if (ret)
 			return ret;
 
 		cs43130->wq = create_singlethread_workqueue("cs43130_hp");
+		if (!cs43130->wq) {
+			sysfs_remove_groups(&component->dev->kobj, hpload_groups);
+			return -ENOMEM;
+		}
 		INIT_WORK(&cs43130->work, cs43130_imp_meas);
 	}
 
-- 
cgit v1.2.3


From 47e4ff06fa7f5ba4860543a2913bbd0c164640aa Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:25 +0200
Subject: Revert "media: dvb: Add check on sp8870_readreg"

This reverts commit 467a37fba93f2b4fe3ab597ff6a517b22b566882.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

This commit is not properly checking for an error at all, so if a
read succeeds from this device, it will error out.

Cc: Aditya Pakki <pakki001@umn.edu>
Cc: Sean Young <sean@mess.org>
Cc: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-59-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/dvb-frontends/sp8870.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/media/dvb-frontends/sp8870.c b/drivers/media/dvb-frontends/sp8870.c
index 655db8272268..ee893a2f2261 100644
--- a/drivers/media/dvb-frontends/sp8870.c
+++ b/drivers/media/dvb-frontends/sp8870.c
@@ -280,9 +280,7 @@ static int sp8870_set_frontend_parameters(struct dvb_frontend *fe)
 	sp8870_writereg(state, 0xc05, reg0xc05);
 
 	// read status reg in order to clear pending irqs
-	err = sp8870_readreg(state, 0x200);
-	if (err)
-		return err;
+	sp8870_readreg(state, 0x200);
 
 	// system controller start
 	sp8870_microcontroller_start(state);
-- 
cgit v1.2.3


From c6d822c56e7fd29e6fa1b1bb91b98f6a1e942b3c Mon Sep 17 00:00:00 2001
From: Alaa Emad <alaaemadhossney.ae@gmail.com>
Date: Mon, 3 May 2021 13:57:26 +0200
Subject: media: dvb: Add check on sp8870_readreg return

The function sp8870_readreg returns a negative value when i2c_transfer
fails so properly check for this and return the error if it happens.

Cc: Sean Young <sean@mess.org>
Cc: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Alaa Emad <alaaemadhossney.ae@gmail.com>
Link: https://lore.kernel.org/r/20210503115736.2104747-60-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/dvb-frontends/sp8870.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/media/dvb-frontends/sp8870.c b/drivers/media/dvb-frontends/sp8870.c
index ee893a2f2261..9767159aeb9b 100644
--- a/drivers/media/dvb-frontends/sp8870.c
+++ b/drivers/media/dvb-frontends/sp8870.c
@@ -280,7 +280,9 @@ static int sp8870_set_frontend_parameters(struct dvb_frontend *fe)
 	sp8870_writereg(state, 0xc05, reg0xc05);
 
 	// read status reg in order to clear pending irqs
-	sp8870_readreg(state, 0x200);
+	err = sp8870_readreg(state, 0x200);
+	if (err < 0)
+		return err;
 
 	// system controller start
 	sp8870_microcontroller_start(state);
-- 
cgit v1.2.3


From d8c3be2fb2079d0cb4cd29d6aba58dbe54771e42 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:27 +0200
Subject: Revert "media: gspca: mt9m111: Check write_bridge for timeout"

This reverts commit 656025850074f5c1ba2e05be37bda57ba2b8d491.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

Different error values should never be "OR" together and expect anything
sane to come out of the result.

Cc: Aditya Pakki <pakki001@umn.edu>
Cc: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-61-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/usb/gspca/m5602/m5602_mt9m111.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/media/usb/gspca/m5602/m5602_mt9m111.c b/drivers/media/usb/gspca/m5602/m5602_mt9m111.c
index bfa3b381d8a2..50481dc928d0 100644
--- a/drivers/media/usb/gspca/m5602/m5602_mt9m111.c
+++ b/drivers/media/usb/gspca/m5602/m5602_mt9m111.c
@@ -195,7 +195,7 @@ static const struct v4l2_ctrl_config mt9m111_greenbal_cfg = {
 int mt9m111_probe(struct sd *sd)
 {
 	u8 data[2] = {0x00, 0x00};
-	int i, rc = 0;
+	int i;
 	struct gspca_dev *gspca_dev = (struct gspca_dev *)sd;
 
 	if (force_sensor) {
@@ -213,18 +213,16 @@ int mt9m111_probe(struct sd *sd)
 	/* Do the preinit */
 	for (i = 0; i < ARRAY_SIZE(preinit_mt9m111); i++) {
 		if (preinit_mt9m111[i][0] == BRIDGE) {
-			rc |= m5602_write_bridge(sd,
+			m5602_write_bridge(sd,
 				preinit_mt9m111[i][1],
 				preinit_mt9m111[i][2]);
 		} else {
 			data[0] = preinit_mt9m111[i][2];
 			data[1] = preinit_mt9m111[i][3];
-			rc |= m5602_write_sensor(sd,
+			m5602_write_sensor(sd,
 				preinit_mt9m111[i][1], data, 2);
 		}
 	}
-	if (rc < 0)
-		return rc;
 
 	if (m5602_read_sensor(sd, MT9M111_SC_CHIPVER, data, 2))
 		return -ENODEV;
-- 
cgit v1.2.3


From af44068c581c028fd9897ca75a10fa310d8fc449 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Thu, 13 May 2021 16:18:19 +0100
Subject: arm64: tools: Add __ASM_CPUCAPS_H to the endif in cpucaps.h

Anshuman suggested this.

Suggested-by: Anshuman Khandual <anshuman.khandual@arm.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210513151819.12526-1-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/tools/gen-cpucaps.awk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/tools/gen-cpucaps.awk b/arch/arm64/tools/gen-cpucaps.awk
index 18737a1ce044..00c9e72a200a 100755
--- a/arch/arm64/tools/gen-cpucaps.awk
+++ b/arch/arm64/tools/gen-cpucaps.awk
@@ -31,7 +31,7 @@ BEGIN {
 END {
 	printf("#define ARM64_NCAPS\t\t\t\t%d\n", cap_num)
 	print ""
-	print "#endif"
+	print "#endif /* __ASM_CPUCAPS_H */"
 }
 
 # Any lines not handled by previous rules are unexpected
-- 
cgit v1.2.3


From e932f5b458eee63d013578ea128b9ff8ef5f5496 Mon Sep 17 00:00:00 2001
From: Alaa Emad <alaaemadhossney.ae@gmail.com>
Date: Mon, 3 May 2021 13:57:28 +0200
Subject: media: gspca: mt9m111: Check write_bridge for timeout

If m5602_write_bridge times out, it will return a negative error value.
So properly check for this and handle the error correctly instead of
just ignoring it.

Cc: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Alaa Emad <alaaemadhossney.ae@gmail.com>
Link: https://lore.kernel.org/r/20210503115736.2104747-62-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/usb/gspca/m5602/m5602_mt9m111.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/media/usb/gspca/m5602/m5602_mt9m111.c b/drivers/media/usb/gspca/m5602/m5602_mt9m111.c
index 50481dc928d0..bf1af6ed9131 100644
--- a/drivers/media/usb/gspca/m5602/m5602_mt9m111.c
+++ b/drivers/media/usb/gspca/m5602/m5602_mt9m111.c
@@ -195,7 +195,7 @@ static const struct v4l2_ctrl_config mt9m111_greenbal_cfg = {
 int mt9m111_probe(struct sd *sd)
 {
 	u8 data[2] = {0x00, 0x00};
-	int i;
+	int i, err;
 	struct gspca_dev *gspca_dev = (struct gspca_dev *)sd;
 
 	if (force_sensor) {
@@ -213,15 +213,17 @@ int mt9m111_probe(struct sd *sd)
 	/* Do the preinit */
 	for (i = 0; i < ARRAY_SIZE(preinit_mt9m111); i++) {
 		if (preinit_mt9m111[i][0] == BRIDGE) {
-			m5602_write_bridge(sd,
-				preinit_mt9m111[i][1],
-				preinit_mt9m111[i][2]);
+			err = m5602_write_bridge(sd,
+					preinit_mt9m111[i][1],
+					preinit_mt9m111[i][2]);
 		} else {
 			data[0] = preinit_mt9m111[i][2];
 			data[1] = preinit_mt9m111[i][3];
-			m5602_write_sensor(sd,
-				preinit_mt9m111[i][1], data, 2);
+			err = m5602_write_sensor(sd,
+					preinit_mt9m111[i][1], data, 2);
 		}
+		if (err < 0)
+			return err;
 	}
 
 	if (m5602_read_sensor(sd, MT9M111_SC_CHIPVER, data, 2))
-- 
cgit v1.2.3


From 8e23e83c752b54e98102627a1cc09281ad71a299 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:29 +0200
Subject: Revert "media: gspca: Check the return value of write_bridge for
 timeout"

This reverts commit a21a0eb56b4e8fe4a330243af8030f890cde2283.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

Different error values should never be "OR" together and expect anything
sane to come out of the result.

Cc: Aditya Pakki <pakki001@umn.edu>
Cc: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-63-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/usb/gspca/m5602/m5602_po1030.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/media/usb/gspca/m5602/m5602_po1030.c b/drivers/media/usb/gspca/m5602/m5602_po1030.c
index d680b777f097..7bdbb8065146 100644
--- a/drivers/media/usb/gspca/m5602/m5602_po1030.c
+++ b/drivers/media/usb/gspca/m5602/m5602_po1030.c
@@ -154,7 +154,6 @@ static const struct v4l2_ctrl_config po1030_greenbal_cfg = {
 
 int po1030_probe(struct sd *sd)
 {
-	int rc = 0;
 	u8 dev_id_h = 0, i;
 	struct gspca_dev *gspca_dev = (struct gspca_dev *)sd;
 
@@ -174,14 +173,11 @@ int po1030_probe(struct sd *sd)
 	for (i = 0; i < ARRAY_SIZE(preinit_po1030); i++) {
 		u8 data = preinit_po1030[i][2];
 		if (preinit_po1030[i][0] == SENSOR)
-			rc |= m5602_write_sensor(sd,
+			m5602_write_sensor(sd,
 				preinit_po1030[i][1], &data, 1);
 		else
-			rc |= m5602_write_bridge(sd, preinit_po1030[i][1],
-						data);
+			m5602_write_bridge(sd, preinit_po1030[i][1], data);
 	}
-	if (rc < 0)
-		return rc;
 
 	if (m5602_read_sensor(sd, PO1030_DEVID_H, &dev_id_h, 1))
 		return -ENODEV;
-- 
cgit v1.2.3


From dacb408ca6f0e34df22b40d8dd5fae7f8e777d84 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:30 +0200
Subject: media: gspca: properly check for errors in po1030_probe()

If m5602_write_sensor() or m5602_write_bridge() fail, do not continue to
initialize the device but return the error to the calling funtion.

Cc: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-64-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/usb/gspca/m5602/m5602_po1030.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/media/usb/gspca/m5602/m5602_po1030.c b/drivers/media/usb/gspca/m5602/m5602_po1030.c
index 7bdbb8065146..8fd99ceee4b6 100644
--- a/drivers/media/usb/gspca/m5602/m5602_po1030.c
+++ b/drivers/media/usb/gspca/m5602/m5602_po1030.c
@@ -155,6 +155,7 @@ static const struct v4l2_ctrl_config po1030_greenbal_cfg = {
 int po1030_probe(struct sd *sd)
 {
 	u8 dev_id_h = 0, i;
+	int err;
 	struct gspca_dev *gspca_dev = (struct gspca_dev *)sd;
 
 	if (force_sensor) {
@@ -173,10 +174,13 @@ int po1030_probe(struct sd *sd)
 	for (i = 0; i < ARRAY_SIZE(preinit_po1030); i++) {
 		u8 data = preinit_po1030[i][2];
 		if (preinit_po1030[i][0] == SENSOR)
-			m5602_write_sensor(sd,
-				preinit_po1030[i][1], &data, 1);
+			err = m5602_write_sensor(sd, preinit_po1030[i][1],
+						 &data, 1);
 		else
-			m5602_write_bridge(sd, preinit_po1030[i][1], data);
+			err = m5602_write_bridge(sd, preinit_po1030[i][1],
+						 data);
+		if (err < 0)
+			return err;
 	}
 
 	if (m5602_read_sensor(sd, PO1030_DEVID_H, &dev_id_h, 1))
-- 
cgit v1.2.3


From 4fd798a5a89114c1892574c50f2aebd49bc5b4f5 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:31 +0200
Subject: Revert "net: liquidio: fix a NULL pointer dereference"

This reverts commit fe543b2f174f34a7a751aa08b334fe6b105c4569.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

While the original commit does keep the immediate "NULL dereference"
from happening, it does not properly propagate the error back to the
callers, AND it does not fix this same identical issue in the
drivers/net/ethernet/cavium/liquidio/lio_vf_main.c for some reason.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: David S. Miller <davem@davemloft.net>
Link: https://lore.kernel.org/r/20210503115736.2104747-65-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/cavium/liquidio/lio_main.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 7c5af4beedc6..6fa570068648 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -1166,11 +1166,6 @@ static void send_rx_ctrl_cmd(struct lio *lio, int start_stop)
 	sc = (struct octeon_soft_command *)
 		octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE,
 					  16, 0);
-	if (!sc) {
-		netif_info(lio, rx_err, lio->netdev,
-			   "Failed to allocate octeon_soft_command\n");
-		return;
-	}
 
 	ncmd = (union octnet_cmd *)sc->virtdptr;
 
-- 
cgit v1.2.3


From dbc97bfd3918ed9268bfc174cae8a7d6b3d51aad Mon Sep 17 00:00:00 2001
From: Tom Seewald <tseewald@gmail.com>
Date: Mon, 3 May 2021 13:57:32 +0200
Subject: net: liquidio: Add missing null pointer checks

The functions send_rx_ctrl_cmd() in both liquidio/lio_main.c and
liquidio/lio_vf_main.c do not check if the call to
octeon_alloc_soft_command() fails and returns a null pointer. Both
functions also return void so errors are not propagated back to the
caller.

Fix these issues by updating both instances of send_rx_ctrl_cmd() to
return an integer rather than void, and have them return -ENOMEM if an
allocation failure occurs. Also update all callers of send_rx_ctrl_cmd()
so that they now check the return value.

Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Tom Seewald <tseewald@gmail.com>
Link: https://lore.kernel.org/r/20210503115736.2104747-66-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/cavium/liquidio/lio_main.c    | 28 +++++++++++++++-------
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 27 +++++++++++++++------
 2 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 6fa570068648..591229b96257 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -1153,7 +1153,7 @@ static void octeon_destroy_resources(struct octeon_device *oct)
  * @lio: per-network private data
  * @start_stop: whether to start or stop
  */
-static void send_rx_ctrl_cmd(struct lio *lio, int start_stop)
+static int send_rx_ctrl_cmd(struct lio *lio, int start_stop)
 {
 	struct octeon_soft_command *sc;
 	union octnet_cmd *ncmd;
@@ -1161,11 +1161,16 @@ static void send_rx_ctrl_cmd(struct lio *lio, int start_stop)
 	int retval;
 
 	if (oct->props[lio->ifidx].rx_on == start_stop)
-		return;
+		return 0;
 
 	sc = (struct octeon_soft_command *)
 		octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE,
 					  16, 0);
+	if (!sc) {
+		netif_info(lio, rx_err, lio->netdev,
+			   "Failed to allocate octeon_soft_command struct\n");
+		return -ENOMEM;
+	}
 
 	ncmd = (union octnet_cmd *)sc->virtdptr;
 
@@ -1187,18 +1192,19 @@ static void send_rx_ctrl_cmd(struct lio *lio, int start_stop)
 	if (retval == IQ_SEND_FAILED) {
 		netif_info(lio, rx_err, lio->netdev, "Failed to send RX Control message\n");
 		octeon_free_soft_command(oct, sc);
-		return;
 	} else {
 		/* Sleep on a wait queue till the cond flag indicates that the
 		 * response arrived or timed-out.
 		 */
 		retval = wait_for_sc_completion_timeout(oct, sc, 0);
 		if (retval)
-			return;
+			return retval;
 
 		oct->props[lio->ifidx].rx_on = start_stop;
 		WRITE_ONCE(sc->caller_is_done, true);
 	}
+
+	return retval;
 }
 
 /**
@@ -1773,6 +1779,7 @@ static int liquidio_open(struct net_device *netdev)
 	struct octeon_device_priv *oct_priv =
 		(struct octeon_device_priv *)oct->priv;
 	struct napi_struct *napi, *n;
+	int ret = 0;
 
 	if (oct->props[lio->ifidx].napi_enabled == 0) {
 		tasklet_disable(&oct_priv->droq_tasklet);
@@ -1808,7 +1815,9 @@ static int liquidio_open(struct net_device *netdev)
 	netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n");
 
 	/* tell Octeon to start forwarding packets to host */
-	send_rx_ctrl_cmd(lio, 1);
+	ret = send_rx_ctrl_cmd(lio, 1);
+	if (ret)
+		return ret;
 
 	/* start periodical statistics fetch */
 	INIT_DELAYED_WORK(&lio->stats_wk.work, lio_fetch_stats);
@@ -1819,7 +1828,7 @@ static int liquidio_open(struct net_device *netdev)
 	dev_info(&oct->pci_dev->dev, "%s interface is opened\n",
 		 netdev->name);
 
-	return 0;
+	return ret;
 }
 
 /**
@@ -1833,6 +1842,7 @@ static int liquidio_stop(struct net_device *netdev)
 	struct octeon_device_priv *oct_priv =
 		(struct octeon_device_priv *)oct->priv;
 	struct napi_struct *napi, *n;
+	int ret = 0;
 
 	ifstate_reset(lio, LIO_IFSTATE_RUNNING);
 
@@ -1849,7 +1859,9 @@ static int liquidio_stop(struct net_device *netdev)
 	lio->link_changes++;
 
 	/* Tell Octeon that nic interface is down. */
-	send_rx_ctrl_cmd(lio, 0);
+	ret = send_rx_ctrl_cmd(lio, 0);
+	if (ret)
+		return ret;
 
 	if (OCTEON_CN23XX_PF(oct)) {
 		if (!oct->msix_on)
@@ -1884,7 +1896,7 @@ static int liquidio_stop(struct net_device *netdev)
 
 	dev_info(&oct->pci_dev->dev, "%s interface is stopped\n", netdev->name);
 
-	return 0;
+	return ret;
 }
 
 /**
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 516f166ceff8..ffddb3126a32 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -595,7 +595,7 @@ static void octeon_destroy_resources(struct octeon_device *oct)
  * @lio: per-network private data
  * @start_stop: whether to start or stop
  */
-static void send_rx_ctrl_cmd(struct lio *lio, int start_stop)
+static int send_rx_ctrl_cmd(struct lio *lio, int start_stop)
 {
 	struct octeon_device *oct = (struct octeon_device *)lio->oct_dev;
 	struct octeon_soft_command *sc;
@@ -603,11 +603,16 @@ static void send_rx_ctrl_cmd(struct lio *lio, int start_stop)
 	int retval;
 
 	if (oct->props[lio->ifidx].rx_on == start_stop)
-		return;
+		return 0;
 
 	sc = (struct octeon_soft_command *)
 		octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE,
 					  16, 0);
+	if (!sc) {
+		netif_info(lio, rx_err, lio->netdev,
+			   "Failed to allocate octeon_soft_command struct\n");
+		return -ENOMEM;
+	}
 
 	ncmd = (union octnet_cmd *)sc->virtdptr;
 
@@ -635,11 +640,13 @@ static void send_rx_ctrl_cmd(struct lio *lio, int start_stop)
 		 */
 		retval = wait_for_sc_completion_timeout(oct, sc, 0);
 		if (retval)
-			return;
+			return retval;
 
 		oct->props[lio->ifidx].rx_on = start_stop;
 		WRITE_ONCE(sc->caller_is_done, true);
 	}
+
+	return retval;
 }
 
 /**
@@ -906,6 +913,7 @@ static int liquidio_open(struct net_device *netdev)
 	struct octeon_device_priv *oct_priv =
 		(struct octeon_device_priv *)oct->priv;
 	struct napi_struct *napi, *n;
+	int ret = 0;
 
 	if (!oct->props[lio->ifidx].napi_enabled) {
 		tasklet_disable(&oct_priv->droq_tasklet);
@@ -932,11 +940,13 @@ static int liquidio_open(struct net_device *netdev)
 					(LIQUIDIO_NDEV_STATS_POLL_TIME_MS));
 
 	/* tell Octeon to start forwarding packets to host */
-	send_rx_ctrl_cmd(lio, 1);
+	ret = send_rx_ctrl_cmd(lio, 1);
+	if (ret)
+		return ret;
 
 	dev_info(&oct->pci_dev->dev, "%s interface is opened\n", netdev->name);
 
-	return 0;
+	return ret;
 }
 
 /**
@@ -950,9 +960,12 @@ static int liquidio_stop(struct net_device *netdev)
 	struct octeon_device_priv *oct_priv =
 		(struct octeon_device_priv *)oct->priv;
 	struct napi_struct *napi, *n;
+	int ret = 0;
 
 	/* tell Octeon to stop forwarding packets to host */
-	send_rx_ctrl_cmd(lio, 0);
+	ret = send_rx_ctrl_cmd(lio, 0);
+	if (ret)
+		return ret;
 
 	netif_info(lio, ifdown, lio->netdev, "Stopping interface!\n");
 	/* Inform that netif carrier is down */
@@ -986,7 +999,7 @@ static int liquidio_stop(struct net_device *netdev)
 
 	dev_info(&oct->pci_dev->dev, "%s interface is stopped\n", netdev->name);
 
-	return 0;
+	return ret;
 }
 
 /**
-- 
cgit v1.2.3


From ed04fe8a0e87d7b5ea17d47f4ac9ec962b24814a Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:33 +0200
Subject: Revert "video: imsttfb: fix potential NULL pointer dereferences"

This reverts commit 1d84353d205a953e2381044953b7fa31c8c9702d.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original commit here, while technically correct, did not fully
handle all of the reported issues that the commit stated it was fixing,
so revert it until it can be "fixed" fully.

Note, ioremap() probably will never fail for old hardware like this, and
if anyone actually used this hardware (a PowerMac era PCI display card),
they would not be using fbdev anymore.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Aditya Pakki <pakki001@umn.edu>
Cc: Finn Thain <fthain@telegraphics.com.au>
Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Fixes: 1d84353d205a ("video: imsttfb: fix potential NULL pointer dereferences")
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-67-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/video/fbdev/imsttfb.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/video/fbdev/imsttfb.c b/drivers/video/fbdev/imsttfb.c
index 3ac053b88495..e04411701ec8 100644
--- a/drivers/video/fbdev/imsttfb.c
+++ b/drivers/video/fbdev/imsttfb.c
@@ -1512,11 +1512,6 @@ static int imsttfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	info->fix.smem_start = addr;
 	info->screen_base = (__u8 *)ioremap(addr, par->ramdac == IBM ?
 					    0x400000 : 0x800000);
-	if (!info->screen_base) {
-		release_mem_region(addr, size);
-		framebuffer_release(info);
-		return -ENOMEM;
-	}
 	info->fix.mmio_start = addr + 0x800000;
 	par->dc_regs = ioremap(addr + 0x800000, 0x1000);
 	par->cmap_regs_phys = addr + 0x840000;
-- 
cgit v1.2.3


From 13b7c0390a5d3840e1e2cda8f44a310fdbb982de Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:34 +0200
Subject: video: imsttfb: check for ioremap() failures

We should check if ioremap() were to somehow fail in imsttfb_probe() and
handle the unwinding of the resources allocated here properly.

Ideally if anyone cares about this driver (it's for a PowerMac era PCI
display card), they wouldn't even be using fbdev anymore.  Or the devm_*
apis could be used, but that's just extra work for diminishing
returns...

Cc: Finn Thain <fthain@telegraphics.com.au>
Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-68-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/video/fbdev/imsttfb.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/video/fbdev/imsttfb.c b/drivers/video/fbdev/imsttfb.c
index e04411701ec8..16f272a50811 100644
--- a/drivers/video/fbdev/imsttfb.c
+++ b/drivers/video/fbdev/imsttfb.c
@@ -1469,6 +1469,7 @@ static int imsttfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	struct imstt_par *par;
 	struct fb_info *info;
 	struct device_node *dp;
+	int ret = -ENOMEM;
 	
 	dp = pci_device_to_OF_node(pdev);
 	if(dp)
@@ -1504,23 +1505,37 @@ static int imsttfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		default:
 			printk(KERN_INFO "imsttfb: Device 0x%x unknown, "
 					 "contact maintainer.\n", pdev->device);
-			release_mem_region(addr, size);
-			framebuffer_release(info);
-			return -ENODEV;
+			ret = -ENODEV;
+			goto error;
 	}
 
 	info->fix.smem_start = addr;
 	info->screen_base = (__u8 *)ioremap(addr, par->ramdac == IBM ?
 					    0x400000 : 0x800000);
+	if (!info->screen_base)
+		goto error;
 	info->fix.mmio_start = addr + 0x800000;
 	par->dc_regs = ioremap(addr + 0x800000, 0x1000);
+	if (!par->dc_regs)
+		goto error;
 	par->cmap_regs_phys = addr + 0x840000;
 	par->cmap_regs = (__u8 *)ioremap(addr + 0x840000, 0x1000);
+	if (!par->cmap_regs)
+		goto error;
 	info->pseudo_palette = par->palette;
 	init_imstt(info);
 
 	pci_set_drvdata(pdev, info);
 	return 0;
+
+error:
+	if (par->dc_regs)
+		iounmap(par->dc_regs);
+	if (info->screen_base)
+		iounmap(info->screen_base);
+	release_mem_region(addr, size);
+	framebuffer_release(info);
+	return ret;
 }
 
 static void imsttfb_remove(struct pci_dev *pdev)
-- 
cgit v1.2.3


From 30a350947692f794796f563029d29764497f2887 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:35 +0200
Subject: Revert "brcmfmac: add a check for the status of usb_register"

This reverts commit 42daad3343be4a4e1ee03e30a5f5cc731dadfef5.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original commit here did nothing to actually help if usb_register()
failed, so it gives a "false sense of security" when there is none.  The
correct solution is to correctly unwind from this error.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-69-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
index 586f4dfc638b..d2a803fc8ac6 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
@@ -1586,10 +1586,6 @@ void brcmf_usb_exit(void)
 
 void brcmf_usb_register(void)
 {
-	int ret;
-
 	brcmf_dbg(USB, "Enter\n");
-	ret = usb_register(&brcmf_usbdrvr);
-	if (ret)
-		brcmf_err("usb_register failed %d\n", ret);
+	usb_register(&brcmf_usbdrvr);
 }
-- 
cgit v1.2.3


From 419b4a142a7ece36cebcd434f8ce2af59ef94b85 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:36 +0200
Subject: brcmfmac: properly check for bus register errors

The brcmfmac driver ignores any errors on initialization with the
different busses by deferring the initialization to a workqueue and
ignoring all possible errors that might happen.  Fix up all of this by
only allowing the module to load if all bus registering worked properly.

Cc: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-70-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c  |  8 ++---
 .../net/wireless/broadcom/brcm80211/brcmfmac/bus.h | 19 ++++++++--
 .../wireless/broadcom/brcm80211/brcmfmac/core.c    | 42 ++++++++++------------
 .../wireless/broadcom/brcm80211/brcmfmac/pcie.c    |  9 ++---
 .../wireless/broadcom/brcm80211/brcmfmac/pcie.h    |  5 ---
 .../net/wireless/broadcom/brcm80211/brcmfmac/usb.c |  4 +--
 6 files changed, 41 insertions(+), 46 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
index ce8c102df7b3..633d0ab19031 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
@@ -1217,13 +1217,9 @@ static struct sdio_driver brcmf_sdmmc_driver = {
 	},
 };
 
-void brcmf_sdio_register(void)
+int brcmf_sdio_register(void)
 {
-	int ret;
-
-	ret = sdio_register_driver(&brcmf_sdmmc_driver);
-	if (ret)
-		brcmf_err("sdio_register_driver failed: %d\n", ret);
+	return sdio_register_driver(&brcmf_sdmmc_driver);
 }
 
 void brcmf_sdio_exit(void)
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h
index 08f9d47f2e5c..3f5da3bb6aa5 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h
@@ -275,11 +275,26 @@ void brcmf_bus_add_txhdrlen(struct device *dev, uint len);
 
 #ifdef CONFIG_BRCMFMAC_SDIO
 void brcmf_sdio_exit(void);
-void brcmf_sdio_register(void);
+int brcmf_sdio_register(void);
+#else
+static inline void brcmf_sdio_exit(void) { }
+static inline int brcmf_sdio_register(void) { return 0; }
 #endif
+
 #ifdef CONFIG_BRCMFMAC_USB
 void brcmf_usb_exit(void);
-void brcmf_usb_register(void);
+int brcmf_usb_register(void);
+#else
+static inline void brcmf_usb_exit(void) { }
+static inline int brcmf_usb_register(void) { return 0; }
+#endif
+
+#ifdef CONFIG_BRCMFMAC_PCIE
+void brcmf_pcie_exit(void);
+int brcmf_pcie_register(void);
+#else
+static inline void brcmf_pcie_exit(void) { }
+static inline int brcmf_pcie_register(void) { return 0; }
 #endif
 
 #endif /* BRCMFMAC_BUS_H */
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
index 838b09b23abf..cee1682d2333 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
@@ -1518,40 +1518,34 @@ void brcmf_bus_change_state(struct brcmf_bus *bus, enum brcmf_bus_state state)
 	}
 }
 
-static void brcmf_driver_register(struct work_struct *work)
-{
-#ifdef CONFIG_BRCMFMAC_SDIO
-	brcmf_sdio_register();
-#endif
-#ifdef CONFIG_BRCMFMAC_USB
-	brcmf_usb_register();
-#endif
-#ifdef CONFIG_BRCMFMAC_PCIE
-	brcmf_pcie_register();
-#endif
-}
-static DECLARE_WORK(brcmf_driver_work, brcmf_driver_register);
-
 int __init brcmf_core_init(void)
 {
-	if (!schedule_work(&brcmf_driver_work))
-		return -EBUSY;
+	int err;
 
+	err = brcmf_sdio_register();
+	if (err)
+		return err;
+
+	err = brcmf_usb_register();
+	if (err)
+		goto error_usb_register;
+
+	err = brcmf_pcie_register();
+	if (err)
+		goto error_pcie_register;
 	return 0;
+
+error_pcie_register:
+	brcmf_usb_exit();
+error_usb_register:
+	brcmf_sdio_exit();
+	return err;
 }
 
 void __exit brcmf_core_exit(void)
 {
-	cancel_work_sync(&brcmf_driver_work);
-
-#ifdef CONFIG_BRCMFMAC_SDIO
 	brcmf_sdio_exit();
-#endif
-#ifdef CONFIG_BRCMFMAC_USB
 	brcmf_usb_exit();
-#endif
-#ifdef CONFIG_BRCMFMAC_PCIE
 	brcmf_pcie_exit();
-#endif
 }
 
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
index ad79e3b7e74a..143a705b5cb3 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
@@ -2140,15 +2140,10 @@ static struct pci_driver brcmf_pciedrvr = {
 };
 
 
-void brcmf_pcie_register(void)
+int brcmf_pcie_register(void)
 {
-	int err;
-
 	brcmf_dbg(PCIE, "Enter\n");
-	err = pci_register_driver(&brcmf_pciedrvr);
-	if (err)
-		brcmf_err(NULL, "PCIE driver registration failed, err=%d\n",
-			  err);
+	return pci_register_driver(&brcmf_pciedrvr);
 }
 
 
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.h
index d026401d2001..8e6c227e8315 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.h
@@ -11,9 +11,4 @@ struct brcmf_pciedev {
 	struct brcmf_pciedev_info *devinfo;
 };
 
-
-void brcmf_pcie_exit(void);
-void brcmf_pcie_register(void);
-
-
 #endif /* BRCMFMAC_PCIE_H */
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
index d2a803fc8ac6..9fb68c2dc7e3 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
@@ -1584,8 +1584,8 @@ void brcmf_usb_exit(void)
 	usb_deregister(&brcmf_usbdrvr);
 }
 
-void brcmf_usb_register(void)
+int brcmf_usb_register(void)
 {
 	brcmf_dbg(USB, "Enter\n");
-	usb_register(&brcmf_usbdrvr);
+	return usb_register(&brcmf_usbdrvr);
 }
-- 
cgit v1.2.3


From 9183f01b5e6e32eb3f17b5f3f8d5ad5ac9786c49 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 6 May 2021 16:00:47 +0200
Subject: cdrom: gdrom: initialize global variable at init time

As Peter points out, if we were to disconnect and then reconnect this
driver from a device, the "global" state of the device would contain odd
values and could cause problems.  Fix this up by just initializing the
whole thing to 0 at probe() time.

Ideally this would be a per-device variable, but given the age and the
total lack of users of it, that would require a lot of s/./->/g changes
for really no good reason.

Reported-by: Peter Rosin <peda@axentia.se>
Cc: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Peter Rosin <peda@axentia.se>
Link: https://lore.kernel.org/r/YJP2j6AU82MqEY2M@kroah.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/cdrom/gdrom.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 6c4f6139f853..c6d8c0f59722 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -744,6 +744,13 @@ static const struct blk_mq_ops gdrom_mq_ops = {
 static int probe_gdrom(struct platform_device *devptr)
 {
 	int err;
+
+	/*
+	 * Ensure our "one" device is initialized properly in case of previous
+	 * usages of it
+	 */
+	memset(&gd, 0, sizeof(gd));
+
 	/* Start the device */
 	if (gdrom_execute_diagnostic() != 1) {
 		pr_warn("ATA Probe for GDROM failed\n");
@@ -847,7 +854,7 @@ static struct platform_driver gdrom_driver = {
 static int __init init_gdrom(void)
 {
 	int rc;
-	gd.toc = NULL;
+
 	rc = platform_driver_register(&gdrom_driver);
 	if (rc)
 		return rc;
-- 
cgit v1.2.3


From d4d0ad57b3865795c4cde2fb5094c594c2e8f469 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@orcam.me.uk>
Date: Thu, 13 May 2021 11:51:41 +0200
Subject: vgacon: Record video mode changes with VT_RESIZEX

Fix an issue with VGA console font size changes made after the initial
video text mode has been changed with a user tool like `svgatextmode'
calling the VT_RESIZEX ioctl.  As it stands in that case the original
screen geometry continues being used to validate further VT resizing.

Consequently when the video adapter is firstly reprogrammed from the
original say 80x25 text mode using a 9x16 character cell (720x400 pixel
resolution) to say 80x37 text mode and the same character cell (720x592
pixel resolution), and secondly the CRTC character cell updated to 9x8
(by loading a suitable font with the KD_FONT_OP_SET request of the
KDFONTOP ioctl), the VT geometry does not get further updated from 80x37
and only upper half of the screen is used for the VT, with the lower
half showing rubbish corresponding to whatever happens to be there in
the video memory that maps to that part of the screen.  Of course the
proportions change according to text mode geometries and font sizes
chosen.

Address the problem then, by updating the text mode geometry defaults
rather than checking against them whenever the VT is resized via a user
ioctl.

Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
Fixes: e400b6ec4ede ("vt/vgacon: Check if screen resize request comes from userspace")
Cc: stable@vger.kernel.org # v2.6.24+
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/console/vgacon.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c
index 962c12be9774..511e7d06b148 100644
--- a/drivers/video/console/vgacon.c
+++ b/drivers/video/console/vgacon.c
@@ -1089,12 +1089,20 @@ static int vgacon_resize(struct vc_data *c, unsigned int width,
 	if ((width << 1) * height > vga_vram_size)
 		return -EINVAL;
 
+	if (user) {
+		/*
+		 * Ho ho!  Someone (svgatextmode, eh?) may have reprogrammed
+		 * the video mode!  Set the new defaults then and go away.
+		 */
+		screen_info.orig_video_cols = width;
+		screen_info.orig_video_lines = height;
+		vga_default_font_height = c->vc_font.height;
+		return 0;
+	}
 	if (width % 2 || width > screen_info.orig_video_cols ||
 	    height > (screen_info.orig_video_lines * vga_default_font_height)/
 	    c->vc_font.height)
-		/* let svgatextmode tinker with video timings and
-		   return success */
-		return (user) ? 0 : -EINVAL;
+		return -EINVAL;
 
 	if (con_is_visible(c) && !vga_is_gfx) /* who knows */
 		vgacon_doresize(c, width, height);
-- 
cgit v1.2.3


From a90c275eb144c1b755f04769e1f29d832d6daeaf Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@orcam.me.uk>
Date: Thu, 13 May 2021 11:51:45 +0200
Subject: vt_ioctl: Revert VT_RESIZEX parameter handling removal

Revert the removal of code handling extra VT_RESIZEX ioctl's parameters
beyond those that VT_RESIZE supports, fixing a functional regression
causing `svgatextmode' not to resize the VT anymore.

As a consequence of the reverted change when the video adapter is
reprogrammed from the original say 80x25 text mode using a 9x16
character cell (720x400 pixel resolution) to say 80x37 text mode and the
same character cell (720x592 pixel resolution), the VT geometry does not
get updated and only upper two thirds of the screen are used for the VT,
and the lower part remains blank.  The proportions change according to
text mode geometries chosen.

Revert the change verbatim then, bringing back previous VT resizing.

Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
Fixes: 988d0763361b ("vt_ioctl: make VT_RESIZEX behave like VT_RESIZE")
Cc: stable@vger.kernel.org # v5.10+
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/tty/vt/vt_ioctl.c | 57 ++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 47 insertions(+), 10 deletions(-)

diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c
index 89aeaf3c1bca..95d10197566b 100644
--- a/drivers/tty/vt/vt_ioctl.c
+++ b/drivers/tty/vt/vt_ioctl.c
@@ -671,21 +671,58 @@ static int vt_resizex(struct vc_data *vc, struct vt_consize __user *cs)
 	if (copy_from_user(&v, cs, sizeof(struct vt_consize)))
 		return -EFAULT;
 
-	if (v.v_vlin)
-		pr_info_once("\"struct vt_consize\"->v_vlin is ignored. Please report if you need this.\n");
-	if (v.v_clin)
-		pr_info_once("\"struct vt_consize\"->v_clin is ignored. Please report if you need this.\n");
+	/* FIXME: Should check the copies properly */
+	if (!v.v_vlin)
+		v.v_vlin = vc->vc_scan_lines;
+
+	if (v.v_clin) {
+		int rows = v.v_vlin / v.v_clin;
+		if (v.v_rows != rows) {
+			if (v.v_rows) /* Parameters don't add up */
+				return -EINVAL;
+			v.v_rows = rows;
+		}
+	}
+
+	if (v.v_vcol && v.v_ccol) {
+		int cols = v.v_vcol / v.v_ccol;
+		if (v.v_cols != cols) {
+			if (v.v_cols)
+				return -EINVAL;
+			v.v_cols = cols;
+		}
+	}
+
+	if (v.v_clin > 32)
+		return -EINVAL;
 
-	console_lock();
 	for (i = 0; i < MAX_NR_CONSOLES; i++) {
-		vc = vc_cons[i].d;
+		struct vc_data *vcp;
 
-		if (vc) {
-			vc->vc_resize_user = 1;
-			vc_resize(vc, v.v_cols, v.v_rows);
+		if (!vc_cons[i].d)
+			continue;
+		console_lock();
+		vcp = vc_cons[i].d;
+		if (vcp) {
+			int ret;
+			int save_scan_lines = vcp->vc_scan_lines;
+			int save_font_height = vcp->vc_font.height;
+
+			if (v.v_vlin)
+				vcp->vc_scan_lines = v.v_vlin;
+			if (v.v_clin)
+				vcp->vc_font.height = v.v_clin;
+			vcp->vc_resize_user = 1;
+			ret = vc_resize(vcp, v.v_cols, v.v_rows);
+			if (ret) {
+				vcp->vc_scan_lines = save_scan_lines;
+				vcp->vc_font.height = save_font_height;
+				console_unlock();
+				return ret;
+			}
 		}
+		console_unlock();
 	}
-	console_unlock();
 
 	return 0;
 }
-- 
cgit v1.2.3


From 860dafa902595fb5f1d23bbcce1215188c3341e6 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@orcam.me.uk>
Date: Thu, 13 May 2021 11:51:50 +0200
Subject: vt: Fix character height handling with VT_RESIZEX

Restore the original intent of the VT_RESIZEX ioctl's `v_clin' parameter
which is the number of pixel rows per character (cell) rather than the
height of the font used.

For framebuffer devices the two values are always the same, because the
former is inferred from the latter one.  For VGA used as a true text
mode device these two parameters are independent from each other: the
number of pixel rows per character is set in the CRT controller, while
font height is in fact hardwired to 32 pixel rows and fonts of heights
below that value are handled by padding their data with blanks when
loaded to hardware for use by the character generator.  One can change
the setting in the CRT controller and it will update the screen contents
accordingly regardless of the font loaded.

The `v_clin' parameter is used by the `vgacon' driver to set the height
of the character cell and then the cursor position within.  Make the
parameter explicit then, by defining a new `vc_cell_height' struct
member of `vc_data', set it instead of `vc_font.height' from `v_clin' in
the VT_RESIZEX ioctl, and then use it throughout the `vgacon' driver
except where actual font data is accessed which as noted above is
independent from the CRTC setting.

This way the framebuffer console driver is free to ignore the `v_clin'
parameter as irrelevant, as it always should have, avoiding any issues
attempts to give the parameter a meaning there could have caused, such
as one that has led to commit 988d0763361b ("vt_ioctl: make VT_RESIZEX
behave like VT_RESIZE"):

 "syzbot is reporting UAF/OOB read at bit_putcs()/soft_cursor() [1][2],
  for vt_resizex() from ioctl(VT_RESIZEX) allows setting font height
  larger than actual font height calculated by con_font_set() from
  ioctl(PIO_FONT). Since fbcon_set_font() from con_font_set() allocates
  minimal amount of memory based on actual font height calculated by
  con_font_set(), use of vt_resizex() can cause UAF/OOB read for font
  data."

The problem first appeared around Linux 2.5.66 which predates our repo
history, but the origin could be identified with the old MIPS/Linux repo
also at: <git://git.kernel.org/pub/scm/linux/kernel/git/ralf/linux.git>
as commit 9736a3546de7 ("Merge with Linux 2.5.66."), where VT_RESIZEX
code in `vt_ioctl' was updated as follows:

 		if (clin)
-			video_font_height = clin;
+			vc->vc_font.height = clin;

making the parameter apply to framebuffer devices as well, perhaps due
to the use of "font" in the name of the original `video_font_height'
variable.  Use "cell" in the new struct member then to avoid ambiguity.

References:

[1] https://syzkaller.appspot.com/bug?id=32577e96d88447ded2d3b76d71254fb855245837
[2] https://syzkaller.appspot.com/bug?id=6b8355d27b2b94fb5cedf4655e3a59162d9e48e3

Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable@vger.kernel.org # v2.6.12+
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/tty/vt/vt_ioctl.c      |  6 +++---
 drivers/video/console/vgacon.c | 44 +++++++++++++++++++++---------------------
 include/linux/console_struct.h |  1 +
 3 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c
index 95d10197566b..0e0cd9e9e589 100644
--- a/drivers/tty/vt/vt_ioctl.c
+++ b/drivers/tty/vt/vt_ioctl.c
@@ -706,17 +706,17 @@ static int vt_resizex(struct vc_data *vc, struct vt_consize __user *cs)
 		if (vcp) {
 			int ret;
 			int save_scan_lines = vcp->vc_scan_lines;
-			int save_font_height = vcp->vc_font.height;
+			int save_cell_height = vcp->vc_cell_height;
 
 			if (v.v_vlin)
 				vcp->vc_scan_lines = v.v_vlin;
 			if (v.v_clin)
-				vcp->vc_font.height = v.v_clin;
+				vcp->vc_cell_height = v.v_clin;
 			vcp->vc_resize_user = 1;
 			ret = vc_resize(vcp, v.v_cols, v.v_rows);
 			if (ret) {
 				vcp->vc_scan_lines = save_scan_lines;
-				vcp->vc_font.height = save_font_height;
+				vcp->vc_cell_height = save_cell_height;
 				console_unlock();
 				return ret;
 			}
diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c
index 511e7d06b148..631eb918f8e1 100644
--- a/drivers/video/console/vgacon.c
+++ b/drivers/video/console/vgacon.c
@@ -383,7 +383,7 @@ static void vgacon_init(struct vc_data *c, int init)
 		vc_resize(c, vga_video_num_columns, vga_video_num_lines);
 
 	c->vc_scan_lines = vga_scan_lines;
-	c->vc_font.height = vga_video_font_height;
+	c->vc_font.height = c->vc_cell_height = vga_video_font_height;
 	c->vc_complement_mask = 0x7700;
 	if (vga_512_chars)
 		c->vc_hi_font_mask = 0x0800;
@@ -518,32 +518,32 @@ static void vgacon_cursor(struct vc_data *c, int mode)
 		switch (CUR_SIZE(c->vc_cursor_type)) {
 		case CUR_UNDERLINE:
 			vgacon_set_cursor_size(c->state.x,
-					       c->vc_font.height -
-					       (c->vc_font.height <
+					       c->vc_cell_height -
+					       (c->vc_cell_height <
 						10 ? 2 : 3),
-					       c->vc_font.height -
-					       (c->vc_font.height <
+					       c->vc_cell_height -
+					       (c->vc_cell_height <
 						10 ? 1 : 2));
 			break;
 		case CUR_TWO_THIRDS:
 			vgacon_set_cursor_size(c->state.x,
-					       c->vc_font.height / 3,
-					       c->vc_font.height -
-					       (c->vc_font.height <
+					       c->vc_cell_height / 3,
+					       c->vc_cell_height -
+					       (c->vc_cell_height <
 						10 ? 1 : 2));
 			break;
 		case CUR_LOWER_THIRD:
 			vgacon_set_cursor_size(c->state.x,
-					       (c->vc_font.height * 2) / 3,
-					       c->vc_font.height -
-					       (c->vc_font.height <
+					       (c->vc_cell_height * 2) / 3,
+					       c->vc_cell_height -
+					       (c->vc_cell_height <
 						10 ? 1 : 2));
 			break;
 		case CUR_LOWER_HALF:
 			vgacon_set_cursor_size(c->state.x,
-					       c->vc_font.height / 2,
-					       c->vc_font.height -
-					       (c->vc_font.height <
+					       c->vc_cell_height / 2,
+					       c->vc_cell_height -
+					       (c->vc_cell_height <
 						10 ? 1 : 2));
 			break;
 		case CUR_NONE:
@@ -554,7 +554,7 @@ static void vgacon_cursor(struct vc_data *c, int mode)
 			break;
 		default:
 			vgacon_set_cursor_size(c->state.x, 1,
-					       c->vc_font.height);
+					       c->vc_cell_height);
 			break;
 		}
 		break;
@@ -565,13 +565,13 @@ static int vgacon_doresize(struct vc_data *c,
 		unsigned int width, unsigned int height)
 {
 	unsigned long flags;
-	unsigned int scanlines = height * c->vc_font.height;
+	unsigned int scanlines = height * c->vc_cell_height;
 	u8 scanlines_lo = 0, r7 = 0, vsync_end = 0, mode, max_scan;
 
 	raw_spin_lock_irqsave(&vga_lock, flags);
 
 	vgacon_xres = width * VGA_FONTWIDTH;
-	vgacon_yres = height * c->vc_font.height;
+	vgacon_yres = height * c->vc_cell_height;
 	if (vga_video_type >= VIDEO_TYPE_VGAC) {
 		outb_p(VGA_CRTC_MAX_SCAN, vga_video_port_reg);
 		max_scan = inb_p(vga_video_port_val);
@@ -626,9 +626,9 @@ static int vgacon_doresize(struct vc_data *c,
 static int vgacon_switch(struct vc_data *c)
 {
 	int x = c->vc_cols * VGA_FONTWIDTH;
-	int y = c->vc_rows * c->vc_font.height;
+	int y = c->vc_rows * c->vc_cell_height;
 	int rows = screen_info.orig_video_lines * vga_default_font_height/
-		c->vc_font.height;
+		c->vc_cell_height;
 	/*
 	 * We need to save screen size here as it's the only way
 	 * we can spot the screen has been resized and we need to
@@ -1041,7 +1041,7 @@ static int vgacon_adjust_height(struct vc_data *vc, unsigned fontheight)
 				cursor_size_lastto = 0;
 				c->vc_sw->con_cursor(c, CM_DRAW);
 			}
-			c->vc_font.height = fontheight;
+			c->vc_font.height = c->vc_cell_height = fontheight;
 			vc_resize(c, 0, rows);	/* Adjust console size */
 		}
 	}
@@ -1096,12 +1096,12 @@ static int vgacon_resize(struct vc_data *c, unsigned int width,
 		 */
 		screen_info.orig_video_cols = width;
 		screen_info.orig_video_lines = height;
-		vga_default_font_height = c->vc_font.height;
+		vga_default_font_height = c->vc_cell_height;
 		return 0;
 	}
 	if (width % 2 || width > screen_info.orig_video_cols ||
 	    height > (screen_info.orig_video_lines * vga_default_font_height)/
-	    c->vc_font.height)
+	    c->vc_cell_height)
 		return -EINVAL;
 
 	if (con_is_visible(c) && !vga_is_gfx) /* who knows */
diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h
index 153734816b49..d5b9c8d40c18 100644
--- a/include/linux/console_struct.h
+++ b/include/linux/console_struct.h
@@ -101,6 +101,7 @@ struct vc_data {
 	unsigned int	vc_rows;
 	unsigned int	vc_size_row;		/* Bytes per row */
 	unsigned int	vc_scan_lines;		/* # of scan lines */
+	unsigned int	vc_cell_height;		/* CRTC character cell height */
 	unsigned long	vc_origin;		/* [!] Start of real screen */
 	unsigned long	vc_scr_end;		/* [!] End of real screen */
 	unsigned long	vc_visible_origin;	/* [!] Top of visible window */
-- 
cgit v1.2.3


From eb01f5353bdaa59600b29d864819056a0e3de24d Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 13 May 2021 12:23:24 -0400
Subject: tracing: Handle %.*s in trace_check_vprintf()

If a trace event uses the %*.s notation, the trace_check_vprintf() will
fail and will warn about a bad processing of strings, because it does not
take into account the length field when processing the star (*) part.
Have it handle this case as well.

Link: https://lore.kernel.org/linux-nfs/238C0E2D-C2A4-4578-ADD2-C565B3B99842@oracle.com/

Reported-by: Chuck Lever III <chuck.lever@oracle.com>
Fixes: 9a6944fee68e2 ("tracing: Add a verifier to check string pointers for trace events")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 31 +++++++++++++++++++++++++++----
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 560e4c8d3825..a21ef9cd2aae 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3704,6 +3704,9 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
 		goto print;
 
 	while (*p) {
+		bool star = false;
+		int len = 0;
+
 		j = 0;
 
 		/* We only care about %s and variants */
@@ -3725,13 +3728,17 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
 				/* Need to test cases like %08.*s */
 				for (j = 1; p[i+j]; j++) {
 					if (isdigit(p[i+j]) ||
-					    p[i+j] == '*' ||
 					    p[i+j] == '.')
 						continue;
+					if (p[i+j] == '*') {
+						star = true;
+						continue;
+					}
 					break;
 				}
 				if (p[i+j] == 's')
 					break;
+				star = false;
 			}
 			j = 0;
 		}
@@ -3744,6 +3751,9 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
 		iter->fmt[i] = '\0';
 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
 
+		if (star)
+			len = va_arg(ap, int);
+
 		/* The ap now points to the string data of the %s */
 		str = va_arg(ap, const char *);
 
@@ -3762,8 +3772,18 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
 			int ret;
 
 			/* Try to safely read the string */
-			ret = strncpy_from_kernel_nofault(iter->fmt, str,
-							  iter->fmt_size);
+			if (star) {
+				if (len + 1 > iter->fmt_size)
+					len = iter->fmt_size - 1;
+				if (len < 0)
+					len = 0;
+				ret = copy_from_kernel_nofault(iter->fmt, str, len);
+				iter->fmt[len] = 0;
+				star = false;
+			} else {
+				ret = strncpy_from_kernel_nofault(iter->fmt, str,
+								  iter->fmt_size);
+			}
 			if (ret < 0)
 				trace_seq_printf(&iter->seq, "(0x%px)", str);
 			else
@@ -3775,7 +3795,10 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
 			strncpy(iter->fmt, p + i, j + 1);
 			iter->fmt[j+1] = '\0';
 		}
-		trace_seq_printf(&iter->seq, iter->fmt, str);
+		if (star)
+			trace_seq_printf(&iter->seq, iter->fmt, len, str);
+		else
+			trace_seq_printf(&iter->seq, iter->fmt, str);
 
 		p += i + j + 1;
 	}
-- 
cgit v1.2.3


From ced081a436d21a7d34d4d42acb85058f9cf423f2 Mon Sep 17 00:00:00 2001
From: Luca Stefani <luca.stefani.ge1@gmail.com>
Date: Thu, 6 May 2021 21:37:25 +0200
Subject: binder: Return EFAULT if we fail BINDER_ENABLE_ONEWAY_SPAM_DETECTION

All the other ioctl paths return EFAULT in case the
copy_from_user/copy_to_user call fails, make oneway spam detection
follow the same paradigm.

Fixes: a7dc1e6f99df ("binder: tell userspace to dump current backtrace when detected oneway spamming")
Acked-by: Todd Kjos <tkjos@google.com>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Luca Stefani <luca.stefani.ge1@gmail.com>
Link: https://lore.kernel.org/r/20210506193726.45118-1-luca.stefani.ge1@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/android/binder.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 61d34e1dc59c..bcec598b89f2 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -4918,7 +4918,7 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		uint32_t enable;
 
 		if (copy_from_user(&enable, ubuf, sizeof(enable))) {
-			ret = -EINVAL;
+			ret = -EFAULT;
 			goto err;
 		}
 		binder_inner_proc_lock(proc);
-- 
cgit v1.2.3


From c699a0db2d62e3bbb7f0bf35c87edbc8d23e3062 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Mon, 10 May 2021 14:49:05 -0400
Subject: dm snapshot: fix crash with transient storage and zero chunk size

The following commands will crash the kernel:

modprobe brd rd_size=1048576
dmsetup create o --table "0 `blockdev --getsize /dev/ram0` snapshot-origin /dev/ram0"
dmsetup create s --table "0 `blockdev --getsize /dev/ram0` snapshot /dev/ram0 /dev/ram1 N 0"

The reason is that when we test for zero chunk size, we jump to the label
bad_read_metadata without setting the "r" variable. The function
snapshot_ctr destroys all the structures and then exits with "r == 0". The
kernel then crashes because it falsely believes that snapshot_ctr
succeeded.

In order to fix the bug, we set the variable "r" to -EINVAL.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-snap.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 2a51ddd840b4..b8e4d31124ea 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1408,6 +1408,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
 	if (!s->store->chunk_size) {
 		ti->error = "Chunk size not set";
+		r = -EINVAL;
 		goto bad_read_metadata;
 	}
 
-- 
cgit v1.2.3


From 640d1eaff2c09e382a23bd831094ebbfaa16fef5 Mon Sep 17 00:00:00 2001
From: Jim Cromie <jim.cromie@gmail.com>
Date: Tue, 4 May 2021 16:22:34 -0600
Subject: dyndbg: avoid calling dyndbg_emit_prefix when it has no work

Wrap function in a static-inline one, which checks flags to avoid
calling the function unnecessarily.

And hoist its output-buffer initialization to the grand-caller, which
is already allocating the buffer on the stack, and can trivially
initialize it too.

Signed-off-by: Jim Cromie <jim.cromie@gmail.com>
Link: https://lore.kernel.org/r/20210504222235.1033685-2-jim.cromie@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/dynamic_debug.h |  5 +++++
 lib/dynamic_debug.c           | 19 ++++++++++++-------
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index a57ee75342cf..dce631e678dd 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -32,6 +32,11 @@ struct _ddebug {
 #define _DPRINTK_FLAGS_INCL_FUNCNAME	(1<<2)
 #define _DPRINTK_FLAGS_INCL_LINENO	(1<<3)
 #define _DPRINTK_FLAGS_INCL_TID		(1<<4)
+
+#define _DPRINTK_FLAGS_INCL_ANY		\
+	(_DPRINTK_FLAGS_INCL_MODNAME | _DPRINTK_FLAGS_INCL_FUNCNAME |\
+	 _DPRINTK_FLAGS_INCL_LINENO  | _DPRINTK_FLAGS_INCL_TID)
+
 #if defined DEBUG
 #define _DPRINTK_FLAGS_DEFAULT _DPRINTK_FLAGS_PRINT
 #else
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 921d0a654243..398920403321 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -586,13 +586,11 @@ static int remaining(int wrote)
 	return 0;
 }
 
-static char *dynamic_emit_prefix(const struct _ddebug *desc, char *buf)
+static char *__dynamic_emit_prefix(const struct _ddebug *desc, char *buf)
 {
 	int pos_after_tid;
 	int pos = 0;
 
-	*buf = '\0';
-
 	if (desc->flags & _DPRINTK_FLAGS_INCL_TID) {
 		if (in_interrupt())
 			pos += snprintf(buf + pos, remaining(pos), "<intr> ");
@@ -618,11 +616,18 @@ static char *dynamic_emit_prefix(const struct _ddebug *desc, char *buf)
 	return buf;
 }
 
+static inline char *dynamic_emit_prefix(struct _ddebug *desc, char *buf)
+{
+	if (unlikely(desc->flags & _DPRINTK_FLAGS_INCL_ANY))
+		return __dynamic_emit_prefix(desc, buf);
+	return buf;
+}
+
 void __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...)
 {
 	va_list args;
 	struct va_format vaf;
-	char buf[PREFIX_SIZE];
+	char buf[PREFIX_SIZE] = "";
 
 	BUG_ON(!descriptor);
 	BUG_ON(!fmt);
@@ -655,7 +660,7 @@ void __dynamic_dev_dbg(struct _ddebug *descriptor,
 	if (!dev) {
 		printk(KERN_DEBUG "(NULL device *): %pV", &vaf);
 	} else {
-		char buf[PREFIX_SIZE];
+		char buf[PREFIX_SIZE] = "";
 
 		dev_printk_emit(LOGLEVEL_DEBUG, dev, "%s%s %s: %pV",
 				dynamic_emit_prefix(descriptor, buf),
@@ -684,7 +689,7 @@ void __dynamic_netdev_dbg(struct _ddebug *descriptor,
 	vaf.va = &args;
 
 	if (dev && dev->dev.parent) {
-		char buf[PREFIX_SIZE];
+		char buf[PREFIX_SIZE] = "";
 
 		dev_printk_emit(LOGLEVEL_DEBUG, dev->dev.parent,
 				"%s%s %s %s%s: %pV",
@@ -720,7 +725,7 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor,
 	vaf.va = &args;
 
 	if (ibdev && ibdev->dev.parent) {
-		char buf[PREFIX_SIZE];
+		char buf[PREFIX_SIZE] = "";
 
 		dev_printk_emit(LOGLEVEL_DEBUG, ibdev->dev.parent,
 				"%s%s %s %s: %pV",
-- 
cgit v1.2.3


From a3626bcf5fafad0ded410b269e21f37bdaf2baf4 Mon Sep 17 00:00:00 2001
From: Jim Cromie <jim.cromie@gmail.com>
Date: Tue, 4 May 2021 16:22:35 -0600
Subject: dyndbg: drop uninformative vpr_info

Remove a vpr_info which I added in 2012, when I knew even less than now.
In 2020, a simpler pr_fmt stripped it of context, and any remaining value.

no functional change.

Signed-off-by: Jim Cromie <jim.cromie@gmail.com>
Link: https://lore.kernel.org/r/20210504222235.1033685-3-jim.cromie@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 lib/dynamic_debug.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 398920403321..641767b0dce2 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -920,7 +920,6 @@ static const struct seq_operations ddebug_proc_seqops = {
 
 static int ddebug_proc_open(struct inode *inode, struct file *file)
 {
-	vpr_info("called\n");
 	return seq_open_private(file, &ddebug_proc_seqops,
 				sizeof(struct ddebug_iter));
 }
-- 
cgit v1.2.3


From dbae70d452a0858d62915166d93650c98fe6639c Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Wed, 12 May 2021 08:28:43 -0400
Subject: dm integrity: revert to not using discard filler when recalulating

Revert the commit 7a5b96b4784454ba258e83dc7469ddbacd3aaac3 ("dm integrity:
use discard support when recalculating").

There's a bug that when we write some data beyond the current recalculate
boundary, the checksum will be rewritten with the discard filler later.
And the data will no longer have integrity protection. There's no easy
fix for this case.

Also, another problematic case is if dm-integrity is used to detect
bitrot (random device errors, bit flips, etc); dm-integrity should
detect that even for unused sectors. With commit 7a5b96b4784 it can
happen that such change is undetected (because discard filler is not a
valid checksum).

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Acked-by: Milan Broz <gmazyland@gmail.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-integrity.c | 57 ++++++++++++++++++++---------------------------
 1 file changed, 24 insertions(+), 33 deletions(-)

diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 781942aeddd1..6d00e619a141 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -2689,30 +2689,26 @@ next_chunk:
 	if (unlikely(dm_integrity_failed(ic)))
 		goto err;
 
-	if (!ic->discard) {
-		io_req.bi_op = REQ_OP_READ;
-		io_req.bi_op_flags = 0;
-		io_req.mem.type = DM_IO_VMA;
-		io_req.mem.ptr.addr = ic->recalc_buffer;
-		io_req.notify.fn = NULL;
-		io_req.client = ic->io;
-		io_loc.bdev = ic->dev->bdev;
-		io_loc.sector = get_data_sector(ic, area, offset);
-		io_loc.count = n_sectors;
+	io_req.bi_op = REQ_OP_READ;
+	io_req.bi_op_flags = 0;
+	io_req.mem.type = DM_IO_VMA;
+	io_req.mem.ptr.addr = ic->recalc_buffer;
+	io_req.notify.fn = NULL;
+	io_req.client = ic->io;
+	io_loc.bdev = ic->dev->bdev;
+	io_loc.sector = get_data_sector(ic, area, offset);
+	io_loc.count = n_sectors;
 
-		r = dm_io(&io_req, 1, &io_loc, NULL);
-		if (unlikely(r)) {
-			dm_integrity_io_error(ic, "reading data", r);
-			goto err;
-		}
+	r = dm_io(&io_req, 1, &io_loc, NULL);
+	if (unlikely(r)) {
+		dm_integrity_io_error(ic, "reading data", r);
+		goto err;
+	}
 
-		t = ic->recalc_tags;
-		for (i = 0; i < n_sectors; i += ic->sectors_per_block) {
-			integrity_sector_checksum(ic, logical_sector + i, ic->recalc_buffer + (i << SECTOR_SHIFT), t);
-			t += ic->tag_size;
-		}
-	} else {
-		t = ic->recalc_tags + (n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size;
+	t = ic->recalc_tags;
+	for (i = 0; i < n_sectors; i += ic->sectors_per_block) {
+		integrity_sector_checksum(ic, logical_sector + i, ic->recalc_buffer + (i << SECTOR_SHIFT), t);
+		t += ic->tag_size;
 	}
 
 	metadata_block = get_metadata_sector_and_offset(ic, area, offset, &metadata_offset);
@@ -4368,13 +4364,11 @@ try_smaller_buffer:
 			goto bad;
 		}
 		INIT_WORK(&ic->recalc_work, integrity_recalc);
-		if (!ic->discard) {
-			ic->recalc_buffer = vmalloc(RECALC_SECTORS << SECTOR_SHIFT);
-			if (!ic->recalc_buffer) {
-				ti->error = "Cannot allocate buffer for recalculating";
-				r = -ENOMEM;
-				goto bad;
-			}
+		ic->recalc_buffer = vmalloc(RECALC_SECTORS << SECTOR_SHIFT);
+		if (!ic->recalc_buffer) {
+			ti->error = "Cannot allocate buffer for recalculating";
+			r = -ENOMEM;
+			goto bad;
 		}
 		ic->recalc_tags = kvmalloc_array(RECALC_SECTORS >> ic->sb->log2_sectors_per_block,
 						 ic->tag_size, GFP_KERNEL);
@@ -4383,9 +4377,6 @@ try_smaller_buffer:
 			r = -ENOMEM;
 			goto bad;
 		}
-		if (ic->discard)
-			memset(ic->recalc_tags, DISCARD_FILLER,
-			       (RECALC_SECTORS >> ic->sb->log2_sectors_per_block) * ic->tag_size);
 	} else {
 		if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
 			ti->error = "Recalculate can only be specified with internal_hash";
@@ -4579,7 +4570,7 @@ static void dm_integrity_dtr(struct dm_target *ti)
 
 static struct target_type integrity_target = {
 	.name			= "integrity",
-	.version		= {1, 9, 0},
+	.version		= {1, 10, 0},
 	.module			= THIS_MODULE,
 	.features		= DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY,
 	.ctr			= dm_integrity_ctr,
-- 
cgit v1.2.3


From bc8f3d4647a99468d7733039b6bc9234b6e91df4 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Tue, 11 May 2021 11:41:00 -0400
Subject: dm integrity: fix sparse warnings

Use the types __le* instead of __u* to fix sparse warnings.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-integrity.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 6d00e619a141..20f2510db1f6 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -66,14 +66,14 @@ struct superblock {
 	__u8 magic[8];
 	__u8 version;
 	__u8 log2_interleave_sectors;
-	__u16 integrity_tag_size;
-	__u32 journal_sections;
-	__u64 provided_data_sectors;	/* userspace uses this value */
-	__u32 flags;
+	__le16 integrity_tag_size;
+	__le32 journal_sections;
+	__le64 provided_data_sectors;	/* userspace uses this value */
+	__le32 flags;
 	__u8 log2_sectors_per_block;
 	__u8 log2_blocks_per_bitmap_bit;
 	__u8 pad[2];
-	__u64 recalc_sector;
+	__le64 recalc_sector;
 	__u8 pad2[8];
 	__u8 salt[SALT_SIZE];
 };
@@ -86,16 +86,16 @@ struct superblock {
 
 #define	JOURNAL_ENTRY_ROUNDUP		8
 
-typedef __u64 commit_id_t;
+typedef __le64 commit_id_t;
 #define JOURNAL_MAC_PER_SECTOR		8
 
 struct journal_entry {
 	union {
 		struct {
-			__u32 sector_lo;
-			__u32 sector_hi;
+			__le32 sector_lo;
+			__le32 sector_hi;
 		} s;
-		__u64 sector;
+		__le64 sector;
 	} u;
 	commit_id_t last_bytes[];
 	/* __u8 tag[0]; */
@@ -806,7 +806,7 @@ static void section_mac(struct dm_integrity_c *ic, unsigned section, __u8 result
 	}
 
 	if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) {
-		uint64_t section_le;
+		__le64 section_le;
 
 		r = crypto_shash_update(desc, (__u8 *)&ic->sb->salt, SALT_SIZE);
 		if (unlikely(r < 0)) {
@@ -1640,7 +1640,7 @@ static void integrity_end_io(struct bio *bio)
 static void integrity_sector_checksum(struct dm_integrity_c *ic, sector_t sector,
 				      const char *data, char *result)
 {
-	__u64 sector_le = cpu_to_le64(sector);
+	__le64 sector_le = cpu_to_le64(sector);
 	SHASH_DESC_ON_STACK(req, ic->internal_hash);
 	int r;
 	unsigned digest_size;
@@ -3822,7 +3822,7 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
 			for (i = 0; i < ic->journal_sections; i++) {
 				struct scatterlist sg;
 				struct skcipher_request *section_req;
-				__u32 section_le = cpu_to_le32(i);
+				__le32 section_le = cpu_to_le32(i);
 
 				memset(crypt_iv, 0x00, ivsize);
 				memset(crypt_data, 0x00, crypt_len);
-- 
cgit v1.2.3


From 27b57bb76a897be80494ee11ee4e85326d19383d Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 13 May 2021 21:40:38 +0200
Subject: Revert "Revert "ALSA: usx2y: Fix potential NULL pointer dereference""

This reverts commit 4667a6fc1777ce071504bab570d3599107f4790f.

Takashi writes:
	I have already started working on the bigger cleanup of this driver
	code based on 5.13-rc1, so could you drop this revert?

I missed our previous discussion about this, my fault for applying it.

Reported-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/usb/usx2y/usb_stream.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/sound/usb/usx2y/usb_stream.c b/sound/usb/usx2y/usb_stream.c
index 6bba17bf689a..091c071b270a 100644
--- a/sound/usb/usx2y/usb_stream.c
+++ b/sound/usb/usx2y/usb_stream.c
@@ -91,7 +91,12 @@ static int init_urbs(struct usb_stream_kernel *sk, unsigned use_packsize,
 
 	for (u = 0; u < USB_STREAM_NURBS; ++u) {
 		sk->inurb[u] = usb_alloc_urb(sk->n_o_ps, GFP_KERNEL);
+		if (!sk->inurb[u])
+			return -ENOMEM;
+
 		sk->outurb[u] = usb_alloc_urb(sk->n_o_ps, GFP_KERNEL);
+		if (!sk->outurb[u])
+			return -ENOMEM;
 	}
 
 	if (init_pipe_urbs(sk, use_packsize, sk->inurb, indata, dev, in_pipe) ||
-- 
cgit v1.2.3


From a93a0a15876d2a077a3bc260b387d2457a051f24 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Thu, 13 May 2021 09:44:49 +0200
Subject: net: mdio: thunder: Fix a double free issue in the .remove function

'bus->mii_bus' have been allocated with 'devm_mdiobus_alloc_size()' in the
probe function. So it must not be freed explicitly or there will be a
double free.

Remove the incorrect 'mdiobus_free' in the remove function.

Fixes: 379d7ac7ca31 ("phy: mdio-thunder: Add driver for Cavium Thunder SoC MDIO buses.")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Russell King <rmk+kernel@armlinux.org.uk>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mdio/mdio-thunder.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/mdio/mdio-thunder.c b/drivers/net/mdio/mdio-thunder.c
index cb1761693b69..822d2cdd2f35 100644
--- a/drivers/net/mdio/mdio-thunder.c
+++ b/drivers/net/mdio/mdio-thunder.c
@@ -126,7 +126,6 @@ static void thunder_mdiobus_pci_remove(struct pci_dev *pdev)
 			continue;
 
 		mdiobus_unregister(bus->mii_bus);
-		mdiobus_free(bus->mii_bus);
 		oct_mdio_writeq(0, bus->register_base + SMI_EN);
 	}
 	pci_release_regions(pdev);
-- 
cgit v1.2.3


From e1d027dd97e1e750669cdc0d3b016a4f54e473eb Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Thu, 13 May 2021 09:24:55 +0200
Subject: net: mdio: octeon: Fix some double free issues

'bus->mii_bus' has been allocated with 'devm_mdiobus_alloc_size()' in the
probe function. So it must not be freed explicitly or there will be a
double free.

Remove the incorrect 'mdiobus_free' in the error handling path of the
probe function and in remove function.

Suggested-By: Andrew Lunn <andrew@lunn.ch>
Fixes: 35d2aeac9810 ("phy: mdio-octeon: Use devm_mdiobus_alloc_size()")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Russell King <rmk+kernel@armlinux.org.uk>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mdio/mdio-octeon.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/mdio/mdio-octeon.c b/drivers/net/mdio/mdio-octeon.c
index 8ce99c4888e1..e096e68ac667 100644
--- a/drivers/net/mdio/mdio-octeon.c
+++ b/drivers/net/mdio/mdio-octeon.c
@@ -71,7 +71,6 @@ static int octeon_mdiobus_probe(struct platform_device *pdev)
 
 	return 0;
 fail_register:
-	mdiobus_free(bus->mii_bus);
 	smi_en.u64 = 0;
 	oct_mdio_writeq(smi_en.u64, bus->register_base + SMI_EN);
 	return err;
@@ -85,7 +84,6 @@ static int octeon_mdiobus_remove(struct platform_device *pdev)
 	bus = platform_get_drvdata(pdev);
 
 	mdiobus_unregister(bus->mii_bus);
-	mdiobus_free(bus->mii_bus);
 	smi_en.u64 = 0;
 	oct_mdio_writeq(smi_en.u64, bus->register_base + SMI_EN);
 	return 0;
-- 
cgit v1.2.3


From 65e302a9bd57b62872040d57eea1201562a7cbb2 Mon Sep 17 00:00:00 2001
From: Ayush Sawal <ayush.sawal@chelsio.com>
Date: Thu, 13 May 2021 15:11:51 +0530
Subject: cxgb4/ch_ktls: Clear resources when pf4 device is removed

This patch maintain the list of active tids and clear all the active
connection resources when DETACH notification comes.

Fixes: a8c16e8ed624f ("crypto/chcr: move nic TLS functionality to drivers/net")
Signed-off-by: Ayush Sawal <ayush.sawal@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c    |  2 +-
 .../chelsio/inline_crypto/ch_ktls/chcr_ktls.c      | 80 +++++++++++++++++++++-
 .../chelsio/inline_crypto/ch_ktls/chcr_ktls.h      |  2 +
 3 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 6264bc66a4fc..421bd9b88028 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -6480,9 +6480,9 @@ static void cxgb4_ktls_dev_del(struct net_device *netdev,
 
 	adap->uld[CXGB4_ULD_KTLS].tlsdev_ops->tls_dev_del(netdev, tls_ctx,
 							  direction);
-	cxgb4_set_ktls_feature(adap, FW_PARAMS_PARAM_DEV_KTLS_HW_DISABLE);
 
 out_unlock:
+	cxgb4_set_ktls_feature(adap, FW_PARAMS_PARAM_DEV_KTLS_HW_DISABLE);
 	mutex_unlock(&uld_mutex);
 }
 
diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
index ef3f1e92632f..59683f79959c 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
@@ -59,6 +59,7 @@ static int chcr_get_nfrags_to_send(struct sk_buff *skb, u32 start, u32 len)
 }
 
 static int chcr_init_tcb_fields(struct chcr_ktls_info *tx_info);
+static void clear_conn_resources(struct chcr_ktls_info *tx_info);
 /*
  * chcr_ktls_save_keys: calculate and save crypto keys.
  * @tx_info - driver specific tls info.
@@ -364,10 +365,14 @@ static void chcr_ktls_dev_del(struct net_device *netdev,
 				chcr_get_ktls_tx_context(tls_ctx);
 	struct chcr_ktls_info *tx_info = tx_ctx->chcr_info;
 	struct ch_ktls_port_stats_debug *port_stats;
+	struct chcr_ktls_uld_ctx *u_ctx;
 
 	if (!tx_info)
 		return;
 
+	u_ctx = tx_info->adap->uld[CXGB4_ULD_KTLS].handle;
+	if (u_ctx && u_ctx->detach)
+		return;
 	/* clear l2t entry */
 	if (tx_info->l2te)
 		cxgb4_l2t_release(tx_info->l2te);
@@ -384,6 +389,8 @@ static void chcr_ktls_dev_del(struct net_device *netdev,
 	if (tx_info->tid != -1) {
 		cxgb4_remove_tid(&tx_info->adap->tids, tx_info->tx_chan,
 				 tx_info->tid, tx_info->ip_family);
+
+		xa_erase(&u_ctx->tid_list, tx_info->tid);
 	}
 
 	port_stats = &tx_info->adap->ch_ktls_stats.ktls_port[tx_info->port_id];
@@ -411,6 +418,7 @@ static int chcr_ktls_dev_add(struct net_device *netdev, struct sock *sk,
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct ch_ktls_port_stats_debug *port_stats;
 	struct chcr_ktls_ofld_ctx_tx *tx_ctx;
+	struct chcr_ktls_uld_ctx *u_ctx;
 	struct chcr_ktls_info *tx_info;
 	struct dst_entry *dst;
 	struct adapter *adap;
@@ -425,6 +433,7 @@ static int chcr_ktls_dev_add(struct net_device *netdev, struct sock *sk,
 	adap = pi->adapter;
 	port_stats = &adap->ch_ktls_stats.ktls_port[pi->port_id];
 	atomic64_inc(&port_stats->ktls_tx_connection_open);
+	u_ctx = adap->uld[CXGB4_ULD_KTLS].handle;
 
 	if (direction == TLS_OFFLOAD_CTX_DIR_RX) {
 		pr_err("not expecting for RX direction\n");
@@ -434,6 +443,9 @@ static int chcr_ktls_dev_add(struct net_device *netdev, struct sock *sk,
 	if (tx_ctx->chcr_info)
 		goto out;
 
+	if (u_ctx && u_ctx->detach)
+		goto out;
+
 	tx_info = kvzalloc(sizeof(*tx_info), GFP_KERNEL);
 	if (!tx_info)
 		goto out;
@@ -569,6 +581,8 @@ free_tid:
 	cxgb4_remove_tid(&tx_info->adap->tids, tx_info->tx_chan,
 			 tx_info->tid, tx_info->ip_family);
 
+	xa_erase(&u_ctx->tid_list, tx_info->tid);
+
 put_module:
 	/* release module refcount */
 	module_put(THIS_MODULE);
@@ -633,8 +647,12 @@ static int chcr_ktls_cpl_act_open_rpl(struct adapter *adap,
 {
 	const struct cpl_act_open_rpl *p = (void *)input;
 	struct chcr_ktls_info *tx_info = NULL;
+	struct chcr_ktls_ofld_ctx_tx *tx_ctx;
+	struct chcr_ktls_uld_ctx *u_ctx;
 	unsigned int atid, tid, status;
+	struct tls_context *tls_ctx;
 	struct tid_info *t;
+	int ret = 0;
 
 	tid = GET_TID(p);
 	status = AOPEN_STATUS_G(ntohl(p->atid_status));
@@ -666,14 +684,29 @@ static int chcr_ktls_cpl_act_open_rpl(struct adapter *adap,
 	if (!status) {
 		tx_info->tid = tid;
 		cxgb4_insert_tid(t, tx_info, tx_info->tid, tx_info->ip_family);
+		/* Adding tid */
+		tls_ctx = tls_get_ctx(tx_info->sk);
+		tx_ctx = chcr_get_ktls_tx_context(tls_ctx);
+		u_ctx = adap->uld[CXGB4_ULD_KTLS].handle;
+		if (u_ctx) {
+			ret = xa_insert_bh(&u_ctx->tid_list, tid, tx_ctx,
+					   GFP_NOWAIT);
+			if (ret < 0) {
+				pr_err("%s: Failed to allocate tid XA entry = %d\n",
+				       __func__, tx_info->tid);
+				tx_info->open_state = CH_KTLS_OPEN_FAILURE;
+				goto out;
+			}
+		}
 		tx_info->open_state = CH_KTLS_OPEN_SUCCESS;
 	} else {
 		tx_info->open_state = CH_KTLS_OPEN_FAILURE;
 	}
+out:
 	spin_unlock(&tx_info->lock);
 
 	complete(&tx_info->completion);
-	return 0;
+	return ret;
 }
 
 /*
@@ -2090,6 +2123,8 @@ static void *chcr_ktls_uld_add(const struct cxgb4_lld_info *lldi)
 		goto out;
 	}
 	u_ctx->lldi = *lldi;
+	u_ctx->detach = false;
+	xa_init_flags(&u_ctx->tid_list, XA_FLAGS_LOCK_BH);
 out:
 	return u_ctx;
 }
@@ -2123,6 +2158,45 @@ static int chcr_ktls_uld_rx_handler(void *handle, const __be64 *rsp,
 	return 0;
 }
 
+static void clear_conn_resources(struct chcr_ktls_info *tx_info)
+{
+	/* clear l2t entry */
+	if (tx_info->l2te)
+		cxgb4_l2t_release(tx_info->l2te);
+
+#if IS_ENABLED(CONFIG_IPV6)
+	/* clear clip entry */
+	if (tx_info->ip_family == AF_INET6)
+		cxgb4_clip_release(tx_info->netdev, (const u32 *)
+				   &tx_info->sk->sk_v6_rcv_saddr,
+				   1);
+#endif
+
+	/* clear tid */
+	if (tx_info->tid != -1)
+		cxgb4_remove_tid(&tx_info->adap->tids, tx_info->tx_chan,
+				 tx_info->tid, tx_info->ip_family);
+}
+
+static void ch_ktls_reset_all_conn(struct chcr_ktls_uld_ctx *u_ctx)
+{
+	struct ch_ktls_port_stats_debug *port_stats;
+	struct chcr_ktls_ofld_ctx_tx *tx_ctx;
+	struct chcr_ktls_info *tx_info;
+	unsigned long index;
+
+	xa_for_each(&u_ctx->tid_list, index, tx_ctx) {
+		tx_info = tx_ctx->chcr_info;
+		clear_conn_resources(tx_info);
+		port_stats = &tx_info->adap->ch_ktls_stats.ktls_port[tx_info->port_id];
+		atomic64_inc(&port_stats->ktls_tx_connection_close);
+		kvfree(tx_info);
+		tx_ctx->chcr_info = NULL;
+		/* release module refcount */
+		module_put(THIS_MODULE);
+	}
+}
+
 static int chcr_ktls_uld_state_change(void *handle, enum cxgb4_state new_state)
 {
 	struct chcr_ktls_uld_ctx *u_ctx = handle;
@@ -2139,7 +2213,10 @@ static int chcr_ktls_uld_state_change(void *handle, enum cxgb4_state new_state)
 	case CXGB4_STATE_DETACH:
 		pr_info("%s: Down\n", pci_name(u_ctx->lldi.pdev));
 		mutex_lock(&dev_mutex);
+		u_ctx->detach = true;
 		list_del(&u_ctx->entry);
+		ch_ktls_reset_all_conn(u_ctx);
+		xa_destroy(&u_ctx->tid_list);
 		mutex_unlock(&dev_mutex);
 		break;
 	default:
@@ -2178,6 +2255,7 @@ static void __exit chcr_ktls_exit(void)
 		adap = pci_get_drvdata(u_ctx->lldi.pdev);
 		memset(&adap->ch_ktls_stats, 0, sizeof(adap->ch_ktls_stats));
 		list_del(&u_ctx->entry);
+		xa_destroy(&u_ctx->tid_list);
 		kfree(u_ctx);
 	}
 	mutex_unlock(&dev_mutex);
diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h
index 18b3b1f02415..10572dc55365 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h
+++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h
@@ -75,6 +75,8 @@ struct chcr_ktls_ofld_ctx_tx {
 struct chcr_ktls_uld_ctx {
 	struct list_head entry;
 	struct cxgb4_lld_info lldi;
+	struct xarray tid_list;
+	bool detach;
 };
 
 static inline struct chcr_ktls_ofld_ctx_tx *
-- 
cgit v1.2.3


From c7d8302478ae645c2e9b59f2cf125641875b7dc2 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Thu, 13 May 2021 12:46:21 +0000
Subject: net: korina: Fix return value check in korina_probe()

In case of error, the function devm_platform_ioremap_resource_byname()
returns ERR_PTR() and never returns NULL. The NULL test in the return
value check should be replaced with IS_ERR().

Fixes: b4cd249a8cc0 ("net: korina: Use devres functions")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/korina.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c
index 6f987a7ffcb3..b30a45725374 100644
--- a/drivers/net/ethernet/korina.c
+++ b/drivers/net/ethernet/korina.c
@@ -1315,23 +1315,23 @@ static int korina_probe(struct platform_device *pdev)
 	lp->tx_irq = platform_get_irq_byname(pdev, "tx");
 
 	p = devm_platform_ioremap_resource_byname(pdev, "emac");
-	if (!p) {
+	if (IS_ERR(p)) {
 		printk(KERN_ERR DRV_NAME ": cannot remap registers\n");
-		return -ENOMEM;
+		return PTR_ERR(p);
 	}
 	lp->eth_regs = p;
 
 	p = devm_platform_ioremap_resource_byname(pdev, "dma_rx");
-	if (!p) {
+	if (IS_ERR(p)) {
 		printk(KERN_ERR DRV_NAME ": cannot remap Rx DMA registers\n");
-		return -ENOMEM;
+		return PTR_ERR(p);
 	}
 	lp->rx_dma_regs = p;
 
 	p = devm_platform_ioremap_resource_byname(pdev, "dma_tx");
-	if (!p) {
+	if (IS_ERR(p)) {
 		printk(KERN_ERR DRV_NAME ": cannot remap Tx DMA registers\n");
-		return -ENOMEM;
+		return PTR_ERR(p);
 	}
 	lp->tx_dma_regs = p;
 
-- 
cgit v1.2.3


From e4df1b0c24350a0f00229ff895a91f1072bd850d Mon Sep 17 00:00:00 2001
From: Tao Liu <thomas.liu@ucloud.cn>
Date: Thu, 13 May 2021 21:08:00 +0800
Subject: openvswitch: meter: fix race when getting now_ms.

We have observed meters working unexpected if traffic is 3+Gbit/s
with multiple connections.

now_ms is not pretected by meter->lock, we may get a negative
long_delta_ms when another cpu updated meter->used, then:
    delta_ms = (u32)long_delta_ms;
which will be a large value.

    band->bucket += delta_ms * band->rate;
then we get a wrong band->bucket.

OpenVswitch userspace datapath has fixed the same issue[1] some
time ago, and we port the implementation to kernel datapath.

[1] https://patchwork.ozlabs.org/project/openvswitch/patch/20191025114436.9746-1-i.maximets@ovn.org/

Fixes: 96fbc13d7e77 ("openvswitch: Add meter infrastructure")
Signed-off-by: Tao Liu <thomas.liu@ucloud.cn>
Suggested-by: Ilya Maximets <i.maximets@ovn.org>
Reviewed-by: Ilya Maximets <i.maximets@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/meter.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 96b524ceabca..896b8f5bc885 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -611,6 +611,14 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
 	spin_lock(&meter->lock);
 
 	long_delta_ms = (now_ms - meter->used); /* ms */
+	if (long_delta_ms < 0) {
+		/* This condition means that we have several threads fighting
+		 * for a meter lock, and the one who received the packets a
+		 * bit later wins. Assuming that all racing threads received
+		 * packets at the same time to avoid overflow.
+		 */
+		long_delta_ms = 0;
+	}
 
 	/* Make sure delta_ms will not be too large, so that bucket will not
 	 * wrap around below.
-- 
cgit v1.2.3


From d6f67afbdf9df5301641b2ef7ac4030abab3e067 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Date: Mon, 10 May 2021 22:39:38 +0900
Subject: btrfs: return 0 for dev_extent_hole_check_zoned hole_start in case of
 error

Commit 7000babddac6 ("btrfs: assign proper values to a bool variable in
dev_extent_hole_check_zoned") assigned false to the hole_start parameter
of dev_extent_hole_check_zoned().

The hole_start parameter is not boolean and returns the start location of
the found hole.

Fixes: 7000babddac6 ("btrfs: assign proper values to a bool variable in dev_extent_hole_check_zoned")
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 77cdb75acc15..bc53939fef48 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1458,7 +1458,7 @@ static bool dev_extent_hole_check_zoned(struct btrfs_device *device,
 		/* Given hole range was invalid (outside of device) */
 		if (ret == -ERANGE) {
 			*hole_start += *hole_size;
-			*hole_size = false;
+			*hole_size = 0;
 			return true;
 		}
 
-- 
cgit v1.2.3


From 71795ee590111e3636cc3c148289dfa9fa0a5fc3 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Thu, 29 Apr 2021 10:51:34 -0400
Subject: btrfs: avoid RCU stalls while running delayed iputs

Generally a delayed iput is added when we might do the final iput, so
usually we'll end up sleeping while processing the delayed iputs
naturally.  However there's no guarantee of this, especially for small
files.  In production we noticed 5 instances of RCU stalls while testing
a kernel release overnight across 1000 machines, so this is relatively
common:

  host count: 5
  rcu: INFO: rcu_sched self-detected stall on CPU
  rcu: ....: (20998 ticks this GP) idle=59e/1/0x4000000000000002 softirq=12333372/12333372 fqs=3208
   	(t=21031 jiffies g=27810193 q=41075) NMI backtrace for cpu 1
  CPU: 1 PID: 1713 Comm: btrfs-cleaner Kdump: loaded Not tainted 5.6.13-0_fbk12_rc1_5520_gec92bffc1ec9 #1
  Call Trace:
    <IRQ> dump_stack+0x50/0x70
    nmi_cpu_backtrace.cold.6+0x30/0x65
    ? lapic_can_unplug_cpu.cold.30+0x40/0x40
    nmi_trigger_cpumask_backtrace+0xba/0xca
    rcu_dump_cpu_stacks+0x99/0xc7
    rcu_sched_clock_irq.cold.90+0x1b2/0x3a3
    ? trigger_load_balance+0x5c/0x200
    ? tick_sched_do_timer+0x60/0x60
    ? tick_sched_do_timer+0x60/0x60
    update_process_times+0x24/0x50
    tick_sched_timer+0x37/0x70
    __hrtimer_run_queues+0xfe/0x270
    hrtimer_interrupt+0xf4/0x210
    smp_apic_timer_interrupt+0x5e/0x120
    apic_timer_interrupt+0xf/0x20 </IRQ>
   RIP: 0010:queued_spin_lock_slowpath+0x17d/0x1b0
   RSP: 0018:ffffc9000da5fe48 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13
   RAX: 0000000000000000 RBX: ffff889fa81d0cd8 RCX: 0000000000000029
   RDX: ffff889fff86c0c0 RSI: 0000000000080000 RDI: ffff88bfc2da7200
   RBP: ffff888f2dcdd768 R08: 0000000001040000 R09: 0000000000000000
   R10: 0000000000000001 R11: ffffffff82a55560 R12: ffff88bfc2da7200
   R13: 0000000000000000 R14: ffff88bff6c2a360 R15: ffffffff814bd870
   ? kzalloc.constprop.57+0x30/0x30
   list_lru_add+0x5a/0x100
   inode_lru_list_add+0x20/0x40
   iput+0x1c1/0x1f0
   run_delayed_iput_locked+0x46/0x90
   btrfs_run_delayed_iputs+0x3f/0x60
   cleaner_kthread+0xf2/0x120
   kthread+0x10b/0x130

Fix this by adding a cond_resched_lock() to the loop processing delayed
iputs so we can avoid these sort of stalls.

CC: stable@vger.kernel.org # 4.9+
Reviewed-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 69fcdf8f0b1c..095e452f59f0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3246,6 +3246,7 @@ void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
 		inode = list_first_entry(&fs_info->delayed_iputs,
 				struct btrfs_inode, delayed_iput);
 		run_delayed_iput_locked(fs_info, inode);
+		cond_resched_lock(&fs_info->delayed_iput_lock);
 	}
 	spin_unlock(&fs_info->delayed_iput_lock);
 }
-- 
cgit v1.2.3


From 15c7745c9a0078edad1f7df5a6bb7b80bc8cca23 Mon Sep 17 00:00:00 2001
From: Boris Burkov <boris@bur.io>
Date: Tue, 6 Apr 2021 15:31:18 -0700
Subject: btrfs: return whole extents in fiemap

  `xfs_io -c 'fiemap <off> <len>' <file>`

can give surprising results on btrfs that differ from xfs.

btrfs prints out extents trimmed to fit the user input. If the user's
fiemap request has an offset, then rather than returning each whole
extent which intersects that range, we also trim the start extent to not
have start < off.

Documentation in filesystems/fiemap.txt and the xfs_io man page suggests
that returning the whole extent is expected.

Some cases which all yield the same fiemap in xfs, but not btrfs:
  dd if=/dev/zero of=$f bs=4k count=1
  sudo xfs_io -c 'fiemap 0 1024' $f
    0: [0..7]: 26624..26631
  sudo xfs_io -c 'fiemap 2048 1024' $f
    0: [4..7]: 26628..26631
  sudo xfs_io -c 'fiemap 2048 4096' $f
    0: [4..7]: 26628..26631
  sudo xfs_io -c 'fiemap 3584 512' $f
    0: [7..7]: 26631..26631
  sudo xfs_io -c 'fiemap 4091 5' $f
    0: [7..6]: 26631..26630

I believe this is a consequence of the logic for merging contiguous
extents represented by separate extent items. That logic needs to track
the last offset as it loops through the extent items, which happens to
pick up the start offset on the first iteration, and trim off the
beginning of the full extent. To fix it, start `off` at 0 rather than
`start` so that we keep the iteration/merging intact without cutting off
the start of the extent.

after the fix, all the above commands give:

  0: [0..7]: 26624..26631

The merging logic is exercised by fstest generic/483, and I have written
a new fstest for checking we don't have backwards or zero-length fiemaps
for cases like those above.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Boris Burkov <boris@bur.io>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f2d1bb234377..360d997c7226 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5210,7 +5210,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
 		  u64 start, u64 len)
 {
 	int ret = 0;
-	u64 off = start;
+	u64 off;
 	u64 max = start + len;
 	u32 flags = 0;
 	u32 found_type;
@@ -5245,6 +5245,11 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
 		goto out_free_ulist;
 	}
 
+	/*
+	 * We can't initialize that to 'start' as this could miss extents due
+	 * to extent item merging
+	 */
+	off = 0;
 	start = round_down(start, btrfs_inode_sectorsize(inode));
 	len = round_up(max, btrfs_inode_sectorsize(inode)) - start;
 
-- 
cgit v1.2.3


From 54a40fc3a1da21b52dbf19f72fdc27a2ec740760 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 12 May 2021 16:27:16 +0100
Subject: btrfs: fix removed dentries still existing after log is synced

When we move one inode from one directory to another and both the inode
and its previous parent directory were logged before, we are not supposed
to have the dentry for the old parent if we have a power failure after the
log is synced. Only the new dentry is supposed to exist.

Generally this works correctly, however there is a scenario where this is
not currently working, because the old parent of the file/directory that
was moved is not authoritative for a range that includes the dir index and
dir item keys of the old dentry. This case is better explained with the
following example and reproducer:

  # The test requires a very specific layout of keys and items in the
  # fs/subvolume btree to trigger the bug. So we want to make sure that
  # on whatever platform we are, we have the same leaf/node size.
  #
  # Currently in btrfs the node/leaf size can not be smaller than the page
  # size (but it can be greater than the page size). So use the largest
  # supported node/leaf size (64K).

  $ mkfs.btrfs -f -n 65536 /dev/sdc
  $ mount /dev/sdc /mnt

  # "testdir" is inode 257.
  $ mkdir /mnt/testdir
  $ chmod 755 /mnt/testdir

  # Create several empty files to have the directory "testdir" with its
  # items spread over several leaves (7 in this case).
  $ for ((i = 1; i <= 1200; i++)); do
       echo -n > /mnt/testdir/file$i
    done

  # Create our test directory "dira", inode number 1458, which gets all
  # its items in leaf 7.
  #
  # The BTRFS_DIR_ITEM_KEY item for inode 257 ("testdir") that points to
  # the entry named "dira" is in leaf 2, while the BTRFS_DIR_INDEX_KEY
  # item that points to that entry is in leaf 3.
  #
  # For this particular filesystem node size (64K), file count and file
  # names, we endup with the directory entry items from inode 257 in
  # leaves 2 and 3, as previously mentioned - what matters for triggering
  # the bug exercised by this test case is that those items are not placed
  # in leaf 1, they must be placed in a leaf different from the one
  # containing the inode item for inode 257.
  #
  # The corresponding BTRFS_DIR_ITEM_KEY and BTRFS_DIR_INDEX_KEY items for
  # the parent inode (257) are the following:
  #
  #    item 460 key (257 DIR_ITEM 3724298081) itemoff 48344 itemsize 34
  #         location key (1458 INODE_ITEM 0) type DIR
  #         transid 6 data_len 0 name_len 4
  #         name: dira
  #
  # and:
  #
  #    item 771 key (257 DIR_INDEX 1202) itemoff 36673 itemsize 34
  #         location key (1458 INODE_ITEM 0) type DIR
  #         transid 6 data_len 0 name_len 4
  #         name: dira

  $ mkdir /mnt/testdir/dira

  # Make sure everything done so far is durably persisted.
  $ sync

  # Now do a change to inode 257 ("testdir") that does not result in
  # COWing leaves 2 and 3 - the leaves that contain the directory items
  # pointing to inode 1458 (directory "dira").
  #
  # Changing permissions, the owner/group, updating or adding a xattr,
  # etc, will not change (COW) leaves 2 and 3. So for the sake of
  # simplicity change the permissions of inode 257, which results in
  # updating its inode item and therefore change (COW) only leaf 1.

  $ chmod 700 /mnt/testdir

  # Now fsync directory inode 257.
  #
  # Since only the first leaf was changed/COWed, we log the inode item of
  # inode 257 and only the dentries found in the first leaf, all have a
  # key type of BTRFS_DIR_ITEM_KEY, and no keys of type
  # BTRFS_DIR_INDEX_KEY, because they sort after the former type and none
  # exist in the first leaf.
  #
  # We also log 3 items that represent ranges for dir items and dir
  # indexes for which the log is authoritative:
  #
  # 1) a key of type BTRFS_DIR_LOG_ITEM_KEY, which indicates the log is
  #    authoritative for all BTRFS_DIR_ITEM_KEY keys that have an offset
  #    in the range [0, 2285968570] (the offset here is the crc32c of the
  #    dentry's name). The value 2285968570 corresponds to the offset of
  #    the first key of leaf 2 (which is of type BTRFS_DIR_ITEM_KEY);
  #
  # 2) a key of type BTRFS_DIR_LOG_ITEM_KEY, which indicates the log is
  #    authoritative for all BTRFS_DIR_ITEM_KEY keys that have an offset
  #    in the range [4293818216, (u64)-1] (the offset here is the crc32c
  #    of the dentry's name). The value 4293818216 corresponds to the
  #    offset of the highest key of type BTRFS_DIR_ITEM_KEY plus 1
  #    (4293818215 + 1), which is located in leaf 2;
  #
  # 3) a key of type BTRFS_DIR_LOG_INDEX_KEY, with an offset of 1203,
  #    which indicates the log is authoritative for all keys of type
  #    BTRFS_DIR_INDEX_KEY that have an offset in the range
  #    [1203, (u64)-1]. The value 1203 corresponds to the offset of the
  #    last key of type BTRFS_DIR_INDEX_KEY plus 1 (1202 + 1), which is
  #    located in leaf 3;
  #
  # Also, because "testdir" is a directory and inode 1458 ("dira") is a
  # child directory, we log inode 1458 too.

  $ xfs_io -c "fsync" /mnt/testdir

  # Now move "dira", inode 1458, to be a child of the root directory
  # (inode 256).
  #
  # Because this inode was previously logged, when "testdir" was fsynced,
  # the log is updated so that the old inode reference, referring to inode
  # 257 as the parent, is deleted and the new inode reference, referring
  # to inode 256 as the parent, is added to the log.

  $ mv /mnt/testdir/dira /mnt

  # Now change some file and fsync it. This guarantees the log changes
  # made by the previous move/rename operation are persisted. We do not
  # need to do any special modification to the file, just any change to
  # any file and sync the log.

  $ xfs_io -c "pwrite -S 0xab 0 64K" -c "fsync" /mnt/testdir/file1

  # Simulate a power failure and then mount again the filesystem to
  # replay the log tree. We want to verify that we are able to mount the
  # filesystem, meaning log replay was successful, and that directory
  # inode 1458 ("dira") only has inode 256 (the filesystem's root) as
  # its parent (and no longer a child of inode 257).
  #
  # It used to happen that during log replay we would end up having
  # inode 1458 (directory "dira") with 2 hard links, being a child of
  # inode 257 ("testdir") and inode 256 (the filesystem's root). This
  # resulted in the tree checker detecting the issue and causing the
  # mount operation to fail (with -EIO).
  #
  # This happened because in the log we have the new name/parent for
  # inode 1458, which results in adding the new dentry with inode 256
  # as the parent, but the previous dentry, under inode 257 was never
  # removed - this is because the ranges for dir items and dir indexes
  # of inode 257 for which the log is authoritative do not include the
  # old dir item and dir index for the dentry of inode 257 referring to
  # inode 1458:
  #
  # - for dir items, the log is authoritative for the ranges
  #   [0, 2285968570] and [4293818216, (u64)-1]. The dir item at inode 257
  #   pointing to inode 1458 has a key of (257 DIR_ITEM 3724298081), as
  #   previously mentioned, so the dir item is not deleted when the log
  #   replay procedure processes the authoritative ranges, as 3724298081
  #   is outside both ranges;
  #
  # - for dir indexes, the log is authoritative for the range
  #   [1203, (u64)-1], and the dir index item of inode 257 pointing to
  #   inode 1458 has a key of (257 DIR_INDEX 1202), as previously
  #   mentioned, so the dir index item is not deleted when the log
  #   replay procedure processes the authoritative range.

  <power failure>

  $ mount /dev/sdc /mnt
  mount: /mnt: can't read superblock on /dev/sdc.

  $ dmesg
  (...)
  [87849.840509] BTRFS info (device sdc): start tree-log replay
  [87849.875719] BTRFS critical (device sdc): corrupt leaf: root=5 block=30539776 slot=554 ino=1458, invalid nlink: has 2 expect no more than 1 for dir
  [87849.878084] BTRFS info (device sdc): leaf 30539776 gen 7 total ptrs 557 free space 2092 owner 5
  [87849.879516] BTRFS info (device sdc): refs 1 lock_owner 0 current 2099108
  [87849.880613] 	item 0 key (1181 1 0) itemoff 65275 itemsize 160
  [87849.881544] 		inode generation 6 size 0 mode 100644
  [87849.882692] 	item 1 key (1181 12 257) itemoff 65258 itemsize 17
  (...)
  [87850.562549] 	item 556 key (1458 12 257) itemoff 16017 itemsize 14
  [87850.563349] BTRFS error (device dm-0): block=30539776 write time tree block corruption detected
  [87850.564386] ------------[ cut here ]------------
  [87850.564920] WARNING: CPU: 3 PID: 2099108 at fs/btrfs/disk-io.c:465 csum_one_extent_buffer+0xed/0x100 [btrfs]
  [87850.566129] Modules linked in: btrfs dm_zero dm_snapshot (...)
  [87850.573789] CPU: 3 PID: 2099108 Comm: mount Not tainted 5.12.0-rc8-btrfs-next-86 #1
  (...)
  [87850.587481] Call Trace:
  [87850.587768]  btree_csum_one_bio+0x244/0x2b0 [btrfs]
  [87850.588354]  ? btrfs_bio_fits_in_stripe+0xd8/0x110 [btrfs]
  [87850.589003]  btrfs_submit_metadata_bio+0xb7/0x100 [btrfs]
  [87850.589654]  submit_one_bio+0x61/0x70 [btrfs]
  [87850.590248]  submit_extent_page+0x91/0x2f0 [btrfs]
  [87850.590842]  write_one_eb+0x175/0x440 [btrfs]
  [87850.591370]  ? find_extent_buffer_nolock+0x1c0/0x1c0 [btrfs]
  [87850.592036]  btree_write_cache_pages+0x1e6/0x610 [btrfs]
  [87850.592665]  ? free_debug_processing+0x1d5/0x240
  [87850.593209]  do_writepages+0x43/0xf0
  [87850.593798]  ? __filemap_fdatawrite_range+0xa4/0x100
  [87850.594391]  __filemap_fdatawrite_range+0xc5/0x100
  [87850.595196]  btrfs_write_marked_extents+0x68/0x160 [btrfs]
  [87850.596202]  btrfs_write_and_wait_transaction.isra.0+0x4d/0xd0 [btrfs]
  [87850.597377]  btrfs_commit_transaction+0x794/0xca0 [btrfs]
  [87850.598455]  ? _raw_spin_unlock_irqrestore+0x32/0x60
  [87850.599305]  ? kmem_cache_free+0x15a/0x3d0
  [87850.600029]  btrfs_recover_log_trees+0x346/0x380 [btrfs]
  [87850.601021]  ? replay_one_extent+0x7d0/0x7d0 [btrfs]
  [87850.601988]  open_ctree+0x13c9/0x1698 [btrfs]
  [87850.602846]  btrfs_mount_root.cold+0x13/0xed [btrfs]
  [87850.603771]  ? kmem_cache_alloc_trace+0x7c9/0x930
  [87850.604576]  ? vfs_parse_fs_string+0x5d/0xb0
  [87850.605293]  ? kfree+0x276/0x3f0
  [87850.605857]  legacy_get_tree+0x30/0x50
  [87850.606540]  vfs_get_tree+0x28/0xc0
  [87850.607163]  fc_mount+0xe/0x40
  [87850.607695]  vfs_kern_mount.part.0+0x71/0x90
  [87850.608440]  btrfs_mount+0x13b/0x3e0 [btrfs]
  (...)
  [87850.629477] ---[ end trace 68802022b99a1ea0 ]---
  [87850.630849] BTRFS: error (device sdc) in btrfs_commit_transaction:2381: errno=-5 IO failure (Error while writing out transaction)
  [87850.632422] BTRFS warning (device sdc): Skipping commit of aborted transaction.
  [87850.633416] BTRFS: error (device sdc) in cleanup_transaction:1978: errno=-5 IO failure
  [87850.634553] BTRFS: error (device sdc) in btrfs_replay_log:2431: errno=-5 IO failure (Failed to recover log tree)
  [87850.637529] BTRFS error (device sdc): open_ctree failed

In this example the inode we moved was a directory, so it was easy to
detect the problem because directories can only have one hard link and
the tree checker immediately detects that. If the moved inode was a file,
then the log replay would succeed and we would end up having both the
new hard link (/mnt/foo) and the old hard link (/mnt/testdir/foo) present,
but only the new one should be present.

Fix this by forcing re-logging of the old parent directory when logging
the new name during a rename operation. This ensures we end up with a log
that is authoritative for a range covering the keys for the old dentry,
therefore causing the old dentry do be deleted when replaying the log.

A test case for fstests will follow up soon.

Fixes: 64d6b281ba4db0 ("btrfs: remove unnecessary check_parent_dirs_for_sync()")
CC: stable@vger.kernel.org # 5.12+
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index a0fc3a1390ab..fd6b1f13112e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -6462,6 +6462,24 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
 	    (!old_dir || old_dir->logged_trans < trans->transid))
 		return;
 
+	/*
+	 * If we are doing a rename (old_dir is not NULL) from a directory that
+	 * was previously logged, make sure the next log attempt on the directory
+	 * is not skipped and logs the inode again. This is because the log may
+	 * not currently be authoritative for a range including the old
+	 * BTRFS_DIR_ITEM_KEY and BTRFS_DIR_INDEX_KEY keys, so we want to make
+	 * sure after a log replay we do not end up with both the new and old
+	 * dentries around (in case the inode is a directory we would have a
+	 * directory with two hard links and 2 inode references for different
+	 * parents). The next log attempt of old_dir will happen at
+	 * btrfs_log_all_parents(), called through btrfs_log_inode_parent()
+	 * below, because we have previously set inode->last_unlink_trans to the
+	 * current transaction ID, either here or at btrfs_record_unlink_dir() in
+	 * case inode is a directory.
+	 */
+	if (old_dir)
+		old_dir->logged_trans = 0;
+
 	btrfs_init_log_ctx(&ctx, &inode->vfs_inode);
 	ctx.logging_new_name = true;
 	/*
-- 
cgit v1.2.3


From c07531c01d8284aedaf95708ea90e76d11af0e21 Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Mon, 10 May 2021 14:50:24 +0300
Subject: netfilter: flowtable: Remove redundant hw refresh bit

Offloading conns could fail for multiple reasons and a hw refresh bit is
set to try to reoffload it in next sw packet.
But it could be in some cases and future points that the hw refresh bit
is not set but a refresh could succeed.
Remove the hw refresh bit and do offload refresh if requested.
There won't be a new work entry if a work is already pending
anyway as there is the hw pending bit.

Fixes: 8b3646d6e0c4 ("net/sched: act_ct: Support refreshing the flow table entries")
Signed-off-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_flow_table.h | 1 -
 net/netfilter/nf_flow_table_core.c    | 3 +--
 net/netfilter/nf_flow_table_offload.c | 7 ++++---
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 51d8eb99764d..48ef7460ff30 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -157,7 +157,6 @@ enum nf_flow_flags {
 	NF_FLOW_HW,
 	NF_FLOW_HW_DYING,
 	NF_FLOW_HW_DEAD,
-	NF_FLOW_HW_REFRESH,
 	NF_FLOW_HW_PENDING,
 };
 
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 39c02d1aeedf..1d02650dd715 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -306,8 +306,7 @@ void flow_offload_refresh(struct nf_flowtable *flow_table,
 {
 	flow->timeout = nf_flowtable_time_stamp + NF_FLOW_TIMEOUT;
 
-	if (likely(!nf_flowtable_hw_offload(flow_table) ||
-		   !test_and_clear_bit(NF_FLOW_HW_REFRESH, &flow->flags)))
+	if (likely(!nf_flowtable_hw_offload(flow_table)))
 		return;
 
 	nf_flow_offload_add(flow_table, flow);
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 2af7bdb38407..528b2f172684 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -902,10 +902,11 @@ static void flow_offload_work_add(struct flow_offload_work *offload)
 
 	err = flow_offload_rule_add(offload, flow_rule);
 	if (err < 0)
-		set_bit(NF_FLOW_HW_REFRESH, &offload->flow->flags);
-	else
-		set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
+		goto out;
+
+	set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
 
+out:
 	nf_flow_offload_destroy(flow_rule);
 }
 
-- 
cgit v1.2.3


From f0b3d338064e1fe7531f0d2977e35f3b334abfb4 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Mon, 10 May 2021 07:58:22 +0200
Subject: netfilter: nft_set_pipapo_avx2: Add irq_fpu_usable() check, fallback
 to non-AVX2 version

Arturo reported this backtrace:

[709732.358791] WARNING: CPU: 3 PID: 456 at arch/x86/kernel/fpu/core.c:128 kernel_fpu_begin_mask+0xae/0xe0
[709732.358793] Modules linked in: binfmt_misc nft_nat nft_chain_nat nf_nat nft_counter nft_ct nf_tables nf_conntrack_netlink nfnetlink 8021q garp stp mrp llc vrf intel_rapl_msr intel_rapl_common skx_edac nfit libnvdimm ipmi_ssif x86_pkg_temp_thermal intel_powerclamp coretemp crc32_pclmul mgag200 ghash_clmulni_intel drm_kms_helper cec aesni_intel drm libaes crypto_simd cryptd glue_helper mei_me dell_smbios iTCO_wdt evdev intel_pmc_bxt iTCO_vendor_support dcdbas pcspkr rapl dell_wmi_descriptor wmi_bmof sg i2c_algo_bit watchdog mei acpi_ipmi ipmi_si button nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ipmi_devintf ipmi_msghandler ip_tables x_tables autofs4 ext4 crc16 mbcache jbd2 dm_mod raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor sd_mod t10_pi crc_t10dif crct10dif_generic raid6_pq libcrc32c crc32c_generic raid1 raid0 multipath linear md_mod ahci libahci tg3 libata xhci_pci libphy xhci_hcd ptp usbcore crct10dif_pclmul crct10dif_common bnxt_en crc32c_intel scsi_mod
[709732.358941]  pps_core i2c_i801 lpc_ich i2c_smbus wmi usb_common
[709732.358957] CPU: 3 PID: 456 Comm: jbd2/dm-0-8 Not tainted 5.10.0-0.bpo.5-amd64 #1 Debian 5.10.24-1~bpo10+1
[709732.358959] Hardware name: Dell Inc. PowerEdge R440/04JN2K, BIOS 2.9.3 09/23/2020
[709732.358964] RIP: 0010:kernel_fpu_begin_mask+0xae/0xe0
[709732.358969] Code: ae 54 24 04 83 e3 01 75 38 48 8b 44 24 08 65 48 33 04 25 28 00 00 00 75 33 48 83 c4 10 5b c3 65 8a 05 5e 21 5e 76 84 c0 74 92 <0f> 0b eb 8e f0 80 4f 01 40 48 81 c7 00 14 00 00 e8 dd fb ff ff eb
[709732.358972] RSP: 0018:ffffbb9700304740 EFLAGS: 00010202
[709732.358976] RAX: 0000000000000001 RBX: 0000000000000003 RCX: 0000000000000001
[709732.358979] RDX: ffffbb9700304970 RSI: ffff922fe1952e00 RDI: 0000000000000003
[709732.358981] RBP: ffffbb9700304970 R08: ffff922fc868a600 R09: ffff922fc711e462
[709732.358984] R10: 000000000000005f R11: ffff922ff0b27180 R12: ffffbb9700304960
[709732.358987] R13: ffffbb9700304b08 R14: ffff922fc664b6c8 R15: ffff922fc664b660
[709732.358990] FS:  0000000000000000(0000) GS:ffff92371fec0000(0000) knlGS:0000000000000000
[709732.358993] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[709732.358996] CR2: 0000557a6655bdd0 CR3: 000000026020a001 CR4: 00000000007706e0
[709732.358999] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[709732.359001] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[709732.359003] PKRU: 55555554
[709732.359005] Call Trace:
[709732.359009]  <IRQ>
[709732.359035]  nft_pipapo_avx2_lookup+0x4c/0x1cba [nf_tables]
[709732.359046]  ? sched_clock+0x5/0x10
[709732.359054]  ? sched_clock_cpu+0xc/0xb0
[709732.359061]  ? record_times+0x16/0x80
[709732.359068]  ? plist_add+0xc1/0x100
[709732.359073]  ? psi_group_change+0x47/0x230
[709732.359079]  ? skb_clone+0x4d/0xb0
[709732.359085]  ? enqueue_task_rt+0x22b/0x310
[709732.359098]  ? bnxt_start_xmit+0x1e8/0xaf0 [bnxt_en]
[709732.359102]  ? packet_rcv+0x40/0x4a0
[709732.359121]  nft_lookup_eval+0x59/0x160 [nf_tables]
[709732.359133]  nft_do_chain+0x350/0x500 [nf_tables]
[709732.359152]  ? nft_lookup_eval+0x59/0x160 [nf_tables]
[709732.359163]  ? nft_do_chain+0x364/0x500 [nf_tables]
[709732.359172]  ? fib4_rule_action+0x6d/0x80
[709732.359178]  ? fib_rules_lookup+0x107/0x250
[709732.359184]  nft_nat_do_chain+0x8a/0xf2 [nft_chain_nat]
[709732.359193]  nf_nat_inet_fn+0xea/0x210 [nf_nat]
[709732.359202]  nf_nat_ipv4_out+0x14/0xa0 [nf_nat]
[709732.359207]  nf_hook_slow+0x44/0xc0
[709732.359214]  ip_output+0xd2/0x100
[709732.359221]  ? __ip_finish_output+0x210/0x210
[709732.359226]  ip_forward+0x37d/0x4a0
[709732.359232]  ? ip4_key_hashfn+0xb0/0xb0
[709732.359238]  ip_sublist_rcv_finish+0x4f/0x60
[709732.359243]  ip_sublist_rcv+0x196/0x220
[709732.359250]  ? ip_rcv_finish_core.isra.22+0x400/0x400
[709732.359255]  ip_list_rcv+0x137/0x160
[709732.359264]  __netif_receive_skb_list_core+0x29b/0x2c0
[709732.359272]  netif_receive_skb_list_internal+0x1a6/0x2d0
[709732.359280]  gro_normal_list.part.156+0x19/0x40
[709732.359286]  napi_complete_done+0x67/0x170
[709732.359298]  bnxt_poll+0x105/0x190 [bnxt_en]
[709732.359304]  ? irqentry_exit+0x29/0x30
[709732.359309]  ? asm_common_interrupt+0x1e/0x40
[709732.359315]  net_rx_action+0x144/0x3c0
[709732.359322]  __do_softirq+0xd5/0x29c
[709732.359329]  asm_call_irq_on_stack+0xf/0x20
[709732.359332]  </IRQ>
[709732.359339]  do_softirq_own_stack+0x37/0x40
[709732.359346]  irq_exit_rcu+0x9d/0xa0
[709732.359353]  common_interrupt+0x78/0x130
[709732.359358]  asm_common_interrupt+0x1e/0x40
[709732.359366] RIP: 0010:crc_41+0x0/0x1e [crc32c_intel]
[709732.359370] Code: ff ff f2 4d 0f 38 f1 93 a8 fe ff ff f2 4c 0f 38 f1 81 b0 fe ff ff f2 4c 0f 38 f1 8a b0 fe ff ff f2 4d 0f 38 f1 93 b0 fe ff ff <f2> 4c 0f 38 f1 81 b8 fe ff ff f2 4c 0f 38 f1 8a b8 fe ff ff f2 4d
[709732.359373] RSP: 0018:ffffbb97008dfcd0 EFLAGS: 00000246
[709732.359377] RAX: 000000000000002a RBX: 0000000000000400 RCX: ffff922fc591dd50
[709732.359379] RDX: ffff922fc591dea0 RSI: 0000000000000a14 RDI: ffffffffc00dddc0
[709732.359382] RBP: 0000000000001000 R08: 000000000342d8c3 R09: 0000000000000000
[709732.359384] R10: 0000000000000000 R11: ffff922fc591dff0 R12: ffffbb97008dfe58
[709732.359386] R13: 000000000000000a R14: ffff922fd2b91e80 R15: ffff922fef83fe38
[709732.359395]  ? crc_43+0x1e/0x1e [crc32c_intel]
[709732.359403]  ? crc32c_pcl_intel_update+0x97/0xb0 [crc32c_intel]
[709732.359419]  ? jbd2_journal_commit_transaction+0xaec/0x1a30 [jbd2]
[709732.359425]  ? irq_exit_rcu+0x3e/0xa0
[709732.359447]  ? kjournald2+0xbd/0x270 [jbd2]
[709732.359454]  ? finish_wait+0x80/0x80
[709732.359470]  ? commit_timeout+0x10/0x10 [jbd2]
[709732.359476]  ? kthread+0x116/0x130
[709732.359481]  ? kthread_park+0x80/0x80
[709732.359488]  ? ret_from_fork+0x1f/0x30
[709732.359494] ---[ end trace 081a19978e5f09f5 ]---

that is, nft_pipapo_avx2_lookup() uses the FPU running from a softirq
that interrupted a kthread, also using the FPU.

That's exactly the reason why irq_fpu_usable() is there: use it, and
if we can't use the FPU, fall back to the non-AVX2 version of the
lookup operation, i.e. nft_pipapo_lookup().

Reported-by: Arturo Borrero Gonzalez <arturo@netfilter.org>
Cc: <stable@vger.kernel.org> # 5.6.x
Fixes: 7400b063969b ("nft_set_pipapo: Introduce AVX2-based lookup implementation")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_set_pipapo.c      | 4 ++--
 net/netfilter/nft_set_pipapo.h      | 2 ++
 net/netfilter/nft_set_pipapo_avx2.c | 3 +++
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 528a2d7ca991..dce866d93fee 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -408,8 +408,8 @@ int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst,
  *
  * Return: true on match, false otherwise.
  */
-static bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
-			      const u32 *key, const struct nft_set_ext **ext)
+bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
+		       const u32 *key, const struct nft_set_ext **ext)
 {
 	struct nft_pipapo *priv = nft_set_priv(set);
 	unsigned long *res_map, *fill_map;
diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h
index 25a75591583e..d84afb8fa79a 100644
--- a/net/netfilter/nft_set_pipapo.h
+++ b/net/netfilter/nft_set_pipapo.h
@@ -178,6 +178,8 @@ struct nft_pipapo_elem {
 
 int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst,
 		  union nft_pipapo_map_bucket *mt, bool match_only);
+bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
+		       const u32 *key, const struct nft_set_ext **ext);
 
 /**
  * pipapo_and_field_buckets_4bit() - Intersect 4-bit buckets
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index d65ae0e23028..eabdb8d552ee 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -1131,6 +1131,9 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
 	bool map_index;
 	int i, ret = 0;
 
+	if (unlikely(!irq_fpu_usable()))
+		return nft_pipapo_lookup(net, set, key, ext);
+
 	m = rcu_dereference(priv->match);
 
 	/* This also protects access to all data related to scratch maps */
-- 
cgit v1.2.3


From 8ec7791bae1327b1c279c5cd6e929c3b12daaf0a Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 6 May 2021 14:49:58 +1000
Subject: powerpc/64s: Fix crashes when toggling stf barrier

The STF (store-to-load forwarding) barrier mitigation can be
enabled/disabled at runtime via a debugfs file (stf_barrier), which
causes the kernel to patch itself to enable/disable the relevant
mitigations.

However depending on which mitigation we're using, it may not be safe to
do that patching while other CPUs are active. For example the following
crash:

  User access of kernel address (c00000003fff5af0) - exploit attempt? (uid: 0)
  segfault (11) at c00000003fff5af0 nip 7fff8ad12198 lr 7fff8ad121f8 code 1
  code: 40820128 e93c00d0 e9290058 7c292840 40810058 38600000 4bfd9a81 e8410018
  code: 2c030006 41810154 3860ffb6 e9210098 <e94d8ff0> 7d295279 39400000 40820a3c

Shows that we returned to userspace without restoring the user r13
value, due to executing the partially patched STF exit code.

Fix it by doing the patching under stop machine. The CPUs that aren't
doing the patching will be spinning in the core of the stop machine
logic. That is currently sufficient for our purposes, because none of
the patching we do is to that code or anywhere in the vicinity.

Fixes: a048a07d7f45 ("powerpc/64s: Add support for a store forwarding barrier at kernel entry/exit")
Cc: stable@vger.kernel.org # v4.17+
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210506044959.1298123-1-mpe@ellerman.id.au
---
 arch/powerpc/lib/feature-fixups.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 1fd31b4b0e13..10083add8b33 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -14,6 +14,7 @@
 #include <linux/string.h>
 #include <linux/init.h>
 #include <linux/sched/mm.h>
+#include <linux/stop_machine.h>
 #include <asm/cputable.h>
 #include <asm/code-patching.h>
 #include <asm/page.h>
@@ -227,11 +228,25 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
 		                                           : "unknown");
 }
 
+static int __do_stf_barrier_fixups(void *data)
+{
+	enum stf_barrier_type *types = data;
+
+	do_stf_entry_barrier_fixups(*types);
+	do_stf_exit_barrier_fixups(*types);
+
+	return 0;
+}
 
 void do_stf_barrier_fixups(enum stf_barrier_type types)
 {
-	do_stf_entry_barrier_fixups(types);
-	do_stf_exit_barrier_fixups(types);
+	/*
+	 * The call to the fallback entry flush, and the fallback/sync-ori exit
+	 * flush can not be safely patched in/out while other CPUs are executing
+	 * them. So call __do_stf_barrier_fixups() on one CPU while all other CPUs
+	 * spin in the stop machine core with interrupts hard disabled.
+	 */
+	stop_machine(__do_stf_barrier_fixups, &types, NULL);
 }
 
 void do_uaccess_flush_fixups(enum l1d_flush_type types)
-- 
cgit v1.2.3


From aec86b052df6541cc97c5fca44e5934cbea4963b Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 6 May 2021 14:49:59 +1000
Subject: powerpc/64s: Fix crashes when toggling entry flush barrier

The entry flush mitigation can be enabled/disabled at runtime via a
debugfs file (entry_flush), which causes the kernel to patch itself to
enable/disable the relevant mitigations.

However depending on which mitigation we're using, it may not be safe to
do that patching while other CPUs are active. For example the following
crash:

  sleeper[15639]: segfault (11) at c000000000004c20 nip c000000000004c20 lr c000000000004c20

Shows that we returned to userspace with a corrupted LR that points into
the kernel, due to executing the partially patched call to the fallback
entry flush (ie. we missed the LR restore).

Fix it by doing the patching under stop machine. The CPUs that aren't
doing the patching will be spinning in the core of the stop machine
logic. That is currently sufficient for our purposes, because none of
the patching we do is to that code or anywhere in the vicinity.

Fixes: f79643787e0a ("powerpc/64s: flush L1D on kernel entry")
Cc: stable@vger.kernel.org # v5.10+
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210506044959.1298123-2-mpe@ellerman.id.au
---
 arch/powerpc/lib/feature-fixups.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 10083add8b33..0aefa6a4a259 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -299,8 +299,9 @@ void do_uaccess_flush_fixups(enum l1d_flush_type types)
 						: "unknown");
 }
 
-void do_entry_flush_fixups(enum l1d_flush_type types)
+static int __do_entry_flush_fixups(void *data)
 {
+	enum l1d_flush_type types = *(enum l1d_flush_type *)data;
 	unsigned int instrs[3], *dest;
 	long *start, *end;
 	int i;
@@ -369,6 +370,19 @@ void do_entry_flush_fixups(enum l1d_flush_type types)
 							: "ori type" :
 		(types &  L1D_FLUSH_MTTRIG)     ? "mttrig type"
 						: "unknown");
+
+	return 0;
+}
+
+void do_entry_flush_fixups(enum l1d_flush_type types)
+{
+	/*
+	 * The call to the fallback flush can not be safely patched in/out while
+	 * other CPUs are executing it. So call __do_entry_flush_fixups() on one
+	 * CPU while all other CPUs spin in the stop machine core with interrupts
+	 * hard disabled.
+	 */
+	stop_machine(__do_entry_flush_fixups, &types, NULL);
 }
 
 void do_rfi_flush_fixups(enum l1d_flush_type types)
-- 
cgit v1.2.3


From 49b39ec248af863781a13aa6d81c5f69a2928094 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Fri, 14 May 2021 00:07:59 +1000
Subject: powerpc/64s: Fix entry flush patching w/strict RWX & hash

The entry flush mitigation can be enabled/disabled at runtime. When this
happens it results in the kernel patching its own instructions to
enable/disable the mitigation sequence.

With strict kernel RWX enabled instruction patching happens via a
secondary mapping of the kernel text, so that we don't have to make the
primary mapping writable. With the hash MMU this leads to a hash fault,
which causes us to execute the exception entry which contains the entry
flush mitigation.

This means we end up executing the entry flush in a semi-patched state,
ie. after we have patched the first instruction but before we patch the
second or third instruction of the sequence.

On machines with updated firmware the entry flush is a series of special
nops, and it's safe to to execute in a semi-patched state.

However when using the fallback flush the sequence is mflr/branch/mtlr,
and so it's not safe to execute if we have patched out the mflr but not
the other two instructions. Doing so leads to us corrputing LR, leading
to an oops, for example:

  # echo 0 > /sys/kernel/debug/powerpc/entry_flush
  kernel tried to execute exec-protected page (c000000002971000) - exploit attempt? (uid: 0)
  BUG: Unable to handle kernel instruction fetch
  Faulting instruction address: 0xc000000002971000
  Oops: Kernel access of bad area, sig: 11 [#1]
  LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
  CPU: 0 PID: 2215 Comm: bash Not tainted 5.13.0-rc1-00010-gda3bb206c9ce #1
  NIP:  c000000002971000 LR: c000000002971000 CTR: c000000000120c40
  REGS: c000000013243840 TRAP: 0400   Not tainted  (5.13.0-rc1-00010-gda3bb206c9ce)
  MSR:  8000000010009033 <SF,EE,ME,IR,DR,RI,LE>  CR: 48428482  XER: 00000000
  ...
  NIP  0xc000000002971000
  LR   0xc000000002971000
  Call Trace:
    do_patch_instruction+0xc4/0x340 (unreliable)
    do_entry_flush_fixups+0x100/0x3b0
    entry_flush_set+0x50/0xe0
    simple_attr_write+0x160/0x1a0
    full_proxy_write+0x8c/0x110
    vfs_write+0xf0/0x340
    ksys_write+0x84/0x140
    system_call_exception+0x164/0x2d0
    system_call_common+0xec/0x278

The simplest fix is to change the order in which we patch the
instructions, so that the sequence is always safe to execute. For the
non-fallback flushes it doesn't matter what order we patch in.

Fixes: bd573a81312f ("powerpc/mm/64s: Allow STRICT_KERNEL_RWX again")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210513140800.1391706-1-mpe@ellerman.id.au
---
 arch/powerpc/lib/feature-fixups.c | 59 ++++++++++++++++++++++++++++-----------
 1 file changed, 43 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 0aefa6a4a259..5d12e37fa8bf 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -325,6 +325,31 @@ static int __do_entry_flush_fixups(void *data)
 	if (types & L1D_FLUSH_MTTRIG)
 		instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
 
+	/*
+	 * If we're patching in or out the fallback flush we need to be careful about the
+	 * order in which we patch instructions. That's because it's possible we could
+	 * take a page fault after patching one instruction, so the sequence of
+	 * instructions must be safe even in a half patched state.
+	 *
+	 * To make that work, when patching in the fallback flush we patch in this order:
+	 *  - the mflr		(dest)
+	 *  - the mtlr		(dest + 2)
+	 *  - the branch	(dest + 1)
+	 *
+	 * That ensures the sequence is safe to execute at any point. In contrast if we
+	 * patch the mtlr last, it's possible we could return from the branch and not
+	 * restore LR, leading to a crash later.
+	 *
+	 * When patching out the fallback flush (either with nops or another flush type),
+	 * we patch in this order:
+	 *  - the branch	(dest + 1)
+	 *  - the mtlr		(dest + 2)
+	 *  - the mflr		(dest)
+	 *
+	 * Note we are protected by stop_machine() from other CPUs executing the code in a
+	 * semi-patched state.
+	 */
+
 	start = PTRRELOC(&__start___entry_flush_fixup);
 	end = PTRRELOC(&__stop___entry_flush_fixup);
 	for (i = 0; start < end; start++, i++) {
@@ -332,15 +357,16 @@ static int __do_entry_flush_fixups(void *data)
 
 		pr_devel("patching dest %lx\n", (unsigned long)dest);
 
-		patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
-
-		if (types == L1D_FLUSH_FALLBACK)
-			patch_branch((struct ppc_inst *)(dest + 1), (unsigned long)&entry_flush_fallback,
-				     BRANCH_SET_LINK);
-		else
+		if (types == L1D_FLUSH_FALLBACK) {
+			patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+			patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+			patch_branch((struct ppc_inst *)(dest + 1),
+				     (unsigned long)&entry_flush_fallback, BRANCH_SET_LINK);
+		} else {
 			patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
-
-		patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+			patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+			patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+		}
 	}
 
 	start = PTRRELOC(&__start___scv_entry_flush_fixup);
@@ -350,15 +376,16 @@ static int __do_entry_flush_fixups(void *data)
 
 		pr_devel("patching dest %lx\n", (unsigned long)dest);
 
-		patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
-
-		if (types == L1D_FLUSH_FALLBACK)
-			patch_branch((struct ppc_inst *)(dest + 1), (unsigned long)&scv_entry_flush_fallback,
-				     BRANCH_SET_LINK);
-		else
+		if (types == L1D_FLUSH_FALLBACK) {
+			patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+			patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+			patch_branch((struct ppc_inst *)(dest + 1),
+				     (unsigned long)&scv_entry_flush_fallback, BRANCH_SET_LINK);
+		} else {
 			patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
-
-		patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+			patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+			patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+		}
 	}
 
 
-- 
cgit v1.2.3


From 5b48ba2fbd77bc68feebd336ffad5ff166782bde Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Fri, 14 May 2021 00:08:00 +1000
Subject: powerpc/64s: Fix stf mitigation patching w/strict RWX & hash

The stf entry barrier fallback is unsafe to execute in a semi-patched
state, which can happen when enabling/disabling the mitigation with
strict kernel RWX enabled and using the hash MMU.

See the previous commit for more details.

Fix it by changing the order in which we patch the instructions.

Note the stf barrier fallback is only used on Power6 or earlier.

Fixes: bd573a81312f ("powerpc/mm/64s: Allow STRICT_KERNEL_RWX again")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210513140800.1391706-2-mpe@ellerman.id.au
---
 arch/powerpc/lib/feature-fixups.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 5d12e37fa8bf..fe26f2fa0f3f 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -150,17 +150,17 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
 
 		pr_devel("patching dest %lx\n", (unsigned long)dest);
 
-		patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
-
-		if (types & STF_BARRIER_FALLBACK)
+		// See comment in do_entry_flush_fixups() RE order of patching
+		if (types & STF_BARRIER_FALLBACK) {
+			patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+			patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
 			patch_branch((struct ppc_inst *)(dest + 1),
-				     (unsigned long)&stf_barrier_fallback,
-				     BRANCH_SET_LINK);
-		else
-			patch_instruction((struct ppc_inst *)(dest + 1),
-					  ppc_inst(instrs[1]));
-
-		patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+				     (unsigned long)&stf_barrier_fallback, BRANCH_SET_LINK);
+		} else {
+			patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
+			patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+			patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+		}
 	}
 
 	printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i,
-- 
cgit v1.2.3


From 4ec5feec1ad029bdf7d49bc50ccc0c195eeabe93 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Mon, 3 May 2021 21:17:08 +1000
Subject: powerpc/64s: Make NMI record implicitly soft-masked code as irqs
 disabled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

scv support introduced the notion of code that implicitly soft-masks
irqs due to the instruction addresses. This is required because scv
enters the kernel with MSR[EE]=1.

If a NMI (including soft-NMI) interrupt hits when we are implicitly
soft-masked then its regs->softe does not reflect this because it is
derived from the explicit soft mask state (paca->irq_soft_mask). This
makes arch_irq_disabled_regs(regs) return false.

This can trigger a warning in the soft-NMI watchdog code (shown below).
Fix it by having NMI interrupts set regs->softe to disabled in case of
interrupting an implicit soft-masked region.

  ------------[ cut here ]------------
  WARNING: CPU: 41 PID: 1103 at arch/powerpc/kernel/watchdog.c:259 soft_nmi_interrupt+0x3e4/0x5f0
  CPU: 41 PID: 1103 Comm: (spawn) Not tainted
  NIP:  c000000000039534 LR: c000000000039234 CTR: c000000000009a00
  REGS: c000007fffbcf940 TRAP: 0700   Not tainted
  MSR:  9000000000021033 <SF,HV,ME,IR,DR,RI,LE>  CR: 22042482  XER: 200400ad
  CFAR: c000000000039260 IRQMASK: 3
  GPR00: c000000000039204 c000007fffbcfbe0 c000000001d6c300 0000000000000003
  GPR04: 00007ffffa45d078 0000000000000000 0000000000000008 0000000000000020
  GPR08: 0000007ffd4e0000 0000000000000000 c000007ffffceb00 7265677368657265
  GPR12: 9000000000009033 c000007ffffceb00 00000f7075bf4480 000000000000002a
  GPR16: 00000f705745a528 00007ffffa45ddd8 00000f70574d0008 0000000000000000
  GPR20: 00000f7075c58d70 00000f7057459c38 0000000000000001 0000000000000040
  GPR24: 0000000000000000 0000000000000029 c000000001dae058 0000000000000029
  GPR28: 0000000000000000 0000000000000800 0000000000000009 c000007fffbcfd60
  NIP [c000000000039534] soft_nmi_interrupt+0x3e4/0x5f0
  LR [c000000000039234] soft_nmi_interrupt+0xe4/0x5f0
  Call Trace:
  [c000007fffbcfbe0] [c000000000039204] soft_nmi_interrupt+0xb4/0x5f0 (unreliable)
  [c000007fffbcfcf0] [c00000000000c0e8] soft_nmi_common+0x138/0x1c4
  --- interrupt: 900 at end_real_trampolines+0x0/0x1000
  NIP:  c000000000003000 LR: 00007ca426adb03c CTR: 900000000280f033
  REGS: c000007fffbcfd60 TRAP: 0900
  MSR:  9000000000009033 <SF,HV,EE,ME,IR,DR,RI,LE>  CR: 44042482  XER: 200400ad
  CFAR: 00007ca426946020 IRQMASK: 0
  GPR00: 00000000000000ad 00007ffffa45d050 00007ca426b07f00 0000000000000035
  GPR04: 00007ffffa45d078 0000000000000000 0000000000000008 0000000000000020
  GPR08: 0000000000000000 0000000000100000 0000000010000000 00007ffffa45d110
  GPR12: 0000000000000001 00007ca426d4e680 00000f7075bf4480 000000000000002a
  GPR16: 00000f705745a528 00007ffffa45ddd8 00000f70574d0008 0000000000000000
  GPR20: 00000f7075c58d70 00000f7057459c38 0000000000000001 0000000000000040
  GPR24: 0000000000000000 00000f7057473f68 0000000000000003 000000000000041b
  GPR28: 00007ffffa45d4c4 0000000000000035 0000000000000000 00000f7057473f68
  NIP [c000000000003000] end_real_trampolines+0x0/0x1000
  LR [00007ca426adb03c] 0x7ca426adb03c
  --- interrupt: 900
  Instruction dump:
  60000000 60000000 60420000 38600001 482b3ae5 60000000 e93f0138 a36d0008
  7daa6b78 71290001 7f7907b4 4082fd34 <0fe00000> 4bfffd2c 60420000 ea6100a8
  ---[ end trace dc75f67d819779da ]---

Fixes: 118178e62e2e ("powerpc: move NMI entry/exit code into wrapper")
Reported-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210503111708.758261-1-npiggin@gmail.com
---
 arch/powerpc/include/asm/interrupt.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index c77e8f57ff06..59f704408d65 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -220,6 +220,13 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte
 	local_paca->irq_soft_mask = IRQS_ALL_DISABLED;
 	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
 
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !(regs->msr & MSR_PR) &&
+				regs->nip < (unsigned long)__end_interrupts) {
+		// Kernel code running below __end_interrupts is
+		// implicitly soft-masked.
+		regs->softe = IRQS_ALL_DISABLED;
+	}
+
 	/* Don't do any per-CPU operations until interrupt state is fixed */
 
 	if (nmi_disables_ftrace(regs)) {
-- 
cgit v1.2.3


From c6ac667b07996929835b512de0e9a988977e6abc Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 14 May 2021 14:40:08 +1000
Subject: powerpc/64e/interrupt: Fix nvgprs being clobbered

Some interrupt handlers have an "extra" that saves 1 or 2
registers (r14, r15) in the paca save area and makes them available to
use by the handler.

The change to always save nvgprs in exception handlers lead to some
interrupt handlers saving those scratch r14 / r15 registers into the
interrupt frame's GPR saves, which get restored on interrupt exit.

Fix this by always reloading those scratch registers from paca before
the EXCEPTION_COMMON that saves nvgprs.

Fixes: 4228b2c3d20e ("powerpc/64e/interrupt: always save nvgprs on interrupt")
Reported-by: Christian Zigotzky <chzigotzky@xenosoft.de>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Tested-by: Christian Zigotzky <chzigotzky@xenosoft.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210514044008.1955783-1-npiggin@gmail.com
---
 arch/powerpc/kernel/exceptions-64e.S | 38 +++++++++++++++++++++++-------------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 7c3654b0d0f4..f1ae710274bc 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -340,6 +340,12 @@ ret_from_mc_except:
 	andi.	r10,r10,IRQS_DISABLED;	/* yes -> go out of line */ \
 	bne	masked_interrupt_book3e_##n
 
+/*
+ * Additional regs must be re-loaded from paca before EXCEPTION_COMMON* is
+ * called, because that does SAVE_NVGPRS which must see the original register
+ * values, otherwise the scratch values might be restored when exiting the
+ * interrupt.
+ */
 #define PROLOG_ADDITION_2REGS_GEN(n)					    \
 	std	r14,PACA_EXGEN+EX_R14(r13);				    \
 	std	r15,PACA_EXGEN+EX_R15(r13)
@@ -535,6 +541,10 @@ __end_interrupts:
 				PROLOG_ADDITION_2REGS)
 	mfspr	r14,SPRN_DEAR
 	mfspr	r15,SPRN_ESR
+	std	r14,_DAR(r1)
+	std	r15,_DSISR(r1)
+	ld	r14,PACA_EXGEN+EX_R14(r13)
+	ld	r15,PACA_EXGEN+EX_R15(r13)
 	EXCEPTION_COMMON(0x300)
 	b	storage_fault_common
 
@@ -544,6 +554,10 @@ __end_interrupts:
 				PROLOG_ADDITION_2REGS)
 	li	r15,0
 	mr	r14,r10
+	std	r14,_DAR(r1)
+	std	r15,_DSISR(r1)
+	ld	r14,PACA_EXGEN+EX_R14(r13)
+	ld	r15,PACA_EXGEN+EX_R15(r13)
 	EXCEPTION_COMMON(0x400)
 	b	storage_fault_common
 
@@ -557,6 +571,10 @@ __end_interrupts:
 				PROLOG_ADDITION_2REGS)
 	mfspr	r14,SPRN_DEAR
 	mfspr	r15,SPRN_ESR
+	std	r14,_DAR(r1)
+	std	r15,_DSISR(r1)
+	ld	r14,PACA_EXGEN+EX_R14(r13)
+	ld	r15,PACA_EXGEN+EX_R15(r13)
 	EXCEPTION_COMMON(0x600)
 	b	alignment_more	/* no room, go out of line */
 
@@ -565,10 +583,10 @@ __end_interrupts:
 	NORMAL_EXCEPTION_PROLOG(0x700, BOOKE_INTERRUPT_PROGRAM,
 				PROLOG_ADDITION_1REG)
 	mfspr	r14,SPRN_ESR
-	EXCEPTION_COMMON(0x700)
 	std	r14,_DSISR(r1)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
 	ld	r14,PACA_EXGEN+EX_R14(r13)
+	EXCEPTION_COMMON(0x700)
+	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	program_check_exception
 	REST_NVGPRS(r1)
 	b	interrupt_return
@@ -725,11 +743,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 	 * normal exception
 	 */
 	mfspr	r14,SPRN_DBSR
-	EXCEPTION_COMMON_CRIT(0xd00)
 	std	r14,_DSISR(r1)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
 	ld	r14,PACA_EXCRIT+EX_R14(r13)
 	ld	r15,PACA_EXCRIT+EX_R15(r13)
+	EXCEPTION_COMMON_CRIT(0xd00)
+	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	DebugException
 	REST_NVGPRS(r1)
 	b	interrupt_return
@@ -796,11 +814,11 @@ kernel_dbg_exc:
 	 * normal exception
 	 */
 	mfspr	r14,SPRN_DBSR
-	EXCEPTION_COMMON_DBG(0xd08)
 	std	r14,_DSISR(r1)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
 	ld	r14,PACA_EXDBG+EX_R14(r13)
 	ld	r15,PACA_EXDBG+EX_R15(r13)
+	EXCEPTION_COMMON_DBG(0xd08)
+	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	DebugException
 	REST_NVGPRS(r1)
 	b	interrupt_return
@@ -931,11 +949,7 @@ masked_interrupt_book3e_0x2c0:
  * original values stashed away in the PACA
  */
 storage_fault_common:
-	std	r14,_DAR(r1)
-	std	r15,_DSISR(r1)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	ld	r14,PACA_EXGEN+EX_R14(r13)
-	ld	r15,PACA_EXGEN+EX_R15(r13)
 	bl	do_page_fault
 	b	interrupt_return
 
@@ -944,11 +958,7 @@ storage_fault_common:
  * continues here.
  */
 alignment_more:
-	std	r14,_DAR(r1)
-	std	r15,_DSISR(r1)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	ld	r14,PACA_EXGEN+EX_R14(r13)
-	ld	r15,PACA_EXGEN+EX_R15(r13)
 	bl	alignment_exception
 	REST_NVGPRS(r1)
 	b	interrupt_return
-- 
cgit v1.2.3


From 1b6604896e78969baffc1b6cc6bc175f95929ac4 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Thu, 13 May 2021 21:56:48 +0900
Subject: ALSA: dice: fix stream format at middle sampling rate for Alesis iO
 26

Alesis iO 26 FireWire has two pairs of digital optical interface. It
delivers PCM frames from the interfaces by second isochronous packet
streaming. Although both of the interfaces are available at 44.1/48.0
kHz, first one of them is only available at 88.2/96.0 kHz. It reduces
the number of PCM samples to 4 in Multi Bit Linear Audio data channel
of data blocks on the second isochronous packet streaming.

This commit fixes hardcoded stream formats.

Cc: <stable@vger.kernel.org>
Fixes: 28b208f600a3 ("ALSA: dice: add parameters of stream formats for models produced by Alesis")
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Link: https://lore.kernel.org/r/20210513125652.110249-2-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/firewire/dice/dice-alesis.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/firewire/dice/dice-alesis.c b/sound/firewire/dice/dice-alesis.c
index 0916864511d5..27c13b9cc9ef 100644
--- a/sound/firewire/dice/dice-alesis.c
+++ b/sound/firewire/dice/dice-alesis.c
@@ -16,7 +16,7 @@ alesis_io14_tx_pcm_chs[MAX_STREAMS][SND_DICE_RATE_MODE_COUNT] = {
 static const unsigned int
 alesis_io26_tx_pcm_chs[MAX_STREAMS][SND_DICE_RATE_MODE_COUNT] = {
 	{10, 10, 4},	/* Tx0 = Analog + S/PDIF. */
-	{16, 8, 0},	/* Tx1 = ADAT1 + ADAT2. */
+	{16, 4, 0},	/* Tx1 = ADAT1 + ADAT2 (available at low rate). */
 };
 
 int snd_dice_detect_alesis_formats(struct snd_dice *dice)
-- 
cgit v1.2.3


From 0edabdfe89581669609eaac5f6a8d0ae6fe95e7f Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Thu, 13 May 2021 21:56:49 +0900
Subject: ALSA: bebob/oxfw: fix Kconfig entry for Mackie d.2 Pro

Mackie d.2 has an extension card for IEEE 1394 communication, which uses
BridgeCo DM1000 ASIC. On the other hand, Mackie d.4 Pro has built-in
function for IEEE 1394 communication by Oxford Semiconductor OXFW971,
according to schematic diagram available in Mackie website. Although I
misunderstood that Mackie d.2 Pro would be also a model with OXFW971,
it's wrong. Mackie d.2 Pro is a model which includes the extension card
as factory settings.

This commit fixes entries in Kconfig and comment in ALSA OXFW driver.

Cc: <stable@vger.kernel.org>
Fixes: fd6f4b0dc167 ("ALSA: bebob: Add skelton for BeBoB based devices")
Fixes: ec4dba5053e1 ("ALSA: oxfw: Add support for Behringer/Mackie devices")
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Link: https://lore.kernel.org/r/20210513125652.110249-3-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/firewire/Kconfig       | 4 ++--
 sound/firewire/bebob/bebob.c | 2 +-
 sound/firewire/oxfw/oxfw.c   | 1 -
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/sound/firewire/Kconfig b/sound/firewire/Kconfig
index 25778765cbfe..9897bd26a438 100644
--- a/sound/firewire/Kconfig
+++ b/sound/firewire/Kconfig
@@ -38,7 +38,7 @@ config SND_OXFW
 	   * Mackie(Loud) Onyx 1640i (former model)
 	   * Mackie(Loud) Onyx Satellite
 	   * Mackie(Loud) Tapco Link.Firewire
-	   * Mackie(Loud) d.2 pro/d.4 pro
+	   * Mackie(Loud) d.4 pro
 	   * Mackie(Loud) U.420/U.420d
 	   * TASCAM FireOne
 	   * Stanton Controllers & Systems 1 Deck/Mixer
@@ -84,7 +84,7 @@ config SND_BEBOB
 	  * PreSonus FIREBOX/FIREPOD/FP10/Inspire1394
 	  * BridgeCo RDAudio1/Audio5
 	  * Mackie Onyx 1220/1620/1640 (FireWire I/O Card)
-	  * Mackie d.2 (FireWire Option)
+	  * Mackie d.2 (FireWire Option) and d.2 Pro
 	  * Stanton FinalScratch 2 (ScratchAmp)
 	  * Tascam IF-FW/DM
 	  * Behringer XENIX UFX 1204/1604
diff --git a/sound/firewire/bebob/bebob.c b/sound/firewire/bebob/bebob.c
index 2c8e3392a490..daeecfa8b9aa 100644
--- a/sound/firewire/bebob/bebob.c
+++ b/sound/firewire/bebob/bebob.c
@@ -387,7 +387,7 @@ static const struct ieee1394_device_id bebob_id_table[] = {
 	SND_BEBOB_DEV_ENTRY(VEN_BRIDGECO, 0x00010049, &spec_normal),
 	/* Mackie, Onyx 1220/1620/1640 (Firewire I/O Card) */
 	SND_BEBOB_DEV_ENTRY(VEN_MACKIE2, 0x00010065, &spec_normal),
-	/* Mackie, d.2 (Firewire Option) */
+	// Mackie, d.2 (Firewire option card) and d.2 Pro (the card is built-in).
 	SND_BEBOB_DEV_ENTRY(VEN_MACKIE1, 0x00010067, &spec_normal),
 	/* Stanton, ScratchAmp */
 	SND_BEBOB_DEV_ENTRY(VEN_STANTON, 0x00000001, &spec_normal),
diff --git a/sound/firewire/oxfw/oxfw.c b/sound/firewire/oxfw/oxfw.c
index 1f1e3236efb8..9eea25c46dc7 100644
--- a/sound/firewire/oxfw/oxfw.c
+++ b/sound/firewire/oxfw/oxfw.c
@@ -355,7 +355,6 @@ static const struct ieee1394_device_id oxfw_id_table[] = {
 	 *  Onyx-i series (former models):	0x081216
 	 *  Mackie Onyx Satellite:		0x00200f
 	 *  Tapco LINK.firewire 4x6:		0x000460
-	 *  d.2 pro:				Unknown
 	 *  d.4 pro:				Unknown
 	 *  U.420:				Unknown
 	 *  U.420d:				Unknown
-- 
cgit v1.2.3


From 395f41e2cdac63e7581fb9574e5ac0f02556e34a Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Thu, 13 May 2021 21:56:50 +0900
Subject: ALSA: firewire-lib: fix check for the size of isochronous packet
 payload

The check for size of isochronous packet payload just cares of the size of
IR context payload without the size of CIP header.

Cc: <stable@vger.kernel.org>
Fixes: f11453c7cc01 ("ALSA: firewire-lib: use 16 bytes IR context header to separate CIP header")
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Link: https://lore.kernel.org/r/20210513125652.110249-4-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/firewire/amdtp-stream.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c
index 4e2f2bb7879f..b53971bf4b90 100644
--- a/sound/firewire/amdtp-stream.c
+++ b/sound/firewire/amdtp-stream.c
@@ -633,18 +633,24 @@ static int parse_ir_ctx_header(struct amdtp_stream *s, unsigned int cycle,
 			       unsigned int *syt, unsigned int index)
 {
 	const __be32 *cip_header;
+	unsigned int cip_header_size;
 	int err;
 
 	*payload_length = be32_to_cpu(ctx_header[0]) >> ISO_DATA_LENGTH_SHIFT;
-	if (*payload_length > s->ctx_data.tx.ctx_header_size +
-					s->ctx_data.tx.max_ctx_payload_length) {
+
+	if (!(s->flags & CIP_NO_HEADER))
+		cip_header_size = 8;
+	else
+		cip_header_size = 0;
+
+	if (*payload_length > cip_header_size + s->ctx_data.tx.max_ctx_payload_length) {
 		dev_err(&s->unit->device,
 			"Detect jumbo payload: %04x %04x\n",
-			*payload_length, s->ctx_data.tx.max_ctx_payload_length);
+			*payload_length, cip_header_size + s->ctx_data.tx.max_ctx_payload_length);
 		return -EIO;
 	}
 
-	if (!(s->flags & CIP_NO_HEADER)) {
+	if (cip_header_size > 0) {
 		cip_header = ctx_header + 2;
 		err = check_cip_header(s, cip_header, *payload_length,
 				       data_blocks, data_block_counter, syt);
-- 
cgit v1.2.3


From 1be4f21d9984fa9835fae5411a29465dc5aece6f Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Thu, 13 May 2021 21:56:51 +0900
Subject: ALSA: firewire-lib: fix calculation for size of IR context payload

The quadlets for CIP header is handled as a part of IR context header,
thus it doesn't join in IR context payload. However current calculation
includes the quadlets in IR context payload.

Cc: <stable@vger.kernel.org>
Fixes: f11453c7cc01 ("ALSA: firewire-lib: use 16 bytes IR context header to separate CIP header")
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Link: https://lore.kernel.org/r/20210513125652.110249-5-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/firewire/amdtp-stream.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c
index b53971bf4b90..73aff017dc9a 100644
--- a/sound/firewire/amdtp-stream.c
+++ b/sound/firewire/amdtp-stream.c
@@ -1071,23 +1071,22 @@ static int amdtp_stream_start(struct amdtp_stream *s, int channel, int speed,
 		s->data_block_counter = 0;
 	}
 
-	/* initialize packet buffer */
+	// initialize packet buffer.
+	max_ctx_payload_size = amdtp_stream_get_max_payload(s);
 	if (s->direction == AMDTP_IN_STREAM) {
 		dir = DMA_FROM_DEVICE;
 		type = FW_ISO_CONTEXT_RECEIVE;
-		if (!(s->flags & CIP_NO_HEADER))
+		if (!(s->flags & CIP_NO_HEADER)) {
+			max_ctx_payload_size -= 8;
 			ctx_header_size = IR_CTX_HEADER_SIZE_CIP;
-		else
+		} else {
 			ctx_header_size = IR_CTX_HEADER_SIZE_NO_CIP;
-
-		max_ctx_payload_size = amdtp_stream_get_max_payload(s) -
-				       ctx_header_size;
+		}
 	} else {
 		dir = DMA_TO_DEVICE;
 		type = FW_ISO_CONTEXT_TRANSMIT;
 		ctx_header_size = 0;	// No effect for IT context.
 
-		max_ctx_payload_size = amdtp_stream_get_max_payload(s);
 		if (!(s->flags & CIP_NO_HEADER))
 			max_ctx_payload_size -= IT_PKT_HEADER_SIZE_CIP;
 	}
-- 
cgit v1.2.3


From 814b43127f4ac69332e809152e30773941438aff Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Thu, 13 May 2021 21:56:52 +0900
Subject: ALSA: firewire-lib: fix amdtp_packet tracepoints event for
 packet_index field

The snd_firewire_lib:amdtp_packet tracepoints event includes index of
packet processed in a context handling. However in IR context, it is not
calculated as expected.

Cc: <stable@vger.kernel.org>
Fixes: 753e717986c2 ("ALSA: firewire-lib: use packet descriptor for IR context")
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Link: https://lore.kernel.org/r/20210513125652.110249-6-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/firewire/amdtp-stream-trace.h |  6 +++---
 sound/firewire/amdtp-stream.c       | 15 +++++++++------
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/sound/firewire/amdtp-stream-trace.h b/sound/firewire/amdtp-stream-trace.h
index 26e7cb555d3c..aa53c13b89d3 100644
--- a/sound/firewire/amdtp-stream-trace.h
+++ b/sound/firewire/amdtp-stream-trace.h
@@ -14,8 +14,8 @@
 #include <linux/tracepoint.h>
 
 TRACE_EVENT(amdtp_packet,
-	TP_PROTO(const struct amdtp_stream *s, u32 cycles, const __be32 *cip_header, unsigned int payload_length, unsigned int data_blocks, unsigned int data_block_counter, unsigned int index),
-	TP_ARGS(s, cycles, cip_header, payload_length, data_blocks, data_block_counter, index),
+	TP_PROTO(const struct amdtp_stream *s, u32 cycles, const __be32 *cip_header, unsigned int payload_length, unsigned int data_blocks, unsigned int data_block_counter, unsigned int packet_index, unsigned int index),
+	TP_ARGS(s, cycles, cip_header, payload_length, data_blocks, data_block_counter, packet_index, index),
 	TP_STRUCT__entry(
 		__field(unsigned int, second)
 		__field(unsigned int, cycle)
@@ -48,7 +48,7 @@ TRACE_EVENT(amdtp_packet,
 		__entry->payload_quadlets = payload_length / sizeof(__be32);
 		__entry->data_blocks = data_blocks;
 		__entry->data_block_counter = data_block_counter,
-		__entry->packet_index = s->packet_index;
+		__entry->packet_index = packet_index;
 		__entry->irq = !!in_interrupt();
 		__entry->index = index;
 	),
diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c
index 73aff017dc9a..e0faa6601966 100644
--- a/sound/firewire/amdtp-stream.c
+++ b/sound/firewire/amdtp-stream.c
@@ -526,7 +526,7 @@ static void build_it_pkt_header(struct amdtp_stream *s, unsigned int cycle,
 	}
 
 	trace_amdtp_packet(s, cycle, cip_header, payload_length, data_blocks,
-			   data_block_counter, index);
+			   data_block_counter, s->packet_index, index);
 }
 
 static int check_cip_header(struct amdtp_stream *s, const __be32 *buf,
@@ -630,7 +630,7 @@ static int parse_ir_ctx_header(struct amdtp_stream *s, unsigned int cycle,
 			       unsigned int *payload_length,
 			       unsigned int *data_blocks,
 			       unsigned int *data_block_counter,
-			       unsigned int *syt, unsigned int index)
+			       unsigned int *syt, unsigned int packet_index, unsigned int index)
 {
 	const __be32 *cip_header;
 	unsigned int cip_header_size;
@@ -668,7 +668,7 @@ static int parse_ir_ctx_header(struct amdtp_stream *s, unsigned int cycle,
 	}
 
 	trace_amdtp_packet(s, cycle, cip_header, *payload_length, *data_blocks,
-			   *data_block_counter, index);
+			   *data_block_counter, packet_index, index);
 
 	return err;
 }
@@ -707,12 +707,13 @@ static int generate_device_pkt_descs(struct amdtp_stream *s,
 				     unsigned int packets)
 {
 	unsigned int dbc = s->data_block_counter;
+	unsigned int packet_index = s->packet_index;
+	unsigned int queue_size = s->queue_size;
 	int i;
 	int err;
 
 	for (i = 0; i < packets; ++i) {
 		struct pkt_desc *desc = descs + i;
-		unsigned int index = (s->packet_index + i) % s->queue_size;
 		unsigned int cycle;
 		unsigned int payload_length;
 		unsigned int data_blocks;
@@ -721,7 +722,7 @@ static int generate_device_pkt_descs(struct amdtp_stream *s,
 		cycle = compute_cycle_count(ctx_header[1]);
 
 		err = parse_ir_ctx_header(s, cycle, ctx_header, &payload_length,
-					  &data_blocks, &dbc, &syt, i);
+					  &data_blocks, &dbc, &syt, packet_index, i);
 		if (err < 0)
 			return err;
 
@@ -729,13 +730,15 @@ static int generate_device_pkt_descs(struct amdtp_stream *s,
 		desc->syt = syt;
 		desc->data_blocks = data_blocks;
 		desc->data_block_counter = dbc;
-		desc->ctx_payload = s->buffer.packets[index].buffer;
+		desc->ctx_payload = s->buffer.packets[packet_index].buffer;
 
 		if (!(s->flags & CIP_DBC_IS_END_EVENT))
 			dbc = (dbc + desc->data_blocks) & 0xff;
 
 		ctx_header +=
 			s->ctx_data.tx.ctx_header_size / sizeof(*ctx_header);
+
+		packet_index = (packet_index + 1) % queue_size;
 	}
 
 	s->data_block_counter = dbc;
-- 
cgit v1.2.3


From 1d5cfca286178ce81fb0c8a5f5777ef123cd69e4 Mon Sep 17 00:00:00 2001
From: PeiSen Hou <pshou@realtek.com>
Date: Fri, 14 May 2021 12:50:48 +0200
Subject: ALSA: hda/realtek: Add some CLOVE SSIDs of ALC293

Fix "use as headset mic, without its own jack detect" problen.

Signed-off-by: PeiSen Hou <pshou@realtek.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/d0746eaf29f248a5acc30313e3ba4f99@realtek.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 3e269de84079..552e2cb73291 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -8385,12 +8385,19 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1558, 0x50b8, "Clevo NK50SZ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x50d5, "Clevo NP50D5", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x50f0, "Clevo NH50A[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0x50f2, "Clevo NH50E[PR]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x50f3, "Clevo NH58DPQ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0x50f5, "Clevo NH55EPY", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0x50f6, "Clevo NH55DPQ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x5101, "Clevo S510WU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x5157, "Clevo W517GU1", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x51a1, "Clevo NS50MU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x70a1, "Clevo NB70T[HJK]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x70b3, "Clevo NK70SB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0x70f2, "Clevo NH79EPY", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0x70f3, "Clevo NH77DPQ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0x70f4, "Clevo NH77EPY", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0x70f6, "Clevo NH77DPQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x8228, "Clevo NR40BU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x8520, "Clevo NH50D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x8521, "Clevo NH77D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
@@ -8408,9 +8415,17 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1558, 0x8a51, "Clevo NH70RCQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x8d50, "Clevo NH55RCQ-M", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x951d, "Clevo N950T[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0x9600, "Clevo N960K[PR]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x961d, "Clevo N960S[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x971d, "Clevo N970T[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0xa500, "Clevo NL53RU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0xa600, "Clevo NL5XNU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0xb018, "Clevo NP50D[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0xb019, "Clevo NH77D[BE]Q", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0xb022, "Clevo NH77D[DC][QW]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0xc018, "Clevo NP50D[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0xc019, "Clevo NH77D[BE]Q", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0xc022, "Clevo NH77[DC][QW]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC233_FIXUP_LENOVO_MULTI_CODECS),
 	SND_PCI_QUIRK(0x17aa, 0x1048, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC),
 	SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE),
-- 
cgit v1.2.3


From 156ed0215ef365604f2382d5164c36d3a1cfd98f Mon Sep 17 00:00:00 2001
From: Martin Ågren <martin.agren@gmail.com>
Date: Thu, 22 Apr 2021 21:22:40 +0200
Subject: uio/uio_pci_generic: fix return value changed in refactoring
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit ef84928cff58 ("uio/uio_pci_generic: use device-managed function
equivalents") was able to simplify various error paths thanks to no
longer having to clean up on the way out. Some error paths were dropped,
others were simplified. In one of those simplifications, the return
value was accidentally changed from -ENODEV to -ENOMEM. Restore the old
return value.

Fixes: ef84928cff58 ("uio/uio_pci_generic: use device-managed function equivalents")
Cc: stable <stable@vger.kernel.org>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Martin Ågren <martin.agren@gmail.com>
Link: https://lore.kernel.org/r/20210422192240.1136373-1-martin.agren@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/uio/uio_pci_generic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/uio/uio_pci_generic.c b/drivers/uio/uio_pci_generic.c
index c7d681fef198..3bb0b0075467 100644
--- a/drivers/uio/uio_pci_generic.c
+++ b/drivers/uio/uio_pci_generic.c
@@ -82,7 +82,7 @@ static int probe(struct pci_dev *pdev,
 	}
 
 	if (pdev->irq && !pci_intx_mask_supported(pdev))
-		return -ENOMEM;
+		return -ENODEV;
 
 	gdev = devm_kzalloc(&pdev->dev, sizeof(struct uio_pci_generic_dev), GFP_KERNEL);
 	if (!gdev)
-- 
cgit v1.2.3


From 3ee098f96b8b6c1a98f7f97915f8873164e6af9d Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 9 May 2021 09:13:03 +0200
Subject: uio_hv_generic: Fix a memory leak in error handling paths

If 'vmbus_establish_gpadl()' fails, the (recv|send)_gpadl will not be
updated and 'hv_uio_cleanup()' in the error handling path will not be
able to free the corresponding buffer.

In such a case, we need to free the buffer explicitly.

Fixes: cdfa835c6e5e ("uio_hv_generic: defer opening vmbus until first use")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/4fdaff557deef6f0475d02ba7922ddbaa1ab08a6.1620544055.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/uio/uio_hv_generic.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c
index 0330ba99730e..eebc399f2cc7 100644
--- a/drivers/uio/uio_hv_generic.c
+++ b/drivers/uio/uio_hv_generic.c
@@ -296,8 +296,10 @@ hv_uio_probe(struct hv_device *dev,
 
 	ret = vmbus_establish_gpadl(channel, pdata->recv_buf,
 				    RECV_BUFFER_SIZE, &pdata->recv_gpadl);
-	if (ret)
+	if (ret) {
+		vfree(pdata->recv_buf);
 		goto fail_close;
+	}
 
 	/* put Global Physical Address Label in name */
 	snprintf(pdata->recv_name, sizeof(pdata->recv_name),
@@ -316,8 +318,10 @@ hv_uio_probe(struct hv_device *dev,
 
 	ret = vmbus_establish_gpadl(channel, pdata->send_buf,
 				    SEND_BUFFER_SIZE, &pdata->send_gpadl);
-	if (ret)
+	if (ret) {
+		vfree(pdata->send_buf);
 		goto fail_close;
+	}
 
 	snprintf(pdata->send_name, sizeof(pdata->send_name),
 		 "send:%u", pdata->send_gpadl);
-- 
cgit v1.2.3


From 0b0226be3a52dadd965644bc52a807961c2c26df Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 9 May 2021 09:13:12 +0200
Subject: uio_hv_generic: Fix another memory leak in error handling paths

Memory allocated by 'vmbus_alloc_ring()' at the beginning of the probe
function is never freed in the error handling path.

Add the missing 'vmbus_free_ring()' call.

Note that it is already freed in the .remove function.

Fixes: cdfa835c6e5e ("uio_hv_generic: defer opening vmbus until first use")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/0d86027b8eeed8e6360bc3d52bcdb328ff9bdca1.1620544055.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/uio/uio_hv_generic.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c
index eebc399f2cc7..652fe2547587 100644
--- a/drivers/uio/uio_hv_generic.c
+++ b/drivers/uio/uio_hv_generic.c
@@ -291,7 +291,7 @@ hv_uio_probe(struct hv_device *dev,
 	pdata->recv_buf = vzalloc(RECV_BUFFER_SIZE);
 	if (pdata->recv_buf == NULL) {
 		ret = -ENOMEM;
-		goto fail_close;
+		goto fail_free_ring;
 	}
 
 	ret = vmbus_establish_gpadl(channel, pdata->recv_buf,
@@ -351,6 +351,8 @@ hv_uio_probe(struct hv_device *dev,
 
 fail_close:
 	hv_uio_cleanup(dev, pdata);
+fail_free_ring:
+	vmbus_free_ring(dev->channel);
 
 	return ret;
 }
-- 
cgit v1.2.3


From 2962484dfef8dbb7f9059822bc26ce8a04d0e47c Mon Sep 17 00:00:00 2001
From: Hsin-Yi Wang <hsinyi@chromium.org>
Date: Tue, 20 Apr 2021 21:30:50 +0800
Subject: misc: eeprom: at24: check suspend status before disable regulator

cd5676db0574 ("misc: eeprom: at24: support pm_runtime control") disables
regulator in runtime suspend. If runtime suspend is called before
regulator disable, it will results in regulator unbalanced disabling.

Fixes: cd5676db0574 ("misc: eeprom: at24: support pm_runtime control")
Cc: stable <stable@vger.kernel.org>
Acked-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Hsin-Yi Wang <hsinyi@chromium.org>
Link: https://lore.kernel.org/r/20210420133050.377209-1-hsinyi@chromium.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/eeprom/at24.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c
index 926408b41270..7a6f01ace78a 100644
--- a/drivers/misc/eeprom/at24.c
+++ b/drivers/misc/eeprom/at24.c
@@ -763,7 +763,8 @@ static int at24_probe(struct i2c_client *client)
 	at24->nvmem = devm_nvmem_register(dev, &nvmem_config);
 	if (IS_ERR(at24->nvmem)) {
 		pm_runtime_disable(dev);
-		regulator_disable(at24->vcc_reg);
+		if (!pm_runtime_status_suspended(dev))
+			regulator_disable(at24->vcc_reg);
 		return PTR_ERR(at24->nvmem);
 	}
 
@@ -774,7 +775,8 @@ static int at24_probe(struct i2c_client *client)
 	err = at24_read(at24, 0, &test_byte, 1);
 	if (err) {
 		pm_runtime_disable(dev);
-		regulator_disable(at24->vcc_reg);
+		if (!pm_runtime_status_suspended(dev))
+			regulator_disable(at24->vcc_reg);
 		return -ENODEV;
 	}
 
-- 
cgit v1.2.3


From 447c19f3b5074409c794b350b10306e1da1ef4ba Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Fri, 14 May 2021 12:02:50 +0100
Subject: io_uring: fix ltout double free on completion race

Always remove linked timeout on io_link_timeout_fn() from the master
request link list, otherwise we may get use-after-free when first
io_link_timeout_fn() puts linked timeout in the fail path, and then
will be found and put on master's free.

Cc: stable@vger.kernel.org # 5.10+
Fixes: 90cd7e424969d ("io_uring: track link timeout's master explicitly")
Reported-and-tested-by: syzbot+5a864149dd970b546223@syzkaller.appspotmail.com
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/69c46bf6ce37fec4fdcd98f0882e18eb07ce693a.1620990121.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 9ac5e278a91e..599102cc6dfc 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6354,10 +6354,11 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
 	 * We don't expect the list to be empty, that will only happen if we
 	 * race with the completion of the linked work.
 	 */
-	if (prev && req_ref_inc_not_zero(prev))
+	if (prev) {
 		io_remove_next_linked(prev);
-	else
-		prev = NULL;
+		if (!req_ref_inc_not_zero(prev))
+			prev = NULL;
+	}
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
 	if (prev) {
-- 
cgit v1.2.3


From 2d74d0421e5afc1e7be7167ffb7eb8b2cf32343a Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Fri, 14 May 2021 12:05:46 +0100
Subject: io_uring: further remove sqpoll limits on opcodes

There are three types of requests that left disabled for sqpoll, namely
epoll ctx, statx, and resources update. Since SQPOLL task is now closely
mimics a userspace thread, remove the restrictions.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/909b52d70c45636d8d7897582474ea5aab5eed34.1620990306.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 599102cc6dfc..29ec5b28c73d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4035,7 +4035,7 @@ static int io_epoll_ctl_prep(struct io_kiocb *req,
 #if defined(CONFIG_EPOLL)
 	if (sqe->ioprio || sqe->buf_index)
 		return -EINVAL;
-	if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL)))
+	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 
 	req->epoll.epfd = READ_ONCE(sqe->fd);
@@ -4150,7 +4150,7 @@ static int io_fadvise(struct io_kiocb *req, unsigned int issue_flags)
 
 static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
-	if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL)))
+	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 	if (sqe->ioprio || sqe->buf_index)
 		return -EINVAL;
@@ -5827,8 +5827,6 @@ done:
 static int io_rsrc_update_prep(struct io_kiocb *req,
 				const struct io_uring_sqe *sqe)
 {
-	if (unlikely(req->ctx->flags & IORING_SETUP_SQPOLL))
-		return -EINVAL;
 	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
 		return -EINVAL;
 	if (sqe->ioprio || sqe->rw_flags)
-- 
cgit v1.2.3


From 489809e2e22b3dedc0737163d97eb2b574137b42 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Fri, 14 May 2021 12:06:44 +0100
Subject: io_uring: increase max number of reg buffers

Since recent changes instead of storing a large array of struct
io_mapped_ubuf, we store pointers to them, that is 4 times slimmer and
we should not to so worry about restricting max number of registererd
buffer slots, increase the limit 4 times.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/d3dee1da37f46da416aa96a16bf9e5094e10584d.1620990371.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 29ec5b28c73d..e481ac8a757a 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -100,6 +100,8 @@
 #define IORING_MAX_RESTRICTIONS	(IORING_RESTRICTION_LAST + \
 				 IORING_REGISTER_LAST + IORING_OP_LAST)
 
+#define IORING_MAX_REG_BUFFERS	(1U << 14)
+
 #define SQE_VALID_FLAGS	(IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK|	\
 				IOSQE_IO_HARDLINK | IOSQE_ASYNC | \
 				IOSQE_BUFFER_SELECT)
@@ -8389,7 +8391,7 @@ static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 
 	if (ctx->user_bufs)
 		return -EBUSY;
-	if (!nr_args || nr_args > UIO_MAXIOV)
+	if (!nr_args || nr_args > IORING_MAX_REG_BUFFERS)
 		return -EINVAL;
 	ret = io_rsrc_node_switch_start(ctx);
 	if (ret)
-- 
cgit v1.2.3


From 27e7db56cf3dffd302bd7ddfacb1d405cf671a2a Mon Sep 17 00:00:00 2001
From: Saravana Kannan <saravanak@google.com>
Date: Wed, 5 May 2021 09:47:34 -0700
Subject: spi: Don't have controller clean up spi device before driver unbind

When a spi device is unregistered and triggers a driver unbind, the
driver might need to access the spi device. So, don't have the
controller clean up the spi device before the driver is unbound. Clean
up the spi device after the driver is unbound.

Fixes: c7299fea6769 ("spi: Fix spi device unregister flow")
Reported-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Saravana Kannan <saravanak@google.com>
Tested-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20210505164734.175546-1-saravanak@google.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 98048af04abf..e353b7a9e54e 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -714,8 +714,6 @@ void spi_unregister_device(struct spi_device *spi)
 	if (!spi)
 		return;
 
-	spi_cleanup(spi);
-
 	if (spi->dev.of_node) {
 		of_node_clear_flag(spi->dev.of_node, OF_POPULATED);
 		of_node_put(spi->dev.of_node);
@@ -723,7 +721,9 @@ void spi_unregister_device(struct spi_device *spi)
 	if (ACPI_COMPANION(&spi->dev))
 		acpi_device_clear_enumerated(ACPI_COMPANION(&spi->dev));
 	device_remove_software_node(&spi->dev);
-	device_unregister(&spi->dev);
+	device_del(&spi->dev);
+	spi_cleanup(spi);
+	put_device(&spi->dev);
 }
 EXPORT_SYMBOL_GPL(spi_unregister_device);
 
-- 
cgit v1.2.3


From 3486d2c9be652a31033363bdd50391b0c8a8fe21 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu, 13 May 2021 09:32:46 +0200
Subject: clocksource/drivers/hyper-v: Re-enable VDSO_CLOCKMODE_HVCLOCK on X86

Mohammed reports (https://bugzilla.kernel.org/show_bug.cgi?id=213029)
the commit e4ab4658f1cf ("clocksource/drivers/hyper-v: Handle vDSO
differences inline") broke vDSO on x86. The problem appears to be that
VDSO_CLOCKMODE_HVCLOCK is an enum value in 'enum vdso_clock_mode' and
'#ifdef VDSO_CLOCKMODE_HVCLOCK' branch evaluates to false (it is not
a define).

Use a dedicated HAVE_VDSO_CLOCKMODE_HVCLOCK define instead.

Fixes: e4ab4658f1cf ("clocksource/drivers/hyper-v: Handle vDSO differences inline")
Reported-by: Mohammed Gamal <mgamal@redhat.com>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Link: https://lore.kernel.org/r/20210513073246.1715070-1-vkuznets@redhat.com
---
 arch/x86/include/asm/vdso/clocksource.h | 2 ++
 drivers/clocksource/hyperv_timer.c      | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/vdso/clocksource.h b/arch/x86/include/asm/vdso/clocksource.h
index 119ac8612d89..136e5e57cfe1 100644
--- a/arch/x86/include/asm/vdso/clocksource.h
+++ b/arch/x86/include/asm/vdso/clocksource.h
@@ -7,4 +7,6 @@
 	VDSO_CLOCKMODE_PVCLOCK,	\
 	VDSO_CLOCKMODE_HVCLOCK
 
+#define HAVE_VDSO_CLOCKMODE_HVCLOCK
+
 #endif /* __ASM_VDSO_CLOCKSOURCE_H */
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
index 977fd05ac35f..d6ece7bbce89 100644
--- a/drivers/clocksource/hyperv_timer.c
+++ b/drivers/clocksource/hyperv_timer.c
@@ -419,7 +419,7 @@ static void resume_hv_clock_tsc(struct clocksource *arg)
 	hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr);
 }
 
-#ifdef VDSO_CLOCKMODE_HVCLOCK
+#ifdef HAVE_VDSO_CLOCKMODE_HVCLOCK
 static int hv_cs_enable(struct clocksource *cs)
 {
 	vclocks_set_used(VDSO_CLOCKMODE_HVCLOCK);
@@ -435,7 +435,7 @@ static struct clocksource hyperv_cs_tsc = {
 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 	.suspend= suspend_hv_clock_tsc,
 	.resume	= resume_hv_clock_tsc,
-#ifdef VDSO_CLOCKMODE_HVCLOCK
+#ifdef HAVE_VDSO_CLOCKMODE_HVCLOCK
 	.enable = hv_cs_enable,
 	.vdso_clock_mode = VDSO_CLOCKMODE_HVCLOCK,
 #else
-- 
cgit v1.2.3


From cb6f6b3384d7825d2a43f2256c5200e3b3956fc8 Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@xilinx.com>
Date: Wed, 12 May 2021 13:18:21 -0700
Subject: xen/arm: move xen_swiotlb_detect to arm/swiotlb-xen.h

Move xen_swiotlb_detect to a static inline function to make it available
to !CONFIG_XEN builds.

CC: boris.ostrovsky@oracle.com
CC: jgross@suse.com
Signed-off-by: Stefano Stabellini <stefano.stabellini@xilinx.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/20210512201823.1963-1-sstabellini@kernel.org
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/arm/xen/mm.c             | 12 ------------
 include/xen/arm/swiotlb-xen.h | 15 ++++++++++++++-
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index f8f07469d259..223b1151fd7d 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -135,18 +135,6 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
 	return;
 }
 
-int xen_swiotlb_detect(void)
-{
-	if (!xen_domain())
-		return 0;
-	if (xen_feature(XENFEAT_direct_mapped))
-		return 1;
-	/* legacy case */
-	if (!xen_feature(XENFEAT_not_direct_mapped) && xen_initial_domain())
-		return 1;
-	return 0;
-}
-
 static int __init xen_mm_init(void)
 {
 	struct gnttab_cache_flush cflush;
diff --git a/include/xen/arm/swiotlb-xen.h b/include/xen/arm/swiotlb-xen.h
index 2994fe6031a0..33336ab58afc 100644
--- a/include/xen/arm/swiotlb-xen.h
+++ b/include/xen/arm/swiotlb-xen.h
@@ -2,6 +2,19 @@
 #ifndef _ASM_ARM_SWIOTLB_XEN_H
 #define _ASM_ARM_SWIOTLB_XEN_H
 
-extern int xen_swiotlb_detect(void);
+#include <xen/features.h>
+#include <xen/xen.h>
+
+static inline int xen_swiotlb_detect(void)
+{
+	if (!xen_domain())
+		return 0;
+	if (xen_feature(XENFEAT_direct_mapped))
+		return 1;
+	/* legacy case */
+	if (!xen_feature(XENFEAT_not_direct_mapped) && xen_initial_domain())
+		return 1;
+	return 0;
+}
 
 #endif /* _ASM_ARM_SWIOTLB_XEN_H */
-- 
cgit v1.2.3


From 687842ec50342b716953f5847a49dd337cb6de8c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 12 May 2021 13:18:22 -0700
Subject: arm64: do not set SWIOTLB_NO_FORCE when swiotlb is required

Although SWIOTLB_NO_FORCE is meant to allow later calls to swiotlb_init,
today dma_direct_map_page returns error if SWIOTLB_NO_FORCE.

For now, without a larger overhaul of SWIOTLB_NO_FORCE, the best we can
do is to avoid setting SWIOTLB_NO_FORCE in mem_init when we know that it
is going to be required later (e.g. Xen requires it).

CC: boris.ostrovsky@oracle.com
CC: jgross@suse.com
CC: catalin.marinas@arm.com
CC: will@kernel.org
CC: linux-arm-kernel@lists.infradead.org
Fixes: 2726bf3ff252 ("swiotlb: Make SWIOTLB_NO_FORCE perform no allocation")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Stefano Stabellini <stefano.stabellini@xilinx.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210512201823.1963-2-sstabellini@kernel.org
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/arm64/mm/init.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 16a2b2b1c54d..e55409caaee3 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -43,6 +43,7 @@
 #include <linux/sizes.h>
 #include <asm/tlb.h>
 #include <asm/alternative.h>
+#include <asm/xen/swiotlb-xen.h>
 
 /*
  * We need to be able to catch inadvertent references to memstart_addr
@@ -482,7 +483,7 @@ void __init mem_init(void)
 	if (swiotlb_force == SWIOTLB_FORCE ||
 	    max_pfn > PFN_DOWN(arm64_dma_phys_limit))
 		swiotlb_init(1);
-	else
+	else if (!xen_swiotlb_detect())
 		swiotlb_force = SWIOTLB_NO_FORCE;
 
 	set_max_mapnr(max_pfn - PHYS_PFN_OFFSET);
-- 
cgit v1.2.3


From 97729b653de52ba98e08732dd8855586e37a3a31 Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@xilinx.com>
Date: Wed, 12 May 2021 13:18:23 -0700
Subject: xen/swiotlb: check if the swiotlb has already been initialized

xen_swiotlb_init calls swiotlb_late_init_with_tbl, which fails with
-ENOMEM if the swiotlb has already been initialized.

Add an explicit check io_tlb_default_mem != NULL at the beginning of
xen_swiotlb_init. If the swiotlb is already initialized print a warning
and return -EEXIST.

On x86, the error propagates.

On ARM, we don't actually need a special swiotlb buffer (yet), any
buffer would do. So ignore the error and continue.

CC: boris.ostrovsky@oracle.com
CC: jgross@suse.com
Signed-off-by: Stefano Stabellini <stefano.stabellini@xilinx.com>
Reviewed-by: Boris Ostrovsky <boris.ostrvsky@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20210512201823.1963-3-sstabellini@kernel.org
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/arm/xen/mm.c         | 8 +++++++-
 drivers/xen/swiotlb-xen.c | 5 +++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 223b1151fd7d..a7e54a087b80 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -138,9 +138,15 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
 static int __init xen_mm_init(void)
 {
 	struct gnttab_cache_flush cflush;
+	int rc;
+
 	if (!xen_swiotlb_detect())
 		return 0;
-	xen_swiotlb_init();
+
+	rc = xen_swiotlb_init();
+	/* we can work with the default swiotlb */
+	if (rc < 0 && rc != -EEXIST)
+		return rc;
 
 	cflush.op = 0;
 	cflush.a.dev_bus_addr = 0;
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 4c89afc0df62..24d11861ac7d 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -164,6 +164,11 @@ int __ref xen_swiotlb_init(void)
 	int rc = -ENOMEM;
 	char *start;
 
+	if (io_tlb_default_mem != NULL) {
+		pr_warn("swiotlb buffer already initialized\n");
+		return -EEXIST;
+	}
+
 retry:
 	m_ret = XEN_SWIOTLB_ENOMEM;
 	order = get_order(bytes);
-- 
cgit v1.2.3


From 03f26d8f11403295de445b6e4e0e57ac57755791 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Fri, 14 May 2021 10:20:52 +0800
Subject: blk-mq: plug request for shared sbitmap

In case of shared sbitmap, request won't be held in plug list any more
sine commit 32bc15afed04 ("blk-mq: Facilitate a shared sbitmap per
tagset"), this way makes request merge from flush plug list & batching
submission not possible, so cause performance regression.

Yanhui reports performance regression when running sequential IO
test(libaio, 16 jobs, 8 depth for each job) in VM, and the VM disk
is emulated with image stored on xfs/megaraid_sas.

Fix the issue by recovering original behavior to allow to hold request
in plug list.

Cc: Yanhui Ma <yama@redhat.com>
Cc: John Garry <john.garry@huawei.com>
Cc: Bart Van Assche <bvanassche@acm.org>
Cc: kashyap.desai@broadcom.com
Fixes: 32bc15afed04 ("blk-mq: Facilitate a shared sbitmap per tagset")
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20210514022052.1047665-1-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 466676bc2f0b..28ef0248efba 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2232,8 +2232,9 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio)
 		/* Bypass scheduler for flush requests */
 		blk_insert_flush(rq);
 		blk_mq_run_hw_queue(data.hctx, true);
-	} else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs ||
-				!blk_queue_nonrot(q))) {
+	} else if (plug && (q->nr_hw_queues == 1 ||
+		   blk_mq_is_sbitmap_shared(rq->mq_hctx->flags) ||
+		   q->mq_ops->commit_rqs || !blk_queue_nonrot(q))) {
 		/*
 		 * Use plugging if we have a ->commit_rqs() hook as well, as
 		 * we know the driver uses bd->last in a smart fashion.
-- 
cgit v1.2.3


From 630ef623ed26c18a457cdc070cf24014e50129c2 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 13 May 2021 10:15:29 -0700
Subject: blk-mq: Swap two calls in blk_mq_exit_queue()

If a tag set is shared across request queues (e.g. SCSI LUNs) then the
block layer core keeps track of the number of active request queues in
tags->active_queues. blk_mq_tag_busy() and blk_mq_tag_idle() update that
atomic counter if the hctx flag BLK_MQ_F_TAG_QUEUE_SHARED is set. Make
sure that blk_mq_exit_queue() calls blk_mq_tag_idle() before that flag is
cleared by blk_mq_del_queue_tag_set().

Cc: Christoph Hellwig <hch@infradead.org>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Hannes Reinecke <hare@suse.com>
Fixes: 0d2602ca30e4 ("blk-mq: improve support for shared tags maps")
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20210513171529.7977-1-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 28ef0248efba..c86c01bfecdb 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3286,10 +3286,12 @@ EXPORT_SYMBOL(blk_mq_init_allocated_queue);
 /* tags can _not_ be used after returning from blk_mq_exit_queue */
 void blk_mq_exit_queue(struct request_queue *q)
 {
-	struct blk_mq_tag_set	*set = q->tag_set;
+	struct blk_mq_tag_set *set = q->tag_set;
 
-	blk_mq_del_queue_tag_set(q);
+	/* Checks hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED. */
 	blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
+	/* May clear BLK_MQ_F_TAG_QUEUE_SHARED in hctx->flags. */
+	blk_mq_del_queue_tag_set(q);
 }
 
 static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
-- 
cgit v1.2.3


From 4bc2082311311892742deb2ce04bc335f85ee27a Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 13 May 2021 10:17:08 -0700
Subject: block/partitions/efi.c: Fix the efi_partition() kernel-doc header

Fix the following kernel-doc warning:

block/partitions/efi.c:685: warning: wrong kernel-doc identifier on line:
 * efi_partition(struct parsed_partitions *state)

Cc: Alexander Viro <viro@math.psu.edu>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20210513171708.8391-1-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/partitions/efi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/partitions/efi.c b/block/partitions/efi.c
index b64bfdd4326c..e2716792ecc1 100644
--- a/block/partitions/efi.c
+++ b/block/partitions/efi.c
@@ -682,7 +682,7 @@ static void utf16_le_to_7bit(const __le16 *in, unsigned int size, u8 *out)
 }
 
 /**
- * efi_partition(struct parsed_partitions *state)
+ * efi_partition - scan for GPT partitions
  * @state: disk parsed partitions
  *
  * Description: called from check.c, if the disk contains GPT
-- 
cgit v1.2.3


From 588a513d34257fdde95a9f0df0202e31998e85c6 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Fri, 14 May 2021 10:50:01 +0100
Subject: arm64: Fix race condition on PG_dcache_clean in
 __sync_icache_dcache()

To ensure that instructions are observable in a new mapping, the arm64
set_pte_at() implementation cleans the D-cache and invalidates the
I-cache to the PoU. As an optimisation, this is only done on executable
mappings and the PG_dcache_clean page flag is set to avoid future cache
maintenance on the same page.

When two different processes map the same page (e.g. private executable
file or shared mapping) there's a potential race on checking and setting
PG_dcache_clean via set_pte_at() -> __sync_icache_dcache(). While on the
fault paths the page is locked (PG_locked), mprotect() does not take the
page lock. The result is that one process may see the PG_dcache_clean
flag set but the I/D cache maintenance not yet performed.

Avoid test_and_set_bit(PG_dcache_clean) in favour of separate test_bit()
and set_bit(). In the rare event of a race, the cache maintenance is
done twice.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: <stable@vger.kernel.org>
Cc: Will Deacon <will@kernel.org>
Cc: Steven Price <steven.price@arm.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210514095001.13236-1-catalin.marinas@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/flush.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index ac485163a4a7..6d44c028d1c9 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -55,8 +55,10 @@ void __sync_icache_dcache(pte_t pte)
 {
 	struct page *page = pte_page(pte);
 
-	if (!test_and_set_bit(PG_dcache_clean, &page->flags))
+	if (!test_bit(PG_dcache_clean, &page->flags)) {
 		sync_icache_aliases(page_address(page), page_size(page));
+		set_bit(PG_dcache_clean, &page->flags);
+	}
 }
 EXPORT_SYMBOL_GPL(__sync_icache_dcache);
 
-- 
cgit v1.2.3


From 75016891357a628d2b8acc09e2b9b2576c18d318 Mon Sep 17 00:00:00 2001
From: Hoang Le <hoang.h.le@dektech.com.au>
Date: Fri, 14 May 2021 08:23:03 +0700
Subject: Revert "net:tipc: Fix a double free in tipc_sk_mcast_rcv"

This reverts commit 6bf24dc0cc0cc43b29ba344b66d78590e687e046.
Above fix is not correct and caused memory leak issue.

Fixes: 6bf24dc0cc0c ("net:tipc: Fix a double free in tipc_sk_mcast_rcv")
Acked-by: Jon Maloy <jmaloy@redhat.com>
Acked-by: Tung Nguyen <tung.q.nguyen@dektech.com.au>
Signed-off-by: Hoang Le <hoang.h.le@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/socket.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 58935cd0d068..53af72824c9c 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1262,7 +1262,10 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
 		spin_lock_bh(&inputq->lock);
 		if (skb_peek(arrvq) == skb) {
 			skb_queue_splice_tail_init(&tmpq, inputq);
-			__skb_dequeue(arrvq);
+			/* Decrease the skb's refcnt as increasing in the
+			 * function tipc_skb_peek
+			 */
+			kfree_skb(__skb_dequeue(arrvq));
 		}
 		spin_unlock_bh(&inputq->lock);
 		__skb_queue_purge(&tmpq);
-- 
cgit v1.2.3


From 974271e5ed45cfe4daddbeb16224a2156918530e Mon Sep 17 00:00:00 2001
From: Jim Ma <majinjing3@gmail.com>
Date: Fri, 14 May 2021 11:11:02 +0800
Subject: tls splice: check SPLICE_F_NONBLOCK instead of MSG_DONTWAIT

In tls_sw_splice_read, checkout MSG_* is inappropriate, should use
SPLICE_*, update tls_wait_data to accept nonblock arguments instead
of flags for recvmsg and splice.

Fixes: c46234ebb4d1 ("tls: RX path for ktls")
Signed-off-by: Jim Ma <majinjing3@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tls/tls_sw.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 1dcb34dfd56b..694de024d0ee 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -37,6 +37,7 @@
 
 #include <linux/sched/signal.h>
 #include <linux/module.h>
+#include <linux/splice.h>
 #include <crypto/aead.h>
 
 #include <net/strparser.h>
@@ -1281,7 +1282,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
 }
 
 static struct sk_buff *tls_wait_data(struct sock *sk, struct sk_psock *psock,
-				     int flags, long timeo, int *err)
+				     bool nonblock, long timeo, int *err)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
@@ -1306,7 +1307,7 @@ static struct sk_buff *tls_wait_data(struct sock *sk, struct sk_psock *psock,
 		if (sock_flag(sk, SOCK_DONE))
 			return NULL;
 
-		if ((flags & MSG_DONTWAIT) || !timeo) {
+		if (nonblock || !timeo) {
 			*err = -EAGAIN;
 			return NULL;
 		}
@@ -1786,7 +1787,7 @@ int tls_sw_recvmsg(struct sock *sk,
 		bool async_capable;
 		bool async = false;
 
-		skb = tls_wait_data(sk, psock, flags, timeo, &err);
+		skb = tls_wait_data(sk, psock, flags & MSG_DONTWAIT, timeo, &err);
 		if (!skb) {
 			if (psock) {
 				int ret = sk_msg_recvmsg(sk, psock, msg, len,
@@ -1990,9 +1991,9 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
 
 	lock_sock(sk);
 
-	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+	timeo = sock_rcvtimeo(sk, flags & SPLICE_F_NONBLOCK);
 
-	skb = tls_wait_data(sk, NULL, flags, timeo, &err);
+	skb = tls_wait_data(sk, NULL, flags & SPLICE_F_NONBLOCK, timeo, &err);
 	if (!skb)
 		goto splice_read_end;
 
-- 
cgit v1.2.3


From a90c57f2cedd52a511f739fb55e6244e22e1a2fb Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Fri, 14 May 2021 11:16:59 +0800
Subject: net: sched: fix packet stuck problem for lockless qdisc

Lockless qdisc has below concurrent problem:
    cpu0                 cpu1
     .                     .
q->enqueue                 .
     .                     .
qdisc_run_begin()          .
     .                     .
dequeue_skb()              .
     .                     .
sch_direct_xmit()          .
     .                     .
     .                q->enqueue
     .             qdisc_run_begin()
     .            return and do nothing
     .                     .
qdisc_run_end()            .

cpu1 enqueue a skb without calling __qdisc_run() because cpu0
has not released the lock yet and spin_trylock() return false
for cpu1 in qdisc_run_begin(), and cpu0 do not see the skb
enqueued by cpu1 when calling dequeue_skb() because cpu1 may
enqueue the skb after cpu0 calling dequeue_skb() and before
cpu0 calling qdisc_run_end().

Lockless qdisc has below another concurrent problem when
tx_action is involved:

cpu0(serving tx_action)     cpu1             cpu2
          .                   .                .
          .              q->enqueue            .
          .            qdisc_run_begin()       .
          .              dequeue_skb()         .
          .                   .            q->enqueue
          .                   .                .
          .             sch_direct_xmit()      .
          .                   .         qdisc_run_begin()
          .                   .       return and do nothing
          .                   .                .
 clear __QDISC_STATE_SCHED    .                .
 qdisc_run_begin()            .                .
 return and do nothing        .                .
          .                   .                .
          .            qdisc_run_end()         .

This patch fixes the above data race by:
1. If the first spin_trylock() return false and STATE_MISSED is
   not set, set STATE_MISSED and retry another spin_trylock() in
   case other CPU may not see STATE_MISSED after it releases the
   lock.
2. reschedule if STATE_MISSED is set after the lock is released
   at the end of qdisc_run_end().

For tx_action case, STATE_MISSED is also set when cpu1 is at the
end if qdisc_run_end(), so tx_action will be rescheduled again
to dequeue the skb enqueued by cpu2.

Clear STATE_MISSED before retrying a dequeuing when dequeuing
returns NULL in order to reduce the overhead of the second
spin_trylock() and __netif_schedule() calling.

Also clear the STATE_MISSED before calling __netif_schedule()
at the end of qdisc_run_end() to avoid doing another round of
dequeuing in the pfifo_fast_dequeue().

The performance impact of this patch, tested using pktgen and
dummy netdev with pfifo_fast qdisc attached:

 threads  without+this_patch   with+this_patch      delta
    1        2.61Mpps            2.60Mpps           -0.3%
    2        3.97Mpps            3.82Mpps           -3.7%
    4        5.62Mpps            5.59Mpps           -0.5%
    8        2.78Mpps            2.77Mpps           -0.3%
   16        2.22Mpps            2.22Mpps           -0.0%

Fixes: 6b3ba9146fe6 ("net: sched: allow qdiscs to handle locking")
Acked-by: Jakub Kicinski <kuba@kernel.org>
Tested-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 35 ++++++++++++++++++++++++++++++++++-
 net/sched/sch_generic.c   | 19 +++++++++++++++++++
 2 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index f7a6e14491fb..1e625519ae96 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -36,6 +36,7 @@ struct qdisc_rate_table {
 enum qdisc_state_t {
 	__QDISC_STATE_SCHED,
 	__QDISC_STATE_DEACTIVATED,
+	__QDISC_STATE_MISSED,
 };
 
 struct qdisc_size_table {
@@ -159,8 +160,33 @@ static inline bool qdisc_is_empty(const struct Qdisc *qdisc)
 static inline bool qdisc_run_begin(struct Qdisc *qdisc)
 {
 	if (qdisc->flags & TCQ_F_NOLOCK) {
+		if (spin_trylock(&qdisc->seqlock))
+			goto nolock_empty;
+
+		/* If the MISSED flag is set, it means other thread has
+		 * set the MISSED flag before second spin_trylock(), so
+		 * we can return false here to avoid multi cpus doing
+		 * the set_bit() and second spin_trylock() concurrently.
+		 */
+		if (test_bit(__QDISC_STATE_MISSED, &qdisc->state))
+			return false;
+
+		/* Set the MISSED flag before the second spin_trylock(),
+		 * if the second spin_trylock() return false, it means
+		 * other cpu holding the lock will do dequeuing for us
+		 * or it will see the MISSED flag set after releasing
+		 * lock and reschedule the net_tx_action() to do the
+		 * dequeuing.
+		 */
+		set_bit(__QDISC_STATE_MISSED, &qdisc->state);
+
+		/* Retry again in case other CPU may not see the new flag
+		 * after it releases the lock at the end of qdisc_run_end().
+		 */
 		if (!spin_trylock(&qdisc->seqlock))
 			return false;
+
+nolock_empty:
 		WRITE_ONCE(qdisc->empty, false);
 	} else if (qdisc_is_running(qdisc)) {
 		return false;
@@ -176,8 +202,15 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
 static inline void qdisc_run_end(struct Qdisc *qdisc)
 {
 	write_seqcount_end(&qdisc->running);
-	if (qdisc->flags & TCQ_F_NOLOCK)
+	if (qdisc->flags & TCQ_F_NOLOCK) {
 		spin_unlock(&qdisc->seqlock);
+
+		if (unlikely(test_bit(__QDISC_STATE_MISSED,
+				      &qdisc->state))) {
+			clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+			__netif_schedule(qdisc);
+		}
+	}
 }
 
 static inline bool qdisc_may_bulk(const struct Qdisc *qdisc)
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 44991ea726fc..795d986e7030 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -640,8 +640,10 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
 {
 	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
 	struct sk_buff *skb = NULL;
+	bool need_retry = true;
 	int band;
 
+retry:
 	for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
 		struct skb_array *q = band2list(priv, band);
 
@@ -652,6 +654,23 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
 	}
 	if (likely(skb)) {
 		qdisc_update_stats_at_dequeue(qdisc, skb);
+	} else if (need_retry &&
+		   test_bit(__QDISC_STATE_MISSED, &qdisc->state)) {
+		/* Delay clearing the STATE_MISSED here to reduce
+		 * the overhead of the second spin_trylock() in
+		 * qdisc_run_begin() and __netif_schedule() calling
+		 * in qdisc_run_end().
+		 */
+		clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+
+		/* Make sure dequeuing happens after clearing
+		 * STATE_MISSED.
+		 */
+		smp_mb__after_atomic();
+
+		need_retry = false;
+
+		goto retry;
 	} else {
 		WRITE_ONCE(qdisc->empty, true);
 	}
-- 
cgit v1.2.3


From 102b55ee92f9fda4dde7a45d2b20538e6e3e3d1e Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Fri, 14 May 2021 11:17:00 +0800
Subject: net: sched: fix tx action rescheduling issue during deactivation

Currently qdisc_run() checks the STATE_DEACTIVATED of lockless
qdisc before calling __qdisc_run(), which ultimately clear the
STATE_MISSED when all the skb is dequeued. If STATE_DEACTIVATED
is set before clearing STATE_MISSED, there may be rescheduling
of net_tx_action() at the end of qdisc_run_end(), see below:

CPU0(net_tx_atcion)  CPU1(__dev_xmit_skb)  CPU2(dev_deactivate)
          .                   .                     .
          .            set STATE_MISSED             .
          .           __netif_schedule()            .
          .                   .           set STATE_DEACTIVATED
          .                   .                qdisc_reset()
          .                   .                     .
          .<---------------   .              synchronize_net()
clear __QDISC_STATE_SCHED  |  .                     .
          .                |  .                     .
          .                |  .            some_qdisc_is_busy()
          .                |  .               return *false*
          .                |  .                     .
  test STATE_DEACTIVATED   |  .                     .
__qdisc_run() *not* called |  .                     .
          .                |  .                     .
   test STATE_MISS         |  .                     .
 __netif_schedule()--------|  .                     .
          .                   .                     .
          .                   .                     .

__qdisc_run() is not called by net_tx_atcion() in CPU0 because
CPU2 has set STATE_DEACTIVATED flag during dev_deactivate(), and
STATE_MISSED is only cleared in __qdisc_run(), __netif_schedule
is called at the end of qdisc_run_end(), causing tx action
rescheduling problem.

qdisc_run() called by net_tx_action() runs in the softirq context,
which should has the same semantic as the qdisc_run() called by
__dev_xmit_skb() protected by rcu_read_lock_bh(). And there is a
synchronize_net() between STATE_DEACTIVATED flag being set and
qdisc_reset()/some_qdisc_is_busy in dev_deactivate(), we can safely
bail out for the deactived lockless qdisc in net_tx_action(), and
qdisc_reset() will reset all skb not dequeued yet.

So add the rcu_read_lock() explicitly to protect the qdisc_run()
and do the STATE_DEACTIVATED checking in net_tx_action() before
calling qdisc_run_begin(). Another option is to do the checking in
the qdisc_run_end(), but it will add unnecessary overhead for
non-tx_action case, because __dev_queue_xmit() will not see qdisc
with STATE_DEACTIVATED after synchronize_net(), the qdisc with
STATE_DEACTIVATED can only be seen by net_tx_action() because of
__netif_schedule().

The STATE_DEACTIVATED checking in qdisc_run() is to avoid race
between net_tx_action() and qdisc_reset(), see:
commit d518d2ed8640 ("net/sched: fix race between deactivation
and dequeue for NOLOCK qdisc"). As the bailout added above for
deactived lockless qdisc in net_tx_action() provides better
protection for the race without calling qdisc_run() at all, so
remove the STATE_DEACTIVATED checking in qdisc_run().

After qdisc_reset(), there is no skb in qdisc to be dequeued, so
clear the STATE_MISSED in dev_reset_queue() too.

Fixes: 6b3ba9146fe6 ("net: sched: allow qdiscs to handle locking")
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
V8: Clearing STATE_MISSED before calling __netif_schedule() has
    avoid the endless rescheduling problem, but there may still
    be a unnecessary rescheduling, so adjust the commit log.
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h |  7 +------
 net/core/dev.c          | 26 ++++++++++++++++++++++----
 net/sched/sch_generic.c |  4 +++-
 3 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index f5c1bee0cd6a..6d7b12cba015 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -128,12 +128,7 @@ void __qdisc_run(struct Qdisc *q);
 static inline void qdisc_run(struct Qdisc *q)
 {
 	if (qdisc_run_begin(q)) {
-		/* NOLOCK qdisc must check 'state' under the qdisc seqlock
-		 * to avoid racing with dev_qdisc_reset()
-		 */
-		if (!(q->flags & TCQ_F_NOLOCK) ||
-		    likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
-			__qdisc_run(q);
+		__qdisc_run(q);
 		qdisc_run_end(q);
 	}
 }
diff --git a/net/core/dev.c b/net/core/dev.c
index 222b1d322c96..d596cd746353 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5025,25 +5025,43 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
 		sd->output_queue_tailp = &sd->output_queue;
 		local_irq_enable();
 
+		rcu_read_lock();
+
 		while (head) {
 			struct Qdisc *q = head;
 			spinlock_t *root_lock = NULL;
 
 			head = head->next_sched;
 
-			if (!(q->flags & TCQ_F_NOLOCK)) {
-				root_lock = qdisc_lock(q);
-				spin_lock(root_lock);
-			}
 			/* We need to make sure head->next_sched is read
 			 * before clearing __QDISC_STATE_SCHED
 			 */
 			smp_mb__before_atomic();
+
+			if (!(q->flags & TCQ_F_NOLOCK)) {
+				root_lock = qdisc_lock(q);
+				spin_lock(root_lock);
+			} else if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED,
+						     &q->state))) {
+				/* There is a synchronize_net() between
+				 * STATE_DEACTIVATED flag being set and
+				 * qdisc_reset()/some_qdisc_is_busy() in
+				 * dev_deactivate(), so we can safely bail out
+				 * early here to avoid data race between
+				 * qdisc_deactivate() and some_qdisc_is_busy()
+				 * for lockless qdisc.
+				 */
+				clear_bit(__QDISC_STATE_SCHED, &q->state);
+				continue;
+			}
+
 			clear_bit(__QDISC_STATE_SCHED, &q->state);
 			qdisc_run(q);
 			if (root_lock)
 				spin_unlock(root_lock);
 		}
+
+		rcu_read_unlock();
 	}
 
 	xfrm_dev_backlog(sd);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 795d986e7030..d86c4cca2cab 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -1177,8 +1177,10 @@ static void dev_reset_queue(struct net_device *dev,
 	qdisc_reset(qdisc);
 
 	spin_unlock_bh(qdisc_lock(qdisc));
-	if (nolock)
+	if (nolock) {
+		clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
 		spin_unlock_bh(&qdisc->seqlock);
+	}
 }
 
 static bool some_qdisc_is_busy(struct net_device *dev)
-- 
cgit v1.2.3


From dcad9ee9e0663d74a89b25b987f9c7be86432812 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Fri, 14 May 2021 11:17:01 +0800
Subject: net: sched: fix tx action reschedule issue with stopped queue

The netdev qeueue might be stopped when byte queue limit has
reached or tx hw ring is full, net_tx_action() may still be
rescheduled if STATE_MISSED is set, which consumes unnecessary
cpu without dequeuing and transmiting any skb because the
netdev queue is stopped, see qdisc_run_end().

This patch fixes it by checking the netdev queue state before
calling qdisc_run() and clearing STATE_MISSED if netdev queue is
stopped during qdisc_run(), the net_tx_action() is rescheduled
again when netdev qeueue is restarted, see netif_tx_wake_queue().

As there is time window between netif_xmit_frozen_or_stopped()
checking and STATE_MISSED clearing, between which STATE_MISSED
may set by net_tx_action() scheduled by netif_tx_wake_queue(),
so set the STATE_MISSED again if netdev queue is restarted.

Fixes: 6b3ba9146fe6 ("net: sched: allow qdiscs to handle locking")
Reported-by: Michal Kubecek <mkubecek@suse.cz>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c          |  3 ++-
 net/sched/sch_generic.c | 27 ++++++++++++++++++++++++++-
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index d596cd746353..ef8cf7619baf 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3853,7 +3853,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 
 	if (q->flags & TCQ_F_NOLOCK) {
 		rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
-		qdisc_run(q);
+		if (likely(!netif_xmit_frozen_or_stopped(txq)))
+			qdisc_run(q);
 
 		if (unlikely(to_free))
 			kfree_skb_list(to_free);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index d86c4cca2cab..fc8b56bcabf3 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -35,6 +35,25 @@
 const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
 EXPORT_SYMBOL(default_qdisc_ops);
 
+static void qdisc_maybe_clear_missed(struct Qdisc *q,
+				     const struct netdev_queue *txq)
+{
+	clear_bit(__QDISC_STATE_MISSED, &q->state);
+
+	/* Make sure the below netif_xmit_frozen_or_stopped()
+	 * checking happens after clearing STATE_MISSED.
+	 */
+	smp_mb__after_atomic();
+
+	/* Checking netif_xmit_frozen_or_stopped() again to
+	 * make sure STATE_MISSED is set if the STATE_MISSED
+	 * set by netif_tx_wake_queue()'s rescheduling of
+	 * net_tx_action() is cleared by the above clear_bit().
+	 */
+	if (!netif_xmit_frozen_or_stopped(txq))
+		set_bit(__QDISC_STATE_MISSED, &q->state);
+}
+
 /* Main transmission queue. */
 
 /* Modifications to data participating in scheduling must be protected with
@@ -74,6 +93,7 @@ static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
 			}
 		} else {
 			skb = SKB_XOFF_MAGIC;
+			qdisc_maybe_clear_missed(q, txq);
 		}
 	}
 
@@ -242,6 +262,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
 			}
 		} else {
 			skb = NULL;
+			qdisc_maybe_clear_missed(q, txq);
 		}
 		if (lock)
 			spin_unlock(lock);
@@ -251,8 +272,10 @@ validate:
 	*validate = true;
 
 	if ((q->flags & TCQ_F_ONETXQUEUE) &&
-	    netif_xmit_frozen_or_stopped(txq))
+	    netif_xmit_frozen_or_stopped(txq)) {
+		qdisc_maybe_clear_missed(q, txq);
 		return skb;
+	}
 
 	skb = qdisc_dequeue_skb_bad_txq(q);
 	if (unlikely(skb)) {
@@ -311,6 +334,8 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 		HARD_TX_LOCK(dev, txq, smp_processor_id());
 		if (!netif_xmit_frozen_or_stopped(txq))
 			skb = dev_hard_start_xmit(skb, dev, txq, &ret);
+		else
+			qdisc_maybe_clear_missed(q, txq);
 
 		HARD_TX_UNLOCK(dev, txq);
 	} else {
-- 
cgit v1.2.3


From b81ac7841d511d68989534eff5550269e1bf896d Mon Sep 17 00:00:00 2001
From: Jonathan Davies <jonathan.davies@nutanix.com>
Date: Fri, 14 May 2021 14:41:01 +0000
Subject: net: cdc_eem: fix URL to CDC EEM 1.0 spec

The old URL is no longer accessible.

Signed-off-by: Jonathan Davies <jonathan.davies@nutanix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_eem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/usb/cdc_eem.c b/drivers/net/usb/cdc_eem.c
index 0eeec80bec31..2e60bc1b9a6b 100644
--- a/drivers/net/usb/cdc_eem.c
+++ b/drivers/net/usb/cdc_eem.c
@@ -26,7 +26,7 @@
  * for transport over USB using a simpler USB device model than the
  * previous CDC "Ethernet Control Model" (ECM, or "CDC Ethernet").
  *
- * For details, see www.usb.org/developers/devclass_docs/CDC_EEM10.pdf
+ * For details, see https://usb.org/sites/default/files/CDC_EEM10.pdf
  *
  * This version has been tested with GIGAntIC WuaoW SIM Smart Card on 2.6.24,
  * 2.6.27 and 2.6.30rc2 kernel.
-- 
cgit v1.2.3


From c625b80b9d00f3546722cd77527f9697c8c4c911 Mon Sep 17 00:00:00 2001
From: Peter Wang <peter.wang@mediatek.com>
Date: Wed, 12 May 2021 18:01:45 +0800
Subject: scsi: ufs: ufs-mediatek: Fix power down spec violation

As per spec, e.g. JESD220E chapter 7.2, while powering off the UFS device,
RST_N signal should be between VSS(Ground) and VCCQ/VCCQ2. The power down
sequence after fixing:

Power down:

 1. Assert RST_N low

 2. Turn-off VCC

 3. Turn-off VCCQ/VCCQ2

Link: https://lore.kernel.org/r/1620813706-25331-1-git-send-email-peter.wang@mediatek.com
Reviewed-by: Stanley Chu <stanley.chu@mediatek.com>
Signed-off-by: Peter Wang <peter.wang@mediatek.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/ufs/ufs-mediatek.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/scsi/ufs/ufs-mediatek.c b/drivers/scsi/ufs/ufs-mediatek.c
index a981f261b304..aee3cfc7142a 100644
--- a/drivers/scsi/ufs/ufs-mediatek.c
+++ b/drivers/scsi/ufs/ufs-mediatek.c
@@ -922,6 +922,7 @@ static void ufs_mtk_vreg_set_lpm(struct ufs_hba *hba, bool lpm)
 static int ufs_mtk_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
 {
 	int err;
+	struct arm_smccc_res res;
 
 	if (ufshcd_is_link_hibern8(hba)) {
 		err = ufs_mtk_link_set_lpm(hba);
@@ -941,6 +942,9 @@ static int ufs_mtk_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
 			goto fail;
 	}
 
+	if (ufshcd_is_link_off(hba))
+		ufs_mtk_device_reset_ctrl(0, res);
+
 	return 0;
 fail:
 	/*
-- 
cgit v1.2.3


From 56f396146af278135c0ff958c79b5ee1bd22453d Mon Sep 17 00:00:00 2001
From: Matt Wang <wwentao@vmware.com>
Date: Tue, 11 May 2021 03:04:37 +0000
Subject: scsi: BusLogic: Fix 64-bit system enumeration error for Buslogic

Commit 391e2f25601e ("[SCSI] BusLogic: Port driver to 64-bit")
introduced a serious issue for 64-bit systems.  With this commit,
64-bit kernel will enumerate 8*15 non-existing disks.  This is caused
by the broken CCB structure.  The change from u32 data to void *data
increased CCB length on 64-bit system, which introduced an extra 4
byte offset of the CDB.  This leads to incorrect response to INQUIRY
commands during enumeration.

Fix disk enumeration failure by reverting the portion of the commit
above which switched the data pointer from u32 to void.

Link: https://lore.kernel.org/r/C325637F-1166-4340-8F0F-3BCCD59D4D54@vmware.com
Acked-by: Khalid Aziz <khalid@gonehiking.org>
Signed-off-by: Matt Wang <wwentao@vmware.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/BusLogic.c | 6 +++---
 drivers/scsi/BusLogic.h | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/BusLogic.c b/drivers/scsi/BusLogic.c
index 3ee46a843cb5..adddcd589941 100644
--- a/drivers/scsi/BusLogic.c
+++ b/drivers/scsi/BusLogic.c
@@ -2926,11 +2926,11 @@ static int blogic_qcmd_lck(struct scsi_cmnd *command,
 		ccb->opcode = BLOGIC_INITIATOR_CCB_SG;
 		ccb->datalen = count * sizeof(struct blogic_sg_seg);
 		if (blogic_multimaster_type(adapter))
-			ccb->data = (void *)((unsigned int) ccb->dma_handle +
+			ccb->data = (unsigned int) ccb->dma_handle +
 					((unsigned long) &ccb->sglist -
-					(unsigned long) ccb));
+					(unsigned long) ccb);
 		else
-			ccb->data = ccb->sglist;
+			ccb->data = virt_to_32bit_virt(ccb->sglist);
 
 		scsi_for_each_sg(command, sg, count, i) {
 			ccb->sglist[i].segbytes = sg_dma_len(sg);
diff --git a/drivers/scsi/BusLogic.h b/drivers/scsi/BusLogic.h
index a8e4a19788a7..7d1ec10f2430 100644
--- a/drivers/scsi/BusLogic.h
+++ b/drivers/scsi/BusLogic.h
@@ -806,7 +806,7 @@ struct blogic_ccb {
 	unsigned char cdblen;				/* Byte 2 */
 	unsigned char sense_datalen;			/* Byte 3 */
 	u32 datalen;					/* Bytes 4-7 */
-	void *data;					/* Bytes 8-11 */
+	u32 data;					/* Bytes 8-11 */
 	unsigned char:8;				/* Byte 12 */
 	unsigned char:8;				/* Byte 13 */
 	enum blogic_adapter_status adapter_status;	/* Byte 14 */
-- 
cgit v1.2.3


From d0b2b70eb12e9ffaf95e11b16b230a4e015a536c Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 13 May 2021 09:49:12 -0700
Subject: scsi: ufs: core: Increase the usable queue depth

With the current implementation of the UFS driver active_queues is 1
instead of 0 if all UFS request queues are idle. That causes
hctx_may_queue() to divide the queue depth by 2 when queueing a request and
hence reduces the usable queue depth.

The shared tag set code in the block layer keeps track of the number of
active request queues. blk_mq_tag_busy() is called before a request is
queued onto a hwq and blk_mq_tag_idle() is called some time after the hwq
became idle. blk_mq_tag_idle() is called from inside blk_mq_timeout_work().
Hence, blk_mq_tag_idle() is only called if a timer is associated with each
request that is submitted to a request queue that shares a tag set with
another request queue.

Adds a blk_mq_start_request() call in ufshcd_exec_dev_cmd(). This doubles
the queue depth on my test setup from 16 to 32.

In addition to increasing the usable queue depth, also fix the
documentation of the 'timeout' parameter in the header above
ufshcd_exec_dev_cmd().

Link: https://lore.kernel.org/r/20210513164912.5683-1-bvanassche@acm.org
Fixes: 7252a3603015 ("scsi: ufs: Avoid busy-waiting by eliminating tag conflicts")
Cc: Can Guo <cang@codeaurora.org>
Cc: Alim Akhtar <alim.akhtar@samsung.com>
Cc: Avri Altman <avri.altman@wdc.com>
Cc: Stanley Chu <stanley.chu@mediatek.com>
Cc: Bean Huo <beanhuo@micron.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Can Guo <cang@codeaurora.org>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/ufs/ufshcd.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 3eb54937f1d8..72fd41bfbd54 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -2842,7 +2842,7 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba,
  * ufshcd_exec_dev_cmd - API for sending device management requests
  * @hba: UFS hba
  * @cmd_type: specifies the type (NOP, Query...)
- * @timeout: time in seconds
+ * @timeout: timeout in milliseconds
  *
  * NOTE: Since there is only one available tag for device management commands,
  * it is expected you hold the hba->dev_cmd.lock mutex.
@@ -2872,6 +2872,9 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 	}
 	tag = req->tag;
 	WARN_ON_ONCE(!ufshcd_valid_tag(hba, tag));
+	/* Set the timeout such that the SCSI error handler is not activated. */
+	req->timeout = msecs_to_jiffies(2 * timeout);
+	blk_mq_start_request(req);
 
 	init_completion(&wait);
 	lrbp = &hba->lrb[tag];
-- 
cgit v1.2.3


From 22247efd822e6d263f3c8bd327f3f769aea9b1d9 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Fri, 14 May 2021 17:27:04 -0700
Subject: mm/hugetlb: fix F_SEAL_FUTURE_WRITE

Patch series "mm/hugetlb: Fix issues on file sealing and fork", v2.

Hugh reported issue with F_SEAL_FUTURE_WRITE not applied correctly to
hugetlbfs, which I can easily verify using the memfd_test program, which
seems that the program is hardly run with hugetlbfs pages (as by default
shmem).

Meanwhile I found another probably even more severe issue on that hugetlb
fork won't wr-protect child cow pages, so child can potentially write to
parent private pages.  Patch 2 addresses that.

After this series applied, "memfd_test hugetlbfs" should start to pass.

This patch (of 2):

F_SEAL_FUTURE_WRITE is missing for hugetlb starting from the first day.
There is a test program for that and it fails constantly.

$ ./memfd_test hugetlbfs
memfd-hugetlb: CREATE
memfd-hugetlb: BASIC
memfd-hugetlb: SEAL-WRITE
memfd-hugetlb: SEAL-FUTURE-WRITE
mmap() didn't fail as expected
Aborted (core dumped)

I think it's probably because no one is really running the hugetlbfs test.

Fix it by checking FUTURE_WRITE also in hugetlbfs_file_mmap() as what we
do in shmem_mmap().  Generalize a helper for that.

Link: https://lkml.kernel.org/r/20210503234356.9097-1-peterx@redhat.com
Link: https://lkml.kernel.org/r/20210503234356.9097-2-peterx@redhat.com
Fixes: ab3948f58ff84 ("mm/memfd: add an F_SEAL_FUTURE_WRITE seal to memfd")
Signed-off-by: Peter Xu <peterx@redhat.com>
Reported-by: Hugh Dickins <hughd@google.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hugetlbfs/inode.c |  5 +++++
 include/linux/mm.h   | 32 ++++++++++++++++++++++++++++++++
 mm/shmem.c           | 22 ++++------------------
 3 files changed, 41 insertions(+), 18 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a2a42335e8fd..9d9e0097c1d3 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -131,6 +131,7 @@ static void huge_pagevec_release(struct pagevec *pvec)
 static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct inode *inode = file_inode(file);
+	struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
 	loff_t len, vma_len;
 	int ret;
 	struct hstate *h = hstate_file(file);
@@ -146,6 +147,10 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
 	vma->vm_ops = &hugetlb_vm_ops;
 
+	ret = seal_check_future_write(info->seals, vma);
+	if (ret)
+		return ret;
+
 	/*
 	 * page based offset in vm_pgoff could be sufficiently large to
 	 * overflow a loff_t when converted to byte offset.  This can
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 322ec61d0da7..c274f75efcf9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3216,5 +3216,37 @@ void mem_dump_obj(void *object);
 static inline void mem_dump_obj(void *object) {}
 #endif
 
+/**
+ * seal_check_future_write - Check for F_SEAL_FUTURE_WRITE flag and handle it
+ * @seals: the seals to check
+ * @vma: the vma to operate on
+ *
+ * Check whether F_SEAL_FUTURE_WRITE is set; if so, do proper check/handling on
+ * the vma flags.  Return 0 if check pass, or <0 for errors.
+ */
+static inline int seal_check_future_write(int seals, struct vm_area_struct *vma)
+{
+	if (seals & F_SEAL_FUTURE_WRITE) {
+		/*
+		 * New PROT_WRITE and MAP_SHARED mmaps are not allowed when
+		 * "future write" seal active.
+		 */
+		if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
+			return -EPERM;
+
+		/*
+		 * Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as
+		 * MAP_SHARED and read-only, take care to not allow mprotect to
+		 * revert protections on such mappings. Do this only for shared
+		 * mappings. For private mappings, don't need to mask
+		 * VM_MAYWRITE as we still want them to be COW-writable.
+		 */
+		if (vma->vm_flags & VM_SHARED)
+			vma->vm_flags &= ~(VM_MAYWRITE);
+	}
+
+	return 0;
+}
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/mm/shmem.c b/mm/shmem.c
index a08cedefbfaa..eb131b9fb190 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2258,25 +2258,11 @@ out_nomem:
 static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct shmem_inode_info *info = SHMEM_I(file_inode(file));
+	int ret;
 
-	if (info->seals & F_SEAL_FUTURE_WRITE) {
-		/*
-		 * New PROT_WRITE and MAP_SHARED mmaps are not allowed when
-		 * "future write" seal active.
-		 */
-		if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
-			return -EPERM;
-
-		/*
-		 * Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as
-		 * MAP_SHARED and read-only, take care to not allow mprotect to
-		 * revert protections on such mappings. Do this only for shared
-		 * mappings. For private mappings, don't need to mask
-		 * VM_MAYWRITE as we still want them to be COW-writable.
-		 */
-		if (vma->vm_flags & VM_SHARED)
-			vma->vm_flags &= ~(VM_MAYWRITE);
-	}
+	ret = seal_check_future_write(info->seals, vma);
+	if (ret)
+		return ret;
 
 	/* arm64 - allow memory tagging on RAM-based files */
 	vma->vm_flags |= VM_MTE_ALLOWED;
-- 
cgit v1.2.3


From 84894e1c42e9f25c17f2888e0c0e1505cb727538 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Fri, 14 May 2021 17:27:07 -0700
Subject: mm/hugetlb: fix cow where page writtable in child

When rework early cow of pinned hugetlb pages, we moved huge_ptep_get()
upper but overlooked a side effect that the huge_ptep_get() will fetch the
pte after wr-protection.  After moving it upwards, we need explicit
wr-protect of child pte or we will keep the write bit set in the child
process, which could cause data corrution where the child can write to the
original page directly.

This issue can also be exposed by "memfd_test hugetlbfs" kselftest.

Link: https://lkml.kernel.org/r/20210503234356.9097-3-peterx@redhat.com
Fixes: 4eae4efa2c299 ("hugetlb: do early cow when page pinned on src mm")
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3db405dea3dc..95918f410c0f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4056,6 +4056,7 @@ again:
 				 * See Documentation/vm/mmu_notifier.rst
 				 */
 				huge_ptep_set_wrprotect(src, addr, src_pte);
+				entry = huge_pte_wrprotect(entry);
 			}
 
 			page_dup_rmap(ptepage, true);
-- 
cgit v1.2.3


From afe0c26d1968fe3bbef6a45df945bfeff774ca75 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Fri, 14 May 2021 17:27:10 -0700
Subject: mm, slub: move slub_debug static key enabling outside slab_mutex

Paul E.  McKenney reported [1] that commit 1f0723a4c0df ("mm, slub: enable
slub_debug static key when creating cache with explicit debug flags")
results in the lockdep complaint:

 ======================================================
 WARNING: possible circular locking dependency detected
 5.12.0+ #15 Not tainted
 ------------------------------------------------------
 rcu_torture_sta/109 is trying to acquire lock:
 ffffffff96063cd0 (cpu_hotplug_lock){++++}-{0:0}, at: static_key_enable+0x9/0x20

 but task is already holding lock:
 ffffffff96173c28 (slab_mutex){+.+.}-{3:3}, at: kmem_cache_create_usercopy+0x2d/0x250

 which lock already depends on the new lock.

 the existing dependency chain (in reverse order) is:

 -> #1 (slab_mutex){+.+.}-{3:3}:
        lock_acquire+0xb9/0x3a0
        __mutex_lock+0x8d/0x920
        slub_cpu_dead+0x15/0xf0
        cpuhp_invoke_callback+0x17a/0x7c0
        cpuhp_invoke_callback_range+0x3b/0x80
        _cpu_down+0xdf/0x2a0
        cpu_down+0x2c/0x50
        device_offline+0x82/0xb0
        remove_cpu+0x1a/0x30
        torture_offline+0x80/0x140
        torture_onoff+0x147/0x260
        kthread+0x10a/0x140
        ret_from_fork+0x22/0x30

 -> #0 (cpu_hotplug_lock){++++}-{0:0}:
        check_prev_add+0x8f/0xbf0
        __lock_acquire+0x13f0/0x1d80
        lock_acquire+0xb9/0x3a0
        cpus_read_lock+0x21/0xa0
        static_key_enable+0x9/0x20
        __kmem_cache_create+0x38d/0x430
        kmem_cache_create_usercopy+0x146/0x250
        kmem_cache_create+0xd/0x10
        rcu_torture_stats+0x79/0x280
        kthread+0x10a/0x140
        ret_from_fork+0x22/0x30

 other info that might help us debug this:

  Possible unsafe locking scenario:

        CPU0                    CPU1
        ----                    ----
   lock(slab_mutex);
                                lock(cpu_hotplug_lock);
                                lock(slab_mutex);
   lock(cpu_hotplug_lock);

  *** DEADLOCK ***

 1 lock held by rcu_torture_sta/109:
  #0: ffffffff96173c28 (slab_mutex){+.+.}-{3:3}, at: kmem_cache_create_usercopy+0x2d/0x250

 stack backtrace:
 CPU: 3 PID: 109 Comm: rcu_torture_sta Not tainted 5.12.0+ #15
 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-1ubuntu1.1 04/01/2014
 Call Trace:
  dump_stack+0x6d/0x89
  check_noncircular+0xfe/0x110
  ? lock_is_held_type+0x98/0x110
  check_prev_add+0x8f/0xbf0
  __lock_acquire+0x13f0/0x1d80
  lock_acquire+0xb9/0x3a0
  ? static_key_enable+0x9/0x20
  ? mark_held_locks+0x49/0x70
  cpus_read_lock+0x21/0xa0
  ? static_key_enable+0x9/0x20
  static_key_enable+0x9/0x20
  __kmem_cache_create+0x38d/0x430
  kmem_cache_create_usercopy+0x146/0x250
  ? rcu_torture_stats_print+0xd0/0xd0
  kmem_cache_create+0xd/0x10
  rcu_torture_stats+0x79/0x280
  ? rcu_torture_stats_print+0xd0/0xd0
  kthread+0x10a/0x140
  ? kthread_park+0x80/0x80
  ret_from_fork+0x22/0x30

This is because there's one order of locking from the hotplug callbacks:

lock(cpu_hotplug_lock); // from hotplug machinery itself
lock(slab_mutex); // in e.g. slab_mem_going_offline_callback()

And commit 1f0723a4c0df made the reverse sequence possible:
lock(slab_mutex); // in kmem_cache_create_usercopy()
lock(cpu_hotplug_lock); // kmem_cache_open() -> static_key_enable()

The simplest fix is to move static_key_enable() to a place before slab_mutex is
taken. That means kmem_cache_create_usercopy() in mm/slab_common.c which is not
ideal for SLUB-specific code, but the #ifdef CONFIG_SLUB_DEBUG makes it
at least self-contained and obvious.

[1] https://lore.kernel.org/lkml/20210502171827.GA3670492@paulmck-ThinkPad-P17-Gen-1/

Link: https://lkml.kernel.org/r/20210504120019.26791-1-vbabka@suse.cz
Fixes: 1f0723a4c0df ("mm, slub: enable slub_debug static key when creating cache with explicit debug flags")
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reported-by: Paul E. McKenney <paulmck@kernel.org>
Tested-by: Paul E. McKenney <paulmck@kernel.org>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/slab_common.c | 10 ++++++++++
 mm/slub.c        |  9 ---------
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/mm/slab_common.c b/mm/slab_common.c
index f8833d3e5d47..a4a571428c51 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -318,6 +318,16 @@ kmem_cache_create_usercopy(const char *name,
 	const char *cache_name;
 	int err;
 
+#ifdef CONFIG_SLUB_DEBUG
+	/*
+	 * If no slub_debug was enabled globally, the static key is not yet
+	 * enabled by setup_slub_debug(). Enable it if the cache is being
+	 * created with any of the debugging flags passed explicitly.
+	 */
+	if (flags & SLAB_DEBUG_FLAGS)
+		static_branch_enable(&slub_debug_enabled);
+#endif
+
 	mutex_lock(&slab_mutex);
 
 	err = kmem_cache_sanity_check(name, size);
diff --git a/mm/slub.c b/mm/slub.c
index feda53ae62ba..438fa8d4c970 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3828,15 +3828,6 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
 
 static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
 {
-#ifdef CONFIG_SLUB_DEBUG
-	/*
-	 * If no slub_debug was enabled globally, the static key is not yet
-	 * enabled by setup_slub_debug(). Enable it if the cache is being
-	 * created with any of the debugging flags passed explicitly.
-	 */
-	if (flags & SLAB_DEBUG_FLAGS)
-		static_branch_enable(&slub_debug_enabled);
-#endif
 	s->flags = kmem_cache_flags(s->size, flags, s->name);
 #ifdef CONFIG_SLAB_FREELIST_HARDENED
 	s->random = get_random_long();
-- 
cgit v1.2.3


From eb1f065f90cdcdcc704e9e2dc678931317c69a99 Mon Sep 17 00:00:00 2001
From: Alistair Popple <apopple@nvidia.com>
Date: Fri, 14 May 2021 17:27:13 -0700
Subject: kernel/resource: fix return code check in __request_free_mem_region

Splitting an earlier version of a patch that allowed calling
__request_region() while holding the resource lock into a series of
patches required changing the return code for the newly introduced
__request_region_locked().

Unfortunately this change was not carried through to a subsequent commit
56fd94919b8b ("kernel/resource: fix locking in request_free_mem_region")
in the series.  This resulted in a use-after-free due to freeing the
struct resource without properly releasing it.  Fix this by correcting the
return code check so that the struct is not freed if the request to add it
was successful.

Link: https://lkml.kernel.org/r/20210512073528.22334-1-apopple@nvidia.com
Fixes: 56fd94919b8b ("kernel/resource: fix locking in request_free_mem_region")
Signed-off-by: Alistair Popple <apopple@nvidia.com>
Reported-by: kernel test robot <oliver.sang@intel.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Muchun Song <smuchun@gmail.com>
Cc: Oliver Sang <oliver.sang@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/resource.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/resource.c b/kernel/resource.c
index 028a5ab18818..ca9f5198a01f 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -1805,7 +1805,7 @@ static struct resource *__request_free_mem_region(struct device *dev,
 				REGION_DISJOINT)
 			continue;
 
-		if (!__request_region_locked(res, &iomem_resource, addr, size,
+		if (__request_region_locked(res, &iomem_resource, addr, size,
 						name, 0))
 			break;
 
-- 
cgit v1.2.3


From d6e621de1fceb3b098ebf435ef7ea91ec4838a1a Mon Sep 17 00:00:00 2001
From: Phillip Lougher <phillip@squashfs.org.uk>
Date: Fri, 14 May 2021 17:27:16 -0700
Subject: squashfs: fix divide error in calculate_skip()

Sysbot has reported a "divide error" which has been identified as being
caused by a corrupted file_size value within the file inode.  This value
has been corrupted to a much larger value than expected.

Calculate_skip() is passed i_size_read(inode) >> msblk->block_log.  Due to
the file_size value corruption this overflows the int argument/variable in
that function, leading to the divide error.

This patch changes the function to use u64.  This will accommodate any
unexpectedly large values due to corruption.

The value returned from calculate_skip() is clamped to be never more than
SQUASHFS_CACHED_BLKS - 1, or 7.  So file_size corruption does not lead to
an unexpectedly large return result here.

Link: https://lkml.kernel.org/r/20210507152618.9447-1-phillip@squashfs.org.uk
Signed-off-by: Phillip Lougher <phillip@squashfs.org.uk>
Reported-by: <syzbot+e8f781243ce16ac2f962@syzkaller.appspotmail.com>
Reported-by: <syzbot+7b98870d4fec9447b951@syzkaller.appspotmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/squashfs/file.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index 7b1128398976..89d492916dea 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -211,11 +211,11 @@ failure:
  * If the skip factor is limited in this way then the file will use multiple
  * slots.
  */
-static inline int calculate_skip(int blocks)
+static inline int calculate_skip(u64 blocks)
 {
-	int skip = blocks / ((SQUASHFS_META_ENTRIES + 1)
+	u64 skip = blocks / ((SQUASHFS_META_ENTRIES + 1)
 		 * SQUASHFS_META_INDEXES);
-	return min(SQUASHFS_CACHED_BLKS - 1, skip + 1);
+	return min((u64) SQUASHFS_CACHED_BLKS - 1, skip + 1);
 }
 
 
-- 
cgit v1.2.3


From 7ed9d238c7dbb1fdb63ad96a6184985151b0171c Mon Sep 17 00:00:00 2001
From: Axel Rasmussen <axelrasmussen@google.com>
Date: Fri, 14 May 2021 17:27:19 -0700
Subject: userfaultfd: release page in error path to avoid BUG_ON

Consider the following sequence of events:

1. Userspace issues a UFFD ioctl, which ends up calling into
   shmem_mfill_atomic_pte(). We successfully account the blocks, we
   shmem_alloc_page(), but then the copy_from_user() fails. We return
   -ENOENT. We don't release the page we allocated.
2. Our caller detects this error code, tries the copy_from_user() after
   dropping the mmap_lock, and retries, calling back into
   shmem_mfill_atomic_pte().
3. Meanwhile, let's say another process filled up the tmpfs being used.
4. So shmem_mfill_atomic_pte() fails to account blocks this time, and
   immediately returns - without releasing the page.

This triggers a BUG_ON in our caller, which asserts that the page
should always be consumed, unless -ENOENT is returned.

To fix this, detect if we have such a "dangling" page when accounting
fails, and if so, release it before returning.

Link: https://lkml.kernel.org/r/20210428230858.348400-1-axelrasmussen@google.com
Fixes: cb658a453b93 ("userfaultfd: shmem: avoid leaking blocks and used blocks in UFFDIO_COPY")
Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
Reported-by: Hugh Dickins <hughd@google.com>
Acked-by: Hugh Dickins <hughd@google.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/shmem.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index eb131b9fb190..5d46611cba8d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2361,8 +2361,18 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
 	pgoff_t offset, max_off;
 
 	ret = -ENOMEM;
-	if (!shmem_inode_acct_block(inode, 1))
+	if (!shmem_inode_acct_block(inode, 1)) {
+		/*
+		 * We may have got a page, returned -ENOENT triggering a retry,
+		 * and now we find ourselves with -ENOMEM. Release the page, to
+		 * avoid a BUG_ON in our caller.
+		 */
+		if (unlikely(*pagep)) {
+			put_page(*pagep);
+			*pagep = NULL;
+		}
 		goto out;
+	}
 
 	if (!*pagep) {
 		page = shmem_alloc_page(gfp, info, pgoff);
-- 
cgit v1.2.3


From 628622904b8d229591134e44efd6608a7541eb89 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 14 May 2021 17:27:22 -0700
Subject: ksm: revert "use GET_KSM_PAGE_NOLOCK to get ksm page in
 remove_rmap_item_from_tree()"

This reverts commit 3e96b6a2e9ad929a3230a22f4d64a74671a0720b.  General
Protection Fault in rmap_walk_ksm() under memory pressure:
remove_rmap_item_from_tree() needs to take page lock, of course.

Link: https://lkml.kernel.org/r/alpine.LSU.2.11.2105092253500.1127@eggly.anvils
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/ksm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mm/ksm.c b/mm/ksm.c
index 6bbe314c5260..2f3aaeb34a42 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -776,11 +776,12 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
 		struct page *page;
 
 		stable_node = rmap_item->head;
-		page = get_ksm_page(stable_node, GET_KSM_PAGE_NOLOCK);
+		page = get_ksm_page(stable_node, GET_KSM_PAGE_LOCK);
 		if (!page)
 			goto out;
 
 		hlist_del(&rmap_item->hlist);
+		unlock_page(page);
 		put_page(page);
 
 		if (!hlist_empty(&stable_node->hlist))
-- 
cgit v1.2.3


From 9ddb3c14afba8bc5950ed297f02d4ae05ff35cd1 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 14 May 2021 17:27:24 -0700
Subject: mm: fix struct page layout on 32-bit systems

32-bit architectures which expect 8-byte alignment for 8-byte integers and
need 64-bit DMA addresses (arm, mips, ppc) had their struct page
inadvertently expanded in 2019.  When the dma_addr_t was added, it forced
the alignment of the union to 8 bytes, which inserted a 4 byte gap between
'flags' and the union.

Fix this by storing the dma_addr_t in one or two adjacent unsigned longs.
This restores the alignment to that of an unsigned long.  We always
store the low bits in the first word to prevent the PageTail bit from
being inadvertently set on a big endian platform.  If that happened,
get_user_pages_fast() racing against a page which was freed and
reallocated to the page_pool could dereference a bogus compound_head(),
which would be hard to trace back to this cause.

Link: https://lkml.kernel.org/r/20210510153211.1504886-1-willy@infradead.org
Fixes: c25fff7171be ("mm: add dma_addr_t to struct page")
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Tested-by: Matteo Croce <mcroce@linux.microsoft.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h |  4 ++--
 include/net/page_pool.h  | 12 +++++++++++-
 net/core/page_pool.c     | 12 +++++++-----
 3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6613b26a8894..5aacc1c10a45 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -97,10 +97,10 @@ struct page {
 		};
 		struct {	/* page_pool used by netstack */
 			/**
-			 * @dma_addr: might require a 64-bit value even on
+			 * @dma_addr: might require a 64-bit value on
 			 * 32-bit architectures.
 			 */
-			dma_addr_t dma_addr;
+			unsigned long dma_addr[2];
 		};
 		struct {	/* slab, slob and slub */
 			union {
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index 6d517a37c18b..b4b6de909c93 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -198,7 +198,17 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
 
 static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
 {
-	return page->dma_addr;
+	dma_addr_t ret = page->dma_addr[0];
+	if (sizeof(dma_addr_t) > sizeof(unsigned long))
+		ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
+	return ret;
+}
+
+static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
+{
+	page->dma_addr[0] = addr;
+	if (sizeof(dma_addr_t) > sizeof(unsigned long))
+		page->dma_addr[1] = upper_32_bits(addr);
 }
 
 static inline bool is_page_pool_compiled_in(void)
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 9ec1aa9640ad..3c4c4c7a0402 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -174,8 +174,10 @@ static void page_pool_dma_sync_for_device(struct page_pool *pool,
 					  struct page *page,
 					  unsigned int dma_sync_size)
 {
+	dma_addr_t dma_addr = page_pool_get_dma_addr(page);
+
 	dma_sync_size = min(dma_sync_size, pool->p.max_len);
-	dma_sync_single_range_for_device(pool->p.dev, page->dma_addr,
+	dma_sync_single_range_for_device(pool->p.dev, dma_addr,
 					 pool->p.offset, dma_sync_size,
 					 pool->p.dma_dir);
 }
@@ -195,7 +197,7 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
 	if (dma_mapping_error(pool->p.dev, dma))
 		return false;
 
-	page->dma_addr = dma;
+	page_pool_set_dma_addr(page, dma);
 
 	if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
 		page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
@@ -331,13 +333,13 @@ void page_pool_release_page(struct page_pool *pool, struct page *page)
 		 */
 		goto skip_dma_unmap;
 
-	dma = page->dma_addr;
+	dma = page_pool_get_dma_addr(page);
 
-	/* When page is unmapped, it cannot be returned our pool */
+	/* When page is unmapped, it cannot be returned to our pool */
 	dma_unmap_page_attrs(pool->p.dev, dma,
 			     PAGE_SIZE << pool->p.order, pool->p.dma_dir,
 			     DMA_ATTR_SKIP_CPU_SYNC);
-	page->dma_addr = 0;
+	page_pool_set_dma_addr(page, 0);
 skip_dma_unmap:
 	/* This may be the last page returned, releasing the pool, so
 	 * it is not safe to reference pool afterwards.
-- 
cgit v1.2.3


From f649dc0e0d7b509c75570ee403723660f5b72ec7 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Fri, 14 May 2021 17:27:27 -0700
Subject: kasan: fix unit tests with CONFIG_UBSAN_LOCAL_BOUNDS enabled

These tests deliberately access these arrays out of bounds, which will
cause the dynamic local bounds checks inserted by
CONFIG_UBSAN_LOCAL_BOUNDS to fail and panic the kernel.  To avoid this
problem, access the arrays via volatile pointers, which will prevent the
compiler from being able to determine the array bounds.

These accesses use volatile pointers to char (char *volatile) rather than
the more conventional pointers to volatile char (volatile char *) because
we want to prevent the compiler from making inferences about the pointer
itself (i.e.  its array bounds), not the data that it refers to.

Link: https://lkml.kernel.org/r/20210507025915.1464056-1-pcc@google.com
Link: https://linux-review.googlesource.com/id/I90b1713fbfa1bf68ff895aef099ea77b98a7c3b9
Signed-off-by: Peter Collingbourne <pcc@google.com>
Tested-by: Alexander Potapenko <glider@google.com>
Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Peter Collingbourne <pcc@google.com>
Cc: George Popescu <georgepope@android.com>
Cc: Elena Petrova <lenaptr@google.com>
Cc: Evgenii Stepanov <eugenis@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/test_kasan.c | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index dc05cfc2d12f..cacbbbdef768 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -654,8 +654,20 @@ static char global_array[10];
 
 static void kasan_global_oob(struct kunit *test)
 {
-	volatile int i = 3;
-	char *p = &global_array[ARRAY_SIZE(global_array) + i];
+	/*
+	 * Deliberate out-of-bounds access. To prevent CONFIG_UBSAN_LOCAL_BOUNDS
+	 * from failing here and panicing the kernel, access the array via a
+	 * volatile pointer, which will prevent the compiler from being able to
+	 * determine the array bounds.
+	 *
+	 * This access uses a volatile pointer to char (char *volatile) rather
+	 * than the more conventional pointer to volatile char (volatile char *)
+	 * because we want to prevent the compiler from making inferences about
+	 * the pointer itself (i.e. its array bounds), not the data that it
+	 * refers to.
+	 */
+	char *volatile array = global_array;
+	char *p = &array[ARRAY_SIZE(global_array) + 3];
 
 	/* Only generic mode instruments globals. */
 	KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);
@@ -703,8 +715,9 @@ static void ksize_uaf(struct kunit *test)
 static void kasan_stack_oob(struct kunit *test)
 {
 	char stack_array[10];
-	volatile int i = OOB_TAG_OFF;
-	char *p = &stack_array[ARRAY_SIZE(stack_array) + i];
+	/* See comment in kasan_global_oob. */
+	char *volatile array = stack_array;
+	char *p = &array[ARRAY_SIZE(stack_array) + OOB_TAG_OFF];
 
 	KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_STACK);
 
@@ -715,7 +728,9 @@ static void kasan_alloca_oob_left(struct kunit *test)
 {
 	volatile int i = 10;
 	char alloca_array[i];
-	char *p = alloca_array - 1;
+	/* See comment in kasan_global_oob. */
+	char *volatile array = alloca_array;
+	char *p = array - 1;
 
 	/* Only generic mode instruments dynamic allocas. */
 	KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);
@@ -728,7 +743,9 @@ static void kasan_alloca_oob_right(struct kunit *test)
 {
 	volatile int i = 10;
 	char alloca_array[i];
-	char *p = alloca_array + i;
+	/* See comment in kasan_global_oob. */
+	char *volatile array = alloca_array;
+	char *p = array + i;
 
 	/* Only generic mode instruments dynamic allocas. */
 	KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);
-- 
cgit v1.2.3


From 076171a67789ad0107de44c2964f2e46a7d0d7b8 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 14 May 2021 17:27:30 -0700
Subject: mm/filemap: fix readahead return types

A readahead request will not allocate more memory than can be represented
by a size_t, even on systems that have HIGHMEM available.  Change the
length functions from returning an loff_t to a size_t.

Link: https://lkml.kernel.org/r/20210510201201.1558972-1-willy@infradead.org
Fixes: 32c0a6bcaa1f57 ("btrfs: add and use readahead_batch_length")
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/iomap/buffered-io.c  | 4 ++--
 include/linux/pagemap.h | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index f2cd2034a87b..9023717c5188 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -394,7 +394,7 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
 {
 	struct inode *inode = rac->mapping->host;
 	loff_t pos = readahead_pos(rac);
-	loff_t length = readahead_length(rac);
+	size_t length = readahead_length(rac);
 	struct iomap_readpage_ctx ctx = {
 		.rac	= rac,
 	};
@@ -402,7 +402,7 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
 	trace_iomap_readahead(inode, readahead_count(rac));
 
 	while (length > 0) {
-		loff_t ret = iomap_apply(inode, pos, length, 0, ops,
+		ssize_t ret = iomap_apply(inode, pos, length, 0, ops,
 				&ctx, iomap_readahead_actor);
 		if (ret <= 0) {
 			WARN_ON_ONCE(ret == 0);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index a4bd41128bf3..e89df447fae3 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -997,9 +997,9 @@ static inline loff_t readahead_pos(struct readahead_control *rac)
  * readahead_length - The number of bytes in this readahead request.
  * @rac: The readahead request.
  */
-static inline loff_t readahead_length(struct readahead_control *rac)
+static inline size_t readahead_length(struct readahead_control *rac)
 {
-	return (loff_t)rac->_nr_pages * PAGE_SIZE;
+	return rac->_nr_pages * PAGE_SIZE;
 }
 
 /**
@@ -1024,7 +1024,7 @@ static inline unsigned int readahead_count(struct readahead_control *rac)
  * readahead_batch_length - The number of bytes in the current batch.
  * @rac: The readahead request.
  */
-static inline loff_t readahead_batch_length(struct readahead_control *rac)
+static inline size_t readahead_batch_length(struct readahead_control *rac)
 {
 	return rac->_batch_count * PAGE_SIZE;
 }
-- 
cgit v1.2.3


From c3187cf32216313fb316084efac4dab3a8459b1d Mon Sep 17 00:00:00 2001
From: Jouni Roivas <jouni.roivas@tuxera.com>
Date: Fri, 14 May 2021 17:27:33 -0700
Subject: hfsplus: prevent corruption in shrinking truncate

I believe there are some issues introduced by commit 31651c607151
("hfsplus: avoid deadlock on file truncation")

HFS+ has extent records which always contains 8 extents.  In case the
first extent record in catalog file gets full, new ones are allocated from
extents overflow file.

In case shrinking truncate happens to middle of an extent record which
locates in extents overflow file, the logic in hfsplus_file_truncate() was
changed so that call to hfs_brec_remove() is not guarded any more.

Right action would be just freeing the extents that exceed the new size
inside extent record by calling hfsplus_free_extents(), and then check if
the whole extent record should be removed.  However since the guard
(blk_cnt > start) is now after the call to hfs_brec_remove(), this has
unfortunate effect that the last matching extent record is removed
unconditionally.

To reproduce this issue, create a file which has at least 10 extents, and
then perform shrinking truncate into middle of the last extent record, so
that the number of remaining extents is not under or divisible by 8.  This
causes the last extent record (8 extents) to be removed totally instead of
truncating into middle of it.  Thus this causes corruption, and lost data.

Fix for this is simply checking if the new truncated end is below the
start of this extent record, making it safe to remove the full extent
record.  However call to hfs_brec_remove() can't be moved to it's previous
place since we're dropping ->tree_lock and it can cause a race condition
and the cached info being invalidated possibly corrupting the node data.

Another issue is related to this one.  When entering into the block
(blk_cnt > start) we are not holding the ->tree_lock.  We break out from
the loop not holding the lock, but hfs_find_exit() does unlock it.  Not
sure if it's possible for someone else to take the lock under our feet,
but it can cause hard to debug errors and premature unlocking.  Even if
there's no real risk of it, the locking should still always be kept in
balance.  Thus taking the lock now just before the check.

Link: https://lkml.kernel.org/r/20210429165139.3082828-1-jouni.roivas@tuxera.com
Fixes: 31651c607151f ("hfsplus: avoid deadlock on file truncation")
Signed-off-by: Jouni Roivas <jouni.roivas@tuxera.com>
Reviewed-by: Anton Altaparmakov <anton@tuxera.com>
Cc: Anatoly Trosinenko <anatoly.trosinenko@gmail.com>
Cc: Viacheslav Dubeyko <slava@dubeyko.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hfsplus/extents.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index a930ddd15681..7054a542689f 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -598,13 +598,15 @@ void hfsplus_file_truncate(struct inode *inode)
 		res = __hfsplus_ext_cache_extent(&fd, inode, alloc_cnt);
 		if (res)
 			break;
-		hfs_brec_remove(&fd);
 
-		mutex_unlock(&fd.tree->tree_lock);
 		start = hip->cached_start;
+		if (blk_cnt <= start)
+			hfs_brec_remove(&fd);
+		mutex_unlock(&fd.tree->tree_lock);
 		hfsplus_free_extents(sb, hip->cached_extents,
 				     alloc_cnt - start, alloc_cnt - blk_cnt);
 		hfsplus_dump_extent(hip->cached_extents);
+		mutex_lock(&fd.tree->tree_lock);
 		if (blk_cnt > start) {
 			hip->extent_state |= HFSPLUS_EXT_DIRTY;
 			break;
@@ -612,7 +614,6 @@ void hfsplus_file_truncate(struct inode *inode)
 		alloc_cnt = start;
 		hip->cached_start = hip->cached_blocks = 0;
 		hip->extent_state &= ~(HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW);
-		mutex_lock(&fd.tree->tree_lock);
 	}
 	hfs_find_exit(&fd);
 
-- 
cgit v1.2.3


From f4d3f25aced3b493e57fd4109e2bc86f0831b23e Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Fri, 14 May 2021 17:27:36 -0700
Subject: docs: admin-guide: update description for kernel.modprobe sysctl

When I added CONFIG_MODPROBE_PATH, I neglected to update Documentation/.
It's still true that this defaults to /sbin/modprobe, but now via a level
of indirection.  So document that the kernel might have been built with
something other than /sbin/modprobe as the initial value.

Link: https://lkml.kernel.org/r/20210420125324.1246826-1-linux@rasmusvillemoes.dk
Fixes: 17652f4240f7a ("modules: add CONFIG_MODPROBE_PATH")
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jessica Yu <jeyu@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/admin-guide/sysctl/kernel.rst | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 1d56a6b73a4e..7ca8df5451d4 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -483,10 +483,11 @@ modprobe
 ========
 
 The full path to the usermode helper for autoloading kernel modules,
-by default "/sbin/modprobe".  This binary is executed when the kernel
-requests a module.  For example, if userspace passes an unknown
-filesystem type to mount(), then the kernel will automatically request
-the corresponding filesystem module by executing this usermode helper.
+by default ``CONFIG_MODPROBE_PATH``, which in turn defaults to
+"/sbin/modprobe".  This binary is executed when the kernel requests a
+module.  For example, if userspace passes an unknown filesystem type
+to mount(), then the kernel will automatically request the
+corresponding filesystem module by executing this usermode helper.
 This usermode helper should insert the needed module into the kernel.
 
 This sysctl only affects module autoloading.  It has no effect on the
-- 
cgit v1.2.3


From 86d0c164272536c732853e19391de5159f860701 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Fri, 14 May 2021 17:27:39 -0700
Subject: mm/ioremap: fix iomap_max_page_shift

iomap_max_page_shift is expected to contain a page shift, so it can't be a
'bool', has to be an 'unsigned int'

And fix the default values: P4D_SHIFT is when huge iomap is allowed.

However, on some architectures (eg: powerpc book3s/64), P4D_SHIFT is not a
constant so it can't be used to initialise a static variable.  So,
initialise iomap_max_page_shift with a maximum shift supported by the
architecture, it is gated by P4D_SHIFT in vmap_try_huge_p4d() anyway.

Link: https://lkml.kernel.org/r/ad2d366015794a9f21320dcbdd0a8eb98979e9df.1620898113.git.christophe.leroy@csgroup.eu
Fixes: bbc180a5adb0 ("mm: HUGE_VMAP arch support cleanup")
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/ioremap.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mm/ioremap.c b/mm/ioremap.c
index d1dcc7e744ac..8ee0136f8cb0 100644
--- a/mm/ioremap.c
+++ b/mm/ioremap.c
@@ -16,16 +16,16 @@
 #include "pgalloc-track.h"
 
 #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
-static bool __ro_after_init iomap_max_page_shift = PAGE_SHIFT;
+static unsigned int __ro_after_init iomap_max_page_shift = BITS_PER_LONG - 1;
 
 static int __init set_nohugeiomap(char *str)
 {
-	iomap_max_page_shift = P4D_SHIFT;
+	iomap_max_page_shift = PAGE_SHIFT;
 	return 0;
 }
 early_param("nohugeiomap", set_nohugeiomap);
 #else /* CONFIG_HAVE_ARCH_HUGE_VMAP */
-static const bool iomap_max_page_shift = PAGE_SHIFT;
+static const unsigned int iomap_max_page_shift = PAGE_SHIFT;
 #endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */
 
 int ioremap_page_range(unsigned long addr,
-- 
cgit v1.2.3


From 73578af92a0fae6609b955fcc9113e50e413c80f Mon Sep 17 00:00:00 2001
From: Javed Hasan <jhasan@marvell.com>
Date: Wed, 12 May 2021 00:25:33 -0700
Subject: scsi: qedf: Add pointer checks in qedf_update_link_speed()

The following trace was observed:

 [   14.042059] Call Trace:
 [   14.042061]  <IRQ>
 [   14.042068]  qedf_link_update+0x144/0x1f0 [qedf]
 [   14.042117]  qed_link_update+0x5c/0x80 [qed]
 [   14.042135]  qed_mcp_handle_link_change+0x2d2/0x410 [qed]
 [   14.042155]  ? qed_set_ptt+0x70/0x80 [qed]
 [   14.042170]  ? qed_set_ptt+0x70/0x80 [qed]
 [   14.042186]  ? qed_rd+0x13/0x40 [qed]
 [   14.042205]  qed_mcp_handle_events+0x437/0x690 [qed]
 [   14.042221]  ? qed_set_ptt+0x70/0x80 [qed]
 [   14.042239]  qed_int_sp_dpc+0x3a6/0x3e0 [qed]
 [   14.042245]  tasklet_action_common.isra.14+0x5a/0x100
 [   14.042250]  __do_softirq+0xe4/0x2f8
 [   14.042253]  irq_exit+0xf7/0x100
 [   14.042255]  do_IRQ+0x7f/0xd0
 [   14.042257]  common_interrupt+0xf/0xf
 [   14.042259]  </IRQ>

API qedf_link_update() is getting called from QED but by that time
shost_data is not initialised. This results in a NULL pointer dereference
when we try to dereference shost_data while updating supported_speeds.

Add a NULL pointer check before dereferencing shost_data.

Link: https://lore.kernel.org/r/20210512072533.23618-1-jhasan@marvell.com
Fixes: 61d8658b4a43 ("scsi: qedf: Add QLogic FastLinQ offload FCoE driver framework.")
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Javed Hasan <jhasan@marvell.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qedf/qedf_main.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index 69f7784233f9..756231151882 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -536,7 +536,9 @@ static void qedf_update_link_speed(struct qedf_ctx *qedf,
 	if (linkmode_intersects(link->supported_caps, sup_caps))
 		lport->link_supported_speeds |= FC_PORTSPEED_20GBIT;
 
-	fc_host_supported_speeds(lport->host) = lport->link_supported_speeds;
+	if (lport->host && lport->host->shost_data)
+		fc_host_supported_speeds(lport->host) =
+			lport->link_supported_speeds;
 }
 
 static void qedf_bw_update(void *dev)
-- 
cgit v1.2.3


From 8b549c18ae81dbc36fb11e4aa08b8378c599ca95 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 14 Apr 2021 14:45:43 +0200
Subject: openrisc: Define memory barrier mb

This came up in the discussion of the requirements of qspinlock on an
architecture.  OpenRISC uses qspinlock, but it was noticed that the
memmory barrier was not defined.

Peter defined it in the mail thread writing:

    As near as I can tell this should do. The arch spec only lists
    this one instruction and the text makes it sound like a completion
    barrier.

This is correct so applying this patch.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
[shorne@gmail.com:Turned the mail into a patch]
Signed-off-by: Stafford Horne <shorne@gmail.com>
---
 arch/openrisc/include/asm/barrier.h | 9 +++++++++
 1 file changed, 9 insertions(+)
 create mode 100644 arch/openrisc/include/asm/barrier.h

diff --git a/arch/openrisc/include/asm/barrier.h b/arch/openrisc/include/asm/barrier.h
new file mode 100644
index 000000000000..7538294721be
--- /dev/null
+++ b/arch/openrisc/include/asm/barrier.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_BARRIER_H
+#define __ASM_BARRIER_H
+
+#define mb() asm volatile ("l.msync" ::: "memory")
+
+#include <asm-generic/barrier.h>
+
+#endif /* __ASM_BARRIER_H */
-- 
cgit v1.2.3


From ffb324e6f874121f7dce5bdae5e05d02baae7269 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Sat, 15 May 2021 03:00:37 +0000
Subject: tty: vt: always invoke vc->vc_sw->con_resize callback

syzbot is reporting OOB write at vga16fb_imageblit() [1], for
resize_screen() from ioctl(VT_RESIZE) returns 0 without checking whether
requested rows/columns fit the amount of memory reserved for the graphical
screen if current mode is KD_GRAPHICS.

----------
  #include <sys/types.h>
  #include <sys/stat.h>
  #include <fcntl.h>
  #include <sys/ioctl.h>
  #include <linux/kd.h>
  #include <linux/vt.h>

  int main(int argc, char *argv[])
  {
        const int fd = open("/dev/char/4:1", O_RDWR);
        struct vt_sizes vt = { 0x4100, 2 };

        ioctl(fd, KDSETMODE, KD_GRAPHICS);
        ioctl(fd, VT_RESIZE, &vt);
        ioctl(fd, KDSETMODE, KD_TEXT);
        return 0;
  }
----------

Allow framebuffer drivers to return -EINVAL, by moving vc->vc_mode !=
KD_GRAPHICS check from resize_screen() to fbcon_resize().

Link: https://syzkaller.appspot.com/bug?extid=1f29e126cf461c4de3b3 [1]
Reported-by: syzbot <syzbot+1f29e126cf461c4de3b3@syzkaller.appspotmail.com>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Tested-by: syzbot <syzbot+1f29e126cf461c4de3b3@syzkaller.appspotmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/tty/vt/vt.c              | 2 +-
 drivers/video/fbdev/core/fbcon.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 01645e87b3d5..fa1548d4f94b 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -1171,7 +1171,7 @@ static inline int resize_screen(struct vc_data *vc, int width, int height,
 	/* Resizes the resolution of the display adapater */
 	int err = 0;
 
-	if (vc->vc_mode != KD_GRAPHICS && vc->vc_sw->con_resize)
+	if (vc->vc_sw->con_resize)
 		err = vc->vc_sw->con_resize(vc, width, height, user);
 
 	return err;
diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index 3406067985b1..22bb3892f6bd 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -2019,7 +2019,7 @@ static int fbcon_resize(struct vc_data *vc, unsigned int width,
 			return -EINVAL;
 
 		pr_debug("resize now %ix%i\n", var.xres, var.yres);
-		if (con_is_visible(vc)) {
+		if (con_is_visible(vc) && vc->vc_mode == KD_TEXT) {
 			var.activate = FB_ACTIVATE_NOW |
 				FB_ACTIVATE_FORCE;
 			fb_set_var(info, &var);
-- 
cgit v1.2.3


From 5cb289bf2d7c34ca1abd794ce116c4f19185a1d4 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Fri, 14 May 2021 17:09:52 +0800
Subject: scsi: qla2xxx: Fix error return code in qla82xx_write_flash_dword()

Fix to return a negative error code from the error handling case instead of
0 as done elsewhere in this function.

Link: https://lore.kernel.org/r/20210514090952.6715-1-thunder.leizhen@huawei.com
Fixes: a9083016a531 ("[SCSI] qla2xxx: Add ISP82XX support.")
Reported-by: Hulk Robot <hulkci@huawei.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qla2xxx/qla_nx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c
index 0677295957bc..615e44af1ca6 100644
--- a/drivers/scsi/qla2xxx/qla_nx.c
+++ b/drivers/scsi/qla2xxx/qla_nx.c
@@ -1063,7 +1063,8 @@ qla82xx_write_flash_dword(struct qla_hw_data *ha, uint32_t flashaddr,
 		return ret;
 	}
 
-	if (qla82xx_flash_set_write_enable(ha))
+	ret = qla82xx_flash_set_write_enable(ha);
+	if (ret < 0)
 		goto done_write;
 
 	qla82xx_wr_32(ha, QLA82XX_ROMUSB_ROM_WDATA, data);
-- 
cgit v1.2.3


From d1acd81bd6eb685aa9fef25624fb36d297f6404e Mon Sep 17 00:00:00 2001
From: Ajish Koshy <ajish.koshy@microchip.com>
Date: Wed, 5 May 2021 17:31:03 +0530
Subject: scsi: pm80xx: Fix drives missing during rmmod/insmod loop

When driver is loaded after rmmod some drives are not showing up during
discovery.

SATA drives are directly attached to the controller connected phys.  During
device discovery, the IDENTIFY command (qc timeout (cmd 0xec)) is timing out
during revalidation. This will trigger abort from host side and controller
successfully aborts the command and returns success. Post this successful
abort response ATA library decides to mark the disk as NODEV.

To overcome this, inside pm8001_scan_start() after phy_start() call, add get
start response and wait for few milliseconds to trigger next phy start.
This millisecond delay will give sufficient time for the controller state
machine to accept next phy start.

Link: https://lore.kernel.org/r/20210505120103.24497-1-ajish.koshy@microchip.com
Signed-off-by: Ajish Koshy <ajish.koshy@microchip.com>
Signed-off-by: Viswas G <viswas.g@microchip.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/pm8001/pm8001_hwi.c  | 10 ++++++----
 drivers/scsi/pm8001/pm8001_init.c |  2 +-
 drivers/scsi/pm8001/pm8001_sas.c  |  7 ++++++-
 drivers/scsi/pm8001/pm80xx_hwi.c  | 12 ++++++------
 4 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c
index ecd06d2d7e81..71aa6af08340 100644
--- a/drivers/scsi/pm8001/pm8001_hwi.c
+++ b/drivers/scsi/pm8001/pm8001_hwi.c
@@ -3765,11 +3765,13 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
 	case HW_EVENT_PHY_START_STATUS:
 		pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_PHY_START_STATUS status = %x\n",
 			   status);
-		if (status == 0) {
+		if (status == 0)
 			phy->phy_state = 1;
-			if (pm8001_ha->flags == PM8001F_RUN_TIME &&
-					phy->enable_completion != NULL)
-				complete(phy->enable_completion);
+
+		if (pm8001_ha->flags == PM8001F_RUN_TIME &&
+				phy->enable_completion != NULL) {
+			complete(phy->enable_completion);
+			phy->enable_completion = NULL;
 		}
 		break;
 	case HW_EVENT_SAS_PHY_UP:
diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c
index 390c33df0357..af09bd282cb9 100644
--- a/drivers/scsi/pm8001/pm8001_init.c
+++ b/drivers/scsi/pm8001/pm8001_init.c
@@ -1151,8 +1151,8 @@ static int pm8001_pci_probe(struct pci_dev *pdev,
 		goto err_out_shost;
 	}
 	list_add_tail(&pm8001_ha->list, &hba_list);
-	scsi_scan_host(pm8001_ha->shost);
 	pm8001_ha->flags = PM8001F_RUN_TIME;
+	scsi_scan_host(pm8001_ha->shost);
 	return 0;
 
 err_out_shost:
diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c
index d28af413b93a..335cf37e6cb9 100644
--- a/drivers/scsi/pm8001/pm8001_sas.c
+++ b/drivers/scsi/pm8001/pm8001_sas.c
@@ -264,12 +264,17 @@ void pm8001_scan_start(struct Scsi_Host *shost)
 	int i;
 	struct pm8001_hba_info *pm8001_ha;
 	struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost);
+	DECLARE_COMPLETION_ONSTACK(completion);
 	pm8001_ha = sha->lldd_ha;
 	/* SAS_RE_INITIALIZATION not available in SPCv/ve */
 	if (pm8001_ha->chip_id == chip_8001)
 		PM8001_CHIP_DISP->sas_re_init_req(pm8001_ha);
-	for (i = 0; i < pm8001_ha->chip->n_phy; ++i)
+	for (i = 0; i < pm8001_ha->chip->n_phy; ++i) {
+		pm8001_ha->phy[i].enable_completion = &completion;
 		PM8001_CHIP_DISP->phy_start_req(pm8001_ha, i);
+		wait_for_completion(&completion);
+		msleep(300);
+	}
 }
 
 int pm8001_scan_finished(struct Scsi_Host *shost, unsigned long time)
diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c
index 4e980830f9f5..700530e969ac 100644
--- a/drivers/scsi/pm8001/pm80xx_hwi.c
+++ b/drivers/scsi/pm8001/pm80xx_hwi.c
@@ -3487,13 +3487,13 @@ static int mpi_phy_start_resp(struct pm8001_hba_info *pm8001_ha, void *piomb)
 	pm8001_dbg(pm8001_ha, INIT,
 		   "phy start resp status:0x%x, phyid:0x%x\n",
 		   status, phy_id);
-	if (status == 0) {
+	if (status == 0)
 		phy->phy_state = PHY_LINK_DOWN;
-		if (pm8001_ha->flags == PM8001F_RUN_TIME &&
-				phy->enable_completion != NULL) {
-			complete(phy->enable_completion);
-			phy->enable_completion = NULL;
-		}
+
+	if (pm8001_ha->flags == PM8001F_RUN_TIME &&
+			phy->enable_completion != NULL) {
+		complete(phy->enable_completion);
+		phy->enable_completion = NULL;
 	}
 	return 0;
 
-- 
cgit v1.2.3


From fbb80d5ad400a12ec67214a0e7e9f9497dc9e615 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Tue, 11 May 2021 20:54:28 +0800
Subject: irqchip: Remove redundant error printing

When devm_ioremap_resource() fails, a clear enough error message will be
printed by its subfunction __devm_ioremap_resource(). The error
information contains the device name, failure cause, and possibly resource
information.

Therefore, remove the error printing here to simplify code and reduce the
binary size.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210511125428.6108-2-thunder.leizhen@huawei.com
---
 drivers/irqchip/irq-mvebu-icu.c  | 4 +---
 drivers/irqchip/irq-mvebu-sei.c  | 4 +---
 drivers/irqchip/irq-stm32-exti.c | 4 +---
 3 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/irqchip/irq-mvebu-icu.c b/drivers/irqchip/irq-mvebu-icu.c
index 91adf771f185..090bc3f4f7d8 100644
--- a/drivers/irqchip/irq-mvebu-icu.c
+++ b/drivers/irqchip/irq-mvebu-icu.c
@@ -359,10 +359,8 @@ static int mvebu_icu_probe(struct platform_device *pdev)
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	icu->base = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(icu->base)) {
-		dev_err(&pdev->dev, "Failed to map icu base address.\n");
+	if (IS_ERR(icu->base))
 		return PTR_ERR(icu->base);
-	}
 
 	/*
 	 * Legacy bindings: ICU is one node with one MSI parent: force manually
diff --git a/drivers/irqchip/irq-mvebu-sei.c b/drivers/irqchip/irq-mvebu-sei.c
index 18832ccc8ff8..3a7b7a7f20ca 100644
--- a/drivers/irqchip/irq-mvebu-sei.c
+++ b/drivers/irqchip/irq-mvebu-sei.c
@@ -384,10 +384,8 @@ static int mvebu_sei_probe(struct platform_device *pdev)
 
 	sei->res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	sei->base = devm_ioremap_resource(sei->dev, sei->res);
-	if (IS_ERR(sei->base)) {
-		dev_err(sei->dev, "Failed to remap SEI resource\n");
+	if (IS_ERR(sei->base))
 		return PTR_ERR(sei->base);
-	}
 
 	/* Retrieve the SEI capabilities with the interrupt ranges */
 	sei->caps = of_device_get_match_data(&pdev->dev);
diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c
index b9db90c4aa56..4704f2ee5797 100644
--- a/drivers/irqchip/irq-stm32-exti.c
+++ b/drivers/irqchip/irq-stm32-exti.c
@@ -892,10 +892,8 @@ static int stm32_exti_probe(struct platform_device *pdev)
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	host_data->base = devm_ioremap_resource(dev, res);
-	if (IS_ERR(host_data->base)) {
-		dev_err(dev, "Unable to map registers\n");
+	if (IS_ERR(host_data->base))
 		return PTR_ERR(host_data->base);
-	}
 
 	for (i = 0; i < drv_data->bank_nr; i++)
 		stm32_exti_chip_init(host_data, i, np);
-- 
cgit v1.2.3


From c1f0616124c455c5c762b6f123e40bba5df759e6 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Sun, 16 May 2021 18:17:55 +0200
Subject: ALSA: intel8x0: Don't update period unless prepared

The interrupt handler of intel8x0 calls snd_intel8x0_update() whenever
the hardware sets the corresponding status bit for each stream.  This
works fine for most cases as long as the hardware behaves properly.
But when the hardware gives a wrong bit set, this leads to a zero-
division Oops, and reportedly, this seems what happened on a VM.

For fixing the crash, this patch adds a internal flag indicating that
the stream is ready to be updated, and check it (as well as the flag
being in suspended) to ignore such spurious update.

Cc: <stable@vger.kernel.org>
Reported-and-tested-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Link: https://lore.kernel.org/r/s5h5yzi7uh0.wl-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/intel8x0.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c
index 35903d1a1cbd..5b124c4ad572 100644
--- a/sound/pci/intel8x0.c
+++ b/sound/pci/intel8x0.c
@@ -331,6 +331,7 @@ struct ichdev {
 	unsigned int ali_slot;			/* ALI DMA slot */
 	struct ac97_pcm *pcm;
 	int pcm_open_flag;
+	unsigned int prepared:1;
 	unsigned int suspended: 1;
 };
 
@@ -691,6 +692,9 @@ static inline void snd_intel8x0_update(struct intel8x0 *chip, struct ichdev *ich
 	int status, civ, i, step;
 	int ack = 0;
 
+	if (!ichdev->prepared || ichdev->suspended)
+		return;
+
 	spin_lock_irqsave(&chip->reg_lock, flags);
 	status = igetbyte(chip, port + ichdev->roff_sr);
 	civ = igetbyte(chip, port + ICH_REG_OFF_CIV);
@@ -881,6 +885,7 @@ static int snd_intel8x0_hw_params(struct snd_pcm_substream *substream,
 	if (ichdev->pcm_open_flag) {
 		snd_ac97_pcm_close(ichdev->pcm);
 		ichdev->pcm_open_flag = 0;
+		ichdev->prepared = 0;
 	}
 	err = snd_ac97_pcm_open(ichdev->pcm, params_rate(hw_params),
 				params_channels(hw_params),
@@ -902,6 +907,7 @@ static int snd_intel8x0_hw_free(struct snd_pcm_substream *substream)
 	if (ichdev->pcm_open_flag) {
 		snd_ac97_pcm_close(ichdev->pcm);
 		ichdev->pcm_open_flag = 0;
+		ichdev->prepared = 0;
 	}
 	return 0;
 }
@@ -976,6 +982,7 @@ static int snd_intel8x0_pcm_prepare(struct snd_pcm_substream *substream)
 			ichdev->pos_shift = (runtime->sample_bits > 16) ? 2 : 1;
 	}
 	snd_intel8x0_setup_periods(chip, ichdev);
+	ichdev->prepared = 1;
 	return 0;
 }
 
-- 
cgit v1.2.3


From d07f6ca923ea0927a1024dfccafc5b53b61cfecc Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 16 May 2021 15:27:44 -0700
Subject: Linux 5.13-rc2

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 53d09c414635..0ed7e061c8e9 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 13
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
 NAME = Frozen Wasteland
 
 # *DOCUMENTATION*
-- 
cgit v1.2.3


From 9d5e8492eee017ffdaa9f0957e91d39d83163197 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Sun, 9 May 2021 16:22:54 -0700
Subject: xfs: adjust rt allocation minlen when extszhint > rtextsize

xfs_bmap_rtalloc doesn't handle realtime extent files with extent size
hints larger than the rt volume's extent size properly, because
xfs_bmap_extsize_align can adjust the offset/length parameters to try to
fit the extent size hint.

Under these conditions, minlen has to be large enough so that any
allocation returned by xfs_rtallocate_extent will be large enough to
cover at least one of the blocks that the caller asked for.  If the
allocation is too short, bmapi_write will return no mapping for the
requested range, which causes ENOSPC errors in other parts of the
filesystem.

Therefore, adjust minlen upwards to fix this.  This can be found by
running generic/263 (g/127 or g/522) with a realtime extent size hint
that's larger than the rt volume extent size.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
---
 fs/xfs/xfs_bmap_util.c | 83 ++++++++++++++++++++++++++++++++++----------------
 1 file changed, 57 insertions(+), 26 deletions(-)

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index a5e9d7d34023..c9381bf4f04b 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -71,18 +71,23 @@ xfs_zero_extent(
 #ifdef CONFIG_XFS_RT
 int
 xfs_bmap_rtalloc(
-	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
+	struct xfs_bmalloca	*ap)
 {
-	int		error;		/* error return value */
-	xfs_mount_t	*mp;		/* mount point structure */
-	xfs_extlen_t	prod = 0;	/* product factor for allocators */
-	xfs_extlen_t	mod = 0;	/* product factor for allocators */
-	xfs_extlen_t	ralen = 0;	/* realtime allocation length */
-	xfs_extlen_t	align;		/* minimum allocation alignment */
-	xfs_rtblock_t	rtb;
-
-	mp = ap->ip->i_mount;
+	struct xfs_mount	*mp = ap->ip->i_mount;
+	xfs_fileoff_t		orig_offset = ap->offset;
+	xfs_rtblock_t		rtb;
+	xfs_extlen_t		prod = 0;  /* product factor for allocators */
+	xfs_extlen_t		mod = 0;   /* product factor for allocators */
+	xfs_extlen_t		ralen = 0; /* realtime allocation length */
+	xfs_extlen_t		align;     /* minimum allocation alignment */
+	xfs_extlen_t		orig_length = ap->length;
+	xfs_extlen_t		minlen = mp->m_sb.sb_rextsize;
+	xfs_extlen_t		raminlen;
+	bool			rtlocked = false;
+	int			error;
+
 	align = xfs_get_extsz_hint(ap->ip);
+retry:
 	prod = align / mp->m_sb.sb_rextsize;
 	error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
 					align, 1, ap->eof, 0,
@@ -92,6 +97,15 @@ xfs_bmap_rtalloc(
 	ASSERT(ap->length);
 	ASSERT(ap->length % mp->m_sb.sb_rextsize == 0);
 
+	/*
+	 * If we shifted the file offset downward to satisfy an extent size
+	 * hint, increase minlen by that amount so that the allocator won't
+	 * give us an allocation that's too short to cover at least one of the
+	 * blocks that the caller asked for.
+	 */
+	if (ap->offset != orig_offset)
+		minlen += orig_offset - ap->offset;
+
 	/*
 	 * If the offset & length are not perfectly aligned
 	 * then kill prod, it will just get us in trouble.
@@ -116,10 +130,13 @@ xfs_bmap_rtalloc(
 	/*
 	 * Lock out modifications to both the RT bitmap and summary inodes
 	 */
-	xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
-	xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
-	xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
-	xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL);
+	if (!rtlocked) {
+		xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
+		xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
+		xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
+		xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL);
+		rtlocked = true;
+	}
 
 	/*
 	 * If it's an allocation to an empty file at offset 0,
@@ -144,30 +161,44 @@ xfs_bmap_rtalloc(
 	do_div(ap->blkno, mp->m_sb.sb_rextsize);
 	rtb = ap->blkno;
 	ap->length = ralen;
-	error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, ap->length,
-				&ralen, ap->wasdel, prod, &rtb);
+	raminlen = max_t(xfs_extlen_t, 1, minlen / mp->m_sb.sb_rextsize);
+	error = xfs_rtallocate_extent(ap->tp, ap->blkno, raminlen, ap->length,
+			&ralen, ap->wasdel, prod, &rtb);
 	if (error)
 		return error;
 
-	ap->blkno = rtb;
-	if (ap->blkno != NULLFSBLOCK) {
-		ap->blkno *= mp->m_sb.sb_rextsize;
-		ralen *= mp->m_sb.sb_rextsize;
-		ap->length = ralen;
-		ap->ip->i_nblocks += ralen;
+	if (rtb != NULLRTBLOCK) {
+		ap->blkno = rtb * mp->m_sb.sb_rextsize;
+		ap->length = ralen * mp->m_sb.sb_rextsize;
+		ap->ip->i_nblocks += ap->length;
 		xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
 		if (ap->wasdel)
-			ap->ip->i_delayed_blks -= ralen;
+			ap->ip->i_delayed_blks -= ap->length;
 		/*
 		 * Adjust the disk quota also. This was reserved
 		 * earlier.
 		 */
 		xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
 			ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
-					XFS_TRANS_DQ_RTBCOUNT, (long) ralen);
-	} else {
-		ap->length = 0;
+					XFS_TRANS_DQ_RTBCOUNT, ap->length);
+		return 0;
+	}
+
+	if (align > mp->m_sb.sb_rextsize) {
+		/*
+		 * We previously enlarged the request length to try to satisfy
+		 * an extent size hint.  The allocator didn't return anything,
+		 * so reset the parameters to the original values and try again
+		 * without alignment criteria.
+		 */
+		ap->offset = orig_offset;
+		ap->length = orig_length;
+		minlen = align = mp->m_sb.sb_rextsize;
+		goto retry;
 	}
+
+	ap->blkno = NULLFSBLOCK;
+	ap->length = 0;
 	return 0;
 }
 #endif /* CONFIG_XFS_RT */
-- 
cgit v1.2.3


From c6de37dd5e48b883db032aa4dc0547a4858b9f20 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 21 Apr 2021 11:58:48 -0700
Subject: tools build: Fix quiet cmd indentation

The tools quiet cmd output has mismatched indentation (and extra space
character between cmd name and target name) compared to the rest of
kbuild out:

  HOSTCC  scripts/insert-sys-cert
  LD       /srv/code/tools/objtool/arch/x86/objtool-in.o
  LD       /srv/code/tools/objtool/libsubcmd-in.o
  AR       /srv/code/tools/objtool/libsubcmd.a
  HOSTLD  scripts/genksyms/genksyms
  CC      scripts/mod/empty.o
  HOSTCC  scripts/mod/mk_elfconfig
  CC      scripts/mod/devicetable-offsets.s
  MKELF   scripts/mod/elfconfig.h
  HOSTCC  scripts/mod/modpost.o
  HOSTCC  scripts/mod/file2alias.o
  HOSTCC  scripts/mod/sumversion.o
  LD       /srv/code/tools/objtool/objtool-in.o
  LINK     /srv/code/tools/objtool/objtool
  HOSTLD  scripts/mod/modpost
  CC      kernel/bounds.s

Adjust to match the rest of kbuild.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 tools/build/Makefile.build     | 22 +++++++++++-----------
 tools/scripts/Makefile.include | 30 +++++++++++++++---------------
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build
index cd72016c3cfa..715092fc6a23 100644
--- a/tools/build/Makefile.build
+++ b/tools/build/Makefile.build
@@ -51,39 +51,39 @@ subdir-obj-y :=
 build-file := $(dir)/Build
 -include $(build-file)
 
-quiet_cmd_flex  = FLEX     $@
-quiet_cmd_bison = BISON    $@
+quiet_cmd_flex  = FLEX    $@
+quiet_cmd_bison = BISON   $@
 
 # Create directory unless it exists
-quiet_cmd_mkdir = MKDIR    $(dir $@)
+quiet_cmd_mkdir = MKDIR   $(dir $@)
       cmd_mkdir = mkdir -p $(dir $@)
      rule_mkdir = $(if $(wildcard $(dir $@)),,@$(call echo-cmd,mkdir) $(cmd_mkdir))
 
 # Compile command
-quiet_cmd_cc_o_c = CC       $@
+quiet_cmd_cc_o_c = CC      $@
       cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $<
 
-quiet_cmd_host_cc_o_c = HOSTCC   $@
+quiet_cmd_host_cc_o_c = HOSTCC  $@
       cmd_host_cc_o_c = $(HOSTCC) $(host_c_flags) -c -o $@ $<
 
-quiet_cmd_cxx_o_c = CXX      $@
+quiet_cmd_cxx_o_c = CXX     $@
       cmd_cxx_o_c = $(CXX) $(cxx_flags) -c -o $@ $<
 
-quiet_cmd_cpp_i_c = CPP      $@
+quiet_cmd_cpp_i_c = CPP     $@
       cmd_cpp_i_c = $(CC) $(c_flags) -E -o $@ $<
 
-quiet_cmd_cc_s_c = AS       $@
+quiet_cmd_cc_s_c = AS      $@
       cmd_cc_s_c = $(CC) $(c_flags) -S -o $@ $<
 
-quiet_cmd_gen = GEN      $@
+quiet_cmd_gen = GEN     $@
 
 # Link agregate command
 # If there's nothing to link, create empty $@ object.
-quiet_cmd_ld_multi = LD       $@
+quiet_cmd_ld_multi = LD      $@
       cmd_ld_multi = $(if $(strip $(obj-y)),\
                      $(LD) -r -o $@  $(filter $(obj-y),$^),rm -f $@; $(AR) rcs $@)
 
-quiet_cmd_host_ld_multi = HOSTLD   $@
+quiet_cmd_host_ld_multi = HOSTLD  $@
       cmd_host_ld_multi = $(if $(strip $(obj-y)),\
                           $(HOSTLD) -r -o $@  $(filter $(obj-y),$^),rm -f $@; $(HOSTAR) rcs $@)
 
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index f9271f3ea912..071312f5eb92 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -131,29 +131,29 @@ QUIET_SUBDIR1  =
 
 ifneq ($(silent),1)
   ifneq ($(V),1)
-	QUIET_CC       = @echo '  CC       '$@;
-	QUIET_CC_FPIC  = @echo '  CC FPIC  '$@;
-	QUIET_CLANG    = @echo '  CLANG    '$@;
-	QUIET_AR       = @echo '  AR       '$@;
-	QUIET_LINK     = @echo '  LINK     '$@;
-	QUIET_MKDIR    = @echo '  MKDIR    '$@;
-	QUIET_GEN      = @echo '  GEN      '$@;
+	QUIET_CC       = @echo '  CC      '$@;
+	QUIET_CC_FPIC  = @echo '  CC FPIC '$@;
+	QUIET_CLANG    = @echo '  CLANG   '$@;
+	QUIET_AR       = @echo '  AR      '$@;
+	QUIET_LINK     = @echo '  LINK    '$@;
+	QUIET_MKDIR    = @echo '  MKDIR   '$@;
+	QUIET_GEN      = @echo '  GEN     '$@;
 	QUIET_SUBDIR0  = +@subdir=
 	QUIET_SUBDIR1  = ;$(NO_SUBDIR) \
-			  echo '  SUBDIR   '$$subdir; \
+			  echo '  SUBDIR  '$$subdir; \
 			 $(MAKE) $(PRINT_DIR) -C $$subdir
-	QUIET_FLEX     = @echo '  FLEX     '$@;
-	QUIET_BISON    = @echo '  BISON    '$@;
-	QUIET_GENSKEL  = @echo '  GEN-SKEL '$@;
+	QUIET_FLEX     = @echo '  FLEX    '$@;
+	QUIET_BISON    = @echo '  BISON   '$@;
+	QUIET_GENSKEL  = @echo '  GENSKEL '$@;
 
 	descend = \
-		+@echo	       '  DESCEND  '$(1); \
+		+@echo	       '  DESCEND '$(1); \
 		mkdir -p $(OUTPUT)$(1) && \
 		$(MAKE) $(COMMAND_O) subdir=$(if $(subdir),$(subdir)/$(1),$(1)) $(PRINT_DIR) -C $(1) $(2)
 
-	QUIET_CLEAN    = @printf '  CLEAN    %s\n' $1;
-	QUIET_INSTALL  = @printf '  INSTALL  %s\n' $1;
-	QUIET_UNINST   = @printf '  UNINST   %s\n' $1;
+	QUIET_CLEAN    = @printf '  CLEAN   %s\n' $1;
+	QUIET_INSTALL  = @printf '  INSTALL %s\n' $1;
+	QUIET_UNINST   = @printf '  UNINST  %s\n' $1;
   endif
 endif
 
-- 
cgit v1.2.3


From 98a499a11ecdd8cb91d03dd5c034aaf7422f2deb Mon Sep 17 00:00:00 2001
From: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Date: Thu, 13 May 2021 18:24:02 +0200
Subject: scripts/jobserver-exec: Fix a typo ("envirnoment")
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/jobserver-exec | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/jobserver-exec b/scripts/jobserver-exec
index 48d141e3ec56..8762887a970c 100755
--- a/scripts/jobserver-exec
+++ b/scripts/jobserver-exec
@@ -10,7 +10,7 @@ from __future__ import print_function
 import os, sys, errno
 import subprocess
 
-# Extract and prepare jobserver file descriptors from envirnoment.
+# Extract and prepare jobserver file descriptors from environment.
 claim = 0
 jobs = b""
 try:
-- 
cgit v1.2.3


From c93db682cfb213501881072a9200a48ce1dc3c3f Mon Sep 17 00:00:00 2001
From: Michal Kubecek <mkubecek@suse.cz>
Date: Sat, 15 May 2021 12:11:13 +0200
Subject: kbuild: dummy-tools: adjust to stricter stackprotector check

Commit 3fb0fdb3bbe7 ("x86/stackprotector/32: Make the canary into a regular
percpu variable") modified the stackprotector check on 32-bit x86 to check
if gcc supports using %fs as canary. Adjust dummy-tools gcc script to pass
this new test by returning "%fs" rather than "%gs" if it detects
-mstack-protector-guard-reg=fs on command line.

Fixes: 3fb0fdb3bbe7 ("x86/stackprotector/32: Make the canary into a regular percpu variable")
Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/dummy-tools/gcc | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/scripts/dummy-tools/gcc b/scripts/dummy-tools/gcc
index f6d543725f1e..b2483149bbe5 100755
--- a/scripts/dummy-tools/gcc
+++ b/scripts/dummy-tools/gcc
@@ -76,7 +76,11 @@ fi
 if arg_contain -S "$@"; then
 	# For scripts/gcc-x86-*-has-stack-protector.sh
 	if arg_contain -fstack-protector "$@"; then
-		echo "%gs"
+		if arg_contain -mstack-protector-guard-reg=fs "$@"; then
+			echo "%fs"
+		else
+			echo "%gs"
+		fi
 		exit 0
 	fi
 
-- 
cgit v1.2.3


From 4236a26a6b998c8c4fdc0117b8848a38789c48ae Mon Sep 17 00:00:00 2001
From: wenhuizhang <wenhui@gwmail.gwu.edu>
Date: Thu, 13 May 2021 12:55:16 -0400
Subject: cifs: remove deadstore in cifs_close_all_deferred_files()

Deadstore detected by Lukas Bulwahn's CodeChecker Tool (ELISA group).

line 741 struct cifsInodeInfo *cinode;
line 747 cinode = CIFS_I(d_inode(cfile->dentry));
could be deleted.

cinode on filesystem should not be deleted when files are closed,
they are representations of some data fields on a physical disk,
thus no further action is required.
The virtual inode on vfs will be handled by vfs automatically,
and the denotation is inode, which is different from the cinode.

Signed-off-by: wenhuizhang <wenhui@gwmail.gwu.edu>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/misc.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 524dbdfb7184..801a5300f765 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -738,13 +738,11 @@ void
 cifs_close_all_deferred_files(struct cifs_tcon *tcon)
 {
 	struct cifsFileInfo *cfile;
-	struct cifsInodeInfo *cinode;
 	struct list_head *tmp;
 
 	spin_lock(&tcon->open_file_lock);
 	list_for_each(tmp, &tcon->openFileList) {
 		cfile = list_entry(tmp, struct cifsFileInfo, tlist);
-		cinode = CIFS_I(d_inode(cfile->dentry));
 		if (delayed_work_pending(&cfile->deferred))
 			mod_delayed_work(deferredclose_wq, &cfile->deferred, 0);
 	}
-- 
cgit v1.2.3


From 055f23b74b20f2824ce33047b4cf2e2aa856bf3b Mon Sep 17 00:00:00 2001
From: Jessica Yu <jeyu@kernel.org>
Date: Wed, 12 May 2021 15:45:46 +0200
Subject: module: check for exit sections in layout_sections() instead of
 module_init_section()

Previously, when CONFIG_MODULE_UNLOAD=n, the module loader just does not
attempt to load exit sections since it never expects that any code in those
sections will ever execute. However, dynamic code patching (alternatives,
jump_label and static_call) can have sites in __exit code, even if __exit is
never executed. Therefore __exit must be present at runtime, at least for as
long as __init code is.

Commit 33121347fb1c ("module: treat exit sections the same as init
sections when !CONFIG_MODULE_UNLOAD") solves the requirements of
jump_labels and static_calls by putting the exit sections in the init
region of the module so that they are at least present at init, and
discarded afterwards. It does this by including a check for exit
sections in module_init_section(), so that it also returns true for exit
sections, and the module loader will automatically sort them in the init
region of the module.

However, the solution there was not completely arch-independent. ARM is
a special case where it supplies its own module_{init, exit}_section()
functions. Instead of pushing the exit section checks into
module_init_section(), just implement the exit section check in
layout_sections(), so that we don't have to touch arch-dependent code.

Fixes: 33121347fb1c ("module: treat exit sections the same as init sections when !CONFIG_MODULE_UNLOAD")
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
---
 kernel/module.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/kernel/module.c b/kernel/module.c
index b5dd92e35b02..7e78dfabca97 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2401,6 +2401,15 @@ static long get_offset(struct module *mod, unsigned int *size,
 	return ret;
 }
 
+static bool module_init_layout_section(const char *sname)
+{
+#ifndef CONFIG_MODULE_UNLOAD
+	if (module_exit_section(sname))
+		return true;
+#endif
+	return module_init_section(sname);
+}
+
 /*
  * Lay out the SHF_ALLOC sections in a way not dissimilar to how ld
  * might -- code, read-only data, read-write data, small data.  Tally
@@ -2435,7 +2444,7 @@ static void layout_sections(struct module *mod, struct load_info *info)
 			if ((s->sh_flags & masks[m][0]) != masks[m][0]
 			    || (s->sh_flags & masks[m][1])
 			    || s->sh_entsize != ~0UL
-			    || module_init_section(sname))
+			    || module_init_layout_section(sname))
 				continue;
 			s->sh_entsize = get_offset(mod, &mod->core_layout.size, s, i);
 			pr_debug("\t%s\n", sname);
@@ -2468,7 +2477,7 @@ static void layout_sections(struct module *mod, struct load_info *info)
 			if ((s->sh_flags & masks[m][0]) != masks[m][0]
 			    || (s->sh_flags & masks[m][1])
 			    || s->sh_entsize != ~0UL
-			    || !module_init_section(sname))
+			    || !module_init_layout_section(sname))
 				continue;
 			s->sh_entsize = (get_offset(mod, &mod->init_layout.size, s, i)
 					 | INIT_OFFSET_MASK);
@@ -2807,11 +2816,7 @@ void * __weak module_alloc(unsigned long size)
 
 bool __weak module_init_section(const char *name)
 {
-#ifndef CONFIG_MODULE_UNLOAD
-	return strstarts(name, ".init") || module_exit_section(name);
-#else
 	return strstarts(name, ".init");
-#endif
 }
 
 bool __weak module_exit_section(const char *name)
-- 
cgit v1.2.3


From 145e06b58f8625becc61792a0554726314297a85 Mon Sep 17 00:00:00 2001
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Date: Thu, 13 May 2021 16:39:02 +0800
Subject: drm/i915/gvt: Move mdev attribute groups into kvmgt module

As kvmgt module contains all handling for VFIO/mdev, leaving mdev attribute
groups in gvt module caused dependency issue. Although it was there for possible
other hypervisor usage, that turns out never to be true. So this moves all mdev
handling into kvmgt module completely to resolve dependency issue.

With this fix, no config workaround is required. So revert previous workaround
commits: adaeb718d46f ("vfio/gvt: fix DRM_I915_GVT dependency on VFIO_MDEV")
and 07e543f4f9d1 ("vfio/gvt: Make DRM_I915_GVT depend on VFIO_MDEV").

Reviewed-by: Colin Xu <colin.xu@intel.com>
Cc: Arnd Bergmann <arnd@kernel.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20210513083902.2822350-1-zhenyuw@linux.intel.com
---
 drivers/gpu/drm/i915/Kconfig         |   1 -
 drivers/gpu/drm/i915/gvt/gvt.c       | 124 +----------------------------------
 drivers/gpu/drm/i915/gvt/gvt.h       |   3 -
 drivers/gpu/drm/i915/gvt/hypercall.h |   2 +-
 drivers/gpu/drm/i915/gvt/kvmgt.c     | 122 +++++++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/gvt/mpt.h       |   4 +-
 6 files changed, 118 insertions(+), 138 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 69f57ca9c68d..93f4d059fc89 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -102,7 +102,6 @@ config DRM_I915_GVT
 	bool "Enable Intel GVT-g graphics virtualization host support"
 	depends on DRM_I915
 	depends on 64BIT
-	depends on VFIO_MDEV=y || VFIO_MDEV=DRM_I915
 	default n
 	help
 	  Choose this option if you want to enable Intel GVT-g graphics
diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c
index e7c2babcee8b..cbac409f6c8a 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.c
+++ b/drivers/gpu/drm/i915/gvt/gvt.c
@@ -46,118 +46,6 @@ static const char * const supported_hypervisors[] = {
 	[INTEL_GVT_HYPERVISOR_KVM] = "KVM",
 };
 
-static struct intel_vgpu_type *
-intel_gvt_find_vgpu_type(struct intel_gvt *gvt, unsigned int type_group_id)
-{
-	if (WARN_ON(type_group_id >= gvt->num_types))
-		return NULL;
-	return &gvt->types[type_group_id];
-}
-
-static ssize_t available_instances_show(struct mdev_type *mtype,
-					struct mdev_type_attribute *attr,
-					char *buf)
-{
-	struct intel_vgpu_type *type;
-	unsigned int num = 0;
-	void *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt;
-
-	type = intel_gvt_find_vgpu_type(gvt, mtype_get_type_group_id(mtype));
-	if (!type)
-		num = 0;
-	else
-		num = type->avail_instance;
-
-	return sprintf(buf, "%u\n", num);
-}
-
-static ssize_t device_api_show(struct mdev_type *mtype,
-			       struct mdev_type_attribute *attr, char *buf)
-{
-	return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
-}
-
-static ssize_t description_show(struct mdev_type *mtype,
-				struct mdev_type_attribute *attr, char *buf)
-{
-	struct intel_vgpu_type *type;
-	void *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt;
-
-	type = intel_gvt_find_vgpu_type(gvt, mtype_get_type_group_id(mtype));
-	if (!type)
-		return 0;
-
-	return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n"
-		       "fence: %d\nresolution: %s\n"
-		       "weight: %d\n",
-		       BYTES_TO_MB(type->low_gm_size),
-		       BYTES_TO_MB(type->high_gm_size),
-		       type->fence, vgpu_edid_str(type->resolution),
-		       type->weight);
-}
-
-static MDEV_TYPE_ATTR_RO(available_instances);
-static MDEV_TYPE_ATTR_RO(device_api);
-static MDEV_TYPE_ATTR_RO(description);
-
-static struct attribute *gvt_type_attrs[] = {
-	&mdev_type_attr_available_instances.attr,
-	&mdev_type_attr_device_api.attr,
-	&mdev_type_attr_description.attr,
-	NULL,
-};
-
-static struct attribute_group *gvt_vgpu_type_groups[] = {
-	[0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL,
-};
-
-static bool intel_get_gvt_attrs(struct attribute_group ***intel_vgpu_type_groups)
-{
-	*intel_vgpu_type_groups = gvt_vgpu_type_groups;
-	return true;
-}
-
-static int intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt)
-{
-	int i, j;
-	struct intel_vgpu_type *type;
-	struct attribute_group *group;
-
-	for (i = 0; i < gvt->num_types; i++) {
-		type = &gvt->types[i];
-
-		group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL);
-		if (WARN_ON(!group))
-			goto unwind;
-
-		group->name = type->name;
-		group->attrs = gvt_type_attrs;
-		gvt_vgpu_type_groups[i] = group;
-	}
-
-	return 0;
-
-unwind:
-	for (j = 0; j < i; j++) {
-		group = gvt_vgpu_type_groups[j];
-		kfree(group);
-	}
-
-	return -ENOMEM;
-}
-
-static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
-{
-	int i;
-	struct attribute_group *group;
-
-	for (i = 0; i < gvt->num_types; i++) {
-		group = gvt_vgpu_type_groups[i];
-		gvt_vgpu_type_groups[i] = NULL;
-		kfree(group);
-	}
-}
-
 static const struct intel_gvt_ops intel_gvt_ops = {
 	.emulate_cfg_read = intel_vgpu_emulate_cfg_read,
 	.emulate_cfg_write = intel_vgpu_emulate_cfg_write,
@@ -169,8 +57,6 @@ static const struct intel_gvt_ops intel_gvt_ops = {
 	.vgpu_reset = intel_gvt_reset_vgpu,
 	.vgpu_activate = intel_gvt_activate_vgpu,
 	.vgpu_deactivate = intel_gvt_deactivate_vgpu,
-	.gvt_find_vgpu_type = intel_gvt_find_vgpu_type,
-	.get_gvt_attrs = intel_get_gvt_attrs,
 	.vgpu_query_plane = intel_vgpu_query_plane,
 	.vgpu_get_dmabuf = intel_vgpu_get_dmabuf,
 	.write_protect_handler = intel_vgpu_page_track_handler,
@@ -274,7 +160,6 @@ void intel_gvt_clean_device(struct drm_i915_private *i915)
 		return;
 
 	intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu);
-	intel_gvt_cleanup_vgpu_type_groups(gvt);
 	intel_gvt_clean_vgpu_types(gvt);
 
 	intel_gvt_debugfs_clean(gvt);
@@ -363,12 +248,6 @@ int intel_gvt_init_device(struct drm_i915_private *i915)
 	if (ret)
 		goto out_clean_thread;
 
-	ret = intel_gvt_init_vgpu_type_groups(gvt);
-	if (ret) {
-		gvt_err("failed to init vgpu type groups: %d\n", ret);
-		goto out_clean_types;
-	}
-
 	vgpu = intel_gvt_create_idle_vgpu(gvt);
 	if (IS_ERR(vgpu)) {
 		ret = PTR_ERR(vgpu);
@@ -454,7 +333,8 @@ EXPORT_SYMBOL_GPL(intel_gvt_register_hypervisor);
 void
 intel_gvt_unregister_hypervisor(void)
 {
-	intel_gvt_hypervisor_host_exit(intel_gvt_host.dev);
+	void *gvt = (void *)kdev_to_i915(intel_gvt_host.dev)->gvt;
+	intel_gvt_hypervisor_host_exit(intel_gvt_host.dev, gvt);
 	module_put(THIS_MODULE);
 }
 EXPORT_SYMBOL_GPL(intel_gvt_unregister_hypervisor);
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 88ab360fcb31..0c0615602343 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -574,9 +574,6 @@ struct intel_gvt_ops {
 	void (*vgpu_reset)(struct intel_vgpu *);
 	void (*vgpu_activate)(struct intel_vgpu *);
 	void (*vgpu_deactivate)(struct intel_vgpu *);
-	struct intel_vgpu_type *(*gvt_find_vgpu_type)(
-		struct intel_gvt *gvt, unsigned int type_group_id);
-	bool (*get_gvt_attrs)(struct attribute_group ***intel_vgpu_type_groups);
 	int (*vgpu_query_plane)(struct intel_vgpu *vgpu, void *);
 	int (*vgpu_get_dmabuf)(struct intel_vgpu *vgpu, unsigned int);
 	int (*write_protect_handler)(struct intel_vgpu *, u64, void *,
diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h
index b79da5124f83..f33e3cbd0439 100644
--- a/drivers/gpu/drm/i915/gvt/hypercall.h
+++ b/drivers/gpu/drm/i915/gvt/hypercall.h
@@ -49,7 +49,7 @@ enum hypervisor_type {
 struct intel_gvt_mpt {
 	enum hypervisor_type type;
 	int (*host_init)(struct device *dev, void *gvt, const void *ops);
-	void (*host_exit)(struct device *dev);
+	void (*host_exit)(struct device *dev, void *gvt);
 	int (*attach_vgpu)(void *vgpu, unsigned long *handle);
 	void (*detach_vgpu)(void *vgpu);
 	int (*inject_msi)(unsigned long handle, u32 addr, u16 data);
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 65ff43cfc0f7..48b4d4cf805d 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -144,6 +144,104 @@ static inline bool handle_valid(unsigned long handle)
 	return !!(handle & ~0xff);
 }
 
+static ssize_t available_instances_show(struct mdev_type *mtype,
+					struct mdev_type_attribute *attr,
+					char *buf)
+{
+	struct intel_vgpu_type *type;
+	unsigned int num = 0;
+	struct intel_gvt *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt;
+
+	type = &gvt->types[mtype_get_type_group_id(mtype)];
+	if (!type)
+		num = 0;
+	else
+		num = type->avail_instance;
+
+	return sprintf(buf, "%u\n", num);
+}
+
+static ssize_t device_api_show(struct mdev_type *mtype,
+			       struct mdev_type_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
+}
+
+static ssize_t description_show(struct mdev_type *mtype,
+				struct mdev_type_attribute *attr, char *buf)
+{
+	struct intel_vgpu_type *type;
+	struct intel_gvt *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt;
+
+	type = &gvt->types[mtype_get_type_group_id(mtype)];
+	if (!type)
+		return 0;
+
+	return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n"
+		       "fence: %d\nresolution: %s\n"
+		       "weight: %d\n",
+		       BYTES_TO_MB(type->low_gm_size),
+		       BYTES_TO_MB(type->high_gm_size),
+		       type->fence, vgpu_edid_str(type->resolution),
+		       type->weight);
+}
+
+static MDEV_TYPE_ATTR_RO(available_instances);
+static MDEV_TYPE_ATTR_RO(device_api);
+static MDEV_TYPE_ATTR_RO(description);
+
+static struct attribute *gvt_type_attrs[] = {
+	&mdev_type_attr_available_instances.attr,
+	&mdev_type_attr_device_api.attr,
+	&mdev_type_attr_description.attr,
+	NULL,
+};
+
+static struct attribute_group *gvt_vgpu_type_groups[] = {
+	[0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL,
+};
+
+static int intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt)
+{
+	int i, j;
+	struct intel_vgpu_type *type;
+	struct attribute_group *group;
+
+	for (i = 0; i < gvt->num_types; i++) {
+		type = &gvt->types[i];
+
+		group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL);
+		if (!group)
+			goto unwind;
+
+		group->name = type->name;
+		group->attrs = gvt_type_attrs;
+		gvt_vgpu_type_groups[i] = group;
+	}
+
+	return 0;
+
+unwind:
+	for (j = 0; j < i; j++) {
+		group = gvt_vgpu_type_groups[j];
+		kfree(group);
+	}
+
+	return -ENOMEM;
+}
+
+static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
+{
+	int i;
+	struct attribute_group *group;
+
+	for (i = 0; i < gvt->num_types; i++) {
+		group = gvt_vgpu_type_groups[i];
+		gvt_vgpu_type_groups[i] = NULL;
+		kfree(group);
+	}
+}
+
 static int kvmgt_guest_init(struct mdev_device *mdev);
 static void intel_vgpu_release_work(struct work_struct *work);
 static bool kvmgt_guest_exit(struct kvmgt_guest_info *info);
@@ -694,14 +792,13 @@ static int intel_vgpu_create(struct mdev_device *mdev)
 	struct intel_vgpu *vgpu = NULL;
 	struct intel_vgpu_type *type;
 	struct device *pdev;
-	void *gvt;
+	struct intel_gvt *gvt;
 	int ret;
 
 	pdev = mdev_parent_dev(mdev);
 	gvt = kdev_to_i915(pdev)->gvt;
 
-	type = intel_gvt_ops->gvt_find_vgpu_type(gvt,
-						 mdev_get_type_group_id(mdev));
+	type = &gvt->types[mdev_get_type_group_id(mdev)];
 	if (!type) {
 		ret = -EINVAL;
 		goto out;
@@ -1667,19 +1764,26 @@ static struct mdev_parent_ops intel_vgpu_ops = {
 
 static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops)
 {
-	struct attribute_group **kvm_vgpu_type_groups;
+	int ret;
+
+	ret = intel_gvt_init_vgpu_type_groups((struct intel_gvt *)gvt);
+	if (ret)
+		return ret;
 
 	intel_gvt_ops = ops;
-	if (!intel_gvt_ops->get_gvt_attrs(&kvm_vgpu_type_groups))
-		return -EFAULT;
-	intel_vgpu_ops.supported_type_groups = kvm_vgpu_type_groups;
+	intel_vgpu_ops.supported_type_groups = gvt_vgpu_type_groups;
 
-	return mdev_register_device(dev, &intel_vgpu_ops);
+	ret = mdev_register_device(dev, &intel_vgpu_ops);
+	if (ret)
+		intel_gvt_cleanup_vgpu_type_groups((struct intel_gvt *)gvt);
+
+	return ret;
 }
 
-static void kvmgt_host_exit(struct device *dev)
+static void kvmgt_host_exit(struct device *dev, void *gvt)
 {
 	mdev_unregister_device(dev);
+	intel_gvt_cleanup_vgpu_type_groups((struct intel_gvt *)gvt);
 }
 
 static int kvmgt_page_track_add(unsigned long handle, u64 gfn)
diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h
index 550a456e936f..e6c5a792a49a 100644
--- a/drivers/gpu/drm/i915/gvt/mpt.h
+++ b/drivers/gpu/drm/i915/gvt/mpt.h
@@ -63,13 +63,13 @@ static inline int intel_gvt_hypervisor_host_init(struct device *dev,
 /**
  * intel_gvt_hypervisor_host_exit - exit GVT-g host side
  */
-static inline void intel_gvt_hypervisor_host_exit(struct device *dev)
+static inline void intel_gvt_hypervisor_host_exit(struct device *dev, void *gvt)
 {
 	/* optional to provide */
 	if (!intel_gvt_host.mpt->host_exit)
 		return;
 
-	intel_gvt_host.mpt->host_exit(dev);
+	intel_gvt_host.mpt->host_exit(dev, gvt);
 }
 
 /**
-- 
cgit v1.2.3


From b80bfc59c60d8a006fdd7a33352732911ee51397 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Mon, 5 Apr 2021 20:15:24 +0200
Subject: drm/exynos: correct exynos_drm_fimd kerneldoc

Correct the kerneldoc of fimd_shadow_protect_win() to fix W=1 warnings:

  drivers/gpu/drm/exynos/exynos_drm_fimd.c:734: warning:
    expecting prototype for shadow_protect_win(). Prototype was for fimd_shadow_protect_win() instead

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_fimd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index 49a2e0c53918..ae576122873e 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -723,7 +723,7 @@ static void fimd_win_set_colkey(struct fimd_context *ctx, unsigned int win)
 }
 
 /**
- * shadow_protect_win() - disable updating values from shadow registers at vsync
+ * fimd_shadow_protect_win() - disable updating values from shadow registers at vsync
  *
  * @ctx: local driver data
  * @win: window to protect registers for
-- 
cgit v1.2.3


From 04562956fd41fb22645e47a00cd5cbd601ce4bdd Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Tue, 11 May 2021 17:40:04 +0800
Subject: drm/exynos: Remove redundant error printing in exynos_dsi_probe()

When devm_ioremap_resource() fails, a clear enough error message will be
printed by its subfunction __devm_ioremap_resource(). The error
information contains the device name, failure cause, and possibly resource
information.

Therefore, remove the error printing here to simplify code and reduce the
binary size.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_dsi.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
index 44e402b7cdfb..2d2fe5ab26e7 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
@@ -1786,10 +1786,8 @@ static int exynos_dsi_probe(struct platform_device *pdev)
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	dsi->reg_base = devm_ioremap_resource(dev, res);
-	if (IS_ERR(dsi->reg_base)) {
-		dev_err(dev, "failed to remap io region\n");
+	if (IS_ERR(dsi->reg_base))
 		return PTR_ERR(dsi->reg_base);
-	}
 
 	dsi->phy = devm_phy_get(dev, "dsim");
 	if (IS_ERR(dsi->phy)) {
-- 
cgit v1.2.3


From a470c5665b3b918c31bcc912234862803b10ba00 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Tue, 11 May 2021 19:27:33 +0800
Subject: drm/exynos/decon5433: Remove redundant error printing in
 exynos5433_decon_probe()

When devm_ioremap_resource() fails, a clear enough error message will be
printed by its subfunction __devm_ioremap_resource(). The error
information contains the device name, failure cause, and possibly resource
information.

Therefore, remove the error printing here to simplify code and reduce the
binary size.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index b9a4b7670a89..197b97341cad 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -815,10 +815,8 @@ static int exynos5433_decon_probe(struct platform_device *pdev)
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	ctx->addr = devm_ioremap_resource(dev, res);
-	if (IS_ERR(ctx->addr)) {
-		dev_err(dev, "ioremap failed\n");
+	if (IS_ERR(ctx->addr))
 		return PTR_ERR(ctx->addr);
-	}
 
 	ret = decon_conf_irq(ctx, "vsync", decon_irq_handler, 0);
 	if (ret < 0)
-- 
cgit v1.2.3


From 5b9fedb31e476693c90d8ee040e7d4c51b3e7cc4 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 17 May 2021 14:39:56 +0200
Subject: quota: Disable quotactl_path syscall

In commit fa8b90070a80 ("quota: wire up quotactl_path") we have wired up
new quotactl_path syscall. However some people in LWN discussion have
objected that the path based syscall is missing dirfd and flags argument
which is mostly standard for contemporary path based syscalls. Indeed
they have a point and after a discussion with Christian Brauner and
Sascha Hauer I've decided to disable the syscall for now and update its
API. Since there is no userspace currently using that syscall and it
hasn't been released in any major release, we should be fine.

CC: Christian Brauner <christian.brauner@ubuntu.com>
CC: Sascha Hauer <s.hauer@pengutronix.de>
Link: https://lore.kernel.org/lkml/20210512153621.n5u43jsytbik4yze@wittgenstein
Signed-off-by: Jan Kara <jack@suse.cz>
---
 arch/alpha/kernel/syscalls/syscall.tbl      | 2 +-
 arch/arm/tools/syscall.tbl                  | 2 +-
 arch/arm64/include/asm/unistd32.h           | 3 +--
 arch/ia64/kernel/syscalls/syscall.tbl       | 2 +-
 arch/m68k/kernel/syscalls/syscall.tbl       | 2 +-
 arch/microblaze/kernel/syscalls/syscall.tbl | 2 +-
 arch/mips/kernel/syscalls/syscall_n32.tbl   | 2 +-
 arch/mips/kernel/syscalls/syscall_n64.tbl   | 2 +-
 arch/mips/kernel/syscalls/syscall_o32.tbl   | 2 +-
 arch/parisc/kernel/syscalls/syscall.tbl     | 2 +-
 arch/powerpc/kernel/syscalls/syscall.tbl    | 2 +-
 arch/s390/kernel/syscalls/syscall.tbl       | 2 +-
 arch/sh/kernel/syscalls/syscall.tbl         | 2 +-
 arch/sparc/kernel/syscalls/syscall.tbl      | 2 +-
 arch/x86/entry/syscalls/syscall_32.tbl      | 2 +-
 arch/x86/entry/syscalls/syscall_64.tbl      | 2 +-
 arch/xtensa/kernel/syscalls/syscall.tbl     | 2 +-
 17 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 5622578742fd..3000a2e8ee21 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -482,7 +482,7 @@
 550	common	process_madvise			sys_process_madvise
 551	common	epoll_pwait2			sys_epoll_pwait2
 552	common	mount_setattr			sys_mount_setattr
-553	common	quotactl_path			sys_quotactl_path
+# 553 reserved for quotactl_path
 554	common	landlock_create_ruleset		sys_landlock_create_ruleset
 555	common	landlock_add_rule		sys_landlock_add_rule
 556	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index c7679d7db98b..28e03b5fec00 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -456,7 +456,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-443	common	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 7859749d6628..5dab69d2c22b 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -893,8 +893,7 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise)
 __SYSCALL(__NR_epoll_pwait2, compat_sys_epoll_pwait2)
 #define __NR_mount_setattr 442
 __SYSCALL(__NR_mount_setattr, sys_mount_setattr)
-#define __NR_quotactl_path 443
-__SYSCALL(__NR_quotactl_path, sys_quotactl_path)
+/* 443 is reserved for quotactl_path */
 #define __NR_landlock_create_ruleset 444
 __SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)
 #define __NR_landlock_add_rule 445
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index 1ee8e736a48e..bb11fe4c875a 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -363,7 +363,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-443	common	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index 0dd019dc2136..79c2d24c89dd 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -442,7 +442,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-443	common	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index 2ac716984ca2..b11395a20c20 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -448,7 +448,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-443	common	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 5e0096657251..9220909526f9 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -381,7 +381,7 @@
 440	n32	process_madvise			sys_process_madvise
 441	n32	epoll_pwait2			compat_sys_epoll_pwait2
 442	n32	mount_setattr			sys_mount_setattr
-443	n32	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	n32	landlock_create_ruleset		sys_landlock_create_ruleset
 445	n32	landlock_add_rule		sys_landlock_add_rule
 446	n32	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index 9974f5f8e49b..9cd1c34f31b5 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -357,7 +357,7 @@
 440	n64	process_madvise			sys_process_madvise
 441	n64	epoll_pwait2			sys_epoll_pwait2
 442	n64	mount_setattr			sys_mount_setattr
-443	n64	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	n64	landlock_create_ruleset		sys_landlock_create_ruleset
 445	n64	landlock_add_rule		sys_landlock_add_rule
 446	n64	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 39d6e71e57b6..d560c467a8c6 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -430,7 +430,7 @@
 440	o32	process_madvise			sys_process_madvise
 441	o32	epoll_pwait2			sys_epoll_pwait2		compat_sys_epoll_pwait2
 442	o32	mount_setattr			sys_mount_setattr
-443	o32	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	o32	landlock_create_ruleset		sys_landlock_create_ruleset
 445	o32	landlock_add_rule		sys_landlock_add_rule
 446	o32	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index 5ac80b83d745..aabc37f8cae3 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -440,7 +440,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2		compat_sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-443	common	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 2e68fbb57cc6..8f052ff4058c 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -522,7 +522,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2		compat_sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-443	common	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 7e4a2aba366d..0690263df1dd 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -445,7 +445,7 @@
 440  common	process_madvise		sys_process_madvise		sys_process_madvise
 441  common	epoll_pwait2		sys_epoll_pwait2		compat_sys_epoll_pwait2
 442  common	mount_setattr		sys_mount_setattr		sys_mount_setattr
-443  common	quotactl_path		sys_quotactl_path		sys_quotactl_path
+# 443 reserved for quotactl_path
 444  common	landlock_create_ruleset	sys_landlock_create_ruleset	sys_landlock_create_ruleset
 445  common	landlock_add_rule	sys_landlock_add_rule		sys_landlock_add_rule
 446  common	landlock_restrict_self	sys_landlock_restrict_self	sys_landlock_restrict_self
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index f47a0dc55445..0b91499ebdcf 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -445,7 +445,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-443	common	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index b9e1c0e735b7..e34cc30ef22c 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -488,7 +488,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2		compat_sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-443	common	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 28a1423ce32e..4bbc267fb36b 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -447,7 +447,7 @@
 440	i386	process_madvise		sys_process_madvise
 441	i386	epoll_pwait2		sys_epoll_pwait2		compat_sys_epoll_pwait2
 442	i386	mount_setattr		sys_mount_setattr
-443	i386	quotactl_path		sys_quotactl_path
+# 443 reserved for quotactl_path
 444	i386	landlock_create_ruleset	sys_landlock_create_ruleset
 445	i386	landlock_add_rule	sys_landlock_add_rule
 446	i386	landlock_restrict_self	sys_landlock_restrict_self
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index ecd551b08d05..ce18119ea0d0 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -364,7 +364,7 @@
 440	common	process_madvise		sys_process_madvise
 441	common	epoll_pwait2		sys_epoll_pwait2
 442	common	mount_setattr		sys_mount_setattr
-443	common	quotactl_path		sys_quotactl_path
+# 443 reserved for quotactl_path
 444	common	landlock_create_ruleset	sys_landlock_create_ruleset
 445	common	landlock_add_rule	sys_landlock_add_rule
 446	common	landlock_restrict_self	sys_landlock_restrict_self
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 9d76d433d3d6..fd2f30227d96 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -413,7 +413,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-443	common	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
-- 
cgit v1.2.3


From 7a274727702cc07d27cdebd36d1d5132abeea12f Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 17 May 2021 12:43:34 +0100
Subject: io_uring: don't modify req->poll for rw

__io_queue_proc() is used by both poll and apoll, so we should not
access req->poll directly but selecting right struct io_poll_iocb
depending on use case.

Reported-and-tested-by: syzbot+a84b8783366ecb1c65d0@syzkaller.appspotmail.com
Fixes: ea6a693d862d ("io_uring: disable multishot poll for double poll add cases")
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/4a6a1de31142d8e0250fe2dfd4c8923d82a5bbfc.1621251795.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index e481ac8a757a..89ec10471b30 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5019,10 +5019,10 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
 		 * Can't handle multishot for double wait for now, turn it
 		 * into one-shot mode.
 		 */
-		if (!(req->poll.events & EPOLLONESHOT))
-			req->poll.events |= EPOLLONESHOT;
+		if (!(poll_one->events & EPOLLONESHOT))
+			poll_one->events |= EPOLLONESHOT;
 		/* double add on the same waitqueue head, ignore */
-		if (poll->head == head)
+		if (poll_one->head == head)
 			return;
 		poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
 		if (!poll) {
-- 
cgit v1.2.3


From 6416954ca75baed71640bf3828625bf165fb9b5e Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Fri, 14 May 2021 10:03:40 +0100
Subject: btrfs: release path before starting transaction when cloning inline
 extent

When cloning an inline extent there are a few cases, such as when we have
an implicit hole at file offset 0, where we start a transaction while
holding a read lock on a leaf. Starting the transaction results in a call
to sb_start_intwrite(), which results in doing a read lock on a percpu
semaphore. Lockdep doesn't like this and complains about it:

  [46.580704] ======================================================
  [46.580752] WARNING: possible circular locking dependency detected
  [46.580799] 5.13.0-rc1 #28 Not tainted
  [46.580832] ------------------------------------------------------
  [46.580877] cloner/3835 is trying to acquire lock:
  [46.580918] c00000001301d638 (sb_internal#2){.+.+}-{0:0}, at: clone_copy_inline_extent+0xe4/0x5a0
  [46.581167]
  [46.581167] but task is already holding lock:
  [46.581217] c000000007fa2550 (btrfs-tree-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x70/0x1d0
  [46.581293]
  [46.581293] which lock already depends on the new lock.
  [46.581293]
  [46.581351]
  [46.581351] the existing dependency chain (in reverse order) is:
  [46.581410]
  [46.581410] -> #1 (btrfs-tree-00){++++}-{3:3}:
  [46.581464]        down_read_nested+0x68/0x200
  [46.581536]        __btrfs_tree_read_lock+0x70/0x1d0
  [46.581577]        btrfs_read_lock_root_node+0x88/0x200
  [46.581623]        btrfs_search_slot+0x298/0xb70
  [46.581665]        btrfs_set_inode_index+0xfc/0x260
  [46.581708]        btrfs_new_inode+0x26c/0x950
  [46.581749]        btrfs_create+0xf4/0x2b0
  [46.581782]        lookup_open.isra.57+0x55c/0x6a0
  [46.581855]        path_openat+0x418/0xd20
  [46.581888]        do_filp_open+0x9c/0x130
  [46.581920]        do_sys_openat2+0x2ec/0x430
  [46.581961]        do_sys_open+0x90/0xc0
  [46.581993]        system_call_exception+0x3d4/0x410
  [46.582037]        system_call_common+0xec/0x278
  [46.582078]
  [46.582078] -> #0 (sb_internal#2){.+.+}-{0:0}:
  [46.582135]        __lock_acquire+0x1e90/0x2c50
  [46.582176]        lock_acquire+0x2b4/0x5b0
  [46.582263]        start_transaction+0x3cc/0x950
  [46.582308]        clone_copy_inline_extent+0xe4/0x5a0
  [46.582353]        btrfs_clone+0x5fc/0x880
  [46.582388]        btrfs_clone_files+0xd8/0x1c0
  [46.582434]        btrfs_remap_file_range+0x3d8/0x590
  [46.582481]        do_clone_file_range+0x10c/0x270
  [46.582558]        vfs_clone_file_range+0x1b0/0x310
  [46.582605]        ioctl_file_clone+0x90/0x130
  [46.582651]        do_vfs_ioctl+0x874/0x1ac0
  [46.582697]        sys_ioctl+0x6c/0x120
  [46.582733]        system_call_exception+0x3d4/0x410
  [46.582777]        system_call_common+0xec/0x278
  [46.582822]
  [46.582822] other info that might help us debug this:
  [46.582822]
  [46.582888]  Possible unsafe locking scenario:
  [46.582888]
  [46.582942]        CPU0                    CPU1
  [46.582984]        ----                    ----
  [46.583028]   lock(btrfs-tree-00);
  [46.583062]                                lock(sb_internal#2);
  [46.583119]                                lock(btrfs-tree-00);
  [46.583174]   lock(sb_internal#2);
  [46.583212]
  [46.583212]  *** DEADLOCK ***
  [46.583212]
  [46.583266] 6 locks held by cloner/3835:
  [46.583299]  #0: c00000001301d448 (sb_writers#12){.+.+}-{0:0}, at: ioctl_file_clone+0x90/0x130
  [46.583382]  #1: c00000000f6d3768 (&sb->s_type->i_mutex_key#15){+.+.}-{3:3}, at: lock_two_nondirectories+0x58/0xc0
  [46.583477]  #2: c00000000f6d72a8 (&sb->s_type->i_mutex_key#15/4){+.+.}-{3:3}, at: lock_two_nondirectories+0x9c/0xc0
  [46.583574]  #3: c00000000f6d7138 (&ei->i_mmap_lock){+.+.}-{3:3}, at: btrfs_remap_file_range+0xd0/0x590
  [46.583657]  #4: c00000000f6d35f8 (&ei->i_mmap_lock/1){+.+.}-{3:3}, at: btrfs_remap_file_range+0xe0/0x590
  [46.583743]  #5: c000000007fa2550 (btrfs-tree-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x70/0x1d0
  [46.583828]
  [46.583828] stack backtrace:
  [46.583872] CPU: 1 PID: 3835 Comm: cloner Not tainted 5.13.0-rc1 #28
  [46.583931] Call Trace:
  [46.583955] [c0000000167c7200] [c000000000c1ee78] dump_stack+0xec/0x144 (unreliable)
  [46.584052] [c0000000167c7240] [c000000000274058] print_circular_bug.isra.32+0x3a8/0x400
  [46.584123] [c0000000167c72e0] [c0000000002741f4] check_noncircular+0x144/0x190
  [46.584191] [c0000000167c73b0] [c000000000278fc0] __lock_acquire+0x1e90/0x2c50
  [46.584259] [c0000000167c74f0] [c00000000027aa94] lock_acquire+0x2b4/0x5b0
  [46.584317] [c0000000167c75e0] [c000000000a0d6cc] start_transaction+0x3cc/0x950
  [46.584388] [c0000000167c7690] [c000000000af47a4] clone_copy_inline_extent+0xe4/0x5a0
  [46.584457] [c0000000167c77c0] [c000000000af525c] btrfs_clone+0x5fc/0x880
  [46.584514] [c0000000167c7990] [c000000000af5698] btrfs_clone_files+0xd8/0x1c0
  [46.584583] [c0000000167c7a00] [c000000000af5b58] btrfs_remap_file_range+0x3d8/0x590
  [46.584652] [c0000000167c7ae0] [c0000000005d81dc] do_clone_file_range+0x10c/0x270
  [46.584722] [c0000000167c7b40] [c0000000005d84f0] vfs_clone_file_range+0x1b0/0x310
  [46.584793] [c0000000167c7bb0] [c00000000058bf80] ioctl_file_clone+0x90/0x130
  [46.584861] [c0000000167c7c10] [c00000000058c894] do_vfs_ioctl+0x874/0x1ac0
  [46.584922] [c0000000167c7d10] [c00000000058db4c] sys_ioctl+0x6c/0x120
  [46.584978] [c0000000167c7d60] [c0000000000364a4] system_call_exception+0x3d4/0x410
  [46.585046] [c0000000167c7e10] [c00000000000d45c] system_call_common+0xec/0x278
  [46.585114] --- interrupt: c00 at 0x7ffff7e22990
  [46.585160] NIP:  00007ffff7e22990 LR: 00000001000010ec CTR: 0000000000000000
  [46.585224] REGS: c0000000167c7e80 TRAP: 0c00   Not tainted  (5.13.0-rc1)
  [46.585280] MSR:  800000000280f033 <SF,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE>  CR: 28000244  XER: 00000000
  [46.585374] IRQMASK: 0
  [46.585374] GPR00: 0000000000000036 00007fffffffdec0 00007ffff7f17100 0000000000000004
  [46.585374] GPR04: 000000008020940d 00007fffffffdf40 0000000000000000 0000000000000000
  [46.585374] GPR08: 0000000000000004 0000000000000000 0000000000000000 0000000000000000
  [46.585374] GPR12: 0000000000000000 00007ffff7ffa940 0000000000000000 0000000000000000
  [46.585374] GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
  [46.585374] GPR20: 0000000000000000 000000009123683e 00007fffffffdf40 0000000000000000
  [46.585374] GPR24: 0000000000000000 0000000000000000 0000000000000000 0000000000000004
  [46.585374] GPR28: 0000000100030260 0000000100030280 0000000000000003 000000000000005f
  [46.585919] NIP [00007ffff7e22990] 0x7ffff7e22990
  [46.585964] LR [00000001000010ec] 0x1000010ec
  [46.586010] --- interrupt: c00

This should be a false positive, as both locks are acquired in read mode.
Nevertheless, we don't need to hold a leaf locked when we start the
transaction, so just release the leaf (path) before starting it.

Reported-by: Ritesh Harjani <riteshh@linux.ibm.com>
Link: https://lore.kernel.org/linux-btrfs/20210513214404.xks77p566fglzgum@riteshh-domain/
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/reflink.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
index f4ec06b53aa0..06682128d8fa 100644
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -285,6 +285,11 @@ copy_inline_extent:
 	ret = btrfs_inode_set_file_extent_range(BTRFS_I(dst), 0, aligned_end);
 out:
 	if (!ret && !trans) {
+		/*
+		 * Release path before starting a new transaction so we don't
+		 * hold locks that would confuse lockdep.
+		 */
+		btrfs_release_path(path);
 		/*
 		 * No transaction here means we copied the inline extent into a
 		 * page of the destination inode.
-- 
cgit v1.2.3


From 91df99a6eb50d5a1bc70fff4a09a0b7ae6aab96d Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Fri, 14 May 2021 10:56:16 -0400
Subject: btrfs: do not BUG_ON in link_to_fixup_dir

While doing error injection testing I got the following panic

  kernel BUG at fs/btrfs/tree-log.c:1862!
  invalid opcode: 0000 [#1] SMP NOPTI
  CPU: 1 PID: 7836 Comm: mount Not tainted 5.13.0-rc1+ #305
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014
  RIP: 0010:link_to_fixup_dir+0xd5/0xe0
  RSP: 0018:ffffb5800180fa30 EFLAGS: 00010216
  RAX: fffffffffffffffb RBX: 00000000fffffffb RCX: ffff8f595287faf0
  RDX: ffffb5800180fa37 RSI: ffff8f5954978800 RDI: 0000000000000000
  RBP: ffff8f5953af9450 R08: 0000000000000019 R09: 0000000000000001
  R10: 000151f408682970 R11: 0000000120021001 R12: ffff8f5954978800
  R13: ffff8f595287faf0 R14: ffff8f5953c77dd0 R15: 0000000000000065
  FS:  00007fc5284c8c40(0000) GS:ffff8f59bbd00000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007fc5287f47c0 CR3: 000000011275e002 CR4: 0000000000370ee0
  Call Trace:
   replay_one_buffer+0x409/0x470
   ? btree_read_extent_buffer_pages+0xd0/0x110
   walk_up_log_tree+0x157/0x1e0
   walk_log_tree+0xa6/0x1d0
   btrfs_recover_log_trees+0x1da/0x360
   ? replay_one_extent+0x7b0/0x7b0
   open_ctree+0x1486/0x1720
   btrfs_mount_root.cold+0x12/0xea
   ? __kmalloc_track_caller+0x12f/0x240
   legacy_get_tree+0x24/0x40
   vfs_get_tree+0x22/0xb0
   vfs_kern_mount.part.0+0x71/0xb0
   btrfs_mount+0x10d/0x380
   ? vfs_parse_fs_string+0x4d/0x90
   legacy_get_tree+0x24/0x40
   vfs_get_tree+0x22/0xb0
   path_mount+0x433/0xa10
   __x64_sys_mount+0xe3/0x120
   do_syscall_64+0x3d/0x80
   entry_SYSCALL_64_after_hwframe+0x44/0xae

We can get -EIO or any number of legitimate errors from
btrfs_search_slot(), panicing here is not the appropriate response.  The
error path for this code handles errors properly, simply return the
error.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index fd6b1f13112e..c17d6b827b42 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1858,8 +1858,6 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
 		ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
 	} else if (ret == -EEXIST) {
 		ret = 0;
-	} else {
-		BUG(); /* Logic Error */
 	}
 	iput(inode);
 
-- 
cgit v1.2.3


From b433d090ac63eae4d3182cfc274dbacb0c4ee0ec Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@baylibre.com>
Date: Tue, 11 May 2021 12:00:54 -0700
Subject: MAINTAINERS: ARM/Amlogic SoCs: add Neil as primary maintainer

Add Neil as primary maintainer for the Amlogic family of Arm SoCs.  I
will now act as co-maintainer.

Neil is already doing lots of the reviewing, testing and behind the
scenes support for users of the upstream kernel on these SoCs, so this
is just to formalize the current state of affairs.

Thanks Neil for all of your efforts, and keep up the great work!

Signed-off-by: Kevin Hilman <khilman@baylibre.com>
Acked-by: Neil Armstrong <narmstrong@baylibre.com>
Acked-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Link: https://lore.kernel.org/r/20210511190054.26300-1-khilman@baylibre.com'
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 1162b0917630..a75eb1514957 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1622,8 +1622,8 @@ F:	Documentation/devicetree/bindings/sound/amlogic*
 F:	sound/soc/meson/
 
 ARM/Amlogic Meson SoC support
+M:	Neil Armstrong <narmstrong@baylibre.com>
 M:	Kevin Hilman <khilman@baylibre.com>
-R:	Neil Armstrong <narmstrong@baylibre.com>
 R:	Jerome Brunet <jbrunet@baylibre.com>
 R:	Martin Blumenstingl <martin.blumenstingl@googlemail.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-- 
cgit v1.2.3


From 6863b4d7bf19a54e23fc5838b7e66d954444289d Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@nvidia.com>
Date: Tue, 11 May 2021 08:48:27 +0300
Subject: RDMA/mlx5: Verify that DM operation is reasonable

Fix the complaint from smatch by verifing that the user requested DM
operation is not greater than 31.

divers/infiniband/hw/mlx5/dm.c:220 mlx5_ib_handler_MLX5_IB_METHOD_DM_MAP_OP_ADDR()
error: undefined (user controlled) shift '(((1))) << op'

Fixes: cea85fa5dbc2 ("RDMA/mlx5: Add support in MEMIC operations")
Link: https://lore.kernel.org/r/458b1d7710c3cf01360c8771893f483665569786.1620711734.git.leonro@nvidia.com
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/mlx5/dm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/infiniband/hw/mlx5/dm.c b/drivers/infiniband/hw/mlx5/dm.c
index 094bf85589db..001d766cf291 100644
--- a/drivers/infiniband/hw/mlx5/dm.c
+++ b/drivers/infiniband/hw/mlx5/dm.c
@@ -217,6 +217,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DM_MAP_OP_ADDR)(
 	if (err)
 		return err;
 
+	if (op >= BITS_PER_TYPE(u32))
+		return -EOPNOTSUPP;
+
 	if (!(MLX5_CAP_DEV_MEM(dev->mdev, memic_operations) & BIT(op)))
 		return -EOPNOTSUPP;
 
-- 
cgit v1.2.3


From 97f30d324ce6645a4de4ffb71e4ae9b8ca36ff04 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@nvidia.com>
Date: Tue, 11 May 2021 08:48:29 +0300
Subject: RDMA/mlx5: Recover from fatal event in dual port mode

When there is fatal event on the slave port, the device is marked as not
active. We need to mark it as active again when the slave is recovered to
regain full functionality.

Fixes: d69a24e03659 ("IB/mlx5: Move IB event processing onto a workqueue")
Link: https://lore.kernel.org/r/8906754455bb23019ef223c725d2c0d38acfb80b.1620711734.git.leonro@nvidia.com
Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/mlx5/main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 6d1dd09a4388..644d5d0ac544 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -4419,6 +4419,7 @@ static int mlx5r_mp_probe(struct auxiliary_device *adev,
 
 		if (bound) {
 			rdma_roce_rescan_device(&dev->ib_dev);
+			mpi->ibdev->ib_active = true;
 			break;
 		}
 	}
-- 
cgit v1.2.3


From dc07628bd2bbc1da768e265192c28ebd301f509d Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Tue, 11 May 2021 08:48:31 +0300
Subject: RDMA/rxe: Return CQE error if invalid lkey was supplied

RXE is missing update of WQE status in LOCAL_WRITE failures.  This caused
the following kernel panic if someone sent an atomic operation with an
explicitly wrong lkey.

[leonro@vm ~]$ mkt test
test_atomic_invalid_lkey (tests.test_atomic.AtomicTest) ...
 WARNING: CPU: 5 PID: 263 at drivers/infiniband/sw/rxe/rxe_comp.c:740 rxe_completer+0x1a6d/0x2e30 [rdma_rxe]
 Modules linked in: crc32_generic rdma_rxe ip6_udp_tunnel udp_tunnel rdma_ucm rdma_cm ib_umad ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core mlx5_core ptp pps_core
 CPU: 5 PID: 263 Comm: python3 Not tainted 5.13.0-rc1+ #2936
 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
 RIP: 0010:rxe_completer+0x1a6d/0x2e30 [rdma_rxe]
 Code: 03 0f 8e 65 0e 00 00 3b 93 10 06 00 00 0f 84 82 0a 00 00 4c 89 ff 4c 89 44 24 38 e8 2d 74 a9 e1 4c 8b 44 24 38 e9 1c f5 ff ff <0f> 0b e9 0c e8 ff ff b8 05 00 00 00 41 bf 05 00 00 00 e9 ab e7 ff
 RSP: 0018:ffff8880158af090 EFLAGS: 00010246
 RAX: 0000000000000000 RBX: ffff888016a78000 RCX: ffffffffa0cf1652
 RDX: 1ffff9200004b442 RSI: 0000000000000004 RDI: ffffc9000025a210
 RBP: dffffc0000000000 R08: 00000000ffffffea R09: ffff88801617740b
 R10: ffffed1002c2ee81 R11: 0000000000000007 R12: ffff88800f3b63e8
 R13: ffff888016a78008 R14: ffffc9000025a180 R15: 000000000000000c
 FS:  00007f88b622a740(0000) GS:ffff88806d540000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00007f88b5a1fa10 CR3: 000000000d848004 CR4: 0000000000370ea0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
 Call Trace:
  rxe_do_task+0x130/0x230 [rdma_rxe]
  rxe_rcv+0xb11/0x1df0 [rdma_rxe]
  rxe_loopback+0x157/0x1e0 [rdma_rxe]
  rxe_responder+0x5532/0x7620 [rdma_rxe]
  rxe_do_task+0x130/0x230 [rdma_rxe]
  rxe_rcv+0x9c8/0x1df0 [rdma_rxe]
  rxe_loopback+0x157/0x1e0 [rdma_rxe]
  rxe_requester+0x1efd/0x58c0 [rdma_rxe]
  rxe_do_task+0x130/0x230 [rdma_rxe]
  rxe_post_send+0x998/0x1860 [rdma_rxe]
  ib_uverbs_post_send+0xd5f/0x1220 [ib_uverbs]
  ib_uverbs_write+0x847/0xc80 [ib_uverbs]
  vfs_write+0x1c5/0x840
  ksys_write+0x176/0x1d0
  do_syscall_64+0x3f/0x80
  entry_SYSCALL_64_after_hwframe+0x44/0xae

Fixes: 8700e3e7c485 ("Soft RoCE driver")
Link: https://lore.kernel.org/r/11e7b553f3a6f5371c6bb3f57c494bb52b88af99.1620711734.git.leonro@nvidia.com
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Acked-by: Zhu Yanjun <zyjzyj2000@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_comp.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 2af26737d32d..a6712e373eed 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -346,13 +346,15 @@ static inline enum comp_state do_read(struct rxe_qp *qp,
 	ret = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE,
 			&wqe->dma, payload_addr(pkt),
 			payload_size(pkt), to_mr_obj, NULL);
-	if (ret)
+	if (ret) {
+		wqe->status = IB_WC_LOC_PROT_ERR;
 		return COMPST_ERROR;
+	}
 
 	if (wqe->dma.resid == 0 && (pkt->mask & RXE_END_MASK))
 		return COMPST_COMP_ACK;
-	else
-		return COMPST_UPDATE_COMP;
+
+	return COMPST_UPDATE_COMP;
 }
 
 static inline enum comp_state do_atomic(struct rxe_qp *qp,
@@ -366,10 +368,12 @@ static inline enum comp_state do_atomic(struct rxe_qp *qp,
 	ret = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE,
 			&wqe->dma, &atomic_orig,
 			sizeof(u64), to_mr_obj, NULL);
-	if (ret)
+	if (ret) {
+		wqe->status = IB_WC_LOC_PROT_ERR;
 		return COMPST_ERROR;
-	else
-		return COMPST_COMP_ACK;
+	}
+
+	return COMPST_COMP_ACK;
 }
 
 static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
-- 
cgit v1.2.3


From b7df21cf1b79ab7026f545e7bf837bd5750ac026 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 8 May 2021 03:57:03 +0800
Subject: tipc: skb_linearize the head skb when reassembling msgs

It's not a good idea to append the frag skb to a skb's frag_list if
the frag_list already has skbs from elsewhere, such as this skb was
created by pskb_copy() where the frag_list was cloned (all the skbs
in it were skb_get'ed) and shared by multiple skbs.

However, the new appended frag skb should have been only seen by the
current skb. Otherwise, it will cause use after free crashes as this
appended frag skb are seen by multiple skbs but it only got skb_get
called once.

The same thing happens with a skb updated by pskb_may_pull() with a
skb_cloned skb. Li Shuang has reported quite a few crashes caused
by this when doing testing over macvlan devices:

  [] kernel BUG at net/core/skbuff.c:1970!
  [] Call Trace:
  []  skb_clone+0x4d/0xb0
  []  macvlan_broadcast+0xd8/0x160 [macvlan]
  []  macvlan_process_broadcast+0x148/0x150 [macvlan]
  []  process_one_work+0x1a7/0x360
  []  worker_thread+0x30/0x390

  [] kernel BUG at mm/usercopy.c:102!
  [] Call Trace:
  []  __check_heap_object+0xd3/0x100
  []  __check_object_size+0xff/0x16b
  []  simple_copy_to_iter+0x1c/0x30
  []  __skb_datagram_iter+0x7d/0x310
  []  __skb_datagram_iter+0x2a5/0x310
  []  skb_copy_datagram_iter+0x3b/0x90
  []  tipc_recvmsg+0x14a/0x3a0 [tipc]
  []  ____sys_recvmsg+0x91/0x150
  []  ___sys_recvmsg+0x7b/0xc0

  [] kernel BUG at mm/slub.c:305!
  [] Call Trace:
  []  <IRQ>
  []  kmem_cache_free+0x3ff/0x400
  []  __netif_receive_skb_core+0x12c/0xc40
  []  ? kmem_cache_alloc+0x12e/0x270
  []  netif_receive_skb_internal+0x3d/0xb0
  []  ? get_rx_page_info+0x8e/0xa0 [be2net]
  []  be_poll+0x6ef/0xd00 [be2net]
  []  ? irq_exit+0x4f/0x100
  []  net_rx_action+0x149/0x3b0

  ...

This patch is to fix it by linearizing the head skb if it has frag_list
set in tipc_buf_append(). Note that we choose to do this before calling
skb_unshare(), as __skb_linearize() will avoid skb_copy(). Also, we can
not just drop the frag_list either as the early time.

Fixes: 45c8b7b175ce ("tipc: allow non-linear first fragment buffer")
Reported-by: Li Shuang <shuali@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/msg.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 3f0a25345a7c..ce6ab54822d8 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -149,18 +149,13 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
 		if (unlikely(head))
 			goto err;
 		*buf = NULL;
+		if (skb_has_frag_list(frag) && __skb_linearize(frag))
+			goto err;
 		frag = skb_unshare(frag, GFP_ATOMIC);
 		if (unlikely(!frag))
 			goto err;
 		head = *headbuf = frag;
 		TIPC_SKB_CB(head)->tail = NULL;
-		if (skb_is_nonlinear(head)) {
-			skb_walk_frags(head, tail) {
-				TIPC_SKB_CB(head)->tail = tail;
-			}
-		} else {
-			skb_frag_list_init(head);
-		}
 		return 0;
 	}
 
-- 
cgit v1.2.3


From 31db0dbd72444abe645d90c20ecb84d668f5af5e Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 14 May 2021 17:24:48 +0300
Subject: net: hso: check for allocation failure in
 hso_create_bulk_serial_device()

In current kernels, small allocations never actually fail so this
patch shouldn't affect runtime.

Originally this error handling code written with the idea that if
the "serial->tiocmget" allocation failed, then we would continue
operating instead of bailing out early.  But in later years we added
an unchecked dereference on the next line.

	serial->tiocmget->serial_state_notification = kzalloc();
        ^^^^^^^^^^^^^^^^^^

Since these allocations are never going fail in real life, this is
mostly a philosophical debate, but I think bailing out early is the
correct behavior that the user would want.  And generally it's safer to
bail as soon an error happens.

Fixes: af0de1303c4e ("usb: hso: obey DMA rules in tiocmget")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Johan Hovold <johan@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/hso.c | 37 ++++++++++++++++++-------------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index 3ef4b2841402..260f850d69eb 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -2618,29 +2618,28 @@ static struct hso_device *hso_create_bulk_serial_device(
 		num_urbs = 2;
 		serial->tiocmget = kzalloc(sizeof(struct hso_tiocmget),
 					   GFP_KERNEL);
+		if (!serial->tiocmget)
+			goto exit;
 		serial->tiocmget->serial_state_notification
 			= kzalloc(sizeof(struct hso_serial_state_notification),
 					   GFP_KERNEL);
-		/* it isn't going to break our heart if serial->tiocmget
-		 *  allocation fails don't bother checking this.
-		 */
-		if (serial->tiocmget && serial->tiocmget->serial_state_notification) {
-			tiocmget = serial->tiocmget;
-			tiocmget->endp = hso_get_ep(interface,
-						    USB_ENDPOINT_XFER_INT,
-						    USB_DIR_IN);
-			if (!tiocmget->endp) {
-				dev_err(&interface->dev, "Failed to find INT IN ep\n");
-				goto exit;
-			}
-
-			tiocmget->urb = usb_alloc_urb(0, GFP_KERNEL);
-			if (tiocmget->urb) {
-				mutex_init(&tiocmget->mutex);
-				init_waitqueue_head(&tiocmget->waitq);
-			} else
-				hso_free_tiomget(serial);
+		if (!serial->tiocmget->serial_state_notification)
+			goto exit;
+		tiocmget = serial->tiocmget;
+		tiocmget->endp = hso_get_ep(interface,
+					    USB_ENDPOINT_XFER_INT,
+					    USB_DIR_IN);
+		if (!tiocmget->endp) {
+			dev_err(&interface->dev, "Failed to find INT IN ep\n");
+			goto exit;
 		}
+
+		tiocmget->urb = usb_alloc_urb(0, GFP_KERNEL);
+		if (tiocmget->urb) {
+			mutex_init(&tiocmget->mutex);
+			init_waitqueue_head(&tiocmget->waitq);
+		} else
+			hso_free_tiomget(serial);
 	}
 	else
 		num_urbs = 1;
-- 
cgit v1.2.3


From e0652f8bb44d6294eeeac06d703185357f25d50b Mon Sep 17 00:00:00 2001
From: Dongliang Mu <mudongliangabcd@gmail.com>
Date: Sat, 15 May 2021 07:29:06 +0800
Subject: NFC: nci: fix memory leak in nci_allocate_device

nfcmrvl_disconnect fails to free the hci_dev field in struct nci_dev.
Fix this by freeing hci_dev in nci_free_device.

BUG: memory leak
unreferenced object 0xffff888111ea6800 (size 1024):
  comm "kworker/1:0", pid 19, jiffies 4294942308 (age 13.580s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 60 fd 0c 81 88 ff ff  .........`......
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<000000004bc25d43>] kmalloc include/linux/slab.h:552 [inline]
    [<000000004bc25d43>] kzalloc include/linux/slab.h:682 [inline]
    [<000000004bc25d43>] nci_hci_allocate+0x21/0xd0 net/nfc/nci/hci.c:784
    [<00000000c59cff92>] nci_allocate_device net/nfc/nci/core.c:1170 [inline]
    [<00000000c59cff92>] nci_allocate_device+0x10b/0x160 net/nfc/nci/core.c:1132
    [<00000000006e0a8e>] nfcmrvl_nci_register_dev+0x10a/0x1c0 drivers/nfc/nfcmrvl/main.c:153
    [<000000004da1b57e>] nfcmrvl_probe+0x223/0x290 drivers/nfc/nfcmrvl/usb.c:345
    [<00000000d506aed9>] usb_probe_interface+0x177/0x370 drivers/usb/core/driver.c:396
    [<00000000bc632c92>] really_probe+0x159/0x4a0 drivers/base/dd.c:554
    [<00000000f5009125>] driver_probe_device+0x84/0x100 drivers/base/dd.c:740
    [<000000000ce658ca>] __device_attach_driver+0xee/0x110 drivers/base/dd.c:846
    [<000000007067d05f>] bus_for_each_drv+0xb7/0x100 drivers/base/bus.c:431
    [<00000000f8e13372>] __device_attach+0x122/0x250 drivers/base/dd.c:914
    [<000000009cf68860>] bus_probe_device+0xc6/0xe0 drivers/base/bus.c:491
    [<00000000359c965a>] device_add+0x5be/0xc30 drivers/base/core.c:3109
    [<00000000086e4bd3>] usb_set_configuration+0x9d9/0xb90 drivers/usb/core/message.c:2164
    [<00000000ca036872>] usb_generic_driver_probe+0x8c/0xc0 drivers/usb/core/generic.c:238
    [<00000000d40d36f6>] usb_probe_device+0x5c/0x140 drivers/usb/core/driver.c:293
    [<00000000bc632c92>] really_probe+0x159/0x4a0 drivers/base/dd.c:554

Reported-by: syzbot+19bcfc64a8df1318d1c3@syzkaller.appspotmail.com
Fixes: 11f54f228643 ("NFC: nci: Add HCI over NCI protocol support")
Signed-off-by: Dongliang Mu <mudongliangabcd@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/nfc/nci_core.h | 1 +
 net/nfc/nci/core.c         | 1 +
 net/nfc/nci/hci.c          | 5 +++++
 3 files changed, 7 insertions(+)

diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h
index bd76e8e082c0..1df0f8074c9d 100644
--- a/include/net/nfc/nci_core.h
+++ b/include/net/nfc/nci_core.h
@@ -298,6 +298,7 @@ int nci_nfcc_loopback(struct nci_dev *ndev, void *data, size_t data_len,
 		      struct sk_buff **resp);
 
 struct nci_hci_dev *nci_hci_allocate(struct nci_dev *ndev);
+void nci_hci_deallocate(struct nci_dev *ndev);
 int nci_hci_send_event(struct nci_dev *ndev, u8 gate, u8 event,
 		       const u8 *param, size_t param_len);
 int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate,
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 9a585332ea84..da7fe9db1b00 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -1191,6 +1191,7 @@ EXPORT_SYMBOL(nci_allocate_device);
 void nci_free_device(struct nci_dev *ndev)
 {
 	nfc_free_device(ndev->nfc_dev);
+	nci_hci_deallocate(ndev);
 	kfree(ndev);
 }
 EXPORT_SYMBOL(nci_free_device);
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index 6b275a387a92..96865142104f 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -792,3 +792,8 @@ struct nci_hci_dev *nci_hci_allocate(struct nci_dev *ndev)
 
 	return hdev;
 }
+
+void nci_hci_deallocate(struct nci_dev *ndev)
+{
+	kfree(ndev->hci_dev);
+}
-- 
cgit v1.2.3


From 28c66b6da4087b8cfe81c2ec0a46eb6116dafda9 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Sat, 15 May 2021 15:16:05 +0800
Subject: net: bnx2: Fix error return code in bnx2_init_board()

Fix to return -EPERM from the error handling case instead of 0, as done
elsewhere in this function.

Fixes: b6016b767397 ("[BNX2]: New Broadcom gigabit network driver.")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Reviewed-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index c0986096c701..5bace8a93d73 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -8247,9 +8247,9 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
 		BNX2_WR(bp, PCI_COMMAND, reg);
 	} else if ((BNX2_CHIP_ID(bp) == BNX2_CHIP_ID_5706_A1) &&
 		!(bp->flags & BNX2_FLAG_PCIX)) {
-
 		dev_err(&pdev->dev,
 			"5706 A1 can only be used in a PCIX bus, aborting\n");
+		rc = -EPERM;
 		goto err_out_unmap;
 	}
 
-- 
cgit v1.2.3


From ab21494be9dc7d62736c5fcd06be65d49df713ee Mon Sep 17 00:00:00 2001
From: Andy Gospodarek <gospo@broadcom.com>
Date: Sat, 15 May 2021 03:25:18 -0400
Subject: bnxt_en: Include new P5 HV definition in VF check.

Otherwise, some of the recently added HyperV VF IDs would not be
recognized as VF devices and they would not initialize properly.

Fixes: 7fbf359bb2c1 ("bnxt_en: Add PCI IDs for Hyper-V VF devices.")
Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Andy Gospodarek <gospo@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 46be4046ee51..4e57041b4775 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -282,7 +282,8 @@ static bool bnxt_vf_pciid(enum board_idx idx)
 {
 	return (idx == NETXTREME_C_VF || idx == NETXTREME_E_VF ||
 		idx == NETXTREME_S_VF || idx == NETXTREME_C_VF_HV ||
-		idx == NETXTREME_E_VF_HV || idx == NETXTREME_E_P5_VF);
+		idx == NETXTREME_E_VF_HV || idx == NETXTREME_E_P5_VF ||
+		idx == NETXTREME_E_P5_VF_HV);
 }
 
 #define DB_CP_REARM_FLAGS	(DB_KEY_CP | DB_IDX_VALID)
-- 
cgit v1.2.3


From 702279d2ce4650000bb6302013630304e359dc13 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sat, 15 May 2021 03:25:19 -0400
Subject: bnxt_en: Fix context memory setup for 64K page size.

There was a typo in the code that checks for 64K BNXT_PAGE_SHIFT in
bnxt_hwrm_set_pg_attr().  Fix it and make the code more understandable
with a new macro BNXT_SET_CTX_PAGE_ATTR().

Fixes: 1b9394e5a2ad ("bnxt_en: Configure context memory on new devices.")
Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c |  9 +--------
 drivers/net/ethernet/broadcom/bnxt/bnxt.h | 10 ++++++++++
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 4e57041b4775..fcc729d52b17 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -6933,17 +6933,10 @@ ctx_err:
 static void bnxt_hwrm_set_pg_attr(struct bnxt_ring_mem_info *rmem, u8 *pg_attr,
 				  __le64 *pg_dir)
 {
-	u8 pg_size = 0;
-
 	if (!rmem->nr_pages)
 		return;
 
-	if (BNXT_PAGE_SHIFT == 13)
-		pg_size = 1 << 4;
-	else if (BNXT_PAGE_SIZE == 16)
-		pg_size = 2 << 4;
-
-	*pg_attr = pg_size;
+	BNXT_SET_CTX_PAGE_ATTR(*pg_attr);
 	if (rmem->depth >= 1) {
 		if (rmem->depth == 2)
 			*pg_attr |= 2;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 98e0cef4532c..30e47ea343f9 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1457,6 +1457,16 @@ struct bnxt_ctx_pg_info {
 
 #define BNXT_BACKING_STORE_CFG_LEGACY_LEN	256
 
+#define BNXT_SET_CTX_PAGE_ATTR(attr)					\
+do {									\
+	if (BNXT_PAGE_SIZE == 0x2000)					\
+		attr = FUNC_BACKING_STORE_CFG_REQ_SRQ_PG_SIZE_PG_8K;	\
+	else if (BNXT_PAGE_SIZE == 0x10000)				\
+		attr = FUNC_BACKING_STORE_CFG_REQ_QPC_PG_SIZE_PG_64K;	\
+	else								\
+		attr = FUNC_BACKING_STORE_CFG_REQ_QPC_PG_SIZE_PG_4K;	\
+} while (0)
+
 struct bnxt_ctx_mem_info {
 	u32	qp_max_entries;
 	u16	qp_min_qp1_entries;
-- 
cgit v1.2.3


From 9f6f852550d0e1b7735651228116ae9d300f69b3 Mon Sep 17 00:00:00 2001
From: Zheyu Ma <zheyuma97@gmail.com>
Date: Sun, 16 May 2021 07:11:40 +0000
Subject: isdn: mISDN: netjet: Fix crash in nj_probe:

'nj_setup' in netjet.c might fail with -EIO and in this case
'card->irq' is initialized and is bigger than zero. A subsequent call to
'nj_release' will free the irq that has not been requested.

Fix this bug by deleting the previous assignment to 'card->irq' and just
keep the assignment before 'request_irq'.

The KASAN's log reveals it:

[    3.354615 ] WARNING: CPU: 0 PID: 1 at kernel/irq/manage.c:1826
free_irq+0x100/0x480
[    3.355112 ] Modules linked in:
[    3.355310 ] CPU: 0 PID: 1 Comm: swapper/0 Not tainted
5.13.0-rc1-00144-g25a1298726e #13
[    3.355816 ] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS
rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
[    3.356552 ] RIP: 0010:free_irq+0x100/0x480
[    3.356820 ] Code: 6e 08 74 6f 4d 89 f4 e8 5e ac 09 00 4d 8b 74 24 18
4d 85 f6 75 e3 e8 4f ac 09 00 8b 75 c8 48 c7 c7 78 c1 2e 85 e8 e0 cf f5
ff <0f> 0b 48 8b 75 c0 4c 89 ff e8 72 33 0b 03 48 8b 43 40 4c 8b a0 80
[    3.358012 ] RSP: 0000:ffffc90000017b48 EFLAGS: 00010082
[    3.358357 ] RAX: 0000000000000000 RBX: ffff888104dc8000 RCX:
0000000000000000
[    3.358814 ] RDX: ffff8881003c8000 RSI: ffffffff8124a9e6 RDI:
00000000ffffffff
[    3.359272 ] RBP: ffffc90000017b88 R08: 0000000000000000 R09:
0000000000000000
[    3.359732 ] R10: ffffc900000179f0 R11: 0000000000001d04 R12:
0000000000000000
[    3.360195 ] R13: ffff888107dc6000 R14: ffff888107dc6928 R15:
ffff888104dc80a8
[    3.360652 ] FS:  0000000000000000(0000) GS:ffff88817bc00000(0000)
knlGS:0000000000000000
[    3.361170 ] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    3.361538 ] CR2: 0000000000000000 CR3: 000000000582e000 CR4:
00000000000006f0
[    3.362003 ] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
0000000000000000
[    3.362175 ] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
0000000000000400
[    3.362175 ] Call Trace:
[    3.362175 ]  nj_release+0x51/0x1e0
[    3.362175 ]  nj_probe+0x450/0x950
[    3.362175 ]  ? pci_device_remove+0x110/0x110
[    3.362175 ]  local_pci_probe+0x45/0xa0
[    3.362175 ]  pci_device_probe+0x12b/0x1d0
[    3.362175 ]  really_probe+0x2a9/0x610
[    3.362175 ]  driver_probe_device+0x90/0x1d0
[    3.362175 ]  ? mutex_lock_nested+0x1b/0x20
[    3.362175 ]  device_driver_attach+0x68/0x70
[    3.362175 ]  __driver_attach+0x124/0x1b0
[    3.362175 ]  ? device_driver_attach+0x70/0x70
[    3.362175 ]  bus_for_each_dev+0xbb/0x110
[    3.362175 ]  ? rdinit_setup+0x45/0x45
[    3.362175 ]  driver_attach+0x27/0x30
[    3.362175 ]  bus_add_driver+0x1eb/0x2a0
[    3.362175 ]  driver_register+0xa9/0x180
[    3.362175 ]  __pci_register_driver+0x82/0x90
[    3.362175 ]  ? w6692_init+0x38/0x38
[    3.362175 ]  nj_init+0x36/0x38
[    3.362175 ]  do_one_initcall+0x7f/0x3d0
[    3.362175 ]  ? rdinit_setup+0x45/0x45
[    3.362175 ]  ? rcu_read_lock_sched_held+0x4f/0x80
[    3.362175 ]  kernel_init_freeable+0x2aa/0x301
[    3.362175 ]  ? rest_init+0x2c0/0x2c0
[    3.362175 ]  kernel_init+0x18/0x190
[    3.362175 ]  ? rest_init+0x2c0/0x2c0
[    3.362175 ]  ? rest_init+0x2c0/0x2c0
[    3.362175 ]  ret_from_fork+0x1f/0x30
[    3.362175 ] Kernel panic - not syncing: panic_on_warn set ...
[    3.362175 ] CPU: 0 PID: 1 Comm: swapper/0 Not tainted
5.13.0-rc1-00144-g25a1298726e #13
[    3.362175 ] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS
rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
[    3.362175 ] Call Trace:
[    3.362175 ]  dump_stack+0xba/0xf5
[    3.362175 ]  ? free_irq+0x100/0x480
[    3.362175 ]  panic+0x15a/0x3f2
[    3.362175 ]  ? __warn+0xf2/0x150
[    3.362175 ]  ? free_irq+0x100/0x480
[    3.362175 ]  __warn+0x108/0x150
[    3.362175 ]  ? free_irq+0x100/0x480
[    3.362175 ]  report_bug+0x119/0x1c0
[    3.362175 ]  handle_bug+0x3b/0x80
[    3.362175 ]  exc_invalid_op+0x18/0x70
[    3.362175 ]  asm_exc_invalid_op+0x12/0x20
[    3.362175 ] RIP: 0010:free_irq+0x100/0x480
[    3.362175 ] Code: 6e 08 74 6f 4d 89 f4 e8 5e ac 09 00 4d 8b 74 24 18
4d 85 f6 75 e3 e8 4f ac 09 00 8b 75 c8 48 c7 c7 78 c1 2e 85 e8 e0 cf f5
ff <0f> 0b 48 8b 75 c0 4c 89 ff e8 72 33 0b 03 48 8b 43 40 4c 8b a0 80
[    3.362175 ] RSP: 0000:ffffc90000017b48 EFLAGS: 00010082
[    3.362175 ] RAX: 0000000000000000 RBX: ffff888104dc8000 RCX:
0000000000000000
[    3.362175 ] RDX: ffff8881003c8000 RSI: ffffffff8124a9e6 RDI:
00000000ffffffff
[    3.362175 ] RBP: ffffc90000017b88 R08: 0000000000000000 R09:
0000000000000000
[    3.362175 ] R10: ffffc900000179f0 R11: 0000000000001d04 R12:
0000000000000000
[    3.362175 ] R13: ffff888107dc6000 R14: ffff888107dc6928 R15:
ffff888104dc80a8
[    3.362175 ]  ? vprintk+0x76/0x150
[    3.362175 ]  ? free_irq+0x100/0x480
[    3.362175 ]  nj_release+0x51/0x1e0
[    3.362175 ]  nj_probe+0x450/0x950
[    3.362175 ]  ? pci_device_remove+0x110/0x110
[    3.362175 ]  local_pci_probe+0x45/0xa0
[    3.362175 ]  pci_device_probe+0x12b/0x1d0
[    3.362175 ]  really_probe+0x2a9/0x610
[    3.362175 ]  driver_probe_device+0x90/0x1d0
[    3.362175 ]  ? mutex_lock_nested+0x1b/0x20
[    3.362175 ]  device_driver_attach+0x68/0x70
[    3.362175 ]  __driver_attach+0x124/0x1b0
[    3.362175 ]  ? device_driver_attach+0x70/0x70
[    3.362175 ]  bus_for_each_dev+0xbb/0x110
[    3.362175 ]  ? rdinit_setup+0x45/0x45
[    3.362175 ]  driver_attach+0x27/0x30
[    3.362175 ]  bus_add_driver+0x1eb/0x2a0
[    3.362175 ]  driver_register+0xa9/0x180
[    3.362175 ]  __pci_register_driver+0x82/0x90
[    3.362175 ]  ? w6692_init+0x38/0x38
[    3.362175 ]  nj_init+0x36/0x38
[    3.362175 ]  do_one_initcall+0x7f/0x3d0
[    3.362175 ]  ? rdinit_setup+0x45/0x45
[    3.362175 ]  ? rcu_read_lock_sched_held+0x4f/0x80
[    3.362175 ]  kernel_init_freeable+0x2aa/0x301
[    3.362175 ]  ? rest_init+0x2c0/0x2c0
[    3.362175 ]  kernel_init+0x18/0x190
[    3.362175 ]  ? rest_init+0x2c0/0x2c0
[    3.362175 ]  ? rest_init+0x2c0/0x2c0
[    3.362175 ]  ret_from_fork+0x1f/0x30
[    3.362175 ] Dumping ftrace buffer:
[    3.362175 ]    (ftrace buffer empty)
[    3.362175 ] Kernel Offset: disabled
[    3.362175 ] Rebooting in 1 seconds..

Reported-by: Zheyu Ma <zheyuma97@gmail.com>
Signed-off-by: Zheyu Ma <zheyuma97@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/hardware/mISDN/netjet.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/isdn/hardware/mISDN/netjet.c b/drivers/isdn/hardware/mISDN/netjet.c
index ee925b58bbce..2a1ddd47a096 100644
--- a/drivers/isdn/hardware/mISDN/netjet.c
+++ b/drivers/isdn/hardware/mISDN/netjet.c
@@ -1100,7 +1100,6 @@ nj_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		card->typ = NETJET_S_TJ300;
 
 	card->base = pci_resource_start(pdev, 0);
-	card->irq = pdev->irq;
 	pci_set_drvdata(pdev, card);
 	err = setup_instance(card);
 	if (err)
-- 
cgit v1.2.3


From 020ef930b826d21c5446fdc9db80fd72a791bc21 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Sun, 16 May 2021 14:44:42 +0000
Subject: mld: fix panic in mld_newpack()

mld_newpack() doesn't allow to allocate high order page,
only order-0 allocation is allowed.
If headroom size is too large, a kernel panic could occur in skb_put().

Test commands:
    ip netns del A
    ip netns del B
    ip netns add A
    ip netns add B
    ip link add veth0 type veth peer name veth1
    ip link set veth0 netns A
    ip link set veth1 netns B

    ip netns exec A ip link set lo up
    ip netns exec A ip link set veth0 up
    ip netns exec A ip -6 a a 2001:db8:0::1/64 dev veth0
    ip netns exec B ip link set lo up
    ip netns exec B ip link set veth1 up
    ip netns exec B ip -6 a a 2001:db8:0::2/64 dev veth1
    for i in {1..99}
    do
        let A=$i-1
        ip netns exec A ip link add ip6gre$i type ip6gre \
	local 2001:db8:$A::1 remote 2001:db8:$A::2 encaplimit 100
        ip netns exec A ip -6 a a 2001:db8:$i::1/64 dev ip6gre$i
        ip netns exec A ip link set ip6gre$i up

        ip netns exec B ip link add ip6gre$i type ip6gre \
	local 2001:db8:$A::2 remote 2001:db8:$A::1 encaplimit 100
        ip netns exec B ip -6 a a 2001:db8:$i::2/64 dev ip6gre$i
        ip netns exec B ip link set ip6gre$i up
    done

Splat looks like:
kernel BUG at net/core/skbuff.c:110!
invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI
CPU: 0 PID: 7 Comm: kworker/0:1 Not tainted 5.12.0+ #891
Workqueue: ipv6_addrconf addrconf_dad_work
RIP: 0010:skb_panic+0x15d/0x15f
Code: 92 fe 4c 8b 4c 24 10 53 8b 4d 70 45 89 e0 48 c7 c7 00 ae 79 83
41 57 41 56 41 55 48 8b 54 24 a6 26 f9 ff <0f> 0b 48 8b 6c 24 20 89
34 24 e8 4a 4e 92 fe 8b 34 24 48 c7 c1 20
RSP: 0018:ffff88810091f820 EFLAGS: 00010282
RAX: 0000000000000089 RBX: ffff8881086e9000 RCX: 0000000000000000
RDX: 0000000000000089 RSI: 0000000000000008 RDI: ffffed1020123efb
RBP: ffff888005f6eac0 R08: ffffed1022fc0031 R09: ffffed1022fc0031
R10: ffff888117e00187 R11: ffffed1022fc0030 R12: 0000000000000028
R13: ffff888008284eb0 R14: 0000000000000ed8 R15: 0000000000000ec0
FS:  0000000000000000(0000) GS:ffff888117c00000(0000)
knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f8b801c5640 CR3: 0000000033c2c006 CR4: 00000000003706f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 ? ip6_mc_hdr.isra.26.constprop.46+0x12a/0x600
 ? ip6_mc_hdr.isra.26.constprop.46+0x12a/0x600
 skb_put.cold.104+0x22/0x22
 ip6_mc_hdr.isra.26.constprop.46+0x12a/0x600
 ? rcu_read_lock_sched_held+0x91/0xc0
 mld_newpack+0x398/0x8f0
 ? ip6_mc_hdr.isra.26.constprop.46+0x600/0x600
 ? lock_contended+0xc40/0xc40
 add_grhead.isra.33+0x280/0x380
 add_grec+0x5ca/0xff0
 ? mld_sendpack+0xf40/0xf40
 ? lock_downgrade+0x690/0x690
 mld_send_initial_cr.part.34+0xb9/0x180
 ipv6_mc_dad_complete+0x15d/0x1b0
 addrconf_dad_completed+0x8d2/0xbb0
 ? lock_downgrade+0x690/0x690
 ? addrconf_rs_timer+0x660/0x660
 ? addrconf_dad_work+0x73c/0x10e0
 addrconf_dad_work+0x73c/0x10e0

Allowing high order page allocation could fix this problem.

Fixes: 72e09ad107e7 ("ipv6: avoid high order allocations")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/mcast.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 0d59efb6b49e..d36ef9d25e73 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1745,10 +1745,7 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
 		     IPV6_TLV_PADN, 0 };
 
 	/* we assume size > sizeof(ra) here */
-	/* limit our allocations to order-0 page */
-	size = min_t(int, size, SKB_MAX_ORDER(0, 0));
 	skb = sock_alloc_send_skb(sk, size, 1, &err);
-
 	if (!skb)
 		return NULL;
 
-- 
cgit v1.2.3


From 04c26faa51d1e2fe71cf13c45791f5174c37f986 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 17 May 2021 02:28:58 +0800
Subject: tipc: wait and exit until all work queues are done

On some host, a crash could be triggered simply by repeating these
commands several times:

  # modprobe tipc
  # tipc bearer enable media udp name UDP1 localip 127.0.0.1
  # rmmod tipc

  [] BUG: unable to handle kernel paging request at ffffffffc096bb00
  [] Workqueue: events 0xffffffffc096bb00
  [] Call Trace:
  []  ? process_one_work+0x1a7/0x360
  []  ? worker_thread+0x30/0x390
  []  ? create_worker+0x1a0/0x1a0
  []  ? kthread+0x116/0x130
  []  ? kthread_flush_work_fn+0x10/0x10
  []  ? ret_from_fork+0x35/0x40

When removing the TIPC module, the UDP tunnel sock will be delayed to
release in a work queue as sock_release() can't be done in rtnl_lock().
If the work queue is schedule to run after the TIPC module is removed,
kernel will crash as the work queue function cleanup_beareri() code no
longer exists when trying to invoke it.

To fix it, this patch introduce a member wq_count in tipc_net to track
the numbers of work queues in schedule, and  wait and exit until all
work queues are done in tipc_exit_net().

Fixes: d0f91938bede ("tipc: add ip/udp media type")
Reported-by: Shuang Li <shuali@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/core.c      | 2 ++
 net/tipc/core.h      | 2 ++
 net/tipc/udp_media.c | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/net/tipc/core.c b/net/tipc/core.c
index 5cc1f0307215..72f3ac73779b 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -119,6 +119,8 @@ static void __net_exit tipc_exit_net(struct net *net)
 #ifdef CONFIG_TIPC_CRYPTO
 	tipc_crypto_stop(&tipc_net(net)->crypto_tx);
 #endif
+	while (atomic_read(&tn->wq_count))
+		cond_resched();
 }
 
 static void __net_exit tipc_pernet_pre_exit(struct net *net)
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 03de7b213f55..5741ae488bb5 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -149,6 +149,8 @@ struct tipc_net {
 #endif
 	/* Work item for net finalize */
 	struct tipc_net_work final_work;
+	/* The numbers of work queues in schedule */
+	atomic_t wq_count;
 };
 
 static inline struct tipc_net *tipc_net(struct net *net)
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index e556d2cdc064..c2bb818704c8 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -814,6 +814,7 @@ static void cleanup_bearer(struct work_struct *work)
 		kfree_rcu(rcast, rcu);
 	}
 
+	atomic_dec(&tipc_net(sock_net(ub->ubsock->sk))->wq_count);
 	dst_cache_destroy(&ub->rcast.dst_cache);
 	udp_tunnel_sock_release(ub->ubsock);
 	synchronize_net();
@@ -834,6 +835,7 @@ static void tipc_udp_disable(struct tipc_bearer *b)
 	RCU_INIT_POINTER(ub->bearer, NULL);
 
 	/* sock_release need to be done outside of rtnl lock */
+	atomic_inc(&tipc_net(sock_net(ub->ubsock->sk))->wq_count);
 	INIT_WORK(&ub->work, cleanup_bearer);
 	schedule_work(&ub->work);
 }
-- 
cgit v1.2.3


From 4710ccc52e8e504a5617a889843a18cd06f1ab72 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Mon, 10 May 2021 15:35:14 -0500
Subject: dt-bindings: media: renesas,drif: Use graph schema

Convert the renesas,drif binding schema to use the graph schema. The
binding referred to video-interfaces.txt, but it doesn't actually use any
properties from it as 'sync-active' is a custom property. As 'sync-active'
is custom, it needs a type definition.

Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Ramesh Shanmugasundaram <rashanmu@gmail.com>
Cc: linux-media@vger.kernel.org
Cc: linux-renesas-soc@vger.kernel.org
Signed-off-by: Rob Herring <robh@kernel.org>
Reviewed-by: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
Link: https://lore.kernel.org/r/20210510203514.603471-1-robh@kernel.org
---
 .../devicetree/bindings/media/renesas,drif.yaml      | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/Documentation/devicetree/bindings/media/renesas,drif.yaml b/Documentation/devicetree/bindings/media/renesas,drif.yaml
index f1bdaeab4053..ce505a7c006a 100644
--- a/Documentation/devicetree/bindings/media/renesas,drif.yaml
+++ b/Documentation/devicetree/bindings/media/renesas,drif.yaml
@@ -99,32 +99,26 @@ properties:
       Indicates that the channel acts as primary among the bonded channels.
 
   port:
-    type: object
+    $ref: /schemas/graph.yaml#/properties/port
+    unevaluatedProperties: false
     description:
-      Child port node corresponding to the data input, in accordance with the
-      video interface bindings defined in
-      Documentation/devicetree/bindings/media/video-interfaces.txt.
-      The port node must contain at least one endpoint.
+      Child port node corresponding to the data input. The port node must
+      contain at least one endpoint.
 
     properties:
       endpoint:
-        type: object
+        $ref: /schemas/graph.yaml#/$defs/endpoint-base
+        unevaluatedProperties: false
 
         properties:
-          remote-endpoint:
-            description:
-              A phandle to the remote tuner endpoint subnode in remote node
-              port.
-
           sync-active:
+            $ref: /schemas/types.yaml#/definitions/uint32
             enum: [0, 1]
             description:
               Indicates sync signal polarity, 0/1 for low/high respectively.
               This property maps to SYNCAC bit in the hardware manual. The
               default is 1 (active high).
 
-        additionalProperties: false
-
 required:
   - compatible
   - reg
-- 
cgit v1.2.3


From c17611592d9635c443bedc9be901f4463f45c6d5 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Mon, 10 May 2021 15:45:24 -0500
Subject: dt-bindings: More removals of type references on common properties

Users of common properties shouldn't have a type definition as the
common schemas already have one. A few new ones slipped in and
*-names was missed in the last clean-up pass. Drop all the unnecessary
type references in the tree.

A meta-schema update to catch these is pending.

Cc: Stephen Boyd <sboyd@kernel.org>
Cc: Olivier Moysan <olivier.moysan@foss.st.com>
Cc: Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
Cc: Lars-Peter Clausen <lars@metafoo.de>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Orson Zhai <orsonzhai@gmail.com>
Cc: Baolin Wang <baolin.wang7@gmail.com>
Cc: Chunyan Zhang <zhang.lyra@gmail.com>
Cc: Liam Girdwood <lgirdwood@gmail.com>
Cc: Fabrice Gasnier <fabrice.gasnier@st.com>
Cc: Odelu Kukatla <okukatla@codeaurora.org>
Cc: Alex Elder <elder@kernel.org>
Cc: Shengjiu Wang <shengjiu.wang@nxp.com>
Cc: linux-clk@vger.kernel.org
Cc: alsa-devel@alsa-project.org
Cc: linux-iio@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-input@vger.kernel.org
Cc: linux-pm@vger.kernel.org
Cc: netdev@vger.kernel.org
Signed-off-by: Rob Herring <robh@kernel.org>
Acked-by: Mark Brown <broonie@kernel.org>
Acked-by: Georgi Djakov <djakov@kernel.org>
Reviewed-by: Luca Ceresoli <luca@lucaceresoli.net>
Acked-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Link: https://lore.kernel.org/r/20210510204524.617390-1-robh@kernel.org
---
 Documentation/devicetree/bindings/clock/idt,versaclock5.yaml       | 2 --
 Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml  | 1 -
 Documentation/devicetree/bindings/input/input.yaml                 | 1 -
 Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml      | 1 -
 Documentation/devicetree/bindings/net/qcom,ipa.yaml                | 1 -
 Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml | 2 +-
 Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml             | 2 +-
 7 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml b/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml
index c268debe5b8d..28675b0b80f1 100644
--- a/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml
+++ b/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml
@@ -60,7 +60,6 @@ properties:
     maxItems: 2
 
   idt,xtal-load-femtofarads:
-    $ref: /schemas/types.yaml#/definitions/uint32
     minimum: 9000
     maximum: 22760
     description: Optional load capacitor for XTAL1 and XTAL2
@@ -84,7 +83,6 @@ patternProperties:
         enum: [ 1800000, 2500000, 3300000 ]
       idt,slew-percent:
         description: The Slew rate control for CMOS single-ended.
-        $ref: /schemas/types.yaml#/definitions/uint32
         enum: [ 80, 85, 90, 100 ]
 
 required:
diff --git a/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml b/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml
index 6f2398cdc82d..1e7894e524f9 100644
--- a/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml
@@ -102,7 +102,6 @@ patternProperties:
 
       st,adc-channel-names:
         description: List of single-ended channel names.
-        $ref: /schemas/types.yaml#/definitions/string-array
 
       st,filter-order:
         description: |
diff --git a/Documentation/devicetree/bindings/input/input.yaml b/Documentation/devicetree/bindings/input/input.yaml
index 74244d21d2b3..d41d8743aad4 100644
--- a/Documentation/devicetree/bindings/input/input.yaml
+++ b/Documentation/devicetree/bindings/input/input.yaml
@@ -38,6 +38,5 @@ properties:
       Duration in seconds which the key should be kept pressed for device to
       reset automatically. Device with key pressed reset feature can specify
       this property.
-    $ref: /schemas/types.yaml#/definitions/uint32
 
 additionalProperties: true
diff --git a/Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml b/Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml
index cb6498108b78..36c955965d90 100644
--- a/Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml
+++ b/Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml
@@ -92,7 +92,6 @@ properties:
       this interconnect to send RPMh commands.
 
   qcom,bcm-voter-names:
-    $ref: /schemas/types.yaml#/definitions/string-array
     description: |
       Names for each of the qcom,bcm-voters specified.
 
diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml
index 7443490d4cc6..5fe6d3dceb08 100644
--- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml
+++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml
@@ -105,7 +105,6 @@ properties:
       - description: Whether the IPA clock is enabled (if valid)
 
   qcom,smem-state-names:
-    $ref: /schemas/types.yaml#/definitions/string-array
     description: The names of the state bits used for SMP2P output
     items:
       - const: ipa-clock-enabled-valid
diff --git a/Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml b/Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml
index db1aa238cda5..b62c2431f94e 100644
--- a/Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml
+++ b/Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml
@@ -20,7 +20,7 @@ properties:
     maxItems: 1
 
   phys:
-    $ref: /schemas/types.yaml#/definitions/phandle
+    maxItems: 1
     description: phandle to the USB phy
 
   monitored-battery:
diff --git a/Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml b/Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml
index b4c190bddd84..61802a11baf4 100644
--- a/Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml
+++ b/Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml
@@ -49,7 +49,7 @@ properties:
     maxItems: 1
 
   memory-region:
-    $ref: /schemas/types.yaml#/definitions/phandle
+    maxItems: 1
     description:
       phandle to a node describing reserved memory (System RAM memory)
       The M core can't access all the DDR memory space on some platform,
-- 
cgit v1.2.3


From 3c814519743a919f8b3c236c0565e24709806d66 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Mon, 17 May 2021 10:19:54 -0400
Subject: MAINTAINERS: net: remove stale website link

The http://www.linuxfoundation.org/en/Net does not contain networking
subsystem description ("Nothing found").

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 2 --
 1 file changed, 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 1d834bebf469..c1cb2e38ae2e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12709,7 +12709,6 @@ M:	"David S. Miller" <davem@davemloft.net>
 M:	Jakub Kicinski <kuba@kernel.org>
 L:	netdev@vger.kernel.org
 S:	Maintained
-W:	http://www.linuxfoundation.org/en/Net
 Q:	https://patchwork.kernel.org/project/netdevbpf/list/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
@@ -12754,7 +12753,6 @@ M:	"David S. Miller" <davem@davemloft.net>
 M:	Jakub Kicinski <kuba@kernel.org>
 L:	netdev@vger.kernel.org
 S:	Maintained
-W:	http://www.linuxfoundation.org/en/Net
 Q:	https://patchwork.kernel.org/project/netdevbpf/list/
 B:	mailto:netdev@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
-- 
cgit v1.2.3


From 35d96e631860226d5dc4de0fad0a415362ec2457 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 17 May 2021 16:13:35 +0200
Subject: bonding: init notify_work earlier to avoid uninitialized use

If bond_kobj_init() or later kzalloc() in bond_alloc_slave() fail,
then we call kobject_put() on the slave->kobj. This in turn calls
the release function slave_kobj_release() which will always try to
cancel_delayed_work_sync(&slave->notify_work), which shouldn't be
done on an uninitialized work struct.

Always initialize the work struct earlier to avoid problems here.

Syzbot bisected this down to a completely pointless commit, some
fault injection may have been at work here that caused the alloc
failure in the first place, which may interact badly with bisect.

Reported-by: syzbot+bfda097c12a00c8cae67@syzkaller.appspotmail.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Acked-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 20bbda1b36e1..c5a646d06102 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1526,6 +1526,7 @@ static struct slave *bond_alloc_slave(struct bonding *bond,
 
 	slave->bond = bond;
 	slave->dev = slave_dev;
+	INIT_DELAYED_WORK(&slave->notify_work, bond_netdev_notify_work);
 
 	if (bond_kobj_init(slave))
 		return NULL;
@@ -1538,7 +1539,6 @@ static struct slave *bond_alloc_slave(struct bonding *bond,
 			return NULL;
 		}
 	}
-	INIT_DELAYED_WORK(&slave->notify_work, bond_netdev_notify_work);
 
 	return slave;
 }
-- 
cgit v1.2.3


From 444d7be9532dcfda8e0385226c862fd7e986f607 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Mon, 17 May 2021 10:47:06 +0200
Subject: net/smc: remove device from smcd_dev_list after failed device_add()

If the device_add() for a smcd_dev fails, there's no cleanup step that
rolls back the earlier list_add(). The device subsequently gets freed,
and we end up with a corrupted list.

Add some error handling that removes the device from the list.

Fixes: c6ba7c9ba43d ("net/smc: add base infrastructure for SMC-D and ISM")
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_ism.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index 9c6e95882553..d24b96ea0eb5 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -428,6 +428,8 @@ EXPORT_SYMBOL_GPL(smcd_alloc_dev);
 
 int smcd_register_dev(struct smcd_dev *smcd)
 {
+	int rc;
+
 	mutex_lock(&smcd_dev_list.mutex);
 	if (list_empty(&smcd_dev_list.list)) {
 		u8 *system_eid = NULL;
@@ -447,7 +449,14 @@ int smcd_register_dev(struct smcd_dev *smcd)
 			    dev_name(&smcd->dev), smcd->pnetid,
 			    smcd->pnetid_by_user ? " (user defined)" : "");
 
-	return device_add(&smcd->dev);
+	rc = device_add(&smcd->dev);
+	if (rc) {
+		mutex_lock(&smcd_dev_list.mutex);
+		list_del(&smcd->list);
+		mutex_unlock(&smcd_dev_list.mutex);
+	}
+
+	return rc;
 }
 EXPORT_SYMBOL_GPL(smcd_register_dev);
 
-- 
cgit v1.2.3


From 1d482e666b8e74c7555dbdfbfb77205eeed3ff2d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 17 May 2021 16:38:09 +0200
Subject: netlink: disable IRQs for netlink_lock_table()

Syzbot reports that in mac80211 we have a potential deadlock
between our "local->stop_queue_reasons_lock" (spinlock) and
netlink's nl_table_lock (rwlock). This is because there's at
least one situation in which we might try to send a netlink
message with this spinlock held while it is also possible to
take the spinlock from a hardirq context, resulting in the
following deadlock scenario reported by lockdep:

       CPU0                    CPU1
       ----                    ----
  lock(nl_table_lock);
                               local_irq_disable();
                               lock(&local->queue_stop_reason_lock);
                               lock(nl_table_lock);
  <Interrupt>
    lock(&local->queue_stop_reason_lock);

This seems valid, we can take the queue_stop_reason_lock in
any kind of context ("CPU0"), and call ieee80211_report_ack_skb()
with the spinlock held and IRQs disabled ("CPU1") in some
code path (ieee80211_do_stop() via ieee80211_free_txskb()).

Short of disallowing netlink use in scenarios like these
(which would be rather complex in mac80211's case due to
the deep callchain), it seems the only fix for this is to
disable IRQs while nl_table_lock is held to avoid hitting
this scenario, this disallows the "CPU0" portion of the
reported deadlock.

Note that the writer side (netlink_table_grab()) already
disables IRQs for this lock.

Unfortunately though, this seems like a huge hammer, and
maybe the whole netlink table locking should be reworked.

Reported-by: syzbot+69ff9dff50dcfe14ddd4@syzkaller.appspotmail.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 3a62f97acf39..6133e412b948 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -461,11 +461,13 @@ void netlink_table_ungrab(void)
 static inline void
 netlink_lock_table(void)
 {
+	unsigned long flags;
+
 	/* read_lock() synchronizes us to netlink_table_grab */
 
-	read_lock(&nl_table_lock);
+	read_lock_irqsave(&nl_table_lock, flags);
 	atomic_inc(&nl_table_users);
-	read_unlock(&nl_table_lock);
+	read_unlock_irqrestore(&nl_table_lock, flags);
 }
 
 static inline void
-- 
cgit v1.2.3


From 5aec55b46c6238506cdf0c60cd0e42ab77a1e5e0 Mon Sep 17 00:00:00 2001
From: Catherine Sullivan <csully@google.com>
Date: Mon, 17 May 2021 14:08:11 -0700
Subject: gve: Check TX QPL was actually assigned

Correctly check the TX QPL was assigned and unassigned if
other steps in the allocation fail.

Fixes: f5cedc84a30d (gve: Add transmit and receive support)
Signed-off-by: Catherine Sullivan <csully@google.com>
Signed-off-by: David Awogbemila <awogbemila@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/google/gve/gve_tx.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c
index 6938f3a939d6..bb57c42872b4 100644
--- a/drivers/net/ethernet/google/gve/gve_tx.c
+++ b/drivers/net/ethernet/google/gve/gve_tx.c
@@ -212,10 +212,11 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
 	tx->dev = &priv->pdev->dev;
 	if (!tx->raw_addressing) {
 		tx->tx_fifo.qpl = gve_assign_tx_qpl(priv);
-
+		if (!tx->tx_fifo.qpl)
+			goto abort_with_desc;
 		/* map Tx FIFO */
 		if (gve_tx_fifo_init(priv, &tx->tx_fifo))
-			goto abort_with_desc;
+			goto abort_with_qpl;
 	}
 
 	tx->q_resources =
@@ -236,6 +237,9 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
 abort_with_fifo:
 	if (!tx->raw_addressing)
 		gve_tx_fifo_release(priv, &tx->tx_fifo);
+abort_with_qpl:
+	if (!tx->raw_addressing)
+		gve_unassign_qpl(priv, tx->tx_fifo.qpl->id);
 abort_with_desc:
 	dma_free_coherent(hdev, bytes, tx->desc, tx->bus);
 	tx->desc = NULL;
-- 
cgit v1.2.3


From e96b491a0ffa35a8a9607c193fa4d894ca9fb32f Mon Sep 17 00:00:00 2001
From: David Awogbemila <awogbemila@google.com>
Date: Mon, 17 May 2021 14:08:12 -0700
Subject: gve: Update mgmt_msix_idx if num_ntfy changes

If we do not get the expected number of vectors from
pci_enable_msix_range, we update priv->num_ntfy_blks but not
priv->mgmt_msix_idx. This patch fixes this so that priv->mgmt_msix_idx
is updated accordingly.

Fixes: f5cedc84a30d ("gve: Add transmit and receive support")
Signed-off-by: David Awogbemila <awogbemila@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/google/gve/gve_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 7302498c6df3..64192942ca53 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -220,6 +220,7 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
 		int vecs_left = new_num_ntfy_blks % 2;
 
 		priv->num_ntfy_blks = new_num_ntfy_blks;
+		priv->mgmt_msix_idx = priv->num_ntfy_blks;
 		priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
 						vecs_per_type);
 		priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
-- 
cgit v1.2.3


From 5218e919c8d06279884aa0baf76778a6817d5b93 Mon Sep 17 00:00:00 2001
From: David Awogbemila <awogbemila@google.com>
Date: Mon, 17 May 2021 14:08:13 -0700
Subject: gve: Add NULL pointer checks when freeing irqs.

When freeing notification blocks, we index priv->msix_vectors.
If we failed to allocate priv->msix_vectors (see abort_with_msix_vectors)
this could lead to a NULL pointer dereference if the driver is unloaded.

Fixes: 893ce44df565 ("gve: Add basic driver framework for Compute Engine Virtual NIC")
Signed-off-by: David Awogbemila <awogbemila@google.com>
Acked-by: Willem de Brujin <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/google/gve/gve_main.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 64192942ca53..21a5d058dab4 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -301,20 +301,22 @@ static void gve_free_notify_blocks(struct gve_priv *priv)
 {
 	int i;
 
-	/* Free the irqs */
-	for (i = 0; i < priv->num_ntfy_blks; i++) {
-		struct gve_notify_block *block = &priv->ntfy_blocks[i];
-		int msix_idx = i;
-
-		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
-				      NULL);
-		free_irq(priv->msix_vectors[msix_idx].vector, block);
+	if (priv->msix_vectors) {
+		/* Free the irqs */
+		for (i = 0; i < priv->num_ntfy_blks; i++) {
+			struct gve_notify_block *block = &priv->ntfy_blocks[i];
+			int msix_idx = i;
+
+			irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
+					      NULL);
+			free_irq(priv->msix_vectors[msix_idx].vector, block);
+		}
+		free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
 	}
 	dma_free_coherent(&priv->pdev->dev,
 			  priv->num_ntfy_blks * sizeof(*priv->ntfy_blocks),
 			  priv->ntfy_blocks, priv->ntfy_block_bus);
 	priv->ntfy_blocks = NULL;
-	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
 	pci_disable_msix(priv->pdev);
 	kvfree(priv->msix_vectors);
 	priv->msix_vectors = NULL;
-- 
cgit v1.2.3


From f81781835f0adfae8d701545386030d223efcd6f Mon Sep 17 00:00:00 2001
From: Catherine Sullivan <csully@google.com>
Date: Mon, 17 May 2021 14:08:14 -0700
Subject: gve: Upgrade memory barrier in poll routine

As currently written, if the driver checks for more work (via
gve_tx_poll or gve_rx_poll) before the device posts work and the
irq doorbell is not unmasked
(via iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, ...)) before the device
attempts to raise an interrupt, an interrupt is lost and this could
potentially lead to the traffic being completely halted. For
example, if a tx queue has already been stopped, the driver won't get
the chance to complete work and egress will be halted.

We need a full memory barrier in the poll
routine to ensure that the irq doorbell is unmasked before the driver
checks for more work.

Fixes: f5cedc84a30d ("gve: Add transmit and receive support")
Signed-off-by: Catherine Sullivan <csully@google.com>
Signed-off-by: David Awogbemila <awogbemila@google.com>
Acked-by: Willem de Brujin <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/google/gve/gve_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 21a5d058dab4..bbc423e93122 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -180,7 +180,7 @@ static int gve_napi_poll(struct napi_struct *napi, int budget)
 	/* Double check we have no extra work.
 	 * Ensure unmask synchronizes with checking for work.
 	 */
-	dma_rmb();
+	mb();
 	if (block->tx)
 		reschedule |= gve_tx_poll(block, -1);
 	if (block->rx)
-- 
cgit v1.2.3


From fbd4a28b4fa66faaa7f510c0adc531d37e0a7848 Mon Sep 17 00:00:00 2001
From: David Awogbemila <awogbemila@google.com>
Date: Mon, 17 May 2021 14:08:15 -0700
Subject: gve: Correct SKB queue index validation.

SKBs with skb_get_queue_mapping(skb) == tx_cfg.num_queues should also be
considered invalid.

Fixes: f5cedc84a30d ("gve: Add transmit and receive support")
Signed-off-by: David Awogbemila <awogbemila@google.com>
Acked-by: Willem de Brujin <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/google/gve/gve_tx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c
index bb57c42872b4..3e04a3973d68 100644
--- a/drivers/net/ethernet/google/gve/gve_tx.c
+++ b/drivers/net/ethernet/google/gve/gve_tx.c
@@ -593,7 +593,7 @@ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev)
 	struct gve_tx_ring *tx;
 	int nsegs;
 
-	WARN(skb_get_queue_mapping(skb) > priv->tx_cfg.num_queues,
+	WARN(skb_get_queue_mapping(skb) >= priv->tx_cfg.num_queues,
 	     "skb queue index out of range");
 	tx = &priv->tx[skb_get_queue_mapping(skb)];
 	if (unlikely(gve_maybe_stop_tx(tx, skb))) {
-- 
cgit v1.2.3


From 1dde47a66d4fb181830d6fa000e5ea86907b639e Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 17 May 2021 12:04:13 +0300
Subject: net: mdiobus: get rid of a BUG_ON()

We spotted a bug recently during a review where a driver was
unregistering a bus that wasn't registered, which would trigger this
BUG_ON().  Let's handle that situation more gracefully, and just print
a warning and return.

Reported-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio_bus.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index dadf75ff3ab9..6045ad3def12 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -607,7 +607,8 @@ void mdiobus_unregister(struct mii_bus *bus)
 	struct mdio_device *mdiodev;
 	int i;
 
-	BUG_ON(bus->state != MDIOBUS_REGISTERED);
+	if (WARN_ON_ONCE(bus->state != MDIOBUS_REGISTERED))
+		return;
 	bus->state = MDIOBUS_UNREGISTERED;
 
 	for (i = 0; i < PHY_MAX_ADDR; i++) {
-- 
cgit v1.2.3


From c67d734975a25ba7b6e8f820c13e0d8eb4a2a77c Mon Sep 17 00:00:00 2001
From: Milian Wolff <milian.wolff@kdab.com>
Date: Thu, 29 Apr 2021 20:57:59 +0200
Subject: perf buildid-list: Initialize zstd_data

Fixes segmentation fault when trying to obtain buildid list (e.g. via
perf-archive) from a zstd-compressed `perf.data` file:

```
    $ perf record -z ls
    ...
    [ perf record: Captured and wrote 0,010 MB perf.data, compressed (original 0,001 MB, ratio is 2,190) ]
    $ memcheck perf buildid-list
    ...
    ==57268== Invalid read of size 4
    ==57268==    at 0x5260D88: ZSTD_decompressStream (in /usr/lib/libzstd.so.1.4.9)
    ==57268==    by 0x4BB51B: zstd_decompress_stream (zstd.c:100)
    ==57268==    by 0x425C6C: perf_session__process_compressed_event (session.c:73)
    ==57268==    by 0x427450: perf_session__process_user_event (session.c:1631)
    ==57268==    by 0x42A609: reader__process_events (session.c:2207)
    ==57268==    by 0x42A609: __perf_session__process_events (session.c:2264)
    ==57268==    by 0x42A609: perf_session__process_events (session.c:2297)
    ==57268==    by 0x343A62: perf_session__list_build_ids (builtin-buildid-list.c:88)
    ==57268==    by 0x343A62: cmd_buildid_list (builtin-buildid-list.c:120)
    ==57268==    by 0x3C7732: run_builtin (perf.c:313)
    ==57268==    by 0x331157: handle_internal_command (perf.c:365)
    ==57268==    by 0x331157: run_argv (perf.c:409)
    ==57268==    by 0x331157: main (perf.c:539)
    ==57268==  Address 0x7470 is not stack'd, malloc'd or (recently) free'd
```

Signed-off-by: Milian Wolff <milian.wolff@kdab.com>
Cc: Alexey Budankov <alexey.budankov@linux.intel.com>
Link: http://lore.kernel.org/lkml/20210429185759.59870-1-milian.wolff@kdab.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-buildid-list.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index 87f5b1a4a7fa..833405c27dae 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -80,6 +80,9 @@ static int perf_session__list_build_ids(bool force, bool with_hits)
 	if (!perf_header__has_feat(&session->header, HEADER_BUILD_ID))
 		with_hits = true;
 
+	if (zstd_init(&(session->zstd_data), 0) < 0)
+		pr_warning("Decompression initialization failed. Reported data may be incomplete.\n");
+
 	/*
 	 * in pipe-mode, the only way to get the buildids is to parse
 	 * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID
-- 
cgit v1.2.3


From 3c91e8efaf4838e4c8e465656e9707b5de26f3db Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 12 May 2021 12:35:07 -0300
Subject: tools arch kvm: Sync kvm headers with the kernel sources

To pick up the changes from:

  70f094f4f01dc4d6 ("KVM: nVMX: Properly pad 'struct kvm_vmx_nested_state_hdr'")

That don't entail changes in tooling.

This silences these tools/perf build warnings:

  Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/kvm.h' differs from latest version at 'arch/x86/include/uapi/asm/kvm.h'
  diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/x86/include/uapi/asm/kvm.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 5a3022c8af82..0662f644aad9 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -437,6 +437,8 @@ struct kvm_vmx_nested_state_hdr {
 		__u16 flags;
 	} smm;
 
+	__u16 pad;
+
 	__u32 flags;
 	__u64 preemption_timer_deadline;
 };
-- 
cgit v1.2.3


From fea63d54f7a3e74f8ab489a8b82413a29849a594 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Mon, 17 May 2021 12:42:32 -0500
Subject: x86/sev-es: Move sev_es_put_ghcb() in prep for follow on patch

Move the location of sev_es_put_ghcb() in preparation for an update to it
in a follow-on patch. This will better highlight the changes being made
to the function.

No functional change.

Fixes: 0786138c78e79 ("x86/sev-es: Add a Runtime #VC Exception Handler")
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/8c07662ec17d3d82e5c53841a1d9e766d3bdbab6.1621273353.git.thomas.lendacky@amd.com
---
 arch/x86/kernel/sev.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 9578c82832aa..45e212675811 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -221,24 +221,6 @@ static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state)
 	return ghcb;
 }
 
-static __always_inline void sev_es_put_ghcb(struct ghcb_state *state)
-{
-	struct sev_es_runtime_data *data;
-	struct ghcb *ghcb;
-
-	data = this_cpu_read(runtime_data);
-	ghcb = &data->ghcb_page;
-
-	if (state->ghcb) {
-		/* Restore GHCB from Backup */
-		*ghcb = *state->ghcb;
-		data->backup_ghcb_active = false;
-		state->ghcb = NULL;
-	} else {
-		data->ghcb_active = false;
-	}
-}
-
 /* Needed in vc_early_forward_exception */
 void do_early_exception(struct pt_regs *regs, int trapnr);
 
@@ -461,6 +443,24 @@ static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt
 /* Include code shared with pre-decompression boot stage */
 #include "sev-shared.c"
 
+static __always_inline void sev_es_put_ghcb(struct ghcb_state *state)
+{
+	struct sev_es_runtime_data *data;
+	struct ghcb *ghcb;
+
+	data = this_cpu_read(runtime_data);
+	ghcb = &data->ghcb_page;
+
+	if (state->ghcb) {
+		/* Restore GHCB from Backup */
+		*ghcb = *state->ghcb;
+		data->backup_ghcb_active = false;
+		state->ghcb = NULL;
+	} else {
+		data->ghcb_active = false;
+	}
+}
+
 void noinstr __sev_es_nmi_complete(void)
 {
 	struct ghcb_state state;
-- 
cgit v1.2.3


From a50c5bebc99c525e7fbc059988c6a5ab8680cb76 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Mon, 17 May 2021 12:42:33 -0500
Subject: x86/sev-es: Invalidate the GHCB after completing VMGEXIT

Since the VMGEXIT instruction can be issued from userspace, invalidate
the GHCB after performing VMGEXIT processing in the kernel.

Invalidation is only required after userspace is available, so call
vc_ghcb_invalidate() from sev_es_put_ghcb(). Update vc_ghcb_invalidate()
to additionally clear the GHCB exit code so that it is always presented
as 0 when VMGEXIT has been issued by anything else besides the kernel.

Fixes: 0786138c78e79 ("x86/sev-es: Add a Runtime #VC Exception Handler")
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/5a8130462e4f0057ee1184509cd056eedd78742b.1621273353.git.thomas.lendacky@amd.com
---
 arch/x86/kernel/sev-shared.c | 1 +
 arch/x86/kernel/sev.c        | 5 +++++
 2 files changed, 6 insertions(+)

diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
index 6ec8b3bfd76e..9f90f460a28c 100644
--- a/arch/x86/kernel/sev-shared.c
+++ b/arch/x86/kernel/sev-shared.c
@@ -63,6 +63,7 @@ static bool sev_es_negotiate_protocol(void)
 
 static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb)
 {
+	ghcb->save.sw_exit_code = 0;
 	memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
 }
 
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 45e212675811..4fa111becc93 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -457,6 +457,11 @@ static __always_inline void sev_es_put_ghcb(struct ghcb_state *state)
 		data->backup_ghcb_active = false;
 		state->ghcb = NULL;
 	} else {
+		/*
+		 * Invalidate the GHCB so a VMGEXIT instruction issued
+		 * from userspace won't appear to be valid.
+		 */
+		vc_ghcb_invalidate(ghcb);
 		data->ghcb_active = false;
 	}
 }
-- 
cgit v1.2.3


From 9f079c1bdc9087842dc5ac9d81b1d7f2578e81ce Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Tue, 18 May 2021 10:25:10 +0900
Subject: ALSA: dice: disable double_pcm_frames mode for M-Audio Profire 610,
 2626 and Avid M-Box 3 Pro

ALSA dice driver detects jumbo payload at high sampling transfer frequency
for below models:

 * Avid M-Box 3 Pro
 * M-Audio Profire 610
 * M-Audio Profire 2626

Although many DICE-based devices have a quirk at high sampling transfer
frequency to multiplex double number of PCM frames into data block than
the number in IEC 61883-1/6, the above devices are just compliant to
IEC 61883-1/6.

This commit disables the mode of double_pcm_frames for the models.

Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Link: https://lore.kernel.org/r/20210518012510.37126-1-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/firewire/dice/dice-pcm.c    |  4 ++--
 sound/firewire/dice/dice-stream.c |  2 +-
 sound/firewire/dice/dice.c        | 24 ++++++++++++++++++++++++
 sound/firewire/dice/dice.h        |  3 ++-
 4 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/sound/firewire/dice/dice-pcm.c b/sound/firewire/dice/dice-pcm.c
index af8a90ee40f3..a69ca1111b03 100644
--- a/sound/firewire/dice/dice-pcm.c
+++ b/sound/firewire/dice/dice-pcm.c
@@ -218,7 +218,7 @@ static int pcm_open(struct snd_pcm_substream *substream)
 
 		if (frames_per_period > 0) {
 			// For double_pcm_frame quirk.
-			if (rate > 96000) {
+			if (rate > 96000 && !dice->disable_double_pcm_frames) {
 				frames_per_period *= 2;
 				frames_per_buffer *= 2;
 			}
@@ -273,7 +273,7 @@ static int pcm_hw_params(struct snd_pcm_substream *substream,
 
 		mutex_lock(&dice->mutex);
 		// For double_pcm_frame quirk.
-		if (rate > 96000) {
+		if (rate > 96000 && !dice->disable_double_pcm_frames) {
 			events_per_period /= 2;
 			events_per_buffer /= 2;
 		}
diff --git a/sound/firewire/dice/dice-stream.c b/sound/firewire/dice/dice-stream.c
index 1a14c083e8ce..c4dfe76500c2 100644
--- a/sound/firewire/dice/dice-stream.c
+++ b/sound/firewire/dice/dice-stream.c
@@ -181,7 +181,7 @@ static int keep_resources(struct snd_dice *dice, struct amdtp_stream *stream,
 	// as 'Dual Wire'.
 	// For this quirk, blocking mode is required and PCM buffer size should
 	// be aligned to SYT_INTERVAL.
-	double_pcm_frames = rate > 96000;
+	double_pcm_frames = (rate > 96000 && !dice->disable_double_pcm_frames);
 	if (double_pcm_frames) {
 		rate /= 2;
 		pcm_chs *= 2;
diff --git a/sound/firewire/dice/dice.c b/sound/firewire/dice/dice.c
index 107a81691f0e..239d164b0eea 100644
--- a/sound/firewire/dice/dice.c
+++ b/sound/firewire/dice/dice.c
@@ -21,6 +21,7 @@ MODULE_LICENSE("GPL v2");
 #define OUI_SSL			0x0050c2	// Actually ID reserved by IEEE.
 #define OUI_PRESONUS		0x000a92
 #define OUI_HARMAN		0x000fd7
+#define OUI_AVID		0x00a07e
 
 #define DICE_CATEGORY_ID	0x04
 #define WEISS_CATEGORY_ID	0x00
@@ -222,6 +223,14 @@ static int dice_probe(struct fw_unit *unit,
 				(snd_dice_detect_formats_t)entry->driver_data;
 	}
 
+	// Below models are compliant to IEC 61883-1/6 and have no quirk at high sampling transfer
+	// frequency.
+	// * Avid M-Box 3 Pro
+	// * M-Audio Profire 610
+	// * M-Audio Profire 2626
+	if (entry->vendor_id == OUI_MAUDIO || entry->vendor_id == OUI_AVID)
+		dice->disable_double_pcm_frames = true;
+
 	spin_lock_init(&dice->lock);
 	mutex_init(&dice->mutex);
 	init_completion(&dice->clock_accepted);
@@ -278,7 +287,22 @@ static void dice_bus_reset(struct fw_unit *unit)
 
 #define DICE_INTERFACE	0x000001
 
+#define DICE_DEV_ENTRY_TYPICAL(vendor, model, data) \
+	{ \
+		.match_flags	= IEEE1394_MATCH_VENDOR_ID | \
+				  IEEE1394_MATCH_MODEL_ID | \
+				  IEEE1394_MATCH_SPECIFIER_ID | \
+				  IEEE1394_MATCH_VERSION, \
+		.vendor_id	= (vendor), \
+		.model_id	= (model), \
+		.specifier_id	= (vendor), \
+		.version	= DICE_INTERFACE, \
+		.driver_data = (kernel_ulong_t)(data), \
+	}
+
 static const struct ieee1394_device_id dice_id_table[] = {
+	// Avid M-Box 3 Pro. To match in probe function.
+	DICE_DEV_ENTRY_TYPICAL(OUI_AVID, 0x000004, snd_dice_detect_extension_formats),
 	/* M-Audio Profire 2626 has a different value in version field. */
 	{
 		.match_flags	= IEEE1394_MATCH_VENDOR_ID |
diff --git a/sound/firewire/dice/dice.h b/sound/firewire/dice/dice.h
index adc6f7c84460..3c967d1b3605 100644
--- a/sound/firewire/dice/dice.h
+++ b/sound/firewire/dice/dice.h
@@ -109,7 +109,8 @@ struct snd_dice {
 	struct fw_iso_resources rx_resources[MAX_STREAMS];
 	struct amdtp_stream tx_stream[MAX_STREAMS];
 	struct amdtp_stream rx_stream[MAX_STREAMS];
-	bool global_enabled;
+	bool global_enabled:1;
+	bool disable_double_pcm_frames:1;
 	struct completion clock_accepted;
 	unsigned int substreams_counter;
 
-- 
cgit v1.2.3


From 4c6fe8c547e3c9e8c15dabdd23c569ee0df3adb1 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Tue, 18 May 2021 10:26:12 +0900
Subject: ALSA: dice: fix stream format for TC Electronic Konnekt Live at high
 sampling transfer frequency

At high sampling transfer frequency, TC Electronic Konnekt Live
transfers/receives 6 audio data frames in multi bit linear audio data
channel of data block in CIP payload. Current hard-coded stream format
is wrong.

Cc: <stable@vger.kernel.org>
Fixes: f1f0f330b1d0 ("ALSA: dice: add parameters of stream formats for models produced by TC Electronic")
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Link: https://lore.kernel.org/r/20210518012612.37268-1-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/firewire/dice/dice-tcelectronic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sound/firewire/dice/dice-tcelectronic.c b/sound/firewire/dice/dice-tcelectronic.c
index a8875d24ba2a..43a3bcb15b3d 100644
--- a/sound/firewire/dice/dice-tcelectronic.c
+++ b/sound/firewire/dice/dice-tcelectronic.c
@@ -38,8 +38,8 @@ static const struct dice_tc_spec konnekt_24d = {
 };
 
 static const struct dice_tc_spec konnekt_live = {
-	.tx_pcm_chs = {{16, 16, 16}, {0, 0, 0} },
-	.rx_pcm_chs = {{16, 16, 16}, {0, 0, 0} },
+	.tx_pcm_chs = {{16, 16, 6}, {0, 0, 0} },
+	.rx_pcm_chs = {{16, 16, 6}, {0, 0, 0} },
 	.has_midi = true,
 };
 
-- 
cgit v1.2.3


From 3317c26a4b413b41364f2c4b83c778c6aba1576d Mon Sep 17 00:00:00 2001
From: Like Xu <like.xu@linux.intel.com>
Date: Fri, 30 Apr 2021 13:22:46 +0800
Subject: perf/x86: Avoid touching LBR_TOS MSR for Arch LBR

The Architecture LBR does not have MSR_LBR_TOS (0x000001c9).
In a guest that should support Architecture LBR, check_msr()
will be a non-related check for the architecture MSR 0x0
(IA32_P5_MC_ADDR) that is also not supported by KVM.

The failure will cause x86_pmu.lbr_nr = 0, thereby preventing
the initialization of the guest Arch LBR. Fix it by avoiding
this extraneous check in intel_pmu_init() for Arch LBR.

Fixes: 47125db27e47 ("perf/x86/intel/lbr: Support Architectural LBR")
Signed-off-by: Like Xu <like.xu@linux.intel.com>
[peterz: simpler still]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210430052247.3079672-1-like.xu@linux.intel.com
---
 arch/x86/events/intel/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 2521d03de5e0..e28892270c58 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -6253,7 +6253,7 @@ __init int intel_pmu_init(void)
 	 * Check all LBT MSR here.
 	 * Disable LBR access if any LBR MSRs can not be accessed.
 	 */
-	if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL))
+	if (x86_pmu.lbr_tos && !check_msr(x86_pmu.lbr_tos, 0x3UL))
 		x86_pmu.lbr_nr = 0;
 	for (i = 0; i < x86_pmu.lbr_nr; i++) {
 		if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) &&
-- 
cgit v1.2.3


From 488e13a489e9707a7e81e1991fdd1f20c0f04689 Mon Sep 17 00:00:00 2001
From: Like Xu <like.xu@linux.intel.com>
Date: Fri, 30 Apr 2021 13:22:47 +0800
Subject: perf/x86/lbr: Remove cpuc->lbr_xsave allocation from atomic context

If the kernel is compiled with the CONFIG_LOCKDEP option, the conditional
might_sleep_if() deep in kmem_cache_alloc() will generate the following
trace, and potentially cause a deadlock when another LBR event is added:

  [] BUG: sleeping function called from invalid context at include/linux/sched/mm.h:196
  [] Call Trace:
  []  kmem_cache_alloc+0x36/0x250
  []  intel_pmu_lbr_add+0x152/0x170
  []  x86_pmu_add+0x83/0xd0

Make it symmetric with the release_lbr_buffers() call and mirror the
existing DS buffers.

Fixes: c085fb8774 ("perf/x86/intel/lbr: Support XSAVES for arch LBR read")
Signed-off-by: Like Xu <like.xu@linux.intel.com>
[peterz: simplified]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Kan Liang <kan.liang@linux.intel.com>
Link: https://lkml.kernel.org/r/20210430052247.3079672-2-like.xu@linux.intel.com
---
 arch/x86/events/core.c       |  6 ++++--
 arch/x86/events/intel/lbr.c  | 26 ++++++++++++++++++++------
 arch/x86/events/perf_event.h |  6 ++++++
 3 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 8e509325c2c3..8f71dd72ef95 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -396,10 +396,12 @@ int x86_reserve_hardware(void)
 	if (!atomic_inc_not_zero(&pmc_refcount)) {
 		mutex_lock(&pmc_reserve_mutex);
 		if (atomic_read(&pmc_refcount) == 0) {
-			if (!reserve_pmc_hardware())
+			if (!reserve_pmc_hardware()) {
 				err = -EBUSY;
-			else
+			} else {
 				reserve_ds_buffers();
+				reserve_lbr_buffers();
+			}
 		}
 		if (!err)
 			atomic_inc(&pmc_refcount);
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 76dbab6ac9fb..4409d2cccfda 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -658,7 +658,6 @@ static inline bool branch_user_callstack(unsigned br_sel)
 
 void intel_pmu_lbr_add(struct perf_event *event)
 {
-	struct kmem_cache *kmem_cache = event->pmu->task_ctx_cache;
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 
 	if (!x86_pmu.lbr_nr)
@@ -696,11 +695,6 @@ void intel_pmu_lbr_add(struct perf_event *event)
 	perf_sched_cb_inc(event->ctx->pmu);
 	if (!cpuc->lbr_users++ && !event->total_time_running)
 		intel_pmu_lbr_reset();
-
-	if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
-	    kmem_cache && !cpuc->lbr_xsave &&
-	    (cpuc->lbr_users != cpuc->lbr_pebs_users))
-		cpuc->lbr_xsave = kmem_cache_alloc(kmem_cache, GFP_KERNEL);
 }
 
 void release_lbr_buffers(void)
@@ -722,6 +716,26 @@ void release_lbr_buffers(void)
 	}
 }
 
+void reserve_lbr_buffers(void)
+{
+	struct kmem_cache *kmem_cache;
+	struct cpu_hw_events *cpuc;
+	int cpu;
+
+	if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
+		return;
+
+	for_each_possible_cpu(cpu) {
+		cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
+		kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
+		if (!kmem_cache || cpuc->lbr_xsave)
+			continue;
+
+		cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache, GFP_KERNEL,
+							cpu_to_node(cpu));
+	}
+}
+
 void intel_pmu_lbr_del(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 27fa85e7d4fd..ad87cb36f7c8 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -1244,6 +1244,8 @@ void reserve_ds_buffers(void);
 
 void release_lbr_buffers(void);
 
+void reserve_lbr_buffers(void);
+
 extern struct event_constraint bts_constraint;
 extern struct event_constraint vlbr_constraint;
 
@@ -1393,6 +1395,10 @@ static inline void release_lbr_buffers(void)
 {
 }
 
+static inline void reserve_lbr_buffers(void)
+{
+}
+
 static inline int intel_pmu_init(void)
 {
 	return 0;
-- 
cgit v1.2.3


From 89e70d5c583c55088faa2201d397ee30a15704aa Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Wed, 12 May 2021 20:09:37 +0800
Subject: locking/lockdep: Correct calling tracepoints

The commit eb1f00237aca ("lockdep,trace: Expose tracepoints") reverses
tracepoints for lock_contended() and lock_acquired(), thus the ftrace
log shows the wrong locking sequence that "acquired" event is prior to
"contended" event:

  <idle>-0       [001] d.s3 20803.501685: lock_acquire: 0000000008b91ab4 &sg_policy->update_lock
  <idle>-0       [001] d.s3 20803.501686: lock_acquired: 0000000008b91ab4 &sg_policy->update_lock
  <idle>-0       [001] d.s3 20803.501689: lock_contended: 0000000008b91ab4 &sg_policy->update_lock
  <idle>-0       [001] d.s3 20803.501690: lock_release: 0000000008b91ab4 &sg_policy->update_lock

This patch fixes calling tracepoints for lock_contended() and
lock_acquired().

Fixes: eb1f00237aca ("lockdep,trace: Expose tracepoints")
Signed-off-by: Leo Yan <leo.yan@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210512120937.90211-1-leo.yan@linaro.org
---
 kernel/locking/lockdep.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 48d736aa03b2..7641bd407239 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -5736,7 +5736,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
 {
 	unsigned long flags;
 
-	trace_lock_acquired(lock, ip);
+	trace_lock_contended(lock, ip);
 
 	if (unlikely(!lock_stat || !lockdep_enabled()))
 		return;
@@ -5754,7 +5754,7 @@ void lock_acquired(struct lockdep_map *lock, unsigned long ip)
 {
 	unsigned long flags;
 
-	trace_lock_contended(lock, ip);
+	trace_lock_acquired(lock, ip);
 
 	if (unlikely(!lock_stat || !lockdep_enabled()))
 		return;
-- 
cgit v1.2.3


From 3a010c493271f04578b133de977e0e5dd2848cea Mon Sep 17 00:00:00 2001
From: Zqiang <qiang.zhang@windriver.com>
Date: Mon, 17 May 2021 11:40:05 +0800
Subject: locking/mutex: clear MUTEX_FLAGS if wait_list is empty due to signal

When a interruptible mutex locker is interrupted by a signal
without acquiring this lock and removed from the wait queue.
if the mutex isn't contended enough to have a waiter
put into the wait queue again, the setting of the WAITER
bit will force mutex locker to go into the slowpath to
acquire the lock every time, so if the wait queue is empty,
the WAITER bit need to be clear.

Fixes: 040a0a371005 ("mutex: Add support for wound/wait style locks")
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Zqiang <qiang.zhang@windriver.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210517034005.30828-1-qiang.zhang@windriver.com
---
 kernel/locking/mutex-debug.c |  4 ++--
 kernel/locking/mutex-debug.h |  2 +-
 kernel/locking/mutex.c       | 18 +++++++++++++-----
 kernel/locking/mutex.h       |  4 +---
 4 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c
index a7276aaf2abc..db9301591e3f 100644
--- a/kernel/locking/mutex-debug.c
+++ b/kernel/locking/mutex-debug.c
@@ -57,7 +57,7 @@ void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
 	task->blocked_on = waiter;
 }
 
-void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
+void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
 			 struct task_struct *task)
 {
 	DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
@@ -65,7 +65,7 @@ void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
 	DEBUG_LOCKS_WARN_ON(task->blocked_on != waiter);
 	task->blocked_on = NULL;
 
-	list_del_init(&waiter->list);
+	INIT_LIST_HEAD(&waiter->list);
 	waiter->task = NULL;
 }
 
diff --git a/kernel/locking/mutex-debug.h b/kernel/locking/mutex-debug.h
index 1edd3f45a4ec..53e631e1d76d 100644
--- a/kernel/locking/mutex-debug.h
+++ b/kernel/locking/mutex-debug.h
@@ -22,7 +22,7 @@ extern void debug_mutex_free_waiter(struct mutex_waiter *waiter);
 extern void debug_mutex_add_waiter(struct mutex *lock,
 				   struct mutex_waiter *waiter,
 				   struct task_struct *task);
-extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
+extern void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
 				struct task_struct *task);
 extern void debug_mutex_unlock(struct mutex *lock);
 extern void debug_mutex_init(struct mutex *lock, const char *name,
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index cb6b112ce155..013e1b08a1bf 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -194,7 +194,7 @@ static inline bool __mutex_waiter_is_first(struct mutex *lock, struct mutex_wait
  * Add @waiter to a given location in the lock wait_list and set the
  * FLAG_WAITERS flag if it's the first waiter.
  */
-static void __sched
+static void
 __mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
 		   struct list_head *list)
 {
@@ -205,6 +205,16 @@ __mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
 		__mutex_set_flag(lock, MUTEX_FLAG_WAITERS);
 }
 
+static void
+__mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter)
+{
+	list_del(&waiter->list);
+	if (likely(list_empty(&lock->wait_list)))
+		__mutex_clear_flag(lock, MUTEX_FLAGS);
+
+	debug_mutex_remove_waiter(lock, waiter, current);
+}
+
 /*
  * Give up ownership to a specific task, when @task = NULL, this is equivalent
  * to a regular unlock. Sets PICKUP on a handoff, clears HANDOFF, preserves
@@ -1061,9 +1071,7 @@ acquired:
 			__ww_mutex_check_waiters(lock, ww_ctx);
 	}
 
-	mutex_remove_waiter(lock, &waiter, current);
-	if (likely(list_empty(&lock->wait_list)))
-		__mutex_clear_flag(lock, MUTEX_FLAGS);
+	__mutex_remove_waiter(lock, &waiter);
 
 	debug_mutex_free_waiter(&waiter);
 
@@ -1080,7 +1088,7 @@ skip_wait:
 
 err:
 	__set_current_state(TASK_RUNNING);
-	mutex_remove_waiter(lock, &waiter, current);
+	__mutex_remove_waiter(lock, &waiter);
 err_early_kill:
 	spin_unlock(&lock->wait_lock);
 	debug_mutex_free_waiter(&waiter);
diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h
index 1c2287d3fa71..f0c710b1d192 100644
--- a/kernel/locking/mutex.h
+++ b/kernel/locking/mutex.h
@@ -10,12 +10,10 @@
  * !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs:
  */
 
-#define mutex_remove_waiter(lock, waiter, task) \
-		__list_del((waiter)->list.prev, (waiter)->list.next)
-
 #define debug_mutex_wake_waiter(lock, waiter)		do { } while (0)
 #define debug_mutex_free_waiter(waiter)			do { } while (0)
 #define debug_mutex_add_waiter(lock, waiter, ti)	do { } while (0)
+#define debug_mutex_remove_waiter(lock, waiter, ti)     do { } while (0)
 #define debug_mutex_unlock(lock)			do { } while (0)
 #define debug_mutex_init(lock, name, key)		do { } while (0)
 
-- 
cgit v1.2.3


From 0b07154f066ab2c087c342b372be5771145bdc60 Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Mon, 17 May 2021 17:39:46 +0200
Subject: dt-bindings: spi: spi-mux: rename flash node

The recent conversion of the common MTD properties to YAML now mandates
a particular node name for SPI flash devices.

Reported-by: Rob Herring <robh@kernel.org>
Signed-off-by: Michael Walle <michael@walle.cc>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20210517153946.9502-1-michael@walle.cc
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/spi/spi-mux.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/spi/spi-mux.yaml b/Documentation/devicetree/bindings/spi/spi-mux.yaml
index 6c21a132b51f..c2c007260582 100644
--- a/Documentation/devicetree/bindings/spi/spi-mux.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-mux.yaml
@@ -72,7 +72,7 @@ examples:
 
             mux-controls = <&mux>;
 
-            spi-flash@0 {
+            flash@0 {
                 compatible = "jedec,spi-nor";
                 reg = <0>;
                 #address-cells = <1>;
-- 
cgit v1.2.3


From d7aed20d446d8c87f5e13adf73281056b0064a45 Mon Sep 17 00:00:00 2001
From: Alain Volmat <alain.volmat@foss.st.com>
Date: Wed, 12 May 2021 07:20:42 +0200
Subject: MAINTAINERS: Add Alain Volmat as STM32 SPI maintainer

Add Alain Volmat as STM32 SPI maintainer.

Signed-off-by: Alain Volmat <alain.volmat@foss.st.com>
Reviewed-by: Amelie Delaunay <amelie.delaunay@foss.st.com>
Link: https://lore.kernel.org/r/1620796842-23546-1-git-send-email-alain.volmat@foss.st.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 MAINTAINERS | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 1c9827207949..528068e31845 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16954,6 +16954,12 @@ L:	linux-i2c@vger.kernel.org
 S:	Maintained
 F:	drivers/i2c/busses/i2c-stm32*
 
+ST STM32 SPI DRIVER
+M:	Alain Volmat <alain.volmat@foss.st.com>
+L:	linux-spi@vger.kernel.org
+S:	Maintained
+F:	drivers/spi/spi-stm32.c
+
 ST VL53L0X ToF RANGER(I2C) IIO DRIVER
 M:	Song Qiang <songqiang1304521@gmail.com>
 L:	linux-iio@vger.kernel.org
-- 
cgit v1.2.3


From d37316b72e8bf95a52d1c3e93c823d128c09b521 Mon Sep 17 00:00:00 2001
From: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Date: Tue, 18 May 2021 16:45:14 +0930
Subject: ARM: npcm: wpcm450: select interrupt controller driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The interrupt controller driver is necessary in order to have a
functioning Linux system on WPCM450. Select it in mach-npcm/Kconfig.

Fixes: ece3fe93e8f4 ("ARM: npcm: Introduce Nuvoton WPCM450 SoC")
Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Signed-off-by: Joel Stanley <joel@jms.id.au>
Reviewed-by: Joel Stanley <joel@jms.id.au>
Link: https://lore.kernel.org/r/20210513165627.1767093-1-j.neuschaefer@gmx.net
Link: https://lore.kernel.org/r/20210518071514.604492-1-joel@jms.id.au'
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/mach-npcm/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/mach-npcm/Kconfig b/arch/arm/mach-npcm/Kconfig
index 658c8efb4ca1..a71cf1d189ae 100644
--- a/arch/arm/mach-npcm/Kconfig
+++ b/arch/arm/mach-npcm/Kconfig
@@ -10,6 +10,7 @@ config ARCH_WPCM450
 	bool "Support for WPCM450 BMC (Hermon)"
 	depends on ARCH_MULTI_V5
 	select CPU_ARM926T
+	select WPCM450_AIC
 	select NPCM7XX_TIMER
 	help
 	  General support for WPCM450 BMC (Hermon).
-- 
cgit v1.2.3


From 889d916b6f8a48b8c9489fffcad3b78eedd01a51 Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Tue, 11 May 2021 08:48:28 +0300
Subject: RDMA/core: Don't access cm_id after its destruction

restrack should only be attached to a cm_id while the ID has a valid
device pointer. It is set up when the device is first loaded, but not
cleared when the device is removed. There is also two copies of the device
pointer, one private and one in the public API, and these were left out of
sync.

Make everything go to NULL together and manipulate restrack right around
the device assignments.

Found by syzcaller:
BUG: KASAN: wild-memory-access in __list_del include/linux/list.h:112 [inline]
BUG: KASAN: wild-memory-access in __list_del_entry include/linux/list.h:135 [inline]
BUG: KASAN: wild-memory-access in list_del include/linux/list.h:146 [inline]
BUG: KASAN: wild-memory-access in cma_cancel_listens drivers/infiniband/core/cma.c:1767 [inline]
BUG: KASAN: wild-memory-access in cma_cancel_operation drivers/infiniband/core/cma.c:1795 [inline]
BUG: KASAN: wild-memory-access in cma_cancel_operation+0x1f4/0x4b0 drivers/infiniband/core/cma.c:1783
Write of size 8 at addr dead000000000108 by task syz-executor716/334

CPU: 0 PID: 334 Comm: syz-executor716 Not tainted 5.11.0+ #271
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
Call Trace:
 __dump_stack lib/dump_stack.c:79 [inline]
 dump_stack+0xbe/0xf9 lib/dump_stack.c:120
 __kasan_report mm/kasan/report.c:400 [inline]
 kasan_report.cold+0x5f/0xd5 mm/kasan/report.c:413
 __list_del include/linux/list.h:112 [inline]
 __list_del_entry include/linux/list.h:135 [inline]
 list_del include/linux/list.h:146 [inline]
 cma_cancel_listens drivers/infiniband/core/cma.c:1767 [inline]
 cma_cancel_operation drivers/infiniband/core/cma.c:1795 [inline]
 cma_cancel_operation+0x1f4/0x4b0 drivers/infiniband/core/cma.c:1783
 _destroy_id+0x29/0x460 drivers/infiniband/core/cma.c:1862
 ucma_close_id+0x36/0x50 drivers/infiniband/core/ucma.c:185
 ucma_destroy_private_ctx+0x58d/0x5b0 drivers/infiniband/core/ucma.c:576
 ucma_close+0x91/0xd0 drivers/infiniband/core/ucma.c:1797
 __fput+0x169/0x540 fs/file_table.c:280
 task_work_run+0xb7/0x100 kernel/task_work.c:140
 exit_task_work include/linux/task_work.h:30 [inline]
 do_exit+0x7da/0x17f0 kernel/exit.c:825
 do_group_exit+0x9e/0x190 kernel/exit.c:922
 __do_sys_exit_group kernel/exit.c:933 [inline]
 __se_sys_exit_group kernel/exit.c:931 [inline]
 __x64_sys_exit_group+0x2d/0x30 kernel/exit.c:931
 do_syscall_64+0x2d/0x40 arch/x86/entry/common.c:46
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

Fixes: 255d0c14b375 ("RDMA/cma: rdma_bind_addr() leaks a cma_dev reference count")
Link: https://lore.kernel.org/r/3352ee288fe34f2b44220457a29bfc0548686363.1620711734.git.leonro@nvidia.com
Signed-off-by: Shay Drory <shayd@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/core/cma.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 2b9ffc21cbc4..ab148a696c0c 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -473,6 +473,7 @@ static void cma_release_dev(struct rdma_id_private *id_priv)
 	list_del(&id_priv->list);
 	cma_dev_put(id_priv->cma_dev);
 	id_priv->cma_dev = NULL;
+	id_priv->id.device = NULL;
 	if (id_priv->id.route.addr.dev_addr.sgid_attr) {
 		rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr);
 		id_priv->id.route.addr.dev_addr.sgid_attr = NULL;
@@ -1860,6 +1861,7 @@ static void _destroy_id(struct rdma_id_private *id_priv,
 				iw_destroy_cm_id(id_priv->cm_id.iw);
 		}
 		cma_leave_mc_groups(id_priv);
+		rdma_restrack_del(&id_priv->res);
 		cma_release_dev(id_priv);
 	}
 
@@ -1873,7 +1875,6 @@ static void _destroy_id(struct rdma_id_private *id_priv,
 	kfree(id_priv->id.route.path_rec);
 
 	put_net(id_priv->id.route.addr.dev_addr.net);
-	rdma_restrack_del(&id_priv->res);
 	kfree(id_priv);
 }
 
@@ -3774,7 +3775,7 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
 	}
 
 	id_priv->backlog = backlog;
-	if (id->device) {
+	if (id_priv->cma_dev) {
 		if (rdma_cap_ib_cm(id->device, 1)) {
 			ret = cma_ib_listen(id_priv);
 			if (ret)
-- 
cgit v1.2.3


From 976aac5f882989e4f6c1b3a7224819bf0e801c6a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 14 May 2021 16:00:08 +0200
Subject: kcsan: Fix debugfs initcall return type

clang with CONFIG_LTO_CLANG points out that an initcall function should
return an 'int' due to the changes made to the initcall macros in commit
3578ad11f3fb ("init: lto: fix PREL32 relocations"):

kernel/kcsan/debugfs.c:274:15: error: returning 'void' from a function with incompatible result type 'int'
late_initcall(kcsan_debugfs_init);
~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~
include/linux/init.h:292:46: note: expanded from macro 'late_initcall'
 #define late_initcall(fn)               __define_initcall(fn, 7)

Fixes: e36299efe7d7 ("kcsan, debugfs: Move debugfs file creation out of early init")
Cc: stable <stable@vger.kernel.org>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Marco Elver <elver@google.com>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Miguel Ojeda <ojeda@kernel.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/kcsan/debugfs.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/kcsan/debugfs.c b/kernel/kcsan/debugfs.c
index c1dd02f3be8b..e65de172ccf7 100644
--- a/kernel/kcsan/debugfs.c
+++ b/kernel/kcsan/debugfs.c
@@ -266,9 +266,10 @@ static const struct file_operations debugfs_ops =
 	.release = single_release
 };
 
-static void __init kcsan_debugfs_init(void)
+static int __init kcsan_debugfs_init(void)
 {
 	debugfs_create_file("kcsan", 0644, NULL, NULL, &debugfs_ops);
+	return 0;
 }
 
 late_initcall(kcsan_debugfs_init);
-- 
cgit v1.2.3


From be07f056396d6bb40963c45a02951c566ddeef8e Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 18 May 2021 10:09:08 +0800
Subject: tipc: simplify the finalize work queue

This patch is to use "struct work_struct" for the finalize work queue
instead of "struct tipc_net_work", as it can get the "net" and "addr"
from tipc_net's other members and there is no need to add extra net
and addr in tipc_net by defining "struct tipc_net_work".

Note that it's safe to get net from tn->bcl as bcl is always released
after the finalize work queue is done.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/core.c     |  4 ++--
 net/tipc/core.h     |  8 +-------
 net/tipc/discover.c |  4 ++--
 net/tipc/link.c     |  5 +++++
 net/tipc/link.h     |  1 +
 net/tipc/net.c      | 15 +++------------
 6 files changed, 14 insertions(+), 23 deletions(-)

diff --git a/net/tipc/core.c b/net/tipc/core.c
index 72f3ac73779b..3f4542e0f065 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -60,7 +60,7 @@ static int __net_init tipc_init_net(struct net *net)
 	tn->trial_addr = 0;
 	tn->addr_trial_end = 0;
 	tn->capabilities = TIPC_NODE_CAPABILITIES;
-	INIT_WORK(&tn->final_work.work, tipc_net_finalize_work);
+	INIT_WORK(&tn->work, tipc_net_finalize_work);
 	memset(tn->node_id, 0, sizeof(tn->node_id));
 	memset(tn->node_id_string, 0, sizeof(tn->node_id_string));
 	tn->mon_threshold = TIPC_DEF_MON_THRESHOLD;
@@ -110,7 +110,7 @@ static void __net_exit tipc_exit_net(struct net *net)
 
 	tipc_detach_loopback(net);
 	/* Make sure the tipc_net_finalize_work() finished */
-	cancel_work_sync(&tn->final_work.work);
+	cancel_work_sync(&tn->work);
 	tipc_net_stop(net);
 
 	tipc_bcast_stop(net);
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 5741ae488bb5..0a3f7a70a50a 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -91,12 +91,6 @@ extern unsigned int tipc_net_id __read_mostly;
 extern int sysctl_tipc_rmem[3] __read_mostly;
 extern int sysctl_tipc_named_timeout __read_mostly;
 
-struct tipc_net_work {
-	struct work_struct work;
-	struct net *net;
-	u32 addr;
-};
-
 struct tipc_net {
 	u8  node_id[NODE_ID_LEN];
 	u32 node_addr;
@@ -148,7 +142,7 @@ struct tipc_net {
 	struct tipc_crypto *crypto_tx;
 #endif
 	/* Work item for net finalize */
-	struct tipc_net_work final_work;
+	struct work_struct work;
 	/* The numbers of work queues in schedule */
 	atomic_t wq_count;
 };
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 5380f605b851..da69e1abf68f 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -168,7 +168,7 @@ static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d,
 
 	/* Apply trial address if we just left trial period */
 	if (!trial && !self) {
-		tipc_sched_net_finalize(net, tn->trial_addr);
+		schedule_work(&tn->work);
 		msg_set_prevnode(buf_msg(d->skb), tn->trial_addr);
 		msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
 	}
@@ -308,7 +308,7 @@ static void tipc_disc_timeout(struct timer_list *t)
 	if (!time_before(jiffies, tn->addr_trial_end) && !tipc_own_addr(net)) {
 		mod_timer(&d->timer, jiffies + TIPC_DISC_INIT);
 		spin_unlock_bh(&d->lock);
-		tipc_sched_net_finalize(net, tn->trial_addr);
+		schedule_work(&tn->work);
 		return;
 	}
 
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 115109259430..c44b4bfaaee6 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -372,6 +372,11 @@ char tipc_link_plane(struct tipc_link *l)
 	return l->net_plane;
 }
 
+struct net *tipc_link_net(struct tipc_link *l)
+{
+	return l->net;
+}
+
 void tipc_link_update_caps(struct tipc_link *l, u16 capabilities)
 {
 	l->peer_caps = capabilities;
diff --git a/net/tipc/link.h b/net/tipc/link.h
index fc07232c9a12..a16f401fdabd 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -156,4 +156,5 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l,   struct tipc_msg *hdr,
 int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
 			  struct sk_buff_head *xmitq);
 bool tipc_link_too_silent(struct tipc_link *l);
+struct net *tipc_link_net(struct tipc_link *l);
 #endif
diff --git a/net/tipc/net.c b/net/tipc/net.c
index a130195af188..0e95572e56b4 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -41,6 +41,7 @@
 #include "socket.h"
 #include "node.h"
 #include "bcast.h"
+#include "link.h"
 #include "netlink.h"
 #include "monitor.h"
 
@@ -142,19 +143,9 @@ static void tipc_net_finalize(struct net *net, u32 addr)
 
 void tipc_net_finalize_work(struct work_struct *work)
 {
-	struct tipc_net_work *fwork;
+	struct tipc_net *tn = container_of(work, struct tipc_net, work);
 
-	fwork = container_of(work, struct tipc_net_work, work);
-	tipc_net_finalize(fwork->net, fwork->addr);
-}
-
-void tipc_sched_net_finalize(struct net *net, u32 addr)
-{
-	struct tipc_net *tn = tipc_net(net);
-
-	tn->final_work.net = net;
-	tn->final_work.addr = addr;
-	schedule_work(&tn->final_work.work);
+	tipc_net_finalize(tipc_link_net(tn->bcl), tn->trial_addr);
 }
 
 void tipc_net_stop(struct net *net)
-- 
cgit v1.2.3


From 33e6b1674f339c5d3be56ec9b4921d1ddd14327d Mon Sep 17 00:00:00 2001
From: Markus Bloechl <markus.bloechl@ipetronik.com>
Date: Tue, 18 May 2021 11:54:11 +0200
Subject: net: lan78xx: advertise tx software timestamping support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

lan78xx already calls skb_tx_timestamp() in its lan78xx_start_xmit().
Override .get_ts_info to also advertise this capability
(SOF_TIMESTAMPING_TX_SOFTWARE) via ethtool.

Signed-off-by: Markus Blöchl <markus.bloechl@ipetronik.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/lan78xx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 6acc5e904518..02bce40a67e5 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -1645,6 +1645,7 @@ static const struct ethtool_ops lan78xx_ethtool_ops = {
 	.get_strings	= lan78xx_get_strings,
 	.get_wol	= lan78xx_get_wol,
 	.set_wol	= lan78xx_set_wol,
+	.get_ts_info	= ethtool_op_get_ts_info,
 	.get_eee	= lan78xx_get_eee,
 	.set_eee	= lan78xx_set_eee,
 	.get_pauseparam	= lan78xx_get_pause,
-- 
cgit v1.2.3


From a710b9ffbebaf713f7dbd4dbd9524907e5d66f33 Mon Sep 17 00:00:00 2001
From: Jiaran Zhang <zhangjiaran@huawei.com>
Date: Tue, 18 May 2021 19:36:00 +0800
Subject: net: hns3: fix incorrect resp_msg issue

In hclge_mbx_handler(), if there are two consecutive mailbox
messages that requires resp_msg, the resp_msg is not cleared
after processing the first message, which will cause the resp_msg
data of second message incorrect.

Fix it by clearing the resp_msg before processing every mailbox
message.

Fixes: bb5790b71bad ("net: hns3: refactor mailbox response scheme between PF and VF")
Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 8e5f9dc8791d..f1c9f4ada348 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -710,7 +710,6 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
 	unsigned int flag;
 	int ret = 0;
 
-	memset(&resp_msg, 0, sizeof(resp_msg));
 	/* handle all the mailbox requests in the queue */
 	while (!hclge_cmd_crq_empty(&hdev->hw)) {
 		if (test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state)) {
@@ -738,6 +737,9 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
 
 		trace_hclge_pf_mbx_get(hdev, req);
 
+		/* clear the resp_msg before processing every mailbox message */
+		memset(&resp_msg, 0, sizeof(resp_msg));
+
 		switch (req->msg.code) {
 		case HCLGE_MBX_MAP_RING_TO_VECTOR:
 			ret = hclge_map_unmap_ring_to_vf_vector(vport, true,
-- 
cgit v1.2.3


From a289a7e5c1d49b7d47df9913c1cc81fb48fab613 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Tue, 18 May 2021 19:36:01 +0800
Subject: net: hns3: put off calling register_netdev() until client initialize
 complete

Currently, the netdevice is registered before client initializing
complete. So there is a timewindow between netdevice available
and usable. In this case, if user try to change the channel number
or ring param, it may cause the hns3_set_rx_cpu_rmap() being called
twice, and report bug.

[47199.416502] hns3 0000:35:00.0 eth1: set channels: tqp_num=1, rxfh=0
[47199.430340] hns3 0000:35:00.0 eth1: already uninitialized
[47199.438554] hns3 0000:35:00.0: rss changes from 4 to 1
[47199.511854] hns3 0000:35:00.0: Channels changed, rss_size from 4 to 1, tqps from 4 to 1
[47200.163524] ------------[ cut here ]------------
[47200.171674] kernel BUG at lib/cpu_rmap.c:142!
[47200.177847] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP
[47200.185259] Modules linked in: hclge(+) hns3(-) hns3_cae(O) hns_roce_hw_v2 hnae3 vfio_iommu_type1 vfio_pci vfio_virqfd vfio pv680_mii(O) [last unloaded: hclge]
[47200.205912] CPU: 1 PID: 8260 Comm: ethtool Tainted: G           O      5.11.0-rc3+ #1
[47200.215601] Hardware name:  , xxxxxx 02/04/2021
[47200.223052] pstate: 60400009 (nZCv daif +PAN -UAO -TCO BTYPE=--)
[47200.230188] pc : cpu_rmap_add+0x38/0x40
[47200.237472] lr : irq_cpu_rmap_add+0x84/0x140
[47200.243291] sp : ffff800010e93a30
[47200.247295] x29: ffff800010e93a30 x28: ffff082100584880
[47200.254155] x27: 0000000000000000 x26: 0000000000000000
[47200.260712] x25: 0000000000000000 x24: 0000000000000004
[47200.267241] x23: ffff08209ba03000 x22: ffff08209ba038c0
[47200.273789] x21: 000000000000003f x20: ffff0820e2bc1680
[47200.280400] x19: ffff0820c970ec80 x18: 00000000000000c0
[47200.286944] x17: 0000000000000000 x16: ffffb43debe4a0d0
[47200.293456] x15: fffffc2082990600 x14: dead000000000122
[47200.300059] x13: ffffffffffffffff x12: 000000000000003e
[47200.306606] x11: ffff0820815b8080 x10: ffff53e411988000
[47200.313171] x9 : 0000000000000000 x8 : ffff0820e2bc1700
[47200.319682] x7 : 0000000000000000 x6 : 000000000000003f
[47200.326170] x5 : 0000000000000040 x4 : ffff800010e93a20
[47200.332656] x3 : 0000000000000004 x2 : ffff0820c970ec80
[47200.339168] x1 : ffff0820e2bc1680 x0 : 0000000000000004
[47200.346058] Call trace:
[47200.349324]  cpu_rmap_add+0x38/0x40
[47200.354300]  hns3_set_rx_cpu_rmap+0x6c/0xe0 [hns3]
[47200.362294]  hns3_reset_notify_init_enet+0x1cc/0x340 [hns3]
[47200.370049]  hns3_change_channels+0x40/0xb0 [hns3]
[47200.376770]  hns3_set_channels+0x12c/0x2a0 [hns3]
[47200.383353]  ethtool_set_channels+0x140/0x250
[47200.389772]  dev_ethtool+0x714/0x23d0
[47200.394440]  dev_ioctl+0x4cc/0x640
[47200.399277]  sock_do_ioctl+0x100/0x2a0
[47200.404574]  sock_ioctl+0x28c/0x470
[47200.409079]  __arm64_sys_ioctl+0xb4/0x100
[47200.415217]  el0_svc_common.constprop.0+0x84/0x210
[47200.422088]  do_el0_svc+0x28/0x34
[47200.426387]  el0_svc+0x28/0x70
[47200.431308]  el0_sync_handler+0x1a4/0x1b0
[47200.436477]  el0_sync+0x174/0x180
[47200.441562] Code: 11000405 79000c45 f8247861 d65f03c0 (d4210000)
[47200.448869] ---[ end trace a01efe4ce42e5f34 ]---

The process is like below:
excuting hns3_client_init
|
register_netdev()
|                           hns3_set_channels()
|                           |
hns3_set_rx_cpu_rmap()      hns3_reset_notify_uninit_enet()
|                               |
|                            quit without calling function
|                            hns3_free_rx_cpu_rmap for flag
|                            HNS3_NIC_STATE_INITED is unset.
|                           |
|                           hns3_reset_notify_init_enet()
|                               |
set HNS3_NIC_STATE_INITED    call hns3_set_rx_cpu_rmap()-- crash

Fix it by calling register_netdev() at the end of function
hns3_client_init().

Fixes: 08a100689d4b ("net: hns3: re-organize vector handle")
Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 783fdaf8f8d6..c64d18878f64 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -4317,12 +4317,6 @@ static int hns3_client_init(struct hnae3_handle *handle)
 	if (ret)
 		goto out_init_phy;
 
-	ret = register_netdev(netdev);
-	if (ret) {
-		dev_err(priv->dev, "probe register netdev fail!\n");
-		goto out_reg_netdev_fail;
-	}
-
 	/* the device can work without cpu rmap, only aRFS needs it */
 	ret = hns3_set_rx_cpu_rmap(netdev);
 	if (ret)
@@ -4355,17 +4349,23 @@ static int hns3_client_init(struct hnae3_handle *handle)
 	if (ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3)
 		set_bit(HNAE3_PFLAG_LIMIT_PROMISC, &handle->supported_pflags);
 
+	ret = register_netdev(netdev);
+	if (ret) {
+		dev_err(priv->dev, "probe register netdev fail!\n");
+		goto out_reg_netdev_fail;
+	}
+
 	if (netif_msg_drv(handle))
 		hns3_info_show(priv);
 
 	return ret;
 
+out_reg_netdev_fail:
+	hns3_dbg_uninit(handle);
 out_client_start:
 	hns3_free_rx_cpu_rmap(netdev);
 	hns3_nic_uninit_irq(priv);
 out_init_irq_fail:
-	unregister_netdev(netdev);
-out_reg_netdev_fail:
 	hns3_uninit_phy(netdev);
 out_init_phy:
 	hns3_uninit_all_ring(priv);
-- 
cgit v1.2.3


From 73a13d8dbe33e53a12400f2be0f5af169816c67f Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Tue, 18 May 2021 19:36:02 +0800
Subject: net: hns3: fix user's coalesce configuration lost issue

Currently, when adaptive is on, the user's coalesce configuration
may be overwritten by the dynamic one. The reason is that user's
configurations are saved in struct hns3_enet_tqp_vector whose
value maybe changed by the dynamic algorithm. To fix it, use
struct hns3_nic_priv instead of struct hns3_enet_tqp_vector to
save and get the user's configuration.

BTW, operations of storing and restoring coalesce info in the reset
process are unnecessary now, so remove them as well.

Fixes: 434776a5fae2 ("net: hns3: add ethtool_ops.set_coalesce support to PF")
Fixes: 7e96adc46633 ("net: hns3: add ethtool_ops.get_coalesce support to PF")
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c    | 84 +++++++++++-----------
 drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 64 ++++++-----------
 2 files changed, 63 insertions(+), 85 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index c64d18878f64..6d6c0ac65bb4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -264,22 +264,17 @@ static void hns3_vector_coalesce_init(struct hns3_enet_tqp_vector *tqp_vector,
 	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(priv->ae_handle->pdev);
 	struct hns3_enet_coalesce *tx_coal = &tqp_vector->tx_group.coal;
 	struct hns3_enet_coalesce *rx_coal = &tqp_vector->rx_group.coal;
+	struct hns3_enet_coalesce *ptx_coal = &priv->tx_coal;
+	struct hns3_enet_coalesce *prx_coal = &priv->rx_coal;
 
-	/* initialize the configuration for interrupt coalescing.
-	 * 1. GL (Interrupt Gap Limiter)
-	 * 2. RL (Interrupt Rate Limiter)
-	 * 3. QL (Interrupt Quantity Limiter)
-	 *
-	 * Default: enable interrupt coalescing self-adaptive and GL
-	 */
-	tx_coal->adapt_enable = 1;
-	rx_coal->adapt_enable = 1;
+	tx_coal->adapt_enable = ptx_coal->adapt_enable;
+	rx_coal->adapt_enable = prx_coal->adapt_enable;
 
-	tx_coal->int_gl = HNS3_INT_GL_50K;
-	rx_coal->int_gl = HNS3_INT_GL_50K;
+	tx_coal->int_gl = ptx_coal->int_gl;
+	rx_coal->int_gl = prx_coal->int_gl;
 
-	rx_coal->flow_level = HNS3_FLOW_LOW;
-	tx_coal->flow_level = HNS3_FLOW_LOW;
+	rx_coal->flow_level = prx_coal->flow_level;
+	tx_coal->flow_level = ptx_coal->flow_level;
 
 	/* device version above V3(include V3), GL can configure 1us
 	 * unit, so uses 1us unit.
@@ -294,8 +289,8 @@ static void hns3_vector_coalesce_init(struct hns3_enet_tqp_vector *tqp_vector,
 		rx_coal->ql_enable = 1;
 		tx_coal->int_ql_max = ae_dev->dev_specs.int_ql_max;
 		rx_coal->int_ql_max = ae_dev->dev_specs.int_ql_max;
-		tx_coal->int_ql = HNS3_INT_QL_DEFAULT_CFG;
-		rx_coal->int_ql = HNS3_INT_QL_DEFAULT_CFG;
+		tx_coal->int_ql = ptx_coal->int_ql;
+		rx_coal->int_ql = prx_coal->int_ql;
 	}
 }
 
@@ -3844,6 +3839,34 @@ map_ring_fail:
 	return ret;
 }
 
+static void hns3_nic_init_coal_cfg(struct hns3_nic_priv *priv)
+{
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(priv->ae_handle->pdev);
+	struct hns3_enet_coalesce *tx_coal = &priv->tx_coal;
+	struct hns3_enet_coalesce *rx_coal = &priv->rx_coal;
+
+	/* initialize the configuration for interrupt coalescing.
+	 * 1. GL (Interrupt Gap Limiter)
+	 * 2. RL (Interrupt Rate Limiter)
+	 * 3. QL (Interrupt Quantity Limiter)
+	 *
+	 * Default: enable interrupt coalescing self-adaptive and GL
+	 */
+	tx_coal->adapt_enable = 1;
+	rx_coal->adapt_enable = 1;
+
+	tx_coal->int_gl = HNS3_INT_GL_50K;
+	rx_coal->int_gl = HNS3_INT_GL_50K;
+
+	rx_coal->flow_level = HNS3_FLOW_LOW;
+	tx_coal->flow_level = HNS3_FLOW_LOW;
+
+	if (ae_dev->dev_specs.int_ql_max) {
+		tx_coal->int_ql = HNS3_INT_QL_DEFAULT_CFG;
+		rx_coal->int_ql = HNS3_INT_QL_DEFAULT_CFG;
+	}
+}
+
 static int hns3_nic_alloc_vector_data(struct hns3_nic_priv *priv)
 {
 	struct hnae3_handle *h = priv->ae_handle;
@@ -4295,6 +4318,8 @@ static int hns3_client_init(struct hnae3_handle *handle)
 		goto out_get_ring_cfg;
 	}
 
+	hns3_nic_init_coal_cfg(priv);
+
 	ret = hns3_nic_alloc_vector_data(priv);
 	if (ret) {
 		ret = -ENOMEM;
@@ -4571,31 +4596,6 @@ int hns3_nic_reset_all_ring(struct hnae3_handle *h)
 	return 0;
 }
 
-static void hns3_store_coal(struct hns3_nic_priv *priv)
-{
-	/* ethtool only support setting and querying one coal
-	 * configuration for now, so save the vector 0' coal
-	 * configuration here in order to restore it.
-	 */
-	memcpy(&priv->tx_coal, &priv->tqp_vector[0].tx_group.coal,
-	       sizeof(struct hns3_enet_coalesce));
-	memcpy(&priv->rx_coal, &priv->tqp_vector[0].rx_group.coal,
-	       sizeof(struct hns3_enet_coalesce));
-}
-
-static void hns3_restore_coal(struct hns3_nic_priv *priv)
-{
-	u16 vector_num = priv->vector_num;
-	int i;
-
-	for (i = 0; i < vector_num; i++) {
-		memcpy(&priv->tqp_vector[i].tx_group.coal, &priv->tx_coal,
-		       sizeof(struct hns3_enet_coalesce));
-		memcpy(&priv->tqp_vector[i].rx_group.coal, &priv->rx_coal,
-		       sizeof(struct hns3_enet_coalesce));
-	}
-}
-
 static int hns3_reset_notify_down_enet(struct hnae3_handle *handle)
 {
 	struct hnae3_knic_private_info *kinfo = &handle->kinfo;
@@ -4654,8 +4654,6 @@ static int hns3_reset_notify_init_enet(struct hnae3_handle *handle)
 	if (ret)
 		goto err_put_ring;
 
-	hns3_restore_coal(priv);
-
 	ret = hns3_nic_init_vector_data(priv);
 	if (ret)
 		goto err_dealloc_vector;
@@ -4721,8 +4719,6 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle)
 
 	hns3_nic_uninit_vector_data(priv);
 
-	hns3_store_coal(priv);
-
 	hns3_nic_dealloc_vector_data(priv);
 
 	hns3_uninit_all_ring(priv);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index b48faf769b1c..c1ea403d2b56 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -1134,50 +1134,32 @@ static void hns3_get_channels(struct net_device *netdev,
 		h->ae_algo->ops->get_channels(h, ch);
 }
 
-static int hns3_get_coalesce_per_queue(struct net_device *netdev, u32 queue,
-				       struct ethtool_coalesce *cmd)
+static int hns3_get_coalesce(struct net_device *netdev,
+			     struct ethtool_coalesce *cmd)
 {
-	struct hns3_enet_tqp_vector *tx_vector, *rx_vector;
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hns3_enet_coalesce *tx_coal = &priv->tx_coal;
+	struct hns3_enet_coalesce *rx_coal = &priv->rx_coal;
 	struct hnae3_handle *h = priv->ae_handle;
-	u16 queue_num = h->kinfo.num_tqps;
 
 	if (hns3_nic_resetting(netdev))
 		return -EBUSY;
 
-	if (queue >= queue_num) {
-		netdev_err(netdev,
-			   "Invalid queue value %u! Queue max id=%u\n",
-			   queue, queue_num - 1);
-		return -EINVAL;
-	}
-
-	tx_vector = priv->ring[queue].tqp_vector;
-	rx_vector = priv->ring[queue_num + queue].tqp_vector;
+	cmd->use_adaptive_tx_coalesce = tx_coal->adapt_enable;
+	cmd->use_adaptive_rx_coalesce = rx_coal->adapt_enable;
 
-	cmd->use_adaptive_tx_coalesce =
-			tx_vector->tx_group.coal.adapt_enable;
-	cmd->use_adaptive_rx_coalesce =
-			rx_vector->rx_group.coal.adapt_enable;
-
-	cmd->tx_coalesce_usecs = tx_vector->tx_group.coal.int_gl;
-	cmd->rx_coalesce_usecs = rx_vector->rx_group.coal.int_gl;
+	cmd->tx_coalesce_usecs = tx_coal->int_gl;
+	cmd->rx_coalesce_usecs = rx_coal->int_gl;
 
 	cmd->tx_coalesce_usecs_high = h->kinfo.int_rl_setting;
 	cmd->rx_coalesce_usecs_high = h->kinfo.int_rl_setting;
 
-	cmd->tx_max_coalesced_frames = tx_vector->tx_group.coal.int_ql;
-	cmd->rx_max_coalesced_frames = rx_vector->rx_group.coal.int_ql;
+	cmd->tx_max_coalesced_frames = tx_coal->int_ql;
+	cmd->rx_max_coalesced_frames = rx_coal->int_ql;
 
 	return 0;
 }
 
-static int hns3_get_coalesce(struct net_device *netdev,
-			     struct ethtool_coalesce *cmd)
-{
-	return hns3_get_coalesce_per_queue(netdev, 0, cmd);
-}
-
 static int hns3_check_gl_coalesce_para(struct net_device *netdev,
 				       struct ethtool_coalesce *cmd)
 {
@@ -1292,19 +1274,7 @@ static int hns3_check_coalesce_para(struct net_device *netdev,
 		return ret;
 	}
 
-	ret = hns3_check_ql_coalesce_param(netdev, cmd);
-	if (ret)
-		return ret;
-
-	if (cmd->use_adaptive_tx_coalesce == 1 ||
-	    cmd->use_adaptive_rx_coalesce == 1) {
-		netdev_info(netdev,
-			    "adaptive-tx=%u and adaptive-rx=%u, tx_usecs or rx_usecs will changed dynamically.\n",
-			    cmd->use_adaptive_tx_coalesce,
-			    cmd->use_adaptive_rx_coalesce);
-	}
-
-	return 0;
+	return hns3_check_ql_coalesce_param(netdev, cmd);
 }
 
 static void hns3_set_coalesce_per_queue(struct net_device *netdev,
@@ -1350,6 +1320,9 @@ static int hns3_set_coalesce(struct net_device *netdev,
 			     struct ethtool_coalesce *cmd)
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hns3_enet_coalesce *tx_coal = &priv->tx_coal;
+	struct hns3_enet_coalesce *rx_coal = &priv->rx_coal;
 	u16 queue_num = h->kinfo.num_tqps;
 	int ret;
 	int i;
@@ -1364,6 +1337,15 @@ static int hns3_set_coalesce(struct net_device *netdev,
 	h->kinfo.int_rl_setting =
 		hns3_rl_round_down(cmd->rx_coalesce_usecs_high);
 
+	tx_coal->adapt_enable = cmd->use_adaptive_tx_coalesce;
+	rx_coal->adapt_enable = cmd->use_adaptive_rx_coalesce;
+
+	tx_coal->int_gl = cmd->tx_coalesce_usecs;
+	rx_coal->int_gl = cmd->rx_coalesce_usecs;
+
+	tx_coal->int_ql = cmd->tx_max_coalesced_frames;
+	rx_coal->int_ql = cmd->rx_max_coalesced_frames;
+
 	for (i = 0; i < queue_num; i++)
 		hns3_set_coalesce_per_queue(netdev, cmd, i);
 
-- 
cgit v1.2.3


From 9bb5a495424fd4bfa672eb1f31481248562fa156 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Tue, 18 May 2021 19:36:03 +0800
Subject: net: hns3: check the return of skb_checksum_help()

Currently skb_checksum_help()'s return is ignored, but it may
return error when it fails to allocate memory when linearizing.

So adds checking for the return of skb_checksum_help().

Fixes: 76ad4f0ee747("net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC")
Fixes: 3db084d28dc0("net: hns3: Fix for vxlan tx checksum bug")
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 6d6c0ac65bb4..026558f8e04b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -841,8 +841,6 @@ static bool hns3_tunnel_csum_bug(struct sk_buff *skb)
 	      l4.udp->dest == htons(4790))))
 		return false;
 
-	skb_checksum_help(skb);
-
 	return true;
 }
 
@@ -919,8 +917,7 @@ static int hns3_set_l2l3l4(struct sk_buff *skb, u8 ol4_proto,
 			/* the stack computes the IP header already,
 			 * driver calculate l4 checksum when not TSO.
 			 */
-			skb_checksum_help(skb);
-			return 0;
+			return skb_checksum_help(skb);
 		}
 
 		hns3_set_outer_l2l3l4(skb, ol4_proto, ol_type_vlan_len_msec);
@@ -965,7 +962,7 @@ static int hns3_set_l2l3l4(struct sk_buff *skb, u8 ol4_proto,
 		break;
 	case IPPROTO_UDP:
 		if (hns3_tunnel_csum_bug(skb))
-			break;
+			return skb_checksum_help(skb);
 
 		hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_L4CS_B, 1);
 		hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_L4T_S,
@@ -990,8 +987,7 @@ static int hns3_set_l2l3l4(struct sk_buff *skb, u8 ol4_proto,
 		/* the stack computes the IP header already,
 		 * driver calculate l4 checksum when not TSO.
 		 */
-		skb_checksum_help(skb);
-		return 0;
+		return skb_checksum_help(skb);
 	}
 
 	return 0;
-- 
cgit v1.2.3


From add0b32ef9146a8559a60aed54c37692a5f9d34f Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 30 Apr 2021 17:06:01 -0500
Subject: siginfo: Move si_trapno inside the union inside _si_fault

It turns out that linux uses si_trapno very sparingly, and as such it
can be considered extra information for a very narrow selection of
signals, rather than information that is present with every fault
reported in siginfo.

As such move si_trapno inside the union inside of _si_fault.  This
results in no change in placement, and makes it eaiser
to extend _si_fault in the future as this reduces the number of
special cases.  In particular with si_trapno included in the union it
is no longer a concern that the union must be pointer aligned on most
architectures because the union follows immediately after si_addr
which is a pointer.

This change results in a difference in siginfo field placement on
sparc and alpha for the fields si_addr_lsb, si_lower, si_upper,
si_pkey, and si_perf.  These architectures do not implement the
signals that would use si_addr_lsb, si_lower, si_upper, si_pkey, and
si_perf.  Further these architecture have not yet implemented the
userspace that would use si_perf.

The point of this change is in fact to correct these placement issues
before sparc or alpha grow userspace that cares.  This change was
discussed[1] and the agreement is that this change is currently safe.

[1]: https://lkml.kernel.org/r/CAK8P3a0+uKYwL1NhY6Hvtieghba2hKYGD6hcKx5n8=4Gtt+pHA@mail.gmail.com
Acked-by: Marco Elver <elver@google.com>
v1: https://lkml.kernel.org/r/m1tunns7yf.fsf_-_@fess.ebiederm.org
v2: https://lkml.kernel.org/r/20210505141101.11519-5-ebiederm@xmission.com
Link: https://lkml.kernel.org/r/20210517195748.8880-1-ebiederm@xmission.com
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 arch/x86/kernel/signal_compat.c    | 3 +++
 include/linux/compat.h             | 5 ++---
 include/uapi/asm-generic/siginfo.h | 7 ++-----
 kernel/signal.c                    | 1 +
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
index 0e5d0a7e203b..a9fcabd8a5e5 100644
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -127,6 +127,9 @@ static inline void signal_compat_build_tests(void)
 	BUILD_BUG_ON(offsetof(siginfo_t, si_addr) != 0x10);
 	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr) != 0x0C);
 
+	BUILD_BUG_ON(offsetof(siginfo_t, si_trapno) != 0x18);
+	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_trapno) != 0x10);
+
 	BUILD_BUG_ON(offsetof(siginfo_t, si_addr_lsb) != 0x18);
 	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr_lsb) != 0x10);
 
diff --git a/include/linux/compat.h b/include/linux/compat.h
index f0d2dd35d408..6af7bef15e94 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -214,12 +214,11 @@ typedef struct compat_siginfo {
 		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGTRAP, SIGEMT */
 		struct {
 			compat_uptr_t _addr;	/* faulting insn/memory ref. */
-#ifdef __ARCH_SI_TRAPNO
-			int _trapno;	/* TRAP # which caused the signal */
-#endif
 #define __COMPAT_ADDR_BND_PKEY_PAD  (__alignof__(compat_uptr_t) < sizeof(short) ? \
 				     sizeof(short) : __alignof__(compat_uptr_t))
 			union {
+				/* used on alpha and sparc */
+				int _trapno;	/* TRAP # which caused the signal */
 				/*
 				 * used when si_code=BUS_MCEERR_AR or
 				 * used when si_code=BUS_MCEERR_AO
diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h
index 03d6f6d2c1fe..e663bf117b46 100644
--- a/include/uapi/asm-generic/siginfo.h
+++ b/include/uapi/asm-generic/siginfo.h
@@ -63,9 +63,6 @@ union __sifields {
 	/* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGTRAP, SIGEMT */
 	struct {
 		void __user *_addr; /* faulting insn/memory ref. */
-#ifdef __ARCH_SI_TRAPNO
-		int _trapno;	/* TRAP # which caused the signal */
-#endif
 #ifdef __ia64__
 		int _imm;		/* immediate value for "break" */
 		unsigned int _flags;	/* see ia64 si_flags */
@@ -75,6 +72,8 @@ union __sifields {
 #define __ADDR_BND_PKEY_PAD  (__alignof__(void *) < sizeof(short) ? \
 			      sizeof(short) : __alignof__(void *))
 		union {
+			/* used on alpha and sparc */
+			int _trapno;	/* TRAP # which caused the signal */
 			/*
 			 * used when si_code=BUS_MCEERR_AR or
 			 * used when si_code=BUS_MCEERR_AO
@@ -150,9 +149,7 @@ typedef struct siginfo {
 #define si_int		_sifields._rt._sigval.sival_int
 #define si_ptr		_sifields._rt._sigval.sival_ptr
 #define si_addr		_sifields._sigfault._addr
-#ifdef __ARCH_SI_TRAPNO
 #define si_trapno	_sifields._sigfault._trapno
-#endif
 #define si_addr_lsb	_sifields._sigfault._addr_lsb
 #define si_lower	_sifields._sigfault._addr_bnd._lower
 #define si_upper	_sifields._sigfault._addr_bnd._upper
diff --git a/kernel/signal.c b/kernel/signal.c
index c3017aa8024a..65888aec65a0 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -4607,6 +4607,7 @@ static inline void siginfo_buildtime_checks(void)
 
 	/* sigfault */
 	CHECK_OFFSET(si_addr);
+	CHECK_OFFSET(si_trapno);
 	CHECK_OFFSET(si_addr_lsb);
 	CHECK_OFFSET(si_lower);
 	CHECK_OFFSET(si_upper);
-- 
cgit v1.2.3


From 9abcabe3111811aeae0f3a14e159b14248631875 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 30 Apr 2021 17:29:36 -0500
Subject: signal: Implement SIL_FAULT_TRAPNO

Now that si_trapno is part of the union in _si_fault and available on
all architectures, add SIL_FAULT_TRAPNO and update siginfo_layout to
return SIL_FAULT_TRAPNO when the code assumes si_trapno is valid.

There is room for future changes to reduce when si_trapno is valid but
this is all that is needed to make si_trapno and the other members of
the the union in _sigfault mutually exclusive.

Update the code that uses siginfo_layout to deal with SIL_FAULT_TRAPNO
and have the same code ignore si_trapno in in all other cases.

v1: https://lkml.kernel.org/r/m1o8dvs7s7.fsf_-_@fess.ebiederm.org
v2: https://lkml.kernel.org/r/20210505141101.11519-6-ebiederm@xmission.com
Link: https://lkml.kernel.org/r/20210517195748.8880-2-ebiederm@xmission.com
Reviewed-by: Marco Elver <elver@google.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/signalfd.c          |  8 +++-----
 include/linux/signal.h |  1 +
 kernel/signal.c        | 34 ++++++++++++----------------------
 3 files changed, 16 insertions(+), 27 deletions(-)

diff --git a/fs/signalfd.c b/fs/signalfd.c
index 040a1142915f..e87e59581653 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -123,15 +123,13 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
 		 */
 	case SIL_FAULT:
 		new.ssi_addr = (long) kinfo->si_addr;
-#ifdef __ARCH_SI_TRAPNO
+		break;
+	case SIL_FAULT_TRAPNO:
+		new.ssi_addr = (long) kinfo->si_addr;
 		new.ssi_trapno = kinfo->si_trapno;
-#endif
 		break;
 	case SIL_FAULT_MCEERR:
 		new.ssi_addr = (long) kinfo->si_addr;
-#ifdef __ARCH_SI_TRAPNO
-		new.ssi_trapno = kinfo->si_trapno;
-#endif
 		new.ssi_addr_lsb = (short) kinfo->si_addr_lsb;
 		break;
 	case SIL_PERF_EVENT:
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 1e98548d7cf6..5160fd45e5ca 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -40,6 +40,7 @@ enum siginfo_layout {
 	SIL_TIMER,
 	SIL_POLL,
 	SIL_FAULT,
+	SIL_FAULT_TRAPNO,
 	SIL_FAULT_MCEERR,
 	SIL_FAULT_BNDERR,
 	SIL_FAULT_PKUERR,
diff --git a/kernel/signal.c b/kernel/signal.c
index 65888aec65a0..597594ee72de 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1194,6 +1194,7 @@ static inline bool has_si_pid_and_uid(struct kernel_siginfo *info)
 	case SIL_TIMER:
 	case SIL_POLL:
 	case SIL_FAULT:
+	case SIL_FAULT_TRAPNO:
 	case SIL_FAULT_MCEERR:
 	case SIL_FAULT_BNDERR:
 	case SIL_FAULT_PKUERR:
@@ -2527,6 +2528,7 @@ static void hide_si_addr_tag_bits(struct ksignal *ksig)
 {
 	switch (siginfo_layout(ksig->sig, ksig->info.si_code)) {
 	case SIL_FAULT:
+	case SIL_FAULT_TRAPNO:
 	case SIL_FAULT_MCEERR:
 	case SIL_FAULT_BNDERR:
 	case SIL_FAULT_PKUERR:
@@ -3214,6 +3216,10 @@ enum siginfo_layout siginfo_layout(unsigned sig, int si_code)
 #endif
 			else if ((sig == SIGTRAP) && (si_code == TRAP_PERF))
 				layout = SIL_PERF_EVENT;
+#ifdef __ARCH_SI_TRAPNO
+			else if (layout == SIL_FAULT)
+				layout = SIL_FAULT_TRAPNO;
+#endif
 		}
 		else if (si_code <= NSIGPOLL)
 			layout = SIL_POLL;
@@ -3317,30 +3323,22 @@ void copy_siginfo_to_external32(struct compat_siginfo *to,
 		break;
 	case SIL_FAULT:
 		to->si_addr = ptr_to_compat(from->si_addr);
-#ifdef __ARCH_SI_TRAPNO
+		break;
+	case SIL_FAULT_TRAPNO:
+		to->si_addr = ptr_to_compat(from->si_addr);
 		to->si_trapno = from->si_trapno;
-#endif
 		break;
 	case SIL_FAULT_MCEERR:
 		to->si_addr = ptr_to_compat(from->si_addr);
-#ifdef __ARCH_SI_TRAPNO
-		to->si_trapno = from->si_trapno;
-#endif
 		to->si_addr_lsb = from->si_addr_lsb;
 		break;
 	case SIL_FAULT_BNDERR:
 		to->si_addr = ptr_to_compat(from->si_addr);
-#ifdef __ARCH_SI_TRAPNO
-		to->si_trapno = from->si_trapno;
-#endif
 		to->si_lower = ptr_to_compat(from->si_lower);
 		to->si_upper = ptr_to_compat(from->si_upper);
 		break;
 	case SIL_FAULT_PKUERR:
 		to->si_addr = ptr_to_compat(from->si_addr);
-#ifdef __ARCH_SI_TRAPNO
-		to->si_trapno = from->si_trapno;
-#endif
 		to->si_pkey = from->si_pkey;
 		break;
 	case SIL_PERF_EVENT:
@@ -3401,30 +3399,22 @@ static int post_copy_siginfo_from_user32(kernel_siginfo_t *to,
 		break;
 	case SIL_FAULT:
 		to->si_addr = compat_ptr(from->si_addr);
-#ifdef __ARCH_SI_TRAPNO
+		break;
+	case SIL_FAULT_TRAPNO:
+		to->si_addr = compat_ptr(from->si_addr);
 		to->si_trapno = from->si_trapno;
-#endif
 		break;
 	case SIL_FAULT_MCEERR:
 		to->si_addr = compat_ptr(from->si_addr);
-#ifdef __ARCH_SI_TRAPNO
-		to->si_trapno = from->si_trapno;
-#endif
 		to->si_addr_lsb = from->si_addr_lsb;
 		break;
 	case SIL_FAULT_BNDERR:
 		to->si_addr = compat_ptr(from->si_addr);
-#ifdef __ARCH_SI_TRAPNO
-		to->si_trapno = from->si_trapno;
-#endif
 		to->si_lower = compat_ptr(from->si_lower);
 		to->si_upper = compat_ptr(from->si_upper);
 		break;
 	case SIL_FAULT_PKUERR:
 		to->si_addr = compat_ptr(from->si_addr);
-#ifdef __ARCH_SI_TRAPNO
-		to->si_trapno = from->si_trapno;
-#endif
 		to->si_pkey = from->si_pkey;
 		break;
 	case SIL_PERF_EVENT:
-- 
cgit v1.2.3


From af5eeab7e8e8c2f0fad10e4ab8cc8092012a2d5b Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 2 May 2021 14:27:24 -0500
Subject: signal: Factor force_sig_perf out of perf_sigtrap

Separate filling in siginfo for TRAP_PERF from deciding that
siginal needs to be sent.

There are enough little details that need to be correct when
properly filling in siginfo_t that it is easy to make mistakes
if filling in the siginfo_t is in the same function with other
logic.  So factor out force_sig_perf to reduce the cognative
load of on reviewers, maintainers and implementors.

v1: https://lkml.kernel.org/r/m17dkjqqxz.fsf_-_@fess.ebiederm.org
v2: https://lkml.kernel.org/r/20210505141101.11519-10-ebiederm@xmission.com
Link: https://lkml.kernel.org/r/20210517195748.8880-3-ebiederm@xmission.com
Reviewed-by: Marco Elver <elver@google.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/sched/signal.h |  1 +
 kernel/events/core.c         | 11 ++---------
 kernel/signal.c              | 13 +++++++++++++
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 3f6a0fcaa10c..7f4278fa21fe 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -326,6 +326,7 @@ int send_sig_mceerr(int code, void __user *, short, struct task_struct *);
 
 int force_sig_bnderr(void __user *addr, void __user *lower, void __user *upper);
 int force_sig_pkuerr(void __user *addr, u32 pkey);
+int force_sig_perf(void __user *addr, u32 type, u64 sig_data);
 
 int force_sig_ptrace_errno_trap(int errno, void __user *addr);
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 928b166d888e..48ea8863183b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6394,8 +6394,6 @@ void perf_event_wakeup(struct perf_event *event)
 
 static void perf_sigtrap(struct perf_event *event)
 {
-	struct kernel_siginfo info;
-
 	/*
 	 * We'd expect this to only occur if the irq_work is delayed and either
 	 * ctx->task or current has changed in the meantime. This can be the
@@ -6410,13 +6408,8 @@ static void perf_sigtrap(struct perf_event *event)
 	if (current->flags & PF_EXITING)
 		return;
 
-	clear_siginfo(&info);
-	info.si_signo = SIGTRAP;
-	info.si_code = TRAP_PERF;
-	info.si_errno = event->attr.type;
-	info.si_perf = event->attr.sig_data;
-	info.si_addr = (void __user *)event->pending_addr;
-	force_sig_info(&info);
+	force_sig_perf((void __user *)event->pending_addr,
+		       event->attr.type, event->attr.sig_data);
 }
 
 static void perf_pending_event_disable(struct perf_event *event)
diff --git a/kernel/signal.c b/kernel/signal.c
index 597594ee72de..3a18d13c39b2 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1763,6 +1763,19 @@ int force_sig_pkuerr(void __user *addr, u32 pkey)
 }
 #endif
 
+int force_sig_perf(void __user *addr, u32 type, u64 sig_data)
+{
+	struct kernel_siginfo info;
+
+	clear_siginfo(&info);
+	info.si_signo = SIGTRAP;
+	info.si_errno = type;
+	info.si_code  = TRAP_PERF;
+	info.si_addr  = addr;
+	info.si_perf  = sig_data;
+	return force_sig_info(&info);
+}
+
 /* For the crazy architectures that include trap information in
  * the errno field, instead of an actual errno value.
  */
-- 
cgit v1.2.3


From 0683b53197b55343a166f1507086823030809a19 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 2 May 2021 17:28:31 -0500
Subject: signal: Deliver all of the siginfo perf data in _perf

Don't abuse si_errno and deliver all of the perf data in _perf member
of siginfo_t.

Note: The data field in the perf data structures in a u64 to allow a
pointer to be encoded without needed to implement a 32bit and 64bit
version of the same structure.  There already exists a 32bit and 64bit
versions siginfo_t, and the 32bit version can not include a 64bit
member as it only has 32bit alignment.  So unsigned long is used in
siginfo_t instead of a u64 as unsigned long can encode a pointer on
all architectures linux supports.

v1: https://lkml.kernel.org/r/m11rarqqx2.fsf_-_@fess.ebiederm.org
v2: https://lkml.kernel.org/r/20210503203814.25487-10-ebiederm@xmission.com
v3: https://lkml.kernel.org/r/20210505141101.11519-11-ebiederm@xmission.com
Link: https://lkml.kernel.org/r/20210517195748.8880-4-ebiederm@xmission.com
Reviewed-by: Marco Elver <elver@google.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 arch/m68k/kernel/signal.c                           |  3 ++-
 arch/x86/kernel/signal_compat.c                     |  6 ++++--
 fs/signalfd.c                                       |  3 ++-
 include/linux/compat.h                              |  5 ++++-
 include/uapi/asm-generic/siginfo.h                  |  8 ++++++--
 include/uapi/linux/perf_event.h                     |  2 +-
 include/uapi/linux/signalfd.h                       |  4 ++--
 kernel/signal.c                                     | 21 +++++++++++++--------
 .../testing/selftests/perf_events/sigtrap_threads.c | 14 +++++++-------
 9 files changed, 41 insertions(+), 25 deletions(-)

diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index a4b7ee1df211..8f215e79e70e 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -623,7 +623,8 @@ static inline void siginfo_build_tests(void)
 	BUILD_BUG_ON(offsetof(siginfo_t, si_pkey) != 0x12);
 
 	/* _sigfault._perf */
-	BUILD_BUG_ON(offsetof(siginfo_t, si_perf) != 0x10);
+	BUILD_BUG_ON(offsetof(siginfo_t, si_perf_data) != 0x10);
+	BUILD_BUG_ON(offsetof(siginfo_t, si_perf_type) != 0x14);
 
 	/* _sigpoll */
 	BUILD_BUG_ON(offsetof(siginfo_t, si_band)   != 0x0c);
diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
index a9fcabd8a5e5..06743ec054d2 100644
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -141,8 +141,10 @@ static inline void signal_compat_build_tests(void)
 	BUILD_BUG_ON(offsetof(siginfo_t, si_pkey) != 0x20);
 	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pkey) != 0x14);
 
-	BUILD_BUG_ON(offsetof(siginfo_t, si_perf) != 0x18);
-	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf) != 0x10);
+	BUILD_BUG_ON(offsetof(siginfo_t, si_perf_data) != 0x18);
+	BUILD_BUG_ON(offsetof(siginfo_t, si_perf_type) != 0x20);
+	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf_data) != 0x10);
+	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf_type) != 0x14);
 
 	CHECK_CSI_OFFSET(_sigpoll);
 	CHECK_CSI_SIZE  (_sigpoll, 2*sizeof(int));
diff --git a/fs/signalfd.c b/fs/signalfd.c
index e87e59581653..373df2f12415 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -134,7 +134,8 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
 		break;
 	case SIL_PERF_EVENT:
 		new.ssi_addr = (long) kinfo->si_addr;
-		new.ssi_perf = kinfo->si_perf;
+		new.ssi_perf_type = kinfo->si_perf_type;
+		new.ssi_perf_data = kinfo->si_perf_data;
 		break;
 	case SIL_CHLD:
 		new.ssi_pid    = kinfo->si_pid;
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 6af7bef15e94..a27fffaae121 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -236,7 +236,10 @@ typedef struct compat_siginfo {
 					u32 _pkey;
 				} _addr_pkey;
 				/* used when si_code=TRAP_PERF */
-				compat_ulong_t _perf;
+				struct {
+					compat_ulong_t _data;
+					u32 _type;
+				} _perf;
 			};
 		} _sigfault;
 
diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h
index e663bf117b46..5a3c221f4c9d 100644
--- a/include/uapi/asm-generic/siginfo.h
+++ b/include/uapi/asm-generic/siginfo.h
@@ -91,7 +91,10 @@ union __sifields {
 				__u32 _pkey;
 			} _addr_pkey;
 			/* used when si_code=TRAP_PERF */
-			unsigned long _perf;
+			struct {
+				unsigned long _data;
+				__u32 _type;
+			} _perf;
 		};
 	} _sigfault;
 
@@ -154,7 +157,8 @@ typedef struct siginfo {
 #define si_lower	_sifields._sigfault._addr_bnd._lower
 #define si_upper	_sifields._sigfault._addr_bnd._upper
 #define si_pkey		_sifields._sigfault._addr_pkey._pkey
-#define si_perf		_sifields._sigfault._perf
+#define si_perf_data	_sifields._sigfault._perf._data
+#define si_perf_type	_sifields._sigfault._perf._type
 #define si_band		_sifields._sigpoll._band
 #define si_fd		_sifields._sigpoll._fd
 #define si_call_addr	_sifields._sigsys._call_addr
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index e54e639248c8..7b14753b3d38 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -464,7 +464,7 @@ struct perf_event_attr {
 
 	/*
 	 * User provided data if sigtrap=1, passed back to user via
-	 * siginfo_t::si_perf, e.g. to permit user to identify the event.
+	 * siginfo_t::si_perf_data, e.g. to permit user to identify the event.
 	 */
 	__u64	sig_data;
 };
diff --git a/include/uapi/linux/signalfd.h b/include/uapi/linux/signalfd.h
index 7e333042c7e3..e78dddf433fc 100644
--- a/include/uapi/linux/signalfd.h
+++ b/include/uapi/linux/signalfd.h
@@ -39,8 +39,8 @@ struct signalfd_siginfo {
 	__s32 ssi_syscall;
 	__u64 ssi_call_addr;
 	__u32 ssi_arch;
-	__u32 __pad3;
-	__u64 ssi_perf;
+	__u32 ssi_perf_type;
+	__u64 ssi_perf_data;
 
 	/*
 	 * Pad strcture to 128 bytes. Remember to update the
diff --git a/kernel/signal.c b/kernel/signal.c
index 3a18d13c39b2..dca53515ae3f 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1768,11 +1768,13 @@ int force_sig_perf(void __user *addr, u32 type, u64 sig_data)
 	struct kernel_siginfo info;
 
 	clear_siginfo(&info);
-	info.si_signo = SIGTRAP;
-	info.si_errno = type;
-	info.si_code  = TRAP_PERF;
-	info.si_addr  = addr;
-	info.si_perf  = sig_data;
+	info.si_signo     = SIGTRAP;
+	info.si_errno     = 0;
+	info.si_code      = TRAP_PERF;
+	info.si_addr      = addr;
+	info.si_perf_data = sig_data;
+	info.si_perf_type = type;
+
 	return force_sig_info(&info);
 }
 
@@ -3356,7 +3358,8 @@ void copy_siginfo_to_external32(struct compat_siginfo *to,
 		break;
 	case SIL_PERF_EVENT:
 		to->si_addr = ptr_to_compat(from->si_addr);
-		to->si_perf = from->si_perf;
+		to->si_perf_data = from->si_perf_data;
+		to->si_perf_type = from->si_perf_type;
 		break;
 	case SIL_CHLD:
 		to->si_pid = from->si_pid;
@@ -3432,7 +3435,8 @@ static int post_copy_siginfo_from_user32(kernel_siginfo_t *to,
 		break;
 	case SIL_PERF_EVENT:
 		to->si_addr = compat_ptr(from->si_addr);
-		to->si_perf = from->si_perf;
+		to->si_perf_data = from->si_perf_data;
+		to->si_perf_type = from->si_perf_type;
 		break;
 	case SIL_CHLD:
 		to->si_pid    = from->si_pid;
@@ -4615,7 +4619,8 @@ static inline void siginfo_buildtime_checks(void)
 	CHECK_OFFSET(si_lower);
 	CHECK_OFFSET(si_upper);
 	CHECK_OFFSET(si_pkey);
-	CHECK_OFFSET(si_perf);
+	CHECK_OFFSET(si_perf_data);
+	CHECK_OFFSET(si_perf_type);
 
 	/* sigpoll */
 	CHECK_OFFSET(si_band);
diff --git a/tools/testing/selftests/perf_events/sigtrap_threads.c b/tools/testing/selftests/perf_events/sigtrap_threads.c
index 78ddf5e11625..8e83cf91513a 100644
--- a/tools/testing/selftests/perf_events/sigtrap_threads.c
+++ b/tools/testing/selftests/perf_events/sigtrap_threads.c
@@ -43,7 +43,7 @@ static struct {
 	siginfo_t first_siginfo;	/* First observed siginfo_t. */
 } ctx;
 
-/* Unique value to check si_perf is correctly set from perf_event_attr::sig_data. */
+/* Unique value to check si_perf_data is correctly set from perf_event_attr::sig_data. */
 #define TEST_SIG_DATA(addr) (~(unsigned long)(addr))
 
 static struct perf_event_attr make_event_attr(bool enabled, volatile void *addr)
@@ -164,8 +164,8 @@ TEST_F(sigtrap_threads, enable_event)
 	EXPECT_EQ(ctx.signal_count, NUM_THREADS);
 	EXPECT_EQ(ctx.tids_want_signal, 0);
 	EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on);
-	EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT);
-	EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on));
+	EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT);
+	EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on));
 
 	/* Check enabled for parent. */
 	ctx.iterate_on = 0;
@@ -183,8 +183,8 @@ TEST_F(sigtrap_threads, modify_and_enable_event)
 	EXPECT_EQ(ctx.signal_count, NUM_THREADS);
 	EXPECT_EQ(ctx.tids_want_signal, 0);
 	EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on);
-	EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT);
-	EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on));
+	EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT);
+	EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on));
 
 	/* Check enabled for parent. */
 	ctx.iterate_on = 0;
@@ -203,8 +203,8 @@ TEST_F(sigtrap_threads, signal_stress)
 	EXPECT_EQ(ctx.signal_count, NUM_THREADS * ctx.iterate_on);
 	EXPECT_EQ(ctx.tids_want_signal, 0);
 	EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on);
-	EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT);
-	EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on));
+	EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT);
+	EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on));
 }
 
 TEST_HARNESS_MAIN
-- 
cgit v1.2.3


From 922e3013046b79b444c87eda5baf43afae1326a8 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 3 May 2021 12:52:43 -0500
Subject: signalfd: Remove SIL_PERF_EVENT fields from signalfd_siginfo

With the addition of ssi_perf_data and ssi_perf_type struct signalfd_siginfo
is dangerously close to running out of space.  All that remains is just
enough space for two additional 64bit fields.  A practice of adding all
possible siginfo_t fields into struct singalfd_siginfo can not be supported
as adding the missing fields ssi_lower, ssi_upper, and ssi_pkey would
require two 64bit fields and one 32bit fields.  In practice the fields
ssi_perf_data and ssi_perf_type can never be used by signalfd as the signal
that generates them always delivers them synchronously to the thread that
triggers them.

Therefore until someone actually needs the fields ssi_perf_data and
ssi_perf_type in signalfd_siginfo remove them.  This leaves a bit more room
for future expansion.

v1: https://lkml.kernel.org/r/20210503203814.25487-12-ebiederm@xmission.com
v2: https://lkml.kernel.org/r/20210505141101.11519-12-ebiederm@xmission.com
Link: https://lkml.kernel.org/r/20210517195748.8880-5-ebiederm@xmission.com
Reviewed-by: Marco Elver <elver@google.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/signalfd.c                 | 16 ++++++----------
 include/uapi/linux/signalfd.h |  4 +---
 2 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/fs/signalfd.c b/fs/signalfd.c
index 373df2f12415..167b5889db4b 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -114,12 +114,13 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
 		break;
 	case SIL_FAULT_BNDERR:
 	case SIL_FAULT_PKUERR:
+	case SIL_PERF_EVENT:
 		/*
-		 * Fall through to the SIL_FAULT case.  Both SIL_FAULT_BNDERR
-		 * and SIL_FAULT_PKUERR are only generated by faults that
-		 * deliver them synchronously to userspace.  In case someone
-		 * injects one of these signals and signalfd catches it treat
-		 * it as SIL_FAULT.
+		 * Fall through to the SIL_FAULT case.  SIL_FAULT_BNDERR,
+		 * SIL_FAULT_PKUERR, and SIL_PERF_EVENT are only
+		 * generated by faults that deliver them synchronously to
+		 * userspace.  In case someone injects one of these signals
+		 * and signalfd catches it treat it as SIL_FAULT.
 		 */
 	case SIL_FAULT:
 		new.ssi_addr = (long) kinfo->si_addr;
@@ -132,11 +133,6 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
 		new.ssi_addr = (long) kinfo->si_addr;
 		new.ssi_addr_lsb = (short) kinfo->si_addr_lsb;
 		break;
-	case SIL_PERF_EVENT:
-		new.ssi_addr = (long) kinfo->si_addr;
-		new.ssi_perf_type = kinfo->si_perf_type;
-		new.ssi_perf_data = kinfo->si_perf_data;
-		break;
 	case SIL_CHLD:
 		new.ssi_pid    = kinfo->si_pid;
 		new.ssi_uid    = kinfo->si_uid;
diff --git a/include/uapi/linux/signalfd.h b/include/uapi/linux/signalfd.h
index e78dddf433fc..83429a05b698 100644
--- a/include/uapi/linux/signalfd.h
+++ b/include/uapi/linux/signalfd.h
@@ -39,8 +39,6 @@ struct signalfd_siginfo {
 	__s32 ssi_syscall;
 	__u64 ssi_call_addr;
 	__u32 ssi_arch;
-	__u32 ssi_perf_type;
-	__u64 ssi_perf_data;
 
 	/*
 	 * Pad strcture to 128 bytes. Remember to update the
@@ -51,7 +49,7 @@ struct signalfd_siginfo {
 	 * comes out of a read(2) and we really don't want to have
 	 * a compat on read(2).
 	 */
-	__u8 __pad[16];
+	__u8 __pad[28];
 };
 
 
-- 
cgit v1.2.3


From 3410fbcd47dc6479af4309febf760ccaa5efb472 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@nvidia.com>
Date: Wed, 12 May 2021 13:52:27 +0300
Subject: {net, RDMA}/mlx5: Fix override of log_max_qp by other device

mlx5_core_dev holds pointer to static profile, hence when the
log_max_qp of the profile is override by some device, then it
effect all other mlx5 devices that share the same profile.
Fix it by having a profile instance for every mlx5 device.

Fixes: 883371c453b9 ("net/mlx5: Check FW limitations on log_max_qp before setting it")
Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/infiniband/hw/mlx5/mr.c                |  4 +--
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 11 +++----
 include/linux/mlx5/driver.h                    | 44 +++++++++++++-------------
 3 files changed, 29 insertions(+), 30 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 4388afeff251..9662cd39c7ff 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -743,10 +743,10 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 		ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) /
 			   MLX5_IB_UMR_OCTOWORD;
 		ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
-		if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
+		if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) &&
 		    !dev->is_rep && mlx5_core_is_pf(dev->mdev) &&
 		    mlx5_ib_can_load_pas_with_umr(dev, 0))
-			ent->limit = dev->mdev->profile->mr_cache[i].limit;
+			ent->limit = dev->mdev->profile.mr_cache[i].limit;
 		else
 			ent->limit = 0;
 		spin_lock_irq(&ent->lock);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index c114365eb126..a1d67bd7fb43 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -503,7 +503,7 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx)
 
 static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
 {
-	struct mlx5_profile *prof = dev->profile;
+	struct mlx5_profile *prof = &dev->profile;
 	void *set_hca_cap;
 	int err;
 
@@ -524,11 +524,11 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
 		 to_fw_pkey_sz(dev, 128));
 
 	/* Check log_max_qp from HCA caps to set in current profile */
-	if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < profile[prof_sel].log_max_qp) {
+	if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) {
 		mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n",
-			       profile[prof_sel].log_max_qp,
+			       prof->log_max_qp,
 			       MLX5_CAP_GEN_MAX(dev, log_max_qp));
-		profile[prof_sel].log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp);
+		prof->log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp);
 	}
 	if (prof->mask & MLX5_PROF_MASK_QP_SIZE)
 		MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp,
@@ -1381,8 +1381,7 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
 	struct mlx5_priv *priv = &dev->priv;
 	int err;
 
-	dev->profile = &profile[profile_idx];
-
+	memcpy(&dev->profile, &profile[profile_idx], sizeof(dev->profile));
 	INIT_LIST_HEAD(&priv->ctx_list);
 	spin_lock_init(&priv->ctx_lock);
 	mutex_init(&dev->intf_state_mutex);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index f8e8d7e90616..020a8f7fdbdd 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -703,6 +703,27 @@ struct mlx5_hv_vhca;
 #define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) (MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity))
 #define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))
 
+enum {
+	MLX5_PROF_MASK_QP_SIZE		= (u64)1 << 0,
+	MLX5_PROF_MASK_MR_CACHE		= (u64)1 << 1,
+};
+
+enum {
+	MR_CACHE_LAST_STD_ENTRY = 20,
+	MLX5_IMR_MTT_CACHE_ENTRY,
+	MLX5_IMR_KSM_CACHE_ENTRY,
+	MAX_MR_CACHE_ENTRIES
+};
+
+struct mlx5_profile {
+	u64	mask;
+	u8	log_max_qp;
+	struct {
+		int	size;
+		int	limit;
+	} mr_cache[MAX_MR_CACHE_ENTRIES];
+};
+
 struct mlx5_core_dev {
 	struct device *device;
 	enum mlx5_coredev_type coredev_type;
@@ -731,7 +752,7 @@ struct mlx5_core_dev {
 	struct mutex		intf_state_mutex;
 	unsigned long		intf_state;
 	struct mlx5_priv	priv;
-	struct mlx5_profile	*profile;
+	struct mlx5_profile	profile;
 	u32			issi;
 	struct mlx5e_resources  mlx5e_res;
 	struct mlx5_dm          *dm;
@@ -1083,18 +1104,6 @@ static inline u8 mlx5_mkey_variant(u32 mkey)
 	return mkey & 0xff;
 }
 
-enum {
-	MLX5_PROF_MASK_QP_SIZE		= (u64)1 << 0,
-	MLX5_PROF_MASK_MR_CACHE		= (u64)1 << 1,
-};
-
-enum {
-	MR_CACHE_LAST_STD_ENTRY = 20,
-	MLX5_IMR_MTT_CACHE_ENTRY,
-	MLX5_IMR_KSM_CACHE_ENTRY,
-	MAX_MR_CACHE_ENTRIES
-};
-
 /* Async-atomic event notifier used by mlx5 core to forward FW
  * evetns recived from event queue to mlx5 consumers.
  * Optimise event queue dipatching.
@@ -1148,15 +1157,6 @@ int mlx5_rdma_rn_get_params(struct mlx5_core_dev *mdev,
 			    struct ib_device *device,
 			    struct rdma_netdev_alloc_params *params);
 
-struct mlx5_profile {
-	u64	mask;
-	u8	log_max_qp;
-	struct {
-		int	size;
-		int	limit;
-	} mr_cache[MAX_MR_CACHE_ENTRIES];
-};
-
 enum {
 	MLX5_PCI_DEV_IS_VF		= 1 << 0,
 };
-- 
cgit v1.2.3


From dca59f4a791960ec73fa15803faa0abe0f92ece2 Mon Sep 17 00:00:00 2001
From: Dima Chumak <dchumak@nvidia.com>
Date: Mon, 26 Apr 2021 15:16:26 +0300
Subject: net/mlx5e: Fix nullptr in add_vlan_push_action()

The result of dev_get_by_index_rcu() is not checked for NULL and then
gets dereferenced immediately.

Also, the RCU lock must be held by the caller of dev_get_by_index_rcu(),
which isn't satisfied by the call stack.

Fix by handling nullptr return value when iflink device is not found.
Add RCU locking around dev_get_by_index_rcu() to avoid possible adverse
effects while iterating over the net_device's hlist.

It is safe not to increment reference count of the net_device pointer in
case of a successful lookup, because it's already handled by VLAN code
during VLAN device registration (see register_vlan_dev and
netdev_upper_dev_link).

Fixes: 278748a95aa3 ("net/mlx5e: Offload TC e-switch rules with egress VLAN device")
Addresses-Coverity: ("Dereference null return value")
Signed-off-by: Dima Chumak <dchumak@nvidia.com>
Reviewed-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 47a9c49b25fd..46945d04b5b8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -3526,8 +3526,12 @@ static int add_vlan_push_action(struct mlx5e_priv *priv,
 	if (err)
 		return err;
 
-	*out_dev = dev_get_by_index_rcu(dev_net(vlan_dev),
-					dev_get_iflink(vlan_dev));
+	rcu_read_lock();
+	*out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), dev_get_iflink(vlan_dev));
+	rcu_read_unlock();
+	if (!*out_dev)
+		return -ENODEV;
+
 	if (is_vlan_dev(*out_dev))
 		err = add_vlan_push_action(priv, attr, out_dev, action);
 
-- 
cgit v1.2.3


From 442b3d7b671bcb779ebdad46edd08051eb8b28d9 Mon Sep 17 00:00:00 2001
From: Jianbo Liu <jianbol@nvidia.com>
Date: Fri, 30 Apr 2021 06:58:29 +0000
Subject: net/mlx5: Set reformat action when needed for termination rules

For remote mirroring, after the tunnel packets are received, they are
decapsulated and sent to representor, then re-encapsulated and sent
out over another tunnel. So reformat action is set only when the
destination is required to do encapsulation.

Fixes: 249ccc3c95bd ("net/mlx5e: Add support for offloading traffic from uplink to uplink")
Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
Reviewed-by: Ariel Levkovich <lariel@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../mellanox/mlx5/core/eswitch_offloads_termtbl.c  | 31 +++++++---------------
 1 file changed, 10 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
index a81ece94f599..e3e7fdd396ad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
@@ -172,19 +172,6 @@ mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw,
 	}
 }
 
-static bool mlx5_eswitch_termtbl_is_encap_reformat(struct mlx5_pkt_reformat *rt)
-{
-	switch (rt->reformat_type) {
-	case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
-	case MLX5_REFORMAT_TYPE_L2_TO_NVGRE:
-	case MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
-	case MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
-		return true;
-	default:
-		return false;
-	}
-}
-
 static void
 mlx5_eswitch_termtbl_actions_move(struct mlx5_flow_act *src,
 				  struct mlx5_flow_act *dst)
@@ -202,14 +189,6 @@ mlx5_eswitch_termtbl_actions_move(struct mlx5_flow_act *src,
 			memset(&src->vlan[1], 0, sizeof(src->vlan[1]));
 		}
 	}
-
-	if (src->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT &&
-	    mlx5_eswitch_termtbl_is_encap_reformat(src->pkt_reformat)) {
-		src->action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
-		dst->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
-		dst->pkt_reformat = src->pkt_reformat;
-		src->pkt_reformat = NULL;
-	}
 }
 
 static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw,
@@ -279,6 +258,14 @@ mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw,
 		if (dest[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT)
 			continue;
 
+		if (attr->dests[num_vport_dests].flags & MLX5_ESW_DEST_ENCAP) {
+			term_tbl_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+			term_tbl_act.pkt_reformat = attr->dests[num_vport_dests].pkt_reformat;
+		} else {
+			term_tbl_act.action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+			term_tbl_act.pkt_reformat = NULL;
+		}
+
 		/* get the terminating table for the action list */
 		tt = mlx5_eswitch_termtbl_get_create(esw, &term_tbl_act,
 						     &dest[i], attr);
@@ -301,6 +288,8 @@ mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw,
 		goto revert_changes;
 
 	/* create the FTE */
+	flow_act->action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+	flow_act->pkt_reformat = NULL;
 	rule = mlx5_add_flow_rules(fdb, spec, flow_act, dest, num_dest);
 	if (IS_ERR(rule))
 		goto revert_changes;
-- 
cgit v1.2.3


From fca086617af864efd20289774901221b2df06b39 Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Thu, 13 May 2021 15:00:53 +0300
Subject: net/mlx5: Fix err prints and return when creating termination table

Fix print to print correct error code and not using IS_ERR() which
will just result in always printing 1.
Also return real err instead of always -EOPNOTSUPP.

Fixes: 10caabdaad5a ("net/mlx5e: Use termination table for VLAN push actions")
Signed-off-by: Roi Dayan <roid@nvidia.com>
Reviewed-by: Maor Dickman <maord@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../mellanox/mlx5/core/eswitch_offloads_termtbl.c  | 23 +++++++++++-----------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
index e3e7fdd396ad..d61bee2d35fe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
@@ -65,7 +65,7 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev,
 {
 	struct mlx5_flow_table_attr ft_attr = {};
 	struct mlx5_flow_namespace *root_ns;
-	int err;
+	int err, err2;
 
 	root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
 	if (!root_ns) {
@@ -83,26 +83,26 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev,
 	ft_attr.autogroup.max_num_groups = 1;
 	tt->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr);
 	if (IS_ERR(tt->termtbl)) {
-		esw_warn(dev, "Failed to create termination table (error %d)\n",
-			 IS_ERR(tt->termtbl));
-		return -EOPNOTSUPP;
+		err = PTR_ERR(tt->termtbl);
+		esw_warn(dev, "Failed to create termination table, err %pe\n", tt->termtbl);
+		return err;
 	}
 
 	tt->rule = mlx5_add_flow_rules(tt->termtbl, NULL, flow_act,
 				       &tt->dest, 1);
 	if (IS_ERR(tt->rule)) {
-		esw_warn(dev, "Failed to create termination table rule (error %d)\n",
-			 IS_ERR(tt->rule));
+		err = PTR_ERR(tt->rule);
+		esw_warn(dev, "Failed to create termination table rule, err %pe\n", tt->rule);
 		goto add_flow_err;
 	}
 	return 0;
 
 add_flow_err:
-	err = mlx5_destroy_flow_table(tt->termtbl);
-	if (err)
-		esw_warn(dev, "Failed to destroy termination table\n");
+	err2 = mlx5_destroy_flow_table(tt->termtbl);
+	if (err2)
+		esw_warn(dev, "Failed to destroy termination table, err %d\n", err2);
 
-	return -EOPNOTSUPP;
+	return err;
 }
 
 static struct mlx5_termtbl_handle *
@@ -270,8 +270,7 @@ mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw,
 		tt = mlx5_eswitch_termtbl_get_create(esw, &term_tbl_act,
 						     &dest[i], attr);
 		if (IS_ERR(tt)) {
-			esw_warn(esw->dev, "Failed to get termination table (error %d)\n",
-				 IS_ERR(tt));
+			esw_warn(esw->dev, "Failed to get termination table, err %pe\n", tt);
 			goto revert_changes;
 		}
 		attr->dests[num_vport_dests].termtbl = tt;
-- 
cgit v1.2.3


From 82041634d96e87b41c600a673f10150d9f21f742 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Fri, 7 May 2021 10:08:47 +0300
Subject: net/mlx5: SF, Fix show state inactive when its inactivated

When a SF is inactivated and when it is in a TEARDOWN_REQUEST
state, driver still returns its state as active. This is incorrect.
Fix it by treating TEARDOWN_REQEUST as inactive state. When a SF
is still attached to the driver, on user request to reactivate EINVAL
error is returned. Inform user about it with better code EBUSY and
informative error message.

Fixes: 6a3273217469 ("net/mlx5: SF, Port function state change support")
Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Vu Pham <vuhuong@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
index a8e73c9ed1ea..1be048769309 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
@@ -136,10 +136,10 @@ static enum devlink_port_fn_state mlx5_sf_to_devlink_state(u8 hw_state)
 	switch (hw_state) {
 	case MLX5_VHCA_STATE_ACTIVE:
 	case MLX5_VHCA_STATE_IN_USE:
-	case MLX5_VHCA_STATE_TEARDOWN_REQUEST:
 		return DEVLINK_PORT_FN_STATE_ACTIVE;
 	case MLX5_VHCA_STATE_INVALID:
 	case MLX5_VHCA_STATE_ALLOCATED:
+	case MLX5_VHCA_STATE_TEARDOWN_REQUEST:
 	default:
 		return DEVLINK_PORT_FN_STATE_INACTIVE;
 	}
@@ -192,14 +192,17 @@ sf_err:
 	return err;
 }
 
-static int mlx5_sf_activate(struct mlx5_core_dev *dev, struct mlx5_sf *sf)
+static int mlx5_sf_activate(struct mlx5_core_dev *dev, struct mlx5_sf *sf,
+			    struct netlink_ext_ack *extack)
 {
 	int err;
 
 	if (mlx5_sf_is_active(sf))
 		return 0;
-	if (sf->hw_state != MLX5_VHCA_STATE_ALLOCATED)
-		return -EINVAL;
+	if (sf->hw_state != MLX5_VHCA_STATE_ALLOCATED) {
+		NL_SET_ERR_MSG_MOD(extack, "SF is inactivated but it is still attached");
+		return -EBUSY;
+	}
 
 	err = mlx5_cmd_sf_enable_hca(dev, sf->hw_fn_id);
 	if (err)
@@ -226,7 +229,8 @@ static int mlx5_sf_deactivate(struct mlx5_core_dev *dev, struct mlx5_sf *sf)
 
 static int mlx5_sf_state_set(struct mlx5_core_dev *dev, struct mlx5_sf_table *table,
 			     struct mlx5_sf *sf,
-			     enum devlink_port_fn_state state)
+			     enum devlink_port_fn_state state,
+			     struct netlink_ext_ack *extack)
 {
 	int err = 0;
 
@@ -234,7 +238,7 @@ static int mlx5_sf_state_set(struct mlx5_core_dev *dev, struct mlx5_sf_table *ta
 	if (state == mlx5_sf_to_devlink_state(sf->hw_state))
 		goto out;
 	if (state == DEVLINK_PORT_FN_STATE_ACTIVE)
-		err = mlx5_sf_activate(dev, sf);
+		err = mlx5_sf_activate(dev, sf, extack);
 	else if (state == DEVLINK_PORT_FN_STATE_INACTIVE)
 		err = mlx5_sf_deactivate(dev, sf);
 	else
@@ -265,7 +269,7 @@ int mlx5_devlink_sf_port_fn_state_set(struct devlink *devlink, struct devlink_po
 		goto out;
 	}
 
-	err = mlx5_sf_state_set(dev, table, sf, state);
+	err = mlx5_sf_state_set(dev, table, sf, state, extack);
 out:
 	mlx5_sf_table_put(table);
 	return err;
-- 
cgit v1.2.3


From fe7738eb3ca3631a75844e790f6cb576c0fe7b00 Mon Sep 17 00:00:00 2001
From: Dima Chumak <dchumak@nvidia.com>
Date: Mon, 26 Apr 2021 15:16:26 +0300
Subject: net/mlx5e: Fix nullptr in mlx5e_tc_add_fdb_flow()

The result of __dev_get_by_index() is not checked for NULL, which then
passed to mlx5e_attach_encap() and gets dereferenced.

Also, in case of a successful lookup, the net_device reference count is
not incremented, which may result in net_device pointer becoming invalid
at any time during mlx5e_attach_encap() execution.

Fix by using dev_get_by_index(), which does proper reference counting on
the net_device pointer. Also, handle nullptr return value when mirred
device is not found.

It's safe to call dev_put() on the mirred net_device pointer, right
after mlx5e_attach_encap() call, because it's not being saved/copied
down the call chain.

Fixes: 3c37745ec614 ("net/mlx5e: Properly deal with encap flows add/del under neigh update")
Addresses-Coverity: ("Dereference null return value")
Signed-off-by: Dima Chumak <dchumak@nvidia.com>
Reviewed-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 46945d04b5b8..882bafba43f2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1322,10 +1322,10 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
 		      struct netlink_ext_ack *extack)
 {
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-	struct net_device *out_dev, *encap_dev = NULL;
 	struct mlx5e_tc_flow_parse_attr *parse_attr;
 	struct mlx5_flow_attr *attr = flow->attr;
 	bool vf_tun = false, encap_valid = true;
+	struct net_device *encap_dev = NULL;
 	struct mlx5_esw_flow_attr *esw_attr;
 	struct mlx5_fc *counter = NULL;
 	struct mlx5e_rep_priv *rpriv;
@@ -1371,16 +1371,22 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
 	esw_attr = attr->esw_attr;
 
 	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
+		struct net_device *out_dev;
 		int mirred_ifindex;
 
 		if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
 			continue;
 
 		mirred_ifindex = parse_attr->mirred_ifindex[out_index];
-		out_dev = __dev_get_by_index(dev_net(priv->netdev),
-					     mirred_ifindex);
+		out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
+		if (!out_dev) {
+			NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
+			err = -ENODEV;
+			goto err_out;
+		}
 		err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
 					 extack, &encap_dev, &encap_valid);
+		dev_put(out_dev);
 		if (err)
 			goto err_out;
 
-- 
cgit v1.2.3


From 83026d83186bc48bb41ee4872f339b83f31dfc55 Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Mon, 3 May 2021 18:01:02 +0300
Subject: net/mlx5e: Fix null deref accessing lag dev

It could be the lag dev is null so stop processing the event.
In bond_enslave() the active/backup slave being set before setting the
upper dev so first event is without an upper dev.
After setting the upper dev with bond_master_upper_dev_link() there is
a second event and in that event we have an upper dev.

Fixes: 7e51891a237f ("net/mlx5e: Use netdev events to set/del egress acl forward-to-vport rule")
Signed-off-by: Roi Dayan <roid@nvidia.com>
Reviewed-by: Maor Dickman <maord@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
index 95f2b26a3ee3..9c076aa20306 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
@@ -223,6 +223,8 @@ static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *pt
 	rpriv = priv->ppriv;
 	fwd_vport_num = rpriv->rep->vport;
 	lag_dev = netdev_master_upper_dev_get(netdev);
+	if (!lag_dev)
+		return;
 
 	netdev_dbg(netdev, "lag_dev(%s)'s slave vport(%d) is txable(%d)\n",
 		   lag_dev->name, fwd_vport_num, net_lag_port_dev_txable(netdev));
-- 
cgit v1.2.3


From eb96cc15926f4ddde3a28c42feeffdf002451c24 Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Sun, 2 May 2021 10:25:50 +0300
Subject: net/mlx5e: Make sure fib dev exists in fib event

For unreachable route entry the fib dev does not exists.

Fixes: 8914add2c9e5 ("net/mlx5e: Handle FIB events to update tunnel endpoint device")
Reported-by: Dennis Afanasev <dennis.afanasev@stateless.net>
Signed-off-by: Roi Dayan <roid@nvidia.com>
Reviewed-by: Maor Dickman <maord@nvidia.com>
Reviewed-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
index 593503bc4d07..f1fb11680d20 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
@@ -1505,7 +1505,7 @@ mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
 
 	fen_info = container_of(info, struct fib_entry_notifier_info, info);
 	fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
-	if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
+	if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
 	    fen_info->dst_len != 32)
 		return NULL;
 
-- 
cgit v1.2.3


From 77ecd10d0a8aaa6e4871d8c63626e4c9fc5e47db Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@nvidia.com>
Date: Thu, 25 Feb 2021 11:20:00 -0800
Subject: net/mlx5e: reset XPS on error flow if netdev isn't registered yet

mlx5e_attach_netdev can be called prior to registering the netdevice:
Example stack:

ipoib_new_child_link ->
ipoib_intf_init->
rdma_init_netdev->
mlx5_rdma_setup_rn->

mlx5e_attach_netdev->
mlx5e_num_channels_changed ->
mlx5e_set_default_xps_cpumasks ->
netif_set_xps_queue ->
__netif_set_xps_queue -> kmalloc

If any later stage fails at any point after mlx5e_num_channels_changed()
returns, XPS allocated maps will never be freed as they
are only freed during netdev unregistration, which will never happen for
yet to be registered netdevs.

Fixes: 3909a12e7913 ("net/mlx5e: Fix configuration of XPS cpumasks and netdev queues in corner cases")
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index bca832cdc4cb..89937b055070 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -5229,6 +5229,11 @@ static void mlx5e_update_features(struct net_device *netdev)
 	rtnl_unlock();
 }
 
+static void mlx5e_reset_channels(struct net_device *netdev)
+{
+	netdev_reset_tc(netdev);
+}
+
 int mlx5e_attach_netdev(struct mlx5e_priv *priv)
 {
 	const bool take_rtnl = priv->netdev->reg_state == NETREG_REGISTERED;
@@ -5283,6 +5288,7 @@ err_cleanup_tx:
 	profile->cleanup_tx(priv);
 
 out:
+	mlx5e_reset_channels(priv->netdev);
 	set_bit(MLX5E_STATE_DESTROYING, &priv->state);
 	cancel_work_sync(&priv->update_stats_work);
 	return err;
@@ -5300,6 +5306,7 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv)
 
 	profile->cleanup_rx(priv);
 	profile->cleanup_tx(priv);
+	mlx5e_reset_channels(priv->netdev);
 	cancel_work_sync(&priv->update_stats_work);
 }
 
-- 
cgit v1.2.3


From 97817fcc684ed01497bd19d0cd4dea699665b9cf Mon Sep 17 00:00:00 2001
From: Dima Chumak <dchumak@nvidia.com>
Date: Tue, 13 Apr 2021 22:43:08 +0300
Subject: net/mlx5e: Fix multipath lag activation

When handling FIB_EVENT_ENTRY_REPLACE event for a new multipath route,
lag activation can be missed if a stale (struct lag_mp)->mfi pointer
exists, which was associated with an older multipath route that had been
removed.

Normally, when a route is removed, it triggers mlx5_lag_fib_event(),
which handles FIB_EVENT_ENTRY_DEL and clears mfi pointer. But, if
mlx5_lag_check_prereq() condition isn't met, for example when eswitch is
in legacy mode, the fib event is skipped and mfi pointer becomes stale.

Fix by resetting mfi pointer to NULL every time mlx5_lag_mp_init() is
called.

Fixes: 544fe7c2e654 ("net/mlx5e: Activate HW multipath and handle port affinity based on FIB events")
Signed-off-by: Dima Chumak <dchumak@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
index 2c41a6920264..fd6196b5e163 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
@@ -307,6 +307,11 @@ int mlx5_lag_mp_init(struct mlx5_lag *ldev)
 	struct lag_mp *mp = &ldev->lag_mp;
 	int err;
 
+	/* always clear mfi, as it might become stale when a route delete event
+	 * has been missed
+	 */
+	mp->mfi = NULL;
+
 	if (mp->fib_nb.notifier_call)
 		return 0;
 
@@ -335,4 +340,5 @@ void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
 	unregister_fib_notifier(&init_net, &mp->fib_nb);
 	destroy_workqueue(mp->wq);
 	mp->fib_nb.notifier_call = NULL;
+	mp->mfi = NULL;
 }
-- 
cgit v1.2.3


From 7d1a3d08c8a6398e7497a98cf3f7b73ea13d9939 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@nvidia.com>
Date: Tue, 20 Apr 2021 15:16:16 +0300
Subject: net/mlx5e: Reject mirroring on source port change encap rules

Rules with MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE dest flag are
translated to destination FT in eswitch. Currently it is not possible to
mirror such rules because firmware doesn't support mixing FT and Vport
destinations in single rule when one of them adds encapsulation. Since the
only use case for MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE destination is
support for tunnel endpoints on VF and trying to offload such rule with
mirror action causes either crash in fs_core or firmware error with
syndrome 0xff6a1d, reject all such rules in mlx5 TC layer.

Fixes: 10742efc20a4 ("net/mlx5e: VF tunnel TX traffic offloading")
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 882bafba43f2..bccdb43a880b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1399,6 +1399,12 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
 		esw_attr->dests[out_index].mdev = out_priv->mdev;
 	}
 
+	if (vf_tun && esw_attr->out_count > 1) {
+		NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
+		err = -EOPNOTSUPP;
+		goto err_out;
+	}
+
 	err = mlx5_eswitch_add_vlan_action(esw, attr);
 	if (err)
 		goto err_out;
-- 
cgit v1.2.3


From 5e7923acbd86d0ff29269688d8a9c47ad091dd46 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Wed, 21 Apr 2021 14:26:31 +0300
Subject: net/mlx5e: Fix error path of updating netdev queues

Avoid division by zero in the error flow. In the driver TC number can be
either 1 or 8. When TC count is set to 1, driver zero netdev->num_tc.
Hence, need to convert it back from 0 to 1 in the error flow.

Fixes: fa3748775b92 ("net/mlx5e: Handle errors from netif_set_real_num_{tx,rx}_queues")
Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 89937b055070..d1b9a4040d60 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2697,7 +2697,7 @@ static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv)
 	int err;
 
 	old_num_txqs = netdev->real_num_tx_queues;
-	old_ntc = netdev->num_tc;
+	old_ntc = netdev->num_tc ? : 1;
 
 	nch = priv->channels.params.num_channels;
 	ntc = priv->channels.params.num_tc;
-- 
cgit v1.2.3


From 7c9f131f366ab414691907fa0407124ea2b2f3bc Mon Sep 17 00:00:00 2001
From: Eli Cohen <elic@nvidia.com>
Date: Thu, 22 Apr 2021 15:48:10 +0300
Subject: {net,vdpa}/mlx5: Configure interface MAC into mpfs L2 table

net/mlx5: Expose MPFS configuration API

MPFS is the multi physical function switch that bridges traffic between
the physical port and any physical functions associated with it. The
driver is required to add or remove MAC entries to properly forward
incoming traffic to the correct physical function.

We export the API to control MPFS so that other drivers, such as
mlx5_vdpa are able to add MAC addresses of their network interfaces.

The MAC address of the vdpa interface must be configured into the MPFS L2
address. Failing to do so could cause, in some NIC configurations, failure
to forward packets to the vdpa network device instance.

Fix this by adding calls to update the MPFS table.

CC: <mst@redhat.com>
CC: <jasowang@redhat.com>
CC: <virtualization@lists.linux-foundation.org>
Fixes: 1a86b377aa21 ("vdpa/mlx5: Add VDPA driver for supported mlx5 devices")
Signed-off-by: Eli Cohen <elic@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c    |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c |  3 +++
 drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h |  5 +----
 drivers/vdpa/mlx5/net/mlx5_vnet.c                  | 19 ++++++++++++++++++-
 include/linux/mlx5/mpfs.h                          | 18 ++++++++++++++++++
 6 files changed, 42 insertions(+), 5 deletions(-)
 create mode 100644 include/linux/mlx5/mpfs.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 0d571a0c76d9..0b75fab41ae8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -35,6 +35,7 @@
 #include <linux/ipv6.h>
 #include <linux/tcp.h>
 #include <linux/mlx5/fs.h>
+#include <linux/mlx5/mpfs.h>
 #include "en.h"
 #include "en_rep.h"
 #include "lib/mpfs.h"
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 570f2280823c..b88705a3a1a8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -35,6 +35,7 @@
 #include <linux/mlx5/mlx5_ifc.h>
 #include <linux/mlx5/vport.h>
 #include <linux/mlx5/fs.h>
+#include <linux/mlx5/mpfs.h>
 #include "esw/acl/lgcy.h"
 #include "esw/legacy.h"
 #include "mlx5_core.h"
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
index fd8449ff9e17..839a01da110f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
@@ -33,6 +33,7 @@
 #include <linux/etherdevice.h>
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/mpfs.h>
 #include <linux/mlx5/eswitch.h>
 #include "mlx5_core.h"
 #include "lib/mpfs.h"
@@ -175,6 +176,7 @@ out:
 	mutex_unlock(&mpfs->lock);
 	return err;
 }
+EXPORT_SYMBOL(mlx5_mpfs_add_mac);
 
 int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac)
 {
@@ -206,3 +208,4 @@ unlock:
 	mutex_unlock(&mpfs->lock);
 	return err;
 }
+EXPORT_SYMBOL(mlx5_mpfs_del_mac);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h
index 4a7b2c3203a7..4a293542a7aa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h
@@ -84,12 +84,9 @@ struct l2addr_node {
 #ifdef CONFIG_MLX5_MPFS
 int  mlx5_mpfs_init(struct mlx5_core_dev *dev);
 void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev);
-int  mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac);
-int  mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac);
 #else /* #ifndef CONFIG_MLX5_MPFS */
 static inline int  mlx5_mpfs_init(struct mlx5_core_dev *dev) { return 0; }
 static inline void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) {}
-static inline int  mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; }
-static inline int  mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; }
 #endif
+
 #endif
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 189e4385df40..dda5dc6f7737 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -15,6 +15,7 @@
 #include <linux/mlx5/vport.h>
 #include <linux/mlx5/fs.h>
 #include <linux/mlx5/mlx5_ifc_vdpa.h>
+#include <linux/mlx5/mpfs.h>
 #include "mlx5_vdpa.h"
 
 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
@@ -1859,11 +1860,16 @@ static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb
 static void mlx5_vdpa_free(struct vdpa_device *vdev)
 {
 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+	struct mlx5_core_dev *pfmdev;
 	struct mlx5_vdpa_net *ndev;
 
 	ndev = to_mlx5_vdpa_ndev(mvdev);
 
 	free_resources(ndev);
+	if (!is_zero_ether_addr(ndev->config.mac)) {
+		pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
+		mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
+	}
 	mlx5_vdpa_free_resources(&ndev->mvdev);
 	mutex_destroy(&ndev->reslock);
 }
@@ -1990,6 +1996,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
 {
 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
 	struct virtio_net_config *config;
+	struct mlx5_core_dev *pfmdev;
 	struct mlx5_vdpa_dev *mvdev;
 	struct mlx5_vdpa_net *ndev;
 	struct mlx5_core_dev *mdev;
@@ -2023,10 +2030,17 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
 	if (err)
 		goto err_mtu;
 
+	if (!is_zero_ether_addr(config->mac)) {
+		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
+		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
+		if (err)
+			goto err_mtu;
+	}
+
 	mvdev->vdev.dma_dev = mdev->device;
 	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
 	if (err)
-		goto err_mtu;
+		goto err_mpfs;
 
 	err = alloc_resources(ndev);
 	if (err)
@@ -2044,6 +2058,9 @@ err_reg:
 	free_resources(ndev);
 err_res:
 	mlx5_vdpa_free_resources(&ndev->mvdev);
+err_mpfs:
+	if (!is_zero_ether_addr(config->mac))
+		mlx5_mpfs_del_mac(pfmdev, config->mac);
 err_mtu:
 	mutex_destroy(&ndev->reslock);
 	put_device(&mvdev->vdev.dev);
diff --git a/include/linux/mlx5/mpfs.h b/include/linux/mlx5/mpfs.h
new file mode 100644
index 000000000000..bf700c8d5516
--- /dev/null
+++ b/include/linux/mlx5/mpfs.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+ * Copyright (c) 2021 Mellanox Technologies Ltd.
+ */
+
+#ifndef _MLX5_MPFS_
+#define _MLX5_MPFS_
+
+struct mlx5_core_dev;
+
+#ifdef CONFIG_MLX5_MPFS
+int  mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac);
+int  mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac);
+#else /* #ifndef CONFIG_MLX5_MPFS */
+static inline int  mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; }
+static inline int  mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; }
+#endif
+
+#endif
-- 
cgit v1.2.3


From 75e8564e919f369cafb3d2b8fd11ec5af7b37416 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Sun, 25 Apr 2021 13:28:10 +0300
Subject: net/mlx5: Don't overwrite HCA capabilities when setting MSI-X count

During driver probe of device that has dynamic MSI-X feature enabled,
the following error is printed in some FW flavour (not released yet).

 mlx5_core 0000:06:00.0: firmware version: 4.7.4387
 mlx5_core 0000:06:00.0: 126.016 Gb/s available PCIe bandwidth (8.0 GT/s PCIe x16 link)
 mlx5_core 0000:06:00.0: mlx5_cmd_check:777:(pid 70599): SET_HCA_CAP(0x109) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0x0)
 mlx5_core 0000:06:00.0: set_hca_cap:622:(pid 70599): handle_hca_cap failed
 mlx5_core 0000:06:00.0: mlx5_function_setup:1045:(pid 70599): set_hca_cap failed
 mlx5_core 0000:06:00.0: probe_one:1465:(pid 70599): mlx5_init_one failed with error code -22
 mlx5_core: probe of 0000:06:00.0 failed with error -22

In order to make the setting capability of MSI-X future proof, let's
query the current capabilities first.

Fixes: 604774add516 ("net/mlx5: Dynamically assign MSI-X vectors count")
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 1f907df5b3a2..c3373fb1cd7f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -95,9 +95,10 @@ int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs)
 int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
 			    int msix_vec_count)
 {
-	int sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+	int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+	void *hca_cap = NULL, *query_cap = NULL, *cap;
 	int num_vf_msix, min_msix, max_msix;
-	void *hca_cap, *cap;
 	int ret;
 
 	num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
@@ -116,11 +117,20 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
 	if (msix_vec_count > max_msix)
 		return -EOVERFLOW;
 
-	hca_cap = kzalloc(sz, GFP_KERNEL);
-	if (!hca_cap)
-		return -ENOMEM;
+	query_cap = kzalloc(query_sz, GFP_KERNEL);
+	hca_cap = kzalloc(set_sz, GFP_KERNEL);
+	if (!hca_cap || !query_cap) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = mlx5_vport_get_other_func_cap(dev, function_id, query_cap);
+	if (ret)
+		goto out;
 
 	cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability);
+	memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability),
+	       MLX5_UN_SZ_BYTES(hca_cap_union));
 	MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count);
 
 	MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP);
@@ -130,7 +140,9 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
 	MLX5_SET(set_hca_cap_in, hca_cap, op_mod,
 		 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1);
 	ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
+out:
 	kfree(hca_cap);
+	kfree(query_cap);
 	return ret;
 }
 
-- 
cgit v1.2.3


From 6ff51ab8aa8fcbcddeeefce8ca705b575805d12b Mon Sep 17 00:00:00 2001
From: Ariel Levkovich <lariel@nvidia.com>
Date: Wed, 31 Mar 2021 10:09:02 +0300
Subject: net/mlx5: Set term table as an unmanaged flow table

Termination tables are restricted to have the default miss action and
cannot be set to forward to another table in case of a miss.
If the fs prio of the termination table is not the last one in the
list, fs_core will attempt to attach it to another table.

Set the unmanaged ft flag when creating the termination table ft
and select the tc offload prio for it to prevent fs_core from selecting
the forwarding to next ft miss action and use the default one.

In addition, set the flow that forwards to the termination table to
ignore ft level restrictions since the ft level is not set by fs_core
for unamanged fts.

Fixes: 249ccc3c95bd ("net/mlx5e: Add support for offloading traffic from uplink to uplink")
Signed-off-by: Ariel Levkovich <lariel@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
index d61bee2d35fe..b45954905845 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
@@ -76,10 +76,11 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev,
 	/* As this is the terminating action then the termination table is the
 	 * same prio as the slow path
 	 */
-	ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION |
+	ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION | MLX5_FLOW_TABLE_UNMANAGED |
 			MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
-	ft_attr.prio = FDB_SLOW_PATH;
+	ft_attr.prio = FDB_TC_OFFLOAD;
 	ft_attr.max_fte = 1;
+	ft_attr.level = 1;
 	ft_attr.autogroup.max_num_groups = 1;
 	tt->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr);
 	if (IS_ERR(tt->termtbl)) {
@@ -217,6 +218,7 @@ mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw,
 	int i;
 
 	if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table) ||
+	    !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level) ||
 	    attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH ||
 	    !mlx5_eswitch_offload_is_uplink_port(esw, spec))
 		return false;
@@ -289,6 +291,7 @@ mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw,
 	/* create the FTE */
 	flow_act->action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
 	flow_act->pkt_reformat = NULL;
+	flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
 	rule = mlx5_add_flow_rules(fdb, spec, flow_act, dest, num_dest);
 	if (IS_ERR(rule))
 		goto revert_changes;
-- 
cgit v1.2.3


From e63052a5dd3ce7979bff727a8f4bb6d6b3d1317b Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 5 May 2021 13:20:26 -0700
Subject: mlx5e: add add missing BH locking around napi_schdule()

It's not correct to call napi_schedule() in pure process
context. Because we use __raise_softirq_irqoff() we require
callers to be in a context which will eventually lead to
softirq handling (hardirq, bh disabled, etc.).

With code as is users will see:

 NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #08!!!

Fixes: a8dd7ac12fc3 ("net/mlx5e: Generalize RQ activation")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index d1b9a4040d60..ad0f69480b9c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -889,10 +889,13 @@ err_free_rq:
 void mlx5e_activate_rq(struct mlx5e_rq *rq)
 {
 	set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
-	if (rq->icosq)
+	if (rq->icosq) {
 		mlx5e_trigger_irq(rq->icosq);
-	else
+	} else {
+		local_bh_disable();
 		napi_schedule(rq->cq.napi);
+		local_bh_enable();
+	}
 }
 
 void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
-- 
cgit v1.2.3


From fec356a61aa3d3a66416b4321f1279e09e0f256f Mon Sep 17 00:00:00 2001
From: Wu Bo <wubo40@huawei.com>
Date: Wed, 19 May 2021 13:01:09 +0800
Subject: nvmet: fix memory leak in nvmet_alloc_ctrl()

When creating ctrl in nvmet_alloc_ctrl(), if the cntlid_min is larger
than cntlid_max of the subsystem, and jumps to the
"out_free_changed_ns_list" label, but the ctrl->sqs lack of be freed.
Fix this by jumping to the "out_free_sqs" label.

Fixes: 94a39d61f80f ("nvmet: make ctrl-id configurable")
Signed-off-by: Wu Bo <wubo40@huawei.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 25cc2ee8de3f..1853db38b682 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -1372,7 +1372,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
 		goto out_free_changed_ns_list;
 
 	if (subsys->cntlid_min > subsys->cntlid_max)
-		goto out_free_changed_ns_list;
+		goto out_free_sqs;
 
 	ret = ida_simple_get(&cntlid_ida,
 			     subsys->cntlid_min, subsys->cntlid_max,
-- 
cgit v1.2.3


From 03504e3b54cc8118cc26c064e60a0b00c2308708 Mon Sep 17 00:00:00 2001
From: Wu Bo <wubo40@huawei.com>
Date: Wed, 19 May 2021 13:01:10 +0800
Subject: nvme-loop: fix memory leak in nvme_loop_create_ctrl()

When creating loop ctrl in nvme_loop_create_ctrl(), if nvme_init_ctrl()
fails, the loop ctrl should be freed before jumping to the "out" label.

Fixes: 3a85a5de29ea ("nvme-loop: add a NVMe loopback host driver")
Signed-off-by: Wu Bo <wubo40@huawei.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/loop.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 74b3b150e1a5..cb30cb942e1d 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -590,8 +590,10 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
 
 	ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_loop_ctrl_ops,
 				0 /* no quirks, we're perfect! */);
-	if (ret)
+	if (ret) {
+		kfree(ctrl);
 		goto out;
+	}
 
 	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
 		WARN_ON_ONCE(1);
-- 
cgit v1.2.3


From 825619b09ad351894d2c6fb6705f5b3711d145c7 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Mon, 17 May 2021 14:07:45 -0700
Subject: nvme-tcp: fix possible use-after-completion

Commit db5ad6b7f8cd ("nvme-tcp: try to send request in queue_rq
context") added a second context that may perform a network send.
This means that now RX and TX are not serialized in nvme_tcp_io_work
and can run concurrently.

While there is correct mutual exclusion in the TX path (where
the send_mutex protect the queue socket send activity) RX activity,
and more specifically request completion may run concurrently.

This means we must guarantee that any mutation of the request state
related to its lifetime, bytes sent must not be accessed when a completion
may have possibly arrived back (and processed).

The race may trigger when a request completion arrives, processed
_and_ reused as a fresh new request, exactly in the (relatively short)
window between the last data payload sent and before the request iov_iter
is advanced.

Consider the following race:
1. 16K write request is queued
2. The nvme command and the data is sent to the controller (in-capsule
   or solicited by r2t)
3. After the last payload is sent but before the req.iter is advanced,
   the controller sends back a completion.
4. The completion is processed, the request is completed, and reused
   to transfer a new request (write or read)
5. The new request is queued, and the driver reset the request parameters
   (nvme_tcp_setup_cmd_pdu).
6. Now context in (2) resumes execution and advances the req.iter

==> use-after-completion as this is already a new request.

Fix this by making sure the request is not advanced after the last
data payload send, knowing that a completion may have arrived already.

An alternative solution would have been to delay the request completion
or state change waiting for reference counting on the TX path, but besides
adding atomic operations to the hot-path, it may present challenges in
multi-stage R2T scenarios where a r2t handler needs to be deferred to
an async execution.

Reported-by: Narayan Ayalasomayajula <narayan.ayalasomayajula@wdc.com>
Tested-by: Anil Mishra <anil.mishra@wdc.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Cc: stable@vger.kernel.org # v5.8+
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/tcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 0222e23f5936..b97d2732a80f 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -943,7 +943,6 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
 		if (ret <= 0)
 			return ret;
 
-		nvme_tcp_advance_req(req, ret);
 		if (queue->data_digest)
 			nvme_tcp_ddgst_update(queue->snd_hash, page,
 					offset, ret);
@@ -960,6 +959,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
 			}
 			return 1;
 		}
+		nvme_tcp_advance_req(req, ret);
 	}
 	return -EAGAIN;
 }
-- 
cgit v1.2.3


From a0fdd1418007f83565d3f2e04b47923ba93a9b8c Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Mon, 17 May 2021 15:36:43 -0700
Subject: nvme-tcp: rerun io_work if req_list is not empty

A possible race condition exists where the request to send data is
enqueued from nvme_tcp_handle_r2t()'s will not be observed by
nvme_tcp_send_all() if it happens to be running. The driver relies on
io_work to send the enqueued request when it is runs again, but the
concurrently running nvme_tcp_send_all() may not have released the
send_mutex at that time. If no future commands are enqueued to re-kick
the io_work, the request will timeout in the SEND_H2C state, resulting
in a timeout error like:

  nvme nvme0: queue 1: timeout request 0x3 type 6

Ensure the io_work continues to run as long as the req_list is not empty.

Fixes: db5ad6b7f8cdd ("nvme-tcp: try to send request in queue_rq context")
Signed-off-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/tcp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index b97d2732a80f..34f4b3402f7c 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1140,7 +1140,8 @@ static void nvme_tcp_io_work(struct work_struct *w)
 				pending = true;
 			else if (unlikely(result < 0))
 				break;
-		}
+		} else
+			pending = !llist_empty(&queue->req_list);
 
 		result = nvme_tcp_try_recv(queue);
 		if (result > 0)
-- 
cgit v1.2.3


From a7d139145a6640172516b193abf6d2398620aa14 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Mon, 10 May 2021 21:56:35 -0700
Subject: nvme-fc: clear q_live at beginning of association teardown

The __nvmf_check_ready() routine used to bounce all filesystem io if the
controller state isn't LIVE.  However, a later patch changed the logic so
that it rejection ends up being based on the Q live check.  The FC
transport has a slightly different sequence from rdma and tcp for
shutting down queues/marking them non-live.  FC marks its queue non-live
after aborting all ios and waiting for their termination, leaving a
rather large window for filesystem io to continue to hit the transport.
Unfortunately this resulted in filesystem I/O or applications seeing I/O
errors.

Change the FC transport to mark the queues non-live at the first sign of
teardown for the association (when I/O is initially terminated).

Fixes: 73a5379937ec ("nvme-fabrics: allow to queue requests for live queues")
Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fc.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index d9ab9e7871d0..256e87721a01 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2461,6 +2461,18 @@ nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
 static void
 __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
 {
+	int q;
+
+	/*
+	 * if aborting io, the queues are no longer good, mark them
+	 * all as not live.
+	 */
+	if (ctrl->ctrl.queue_count > 1) {
+		for (q = 1; q < ctrl->ctrl.queue_count; q++)
+			clear_bit(NVME_FC_Q_LIVE, &ctrl->queues[q].flags);
+	}
+	clear_bit(NVME_FC_Q_LIVE, &ctrl->queues[0].flags);
+
 	/*
 	 * If io queues are present, stop them and terminate all outstanding
 	 * ios on them. As FC allocates FC exchange for each io, the
-- 
cgit v1.2.3


From 036867e93ebf4d7e70eba6a8c72db74ee3760bc3 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 17 May 2021 09:46:40 +0100
Subject: drm/i915/gem: Pin the L-shape quirked object as unshrinkable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When instantiating a tiled object on an L-shaped memory machine, we mark
the object as unshrinkable to prevent the shrinker from trying to swap
out the pages. We have to do this as we do not know the swizzling on the
individual pages, and so the data will be scrambled across swap out/in.

Not only do we need to move the object off the shrinker list, we need to
mark the object with shrink_pin so that the counter is consistent across
calls to madvise.

v2: in the madvise ioctl we need to check if the object is currently
shrinkable/purgeable, not if the object type supports shrinking

Fixes: 0175969e489a ("drm/i915/gem: Use shrinkable status for unknown swizzle quirks")
References: https://gitlab.freedesktop.org/drm/intel/-/issues/3293
References: https://gitlab.freedesktop.org/drm/intel/-/issues/3450
Reported-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Tested-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: <stable@vger.kernel.org> # v5.12+
Link: https://patchwork.freedesktop.org/patch/msgid/20210517084640.18862-1-matthew.auld@intel.com
(cherry picked from commit 8777d17b68dcfbfbd4d524f444adefae56f41225)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_pages.c |  2 ++
 drivers/gpu/drm/i915/i915_gem.c           | 11 +++++------
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index aed8a37ccdc9..7361971c177d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -63,6 +63,8 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 	    i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
 		GEM_BUG_ON(i915_gem_object_has_tiling_quirk(obj));
 		i915_gem_object_set_tiling_quirk(obj);
+		GEM_BUG_ON(!list_empty(&obj->mm.link));
+		atomic_inc(&obj->mm.shrink_pin);
 		shrinkable = false;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b23f58e94cfb..b3cedd20f365 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -999,12 +999,11 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 		obj->mm.madv = args->madv;
 
 	if (i915_gem_object_has_pages(obj)) {
-		struct list_head *list;
+		unsigned long flags;
 
-		if (i915_gem_object_is_shrinkable(obj)) {
-			unsigned long flags;
-
-			spin_lock_irqsave(&i915->mm.obj_lock, flags);
+		spin_lock_irqsave(&i915->mm.obj_lock, flags);
+		if (!list_empty(&obj->mm.link)) {
+			struct list_head *list;
 
 			if (obj->mm.madv != I915_MADV_WILLNEED)
 				list = &i915->mm.purge_list;
@@ -1012,8 +1011,8 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 				list = &i915->mm.shrink_list;
 			list_move_tail(&obj->mm.link, list);
 
-			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 		}
+		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 	}
 
 	/* if the object is no longer attached, discard its backing storage */
-- 
cgit v1.2.3


From 023dfa9602f561952c0e19d74f66614a56d7e57a Mon Sep 17 00:00:00 2001
From: Simon Rettberg <simon.rettberg@rz.uni-freiburg.de>
Date: Mon, 26 Apr 2021 16:11:24 +0200
Subject: drm/i915/gt: Disable HiZ Raw Stall Optimization on broken gen7

When resetting CACHE_MODE registers, don't enable HiZ Raw Stall
Optimization on Ivybridge GT1 and Baytrail, as it causes severe glitches
when rendering any kind of 3D accelerated content.
This optimization is disabled on these platforms by default according to
official documentation from 01.org.

Fixes: ef99a60ffd9b ("drm/i915/gt: Clear CACHE_MODE prior to clearing residuals")
BugLink: https://gitlab.freedesktop.org/drm/intel/-/issues/3081
BugLink: https://gitlab.freedesktop.org/drm/intel/-/issues/3404
BugLink: https://gitlab.freedesktop.org/drm/intel/-/issues/3071
Reviewed-by: Manuel Bentele <development@manuel-bentele.de>
Signed-off-by: Simon Rettberg <simon.rettberg@rz.uni-freiburg.de>
Reviewed-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
[Rodrigo removed invalid Fixes line]
Link: https://patchwork.freedesktop.org/patch/msgid/20210426161124.2b7fd708@dellnichtsogutkiste
(cherry picked from commit 929b734ad34b717d6a1b8de97f53bb5616040147)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/gt/gen7_renderclear.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.c b/drivers/gpu/drm/i915/gt/gen7_renderclear.c
index de575fdb033f..21f08e53889c 100644
--- a/drivers/gpu/drm/i915/gt/gen7_renderclear.c
+++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.c
@@ -397,7 +397,10 @@ static void emit_batch(struct i915_vma * const vma,
 	gen7_emit_pipeline_invalidate(&cmds);
 	batch_add(&cmds, MI_LOAD_REGISTER_IMM(2));
 	batch_add(&cmds, i915_mmio_reg_offset(CACHE_MODE_0_GEN7));
-	batch_add(&cmds, 0xffff0000);
+	batch_add(&cmds, 0xffff0000 |
+			((IS_IVB_GT1(i915) || IS_VALLEYVIEW(i915)) ?
+			 HIZ_RAW_STALL_OPT_DISABLE :
+			 0));
 	batch_add(&cmds, i915_mmio_reg_offset(CACHE_MODE_1));
 	batch_add(&cmds, 0xffff0000 | PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
 	gen7_emit_pipeline_invalidate(&cmds);
-- 
cgit v1.2.3


From 0024430e920f2900654ad83cd081cf52e02a3ef5 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Tue, 18 May 2021 12:01:06 -0700
Subject: x86/build: Fix location of '-plugin-opt=' flags

Commit b33fff07e3e3 ("x86, build: allow LTO to be selected") added a
couple of '-plugin-opt=' flags to KBUILD_LDFLAGS because the code model
and stack alignment are not stored in LLVM bitcode.

However, these flags were added to KBUILD_LDFLAGS prior to the
emulation flag assignment, which uses ':=', so they were overwritten
and never added to $(LD) invocations.

The absence of these flags caused misalignment issues in the
AMDGPU driver when compiling with CONFIG_LTO_CLANG, resulting in
general protection faults.

Shuffle the assignment below the initial one so that the flags are
properly passed along and all of the linker flags stay together.

At the same time, avoid any future issues with clobbering flags by
changing the emulation flag assignment to '+=' since KBUILD_LDFLAGS is
already defined with ':=' in the main Makefile before being exported for
modification here as a result of commit:

  ce99d0bf312d ("kbuild: clear LDFLAGS in the top Makefile")

Fixes: b33fff07e3e3 ("x86, build: allow LTO to be selected")
Reported-by: Anthony Ruhier <aruhier@mailbox.org>
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Tested-by: Anthony Ruhier <aruhier@mailbox.org>
Cc: stable@vger.kernel.org
Link: https://github.com/ClangBuiltLinux/linux/issues/1374
Link: https://lore.kernel.org/r/20210518190106.60935-1-nathan@kernel.org
---
 arch/x86/Makefile | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index c77c5d8a7b3e..307529417021 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -178,11 +178,6 @@ ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1)
 	KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args,)
 endif
 
-ifdef CONFIG_LTO_CLANG
-KBUILD_LDFLAGS	+= -plugin-opt=-code-model=kernel \
-		   -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8)
-endif
-
 # Workaround for a gcc prelease that unfortunately was shipped in a suse release
 KBUILD_CFLAGS += -Wno-sign-compare
 #
@@ -202,7 +197,12 @@ ifdef CONFIG_RETPOLINE
   endif
 endif
 
-KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE)
+KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE)
+
+ifdef CONFIG_LTO_CLANG
+KBUILD_LDFLAGS	+= -plugin-opt=-code-model=kernel \
+		   -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8)
+endif
 
 ifdef CONFIG_X86_NEED_RELOCS
 LDFLAGS_vmlinux := --emit-relocs --discard-none
-- 
cgit v1.2.3


From 647e6cc979b0675499347ddbac55c83876a20cf9 Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Wed, 5 May 2021 15:36:35 +0200
Subject: platform/surface: aggregator: Do not mark interrupt as shared

Having both IRQF_NO_AUTOEN and IRQF_SHARED set causes
request_threaded_irq() to return with -EINVAL (see comment in flag
validation in that function). As the interrupt is currently not shared
between multiple devices, drop the IRQF_SHARED flag.

Fixes: 507cf5a2f1e2 ("platform/surface: aggregator: move to use request_irq by IRQF_NO_AUTOEN flag")
Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Link: https://lore.kernel.org/r/20210505133635.1499703-1-luzmaximilian@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/surface/aggregator/controller.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/platform/surface/aggregator/controller.c b/drivers/platform/surface/aggregator/controller.c
index 69e86cd599d3..8a70df60142c 100644
--- a/drivers/platform/surface/aggregator/controller.c
+++ b/drivers/platform/surface/aggregator/controller.c
@@ -2483,8 +2483,7 @@ int ssam_irq_setup(struct ssam_controller *ctrl)
 	 * interrupt, and let the SAM resume callback during the controller
 	 * resume process clear it.
 	 */
-	const int irqf = IRQF_SHARED | IRQF_ONESHOT |
-			 IRQF_TRIGGER_RISING | IRQF_NO_AUTOEN;
+	const int irqf = IRQF_ONESHOT | IRQF_TRIGGER_RISING | IRQF_NO_AUTOEN;
 
 	gpiod = gpiod_get(dev, "ssam_wakeup-int", GPIOD_ASIS);
 	if (IS_ERR(gpiod))
-- 
cgit v1.2.3


From ba6e1d8422bd476ad79da409639a773c02f0cbad Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 14 May 2021 22:04:36 +0200
Subject: platform/surface: aggregator: avoid clang -Wconstant-conversion
 warning

Clang complains about the assignment of SSAM_ANY_IID to
ssam_device_uid->instance:

drivers/platform/surface/surface_aggregator_registry.c:478:25: error: implicit conversion from 'int' to '__u8' (aka 'unsigned char') changes value from 65535 to 255 [-Werror,-Wconstant-conversion]
        { SSAM_VDEV(HUB, 0x02, SSAM_ANY_IID, 0x00) },
        ~                      ^~~~~~~~~~~~
include/linux/surface_aggregator/device.h:71:23: note: expanded from macro 'SSAM_ANY_IID'
 #define SSAM_ANY_IID            0xffff
                                ^~~~~~
include/linux/surface_aggregator/device.h:126:63: note: expanded from macro 'SSAM_VDEV'
        SSAM_DEVICE(SSAM_DOMAIN_VIRTUAL, SSAM_VIRTUAL_TC_##cat, tid, iid, fun)
                                                                     ^~~
include/linux/surface_aggregator/device.h:102:41: note: expanded from macro 'SSAM_DEVICE'
        .instance = ((iid) != SSAM_ANY_IID) ? (iid) : 0,                        \
                                               ^~~

The assignment doesn't actually happen, but clang checks the type limits
before checking whether this assignment is reached. Replace the ?:
operator with a __builtin_choose_expr() invocation that avoids the
warning for the untaken part.

Fixes: eb0e90a82098 ("platform/surface: aggregator: Add dedicated bus and device type")
Cc: platform-driver-x86@vger.kernel.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Maximilian Luz <luzmaximilian@gmail.com>
Link: https://lore.kernel.org/r/20210514200453.1542978-1-arnd@kernel.org
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 include/linux/surface_aggregator/device.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/surface_aggregator/device.h b/include/linux/surface_aggregator/device.h
index 4441ad667c3f..6ff9c58b3e17 100644
--- a/include/linux/surface_aggregator/device.h
+++ b/include/linux/surface_aggregator/device.h
@@ -98,9 +98,9 @@ struct ssam_device_uid {
 		     | (((fun) != SSAM_ANY_FUN) ? SSAM_MATCH_FUNCTION : 0),	\
 	.domain   = d,								\
 	.category = cat,							\
-	.target   = ((tid) != SSAM_ANY_TID) ? (tid) : 0,			\
-	.instance = ((iid) != SSAM_ANY_IID) ? (iid) : 0,			\
-	.function = ((fun) != SSAM_ANY_FUN) ? (fun) : 0				\
+	.target   = __builtin_choose_expr((tid) != SSAM_ANY_TID, (tid), 0),	\
+	.instance = __builtin_choose_expr((iid) != SSAM_ANY_IID, (iid), 0),	\
+	.function = __builtin_choose_expr((fun) != SSAM_ANY_FUN, (fun), 0)
 
 /**
  * SSAM_VDEV() - Initialize a &struct ssam_device_id as virtual device with
-- 
cgit v1.2.3


From 773fe1d74404fcb6f0e7e69c3420cf04a6bb56b0 Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Sat, 15 May 2021 00:19:54 +0200
Subject: platform/surface: aggregator: Add platform-drivers-x86 list to
 MAINTAINERS entry

The Surface System Aggregator Module driver entry is currently missing a
mailing list. Surface platform drivers are discussed on the
platform-driver-x86 list and all other Surface platform drivers have a
reference to that list in their entries. So let's add one here as well.

Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Link: https://lore.kernel.org/r/20210514221954.5976-1-luzmaximilian@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index bd7aff0c120f..57467b6046f1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12180,6 +12180,7 @@ F:	drivers/platform/surface/surfacepro3_button.c
 
 MICROSOFT SURFACE SYSTEM AGGREGATOR SUBSYSTEM
 M:	Maximilian Luz <luzmaximilian@gmail.com>
+L:	platform-driver-x86@vger.kernel.org
 S:	Maintained
 W:	https://github.com/linux-surface/surface-aggregator-module
 C:	irc://chat.freenode.net/##linux-surface
-- 
cgit v1.2.3


From 9795d8232a24be9e1e1cc408a6bdc01c40e2cedc Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Thu, 13 May 2021 15:44:37 +0200
Subject: platform/surface: dtx: Fix poll function

The poll function should not return -ERESTARTSYS.

Furthermore, locking in this function is completely unnecessary. The
ddev->lock protects access to the main device and controller (ddev->dev
and ddev->ctrl), ensuring that both are and remain valid while being
accessed by clients. Both are, however, never accessed in the poll
function. The shutdown test (via atomic bit flags) be safely done
without locking, so drop locking here entirely.

Reported-by: kernel test robot <lkp@intel.com>
Fixes: 1d609992832e ("platform/surface: Add DTX driver)
Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Link: https://lore.kernel.org/r/20210513134437.2431022-1-luzmaximilian@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/surface/surface_dtx.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/platform/surface/surface_dtx.c b/drivers/platform/surface/surface_dtx.c
index 63ce587e79e3..5d9b758a99bb 100644
--- a/drivers/platform/surface/surface_dtx.c
+++ b/drivers/platform/surface/surface_dtx.c
@@ -527,20 +527,14 @@ static __poll_t surface_dtx_poll(struct file *file, struct poll_table_struct *pt
 	struct sdtx_client *client = file->private_data;
 	__poll_t events = 0;
 
-	if (down_read_killable(&client->ddev->lock))
-		return -ERESTARTSYS;
-
-	if (test_bit(SDTX_DEVICE_SHUTDOWN_BIT, &client->ddev->flags)) {
-		up_read(&client->ddev->lock);
+	if (test_bit(SDTX_DEVICE_SHUTDOWN_BIT, &client->ddev->flags))
 		return EPOLLHUP | EPOLLERR;
-	}
 
 	poll_wait(file, &client->ddev->waitq, pt);
 
 	if (!kfifo_is_empty(&client->buffer))
 		events |= EPOLLIN | EPOLLRDNORM;
 
-	up_read(&client->ddev->lock);
 	return events;
 }
 
-- 
cgit v1.2.3


From 1c0e5701c5e792c090aef0e5b9b8923c334d9324 Mon Sep 17 00:00:00 2001
From: Liming Sun <limings@nvidia.com>
Date: Fri, 7 May 2021 20:30:12 -0400
Subject: platform/mellanox: mlxbf-tmfifo: Fix a memory barrier issue

The virtio framework uses wmb() when updating avail->idx. It
guarantees the write order, but not necessarily loading order
for the code accessing the memory. This commit adds a load barrier
after reading the avail->idx to make sure all the data in the
descriptor is visible. It also adds a barrier when returning the
packet to virtio framework to make sure read/writes are visible to
the virtio code.

Fixes: 1357dfd7261f ("platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc")
Signed-off-by: Liming Sun <limings@nvidia.com>
Reviewed-by: Vadim Pasternak <vadimp@nvidia.com>
Link: https://lore.kernel.org/r/1620433812-17911-1-git-send-email-limings@nvidia.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/mellanox/mlxbf-tmfifo.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c
index bbc4e71a16ff..38800e86ed8a 100644
--- a/drivers/platform/mellanox/mlxbf-tmfifo.c
+++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
@@ -294,6 +294,9 @@ mlxbf_tmfifo_get_next_desc(struct mlxbf_tmfifo_vring *vring)
 	if (vring->next_avail == virtio16_to_cpu(vdev, vr->avail->idx))
 		return NULL;
 
+	/* Make sure 'avail->idx' is visible already. */
+	virtio_rmb(false);
+
 	idx = vring->next_avail % vr->num;
 	head = virtio16_to_cpu(vdev, vr->avail->ring[idx]);
 	if (WARN_ON(head >= vr->num))
@@ -322,7 +325,7 @@ static void mlxbf_tmfifo_release_desc(struct mlxbf_tmfifo_vring *vring,
 	 * done or not. Add a memory barrier here to make sure the update above
 	 * completes before updating the idx.
 	 */
-	mb();
+	virtio_mb(false);
 	vr->used->idx = cpu_to_virtio16(vdev, vr_idx + 1);
 }
 
@@ -733,6 +736,12 @@ static bool mlxbf_tmfifo_rxtx_one_desc(struct mlxbf_tmfifo_vring *vring,
 		desc = NULL;
 		fifo->vring[is_rx] = NULL;
 
+		/*
+		 * Make sure the load/store are in order before
+		 * returning back to virtio.
+		 */
+		virtio_mb(false);
+
 		/* Notify upper layer that packet is done. */
 		spin_lock_irqsave(&fifo->spin_lock[is_rx], flags);
 		vring_interrupt(0, vring->vq);
-- 
cgit v1.2.3


From 316a76a58c3f30735e5e416a6dc304d6bb86312d Mon Sep 17 00:00:00 2001
From: Thomas Richter <tmricht@linux.ibm.com>
Date: Mon, 17 May 2021 16:09:31 +0200
Subject: perf test: Fix libpfm4 support (63) test error for nested event
 groups

Compiling perf with make LIBPFM4=1 includes libpfm support and
enables test case 63 'Test libpfm4 support'. This test reports an error
on all platforms for subtest 63.2 'test groups of --pfm-events'.
The reported error message is 'nested event groups not supported'

 # ./perf test -F 63
 63: Test libpfm4 support                                            :
 63.1: test of individual --pfm-events                               :
 Error:
 failed to parse event stereolab : event not found
 Error:
 failed to parse event stereolab,instructions : event not found
 Error:
 failed to parse event instructions,stereolab : event not found
  Ok
 63.2: test groups of --pfm-events                                   :
 Error:
 nested event groups not supported    <------ Error message here
 Error:
 failed to parse event {stereolab} : event not found
 Error:
 failed to parse event {instructions,cycles},{instructions,stereolab} :\
	 event not found
 Ok
 #

This patch addresses the error message 'nested event groups not supported'.
The root cause is function parse_libpfm_events_option() which parses the
event string '{},{instructions}' and can not handle a leading empty
group notation '{},...'.

The code detects the first (empty) group indicator '{' but does not
terminate group processing on the following group closing character '}'.
So when the second group indicator '{' is detected, the code assumes
a nested group and returns an error.

With the error message fixed, also change the expected event number to
one for the test case to succeed.

While at it also fix a memory leak. In good case the function does not
free the duplicated string given as first parameter.

Output after:
 # ./perf test -F 63
 63: Test libpfm4 support                                            :
 63.1: test of individual --pfm-events                               :
 Error:
 failed to parse event stereolab : event not found
 Error:
 failed to parse event stereolab,instructions : event not found
 Error:
 failed to parse event instructions,stereolab : event not found
  Ok
 63.2: test groups of --pfm-events                                   :
 Error:
 failed to parse event {stereolab} : event not found
 Error:
 failed to parse event {instructions,cycles},{instructions,stereolab} : \
	 event not found
  Ok
 #
Error message 'nested event groups not supported' is gone.

Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
Acked-By: Ian Rogers <irogers@google.com>
Acked-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Link: http://lore.kernel.org/lkml/20210517140931.2559364-1-tmricht@linux.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/pfm.c |  4 ++--
 tools/perf/util/pfm.c  | 11 ++++++++++-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/tools/perf/tests/pfm.c b/tools/perf/tests/pfm.c
index 76a53126efdf..d4b0ef74defc 100644
--- a/tools/perf/tests/pfm.c
+++ b/tools/perf/tests/pfm.c
@@ -131,8 +131,8 @@ static int test__pfm_group(void)
 		},
 		{
 			.events = "{},{instructions}",
-			.nr_events = 0,
-			.nr_groups = 0,
+			.nr_events = 1,
+			.nr_groups = 1,
 		},
 		{
 			.events = "{instructions},{instructions}",
diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c
index d735acb6c29c..6eef6dfeaa57 100644
--- a/tools/perf/util/pfm.c
+++ b/tools/perf/util/pfm.c
@@ -62,8 +62,16 @@ int parse_libpfm_events_option(const struct option *opt, const char *str,
 		}
 
 		/* no event */
-		if (*q == '\0')
+		if (*q == '\0') {
+			if (*sep == '}') {
+				if (grp_evt < 0) {
+					ui__error("cannot close a non-existing event group\n");
+					goto error;
+				}
+				grp_evt--;
+			}
 			continue;
+		}
 
 		memset(&attr, 0, sizeof(attr));
 		event_attr_init(&attr);
@@ -107,6 +115,7 @@ int parse_libpfm_events_option(const struct option *opt, const char *str,
 			grp_evt = -1;
 		}
 	}
+	free(p_orig);
 	return 0;
 error:
 	free(p_orig);
-- 
cgit v1.2.3


From cb7987837c31b217b28089bbc78922d5c9187869 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 19 May 2021 10:45:13 +0300
Subject: perf intel-pt: Fix transaction abort handling

When adding support for power events, some handling of FUP packets was
unified. That resulted in breaking reporting of TSX aborts, by not
considering the associated TIP packet. Fix that.

Example:

A machine that supports TSX is required. It will have flag "rtm". Kernel
parameter tsx=on may be required.

 # for w in `cat /proc/cpuinfo | grep -m1 flags `;do echo $w | grep rtm ; done
 rtm

Test program:

 #include <stdio.h>
 #include <immintrin.h>

 int main()
 {
        int x = 0;

        if (_xbegin() == _XBEGIN_STARTED) {
                x = 1;
                _xabort(1);
        } else {
                printf("x = %d\n", x);
        }
        return 0;
 }

Compile with -mrtm i.e.

 gcc -Wall -Wextra -mrtm xabort.c -o xabort

Record:

 perf record -e intel_pt/cyc/u --filter 'filter main @ ./xabort' ./xabort

Before:

 # perf script --itrace=be -F+flags,+addr,-period,-event --ns
          xabort  1478 [007] 92161.431348552:   tr strt                             0 [unknown] ([unknown]) =>           400b6d main+0x0 (/root/xabort)
          xabort  1478 [007] 92161.431348624:   jmp                            400b96 main+0x29 (/root/xabort) =>           400bae main+0x41 (/root/xabort)
          xabort  1478 [007] 92161.431348624:   return                         400bb4 main+0x47 (/root/xabort) =>           400b87 main+0x1a (/root/xabort)
          xabort  1478 [007] 92161.431348637:   jcc                            400b8a main+0x1d (/root/xabort) =>           400b98 main+0x2b (/root/xabort)
          xabort  1478 [007] 92161.431348644:   tr end  call                   400ba9 main+0x3c (/root/xabort) =>           40f690 printf+0x0 (/root/xabort)
          xabort  1478 [007] 92161.431360859:   tr strt                             0 [unknown] ([unknown]) =>           400bae main+0x41 (/root/xabort)
          xabort  1478 [007] 92161.431360882:   tr end  return                 400bb4 main+0x47 (/root/xabort) =>           401139 __libc_start_main+0x309 (/root/xabort)

After:

 # perf script --itrace=be -F+flags,+addr,-period,-event --ns
          xabort  1478 [007] 92161.431348552:   tr strt                             0 [unknown] ([unknown]) =>           400b6d main+0x0 (/root/xabort)
          xabort  1478 [007] 92161.431348624:   tx abrt                        400b93 main+0x26 (/root/xabort) =>           400b87 main+0x1a (/root/xabort)
          xabort  1478 [007] 92161.431348637:   jcc                            400b8a main+0x1d (/root/xabort) =>           400b98 main+0x2b (/root/xabort)
          xabort  1478 [007] 92161.431348644:   tr end  call                   400ba9 main+0x3c (/root/xabort) =>           40f690 printf+0x0 (/root/xabort)
          xabort  1478 [007] 92161.431360859:   tr strt                             0 [unknown] ([unknown]) =>           400bae main+0x41 (/root/xabort)
          xabort  1478 [007] 92161.431360882:   tr end  return                 400bb4 main+0x47 (/root/xabort) =>           401139 __libc_start_main+0x309 (/root/xabort)

Fixes: a472e65fc490a ("perf intel-pt: Add decoder support for ptwrite and power event packets")
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: stable@vger.kernel.org
Link: http://lore.kernel.org/lkml/20210519074515.9262-2-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 8c59677bee13..20ad663978cc 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -1146,6 +1146,8 @@ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
 		decoder->set_fup_tx_flags = false;
 		decoder->tx_flags = decoder->fup_tx_flags;
 		decoder->state.type = INTEL_PT_TRANSACTION;
+		if (decoder->fup_tx_flags & INTEL_PT_ABORT_TX)
+			decoder->state.type |= INTEL_PT_BRANCH;
 		decoder->state.from_ip = decoder->ip;
 		decoder->state.to_ip = 0;
 		decoder->state.flags = decoder->fup_tx_flags;
@@ -1220,8 +1222,10 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
 			return 0;
 		if (err == -EAGAIN ||
 		    intel_pt_fup_with_nlip(decoder, &intel_pt_insn, ip, err)) {
+			bool no_tip = decoder->pkt_state != INTEL_PT_STATE_FUP;
+
 			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
-			if (intel_pt_fup_event(decoder))
+			if (intel_pt_fup_event(decoder) && no_tip)
 				return 0;
 			return -EAGAIN;
 		}
-- 
cgit v1.2.3


From c954eb72b31a9dc56c99b450253ec5b121add320 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 19 May 2021 10:45:14 +0300
Subject: perf intel-pt: Fix sample instruction bytes

The decoder reports the current instruction if it was decoded. In some
cases the current instruction is not decoded, in which case the instruction
bytes length must be set to zero. Ensure that is always done.

Note perf script can anyway get the instruction bytes for any samples where
they are not present.

Also note, that there is a redundant "ptq->insn_len = 0" statement which is
not removed until a subsequent patch in order to make this patch apply
cleanly to stable branches.

Example:

A machne that supports TSX is required. It will have flag "rtm". Kernel
parameter tsx=on may be required.

 # for w in `cat /proc/cpuinfo | grep -m1 flags `;do echo $w | grep rtm ; done
 rtm

Test program:

 #include <stdio.h>
 #include <immintrin.h>

 int main()
 {
        int x = 0;

        if (_xbegin() == _XBEGIN_STARTED) {
                x = 1;
                _xabort(1);
        } else {
                printf("x = %d\n", x);
        }
        return 0;
 }

Compile with -mrtm i.e.

 gcc -Wall -Wextra -mrtm xabort.c -o xabort

Record:

 perf record -e intel_pt/cyc/u --filter 'filter main @ ./xabort' ./xabort

Before:

 # perf script --itrace=xe -F+flags,+insn,-period --xed --ns
          xabort  1478 [007] 92161.431348581:   transactions:   x                              400b81 main+0x14 (/root/xabort)          mov $0xffffffff, %eax
          xabort  1478 [007] 92161.431348624:   transactions:   tx abrt                        400b93 main+0x26 (/root/xabort)          mov $0xffffffff, %eax

After:

 # perf script --itrace=xe -F+flags,+insn,-period --xed --ns
          xabort  1478 [007] 92161.431348581:   transactions:   x                              400b81 main+0x14 (/root/xabort)          xbegin 0x6
          xabort  1478 [007] 92161.431348624:   transactions:   tx abrt                        400b93 main+0x26 (/root/xabort)          xabort $0x1

Fixes: faaa87680b25d ("perf intel-pt/bts: Report instruction bytes and length in sample")
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: stable@vger.kernel.org
Link: http://lore.kernel.org/lkml/20210519074515.9262-3-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/intel-pt.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 8658d42ce57a..beae5cbe9cc2 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -707,8 +707,10 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
 
 			*ip += intel_pt_insn->length;
 
-			if (to_ip && *ip == to_ip)
+			if (to_ip && *ip == to_ip) {
+				intel_pt_insn->length = 0;
 				goto out_no_cache;
+			}
 
 			if (*ip >= al.map->end)
 				break;
@@ -1198,6 +1200,7 @@ static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
 
 static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
 {
+	ptq->insn_len = 0;
 	if (ptq->state->flags & INTEL_PT_ABORT_TX) {
 		ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
 	} else if (ptq->state->flags & INTEL_PT_ASYNC) {
-- 
cgit v1.2.3


From 0a0c59724516fabf9705c0d9927fa12319908852 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 19 May 2021 10:45:15 +0300
Subject: perf intel-pt: Remove redundant setting of ptq->insn_len

Remove redundant "ptq->insn_len = 0" statement.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lore.kernel.org/lkml/20210519074515.9262-4-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/intel-pt.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index beae5cbe9cc2..0dfec8761b9a 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -1214,7 +1214,6 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
 			ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
 				     PERF_IP_FLAG_ASYNC |
 				     PERF_IP_FLAG_INTERRUPT;
-		ptq->insn_len = 0;
 	} else {
 		if (ptq->state->from_ip)
 			ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
-- 
cgit v1.2.3


From 86bf2b8ffec40eb4c278ce393e2b0bf48d335e59 Mon Sep 17 00:00:00 2001
From: Thomas Weißschuh <linux@weissschuh.net>
Date: Tue, 11 May 2021 00:15:43 +0200
Subject: platform/x86: gigabyte-wmi: streamline dmi matching
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Streamline dmi matching.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Link: https://lore.kernel.org/r/20210510221545.412522-1-linux@weissschuh.net
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/gigabyte-wmi.c | 36 ++++++++++++------------------------
 1 file changed, 12 insertions(+), 24 deletions(-)

diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c
index 13d57434e60f..b95a94ed40b8 100644
--- a/drivers/platform/x86/gigabyte-wmi.c
+++ b/drivers/platform/x86/gigabyte-wmi.c
@@ -133,31 +133,19 @@ static u8 gigabyte_wmi_detect_sensor_usability(struct wmi_device *wdev)
 	return r;
 }
 
+#define DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME(name) \
+	{ .matches = { \
+		DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."), \
+		DMI_EXACT_MATCH(DMI_BOARD_NAME, name), \
+	}}
+
 static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = {
-	{ .matches = {
-		DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."),
-		DMI_EXACT_MATCH(DMI_BOARD_NAME, "B550 GAMING X V2"),
-	}},
-	{ .matches = {
-		DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."),
-		DMI_EXACT_MATCH(DMI_BOARD_NAME, "B550M AORUS PRO-P"),
-	}},
-	{ .matches = {
-		DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."),
-		DMI_EXACT_MATCH(DMI_BOARD_NAME, "B550M DS3H"),
-	}},
-	{ .matches = {
-		DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."),
-		DMI_EXACT_MATCH(DMI_BOARD_NAME, "Z390 I AORUS PRO WIFI-CF"),
-	}},
-	{ .matches = {
-		DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."),
-		DMI_EXACT_MATCH(DMI_BOARD_NAME, "X570 AORUS ELITE"),
-	}},
-	{ .matches = {
-		DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."),
-		DMI_EXACT_MATCH(DMI_BOARD_NAME, "X570 I AORUS PRO WIFI"),
-	}},
+	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 GAMING X V2"),
+	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M AORUS PRO-P"),
+	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M DS3H"),
+	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z390 I AORUS PRO WIFI-CF"),
+	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 AORUS ELITE"),
+	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 I AORUS PRO WIFI"),
 	{ }
 };
 
-- 
cgit v1.2.3


From 8605d64f485fbdb71cb4d55a53085feb000e426e Mon Sep 17 00:00:00 2001
From: Thomas Weißschuh <linux@weissschuh.net>
Date: Tue, 11 May 2021 00:15:44 +0200
Subject: platform/x86: gigabyte-wmi: add support for X570 UD
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reported as working here:
https://github.com/t-8ch/linux-gigabyte-wmi-driver/issues/4

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Link: https://lore.kernel.org/r/20210510221545.412522-2-linux@weissschuh.net
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/gigabyte-wmi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c
index b95a94ed40b8..7af6c24151e2 100644
--- a/drivers/platform/x86/gigabyte-wmi.c
+++ b/drivers/platform/x86/gigabyte-wmi.c
@@ -146,6 +146,7 @@ static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = {
 	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z390 I AORUS PRO WIFI-CF"),
 	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 AORUS ELITE"),
 	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 I AORUS PRO WIFI"),
+	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 UD"),
 	{ }
 };
 
-- 
cgit v1.2.3


From dac282def6f57d251234e7bbb87d21d7a57b26fe Mon Sep 17 00:00:00 2001
From: Thomas Weißschuh <linux@weissschuh.net>
Date: Tue, 11 May 2021 00:15:45 +0200
Subject: platform/x86: gigabyte-wmi: add support for B550 Aorus Elite
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reported as working here:
https://github.com/t-8ch/linux-gigabyte-wmi-driver/issues/1#issuecomment-837210304

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Link: https://lore.kernel.org/r/20210510221545.412522-3-linux@weissschuh.net
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/gigabyte-wmi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c
index 7af6c24151e2..5529d7b0abea 100644
--- a/drivers/platform/x86/gigabyte-wmi.c
+++ b/drivers/platform/x86/gigabyte-wmi.c
@@ -140,6 +140,7 @@ static u8 gigabyte_wmi_detect_sensor_usability(struct wmi_device *wdev)
 	}}
 
 static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = {
+	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE"),
 	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 GAMING X V2"),
 	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M AORUS PRO-P"),
 	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M DS3H"),
-- 
cgit v1.2.3


From b68e182a3062e326b891f47152a3a1b84abccf0f Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 12 May 2021 14:55:23 +0200
Subject: platform/x86: intel_int0002_vgpio: Only call enable_irq_wake() when
 using s2idle

Commit 871f1f2bcb01 ("platform/x86: intel_int0002_vgpio: Only implement
irq_set_wake on Bay Trail") stopped passing irq_set_wake requests on to
the parents IRQ because this was breaking suspend (causing immediate
wakeups) on an Asus E202SA.

This workaround for the Asus E202SA is causing wakeup by USB keyboard to
not work on other devices with Airmont CPU cores such as the Medion Akoya
E1239T. In hindsight the problem with the Asus E202SA has nothing to do
with Silvermont vs Airmont CPU cores, so the differentiation between the
2 types of CPU cores introduced by the previous fix is wrong.

The real issue at hand is s2idle vs S3 suspend where the suspend is
mostly handled by firmware. The parent IRQ for the INT0002 device is shared
with the ACPI SCI and the real problem is that the INT0002 code should not
be messing with the wakeup settings of that IRQ when suspend/resume is
being handled by the firmware.

Note that on systems which support both s2idle and S3 suspend, which
suspend method to use can be changed at runtime.

This patch fixes both the Asus E202SA spurious wakeups issue as well as
the wakeup by USB keyboard not working on the Medion Akoya E1239T issue.

These are both fixed by replacing the old workaround with delaying the
enable_irq_wake(parent_irq) call till system-suspend time and protecting
it with a !pm_suspend_via_firmware() check so that we still do not call
it on devices using firmware-based (S3) suspend such as the Asus E202SA.

Note rather then adding #ifdef CONFIG_PM_SLEEP, this commit simply adds
a "depends on PM_SLEEP" to the Kconfig since this drivers whole purpose
is to deal with wakeup events, so using it without CONFIG_PM_SLEEP makes
no sense.

Cc: Maxim Mikityanskiy <maxtram95@gmail.com>
Fixes: 871f1f2bcb01 ("platform/x86: intel_int0002_vgpio: Only implement irq_set_wake on Bay Trail")
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://lore.kernel.org/r/20210512125523.55215-2-hdegoede@redhat.com
---
 drivers/platform/x86/Kconfig               |  2 +-
 drivers/platform/x86/intel_int0002_vgpio.c | 80 +++++++++++++++++++++---------
 2 files changed, 57 insertions(+), 25 deletions(-)

diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 2714f7c3843e..60592fb88e7a 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -711,7 +711,7 @@ config INTEL_HID_EVENT
 
 config INTEL_INT0002_VGPIO
 	tristate "Intel ACPI INT0002 Virtual GPIO driver"
-	depends on GPIOLIB && ACPI
+	depends on GPIOLIB && ACPI && PM_SLEEP
 	select GPIOLIB_IRQCHIP
 	help
 	  Some peripherals on Bay Trail and Cherry Trail platforms signal a
diff --git a/drivers/platform/x86/intel_int0002_vgpio.c b/drivers/platform/x86/intel_int0002_vgpio.c
index 289c6655d425..569342aa8926 100644
--- a/drivers/platform/x86/intel_int0002_vgpio.c
+++ b/drivers/platform/x86/intel_int0002_vgpio.c
@@ -51,6 +51,12 @@
 #define GPE0A_STS_PORT			0x420
 #define GPE0A_EN_PORT			0x428
 
+struct int0002_data {
+	struct gpio_chip chip;
+	int parent_irq;
+	int wake_enable_count;
+};
+
 /*
  * As this is not a real GPIO at all, but just a hack to model an event in
  * ACPI the get / set functions are dummy functions.
@@ -98,14 +104,16 @@ static void int0002_irq_mask(struct irq_data *data)
 static int int0002_irq_set_wake(struct irq_data *data, unsigned int on)
 {
 	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
-	struct platform_device *pdev = to_platform_device(chip->parent);
-	int irq = platform_get_irq(pdev, 0);
+	struct int0002_data *int0002 = container_of(chip, struct int0002_data, chip);
 
-	/* Propagate to parent irq */
+	/*
+	 * Applying of the wakeup flag to our parent IRQ is delayed till system
+	 * suspend, because we only want to do this when using s2idle.
+	 */
 	if (on)
-		enable_irq_wake(irq);
+		int0002->wake_enable_count++;
 	else
-		disable_irq_wake(irq);
+		int0002->wake_enable_count--;
 
 	return 0;
 }
@@ -135,7 +143,7 @@ static bool int0002_check_wake(void *data)
 	return (gpe_sts_reg & GPE0A_PME_B0_STS_BIT);
 }
 
-static struct irq_chip int0002_byt_irqchip = {
+static struct irq_chip int0002_irqchip = {
 	.name			= DRV_NAME,
 	.irq_ack		= int0002_irq_ack,
 	.irq_mask		= int0002_irq_mask,
@@ -143,21 +151,9 @@ static struct irq_chip int0002_byt_irqchip = {
 	.irq_set_wake		= int0002_irq_set_wake,
 };
 
-static struct irq_chip int0002_cht_irqchip = {
-	.name			= DRV_NAME,
-	.irq_ack		= int0002_irq_ack,
-	.irq_mask		= int0002_irq_mask,
-	.irq_unmask		= int0002_irq_unmask,
-	/*
-	 * No set_wake, on CHT the IRQ is typically shared with the ACPI SCI
-	 * and we don't want to mess with the ACPI SCI irq settings.
-	 */
-	.flags			= IRQCHIP_SKIP_SET_WAKE,
-};
-
 static const struct x86_cpu_id int0002_cpu_ids[] = {
-	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT,	&int0002_byt_irqchip),
-	X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT,	&int0002_cht_irqchip),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, NULL),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, NULL),
 	{}
 };
 
@@ -172,8 +168,9 @@ static int int0002_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	const struct x86_cpu_id *cpu_id;
-	struct gpio_chip *chip;
+	struct int0002_data *int0002;
 	struct gpio_irq_chip *girq;
+	struct gpio_chip *chip;
 	int irq, ret;
 
 	/* Menlow has a different INT0002 device? <sigh> */
@@ -185,10 +182,13 @@ static int int0002_probe(struct platform_device *pdev)
 	if (irq < 0)
 		return irq;
 
-	chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
-	if (!chip)
+	int0002 = devm_kzalloc(dev, sizeof(*int0002), GFP_KERNEL);
+	if (!int0002)
 		return -ENOMEM;
 
+	int0002->parent_irq = irq;
+
+	chip = &int0002->chip;
 	chip->label = DRV_NAME;
 	chip->parent = dev;
 	chip->owner = THIS_MODULE;
@@ -214,7 +214,7 @@ static int int0002_probe(struct platform_device *pdev)
 	}
 
 	girq = &chip->irq;
-	girq->chip = (struct irq_chip *)cpu_id->driver_data;
+	girq->chip = &int0002_irqchip;
 	/* This let us handle the parent IRQ in the driver */
 	girq->parent_handler = NULL;
 	girq->num_parents = 0;
@@ -230,6 +230,7 @@ static int int0002_probe(struct platform_device *pdev)
 
 	acpi_register_wakeup_handler(irq, int0002_check_wake, NULL);
 	device_init_wakeup(dev, true);
+	dev_set_drvdata(dev, int0002);
 	return 0;
 }
 
@@ -240,6 +241,36 @@ static int int0002_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static int int0002_suspend(struct device *dev)
+{
+	struct int0002_data *int0002 = dev_get_drvdata(dev);
+
+	/*
+	 * The INT0002 parent IRQ is often shared with the ACPI GPE IRQ, don't
+	 * muck with it when firmware based suspend is used, otherwise we may
+	 * cause spurious wakeups from firmware managed suspend.
+	 */
+	if (!pm_suspend_via_firmware() && int0002->wake_enable_count)
+		enable_irq_wake(int0002->parent_irq);
+
+	return 0;
+}
+
+static int int0002_resume(struct device *dev)
+{
+	struct int0002_data *int0002 = dev_get_drvdata(dev);
+
+	if (!pm_suspend_via_firmware() && int0002->wake_enable_count)
+		disable_irq_wake(int0002->parent_irq);
+
+	return 0;
+}
+
+static const struct dev_pm_ops int0002_pm_ops = {
+	.suspend = int0002_suspend,
+	.resume = int0002_resume,
+};
+
 static const struct acpi_device_id int0002_acpi_ids[] = {
 	{ "INT0002", 0 },
 	{ },
@@ -250,6 +281,7 @@ static struct platform_driver int0002_driver = {
 	.driver = {
 		.name			= DRV_NAME,
 		.acpi_match_table	= int0002_acpi_ids,
+		.pm			= &int0002_pm_ops,
 	},
 	.probe	= int0002_probe,
 	.remove	= int0002_remove,
-- 
cgit v1.2.3


From f048630bdd55eb5379ef35f971639fe52fabe499 Mon Sep 17 00:00:00 2001
From: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Date: Fri, 14 May 2021 23:30:47 +0530
Subject: platform/x86: hp-wireless: add AMD's hardware id to the supported
 list

Newer AMD based laptops uses AMDI0051 as the hardware id to support the
airplane mode button. Adding this to the supported list.

Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Link: https://lore.kernel.org/r/20210514180047.1697543-1-Shyam-sundar.S-k@amd.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/hp-wireless.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/platform/x86/hp-wireless.c b/drivers/platform/x86/hp-wireless.c
index 12c31fd5d5ae..0753ef18e721 100644
--- a/drivers/platform/x86/hp-wireless.c
+++ b/drivers/platform/x86/hp-wireless.c
@@ -17,12 +17,14 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Alex Hung");
 MODULE_ALIAS("acpi*:HPQ6001:*");
 MODULE_ALIAS("acpi*:WSTADEF:*");
+MODULE_ALIAS("acpi*:AMDI0051:*");
 
 static struct input_dev *hpwl_input_dev;
 
 static const struct acpi_device_id hpwl_ids[] = {
 	{"HPQ6001", 0},
 	{"WSTADEF", 0},
+	{"AMDI0051", 0},
 	{"", 0},
 };
 
-- 
cgit v1.2.3


From 3a53587423d25c87af4b4126a806a0575104b45e Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 18 May 2021 14:50:27 +0200
Subject: platform/x86: dell-smbios-wmi: Fix oops on rmmod dell_smbios

init_dell_smbios_wmi() only registers the dell_smbios_wmi_driver on systems
where the Dell WMI interface is supported. While exit_dell_smbios_wmi()
unregisters it unconditionally, this leads to the following oops:

[  175.722921] ------------[ cut here ]------------
[  175.722925] Unexpected driver unregister!
[  175.722939] WARNING: CPU: 1 PID: 3630 at drivers/base/driver.c:194 driver_unregister+0x38/0x40
...
[  175.723089] Call Trace:
[  175.723094]  cleanup_module+0x5/0xedd [dell_smbios]
...
[  175.723148] ---[ end trace 064c34e1ad49509d ]---

Make the unregister happen on the same condition the register happens
to fix this.

Cc: Mario Limonciello <mario.limonciello@outlook.com>
Fixes: 1a258e670434 ("platform/x86: dell-smbios-wmi: Add new WMI dispatcher driver")
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Mario Limonciello <mario.limonciello@outlook.com>
Reviewed-by: Mark Gross <mgross@linux.intel.com>
Link: https://lore.kernel.org/r/20210518125027.21824-1-hdegoede@redhat.com
---
 drivers/platform/x86/dell/dell-smbios-wmi.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/platform/x86/dell/dell-smbios-wmi.c b/drivers/platform/x86/dell/dell-smbios-wmi.c
index a1753485159c..33f823772733 100644
--- a/drivers/platform/x86/dell/dell-smbios-wmi.c
+++ b/drivers/platform/x86/dell/dell-smbios-wmi.c
@@ -270,7 +270,8 @@ int init_dell_smbios_wmi(void)
 
 void exit_dell_smbios_wmi(void)
 {
-	wmi_driver_unregister(&dell_smbios_wmi_driver);
+	if (wmi_supported)
+		wmi_driver_unregister(&dell_smbios_wmi_driver);
 }
 
 MODULE_DEVICE_TABLE(wmi, dell_smbios_wmi_id_table);
-- 
cgit v1.2.3


From bc1eca606d8084465e6f89fd646cc71defbad490 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 19 May 2021 13:15:21 +0300
Subject: platform/x86: intel_punit_ipc: Append MODULE_DEVICE_TABLE for ACPI

The intel_punit_ipc driver might be compiled as a module.
When udev handles the event of the devices appearing
the intel_punit_ipc module is missing.

Append MODULE_DEVICE_TABLE for ACPI case to fix the loading issue.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210519101521.79338-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/intel_punit_ipc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/platform/x86/intel_punit_ipc.c b/drivers/platform/x86/intel_punit_ipc.c
index 05cced59e251..f58b8543f6ac 100644
--- a/drivers/platform/x86/intel_punit_ipc.c
+++ b/drivers/platform/x86/intel_punit_ipc.c
@@ -312,6 +312,7 @@ static const struct acpi_device_id punit_ipc_acpi_ids[] = {
 	{ "INT34D4", 0 },
 	{ }
 };
+MODULE_DEVICE_TABLE(acpi, punit_ipc_acpi_ids);
 
 static struct platform_driver intel_punit_ipc_driver = {
 	.probe = intel_punit_ipc_probe,
-- 
cgit v1.2.3


From 39a6172ea88b3117353ae16cbb0a53cd80a9340a Mon Sep 17 00:00:00 2001
From: Teava Radu <rateava@gmail.com>
Date: Tue, 4 May 2021 20:57:46 +0200
Subject: platform/x86: touchscreen_dmi: Add info for the Mediacom Winpad 7.0
 W700 tablet

Add touchscreen info for the Mediacom Winpad 7.0 W700 tablet.
Tested on 5.11 hirsute.
Note: it's hw clone to Wintron surftab 7.

Signed-off-by: Teava Radu <rateava@gmail.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210504185746.175461-6-hdegoede@redhat.com
---
 drivers/platform/x86/touchscreen_dmi.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c
index 90fe4f8f3c2c..875519c6c206 100644
--- a/drivers/platform/x86/touchscreen_dmi.c
+++ b/drivers/platform/x86/touchscreen_dmi.c
@@ -1096,6 +1096,14 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
 			DMI_MATCH(DMI_BIOS_VERSION, "jumperx.T87.KFBNEEA"),
 		},
 	},
+	{
+		/* Mediacom WinPad 7.0 W700 (same hw as Wintron surftab 7") */
+		.driver_data = (void *)&trekstor_surftab_wintron70_data,
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "MEDIACOM"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "WinPad 7 W10 - WPW700"),
+		},
+	},
 	{
 		/* Mediacom Flexbook Edge 11 (same hw as TS Primebook C11) */
 		.driver_data = (void *)&trekstor_primebook_c11_data,
-- 
cgit v1.2.3


From 05ca447630334c323c9e2b788b61133ab75d60d3 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 18 May 2021 10:39:39 +0200
Subject: ALSA: line6: Fix racy initialization of LINE6 MIDI

The initialization of MIDI devices that are found on some LINE6
drivers are currently done in a racy way; namely, the MIDI buffer
instance is allocated and initialized in each private_init callback
while the communication with the interface is already started via
line6_init_cap_control() call before that point.  This may lead to
Oops in line6_data_received() when a spurious event is received, as
reported by syzkaller.

This patch moves the MIDI initialization to line6_init_cap_control()
as well instead of the too-lately-called private_init for avoiding the
race.  Also this reduces slightly more lines, so it's a win-win
change.

Reported-by: syzbot+0d2b3feb0a2887862e06@syzkallerlkml..appspotmail.com
Link: https://lore.kernel.org/r/000000000000a4be9405c28520de@google.com
Link: https://lore.kernel.org/r/20210517132725.GA50495@hyeyoo
Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210518083939.1927-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/line6/driver.c | 4 ++++
 sound/usb/line6/pod.c    | 5 -----
 sound/usb/line6/variax.c | 6 ------
 3 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c
index a030dd65eb28..9602929b7de9 100644
--- a/sound/usb/line6/driver.c
+++ b/sound/usb/line6/driver.c
@@ -699,6 +699,10 @@ static int line6_init_cap_control(struct usb_line6 *line6)
 		line6->buffer_message = kmalloc(LINE6_MIDI_MESSAGE_MAXLEN, GFP_KERNEL);
 		if (!line6->buffer_message)
 			return -ENOMEM;
+
+		ret = line6_init_midi(line6);
+		if (ret < 0)
+			return ret;
 	} else {
 		ret = line6_hwdep_init(line6);
 		if (ret < 0)
diff --git a/sound/usb/line6/pod.c b/sound/usb/line6/pod.c
index cd44cb5f1310..16e644330c4d 100644
--- a/sound/usb/line6/pod.c
+++ b/sound/usb/line6/pod.c
@@ -376,11 +376,6 @@ static int pod_init(struct usb_line6 *line6,
 	if (err < 0)
 		return err;
 
-	/* initialize MIDI subsystem: */
-	err = line6_init_midi(line6);
-	if (err < 0)
-		return err;
-
 	/* initialize PCM subsystem: */
 	err = line6_init_pcm(line6, &pod_pcm_properties);
 	if (err < 0)
diff --git a/sound/usb/line6/variax.c b/sound/usb/line6/variax.c
index ed158f04de80..c2245aa93b08 100644
--- a/sound/usb/line6/variax.c
+++ b/sound/usb/line6/variax.c
@@ -159,7 +159,6 @@ static int variax_init(struct usb_line6 *line6,
 		       const struct usb_device_id *id)
 {
 	struct usb_line6_variax *variax = line6_to_variax(line6);
-	int err;
 
 	line6->process_message = line6_variax_process_message;
 	line6->disconnect = line6_variax_disconnect;
@@ -172,11 +171,6 @@ static int variax_init(struct usb_line6 *line6,
 	if (variax->buffer_activate == NULL)
 		return -ENOMEM;
 
-	/* initialize MIDI subsystem: */
-	err = line6_init_midi(&variax->line6);
-	if (err < 0)
-		return err;
-
 	/* initiate startup procedure: */
 	schedule_delayed_work(&line6->startup_work,
 			      msecs_to_jiffies(VARIAX_STARTUP_DELAY1));
-- 
cgit v1.2.3


From b250f2f7792d15bcde98e0456781e2835556d5fa Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 19 May 2021 15:52:44 +0200
Subject: x86/sev-es: Don't return NULL from sev_es_get_ghcb()

sev_es_get_ghcb() is called from several places but only one of them
checks the return value. The reaction to returning NULL is always the
same: calling panic() and kill the machine.

Instead of adding checks to all call sites, move the panic() into the
function itself so that it will no longer return NULL.

Fixes: 0786138c78e7 ("x86/sev-es: Add a Runtime #VC Exception Handler")
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: stable@vger.kernel.org # v5.10+
Link: https://lkml.kernel.org/r/20210519135251.30093-2-joro@8bytes.org
---
 arch/x86/kernel/sev.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 4fa111becc93..82bced88153b 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -203,8 +203,18 @@ static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state)
 	if (unlikely(data->ghcb_active)) {
 		/* GHCB is already in use - save its contents */
 
-		if (unlikely(data->backup_ghcb_active))
-			return NULL;
+		if (unlikely(data->backup_ghcb_active)) {
+			/*
+			 * Backup-GHCB is also already in use. There is no way
+			 * to continue here so just kill the machine. To make
+			 * panic() work, mark GHCBs inactive so that messages
+			 * can be printed out.
+			 */
+			data->ghcb_active        = false;
+			data->backup_ghcb_active = false;
+
+			panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
+		}
 
 		/* Mark backup_ghcb active before writing to it */
 		data->backup_ghcb_active = true;
@@ -1289,7 +1299,6 @@ static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs)
  */
 DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 {
-	struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
 	irqentry_state_t irq_state;
 	struct ghcb_state state;
 	struct es_em_ctxt ctxt;
@@ -1315,16 +1324,6 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 	 */
 
 	ghcb = sev_es_get_ghcb(&state);
-	if (!ghcb) {
-		/*
-		 * Mark GHCBs inactive so that panic() is able to print the
-		 * message.
-		 */
-		data->ghcb_active        = false;
-		data->backup_ghcb_active = false;
-
-		panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
-	}
 
 	vc_ghcb_invalidate(ghcb);
 	result = vc_init_em_ctxt(&ctxt, regs, error_code);
-- 
cgit v1.2.3


From c0d46717b95735b0eacfddbcca9df37a49de9c7a Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Sat, 15 May 2021 09:52:22 -0500
Subject: SMB3: incorrect file id in requests compounded with open

See MS-SMB2 3.2.4.1.4, file ids in compounded requests should be set to
0xFFFFFFFFFFFFFFFF (we were treating it as u32 not u64 and setting
it incorrectly).

Signed-off-by: Steve French <stfrench@microsoft.com>
Reported-by: Stefan Metzmacher <metze@samba.org>
Reviewed-by: Shyam Prasad N <sprasad@microsoft.com>
---
 fs/cifs/smb2pdu.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index a8bf43184773..9f24eb88297a 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -3900,10 +3900,10 @@ smb2_new_read_req(void **buf, unsigned int *total_len,
 			 * Related requests use info from previous read request
 			 * in chain.
 			 */
-			shdr->SessionId = 0xFFFFFFFF;
+			shdr->SessionId = 0xFFFFFFFFFFFFFFFF;
 			shdr->TreeId = 0xFFFFFFFF;
-			req->PersistentFileId = 0xFFFFFFFF;
-			req->VolatileFileId = 0xFFFFFFFF;
+			req->PersistentFileId = 0xFFFFFFFFFFFFFFFF;
+			req->VolatileFileId = 0xFFFFFFFFFFFFFFFF;
 		}
 	}
 	if (remaining_bytes > io_parms->length)
-- 
cgit v1.2.3


From c25bbdb564060adaad5c3a8a10765c13487ba6a3 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 19 May 2021 15:52:45 +0200
Subject: x86/sev-es: Forward page-faults which happen during emulation

When emulating guest instructions for MMIO or IOIO accesses, the #VC
handler might get a page-fault and will not be able to complete. Forward
the page-fault in this case to the correct handler instead of killing
the machine.

Fixes: 0786138c78e7 ("x86/sev-es: Add a Runtime #VC Exception Handler")
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: stable@vger.kernel.org # v5.10+
Link: https://lkml.kernel.org/r/20210519135251.30093-3-joro@8bytes.org
---
 arch/x86/kernel/sev.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 82bced88153b..1f428f401bed 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -1270,6 +1270,10 @@ static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
 	case X86_TRAP_UD:
 		exc_invalid_op(ctxt->regs);
 		break;
+	case X86_TRAP_PF:
+		write_cr2(ctxt->fi.cr2);
+		exc_page_fault(ctxt->regs, error_code);
+		break;
 	case X86_TRAP_AC:
 		exc_alignment_check(ctxt->regs, error_code);
 		break;
-- 
cgit v1.2.3


From 293837b9ac8d3021657f44c9d7a14948ec01c5d0 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 19 May 2021 05:55:57 -1000
Subject: Revert "i915: fix remap_io_sg to verify the pgprot"

This reverts commit b12d691ea5e01db42ccf3b4207e57cb3ce7cfe91.

It turns out this is not ready for primetime yet.  The intentions are
good, but using remap_pfn_range() requires that there is nothing already
mapped in the area, and the i915 code seems to very much intentionally
remap the same area multiple times.

That will then just trigger the

                BUG_ON(!pte_none(*pte));

in mm/memory.c: remap_pte_range().

There are also reports of mapping type inconsistencies, resulting in
warnings and in screen corruption.

Link: https://lore.kernel.org/lkml/20210519024322.GA29704@xsang-OptiPlex-9020/
Link: https://lore.kernel.org/lkml/YKUjvoaKKggAmpIR@sf/
Link: https://lore.kernel.org/lkml/b6b61cf0-5874-f4c0-1fcc-4b3848451c31@redhat.com/
Reported-by: kernel test robot <oliver.sang@intel.com>
Reported-by: Kalle Valo <kvalo@codeaurora.org>
Reported-by: Hans de Goede <hdegoede@redhat.com>
Reported-by: Sergei Trofimovich <slyfox@gentoo.org>
Acked-by: Christoph Hellwig <hch@lst.de>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/i915/i915_mm.c | 73 +++++++++++++++++++++++++++++-------------
 1 file changed, 50 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_mm.c b/drivers/gpu/drm/i915/i915_mm.c
index 4c8cd08c672d..9a777b0ff59b 100644
--- a/drivers/gpu/drm/i915/i915_mm.c
+++ b/drivers/gpu/drm/i915/i915_mm.c
@@ -28,10 +28,46 @@
 
 #include "i915_drv.h"
 
-#define EXPECTED_FLAGS (VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP)
+struct remap_pfn {
+	struct mm_struct *mm;
+	unsigned long pfn;
+	pgprot_t prot;
+
+	struct sgt_iter sgt;
+	resource_size_t iobase;
+};
 
 #define use_dma(io) ((io) != -1)
 
+static inline unsigned long sgt_pfn(const struct remap_pfn *r)
+{
+	if (use_dma(r->iobase))
+		return (r->sgt.dma + r->sgt.curr + r->iobase) >> PAGE_SHIFT;
+	else
+		return r->sgt.pfn + (r->sgt.curr >> PAGE_SHIFT);
+}
+
+static int remap_sg(pte_t *pte, unsigned long addr, void *data)
+{
+	struct remap_pfn *r = data;
+
+	if (GEM_WARN_ON(!r->sgt.sgp))
+		return -EINVAL;
+
+	/* Special PTE are not associated with any struct page */
+	set_pte_at(r->mm, addr, pte,
+		   pte_mkspecial(pfn_pte(sgt_pfn(r), r->prot)));
+	r->pfn++; /* track insertions in case we need to unwind later */
+
+	r->sgt.curr += PAGE_SIZE;
+	if (r->sgt.curr >= r->sgt.max)
+		r->sgt = __sgt_iter(__sg_next(r->sgt.sgp), use_dma(r->iobase));
+
+	return 0;
+}
+
+#define EXPECTED_FLAGS (VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP)
+
 /**
  * remap_io_sg - remap an IO mapping to userspace
  * @vma: user vma to map to
@@ -46,7 +82,12 @@ int remap_io_sg(struct vm_area_struct *vma,
 		unsigned long addr, unsigned long size,
 		struct scatterlist *sgl, resource_size_t iobase)
 {
-	unsigned long pfn, len, remapped = 0;
+	struct remap_pfn r = {
+		.mm = vma->vm_mm,
+		.prot = vma->vm_page_prot,
+		.sgt = __sgt_iter(sgl, use_dma(iobase)),
+		.iobase = iobase,
+	};
 	int err;
 
 	/* We rely on prevalidation of the io-mapping to skip track_pfn(). */
@@ -55,25 +96,11 @@ int remap_io_sg(struct vm_area_struct *vma,
 	if (!use_dma(iobase))
 		flush_cache_range(vma, addr, size);
 
-	do {
-		if (use_dma(iobase)) {
-			if (!sg_dma_len(sgl))
-				break;
-			pfn = (sg_dma_address(sgl) + iobase) >> PAGE_SHIFT;
-			len = sg_dma_len(sgl);
-		} else {
-			pfn = page_to_pfn(sg_page(sgl));
-			len = sgl->length;
-		}
-
-		err = remap_pfn_range(vma, addr + remapped, pfn, len,
-				      vma->vm_page_prot);
-		if (err)
-			break;
-		remapped += len;
-	} while ((sgl = __sg_next(sgl)));
-
-	if (err)
-		zap_vma_ptes(vma, addr, remapped);
-	return err;
+	err = apply_to_page_range(r.mm, addr, size, remap_sg, &r);
+	if (unlikely(err)) {
+		zap_vma_ptes(vma, addr, r.pfn << PAGE_SHIFT);
+		return err;
+	}
+
+	return 0;
 }
-- 
cgit v1.2.3


From cfa3b797118eda7d68f9ede9b1a0279192aca653 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@nvidia.com>
Date: Wed, 19 May 2021 11:41:32 +0300
Subject: RDMA/mlx5: Fix query DCT via DEVX

When executing DEVX command to query QP object, we need to take the QP
type from the mlx5_ib_qp struct which hold the driver specific QP types as
well, such as DC.

Fixes: 34613eb1d2ad ("IB/mlx5: Enable modify and query verbs objects via DEVX")
Link: https://lore.kernel.org/r/6eee15d63f09bb70787488e0cf96216e2957f5aa.1621413654.git.leonro@nvidia.com
Reviewed-by: Yishai Hadas <yishaih@nvidia.com>
Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/mlx5/devx.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index a0b677accd96..eb9b0a2707f8 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -630,9 +630,8 @@ static bool devx_is_valid_obj_id(struct uverbs_attr_bundle *attrs,
 	case UVERBS_OBJECT_QP:
 	{
 		struct mlx5_ib_qp *qp = to_mqp(uobj->object);
-		enum ib_qp_type	qp_type = qp->ibqp.qp_type;
 
-		if (qp_type == IB_QPT_RAW_PACKET ||
+		if (qp->type == IB_QPT_RAW_PACKET ||
 		    (qp->flags & IB_QP_CREATE_SOURCE_QPN)) {
 			struct mlx5_ib_raw_packet_qp *raw_packet_qp =
 							 &qp->raw_packet_qp;
@@ -649,10 +648,9 @@ static bool devx_is_valid_obj_id(struct uverbs_attr_bundle *attrs,
 					       sq->tisn) == obj_id);
 		}
 
-		if (qp_type == MLX5_IB_QPT_DCT)
+		if (qp->type == MLX5_IB_QPT_DCT)
 			return get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
 					      qp->dct.mdct.mqp.qpn) == obj_id;
-
 		return get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
 				      qp->ibqp.qp_num) == obj_id;
 	}
-- 
cgit v1.2.3


From 4954f5b8ef0baf70fe978d1a99a5f70e4dd5c877 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 19 May 2021 15:52:46 +0200
Subject: x86/sev-es: Use __put_user()/__get_user() for data accesses

The put_user() and get_user() functions do checks on the address which is
passed to them. They check whether the address is actually a user-space
address and whether its fine to access it. They also call might_fault()
to indicate that they could fault and possibly sleep.

All of these checks are neither wanted nor needed in the #VC exception
handler, which can be invoked from almost any context and also for MMIO
instructions from kernel space on kernel memory. All the #VC handler
wants to know is whether a fault happened when the access was tried.

This is provided by __put_user()/__get_user(), which just do the access
no matter what. Also add comments explaining why __get_user() and
__put_user() are the best choice here and why it is safe to use them
in this context. Also explain why copy_to/from_user can't be used.

In addition, also revert commit

  7024f60d6552 ("x86/sev-es: Handle string port IO to kernel memory properly")

because using __get_user()/__put_user() fixes the same problem while
the above commit introduced several problems:

  1) It uses access_ok() which is only allowed in task context.

  2) It uses memcpy() which has no fault handling at all and is
     thus unsafe to use here.

  [ bp: Fix up commit ID of the reverted commit above. ]

Fixes: f980f9c31a92 ("x86/sev-es: Compile early handler code into kernel image")
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: stable@vger.kernel.org # v5.10+
Link: https://lkml.kernel.org/r/20210519135251.30093-4-joro@8bytes.org
---
 arch/x86/kernel/sev.c | 66 +++++++++++++++++++++++++++++++++++----------------
 1 file changed, 46 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 1f428f401bed..651b81cd648e 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -315,31 +315,44 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
 	u16 d2;
 	u8  d1;
 
-	/* If instruction ran in kernel mode and the I/O buffer is in kernel space */
-	if (!user_mode(ctxt->regs) && !access_ok(target, size)) {
-		memcpy(dst, buf, size);
-		return ES_OK;
-	}
-
+	/*
+	 * This function uses __put_user() independent of whether kernel or user
+	 * memory is accessed. This works fine because __put_user() does no
+	 * sanity checks of the pointer being accessed. All that it does is
+	 * to report when the access failed.
+	 *
+	 * Also, this function runs in atomic context, so __put_user() is not
+	 * allowed to sleep. The page-fault handler detects that it is running
+	 * in atomic context and will not try to take mmap_sem and handle the
+	 * fault, so additional pagefault_enable()/disable() calls are not
+	 * needed.
+	 *
+	 * The access can't be done via copy_to_user() here because
+	 * vc_write_mem() must not use string instructions to access unsafe
+	 * memory. The reason is that MOVS is emulated by the #VC handler by
+	 * splitting the move up into a read and a write and taking a nested #VC
+	 * exception on whatever of them is the MMIO access. Using string
+	 * instructions here would cause infinite nesting.
+	 */
 	switch (size) {
 	case 1:
 		memcpy(&d1, buf, 1);
-		if (put_user(d1, target))
+		if (__put_user(d1, target))
 			goto fault;
 		break;
 	case 2:
 		memcpy(&d2, buf, 2);
-		if (put_user(d2, target))
+		if (__put_user(d2, target))
 			goto fault;
 		break;
 	case 4:
 		memcpy(&d4, buf, 4);
-		if (put_user(d4, target))
+		if (__put_user(d4, target))
 			goto fault;
 		break;
 	case 8:
 		memcpy(&d8, buf, 8);
-		if (put_user(d8, target))
+		if (__put_user(d8, target))
 			goto fault;
 		break;
 	default:
@@ -370,30 +383,43 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
 	u16 d2;
 	u8  d1;
 
-	/* If instruction ran in kernel mode and the I/O buffer is in kernel space */
-	if (!user_mode(ctxt->regs) && !access_ok(s, size)) {
-		memcpy(buf, src, size);
-		return ES_OK;
-	}
-
+	/*
+	 * This function uses __get_user() independent of whether kernel or user
+	 * memory is accessed. This works fine because __get_user() does no
+	 * sanity checks of the pointer being accessed. All that it does is
+	 * to report when the access failed.
+	 *
+	 * Also, this function runs in atomic context, so __get_user() is not
+	 * allowed to sleep. The page-fault handler detects that it is running
+	 * in atomic context and will not try to take mmap_sem and handle the
+	 * fault, so additional pagefault_enable()/disable() calls are not
+	 * needed.
+	 *
+	 * The access can't be done via copy_from_user() here because
+	 * vc_read_mem() must not use string instructions to access unsafe
+	 * memory. The reason is that MOVS is emulated by the #VC handler by
+	 * splitting the move up into a read and a write and taking a nested #VC
+	 * exception on whatever of them is the MMIO access. Using string
+	 * instructions here would cause infinite nesting.
+	 */
 	switch (size) {
 	case 1:
-		if (get_user(d1, s))
+		if (__get_user(d1, s))
 			goto fault;
 		memcpy(buf, &d1, 1);
 		break;
 	case 2:
-		if (get_user(d2, s))
+		if (__get_user(d2, s))
 			goto fault;
 		memcpy(buf, &d2, 2);
 		break;
 	case 4:
-		if (get_user(d4, s))
+		if (__get_user(d4, s))
 			goto fault;
 		memcpy(buf, &d4, 4);
 		break;
 	case 8:
-		if (get_user(d8, s))
+		if (__get_user(d8, s))
 			goto fault;
 		memcpy(buf, &d8, 8);
 		break;
-- 
cgit v1.2.3


From fb6c79d7261afb7e942251254ea47951c2a9a706 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 10 Feb 2021 17:33:27 +0900
Subject: perf tools: Add 'cgroup-switches' software event

It counts how often cgroups are changed actually during the context
switches.

  # perf stat -a -e context-switches,cgroup-switches -a sleep 1

   Performance counter stats for 'system wide':

              11,267      context-switches
              10,950      cgroup-switches

         1.015634369 seconds time elapsed

Committer notes:

The kernel patches landed in v5.13, but this entry wasn't filled in
perf's parse-events tables, which was leading to a segfault when running
'perf list' on a kernel with that feature, as reported by Thomas
Richter.

Also removed the part touching tools/include/uapi/linux/perf_event.h as
it was updated in the usual sync with the kernel UAPI headers, in a
previous, already upstream, patch.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Link: http://lore.kernel.org/lkml/20210210083327.22726-3-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/parse-events.c | 4 ++++
 tools/perf/util/parse-events.l | 1 +
 2 files changed, 5 insertions(+)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 4dad14265b81..269997066f6e 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -150,6 +150,10 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
 		.symbol = "bpf-output",
 		.alias  = "",
 	},
+	[PERF_COUNT_SW_CGROUP_SWITCHES] = {
+		.symbol = "cgroup-switches",
+		.alias  = "",
+	},
 };
 
 #define __PERF_EVENT_FIELD(config, name) \
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index fb8646cc3e83..923849024b15 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -347,6 +347,7 @@ emulation-faults				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EM
 dummy						{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
 duration_time					{ return tool(yyscanner, PERF_TOOL_DURATION_TIME); }
 bpf-output					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); }
+cgroup-switches					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); }
 
 	/*
 	 * We have to handle the kernel PMU event cycles-ct/cycles-t/mem-loads/mem-stores separately.
-- 
cgit v1.2.3


From 463a3f66473b58d71428a1c3ce69ea52c05440e5 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 14 May 2021 17:18:10 +0300
Subject: RDMA/uverbs: Fix a NULL vs IS_ERR() bug

The uapi_get_object() function returns error pointers, it never returns
NULL.

Fixes: 149d3845f4a5 ("RDMA/uverbs: Add a method to introspect handles in a context")
Link: https://lore.kernel.org/r/YJ6Got+U7lz+3n9a@mwanda
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/core/uverbs_std_types_device.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c
index a03021d94e11..049684880ae0 100644
--- a/drivers/infiniband/core/uverbs_std_types_device.c
+++ b/drivers/infiniband/core/uverbs_std_types_device.c
@@ -117,8 +117,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_INFO_HANDLES)(
 		return ret;
 
 	uapi_object = uapi_get_object(attrs->ufile->device->uapi, object_id);
-	if (!uapi_object)
-		return -EINVAL;
+	if (IS_ERR(uapi_object))
+		return PTR_ERR(uapi_object);
 
 	handles = gather_objects_handle(attrs->ufile, uapi_object, attrs,
 					out_len, &total);
-- 
cgit v1.2.3


From c71b99640d2d350ee3146452c1057bd59cb2c5e0 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Wed, 19 May 2021 10:10:38 +0800
Subject: ethtool: stats: Fix a copy-paste error

data->ctrl_stats should be memset with correct size.

Fixes: bfad2b979ddc ("ethtool: add interface to read standard MAC Ctrl stats")
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethtool/stats.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ethtool/stats.c b/net/ethtool/stats.c
index b7642dc96d50..ec07f5765e03 100644
--- a/net/ethtool/stats.c
+++ b/net/ethtool/stats.c
@@ -119,7 +119,7 @@ static int stats_prepare_data(const struct ethnl_req_info *req_base,
 	 */
 	memset(&data->phy_stats, 0xff, sizeof(data->phy_stats));
 	memset(&data->mac_stats, 0xff, sizeof(data->mac_stats));
-	memset(&data->ctrl_stats, 0xff, sizeof(data->mac_stats));
+	memset(&data->ctrl_stats, 0xff, sizeof(data->ctrl_stats));
 	memset(&data->rmon_stats, 0xff, sizeof(data->rmon_stats));
 
 	if (test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask) &&
-- 
cgit v1.2.3


From 88c380df84fbd03f9b137c2b9d0a44b9f2f553b0 Mon Sep 17 00:00:00 2001
From: Raju Rangoju <rajur@chelsio.com>
Date: Wed, 19 May 2021 16:48:31 +0530
Subject: cxgb4: avoid accessing registers when clearing filters

Hardware register having the server TID base can contain
invalid values when adapter is in bad state (for example,
due to AER fatal error). Reading these invalid values in the
register can lead to out-of-bound memory access. So, fix
by using the saved server TID base when clearing filters.

Fixes: b1a79360ee86 ("cxgb4: Delete all hash and TCAM filters before resource cleanup")
Signed-off-by: Raju Rangoju <rajur@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index bc581b149b11..22c9ac922eba 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -1042,7 +1042,7 @@ void clear_all_filters(struct adapter *adapter)
 				cxgb4_del_filter(dev, f->tid, &f->fs);
 		}
 
-		sb = t4_read_reg(adapter, LE_DB_SRVR_START_INDEX_A);
+		sb = adapter->tids.stid_base;
 		for (i = 0; i < sb; i++) {
 			f = (struct filter_entry *)adapter->tids.tid_tab[i];
 
-- 
cgit v1.2.3


From d5b3bd6ab5418e34d85f64fba7c6ca02c3cbfb63 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 19 May 2021 15:02:53 +0200
Subject: dt-bindings: net: renesas,ether: Update Sergei's email address

Update Sergei's email address, as per commit 534a8bf0ccdd7b3f
("MAINTAINERS: switch to my private email for Renesas Ethernet
drivers").

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Sergei Shtylyov <sergei.shtylyov@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/renesas,ether.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/net/renesas,ether.yaml b/Documentation/devicetree/bindings/net/renesas,ether.yaml
index 8ce5ed8a58dd..c101a1ec846e 100644
--- a/Documentation/devicetree/bindings/net/renesas,ether.yaml
+++ b/Documentation/devicetree/bindings/net/renesas,ether.yaml
@@ -10,7 +10,7 @@ allOf:
   - $ref: ethernet-controller.yaml#
 
 maintainers:
-  - Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
+  - Sergei Shtylyov <sergei.shtylyov@gmail.com>
 
 properties:
   compatible:
-- 
cgit v1.2.3


From 4d52ebc7ace491d58f96d1f4a1cb9070c506b2e7 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 19 May 2021 14:47:17 +0200
Subject: net: hso: bail out on interrupt URB allocation failure

Commit 31db0dbd7244 ("net: hso: check for allocation failure in
hso_create_bulk_serial_device()") recently started returning an error
when the driver fails to allocate resources for the interrupt endpoint
and tiocmget functionality.

For consistency let's bail out from probe also if the URB allocation
fails.

Signed-off-by: Johan Hovold <johan@kernel.org>
Reviewed-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/hso.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index 260f850d69eb..b48b2a25210c 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -2635,14 +2635,14 @@ static struct hso_device *hso_create_bulk_serial_device(
 		}
 
 		tiocmget->urb = usb_alloc_urb(0, GFP_KERNEL);
-		if (tiocmget->urb) {
-			mutex_init(&tiocmget->mutex);
-			init_waitqueue_head(&tiocmget->waitq);
-		} else
-			hso_free_tiomget(serial);
-	}
-	else
+		if (!tiocmget->urb)
+			goto exit;
+
+		mutex_init(&tiocmget->mutex);
+		init_waitqueue_head(&tiocmget->waitq);
+	} else {
 		num_urbs = 1;
+	}
 
 	if (hso_serial_common_create(serial, num_urbs, BULK_URB_RX_SIZE,
 				     BULK_URB_TX_SIZE))
-- 
cgit v1.2.3


From 84c63d040938f64a7dc195696301166e75231bf5 Mon Sep 17 00:00:00 2001
From: Nikola Cornij <nikola.cornij@amd.com>
Date: Thu, 6 May 2021 22:46:52 -0400
Subject: drm/amd/display: Use the correct max downscaling value for DCN3.x
 family

[why]
As per spec, DCN3.x can do 6:1 downscaling and DCN2.x can do 4:1. The
max downscaling limit value for DCN2.x is 250, which means it's
calculated as 1000 / 4 = 250. For DCN3.x this then gives 1000 / 6 = 167.

[how]
Set maximum downscaling limit to 167 for DCN3.x

Signed-off-by: Nikola Cornij <nikola.cornij@amd.com>
Reviewed-by: Charlene Liu <Charlene.Liu@amd.com>
Reviewed-by: Harry Wentland <Harry.Wentland@amd.com>
Acked-by: Stylon Wang <stylon.wang@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c   | 7 ++++---
 drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c | 7 ++++---
 drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c | 7 ++++---
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
index 4a5fa23d8e7b..5fcc2e64305d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
@@ -826,10 +826,11 @@ static const struct dc_plane_cap plane_cap = {
 			.fp16 = 16000
 	},
 
+	/* 6:1 downscaling ratio: 1000/6 = 166.666 */
 	.max_downscale_factor = {
-			.argb8888 = 600,
-			.nv12 = 600,
-			.fp16 = 600
+			.argb8888 = 167,
+			.nv12 = 167,
+			.fp16 = 167
 	}
 };
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
index 5b54b7fc5105..472696f949ac 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
@@ -843,10 +843,11 @@ static const struct dc_plane_cap plane_cap = {
 			.fp16 = 16000
 	},
 
+	/* 6:1 downscaling ratio: 1000/6 = 166.666 */
 	.max_downscale_factor = {
-			.argb8888 = 600,
-			.nv12 = 600,
-			.fp16 = 600
+			.argb8888 = 167,
+			.nv12 = 167,
+			.fp16 = 167 
 	},
 	64,
 	64
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
index fc2dea243d1b..a33f0365329b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
@@ -284,10 +284,11 @@ static const struct dc_plane_cap plane_cap = {
 				.nv12 = 16000,
 				.fp16 = 16000
 		},
+		/* 6:1 downscaling ratio: 1000/6 = 166.666 */
 		.max_downscale_factor = {
-				.argb8888 = 600,
-				.nv12 = 600,
-				.fp16 = 600
+				.argb8888 = 167,
+				.nv12 = 167,
+				.fp16 = 167
 		},
 		16,
 		16
-- 
cgit v1.2.3


From 0c8df343c200529e6b9820bdfed01814140f75e4 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Wed, 12 May 2021 10:36:43 +0200
Subject: drm/radeon: use the dummy page for GART if needed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Imported BOs don't have a pagelist any more.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Fixes: 0575ff3d33cd ("drm/radeon: stop using pages with drm_prime_sg_to_page_addr_arrays v2")
CC: stable@vger.kernel.org # 5.12
---
 drivers/gpu/drm/radeon/radeon_gart.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index 3808a753127b..04109a2a6fd7 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -301,7 +301,8 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
 	p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
 
 	for (i = 0; i < pages; i++, p++) {
-		rdev->gart.pages[p] = pagelist[i];
+		rdev->gart.pages[p] = pagelist ? pagelist[i] :
+			rdev->dummy_page.page;
 		page_base = dma_addr[i];
 		for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) {
 			page_entry = radeon_gart_get_page_entry(page_base, flags);
-- 
cgit v1.2.3


From d53751568359e5b3ffb859b13cbd79dc77a571f1 Mon Sep 17 00:00:00 2001
From: Yi Li <liyi@loongson.cn>
Date: Fri, 14 May 2021 14:40:39 +0800
Subject: drm/amdgpu: Fix GPU TLB update error when PAGE_SIZE >
 AMDGPU_PAGE_SIZE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When PAGE_SIZE is larger than AMDGPU_PAGE_SIZE, the number of GPU TLB
entries which need to update in amdgpu_map_buffer() should be multiplied
by AMDGPU_GPU_PAGES_IN_CPU_PAGE (PAGE_SIZE / AMDGPU_PAGE_SIZE).

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Yi Li <liyi@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 3bef0432cac2..a376a993e474 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -225,7 +225,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
 	*addr += mm_cur->start & ~PAGE_MASK;
 
 	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
-	num_bytes = num_pages * 8;
+	num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
 
 	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
 				     AMDGPU_IB_POOL_DELAYED, &job);
-- 
cgit v1.2.3


From dbd1003d1252db5973dddf20b24bb0106ac52aa2 Mon Sep 17 00:00:00 2001
From: Changfeng <Changfeng.Zhu@amd.com>
Date: Fri, 14 May 2021 15:28:25 +0800
Subject: drm/amdgpu: disable 3DCGCG on picasso/raven1 to avoid compute hang

There is problem with 3DCGCG firmware and it will cause compute test
hang on picasso/raven1. It needs to disable 3DCGCG in driver to avoid
compute hang.

Signed-off-by: Changfeng <Changfeng.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 10 +++++++---
 drivers/gpu/drm/amd/amdgpu/soc15.c    |  2 --
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index a078a38c2cee..516467e962b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4943,7 +4943,7 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
 	amdgpu_gfx_rlc_enter_safe_mode(adev);
 
 	/* Enable 3D CGCG/CGLS */
-	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
+	if (enable) {
 		/* write cmd to clear cgcg/cgls ov */
 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
 		/* unset CGCG override */
@@ -4955,8 +4955,12 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
 		/* enable 3Dcgcg FSM(0x0000363f) */
 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
 
-		data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
-			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
+			data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+				RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+		else
+			data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
+
 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 8e1b9a40839f..980675052b53 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -1392,7 +1392,6 @@ static int soc15_common_early_init(void *handle)
 			adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
 				AMD_CG_SUPPORT_GFX_MGLS |
 				AMD_CG_SUPPORT_GFX_CP_LS |
-				AMD_CG_SUPPORT_GFX_3D_CGCG |
 				AMD_CG_SUPPORT_GFX_3D_CGLS |
 				AMD_CG_SUPPORT_GFX_CGCG |
 				AMD_CG_SUPPORT_GFX_CGLS |
@@ -1412,7 +1411,6 @@ static int soc15_common_early_init(void *handle)
 				AMD_CG_SUPPORT_GFX_MGLS |
 				AMD_CG_SUPPORT_GFX_RLC_LS |
 				AMD_CG_SUPPORT_GFX_CP_LS |
-				AMD_CG_SUPPORT_GFX_3D_CGCG |
 				AMD_CG_SUPPORT_GFX_3D_CGLS |
 				AMD_CG_SUPPORT_GFX_CGCG |
 				AMD_CG_SUPPORT_GFX_CGLS |
-- 
cgit v1.2.3


From 080039273b126eeb0185a61c045893a25dbc046e Mon Sep 17 00:00:00 2001
From: Chris Park <Chris.Park@amd.com>
Date: Tue, 4 May 2021 16:20:55 -0400
Subject: drm/amd/display: Disconnect non-DP with no EDID

[Why]
Active DP dongles return no EDID when dongle
is connected, but VGA display is taken out.
Current driver behavior does not remove the
active display when this happens, and this is
a gap between dongle DTP and dongle behavior.

[How]
For active DP dongles and non-DP scenario,
disconnect sink on detection when no EDID
is read due to timeout.

Signed-off-by: Chris Park <Chris.Park@amd.com>
Reviewed-by: Nicholas Kazlauskas <Nicholas.Kazlauskas@amd.com>
Acked-by: Stylon Wang <stylon.wang@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/core/dc_link.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index f4374d83662a..c1f5474c205a 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -1076,6 +1076,24 @@ static bool dc_link_detect_helper(struct dc_link *link,
 			    dc_is_dvi_signal(link->connector_signal)) {
 				if (prev_sink)
 					dc_sink_release(prev_sink);
+				link_disconnect_sink(link);
+
+				return false;
+			}
+			/*
+			 * Abort detection for DP connectors if we have
+			 * no EDID and connector is active converter
+			 * as there are no display downstream
+			 *
+			 */
+			if (dc_is_dp_sst_signal(link->connector_signal) &&
+				(link->dpcd_caps.dongle_type ==
+						DISPLAY_DONGLE_DP_VGA_CONVERTER ||
+				link->dpcd_caps.dongle_type ==
+						DISPLAY_DONGLE_DP_DVI_CONVERTER)) {
+				if (prev_sink)
+					dc_sink_release(prev_sink);
+				link_disconnect_sink(link);
 
 				return false;
 			}
-- 
cgit v1.2.3


From fa7e6abc75f3d491bc561734312d065dc9dc2a77 Mon Sep 17 00:00:00 2001
From: Jingwen Chen <Jingwen.Chen2@amd.com>
Date: Mon, 17 May 2021 16:16:10 +0800
Subject: drm/amd/amdgpu: fix refcount leak
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[Why]
the gem object rfb->base.obj[0] is get according to num_planes
in amdgpufb_create, but is not put according to num_planes

[How]
put rfb->base.obj[0] in amdgpu_fbdev_destroy according to num_planes

Signed-off-by: Jingwen Chen <Jingwen.Chen2@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 4f10c4529840..09b048647523 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -288,10 +288,13 @@ out:
 static int amdgpu_fbdev_destroy(struct drm_device *dev, struct amdgpu_fbdev *rfbdev)
 {
 	struct amdgpu_framebuffer *rfb = &rfbdev->rfb;
+	int i;
 
 	drm_fb_helper_unregister_fbi(&rfbdev->helper);
 
 	if (rfb->base.obj[0]) {
+		for (i = 0; i < rfb->base.format->num_planes; i++)
+			drm_gem_object_put(rfb->base.obj[0]);
 		amdgpufb_destroy_pinned_object(rfb->base.obj[0]);
 		rfb->base.obj[0] = NULL;
 		drm_framebuffer_unregister_private(&rfb->base);
-- 
cgit v1.2.3


From ab95cb3e1bc44d4376bd8d331b1cff82b99020e3 Mon Sep 17 00:00:00 2001
From: James Zhu <James.Zhu@amd.com>
Date: Tue, 18 May 2021 08:44:23 -0400
Subject: drm/amdgpu: add video_codecs query support for aldebaran

Add video_codecs query support for aldebaran.

Signed-off-by: James Zhu <James.Zhu@amd.com>
Reviewed-by: Leo Liu <leo.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 980675052b53..e65c286f93a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -302,6 +302,7 @@ static int soc15_query_video_codecs(struct amdgpu_device *adev, bool encode,
 			*codecs = &rv_video_codecs_decode;
 		return 0;
 	case CHIP_ARCTURUS:
+	case CHIP_ALDEBARAN:
 	case CHIP_RENOIR:
 		if (encode)
 			*codecs = &vega_video_codecs_encode;
-- 
cgit v1.2.3


From 1e5c37385097c35911b0f8a0c67ffd10ee1af9a2 Mon Sep 17 00:00:00 2001
From: xinhui pan <xinhui.pan@amd.com>
Date: Tue, 18 May 2021 10:56:07 +0800
Subject: drm/amdgpu: Fix a use-after-free
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

looks like we forget to set ttm->sg to NULL.
Hit panic below

[ 1235.844104] general protection fault, probably for non-canonical address 0x6b6b6b6b6b6b7b4b: 0000 [#1] SMP DEBUG_PAGEALLOC NOPTI
[ 1235.989074] Call Trace:
[ 1235.991751]  sg_free_table+0x17/0x20
[ 1235.995667]  amdgpu_ttm_backend_unbind.cold+0x4d/0xf7 [amdgpu]
[ 1236.002288]  amdgpu_ttm_backend_destroy+0x29/0x130 [amdgpu]
[ 1236.008464]  ttm_tt_destroy+0x1e/0x30 [ttm]
[ 1236.013066]  ttm_bo_cleanup_memtype_use+0x51/0xa0 [ttm]
[ 1236.018783]  ttm_bo_release+0x262/0xa50 [ttm]
[ 1236.023547]  ttm_bo_put+0x82/0xd0 [ttm]
[ 1236.027766]  amdgpu_bo_unref+0x26/0x50 [amdgpu]
[ 1236.032809]  amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu+0x7aa/0xd90 [amdgpu]
[ 1236.040400]  kfd_ioctl_alloc_memory_of_gpu+0xe2/0x330 [amdgpu]
[ 1236.046912]  kfd_ioctl+0x463/0x690 [amdgpu]

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index a376a993e474..d5cbc51c5eaa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1210,6 +1210,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
 	if (gtt && gtt->userptr) {
 		amdgpu_ttm_tt_set_user_pages(ttm, NULL);
 		kfree(ttm->sg);
+		ttm->sg = NULL;
 		ttm->page_flags &= ~TTM_PAGE_FLAG_SG;
 		return;
 	}
-- 
cgit v1.2.3


From 99c45ba5799d6b938bd9bd20edfeb6f3e3e039b9 Mon Sep 17 00:00:00 2001
From: Guchun Chen <guchun.chen@amd.com>
Date: Mon, 17 May 2021 16:35:40 +0800
Subject: drm/amdgpu: update gc golden setting for Navi12

Current golden setting is out of date.

Signed-off-by: Guchun Chen <guchun.chen@amd.com>
Reviewed-by: Kenneth Feng <kenneth.feng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 2408ed4c7d84..7ce76a6b3a35 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -1395,9 +1395,10 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2[] =
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04800000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04900000),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1800ff, 0x00000044),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x00007fff, 0x000001fe),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
@@ -1415,12 +1416,13 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2[] =
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000820, 0x00000820),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070104),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0xffdf80ff, 0x479c0010),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00800000)
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00c00000)
 };
 
 static bool gfx_v10_is_rlcg_rw(struct amdgpu_device *adev, u32 offset, uint32_t *flag, bool write)
-- 
cgit v1.2.3


From 77194d8642dd4cb7ea8ced77bfaea55610574c38 Mon Sep 17 00:00:00 2001
From: Guchun Chen <guchun.chen@amd.com>
Date: Mon, 17 May 2021 16:38:00 +0800
Subject: drm/amdgpu: update sdma golden setting for Navi12

Current golden setting is out of date.

Signed-off-by: Guchun Chen <guchun.chen@amd.com>
Reviewed-by: Kenneth Feng <kenneth.feng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 920fc6d4a127..8859133ce37e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -123,6 +123,10 @@ static const struct soc15_reg_golden golden_settings_sdma_nv14[] = {
 
 static const struct soc15_reg_golden golden_settings_sdma_nv12[] = {
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GB_ADDR_CONFIG, 0x001877ff, 0x00000044),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x001877ff, 0x00000044),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GB_ADDR_CONFIG, 0x001877ff, 0x00000044),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x001877ff, 0x00000044),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
 };
 
-- 
cgit v1.2.3


From 9c2876d56f1ce9b6b2072f1446fb1e8d1532cb3d Mon Sep 17 00:00:00 2001
From: Lang Yu <Lang.Yu@amd.com>
Date: Mon, 17 May 2021 12:47:20 +0800
Subject: drm/amd/amdgpu: fix a potential deadlock in gpu reset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When amdgpu_ib_ring_tests failed, the reset logic called
amdgpu_device_ip_suspend twice, then deadlock occurred.
Deadlock log:

[  805.655192] amdgpu 0000:04:00.0: amdgpu: ib ring test failed (-110).
[  806.290952] [drm] free PSP TMR buffer

[  806.319406] ============================================
[  806.320315] WARNING: possible recursive locking detected
[  806.321225] 5.11.0-custom #1 Tainted: G        W  OEL
[  806.322135] --------------------------------------------
[  806.323043] cat/2593 is trying to acquire lock:
[  806.323825] ffff888136b1cdc8 (&adev->dm.dc_lock){+.+.}-{3:3}, at: dm_suspend+0xb8/0x1d0 [amdgpu]
[  806.325668]
               but task is already holding lock:
[  806.326664] ffff888136b1cdc8 (&adev->dm.dc_lock){+.+.}-{3:3}, at: dm_suspend+0xb8/0x1d0 [amdgpu]
[  806.328430]
               other info that might help us debug this:
[  806.329539]  Possible unsafe locking scenario:

[  806.330549]        CPU0
[  806.330983]        ----
[  806.331416]   lock(&adev->dm.dc_lock);
[  806.332086]   lock(&adev->dm.dc_lock);
[  806.332738]
                *** DEADLOCK ***

[  806.333747]  May be due to missing lock nesting notation

[  806.334899] 3 locks held by cat/2593:
[  806.335537]  #0: ffff888100d3f1b8 (&attr->mutex){+.+.}-{3:3}, at: simple_attr_read+0x4e/0x110
[  806.337009]  #1: ffff888136b1fd78 (&adev->reset_sem){++++}-{3:3}, at: amdgpu_device_lock_adev+0x42/0x94 [amdgpu]
[  806.339018]  #2: ffff888136b1cdc8 (&adev->dm.dc_lock){+.+.}-{3:3}, at: dm_suspend+0xb8/0x1d0 [amdgpu]
[  806.340869]
               stack backtrace:
[  806.341621] CPU: 6 PID: 2593 Comm: cat Tainted: G        W  OEL    5.11.0-custom #1
[  806.342921] Hardware name: AMD Celadon-CZN/Celadon-CZN, BIOS WLD0C23N_Weekly_20_12_2 12/23/2020
[  806.344413] Call Trace:
[  806.344849]  dump_stack+0x93/0xbd
[  806.345435]  __lock_acquire.cold+0x18a/0x2cf
[  806.346179]  lock_acquire+0xca/0x390
[  806.346807]  ? dm_suspend+0xb8/0x1d0 [amdgpu]
[  806.347813]  __mutex_lock+0x9b/0x930
[  806.348454]  ? dm_suspend+0xb8/0x1d0 [amdgpu]
[  806.349434]  ? amdgpu_device_indirect_rreg+0x58/0x70 [amdgpu]
[  806.350581]  ? _raw_spin_unlock_irqrestore+0x47/0x50
[  806.351437]  ? dm_suspend+0xb8/0x1d0 [amdgpu]
[  806.352437]  ? rcu_read_lock_sched_held+0x4f/0x80
[  806.353252]  ? rcu_read_lock_sched_held+0x4f/0x80
[  806.354064]  mutex_lock_nested+0x1b/0x20
[  806.354747]  ? mutex_lock_nested+0x1b/0x20
[  806.355457]  dm_suspend+0xb8/0x1d0 [amdgpu]
[  806.356427]  ? soc15_common_set_clockgating_state+0x17d/0x19 [amdgpu]
[  806.357736]  amdgpu_device_ip_suspend_phase1+0x78/0xd0 [amdgpu]
[  806.360394]  amdgpu_device_ip_suspend+0x21/0x70 [amdgpu]
[  806.362926]  amdgpu_device_pre_asic_reset+0xb3/0x270 [amdgpu]
[  806.365560]  amdgpu_device_gpu_recover.cold+0x679/0x8eb [amdgpu]

Signed-off-by: Lang Yu <Lang.Yu@amd.com>
Acked-by: Christian KÃnig <christian.koenig@amd.com>
Reviewed-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 8b2a37bf2adf..66ddfe4f58c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4479,7 +4479,6 @@ out:
 			r = amdgpu_ib_ring_tests(tmp_adev);
 			if (r) {
 				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
-				r = amdgpu_device_ip_suspend(tmp_adev);
 				need_full_reset = true;
 				r = -EAGAIN;
 				goto end;
-- 
cgit v1.2.3


From a2b4785f01280a4291edb9fda69032fc2e4bfd3f Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Tue, 18 May 2021 17:48:02 +0200
Subject: drm/amdgpu: stop touching sched.ready in the backend
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This unfortunately comes up in regular intervals and breaks
GPU reset for the engine in question.

The sched.ready flag controls if an engine can't get working
during hw_init, but should never be set to false during hw_fini.

v2: squash in unused variable fix (Alex)

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c | 2 --
 drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c | 2 --
 drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 5 -----
 drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c  | 8 +-------
 4 files changed, 1 insertion(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
index 83531997aeba..938ef4ce5b76 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
@@ -198,8 +198,6 @@ static int jpeg_v2_5_hw_fini(void *handle)
 		if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
 		      RREG32_SOC15(JPEG, i, mmUVD_JRBC_STATUS))
 			jpeg_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
-
-		ring->sched.ready = false;
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
index de5dfcfb3859..94be35357f7d 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
@@ -166,8 +166,6 @@ static int jpeg_v3_0_hw_fini(void *handle)
 	      RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS))
 		jpeg_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
 
-	ring->sched.ready = false;
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index b1ad9e52b234..240596b25fe4 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -497,11 +497,6 @@ static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev)
 		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
 		WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
 	}
-
-	sdma0->sched.ready = false;
-	sdma1->sched.ready = false;
-	sdma2->sched.ready = false;
-	sdma3->sched.ready = false;
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index cf165ab5dd26..14470da52113 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -373,7 +373,7 @@ static int vcn_v3_0_hw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct amdgpu_ring *ring;
-	int i, j;
+	int i;
 
 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
 		if (adev->vcn.harvest_config & (1 << i))
@@ -388,12 +388,6 @@ static int vcn_v3_0_hw_fini(void *handle)
 				vcn_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
 			}
 		}
-		ring->sched.ready = false;
-
-		for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
-			ring = &adev->vcn.inst[i].ring_enc[j];
-			ring->sched.ready = false;
-		}
 	}
 
 	return 0;
-- 
cgit v1.2.3


From d201d7631ca170b038e7f8921120d05eec70d7c5 Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg <lsahlber@redhat.com>
Date: Wed, 19 May 2021 08:40:11 +1000
Subject: cifs: fix memory leak in smb2_copychunk_range

When using smb2_copychunk_range() for large ranges we will
run through several iterations of a loop calling SMB2_ioctl()
but never actually free the returned buffer except for the final
iteration.
This leads to memory leaks everytime a large copychunk is requested.

Fixes: 9bf0c9cd4314 ("CIFS: Fix SMB2/SMB3 Copy offload support (refcopy) for large files")
Cc: <stable@vger.kernel.org>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/smb2ops.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index dd0eb665b680..c693624a7267 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1861,6 +1861,8 @@ smb2_copychunk_range(const unsigned int xid,
 			cpu_to_le32(min_t(u32, len, tcon->max_bytes_chunk));
 
 		/* Request server copy to target from src identified by key */
+		kfree(retbuf);
+		retbuf = NULL;
 		rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid,
 			trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE,
 			true /* is_fsctl */, (char *)pcchunk,
-- 
cgit v1.2.3


From e83aa3528a38bddae182a35d0efb5a6c35143c1c Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Wed, 19 May 2021 18:47:07 +0800
Subject: cifs: Fix inconsistent indenting

Eliminate the follow smatch warning:

fs/cifs/fs_context.c:1148 smb3_fs_context_parse_param() warn:
inconsistent indenting.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/fs_context.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c
index 5d21cd905315..92d4ab029c91 100644
--- a/fs/cifs/fs_context.c
+++ b/fs/cifs/fs_context.c
@@ -1145,7 +1145,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
 		/* if iocharset not set then load_nls_default
 		 * is used by caller
 		 */
-		 cifs_dbg(FYI, "iocharset set to %s\n", ctx->iocharset);
+		cifs_dbg(FYI, "iocharset set to %s\n", ctx->iocharset);
 		break;
 	case Opt_netbiosname:
 		memset(ctx->source_rfc1001_name, 0x20,
-- 
cgit v1.2.3


From 860b69a9d77160d21ca00357fd6c5217f9d41fb1 Mon Sep 17 00:00:00 2001
From: Rohith Surabattula <rohiths@microsoft.com>
Date: Wed, 5 May 2021 10:56:47 +0000
Subject: Fix kernel oops when CONFIG_DEBUG_ATOMIC_SLEEP is enabled.

Removed oplock_break_received flag which was added to achieve
synchronization between oplock handler and open handler by earlier commit.

It is not needed because there is an existing lock open_file_lock to achieve
the same. find_readable_file takes open_file_lock and then traverses the
openFileList. Similarly, cifs_oplock_break while closing the deferred
handle (i.e cifsFileInfo_put) takes open_file_lock and then sends close
to the server.

Added comments for better readability.

Signed-off-by: Rohith Surabattula <rohiths@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifsfs.c   |  2 +-
 fs/cifs/cifsglob.h |  3 +--
 fs/cifs/file.c     | 27 ++++++++++++---------------
 fs/cifs/misc.c     |  9 +++++++++
 4 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index d7ea9c5fe0f8..2ffcb29d5c8f 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -133,7 +133,7 @@ struct workqueue_struct	*cifsiod_wq;
 struct workqueue_struct	*decrypt_wq;
 struct workqueue_struct	*fileinfo_put_wq;
 struct workqueue_struct	*cifsoplockd_wq;
-struct workqueue_struct *deferredclose_wq;
+struct workqueue_struct	*deferredclose_wq;
 __u32 cifs_lock_secret;
 
 /*
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index d88b4b523dcc..ea90c53386b8 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1257,8 +1257,7 @@ struct cifsFileInfo {
 	struct work_struct oplock_break; /* work for oplock breaks */
 	struct work_struct put; /* work for the final part of _put */
 	struct delayed_work deferred;
-	bool oplock_break_received; /* Flag to indicate oplock break */
-	bool deferred_scheduled;
+	bool deferred_close_scheduled; /* Flag to indicate close is scheduled */
 };
 
 struct cifs_io_parms {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 6caad100c3f3..304d9d3783c6 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -323,8 +323,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
 	cfile->dentry = dget(dentry);
 	cfile->f_flags = file->f_flags;
 	cfile->invalidHandle = false;
-	cfile->oplock_break_received = false;
-	cfile->deferred_scheduled = false;
+	cfile->deferred_close_scheduled = false;
 	cfile->tlink = cifs_get_tlink(tlink);
 	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
 	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
@@ -574,21 +573,18 @@ int cifs_open(struct inode *inode, struct file *file)
 			file->f_op = &cifs_file_direct_ops;
 	}
 
-	spin_lock(&CIFS_I(inode)->deferred_lock);
 	/* Get the cached handle as SMB2 close is deferred */
 	rc = cifs_get_readable_path(tcon, full_path, &cfile);
 	if (rc == 0) {
 		if (file->f_flags == cfile->f_flags) {
 			file->private_data = cfile;
+			spin_lock(&CIFS_I(inode)->deferred_lock);
 			cifs_del_deferred_close(cfile);
 			spin_unlock(&CIFS_I(inode)->deferred_lock);
 			goto out;
 		} else {
-			spin_unlock(&CIFS_I(inode)->deferred_lock);
 			_cifsFileInfo_put(cfile, true, false);
 		}
-	} else {
-		spin_unlock(&CIFS_I(inode)->deferred_lock);
 	}
 
 	if (server->oplocks)
@@ -878,12 +874,12 @@ void smb2_deferred_work_close(struct work_struct *work)
 			struct cifsFileInfo, deferred.work);
 
 	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
-	if (!cfile->deferred_scheduled) {
+	if (!cfile->deferred_close_scheduled) {
 		spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
 		return;
 	}
 	cifs_del_deferred_close(cfile);
-	cfile->deferred_scheduled = false;
+	cfile->deferred_close_scheduled = false;
 	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
 	_cifsFileInfo_put(cfile, true, false);
 }
@@ -905,14 +901,15 @@ int cifs_close(struct inode *inode, struct file *file)
 				inode->i_ctime = inode->i_mtime = current_time(inode);
 			spin_lock(&cinode->deferred_lock);
 			cifs_add_deferred_close(cfile, dclose);
-			if (cfile->deferred_scheduled) {
+			if (cfile->deferred_close_scheduled &&
+			    delayed_work_pending(&cfile->deferred)) {
 				mod_delayed_work(deferredclose_wq,
 						&cfile->deferred, cifs_sb->ctx->acregmax);
 			} else {
 				/* Deferred close for files */
 				queue_delayed_work(deferredclose_wq,
 						&cfile->deferred, cifs_sb->ctx->acregmax);
-				cfile->deferred_scheduled = true;
+				cfile->deferred_close_scheduled = true;
 				spin_unlock(&cinode->deferred_lock);
 				return 0;
 			}
@@ -2020,8 +2017,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
 		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
 			continue;
 		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
-			if ((!open_file->invalidHandle) &&
-				(!open_file->oplock_break_received)) {
+			if ((!open_file->invalidHandle)) {
 				/* found a good file */
 				/* lock it so it will not be closed on us */
 				cifsFileInfo_get(open_file);
@@ -4874,13 +4870,14 @@ oplock_break_ack:
 	}
 	/*
 	 * When oplock break is received and there are no active
-	 * file handles but cached, then set the flag oplock_break_received.
+	 * file handles but cached, then schedule deferred close immediately.
 	 * So, new open will not use cached handle.
 	 */
 	spin_lock(&CIFS_I(inode)->deferred_lock);
 	is_deferred = cifs_is_deferred_close(cfile, &dclose);
-	if (is_deferred && cfile->deferred_scheduled) {
-		cfile->oplock_break_received = true;
+	if (is_deferred &&
+	    cfile->deferred_close_scheduled &&
+	    delayed_work_pending(&cfile->deferred)) {
 		mod_delayed_work(deferredclose_wq, &cfile->deferred, 0);
 	}
 	spin_unlock(&CIFS_I(inode)->deferred_lock);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 801a5300f765..34f2a7e80c58 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -672,6 +672,9 @@ cifs_add_pending_open(struct cifs_fid *fid, struct tcon_link *tlink,
 	spin_unlock(&tlink_tcon(open->tlink)->open_file_lock);
 }
 
+/*
+ * Critical section which runs after acquiring deferred_lock.
+ */
 bool
 cifs_is_deferred_close(struct cifsFileInfo *cfile, struct cifs_deferred_close **pdclose)
 {
@@ -688,6 +691,9 @@ cifs_is_deferred_close(struct cifsFileInfo *cfile, struct cifs_deferred_close **
 	return false;
 }
 
+/*
+ * Critical section which runs after acquiring deferred_lock.
+ */
 void
 cifs_add_deferred_close(struct cifsFileInfo *cfile, struct cifs_deferred_close *dclose)
 {
@@ -707,6 +713,9 @@ cifs_add_deferred_close(struct cifsFileInfo *cfile, struct cifs_deferred_close *
 	list_add_tail(&dclose->dlist, &CIFS_I(d_inode(cfile->dentry))->deferred_closes);
 }
 
+/*
+ * Critical section which runs after acquiring deferred_lock.
+ */
 void
 cifs_del_deferred_close(struct cifsFileInfo *cfile)
 {
-- 
cgit v1.2.3


From 0ab95c2510b641fb860a773b3d242ef9768a8f66 Mon Sep 17 00:00:00 2001
From: Rohith Surabattula <rohiths@microsoft.com>
Date: Mon, 17 May 2021 11:28:34 +0000
Subject: Defer close only when lease is enabled.

When smb2 lease parameter is disabled on server. Server grants
batch oplock instead of RHW lease by default on open, inode page cache
needs to be zapped immediatley upon close as cache is not valid.

Signed-off-by: Rohith Surabattula <rohiths@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifsglob.h | 1 +
 fs/cifs/file.c     | 1 +
 fs/cifs/smb2ops.c  | 2 ++
 3 files changed, 4 insertions(+)

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index ea90c53386b8..8488d7024462 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1417,6 +1417,7 @@ struct cifsInodeInfo {
 	struct inode vfs_inode;
 	struct list_head deferred_closes; /* list of deferred closes */
 	spinlock_t deferred_lock; /* protection on deferred list */
+	bool lease_granted; /* Flag to indicate whether lease or oplock is granted. */
 };
 
 static inline struct cifsInodeInfo *
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 304d9d3783c6..a1abd3da1d44 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -896,6 +896,7 @@ int cifs_close(struct inode *inode, struct file *file)
 		file->private_data = NULL;
 		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
 		if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
+		    cinode->lease_granted &&
 		    dclose) {
 			if (test_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags))
 				inode->i_ctime = inode->i_mtime = current_time(inode);
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index c693624a7267..21ef51d338e0 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -3983,6 +3983,7 @@ smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
 		      unsigned int epoch, bool *purge_cache)
 {
 	oplock &= 0xFF;
+	cinode->lease_granted = false;
 	if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE)
 		return;
 	if (oplock == SMB2_OPLOCK_LEVEL_BATCH) {
@@ -4009,6 +4010,7 @@ smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
 	unsigned int new_oplock = 0;
 
 	oplock &= 0xFF;
+	cinode->lease_granted = true;
 	if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE)
 		return;
 
-- 
cgit v1.2.3


From e2f5efd0f0e229bd110eab513e7c0331d61a4649 Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Thu, 20 May 2021 13:29:19 +1000
Subject: powerpc: Fix early setup to make early_ioremap() work

The immediate problem is that after commit
0bd3f9e953bd ("powerpc/legacy_serial: Use early_ioremap()") the kernel
silently reboots on some systems.

The reason is that early_ioremap() returns broken addresses as it uses
slot_virt[] array which initialized with offsets from FIXADDR_TOP ==
IOREMAP_END+FIXADDR_SIZE == KERN_IO_END - FIXADDR_SIZ + FIXADDR_SIZE ==
__kernel_io_end which is 0 when early_ioremap_setup() is called.
__kernel_io_end is initialized little bit later in early_init_mmu().

This fixes the initialization by swapping early_ioremap_setup() and
early_init_mmu().

Fixes: 265c3491c4bc ("powerpc: Add support for GENERIC_EARLY_IOREMAP")
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
[mpe: Drop unrelated cleanup & cleanup change log]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210520032919.358935-1-aik@ozlabs.ru
---
 arch/powerpc/kernel/setup_64.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index b779d25761cf..e42b85e4f1aa 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -369,11 +369,11 @@ void __init early_setup(unsigned long dt_ptr)
 	apply_feature_fixups();
 	setup_feature_keys();
 
-	early_ioremap_setup();
-
 	/* Initialize the hash table or TLB handling */
 	early_init_mmu();
 
+	early_ioremap_setup();
+
 	/*
 	 * After firmware and early platform setup code has set things up,
 	 * we note the SPR values for configurable control/performance
-- 
cgit v1.2.3


From 7e008b02557ccece4d2c31fb0eaf6243cbc87121 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Mon, 17 May 2021 13:20:17 +0200
Subject: dma-buf: fix unintended pin/unpin warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

DMA-buf internal users call the pin/unpin functions without having a
dynamic attachment. Avoid the warning and backtrace in the logs.

Signed-off-by: Christian König <christian.koenig@amd.com>
Bugs: https://gitlab.freedesktop.org/drm/intel/-/issues/3481
Fixes: c545781e1c55 ("dma-buf: doc polish for pin/unpin")
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
CC: stable@kernel.org
Link: https://patchwork.freedesktop.org/patch/msgid/20210517115705.2141-1-christian.koenig@amd.com
---
 drivers/dma-buf/dma-buf.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index f264b70c383e..eadd1eaa2fb5 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -760,7 +760,7 @@ dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev,
 
 		if (dma_buf_is_dynamic(attach->dmabuf)) {
 			dma_resv_lock(attach->dmabuf->resv, NULL);
-			ret = dma_buf_pin(attach);
+			ret = dmabuf->ops->pin(attach);
 			if (ret)
 				goto err_unlock;
 		}
@@ -786,7 +786,7 @@ err_attach:
 
 err_unpin:
 	if (dma_buf_is_dynamic(attach->dmabuf))
-		dma_buf_unpin(attach);
+		dmabuf->ops->unpin(attach);
 
 err_unlock:
 	if (dma_buf_is_dynamic(attach->dmabuf))
@@ -843,7 +843,7 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
 		__unmap_dma_buf(attach, attach->sgt, attach->dir);
 
 		if (dma_buf_is_dynamic(attach->dmabuf)) {
-			dma_buf_unpin(attach);
+			dmabuf->ops->unpin(attach);
 			dma_resv_unlock(attach->dmabuf->resv);
 		}
 	}
@@ -956,7 +956,7 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
 	if (dma_buf_is_dynamic(attach->dmabuf)) {
 		dma_resv_assert_held(attach->dmabuf->resv);
 		if (!IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) {
-			r = dma_buf_pin(attach);
+			r = attach->dmabuf->ops->pin(attach);
 			if (r)
 				return ERR_PTR(r);
 		}
@@ -968,7 +968,7 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
 
 	if (IS_ERR(sg_table) && dma_buf_is_dynamic(attach->dmabuf) &&
 	     !IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY))
-		dma_buf_unpin(attach);
+		attach->dmabuf->ops->unpin(attach);
 
 	if (!IS_ERR(sg_table) && attach->dmabuf->ops->cache_sgt_mapping) {
 		attach->sgt = sg_table;
-- 
cgit v1.2.3


From e68671e9e1275dfdda333c3e83b6d28963af16b6 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 20 May 2021 11:32:28 +0200
Subject: platform/x86: touchscreen_dmi: Add info for the Chuwi Hi10 Pro
 (CWI529) tablet

Add touchscreen info for the Chuwi Hi10 Pro (CWI529) tablet. This includes
info for getting the firmware directly from the UEFI, so that the user does
not need to manually install the firmware in /lib/firmware/silead.

This change will make the touchscreen on these devices work OOTB,
without requiring any manual setup.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210520093228.7439-1-hdegoede@redhat.com
---
 drivers/platform/x86/touchscreen_dmi.c | 35 ++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c
index 875519c6c206..bde740d6120e 100644
--- a/drivers/platform/x86/touchscreen_dmi.c
+++ b/drivers/platform/x86/touchscreen_dmi.c
@@ -115,6 +115,32 @@ static const struct ts_dmi_data chuwi_hi10_plus_data = {
 	.properties     = chuwi_hi10_plus_props,
 };
 
+static const struct property_entry chuwi_hi10_pro_props[] = {
+	PROPERTY_ENTRY_U32("touchscreen-min-x", 8),
+	PROPERTY_ENTRY_U32("touchscreen-min-y", 8),
+	PROPERTY_ENTRY_U32("touchscreen-size-x", 1912),
+	PROPERTY_ENTRY_U32("touchscreen-size-y", 1272),
+	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
+	PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-chuwi-hi10-pro.fw"),
+	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
+	PROPERTY_ENTRY_BOOL("silead,home-button"),
+	{ }
+};
+
+static const struct ts_dmi_data chuwi_hi10_pro_data = {
+	.embedded_fw = {
+		.name	= "silead/gsl1680-chuwi-hi10-pro.fw",
+		.prefix = { 0xf0, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00 },
+		.length	= 42504,
+		.sha256	= { 0xdb, 0x92, 0x68, 0xa8, 0xdb, 0x81, 0x31, 0x00,
+			    0x1f, 0x58, 0x89, 0xdb, 0x19, 0x1b, 0x15, 0x8c,
+			    0x05, 0x14, 0xf4, 0x95, 0xba, 0x15, 0x45, 0x98,
+			    0x42, 0xa3, 0xbb, 0x65, 0xe3, 0x30, 0xa5, 0x93 },
+	},
+	.acpi_name      = "MSSL1680:00",
+	.properties     = chuwi_hi10_pro_props,
+};
+
 static const struct property_entry chuwi_vi8_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-min-x", 4),
 	PROPERTY_ENTRY_U32("touchscreen-min-y", 6),
@@ -915,6 +941,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
 			DMI_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"),
 		},
 	},
+	{
+		/* Chuwi Hi10 Prus (CWI597) */
+		.driver_data = (void *)&chuwi_hi10_pro_data,
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "Hampoo"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Hi10 pro tablet"),
+			DMI_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"),
+		},
+	},
 	{
 		/* Chuwi Vi8 (CWI506) */
 		.driver_data = (void *)&chuwi_vi8_data,
-- 
cgit v1.2.3


From ba5ef6dc8a827a904794210a227cdb94828e8ae7 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 20 May 2021 13:21:20 +0100
Subject: io_uring: fortify tctx/io_wq cleanup

We don't want anyone poking into tctx->io_wq awhile it's being destroyed
by io_wq_put_and_exit(), and even though it shouldn't even happen, if
buggy would be preferable to get a NULL-deref instead of subtle delayed
failure or UAF.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/827b021de17926fd807610b3e53a5a5fa8530856.1621513214.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 89ec10471b30..5f82954004f6 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -9035,15 +9035,15 @@ static void io_uring_del_task_file(unsigned long index)
 
 static void io_uring_clean_tctx(struct io_uring_task *tctx)
 {
+	struct io_wq *wq = tctx->io_wq;
 	struct io_tctx_node *node;
 	unsigned long index;
 
+	tctx->io_wq = NULL;
 	xa_for_each(&tctx->xa, index, node)
 		io_uring_del_task_file(index);
-	if (tctx->io_wq) {
-		io_wq_put_and_exit(tctx->io_wq);
-		tctx->io_wq = NULL;
-	}
+	if (wq)
+		io_wq_put_and_exit(wq);
 }
 
 static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
-- 
cgit v1.2.3


From e380adfc213a13677993c0e35cb48f5a8e61ebb0 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Date: Wed, 19 May 2021 00:40:27 +0900
Subject: btrfs: zoned: pass start block to btrfs_use_zone_append

btrfs_use_zone_append only needs the passed in extent_map's block_start
member, so there's no need to pass in the full extent map.

This also enables the use of btrfs_use_zone_append in places where we only
have a start byte but no extent_map.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 2 +-
 fs/btrfs/inode.c     | 2 +-
 fs/btrfs/zoned.c     | 4 ++--
 fs/btrfs/zoned.h     | 5 ++---
 4 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 360d997c7226..d9f20ca3ac7d 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3762,7 +3762,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 		/* Note that em_end from extent_map_end() is exclusive */
 		iosize = min(em_end, end + 1) - cur;
 
-		if (btrfs_use_zone_append(inode, em))
+		if (btrfs_use_zone_append(inode, em->block_start))
 			opf = REQ_OP_ZONE_APPEND;
 
 		free_extent_map(em);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 095e452f59f0..bb4ab408d670 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7796,7 +7796,7 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
 	iomap->bdev = fs_info->fs_devices->latest_bdev;
 	iomap->length = len;
 
-	if (write && btrfs_use_zone_append(BTRFS_I(inode), em))
+	if (write && btrfs_use_zone_append(BTRFS_I(inode), em->block_start))
 		iomap->flags |= IOMAP_F_ZONE_APPEND;
 
 	free_extent_map(em);
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 304ce64c70a4..1bb8ee97aae0 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1278,7 +1278,7 @@ void btrfs_free_redirty_list(struct btrfs_transaction *trans)
 	spin_unlock(&trans->releasing_ebs_lock);
 }
 
-bool btrfs_use_zone_append(struct btrfs_inode *inode, struct extent_map *em)
+bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start)
 {
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	struct btrfs_block_group *cache;
@@ -1293,7 +1293,7 @@ bool btrfs_use_zone_append(struct btrfs_inode *inode, struct extent_map *em)
 	if (!is_data_inode(&inode->vfs_inode))
 		return false;
 
-	cache = btrfs_lookup_block_group(fs_info, em->block_start);
+	cache = btrfs_lookup_block_group(fs_info, start);
 	ASSERT(cache);
 	if (!cache)
 		return false;
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index 5e41a74a9cb2..e55d32595c2c 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -53,7 +53,7 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache);
 void btrfs_redirty_list_add(struct btrfs_transaction *trans,
 			    struct extent_buffer *eb);
 void btrfs_free_redirty_list(struct btrfs_transaction *trans);
-bool btrfs_use_zone_append(struct btrfs_inode *inode, struct extent_map *em);
+bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start);
 void btrfs_record_physical_zoned(struct inode *inode, u64 file_offset,
 				 struct bio *bio);
 void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered);
@@ -152,8 +152,7 @@ static inline void btrfs_redirty_list_add(struct btrfs_transaction *trans,
 					  struct extent_buffer *eb) { }
 static inline void btrfs_free_redirty_list(struct btrfs_transaction *trans) { }
 
-static inline bool btrfs_use_zone_append(struct btrfs_inode *inode,
-					 struct extent_map *em)
+static inline bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start)
 {
 	return false;
 }
-- 
cgit v1.2.3


From 764c7c9a464b68f7c6a5a9ec0b923176a05e8e8f Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Date: Wed, 19 May 2021 00:40:28 +0900
Subject: btrfs: zoned: fix parallel compressed writes

When multiple processes write data to the same block group on a
compressed zoned filesystem, the underlying device could report I/O
errors and data corruption is possible.

This happens because on a zoned file system, compressed data writes
where sent to the device via a REQ_OP_WRITE instead of a
REQ_OP_ZONE_APPEND operation. But with REQ_OP_WRITE and parallel
submission it cannot be guaranteed that the data is always submitted
aligned to the underlying zone's write pointer.

The change to using REQ_OP_ZONE_APPEND instead of REQ_OP_WRITE on a
zoned filesystem is non intrusive on a regular file system or when
submitting to a conventional zone on a zoned filesystem, as it is
guarded by btrfs_use_zone_append.

Reported-by: David Sterba <dsterba@suse.com>
Fixes: 9d294a685fbc ("btrfs: zoned: enable to mount ZONED incompat flag")
CC: stable@vger.kernel.org # 5.12.x: e380adfc213a13: btrfs: zoned: pass start block to btrfs_use_zone_append
CC: stable@vger.kernel.org # 5.12.x
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/compression.c | 42 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 17f93fd28f7e..91743a0b34c5 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -28,6 +28,7 @@
 #include "compression.h"
 #include "extent_io.h"
 #include "extent_map.h"
+#include "zoned.h"
 
 static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" };
 
@@ -349,6 +350,7 @@ static void end_compressed_bio_write(struct bio *bio)
 	 */
 	inode = cb->inode;
 	cb->compressed_pages[0]->mapping = cb->inode->i_mapping;
+	btrfs_record_physical_zoned(inode, cb->start, bio);
 	btrfs_writepage_endio_finish_ordered(cb->compressed_pages[0],
 			cb->start, cb->start + cb->len - 1,
 			bio->bi_status == BLK_STS_OK);
@@ -401,6 +403,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 	u64 first_byte = disk_start;
 	blk_status_t ret;
 	int skip_sum = inode->flags & BTRFS_INODE_NODATASUM;
+	const bool use_append = btrfs_use_zone_append(inode, disk_start);
+	const unsigned int bio_op = use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE;
 
 	WARN_ON(!PAGE_ALIGNED(start));
 	cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
@@ -418,10 +422,31 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 	cb->nr_pages = nr_pages;
 
 	bio = btrfs_bio_alloc(first_byte);
-	bio->bi_opf = REQ_OP_WRITE | write_flags;
+	bio->bi_opf = bio_op | write_flags;
 	bio->bi_private = cb;
 	bio->bi_end_io = end_compressed_bio_write;
 
+	if (use_append) {
+		struct extent_map *em;
+		struct map_lookup *map;
+		struct block_device *bdev;
+
+		em = btrfs_get_chunk_map(fs_info, disk_start, PAGE_SIZE);
+		if (IS_ERR(em)) {
+			kfree(cb);
+			bio_put(bio);
+			return BLK_STS_NOTSUPP;
+		}
+
+		map = em->map_lookup;
+		/* We only support single profile for now */
+		ASSERT(map->num_stripes == 1);
+		bdev = map->stripes[0].dev->bdev;
+
+		bio_set_dev(bio, bdev);
+		free_extent_map(em);
+	}
+
 	if (blkcg_css) {
 		bio->bi_opf |= REQ_CGROUP_PUNT;
 		kthread_associate_blkcg(blkcg_css);
@@ -432,6 +457,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 	bytes_left = compressed_len;
 	for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
 		int submit = 0;
+		int len;
 
 		page = compressed_pages[pg_index];
 		page->mapping = inode->vfs_inode.i_mapping;
@@ -439,9 +465,13 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 			submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE, bio,
 							  0);
 
+		if (pg_index == 0 && use_append)
+			len = bio_add_zone_append_page(bio, page, PAGE_SIZE, 0);
+		else
+			len = bio_add_page(bio, page, PAGE_SIZE, 0);
+
 		page->mapping = NULL;
-		if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) <
-		    PAGE_SIZE) {
+		if (submit || len < PAGE_SIZE) {
 			/*
 			 * inc the count before we submit the bio so
 			 * we know the end IO handler won't happen before
@@ -465,11 +495,15 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 			}
 
 			bio = btrfs_bio_alloc(first_byte);
-			bio->bi_opf = REQ_OP_WRITE | write_flags;
+			bio->bi_opf = bio_op | write_flags;
 			bio->bi_private = cb;
 			bio->bi_end_io = end_compressed_bio_write;
 			if (blkcg_css)
 				bio->bi_opf |= REQ_CGROUP_PUNT;
+			/*
+			 * Use bio_add_page() to ensure the bio has at least one
+			 * page.
+			 */
 			bio_add_page(bio, page, PAGE_SIZE, 0);
 		}
 		if (bytes_left < PAGE_SIZE) {
-- 
cgit v1.2.3


From 6c60ff048ca1e0739f39aa25996543c6e662a46c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 14 May 2021 15:18:41 +0200
Subject: block: prevent block device lookups at the beginning of del_gendisk

As an artifact of how gendisk lookup used to work in earlier kernels,
GENHD_FL_UP is only cleared very late in del_gendisk, and a global lock
is used to prevent opens from succeeding while del_gendisk is tearing
down the gendisk.  Switch to clearing the flag early and under bd_mutex
so that callers can use bd_mutex to stabilize the flag, which removes
the need for the global mutex.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20210514131842.1600568-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/genhd.c         | 11 +----------
 fs/block_dev.c        | 15 +++++----------
 include/linux/genhd.h |  2 --
 3 files changed, 6 insertions(+), 22 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 39ca97b0edc6..9f8cb7beaad1 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -29,8 +29,6 @@
 
 static struct kobject *block_depr;
 
-DECLARE_RWSEM(bdev_lookup_sem);
-
 /* for extended dynamic devt allocation, currently only one major is used */
 #define NR_EXT_DEVT		(1 << MINORBITS)
 static DEFINE_IDA(ext_devt_ida);
@@ -609,13 +607,8 @@ void del_gendisk(struct gendisk *disk)
 	blk_integrity_del(disk);
 	disk_del_events(disk);
 
-	/*
-	 * Block lookups of the disk until all bdevs are unhashed and the
-	 * disk is marked as dead (GENHD_FL_UP cleared).
-	 */
-	down_write(&bdev_lookup_sem);
-
 	mutex_lock(&disk->part0->bd_mutex);
+	disk->flags &= ~GENHD_FL_UP;
 	blk_drop_partitions(disk);
 	mutex_unlock(&disk->part0->bd_mutex);
 
@@ -629,8 +622,6 @@ void del_gendisk(struct gendisk *disk)
 	remove_inode_hash(disk->part0->bd_inode);
 
 	set_capacity(disk, 0);
-	disk->flags &= ~GENHD_FL_UP;
-	up_write(&bdev_lookup_sem);
 
 	if (!(disk->flags & GENHD_FL_HIDDEN)) {
 		sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
diff --git a/fs/block_dev.c b/fs/block_dev.c
index eb265d72fce8..580bae995b87 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1298,6 +1298,9 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode)
 	struct gendisk *disk = bdev->bd_disk;
 	int ret = 0;
 
+	if (!(disk->flags & GENHD_FL_UP))
+		return -ENXIO;
+
 	if (!bdev->bd_openers) {
 		if (!bdev_is_partition(bdev)) {
 			ret = 0;
@@ -1332,8 +1335,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode)
 			whole->bd_part_count++;
 			mutex_unlock(&whole->bd_mutex);
 
-			if (!(disk->flags & GENHD_FL_UP) ||
-			    !bdev_nr_sectors(bdev)) {
+			if (!bdev_nr_sectors(bdev)) {
 				__blkdev_put(whole, mode, 1);
 				bdput(whole);
 				return -ENXIO;
@@ -1364,16 +1366,12 @@ struct block_device *blkdev_get_no_open(dev_t dev)
 	struct block_device *bdev;
 	struct gendisk *disk;
 
-	down_read(&bdev_lookup_sem);
 	bdev = bdget(dev);
 	if (!bdev) {
-		up_read(&bdev_lookup_sem);
 		blk_request_module(dev);
-		down_read(&bdev_lookup_sem);
-
 		bdev = bdget(dev);
 		if (!bdev)
-			goto unlock;
+			return NULL;
 	}
 
 	disk = bdev->bd_disk;
@@ -1383,14 +1381,11 @@ struct block_device *blkdev_get_no_open(dev_t dev)
 		goto put_disk;
 	if (!try_module_get(bdev->bd_disk->fops->owner))
 		goto put_disk;
-	up_read(&bdev_lookup_sem);
 	return bdev;
 put_disk:
 	put_disk(disk);
 bdput:
 	bdput(bdev);
-unlock:
-	up_read(&bdev_lookup_sem);
 	return NULL;
 }
 
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 7e9660ea967d..6fc26f7bdf71 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -306,8 +306,6 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev,
 }
 #endif /* CONFIG_SYSFS */
 
-extern struct rw_semaphore bdev_lookup_sem;
-
 dev_t blk_lookup_devt(const char *name, int partno);
 void blk_request_module(dev_t devt);
 #ifdef CONFIG_BLOCK
-- 
cgit v1.2.3


From bc6a385132601c29a6da1dbf8148c0d3c9ad36dc Mon Sep 17 00:00:00 2001
From: Gulam Mohamed <gulam.mohamed@oracle.com>
Date: Fri, 14 May 2021 15:18:42 +0200
Subject: block: fix a race between del_gendisk and BLKRRPART

When BLKRRPART is called concurrently with del_gendisk, the partitions
rescan can create a stale partition that will never be be cleaned up.

Fix this by checking the the disk is up before rescanning partitions
while under bd_mutex.

Signed-off-by: Gulam Mohamed <gulam.mohamed@oracle.com>
[hch: split from a larger patch]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20210514131842.1600568-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/block_dev.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 580bae995b87..4494411fa4d3 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1244,6 +1244,9 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate)
 
 	lockdep_assert_held(&bdev->bd_mutex);
 
+	if (!(disk->flags & GENHD_FL_UP))
+		return -ENXIO;
+
 rescan:
 	if (bdev->bd_part_count)
 		return -EBUSY;
-- 
cgit v1.2.3


From 5665bc35c1ed917ac8fd06cb651317bb47a65b10 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Thu, 20 May 2021 21:19:30 +1000
Subject: powerpc/64s/syscall: Use pt_regs.trap to distinguish syscall ABI
 difference between sc and scv syscalls

The sc and scv 0 system calls have different ABI conventions, and
ptracers need to know which system call type is being used if they want
to look at the syscall registers.

Document that pt_regs.trap can be used for this, and fix one in-tree user
to work with scv 0 syscalls.

Fixes: 7fa95f9adaee ("powerpc/64s: system call support for scv/rfscv instructions")
Cc: stable@vger.kernel.org # v5.9+
Reported-by: "Dmitry V. Levin" <ldv@altlinux.org>
Suggested-by: "Dmitry V. Levin" <ldv@altlinux.org>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210520111931.2597127-1-npiggin@gmail.com
---
 Documentation/powerpc/syscall64-abi.rst       | 10 ++++++++++
 tools/testing/selftests/seccomp/seccomp_bpf.c | 27 ++++++++++++++++++---------
 2 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/Documentation/powerpc/syscall64-abi.rst b/Documentation/powerpc/syscall64-abi.rst
index dabee3729e5a..56490c4c0c07 100644
--- a/Documentation/powerpc/syscall64-abi.rst
+++ b/Documentation/powerpc/syscall64-abi.rst
@@ -109,6 +109,16 @@ auxiliary vector.
 
 scv 0 syscalls will always behave as PPC_FEATURE2_HTM_NOSC.
 
+ptrace
+------
+When ptracing system calls (PTRACE_SYSCALL), the pt_regs.trap value contains
+the system call type that can be used to distinguish between sc and scv 0
+system calls, and the different register conventions can be accounted for.
+
+If the value of (pt_regs.trap & 0xfff0) is 0xc00 then the system call was
+performed with the sc instruction, if it is 0x3000 then the system call was
+performed with the scv 0 instruction.
+
 vsyscall
 ========
 
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 98c3b647f54d..e3d5c77a8612 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -1753,16 +1753,25 @@ TEST_F(TRACE_poke, getpid_runs_normally)
 # define SYSCALL_RET_SET(_regs, _val)				\
 	do {							\
 		typeof(_val) _result = (_val);			\
-		/*						\
-		 * A syscall error is signaled by CR0 SO bit	\
-		 * and the code is stored as a positive value.	\
-		 */						\
-		if (_result < 0) {				\
-			SYSCALL_RET(_regs) = -_result;		\
-			(_regs).ccr |= 0x10000000;		\
-		} else {					\
+		if ((_regs.trap & 0xfff0) == 0x3000) {		\
+			/*					\
+			 * scv 0 system call uses -ve result	\
+			 * for error, so no need to adjust.	\
+			 */					\
 			SYSCALL_RET(_regs) = _result;		\
-			(_regs).ccr &= ~0x10000000;		\
+		} else {					\
+			/*					\
+			 * A syscall error is signaled by the	\
+			 * CR0 SO bit and the code is stored as	\
+			 * a positive value.			\
+			 */					\
+			if (_result < 0) {			\
+				SYSCALL_RET(_regs) = -_result;	\
+				(_regs).ccr |= 0x10000000;	\
+			} else {				\
+				SYSCALL_RET(_regs) = _result;	\
+				(_regs).ccr &= ~0x10000000;	\
+			}					\
 		}						\
 	} while (0)
 # define SYSCALL_RET_SET_ON_PTRACE_EXIT
-- 
cgit v1.2.3


From d72500f992849d31ebae8f821a023660ddd0dcc2 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Thu, 20 May 2021 21:19:31 +1000
Subject: powerpc/64s/syscall: Fix ptrace syscall info with scv syscalls

The scv implementation missed updating syscall return value and error
value get/set functions to deal with the changed register ABI. This
broke ptrace PTRACE_GET_SYSCALL_INFO as well as some kernel auditing
and tracing functions.

Fix. tools/testing/selftests/ptrace/get_syscall_info now passes when
scv is used.

Fixes: 7fa95f9adaee ("powerpc/64s: system call support for scv/rfscv instructions")
Cc: stable@vger.kernel.org # v5.9+
Reported-by: "Dmitry V. Levin" <ldv@altlinux.org>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Dmitry V. Levin <ldv@altlinux.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210520111931.2597127-2-npiggin@gmail.com
---
 arch/powerpc/include/asm/ptrace.h  | 45 ++++++++++++++++++++++----------------
 arch/powerpc/include/asm/syscall.h | 42 +++++++++++++++++++++--------------
 2 files changed, 52 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 9c9ab2746168..b476a685f066 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -19,6 +19,7 @@
 #ifndef _ASM_POWERPC_PTRACE_H
 #define _ASM_POWERPC_PTRACE_H
 
+#include <linux/err.h>
 #include <uapi/asm/ptrace.h>
 #include <asm/asm-const.h>
 
@@ -152,25 +153,6 @@ extern unsigned long profile_pc(struct pt_regs *regs);
 long do_syscall_trace_enter(struct pt_regs *regs);
 void do_syscall_trace_leave(struct pt_regs *regs);
 
-#define kernel_stack_pointer(regs) ((regs)->gpr[1])
-static inline int is_syscall_success(struct pt_regs *regs)
-{
-	return !(regs->ccr & 0x10000000);
-}
-
-static inline long regs_return_value(struct pt_regs *regs)
-{
-	if (is_syscall_success(regs))
-		return regs->gpr[3];
-	else
-		return -regs->gpr[3];
-}
-
-static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
-{
-	regs->gpr[3] = rc;
-}
-
 #ifdef __powerpc64__
 #define user_mode(regs) ((((regs)->msr) >> MSR_PR_LG) & 0x1)
 #else
@@ -235,6 +217,31 @@ static __always_inline void set_trap_norestart(struct pt_regs *regs)
 	regs->trap |= 0x1;
 }
 
+#define kernel_stack_pointer(regs) ((regs)->gpr[1])
+static inline int is_syscall_success(struct pt_regs *regs)
+{
+	if (trap_is_scv(regs))
+		return !IS_ERR_VALUE((unsigned long)regs->gpr[3]);
+	else
+		return !(regs->ccr & 0x10000000);
+}
+
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	if (trap_is_scv(regs))
+		return regs->gpr[3];
+
+	if (is_syscall_success(regs))
+		return regs->gpr[3];
+	else
+		return -regs->gpr[3];
+}
+
+static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
+{
+	regs->gpr[3] = rc;
+}
+
 #define arch_has_single_step()	(1)
 #define arch_has_block_step()	(true)
 #define ARCH_HAS_USER_SINGLE_STEP_REPORT
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index fd1b518eed17..ba0f88f3a30d 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -41,11 +41,17 @@ static inline void syscall_rollback(struct task_struct *task,
 static inline long syscall_get_error(struct task_struct *task,
 				     struct pt_regs *regs)
 {
-	/*
-	 * If the system call failed,
-	 * regs->gpr[3] contains a positive ERRORCODE.
-	 */
-	return (regs->ccr & 0x10000000UL) ? -regs->gpr[3] : 0;
+	if (trap_is_scv(regs)) {
+		unsigned long error = regs->gpr[3];
+
+		return IS_ERR_VALUE(error) ? error : 0;
+	} else {
+		/*
+		 * If the system call failed,
+		 * regs->gpr[3] contains a positive ERRORCODE.
+		 */
+		return (regs->ccr & 0x10000000UL) ? -regs->gpr[3] : 0;
+	}
 }
 
 static inline long syscall_get_return_value(struct task_struct *task,
@@ -58,18 +64,22 @@ static inline void syscall_set_return_value(struct task_struct *task,
 					    struct pt_regs *regs,
 					    int error, long val)
 {
-	/*
-	 * In the general case it's not obvious that we must deal with CCR
-	 * here, as the syscall exit path will also do that for us. However
-	 * there are some places, eg. the signal code, which check ccr to
-	 * decide if the value in r3 is actually an error.
-	 */
-	if (error) {
-		regs->ccr |= 0x10000000L;
-		regs->gpr[3] = error;
+	if (trap_is_scv(regs)) {
+		regs->gpr[3] = (long) error ?: val;
 	} else {
-		regs->ccr &= ~0x10000000L;
-		regs->gpr[3] = val;
+		/*
+		 * In the general case it's not obvious that we must deal with
+		 * CCR here, as the syscall exit path will also do that for us.
+		 * However there are some places, eg. the signal code, which
+		 * check ccr to decide if the value in r3 is actually an error.
+		 */
+		if (error) {
+			regs->ccr |= 0x10000000L;
+			regs->gpr[3] = error;
+		} else {
+			regs->ccr &= ~0x10000000L;
+			regs->gpr[3] = val;
+		}
 	}
 }
 
-- 
cgit v1.2.3


From 676a659b60afb13166371580f3f6f434e9ba6f21 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Sun, 9 May 2021 16:22:55 -0700
Subject: xfs: retry allocations when locality-based search fails

If a realtime allocation fails because we can't find a sufficiently
large free extent satisfying locality rules, relax the locality rules
and try again.  This reduces the occurrence of short writes to realtime
files when the write size is large and the free space is fragmented.

This was originally discovered by running generic/186 with the realtime
reflink patchset and a 128k cow extent size hint, but the short write
symptoms can manifest with a 128k extent size hint and no reflink, so
apply the fix now.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
---
 fs/xfs/xfs_bmap_util.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index c9381bf4f04b..0936f3a96fe6 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -84,6 +84,7 @@ xfs_bmap_rtalloc(
 	xfs_extlen_t		minlen = mp->m_sb.sb_rextsize;
 	xfs_extlen_t		raminlen;
 	bool			rtlocked = false;
+	bool			ignore_locality = false;
 	int			error;
 
 	align = xfs_get_extsz_hint(ap->ip);
@@ -158,7 +159,10 @@ retry:
 	/*
 	 * Realtime allocation, done through xfs_rtallocate_extent.
 	 */
-	do_div(ap->blkno, mp->m_sb.sb_rextsize);
+	if (ignore_locality)
+		ap->blkno = 0;
+	else
+		do_div(ap->blkno, mp->m_sb.sb_rextsize);
 	rtb = ap->blkno;
 	ap->length = ralen;
 	raminlen = max_t(xfs_extlen_t, 1, minlen / mp->m_sb.sb_rextsize);
@@ -197,6 +201,15 @@ retry:
 		goto retry;
 	}
 
+	if (!ignore_locality && ap->blkno != 0) {
+		/*
+		 * If we can't allocate near a specific rt extent, try again
+		 * without locality criteria.
+		 */
+		ignore_locality = true;
+		goto retry;
+	}
+
 	ap->blkno = NULLFSBLOCK;
 	ap->length = 0;
 	return 0;
-- 
cgit v1.2.3


From 16c9de54dc868c121918f2ae91e46330f919049f Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Wed, 12 May 2021 16:41:13 -0700
Subject: xfs: fix deadlock retry tracepoint arguments

sc->ip is the inode that's being scrubbed, which means that it's not set
for scrub types that don't involve inodes.  If one of those scrubbers
(e.g. inode btrees) returns EDEADLOCK, we'll trip over the null pointer.
Fix that by reporting either the file being examined or the file that
was used to call scrub.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/scrub/common.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index aa874607618a..be38c960da85 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -74,7 +74,9 @@ __xchk_process_error(
 		return true;
 	case -EDEADLOCK:
 		/* Used to restart an op with deadlock avoidance. */
-		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
+		trace_xchk_deadlock_retry(
+				sc->ip ? sc->ip : XFS_I(file_inode(sc->file)),
+				sc->sm, *error);
 		break;
 	case -EFSBADCRC:
 	case -EFSCORRUPTED:
-- 
cgit v1.2.3


From e3c2b047475b52739bcf178a9e95176c42bbcf8f Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Wed, 12 May 2021 16:43:10 -0700
Subject: xfs: restore old ioctl definitions

These ioctl definitions in xfs_fs.h are part of the userspace ABI and
were mistakenly removed during the 5.13 merge window.

Fixes: 9fefd5db08ce ("xfs: convert to fileattr")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_fs.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index a83bdd0c47a8..bde2b4c64dbe 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -770,6 +770,8 @@ struct xfs_scrub_metadata {
 /*
  * ioctl commands that are used by Linux filesystems
  */
+#define XFS_IOC_GETXFLAGS	FS_IOC_GETFLAGS
+#define XFS_IOC_SETXFLAGS	FS_IOC_SETFLAGS
 #define XFS_IOC_GETVERSION	FS_IOC_GETVERSION
 
 /*
@@ -780,6 +782,8 @@ struct xfs_scrub_metadata {
 #define XFS_IOC_ALLOCSP		_IOW ('X', 10, struct xfs_flock64)
 #define XFS_IOC_FREESP		_IOW ('X', 11, struct xfs_flock64)
 #define XFS_IOC_DIOINFO		_IOR ('X', 30, struct dioattr)
+#define XFS_IOC_FSGETXATTR	FS_IOC_FSGETXATTR
+#define XFS_IOC_FSSETXATTR	FS_IOC_FSSETXATTR
 #define XFS_IOC_ALLOCSP64	_IOW ('X', 36, struct xfs_flock64)
 #define XFS_IOC_FREESP64	_IOW ('X', 37, struct xfs_flock64)
 #define XFS_IOC_GETBMAP		_IOWR('X', 38, struct getbmap)
-- 
cgit v1.2.3


From 45e1ba40837ac2f6f4d4716bddb8d44bd7e4a251 Mon Sep 17 00:00:00 2001
From: Shakeel Butt <shakeelb@google.com>
Date: Wed, 12 May 2021 13:19:46 -0700
Subject: cgroup: disable controllers at parse time
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch effectively reverts the commit a3e72739b7a7 ("cgroup: fix
too early usage of static_branch_disable()"). The commit 6041186a3258
("init: initialize jump labels before command line option parsing") has
moved the jump_label_init() before parse_args() which has made the
commit a3e72739b7a7 unnecessary. On the other hand there are
consequences of disabling the controllers later as there are subsystems
doing the controller checks for different decisions. One such incident
is reported [1] regarding the memory controller and its impact on memory
reclaim code.

[1] https://lore.kernel.org/linux-mm/921e53f3-4b13-aab8-4a9e-e83ff15371e4@nec.com

Signed-off-by: Shakeel Butt <shakeelb@google.com>
Reported-by: NOMURA JUNICHI(野村　淳一) <junichi.nomura@nec.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Tested-by: Jun'ichi Nomura <junichi.nomura@nec.com>
---
 kernel/cgroup/cgroup.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index e049edd66776..e7a9a2998245 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -5634,8 +5634,6 @@ int __init cgroup_init_early(void)
 	return 0;
 }
 
-static u16 cgroup_disable_mask __initdata;
-
 /**
  * cgroup_init - cgroup initialization
  *
@@ -5694,12 +5692,8 @@ int __init cgroup_init(void)
 		 * disabled flag and cftype registration needs kmalloc,
 		 * both of which aren't available during early_init.
 		 */
-		if (cgroup_disable_mask & (1 << ssid)) {
-			static_branch_disable(cgroup_subsys_enabled_key[ssid]);
-			printk(KERN_INFO "Disabling %s control group subsystem\n",
-			       ss->name);
+		if (!cgroup_ssid_enabled(ssid))
 			continue;
-		}
 
 		if (cgroup1_ssid_disabled(ssid))
 			printk(KERN_INFO "Disabling %s control group subsystem in v1 mounts\n",
@@ -6214,7 +6208,10 @@ static int __init cgroup_disable(char *str)
 			if (strcmp(token, ss->name) &&
 			    strcmp(token, ss->legacy_name))
 				continue;
-			cgroup_disable_mask |= 1 << i;
+
+			static_branch_disable(cgroup_subsys_enabled_key[i]);
+			pr_info("Disabling %s control group subsystem\n",
+				ss->name);
 		}
 	}
 	return 1;
-- 
cgit v1.2.3


From 940d71c6462e8151c78f28e4919aa8882ff2054e Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <senozhatsky@chromium.org>
Date: Thu, 20 May 2021 19:14:22 +0900
Subject: wq: handle VM suspension in stall detection

If VCPU is suspended (VM suspend) in wq_watchdog_timer_fn() then
once this VCPU resumes it will see the new jiffies value, while it
may take a while before IRQ detects PVCLOCK_GUEST_STOPPED on this
VCPU and updates all the watchdogs via pvclock_touch_watchdogs().
There is a small chance of misreported WQ stalls in the meantime,
because new jiffies is time_after() old 'ts + thresh'.

wq_watchdog_timer_fn()
{
	for_each_pool(pool, pi) {
		if (time_after(jiffies, ts + thresh)) {
			pr_emerg("BUG: workqueue lockup - pool");
		}
	}
}

Save jiffies at the beginning of this function and use that value
for stall detection. If VM gets suspended then we continue using
"old" jiffies value and old WQ touch timestamps. If IRQ at some
point restarts the stall detection cycle (pvclock_touch_watchdogs())
then old jiffies will always be before new 'ts + thresh'.

Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index b19d759e55a5..50142fc08902 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -50,6 +50,7 @@
 #include <linux/uaccess.h>
 #include <linux/sched/isolation.h>
 #include <linux/nmi.h>
+#include <linux/kvm_para.h>
 
 #include "workqueue_internal.h"
 
@@ -5772,6 +5773,7 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
 {
 	unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
 	bool lockup_detected = false;
+	unsigned long now = jiffies;
 	struct worker_pool *pool;
 	int pi;
 
@@ -5786,6 +5788,12 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
 		if (list_empty(&pool->worklist))
 			continue;
 
+		/*
+		 * If a virtual machine is stopped by the host it can look to
+		 * the watchdog like a stall.
+		 */
+		kvm_check_and_clear_guest_paused();
+
 		/* get the latest of pool and touched timestamps */
 		if (pool->cpu >= 0)
 			touched = READ_ONCE(per_cpu(wq_watchdog_touched_cpu, pool->cpu));
@@ -5799,12 +5807,12 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
 			ts = touched;
 
 		/* did we stall? */
-		if (time_after(jiffies, ts + thresh)) {
+		if (time_after(now, ts + thresh)) {
 			lockup_detected = true;
 			pr_emerg("BUG: workqueue lockup - pool");
 			pr_cont_pool_info(pool);
 			pr_cont(" stuck for %us!\n",
-				jiffies_to_msecs(jiffies - pool_ts) / 1000);
+				jiffies_to_msecs(now - pool_ts) / 1000);
 		}
 	}
 
-- 
cgit v1.2.3


From 9687c85dfbf84a6a37522626b4d5c5191a695e6c Mon Sep 17 00:00:00 2001
From: Rohith Surabattula <rohiths@microsoft.com>
Date: Thu, 20 May 2021 16:45:01 +0000
Subject: Fix KASAN identified use-after-free issue.

[  612.157429] ==================================================================
[  612.158275] BUG: KASAN: use-after-free in process_one_work+0x90/0x9b0
[  612.158801] Read of size 8 at addr ffff88810a31ca60 by task kworker/2:9/2382

[  612.159611] CPU: 2 PID: 2382 Comm: kworker/2:9 Tainted: G
OE     5.13.0-rc2+ #98
[  612.159623] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS 1.14.0-1.fc33 04/01/2014
[  612.159640] Workqueue:  0x0 (deferredclose)
[  612.159669] Call Trace:
[  612.159685]  dump_stack+0xbb/0x107
[  612.159711]  print_address_description.constprop.0+0x18/0x140
[  612.159733]  ? process_one_work+0x90/0x9b0
[  612.159743]  ? process_one_work+0x90/0x9b0
[  612.159754]  kasan_report.cold+0x7c/0xd8
[  612.159778]  ? lock_is_held_type+0x80/0x130
[  612.159789]  ? process_one_work+0x90/0x9b0
[  612.159812]  kasan_check_range+0x145/0x1a0
[  612.159834]  process_one_work+0x90/0x9b0
[  612.159877]  ? pwq_dec_nr_in_flight+0x110/0x110
[  612.159914]  ? spin_bug+0x90/0x90
[  612.159967]  worker_thread+0x3b6/0x6c0
[  612.160023]  ? process_one_work+0x9b0/0x9b0
[  612.160038]  kthread+0x1dc/0x200
[  612.160051]  ? kthread_create_worker_on_cpu+0xd0/0xd0
[  612.160092]  ret_from_fork+0x1f/0x30

[  612.160399] Allocated by task 2358:
[  612.160757]  kasan_save_stack+0x1b/0x40
[  612.160768]  __kasan_kmalloc+0x9b/0xd0
[  612.160778]  cifs_new_fileinfo+0xb0/0x960 [cifs]
[  612.161170]  cifs_open+0xadf/0xf20 [cifs]
[  612.161421]  do_dentry_open+0x2aa/0x6b0
[  612.161432]  path_openat+0xbd9/0xfa0
[  612.161441]  do_filp_open+0x11d/0x230
[  612.161450]  do_sys_openat2+0x115/0x240
[  612.161460]  __x64_sys_openat+0xce/0x140

When mod_delayed_work is called to modify the delay of pending work,
it might return false and queue a new work when pending work is
already scheduled or when try to grab pending work failed.

So, Increase the reference count when new work is scheduled to
avoid use-after-free.

Signed-off-by: Rohith Surabattula <rohiths@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/file.c | 20 +++++++++++++-------
 fs/cifs/misc.c | 12 ++++++++++--
 2 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index a1abd3da1d44..379a427f3c2f 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -874,10 +874,6 @@ void smb2_deferred_work_close(struct work_struct *work)
 			struct cifsFileInfo, deferred.work);
 
 	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
-	if (!cfile->deferred_close_scheduled) {
-		spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
-		return;
-	}
 	cifs_del_deferred_close(cfile);
 	cfile->deferred_close_scheduled = false;
 	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
@@ -904,8 +900,13 @@ int cifs_close(struct inode *inode, struct file *file)
 			cifs_add_deferred_close(cfile, dclose);
 			if (cfile->deferred_close_scheduled &&
 			    delayed_work_pending(&cfile->deferred)) {
-				mod_delayed_work(deferredclose_wq,
-						&cfile->deferred, cifs_sb->ctx->acregmax);
+				/*
+				 * If there is no pending work, mod_delayed_work queues new work.
+				 * So, Increase the ref count to avoid use-after-free.
+				 */
+				if (!mod_delayed_work(deferredclose_wq,
+						&cfile->deferred, cifs_sb->ctx->acregmax))
+					cifsFileInfo_get(cfile);
 			} else {
 				/* Deferred close for files */
 				queue_delayed_work(deferredclose_wq,
@@ -4879,7 +4880,12 @@ oplock_break_ack:
 	if (is_deferred &&
 	    cfile->deferred_close_scheduled &&
 	    delayed_work_pending(&cfile->deferred)) {
-		mod_delayed_work(deferredclose_wq, &cfile->deferred, 0);
+		/*
+		 * If there is no pending work, mod_delayed_work queues new work.
+		 * So, Increase the ref count to avoid use-after-free.
+		 */
+		if (!mod_delayed_work(deferredclose_wq, &cfile->deferred, 0))
+			cifsFileInfo_get(cfile);
 	}
 	spin_unlock(&CIFS_I(inode)->deferred_lock);
 	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 34f2a7e80c58..7207a63819cb 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -674,6 +674,8 @@ cifs_add_pending_open(struct cifs_fid *fid, struct tcon_link *tlink,
 
 /*
  * Critical section which runs after acquiring deferred_lock.
+ * As there is no reference count on cifs_deferred_close, pdclose
+ * should not be used outside deferred_lock.
  */
 bool
 cifs_is_deferred_close(struct cifsFileInfo *cfile, struct cifs_deferred_close **pdclose)
@@ -752,8 +754,14 @@ cifs_close_all_deferred_files(struct cifs_tcon *tcon)
 	spin_lock(&tcon->open_file_lock);
 	list_for_each(tmp, &tcon->openFileList) {
 		cfile = list_entry(tmp, struct cifsFileInfo, tlist);
-		if (delayed_work_pending(&cfile->deferred))
-			mod_delayed_work(deferredclose_wq, &cfile->deferred, 0);
+		if (delayed_work_pending(&cfile->deferred)) {
+			/*
+			 * If there is no pending work, mod_delayed_work queues new work.
+			 * So, Increase the ref count to avoid use-after-free.
+			 */
+			if (!mod_delayed_work(deferredclose_wq, &cfile->deferred, 0))
+				cifsFileInfo_get(cfile);
+		}
 	}
 	spin_unlock(&tcon->open_file_lock);
 }
-- 
cgit v1.2.3


From 8570e75a55430844a8e85e3458e5701556334ffd Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Wed, 19 May 2021 21:33:33 +0000
Subject: selftests: Add .gitignore for nci test suite

Building the nci test suite produces a binary, nci_dev, that git then
tries to track. Add a .gitignore file to tell git to ignore this binary.

Signed-off-by: David Matlack <dmatlack@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/nci/.gitignore | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 tools/testing/selftests/nci/.gitignore

diff --git a/tools/testing/selftests/nci/.gitignore b/tools/testing/selftests/nci/.gitignore
new file mode 100644
index 000000000000..448eeb4590fc
--- /dev/null
+++ b/tools/testing/selftests/nci/.gitignore
@@ -0,0 +1 @@
+/nci_dev
-- 
cgit v1.2.3


From 6bdacdb48e94ff26c03c6eeeef48c03c5e2f7dd4 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 12 May 2021 20:57:14 +0200
Subject: bpf: Fix BPF_JIT kconfig symbol dependency

Randy reported a randconfig build error recently on i386:

  ld: arch/x86/net/bpf_jit_comp32.o: in function `do_jit':
  bpf_jit_comp32.c:(.text+0x28c9): undefined reference to `__bpf_call_base'
  ld: arch/x86/net/bpf_jit_comp32.o: in function `bpf_int_jit_compile':
  bpf_jit_comp32.c:(.text+0x3694): undefined reference to `bpf_jit_blind_constants'
  ld: bpf_jit_comp32.c:(.text+0x3719): undefined reference to `bpf_jit_binary_free'
  ld: bpf_jit_comp32.c:(.text+0x3745): undefined reference to `bpf_jit_binary_alloc'
  ld: bpf_jit_comp32.c:(.text+0x37d3): undefined reference to `bpf_jit_prog_release_other'
  [...]

The cause was that b24abcff918a ("bpf, kconfig: Add consolidated menu entry for
bpf with core options") moved BPF_JIT from net/Kconfig into kernel/bpf/Kconfig
and previously BPF_JIT was guarded by a 'if NET'. However, there is no actual
dependency on NET, it's just that menuconfig NET selects BPF. And the latter in
turn causes kernel/bpf/core.o to be built which contains above symbols. Randy's
randconfig didn't have NET set, and BPF wasn't either, but BPF_JIT otoh was.
Detangle this by making BPF_JIT depend on BPF instead. arm64 was the only arch
that pulled in its JIT in net/ via obj-$(CONFIG_NET), all others unconditionally
pull this dir in via obj-y. Do the same since CONFIG_NET guard there is really
useless as we compiled the JIT via obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o anyway.

Fixes: b24abcff918a ("bpf, kconfig: Add consolidated menu entry for bpf with core options")
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
---
 arch/arm64/Kbuild  | 3 +--
 kernel/bpf/Kconfig | 1 +
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/Kbuild b/arch/arm64/Kbuild
index d6465823b281..7b393cfec071 100644
--- a/arch/arm64/Kbuild
+++ b/arch/arm64/Kbuild
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-y			+= kernel/ mm/
-obj-$(CONFIG_NET)	+= net/
+obj-y			+= kernel/ mm/ net/
 obj-$(CONFIG_KVM)	+= kvm/
 obj-$(CONFIG_XEN)	+= xen/
 obj-$(CONFIG_CRYPTO)	+= crypto/
diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
index 26b591e23f16..bd04f4a44c01 100644
--- a/kernel/bpf/Kconfig
+++ b/kernel/bpf/Kconfig
@@ -37,6 +37,7 @@ config BPF_SYSCALL
 
 config BPF_JIT
 	bool "Enable BPF Just In Time compiler"
+	depends on BPF
 	depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT
 	depends on MODULES
 	help
-- 
cgit v1.2.3


From 8f1634b82189e715b0f82f16ce54fab43cfedd8a Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Fri, 14 May 2021 10:05:28 -0700
Subject: selftests/bpf: Convert static to global in tc_redirect progs

Both IFINDEX_SRC and IFINDEX_DST are set from the userspace
and it won't work once bpf merges with bpf-next.

Fixes: 096eccdef0b3 ("selftests/bpf: Rewrite test_tc_redirect.sh as prog_tests/tc_redirect.c")
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210514170528.3750250-1-sdf@google.com
---
 tools/testing/selftests/bpf/progs/test_tc_neigh.c | 4 ++--
 tools/testing/selftests/bpf/progs/test_tc_peer.c  | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
index 90f64a85998f..0c93d326a663 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_neigh.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
@@ -33,8 +33,8 @@
 				 a.s6_addr32[3] == b.s6_addr32[3])
 #endif
 
-static volatile const __u32 IFINDEX_SRC;
-static volatile const __u32 IFINDEX_DST;
+volatile const __u32 IFINDEX_SRC;
+volatile const __u32 IFINDEX_DST;
 
 static __always_inline bool is_remote_ep_v4(struct __sk_buff *skb,
 					    __be32 addr)
diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c
index 72c72950c3bb..ef264bced0e6 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_peer.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c
@@ -8,8 +8,8 @@
 
 #include <bpf/bpf_helpers.h>
 
-static volatile const __u32 IFINDEX_SRC;
-static volatile const __u32 IFINDEX_DST;
+volatile const __u32 IFINDEX_SRC;
+volatile const __u32 IFINDEX_DST;
 
 SEC("classifier/chk_egress")
 int tc_chk(struct __sk_buff *skb)
-- 
cgit v1.2.3


From 704e2beba23c45eaa056b1c03b5e1fb221e03f80 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 14 May 2021 11:07:26 -0700
Subject: selftests/bpf: Test ringbuf mmap read-only and read-write
 restrictions

Extend ringbuf selftest to validate read/write and read-only restrictions on
memory mapping consumer/producer/data pages. Ensure no "escalations" from
PROT_READ to PROT_WRITE/PROT_EXEC is allowed. And test that mremap() fails to
expand mmap()'ed area.

Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210514180726.843157-1-andrii@kernel.org
---
 tools/testing/selftests/bpf/prog_tests/ringbuf.c | 49 +++++++++++++++++++++++-
 1 file changed, 48 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index de78617f6550..f9a8ae331963 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -86,8 +86,9 @@ void test_ringbuf(void)
 	const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample);
 	pthread_t thread;
 	long bg_ret = -1;
-	int err, cnt;
+	int err, cnt, rb_fd;
 	int page_size = getpagesize();
+	void *mmap_ptr, *tmp_ptr;
 
 	skel = test_ringbuf__open();
 	if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
@@ -101,6 +102,52 @@ void test_ringbuf(void)
 	if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
 		goto cleanup;
 
+	rb_fd = bpf_map__fd(skel->maps.ringbuf);
+	/* good read/write cons_pos */
+	mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0);
+	ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos");
+	tmp_ptr = mremap(mmap_ptr, page_size, 2 * page_size, MREMAP_MAYMOVE);
+	if (!ASSERT_ERR_PTR(tmp_ptr, "rw_extend"))
+		goto cleanup;
+	ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_cons_pos_protect");
+	ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw");
+
+	/* bad writeable prod_pos */
+	mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, page_size);
+	err = -errno;
+	ASSERT_ERR_PTR(mmap_ptr, "wr_prod_pos");
+	ASSERT_EQ(err, -EPERM, "wr_prod_pos_err");
+
+	/* bad writeable data pages */
+	mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size);
+	err = -errno;
+	ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_one");
+	ASSERT_EQ(err, -EPERM, "wr_data_page_one_err");
+	mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 3 * page_size);
+	ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_two");
+	mmap_ptr = mmap(NULL, 2 * page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size);
+	ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_all");
+
+	/* good read-only pages */
+	mmap_ptr = mmap(NULL, 4 * page_size, PROT_READ, MAP_SHARED, rb_fd, 0);
+	if (!ASSERT_OK_PTR(mmap_ptr, "ro_prod_pos"))
+		goto cleanup;
+
+	ASSERT_ERR(mprotect(mmap_ptr, 4 * page_size, PROT_WRITE), "write_protect");
+	ASSERT_ERR(mprotect(mmap_ptr, 4 * page_size, PROT_EXEC), "exec_protect");
+	ASSERT_ERR_PTR(mremap(mmap_ptr, 0, 4 * page_size, MREMAP_MAYMOVE), "ro_remap");
+	ASSERT_OK(munmap(mmap_ptr, 4 * page_size), "unmap_ro");
+
+	/* good read-only pages with initial offset */
+	mmap_ptr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, rb_fd, page_size);
+	if (!ASSERT_OK_PTR(mmap_ptr, "ro_prod_pos"))
+		goto cleanup;
+
+	ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_WRITE), "write_protect");
+	ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_protect");
+	ASSERT_ERR_PTR(mremap(mmap_ptr, 0, 3 * page_size, MREMAP_MAYMOVE), "ro_remap");
+	ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_ro");
+
 	/* only trigger BPF program for current process */
 	skel->bss->pid = getpid();
 
-- 
cgit v1.2.3


From 8afcc19fbf083a8459284d9a29b4b5ac1cb2396c Mon Sep 17 00:00:00 2001
From: Florent Revest <revest@chromium.org>
Date: Mon, 17 May 2021 11:28:29 +0200
Subject: bpf: Clarify a bpf_bprintf_prepare macro

The per-cpu buffers contain bprintf data rather than printf arguments.
The macro name and comment were a bit confusing, this rewords them in a
clearer way.

Signed-off-by: Florent Revest <revest@chromium.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/bpf/20210517092830.1026418-1-revest@chromium.org
---
 kernel/bpf/helpers.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index ef658a9ea5c9..3a5ab614cbb0 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -692,13 +692,14 @@ static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
 	return -EINVAL;
 }
 
-/* Per-cpu temp buffers which can be used by printf-like helpers for %s or %p
+/* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary
+ * arguments representation.
  */
-#define MAX_PRINTF_BUF_LEN	512
+#define MAX_BPRINTF_BUF_LEN	512
 
 /* Support executing three nested bprintf helper calls on a given CPU */
 struct bpf_bprintf_buffers {
-	char tmp_bufs[3][MAX_PRINTF_BUF_LEN];
+	char tmp_bufs[3][MAX_BPRINTF_BUF_LEN];
 };
 static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs);
 static DEFINE_PER_CPU(int, bpf_bprintf_nest_level);
@@ -761,7 +762,7 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
 		if (num_args && try_get_fmt_tmp_buf(&tmp_buf))
 			return -EBUSY;
 
-		tmp_buf_end = tmp_buf + MAX_PRINTF_BUF_LEN;
+		tmp_buf_end = tmp_buf + MAX_BPRINTF_BUF_LEN;
 		*bin_args = (u32 *)tmp_buf;
 	}
 
-- 
cgit v1.2.3


From 0af02eb2a7d76ca85a1ecaf4b3775e2c86408fab Mon Sep 17 00:00:00 2001
From: Florent Revest <revest@chromium.org>
Date: Mon, 17 May 2021 11:28:30 +0200
Subject: bpf: Avoid using ARRAY_SIZE on an uninitialized pointer

The cppcheck static code analysis reported the following error:

    if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bufs->tmp_bufs))) {
                                             ^
ARRAY_SIZE is a macro that expands to sizeofs, so bufs is not actually
dereferenced at runtime, and the code is actually safe. But to keep
things tidy, this patch removes the need for a call to ARRAY_SIZE by
extracting the size of the array into a macro. Cppcheck should no longer
be confused and the code ends up being a bit cleaner.

Fixes: e2d5b2bb769f ("bpf: Fix nested bpf_bprintf_prepare with more per-cpu buffers")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Florent Revest <revest@chromium.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/bpf/20210517092830.1026418-2-revest@chromium.org
---
 kernel/bpf/helpers.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 3a5ab614cbb0..73443498d88f 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -698,8 +698,9 @@ static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
 #define MAX_BPRINTF_BUF_LEN	512
 
 /* Support executing three nested bprintf helper calls on a given CPU */
+#define MAX_BPRINTF_NEST_LEVEL	3
 struct bpf_bprintf_buffers {
-	char tmp_bufs[3][MAX_BPRINTF_BUF_LEN];
+	char tmp_bufs[MAX_BPRINTF_NEST_LEVEL][MAX_BPRINTF_BUF_LEN];
 };
 static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs);
 static DEFINE_PER_CPU(int, bpf_bprintf_nest_level);
@@ -711,7 +712,7 @@ static int try_get_fmt_tmp_buf(char **tmp_buf)
 
 	preempt_disable();
 	nest_level = this_cpu_inc_return(bpf_bprintf_nest_level);
-	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bufs->tmp_bufs))) {
+	if (WARN_ON_ONCE(nest_level > MAX_BPRINTF_NEST_LEVEL)) {
 		this_cpu_dec(bpf_bprintf_nest_level);
 		preempt_enable();
 		return -EBUSY;
-- 
cgit v1.2.3


From ceb11679d9fcf3fdb358a310a38760fcbe9b63ed Mon Sep 17 00:00:00 2001
From: Yinjun Zhang <yinjun.zhang@corigine.com>
Date: Thu, 20 May 2021 10:58:34 +0200
Subject: bpf, offload: Reorder offload callback 'prepare' in verifier

Commit 4976b718c355 ("bpf: Introduce pseudo_btf_id") switched the
order of resolve_pseudo_ldimm(), in which some pseudo instructions
are rewritten. Thus those rewritten instructions cannot be passed
to driver via 'prepare' offload callback.

Reorder the 'prepare' offload callback to fix it.

Fixes: 4976b718c355 ("bpf: Introduce pseudo_btf_id")
Signed-off-by: Yinjun Zhang <yinjun.zhang@corigine.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20210520085834.15023-1-simon.horman@netronome.com
---
 kernel/bpf/verifier.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c58598ef4b5b..09849e43f035 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -13368,12 +13368,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 	if (is_priv)
 		env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
 
-	if (bpf_prog_is_dev_bound(env->prog->aux)) {
-		ret = bpf_prog_offload_verifier_prep(env->prog);
-		if (ret)
-			goto skip_full_check;
-	}
-
 	env->explored_states = kvcalloc(state_htab_size(env),
 				       sizeof(struct bpf_verifier_state_list *),
 				       GFP_USER);
@@ -13401,6 +13395,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 	if (ret < 0)
 		goto skip_full_check;
 
+	if (bpf_prog_is_dev_bound(env->prog->aux)) {
+		ret = bpf_prog_offload_verifier_prep(env->prog);
+		if (ret)
+			goto skip_full_check;
+	}
+
 	ret = check_cfg(env);
 	if (ret < 0)
 		goto skip_full_check;
-- 
cgit v1.2.3


From 84316ca4e100d8cbfccd9f774e23817cb2059868 Mon Sep 17 00:00:00 2001
From: Jussi Maki <joamaki@gmail.com>
Date: Wed, 19 May 2021 15:47:42 +0000
Subject: bpf: Set mac_len in bpf_skb_change_head

The skb_change_head() helper did not set "skb->mac_len", which is
problematic when it's used in combination with skb_redirect_peer().
Without it, redirecting a packet from a L3 device such as wireguard to
the veth peer device will cause skb->data to point to the middle of the
IP header on entry to tcp_v4_rcv() since the L2 header is not pulled
correctly due to mac_len=0.

Fixes: 3a0af8fd61f9 ("bpf: BPF for lightweight tunnel infrastructure")
Signed-off-by: Jussi Maki <joamaki@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210519154743.2554771-2-joamaki@gmail.com
---
 net/core/filter.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/filter.c b/net/core/filter.c
index cae56d08a670..65ab4e21c087 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3784,6 +3784,7 @@ static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
 		__skb_push(skb, head_room);
 		memset(skb->data, 0, head_room);
 		skb_reset_mac_header(skb);
+		skb_reset_mac_len(skb);
 	}
 
 	return ret;
-- 
cgit v1.2.3


From 63e39d29b3da02e901349f6cd71159818a4737a6 Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Thu, 20 May 2021 11:18:35 -0700
Subject: ixgbe: fix large MTU request from VF

Check that the MTU value requested by the VF is in the supported
range of MTUs before attempting to set the VF large packet enable,
otherwise reject the request. This also avoids unnecessary
register updates in the case of the 82599 controller.

Fixes: 872844ddb9e4 ("ixgbe: Enable jumbo frames support w/ SR-IOV")
Co-developed-by: Piotr Skajewski <piotrx.skajewski@intel.com>
Signed-off-by: Piotr Skajewski <piotrx.skajewski@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Co-developed-by: Mateusz Palczewski <mateusz.palczewski@intel.com>
Signed-off-by: Mateusz Palczewski <mateusz.palczewski@intel.com>
Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index 988db46bff0e..214a38de3f41 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -467,12 +467,16 @@ static int ixgbe_set_vf_vlan(struct ixgbe_adapter *adapter, int add, int vid,
 	return err;
 }
 
-static s32 ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
+static int ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 max_frame, u32 vf)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
-	int max_frame = msgbuf[1];
 	u32 max_frs;
 
+	if (max_frame < ETH_MIN_MTU || max_frame > IXGBE_MAX_JUMBO_FRAME_SIZE) {
+		e_err(drv, "VF max_frame %d out of range\n", max_frame);
+		return -EINVAL;
+	}
+
 	/*
 	 * For 82599EB we have to keep all PFs and VFs operating with
 	 * the same max_frame value in order to avoid sending an oversize
@@ -533,12 +537,6 @@ static s32 ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
 		}
 	}
 
-	/* MTU < 68 is an error and causes problems on some kernels */
-	if (max_frame > IXGBE_MAX_JUMBO_FRAME_SIZE) {
-		e_err(drv, "VF max_frame %d out of range\n", max_frame);
-		return -EINVAL;
-	}
-
 	/* pull current max frame size from hardware */
 	max_frs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
 	max_frs &= IXGBE_MHADD_MFS_MASK;
@@ -1249,7 +1247,7 @@ static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf)
 		retval = ixgbe_set_vf_vlan_msg(adapter, msgbuf, vf);
 		break;
 	case IXGBE_VF_SET_LPE:
-		retval = ixgbe_set_vf_lpe(adapter, msgbuf, vf);
+		retval = ixgbe_set_vf_lpe(adapter, msgbuf[1], vf);
 		break;
 	case IXGBE_VF_SET_MACVLAN:
 		retval = ixgbe_set_vf_macvlan_msg(adapter, msgbuf, vf);
-- 
cgit v1.2.3


From 503c599a4f53fe3d959aebfd22c34da27da49777 Mon Sep 17 00:00:00 2001
From: Aditya Srivastava <yashsri421@gmail.com>
Date: Fri, 21 May 2021 00:19:15 +0530
Subject: net: encx24j600: fix kernel-doc syntax in file headers

The opening comment mark '/**' is used for highlighting the beginning of
kernel-doc comments.
The header for drivers/net/ethernet/microchip/encx24j600 files follows
this syntax, but the content inside does not comply with kernel-doc.

This line was probably not meant for kernel-doc parsing, but is parsed
due to the presence of kernel-doc like comment syntax(i.e, '/**'), which
causes unexpected warning from kernel-doc.
For e.g., running scripts/kernel-doc -none
drivers/net/ethernet/microchip/encx24j600_hw.h emits:
warning: expecting prototype for h(). Prototype was for _ENCX24J600_HW_H() instead

Provide a simple fix by replacing such occurrences with general comment
format, i.e. '/*', to prevent kernel-doc from parsing it.

Signed-off-by: Aditya Srivastava <yashsri421@gmail.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/encx24j600.c    | 2 +-
 drivers/net/ethernet/microchip/encx24j600_hw.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/microchip/encx24j600.c b/drivers/net/ethernet/microchip/encx24j600.c
index 3658c4ae3c37..ee921a99e439 100644
--- a/drivers/net/ethernet/microchip/encx24j600.c
+++ b/drivers/net/ethernet/microchip/encx24j600.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
-/**
+/*
  * Microchip ENCX24J600 ethernet driver
  *
  * Copyright (C) 2015 Gridpoint
diff --git a/drivers/net/ethernet/microchip/encx24j600_hw.h b/drivers/net/ethernet/microchip/encx24j600_hw.h
index f604a260ede7..fac61a8fbd02 100644
--- a/drivers/net/ethernet/microchip/encx24j600_hw.h
+++ b/drivers/net/ethernet/microchip/encx24j600_hw.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/**
+/*
  * encx24j600_hw.h: Register definitions
  *
  */
-- 
cgit v1.2.3


From 13a6f3153922391e90036ba2267d34eed63196fc Mon Sep 17 00:00:00 2001
From: Zheyu Ma <zheyuma97@gmail.com>
Date: Thu, 20 May 2021 12:32:36 +0000
Subject: net/qla3xxx: fix schedule while atomic in ql_sem_spinlock

When calling the 'ql_sem_spinlock', the driver has already acquired the
spin lock, so the driver should not call 'ssleep' in atomic context.

This bug can be fixed by using 'mdelay' instead of 'ssleep'.

The KASAN's log reveals it:

[    3.238124 ] BUG: scheduling while atomic: swapper/0/1/0x00000002
[    3.238748 ] 2 locks held by swapper/0/1:
[    3.239151 ]  #0: ffff88810177b240 (&dev->mutex){....}-{3:3}, at:
__device_driver_lock+0x41/0x60
[    3.240026 ]  #1: ffff888107c60e28 (&qdev->hw_lock){....}-{2:2}, at:
ql3xxx_probe+0x2aa/0xea0
[    3.240873 ] Modules linked in:
[    3.241187 ] irq event stamp: 460854
[    3.241541 ] hardirqs last  enabled at (460853): [<ffffffff843051bf>]
_raw_spin_unlock_irqrestore+0x4f/0x70
[    3.242245 ] hardirqs last disabled at (460854): [<ffffffff843058ca>]
_raw_spin_lock_irqsave+0x2a/0x70
[    3.242245 ] softirqs last  enabled at (446076): [<ffffffff846002e4>]
__do_softirq+0x2e4/0x4b1
[    3.242245 ] softirqs last disabled at (446069): [<ffffffff811ba5e0>]
irq_exit_rcu+0x100/0x110
[    3.242245 ] Preemption disabled at:
[    3.242245 ] [<ffffffff828ca5ba>] ql3xxx_probe+0x2aa/0xea0
[    3.242245 ] Kernel panic - not syncing: scheduling while atomic
[    3.242245 ] CPU: 2 PID: 1 Comm: swapper/0 Not tainted
5.13.0-rc1-00145
-gee7dc339169-dirty #16
[    3.242245 ] Call Trace:
[    3.242245 ]  dump_stack+0xba/0xf5
[    3.242245 ]  ? ql3xxx_probe+0x1f0/0xea0
[    3.242245 ]  panic+0x15a/0x3f2
[    3.242245 ]  ? vprintk+0x76/0x150
[    3.242245 ]  ? ql3xxx_probe+0x2aa/0xea0
[    3.242245 ]  __schedule_bug+0xae/0xe0
[    3.242245 ]  __schedule+0x72e/0xa00
[    3.242245 ]  schedule+0x43/0xf0
[    3.242245 ]  schedule_timeout+0x28b/0x500
[    3.242245 ]  ? del_timer_sync+0xf0/0xf0
[    3.242245 ]  ? msleep+0x2f/0x70
[    3.242245 ]  msleep+0x59/0x70
[    3.242245 ]  ql3xxx_probe+0x307/0xea0
[    3.242245 ]  ? _raw_spin_unlock_irqrestore+0x3a/0x70
[    3.242245 ]  ? pci_device_remove+0x110/0x110
[    3.242245 ]  local_pci_probe+0x45/0xa0
[    3.242245 ]  pci_device_probe+0x12b/0x1d0
[    3.242245 ]  really_probe+0x2a9/0x610
[    3.242245 ]  driver_probe_device+0x90/0x1d0
[    3.242245 ]  ? mutex_lock_nested+0x1b/0x20
[    3.242245 ]  device_driver_attach+0x68/0x70
[    3.242245 ]  __driver_attach+0x124/0x1b0
[    3.242245 ]  ? device_driver_attach+0x70/0x70
[    3.242245 ]  bus_for_each_dev+0xbb/0x110
[    3.242245 ]  ? rdinit_setup+0x45/0x45
[    3.242245 ]  driver_attach+0x27/0x30
[    3.242245 ]  bus_add_driver+0x1eb/0x2a0
[    3.242245 ]  driver_register+0xa9/0x180
[    3.242245 ]  __pci_register_driver+0x82/0x90
[    3.242245 ]  ? yellowfin_init+0x25/0x25
[    3.242245 ]  ql3xxx_driver_init+0x23/0x25
[    3.242245 ]  do_one_initcall+0x7f/0x3d0
[    3.242245 ]  ? rdinit_setup+0x45/0x45
[    3.242245 ]  ? rcu_read_lock_sched_held+0x4f/0x80
[    3.242245 ]  kernel_init_freeable+0x2aa/0x301
[    3.242245 ]  ? rest_init+0x2c0/0x2c0
[    3.242245 ]  kernel_init+0x18/0x190
[    3.242245 ]  ? rest_init+0x2c0/0x2c0
[    3.242245 ]  ? rest_init+0x2c0/0x2c0
[    3.242245 ]  ret_from_fork+0x1f/0x30
[    3.242245 ] Dumping ftrace buffer:
[    3.242245 ]    (ftrace buffer empty)
[    3.242245 ] Kernel Offset: disabled
[    3.242245 ] Rebooting in 1 seconds.

Reported-by: Zheyu Ma <zheyuma97@gmail.com>
Signed-off-by: Zheyu Ma <zheyuma97@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qla3xxx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c
index 214e347097a7..2376b2729633 100644
--- a/drivers/net/ethernet/qlogic/qla3xxx.c
+++ b/drivers/net/ethernet/qlogic/qla3xxx.c
@@ -114,7 +114,7 @@ static int ql_sem_spinlock(struct ql3_adapter *qdev,
 		value = readl(&port_regs->CommonRegs.semaphoreReg);
 		if ((value & (sem_mask >> 16)) == sem_bits)
 			return 0;
-		ssleep(1);
+		mdelay(1000);
 	} while (--seconds);
 	return -1;
 }
-- 
cgit v1.2.3


From b3dcb312778664bfbe0a73242fa04a628719b066 Mon Sep 17 00:00:00 2001
From: Joakim Zhang <qiangqing.zhang@nxp.com>
Date: Thu, 20 May 2021 20:51:16 +0800
Subject: net: stmmac: correct clocks enabled in stmmac_vlan_rx_kill_vid()

This should be a mistake to fix conflicts when removing RFC tag to
repost the patch.

Fixes: 5ec55823438e ("net: stmmac: add clocks management for gmac driver")
Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index fea3bf07ae89..df4ce5977fad 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -6191,12 +6191,6 @@ static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid
 	bool is_double = false;
 	int ret;
 
-	ret = pm_runtime_get_sync(priv->device);
-	if (ret < 0) {
-		pm_runtime_put_noidle(priv->device);
-		return ret;
-	}
-
 	if (be16_to_cpu(proto) == ETH_P_8021AD)
 		is_double = true;
 
@@ -6222,6 +6216,12 @@ static int stmmac_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vi
 	bool is_double = false;
 	int ret;
 
+	ret = pm_runtime_get_sync(priv->device);
+	if (ret < 0) {
+		pm_runtime_put_noidle(priv->device);
+		return ret;
+	}
+
 	if (be16_to_cpu(proto) == ETH_P_8021AD)
 		is_double = true;
 
-- 
cgit v1.2.3


From 4691ffb18ac908609aab07d13af7995b6b89d33c Mon Sep 17 00:00:00 2001
From: Joakim Zhang <qiangqing.zhang@nxp.com>
Date: Thu, 20 May 2021 20:51:17 +0800
Subject: net: stmmac: fix system hang if change mac address after interface
 ifdown

Fix system hang with below sequences:
~# ifconfig ethx down
~# ifconfig ethx hw ether xx:xx:xx:xx:xx:xx

After ethx down, stmmac all clocks gated off and then register access causes
system hang.

Fixes: 5ec55823438e ("net: stmmac: add clocks management for gmac driver")
Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index df4ce5977fad..5d956a553434 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -5891,12 +5891,21 @@ static int stmmac_set_mac_address(struct net_device *ndev, void *addr)
 	struct stmmac_priv *priv = netdev_priv(ndev);
 	int ret = 0;
 
+	ret = pm_runtime_get_sync(priv->device);
+	if (ret < 0) {
+		pm_runtime_put_noidle(priv->device);
+		return ret;
+	}
+
 	ret = eth_mac_addr(ndev, addr);
 	if (ret)
-		return ret;
+		goto set_mac_error;
 
 	stmmac_set_umac_addr(priv, priv->hw, ndev->dev_addr, 0);
 
+set_mac_error:
+	pm_runtime_put(priv->device);
+
 	return ret;
 }
 
-- 
cgit v1.2.3


From ae897fda4f507e4b239f0bdfd578b3688ca96fb4 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Thu, 20 May 2021 13:42:42 +0200
Subject: x86/Xen: swap NX determination and GDT setup on BSP

xen_setup_gdt(), via xen_load_gdt_boot(), wants to adjust page tables.
For this to work when NX is not available, x86_configure_nx() needs to
be called first.

[jgross] Note that this is a revert of 36104cb9012a82e73 ("x86/xen:
Delay get_cpu_cap until stack canary is established"), which is possible
now that we no longer support running as PV guest in 32-bit mode.

Cc: <stable.vger.kernel.org> # 5.9
Fixes: 36104cb9012a82e73 ("x86/xen: Delay get_cpu_cap until stack canary is established")
Reported-by: Olaf Hering <olaf@aepfle.de>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Juergen Gross <jgross@suse.com>

Link: https://lore.kernel.org/r/12a866b0-9e89-59f7-ebeb-a2a6cec0987a@suse.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/xen/enlighten_pv.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 17503fed2017..e87699aa2dc8 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1273,16 +1273,16 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	/* Get mfn list */
 	xen_build_dynamic_phys_to_machine();
 
+	/* Work out if we support NX */
+	get_cpu_cap(&boot_cpu_data);
+	x86_configure_nx();
+
 	/*
 	 * Set up kernel GDT and segment registers, mainly so that
 	 * -fstack-protector code can be executed.
 	 */
 	xen_setup_gdt(0);
 
-	/* Work out if we support NX */
-	get_cpu_cap(&boot_cpu_data);
-	x86_configure_nx();
-
 	/* Determine virtual and physical address sizes */
 	get_cpu_address_sizes(&boot_cpu_data);
 
-- 
cgit v1.2.3


From 4ba50e7c423c29639878c00573288869aa627068 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Tue, 18 May 2021 18:13:42 +0200
Subject: xen-pciback: redo VF placement in the virtual topology

The commit referenced below was incomplete: It merely affected what
would get written to the vdev-<N> xenstore node. The guest would still
find the function at the original function number as long as
__xen_pcibk_get_pci_dev() wouldn't be in sync. The same goes for AER wrt
__xen_pcibk_get_pcifront_dev().

Undo overriding the function to zero and instead make sure that VFs at
function zero remain alone in their slot. This has the added benefit of
improving overall capacity, considering that there's only a total of 32
slots available right now (PCI segment and bus can both only ever be
zero at present).

Fixes: 8a5248fe10b1 ("xen PV passthru: assign SR-IOV virtual functions to separate virtual slots")
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: stable@vger.kernel.org
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Link: https://lore.kernel.org/r/8def783b-404c-3452-196d-3f3fd4d72c9e@suse.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/xen-pciback/vpci.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/xen/xen-pciback/vpci.c b/drivers/xen/xen-pciback/vpci.c
index 4162d0e7e00d..cc7450f2b2a9 100644
--- a/drivers/xen/xen-pciback/vpci.c
+++ b/drivers/xen/xen-pciback/vpci.c
@@ -70,7 +70,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
 				   struct pci_dev *dev, int devid,
 				   publish_pci_dev_cb publish_cb)
 {
-	int err = 0, slot, func = -1;
+	int err = 0, slot, func = PCI_FUNC(dev->devfn);
 	struct pci_dev_entry *t, *dev_entry;
 	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
 
@@ -95,22 +95,25 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
 
 	/*
 	 * Keep multi-function devices together on the virtual PCI bus, except
-	 * virtual functions.
+	 * that we want to keep virtual functions at func 0 on their own. They
+	 * aren't multi-function devices and hence their presence at func 0
+	 * may cause guests to not scan the other functions.
 	 */
-	if (!dev->is_virtfn) {
+	if (!dev->is_virtfn || func) {
 		for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
 			if (list_empty(&vpci_dev->dev_list[slot]))
 				continue;
 
 			t = list_entry(list_first(&vpci_dev->dev_list[slot]),
 				       struct pci_dev_entry, list);
+			if (t->dev->is_virtfn && !PCI_FUNC(t->dev->devfn))
+				continue;
 
 			if (match_slot(dev, t->dev)) {
 				dev_info(&dev->dev, "vpci: assign to virtual slot %d func %d\n",
-					 slot, PCI_FUNC(dev->devfn));
+					 slot, func);
 				list_add_tail(&dev_entry->list,
 					      &vpci_dev->dev_list[slot]);
-				func = PCI_FUNC(dev->devfn);
 				goto unlock;
 			}
 		}
@@ -123,7 +126,6 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
 				 slot);
 			list_add_tail(&dev_entry->list,
 				      &vpci_dev->dev_list[slot]);
-			func = dev->is_virtfn ? 0 : PCI_FUNC(dev->devfn);
 			goto unlock;
 		}
 	}
-- 
cgit v1.2.3


From c81d3d24602540f65256f98831d0a25599ea6b87 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Tue, 18 May 2021 18:14:07 +0200
Subject: xen-pciback: reconfigure also from backend watch handler

When multiple PCI devices get assigned to a guest right at boot, libxl
incrementally populates the backend tree. The writes for the first of
the devices trigger the backend watch. In turn xen_pcibk_setup_backend()
will set the XenBus state to Initialised, at which point no further
reconfigures would happen unless a device got hotplugged. Arrange for
reconfigure to also get triggered from the backend watch handler.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: stable@vger.kernel.org
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Link: https://lore.kernel.org/r/2337cbd6-94b9-4187-9862-c03ea12e0c61@suse.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/xen-pciback/xenbus.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index 5188f02e75fb..c09c7ebd6968 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -359,7 +359,8 @@ out:
 	return err;
 }
 
-static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev)
+static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev,
+				 enum xenbus_state state)
 {
 	int err = 0;
 	int num_devs;
@@ -373,9 +374,7 @@ static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev)
 	dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
 
 	mutex_lock(&pdev->dev_lock);
-	/* Make sure we only reconfigure once */
-	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
-	    XenbusStateReconfiguring)
+	if (xenbus_read_driver_state(pdev->xdev->nodename) != state)
 		goto out;
 
 	err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
@@ -500,6 +499,10 @@ static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev)
 		}
 	}
 
+	if (state != XenbusStateReconfiguring)
+		/* Make sure we only reconfigure once. */
+		goto out;
+
 	err = xenbus_switch_state(pdev->xdev, XenbusStateReconfigured);
 	if (err) {
 		xenbus_dev_fatal(pdev->xdev, err,
@@ -525,7 +528,7 @@ static void xen_pcibk_frontend_changed(struct xenbus_device *xdev,
 		break;
 
 	case XenbusStateReconfiguring:
-		xen_pcibk_reconfigure(pdev);
+		xen_pcibk_reconfigure(pdev, XenbusStateReconfiguring);
 		break;
 
 	case XenbusStateConnected:
@@ -664,6 +667,15 @@ static void xen_pcibk_be_watch(struct xenbus_watch *watch,
 		xen_pcibk_setup_backend(pdev);
 		break;
 
+	case XenbusStateInitialised:
+		/*
+		 * We typically move to Initialised when the first device was
+		 * added. Hence subsequent devices getting added may need
+		 * reconfiguring.
+		 */
+		xen_pcibk_reconfigure(pdev, XenbusStateInitialised);
+		break;
+
 	default:
 		break;
 	}
-- 
cgit v1.2.3


From 3b2f17ad1770e51b8b4e68b5069c4f1ee477eff8 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 19 May 2021 13:50:31 -0300
Subject: perf parse-events: Check if the software events array slots are
 populated

To avoid a NULL pointer dereference when the kernel supports the new
feature but the tooling still hasn't an entry for it.

This happened with the recently added PERF_COUNT_SW_CGROUP_SWITCHES
software event.

Reported-by: Thomas Richter <tmricht@linux.ibm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Sumanth Korikkar <sumanthk@linux.ibm.com>
Link: https://lore.kernel.org/linux-perf-users/YKVESEKRjKtILhog@kernel.org/
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/parse-events.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 269997066f6e..84108c17f48d 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -2932,9 +2932,14 @@ restart:
 	}
 
 	for (i = 0; i < max; i++, syms++) {
+		/*
+		 * New attr.config still not supported here, the latest
+		 * example was PERF_COUNT_SW_CGROUP_SWITCHES
+		 */
+		if (syms->symbol == NULL)
+			continue;
 
-		if (event_glob != NULL && syms->symbol != NULL &&
-		    !(strglobmatch(syms->symbol, event_glob) ||
+		if (event_glob != NULL && !(strglobmatch(syms->symbol, event_glob) ||
 		      (syms->alias && strglobmatch(syms->alias, event_glob))))
 			continue;
 
-- 
cgit v1.2.3


From bda7db1d952c3ff7c24c11bc295aa72aaeb98451 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Thu, 20 May 2021 16:12:37 +0300
Subject: spi: sc18is602: don't consider the chip select byte in
 sc18is602_check_transfer

For each spi_message, the sc18is602 I2C-to-SPI bridge driver checks the
length of each spi_transfer against 200 (the size of the chip's internal
buffer) minus hw->tlen (the number of bytes transferred so far).

The first byte of the transferred data is the Function ID (the SPI
slave's chip select) and as per the documentation of the chip:
https://www.nxp.com/docs/en/data-sheet/SC18IS602B.pdf
the data buffer is up to 200 bytes deep _without_ accounting for the
Function ID byte.

However, in sc18is602_txrx(), the driver keeps the Function ID as part
of the buffer, and increments hw->tlen from 0 to 1. Combined with the
check in sc18is602_check_transfer, this prevents us from issuing a
transfer that has exactly 200 bytes in size, but only 199.

Adjust the check function to reflect that the Function ID is not part of
the 200 byte deep data buffer.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://lore.kernel.org/r/20210520131238.2903024-2-olteanv@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-sc18is602.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-sc18is602.c b/drivers/spi/spi-sc18is602.c
index 297c512069a5..37871edc7962 100644
--- a/drivers/spi/spi-sc18is602.c
+++ b/drivers/spi/spi-sc18is602.c
@@ -174,7 +174,7 @@ static int sc18is602_setup_transfer(struct sc18is602 *hw, u32 hz, u8 mode)
 static int sc18is602_check_transfer(struct spi_device *spi,
 				    struct spi_transfer *t, int tlen)
 {
-	if (t && t->len + tlen > SC18IS602_BUFSIZ)
+	if (t && t->len + tlen > SC18IS602_BUFSIZ + 1)
 		return -EINVAL;
 
 	return 0;
-- 
cgit v1.2.3


From b4e46c9954ad55092502e1e8c44ceb9b6744bade Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Thu, 20 May 2021 16:12:38 +0300
Subject: spi: sc18is602: implement .max_{transfer,message}_size() for the
 controller

Allow SPI peripherals attached to this controller to know what is the
maximum transfer size and message size, so they can limit their transfer
lengths properly in case they are otherwise capable of larger transfer
sizes. For the sc18is602, this is 200 bytes in both cases, since as far
as I understand, it isn't possible to tell the controller to keep the
chip select asserted after the STOP command is sent.

The controller can support SPI messages larger than 200 bytes if
cs_change is set for individual transfers such that the portions with
chip select asserted are never longer than 200 bytes. What is not
supported is just SPI messages with a continuous chip select larger than
200. I don't think it is possible to express this using the current API,
so drivers which do send SPI messages with cs_change can safely just
look at the max_transfer_size limit.

An example of user for this is sja1105_xfer() in
drivers/net/dsa/sja1105/sja1105_spi.c which sends by default 64 * 4 =
256 byte transfers.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://lore.kernel.org/r/20210520131238.2903024-3-olteanv@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-sc18is602.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/spi/spi-sc18is602.c b/drivers/spi/spi-sc18is602.c
index 37871edc7962..5d27ee482237 100644
--- a/drivers/spi/spi-sc18is602.c
+++ b/drivers/spi/spi-sc18is602.c
@@ -219,6 +219,11 @@ static int sc18is602_transfer_one(struct spi_master *master,
 	return status;
 }
 
+static size_t sc18is602_max_transfer_size(struct spi_device *spi)
+{
+	return SC18IS602_BUFSIZ;
+}
+
 static int sc18is602_setup(struct spi_device *spi)
 {
 	struct sc18is602 *hw = spi_master_get_devdata(spi->master);
@@ -293,6 +298,8 @@ static int sc18is602_probe(struct i2c_client *client,
 	master->bits_per_word_mask = SPI_BPW_MASK(8);
 	master->setup = sc18is602_setup;
 	master->transfer_one_message = sc18is602_transfer_one;
+	master->max_transfer_size = sc18is602_max_transfer_size;
+	master->max_message_size = sc18is602_max_transfer_size;
 	master->dev.of_node = np;
 	master->min_speed_hz = hw->freq / 128;
 	master->max_speed_hz = hw->freq / 4;
-- 
cgit v1.2.3


From ec347b7c319156c3b488681d1813d08d88499cc6 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 21 May 2021 16:00:31 -0300
Subject: tools headers UAPI: Sync linux/fs.h with the kernel sources

To pick the trivial change in:

  63c8af5687f6b1b7 ("block: uapi: fix comment about block device ioctl")

This silences this perf build warning:

  Warning: Kernel ABI header at 'tools/include/uapi/linux/fs.h' differs from latest version at 'include/uapi/linux/fs.h'
  diff -u tools/include/uapi/linux/fs.h include/uapi/linux/fs.h

Cc: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h
index f44eb0a04afd..4c32e97dcdf0 100644
--- a/tools/include/uapi/linux/fs.h
+++ b/tools/include/uapi/linux/fs.h
@@ -185,7 +185,7 @@ struct fsxattr {
 #define BLKROTATIONAL _IO(0x12,126)
 #define BLKZEROOUT _IO(0x12,127)
 /*
- * A jump here: 130-131 are reserved for zoned block devices
+ * A jump here: 130-136 are reserved for zoned block devices
  * (see uapi/linux/blkzoned.h)
  */
 
-- 
cgit v1.2.3


From 4224680ee7aaf0f13ab762ffb2a77373737dce5e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 21 May 2021 16:00:31 -0300
Subject: tools headers UAPI: Sync linux/perf_event.h with the kernel sources

To pick the trivial change in:

  0683b53197b55343 ("signal: Deliver all of the siginfo perf data in _perf")

This silences this perf build warning:

  Warning: Kernel ABI header at 'tools/include/uapi/linux/perf_event.h' differs from latest version at 'include/uapi/linux/perf_event.h'
  diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h

Cc: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/linux/perf_event.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index bf8143505c49..f92880a15645 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -464,7 +464,7 @@ struct perf_event_attr {
 
 	/*
 	 * User provided data if sigtrap=1, passed back to user via
-	 * siginfo_t::si_perf, e.g. to permit user to identify the event.
+	 * siginfo_t::si_perf_data, e.g. to permit user to identify the event.
 	 */
 	__u64	sig_data;
 };
-- 
cgit v1.2.3


From bffcbe79370e8fda7f1d19899de83aa2a833bf69 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 21 May 2021 16:14:00 -0300
Subject: tools headers UAPI: Sync files changed by the quotactl_path unwiring

To pick the changes in this csets:

  5b9fedb31e476693 ("quota: Disable quotactl_path syscall")

That silences these perf build warnings:

  Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/unistd.h' differs from latest version at 'include/uapi/asm-generic/unistd.h'
  diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h
  Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl'
  diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl
  Warning: Kernel ABI header at 'tools/perf/arch/powerpc/entry/syscalls/syscall.tbl' differs from latest version at 'arch/powerpc/kernel/syscalls/syscall.tbl'
  diff -u tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl
  Warning: Kernel ABI header at 'tools/perf/arch/s390/entry/syscalls/syscall.tbl' differs from latest version at 'arch/s390/kernel/syscalls/syscall.tbl'
  diff -u tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl
  Warning: Kernel ABI header at 'tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl' differs from latest version at 'arch/mips/kernel/syscalls/syscall_n64.tbl'
  diff -u tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl arch/mips/kernel/syscalls/syscall_n64.tbl

Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl | 2 +-
 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl  | 2 +-
 tools/perf/arch/s390/entry/syscalls/syscall.tbl     | 2 +-
 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl   | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
index 9974f5f8e49b..9cd1c34f31b5 100644
--- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
+++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
@@ -357,7 +357,7 @@
 440	n64	process_madvise			sys_process_madvise
 441	n64	epoll_pwait2			sys_epoll_pwait2
 442	n64	mount_setattr			sys_mount_setattr
-443	n64	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	n64	landlock_create_ruleset		sys_landlock_create_ruleset
 445	n64	landlock_add_rule		sys_landlock_add_rule
 446	n64	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 2e68fbb57cc6..8f052ff4058c 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -522,7 +522,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2		compat_sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-443	common	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index 7e4a2aba366d..0690263df1dd 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -445,7 +445,7 @@
 440  common	process_madvise		sys_process_madvise		sys_process_madvise
 441  common	epoll_pwait2		sys_epoll_pwait2		compat_sys_epoll_pwait2
 442  common	mount_setattr		sys_mount_setattr		sys_mount_setattr
-443  common	quotactl_path		sys_quotactl_path		sys_quotactl_path
+# 443 reserved for quotactl_path
 444  common	landlock_create_ruleset	sys_landlock_create_ruleset	sys_landlock_create_ruleset
 445  common	landlock_add_rule	sys_landlock_add_rule		sys_landlock_add_rule
 446  common	landlock_restrict_self	sys_landlock_restrict_self	sys_landlock_restrict_self
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index ecd551b08d05..ce18119ea0d0 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -364,7 +364,7 @@
 440	common	process_madvise		sys_process_madvise
 441	common	epoll_pwait2		sys_epoll_pwait2
 442	common	mount_setattr		sys_mount_setattr
-443	common	quotactl_path		sys_quotactl_path
+# 443 reserved for quotactl_path
 444	common	landlock_create_ruleset	sys_landlock_create_ruleset
 445	common	landlock_add_rule	sys_landlock_add_rule
 446	common	landlock_restrict_self	sys_landlock_restrict_self
-- 
cgit v1.2.3


From a6172059758ba1b496ae024cece7d5bdc8d017db Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 21 May 2021 12:20:51 +0300
Subject: perf scripts python: exported-sql-viewer.py: Fix copy to clipboard
 from Top Calls by elapsed Time report

Provide missing argument to prevent following error when copying a
selection to the clipboard:

Traceback (most recent call last):
  File "tools/perf/scripts/python/exported-sql-viewer.py", line 4041, in <lambda>
    menu.addAction(CreateAction("&Copy selection", "Copy to clipboard", lambda: CopyCellsToClipboardHdr(self.view), self.view))
  File "tools/perf/scripts/python/exported-sql-viewer.py", line 4021, in CopyCellsToClipboardHdr
    CopyCellsToClipboard(view, False, True)
  File "tools/perf/scripts/python/exported-sql-viewer.py", line 4018, in CopyCellsToClipboard
    view.CopyCellsToClipboard(view, as_csv, with_hdr)
  File "tools/perf/scripts/python/exported-sql-viewer.py", line 3871, in CopyTableCellsToClipboard
    val = model.headerData(col, Qt.Horizontal)
TypeError: headerData() missing 1 required positional argument: 'role'

Fixes: 96c43b9a7ab3b ("perf scripts python: exported-sql-viewer.py: Add copy to clipboard")
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: stable@vger.kernel.org
Link: http://lore.kernel.org/lkml/20210521092053.25683-2-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/scripts/python/exported-sql-viewer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index 7daa8bb70a5a..b5078d65704e 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -3868,7 +3868,7 @@ def CopyTableCellsToClipboard(view, as_csv=False, with_hdr=False):
 	if with_hdr:
 		model = indexes[0].model()
 		for col in range(min_col, max_col + 1):
-			val = model.headerData(col, Qt.Horizontal)
+			val = model.headerData(col, Qt.Horizontal, Qt.DisplayRole)
 			if as_csv:
 				text += sep + ToCSValue(val)
 				sep = ","
-- 
cgit v1.2.3


From fd931b2e234a7cc451a7bbb1965d6ce623189158 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 21 May 2021 12:20:52 +0300
Subject: perf scripts python: exported-sql-viewer.py: Fix Array TypeError

The 'Array' class is present in more than one python standard library.
In some versions of Python 3, the following error occurs:

Traceback (most recent call last):
  File "tools/perf/scripts/python/exported-sql-viewer.py", line 4702, in <lambda>
    reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda a=None,x=dbid: self.NewBranchView(x), self))
  File "tools/perf/scripts/python/exported-sql-viewer.py", line 4727, in NewBranchView
    BranchWindow(self.glb, event_id, ReportVars(), self)
  File "tools/perf/scripts/python/exported-sql-viewer.py", line 3208, in __init__
    self.model = LookupCreateModel(model_name, lambda: BranchModel(glb, event_id, report_vars.where_clause))
  File "tools/perf/scripts/python/exported-sql-viewer.py", line 343, in LookupCreateModel
    model = create_fn()
  File "tools/perf/scripts/python/exported-sql-viewer.py", line 3208, in <lambda>
    self.model = LookupCreateModel(model_name, lambda: BranchModel(glb, event_id, report_vars.where_clause))
  File "tools/perf/scripts/python/exported-sql-viewer.py", line 3124, in __init__
    self.fetcher = SQLFetcher(glb, sql, prep, self.AddSample)
  File "tools/perf/scripts/python/exported-sql-viewer.py", line 2658, in __init__
    self.buffer = Array(c_char, self.buffer_size, lock=False)
TypeError: abstract class

This apparently happens because Python can be inconsistent about which
class of the name 'Array' gets imported. Fix by importing explicitly by
name so that only the desired 'Array' gets imported.

Fixes: 8392b74b575c3 ("perf scripts python: exported-sql-viewer.py: Add ability to display all the database tables")
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: stable@vger.kernel.org
Link: http://lore.kernel.org/lkml/20210521092053.25683-3-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/scripts/python/exported-sql-viewer.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index b5078d65704e..4a63843f623c 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -125,8 +125,9 @@ if pyside_version_1:
 	from PySide.QtGui import *
 	from PySide.QtSql import *
 
-from decimal import *
-from ctypes import *
+from decimal import Decimal, ROUND_HALF_UP
+from ctypes import CDLL, Structure, create_string_buffer, addressof, sizeof, \
+		   c_void_p, c_bool, c_byte, c_char, c_int, c_uint, c_longlong, c_ulonglong
 from multiprocessing import Process, Array, Value, Event
 
 # xrange is range in Python3
-- 
cgit v1.2.3


From f56299a9c998e0bfbd4ab07cafe9eb8444512448 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 21 May 2021 12:20:53 +0300
Subject: perf scripts python: exported-sql-viewer.py: Fix warning display

Deprecation warnings are useful only for the developer, not an end user.
Display warnings only when requested using the python -W option. This
stops the display of warnings like:

 tools/perf/scripts/python/exported-sql-viewer.py:5102: DeprecationWarning:
         an integer is required (got type PySide2.QtCore.Qt.AlignmentFlag).
         Implicit conversion to integers using __int__ is deprecated, and
         may be removed in a future version of Python.
    err = app.exec_()

Since the warning can be fixed only in PySide2, we must wait for it to
be finally fixed there.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: stable@vger.kernel.org      # v5.3+
Link: http://lore.kernel.org/lkml/20210521092053.25683-4-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/scripts/python/exported-sql-viewer.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index 4a63843f623c..711d4f9f5645 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -91,6 +91,11 @@
 from __future__ import print_function
 
 import sys
+# Only change warnings if the python -W option was not used
+if not sys.warnoptions:
+	import warnings
+	# PySide2 causes deprecation warnings, ignore them.
+	warnings.filterwarnings("ignore", category=DeprecationWarning)
 import argparse
 import weakref
 import threading
-- 
cgit v1.2.3


From f42907e8a4515635615a6ffd44242454ef843c04 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 21 May 2021 20:51:27 +0300
Subject: perf script: Add missing PERF_IP_FLAG_CHARS for VM-Entry and VM-Exit

Add 'g' (guest) for VM-Entry and 'h' (host) for VM-Exit.

Fixes: c025d46cd932c ("perf script: Add branch types for VM-Entry and VM-Exit")
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lore.kernel.org/lkml/20210521175127.27264-1-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-intel-pt.txt | 6 +++---
 tools/perf/Documentation/perf-script.txt   | 7 ++++---
 tools/perf/util/event.h                    | 2 +-
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt
index 1dcec73c910c..bcf3eca5afbe 100644
--- a/tools/perf/Documentation/perf-intel-pt.txt
+++ b/tools/perf/Documentation/perf-intel-pt.txt
@@ -108,9 +108,9 @@ displayed as follows:
 
 	perf script --itrace=ibxwpe -F+flags
 
-The flags are "bcrosyiABEx" which stand for branch, call, return, conditional,
-system, asynchronous, interrupt, transaction abort, trace begin, trace end, and
-in transaction, respectively.
+The flags are "bcrosyiABExgh" which stand for branch, call, return, conditional,
+system, asynchronous, interrupt, transaction abort, trace begin, trace end,
+in transaction, VM-entry, and VM-exit respectively.
 
 perf script also supports higher level ways to dump instruction traces:
 
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 5b8b61075039..48a5f5b26dd4 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -183,14 +183,15 @@ OPTIONS
 	At this point usage is displayed, and perf-script exits.
 
 	The flags field is synthesized and may have a value when Instruction
-	Trace decoding. The flags are "bcrosyiABEx" which stand for branch,
+	Trace decoding. The flags are "bcrosyiABExgh" which stand for branch,
 	call, return, conditional, system, asynchronous, interrupt,
-	transaction abort, trace begin, trace end, and in transaction,
+	transaction abort, trace begin, trace end, in transaction, VM-Entry, and VM-Exit
 	respectively. Known combinations of flags are printed more nicely e.g.
 	"call" for "bc", "return" for "br", "jcc" for "bo", "jmp" for "b",
 	"int" for "bci", "iret" for "bri", "syscall" for "bcs", "sysret" for "brs",
 	"async" for "by", "hw int" for "bcyi", "tx abrt" for "bA", "tr strt" for "bB",
-	"tr end" for "bE". However the "x" flag will be display separately in those
+	"tr end" for "bE", "vmentry" for "bcg", "vmexit" for "bch".
+	However the "x" flag will be displayed separately in those
 	cases e.g. "jcc     (x)" for a condition branch within a transaction.
 
 	The callindent field is synthesized and may have a value when
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 8a62fb39e365..19ad64f2bd83 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -100,7 +100,7 @@ enum {
 	PERF_IP_FLAG_VMEXIT		= 1ULL << 12,
 };
 
-#define PERF_IP_FLAG_CHARS "bcrosyiABEx"
+#define PERF_IP_FLAG_CHARS "bcrosyiABExgh"
 
 #define PERF_BRANCH_MASK		(\
 	PERF_IP_FLAG_BRANCH		|\
-- 
cgit v1.2.3


From f8b61bd20479c094fb421da42fef6b4ff22a589e Mon Sep 17 00:00:00 2001
From: Song Liu <song@kernel.org>
Date: Tue, 11 May 2021 23:51:16 -0700
Subject: perf stat: Skip evlist__[enable|disable] when all events uses BPF

When all events of a perf-stat session use BPF, it is not necessary to
call evlist__enable() and evlist__disable(). Skip them when
all_counters_use_bpf is true.

Signed-off-by: Song Liu <song@kernel.org>
Reported-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 13 ++++++++++---
 tools/perf/util/evlist.c  |  3 ---
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 5a830ae09418..f9f74a514315 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -572,7 +572,8 @@ static int enable_counters(void)
 	 * - we have initial delay configured
 	 */
 	if (!target__none(&target) || stat_config.initial_delay) {
-		evlist__enable(evsel_list);
+		if (!all_counters_use_bpf)
+			evlist__enable(evsel_list);
 		if (stat_config.initial_delay > 0)
 			pr_info(EVLIST_ENABLED_MSG);
 	}
@@ -581,13 +582,19 @@ static int enable_counters(void)
 
 static void disable_counters(void)
 {
+	struct evsel *counter;
+
 	/*
 	 * If we don't have tracee (attaching to task or cpu), counters may
 	 * still be running. To get accurate group ratios, we must stop groups
 	 * from counting before reading their constituent counters.
 	 */
-	if (!target__none(&target))
-		evlist__disable(evsel_list);
+	if (!target__none(&target)) {
+		evlist__for_each_entry(evsel_list, counter)
+			bpf_counter__disable(counter);
+		if (!all_counters_use_bpf)
+			evlist__disable(evsel_list);
+	}
 }
 
 static volatile int workload_exec_errno;
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 6e5c41528c7d..6ea3e677dc1e 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -425,9 +425,6 @@ static void __evlist__disable(struct evlist *evlist, char *evsel_name)
 	if (affinity__setup(&affinity) < 0)
 		return;
 
-	evlist__for_each_entry(evlist, pos)
-		bpf_counter__disable(pos);
-
 	/* Disable 'immediate' events last */
 	for (imm = 0; imm <= 1; imm++) {
 		evlist__for_each_cpu(evlist, i, cpu) {
-- 
cgit v1.2.3


From 430bfe0576120b52cf7f62116bc7549180da4706 Mon Sep 17 00:00:00 2001
From: Stefan Roese <sr@denx.de>
Date: Thu, 20 May 2021 10:43:18 +0200
Subject: net: ethernet: mtk_eth_soc: Fix DIM support for MT7628/88

When updating to latest mainline for some testing on the GARDENA smart
gateway based on the MT7628, I noticed that ethernet does not work any
more. Commit e9229ffd550b ("net: ethernet: mtk_eth_soc: implement
dynamic interrupt moderation") introduced this problem, as it missed the
RX_DIM & TX_DIM configuration for this SoC variant. This patch fixes
this by calling mtk_dim_rx() & mtk_dim_tx() in this case as well.

Signed-off-by: Stefan Roese <sr@denx.de>
Fixes: e9229ffd550b ("net: ethernet: mtk_eth_soc: implement dynamic interrupt moderation")
Cc: Felix Fietkau <nbd@nbd.name>
Cc: John Crispin <john@phrozen.org>
Cc: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Cc: Reto Schneider <code@reto-schneider.ch>
Cc: Reto Schneider <reto.schneider@husqvarnagroup.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index ed4eacef17ce..d6cc06ee0caa 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -2423,7 +2423,8 @@ static void mtk_dim_rx(struct work_struct *work)
 	val |= cur << MTK_PDMA_DELAY_RX_PINT_SHIFT;
 
 	mtk_w32(eth, val, MTK_PDMA_DELAY_INT);
-	mtk_w32(eth, val, MTK_QDMA_DELAY_INT);
+	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
+		mtk_w32(eth, val, MTK_QDMA_DELAY_INT);
 
 	spin_unlock_bh(&eth->dim_lock);
 
@@ -2452,7 +2453,8 @@ static void mtk_dim_tx(struct work_struct *work)
 	val |= cur << MTK_PDMA_DELAY_TX_PINT_SHIFT;
 
 	mtk_w32(eth, val, MTK_PDMA_DELAY_INT);
-	mtk_w32(eth, val, MTK_QDMA_DELAY_INT);
+	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
+		mtk_w32(eth, val, MTK_QDMA_DELAY_INT);
 
 	spin_unlock_bh(&eth->dim_lock);
 
@@ -2480,6 +2482,10 @@ static int mtk_hw_init(struct mtk_eth *eth)
 			goto err_disable_pm;
 		}
 
+		/* set interrupt delays based on current Net DIM sample */
+		mtk_dim_rx(&eth->rx_dim.work);
+		mtk_dim_tx(&eth->tx_dim.work);
+
 		/* disable delay and normal interrupt */
 		mtk_tx_irq_disable(eth, ~0);
 		mtk_rx_irq_disable(eth, ~0);
-- 
cgit v1.2.3


From e5bfaed7508fd34ae95a79d1eb76c38ecc82c947 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Fri, 21 May 2021 15:28:56 +0200
Subject: MAINTAINERS: s390/net: add netdev list

Discussions for network-related code should include the netdev list.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index c1cb2e38ae2e..88722efd94a1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15944,6 +15944,7 @@ S390 IUCV NETWORK LAYER
 M:	Julian Wiedmann <jwi@linux.ibm.com>
 M:	Karsten Graul <kgraul@linux.ibm.com>
 L:	linux-s390@vger.kernel.org
+L:	netdev@vger.kernel.org
 S:	Supported
 W:	http://www.ibm.com/developerworks/linux/linux390/
 F:	drivers/s390/net/*iucv*
@@ -15954,6 +15955,7 @@ S390 NETWORK DRIVERS
 M:	Julian Wiedmann <jwi@linux.ibm.com>
 M:	Karsten Graul <kgraul@linux.ibm.com>
 L:	linux-s390@vger.kernel.org
+L:	netdev@vger.kernel.org
 S:	Supported
 W:	http://www.ibm.com/developerworks/linux/linux390/
 F:	drivers/s390/net/
-- 
cgit v1.2.3


From fc516d3a6aa2c6ffe27d0da8818d13839e023e7e Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 21 May 2021 10:46:14 -0700
Subject: net: dsa: bcm_sf2: Fix bcm_sf2_reg_rgmii_cntrl() call for non-RGMII
 port
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We cannot call bcm_sf2_reg_rgmii_cntrl() for a port that is not RGMII,
yet we do that in bcm_sf2_sw_mac_link_up() irrespective of the port's
interface. Move that read until we have properly qualified the PHY
interface mode. This avoids triggering a warning on 7278 platforms that
have GMII ports.

Fixes: 55cfeb396965 ("net: dsa: bcm_sf2: add function finding RGMII register")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/bcm_sf2.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 9150038b60cb..3b018fcf4412 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -821,11 +821,9 @@ static void bcm_sf2_sw_mac_link_up(struct dsa_switch *ds, int port,
 	bcm_sf2_sw_mac_link_set(ds, port, interface, true);
 
 	if (port != core_readl(priv, CORE_IMP0_PRT_ID)) {
-		u32 reg_rgmii_ctrl;
+		u32 reg_rgmii_ctrl = 0;
 		u32 reg, offset;
 
-		reg_rgmii_ctrl = bcm_sf2_reg_rgmii_cntrl(priv, port);
-
 		if (priv->type == BCM4908_DEVICE_ID ||
 		    priv->type == BCM7445_DEVICE_ID)
 			offset = CORE_STS_OVERRIDE_GMIIP_PORT(port);
@@ -836,6 +834,7 @@ static void bcm_sf2_sw_mac_link_up(struct dsa_switch *ds, int port,
 		    interface == PHY_INTERFACE_MODE_RGMII_TXID ||
 		    interface == PHY_INTERFACE_MODE_MII ||
 		    interface == PHY_INTERFACE_MODE_REVMII) {
+			reg_rgmii_ctrl = bcm_sf2_reg_rgmii_cntrl(priv, port);
 			reg = reg_readl(priv, reg_rgmii_ctrl);
 			reg &= ~(RX_PAUSE_EN | TX_PAUSE_EN);
 
-- 
cgit v1.2.3


From c7718ee96dbc2f9c5fc3b578abdf296dd44b9c20 Mon Sep 17 00:00:00 2001
From: Aleksander Jan Bajkowski <olek2@wp.pl>
Date: Fri, 21 May 2021 16:45:58 +0200
Subject: net: lantiq: fix memory corruption in RX ring

In a situation where memory allocation or dma mapping fails, an
invalid address is programmed into the descriptor. This can lead
to memory corruption. If the memory allocation fails, DMA should
reuse the previous skb and mapping and drop the packet. This patch
also increments rx drop counter.

Fixes: fe1a56420cf2 ("net: lantiq: Add Lantiq / Intel VRX200 Ethernet driver ")
Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/lantiq_xrx200.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c
index 41c2ad210bc9..36dc3e5f6218 100644
--- a/drivers/net/ethernet/lantiq_xrx200.c
+++ b/drivers/net/ethernet/lantiq_xrx200.c
@@ -154,6 +154,7 @@ static int xrx200_close(struct net_device *net_dev)
 
 static int xrx200_alloc_skb(struct xrx200_chan *ch)
 {
+	dma_addr_t mapping;
 	int ret = 0;
 
 	ch->skb[ch->dma.desc] = netdev_alloc_skb_ip_align(ch->priv->net_dev,
@@ -163,16 +164,17 @@ static int xrx200_alloc_skb(struct xrx200_chan *ch)
 		goto skip;
 	}
 
-	ch->dma.desc_base[ch->dma.desc].addr = dma_map_single(ch->priv->dev,
-			ch->skb[ch->dma.desc]->data, XRX200_DMA_DATA_LEN,
-			DMA_FROM_DEVICE);
-	if (unlikely(dma_mapping_error(ch->priv->dev,
-				       ch->dma.desc_base[ch->dma.desc].addr))) {
+	mapping = dma_map_single(ch->priv->dev, ch->skb[ch->dma.desc]->data,
+				 XRX200_DMA_DATA_LEN, DMA_FROM_DEVICE);
+	if (unlikely(dma_mapping_error(ch->priv->dev, mapping))) {
 		dev_kfree_skb_any(ch->skb[ch->dma.desc]);
 		ret = -ENOMEM;
 		goto skip;
 	}
 
+	ch->dma.desc_base[ch->dma.desc].addr = mapping;
+	/* Make sure the address is written before we give it to HW */
+	wmb();
 skip:
 	ch->dma.desc_base[ch->dma.desc].ctl =
 		LTQ_DMA_OWN | LTQ_DMA_RX_OFFSET(NET_IP_ALIGN) |
@@ -196,6 +198,8 @@ static int xrx200_hw_receive(struct xrx200_chan *ch)
 	ch->dma.desc %= LTQ_DESC_NUM;
 
 	if (ret) {
+		ch->skb[ch->dma.desc] = skb;
+		net_dev->stats.rx_dropped++;
 		netdev_err(net_dev, "failed to allocate new rx buffer\n");
 		return ret;
 	}
-- 
cgit v1.2.3


From 29bf1993fdba17703a836cf098712cf15f96706d Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Sat, 22 May 2021 00:01:00 +0300
Subject: MAINTAINERS: remove Ioana Radulescu from dpaa2-eth

Remove Ioana Radulescu from dpaa2-eth since she is no longer working on
the DPAA2 set of drivers.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 88722efd94a1..251111e5da53 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5569,7 +5569,6 @@ F:	drivers/soc/fsl/dpio
 
 DPAA2 ETHERNET DRIVER
 M:	Ioana Ciornei <ioana.ciornei@nxp.com>
-M:	Ioana Radulescu <ruxandra.radulescu@nxp.com>
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	Documentation/networking/device_drivers/ethernet/freescale/dpaa2/ethernet-driver.rst
-- 
cgit v1.2.3


From aced3ce57cd37b5ca332bcacd370d01f5a8c5371 Mon Sep 17 00:00:00 2001
From: Rao Shoaib <rao.shoaib@oracle.com>
Date: Fri, 21 May 2021 11:08:06 -0700
Subject: RDS tcp loopback connection can hang

When TCP is used as transport and a program on the
system connects to RDS port 16385, connection is
accepted but denied per the rules of RDS. However,
RDS connections object is left in the list. Next
loopback connection will select that connection
object as it is at the head of list. The connection
attempt will hang as the connection object is set
to connect over TCP which is not allowed

The issue can be reproduced easily, use rds-ping
to ping a local IP address. After that use any
program like ncat to connect to the same IP
address and port 16385. This will hang so ctrl-c out.
Now try rds-ping, it will hang.

To fix the issue this patch adds checks to disallow
the connection object creation and destroys the
connection object.

Signed-off-by: Rao Shoaib <rao.shoaib@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/connection.c | 23 +++++++++++++++++------
 net/rds/tcp.c        |  4 ++--
 net/rds/tcp.h        |  3 ++-
 net/rds/tcp_listen.c |  6 ++++++
 4 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/net/rds/connection.c b/net/rds/connection.c
index f2fcab182095..a3bc4b54d491 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -240,12 +240,23 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 	if (loop_trans) {
 		rds_trans_put(loop_trans);
 		conn->c_loopback = 1;
-		if (is_outgoing && trans->t_prefer_loopback) {
-			/* "outgoing" connection - and the transport
-			 * says it wants the connection handled by the
-			 * loopback transport. This is what TCP does.
-			 */
-			trans = &rds_loop_transport;
+		if (trans->t_prefer_loopback) {
+			if (likely(is_outgoing)) {
+				/* "outgoing" connection to local address.
+				 * Protocol says it wants the connection
+				 * handled by the loopback transport.
+				 * This is what TCP does.
+				 */
+				trans = &rds_loop_transport;
+			} else {
+				/* No transport currently in use
+				 * should end up here, but if it
+				 * does, reset/destroy the connection.
+				 */
+				kmem_cache_free(rds_conn_slab, conn);
+				conn = ERR_PTR(-EOPNOTSUPP);
+				goto out;
+			}
 		}
 	}
 
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 43db0eca911f..abf19c0e3ba0 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -313,8 +313,8 @@ out:
 }
 #endif
 
-static int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr,
-			       __u32 scope_id)
+int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr,
+			__u32 scope_id)
 {
 	struct net_device *dev = NULL;
 #if IS_ENABLED(CONFIG_IPV6)
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index bad9cf49d565..dc8d745d6857 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -59,7 +59,8 @@ u32 rds_tcp_snd_una(struct rds_tcp_connection *tc);
 u64 rds_tcp_map_seq(struct rds_tcp_connection *tc, u32 seq);
 extern struct rds_transport rds_tcp_transport;
 void rds_tcp_accept_work(struct sock *sk);
-
+int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr,
+			__u32 scope_id);
 /* tcp_connect.c */
 int rds_tcp_conn_path_connect(struct rds_conn_path *cp);
 void rds_tcp_conn_path_shutdown(struct rds_conn_path *conn);
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 101cf14215a0..09cadd556d1e 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -167,6 +167,12 @@ int rds_tcp_accept_one(struct socket *sock)
 	}
 #endif
 
+	if (!rds_tcp_laddr_check(sock_net(sock->sk), peer_addr, dev_if)) {
+		/* local address connection is only allowed via loopback */
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
 	conn = rds_conn_create(sock_net(sock->sk),
 			       my_addr, peer_addr,
 			       &rds_tcp_transport, 0, GFP_KERNEL, dev_if);
-- 
cgit v1.2.3


From 8f03eeb6e0a0a0b8d617ee0a4bce729e47130036 Mon Sep 17 00:00:00 2001
From: Íñigo Huguet <ihuguet@redhat.com>
Date: Fri, 21 May 2021 16:38:35 +0200
Subject: net:sfc: fix non-freed irq in legacy irq mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

SFC driver can be configured via modparam to work using MSI-X, MSI or
legacy IRQ interrupts. In the last one, the interrupt was not properly
released on module remove.

It was not freed because the flag irqs_hooked was not set during
initialization in the case of using legacy IRQ.

Example of (trimmed) trace during module remove without this fix:

remove_proc_entry: removing non-empty directory 'irq/125', leaking at least '0000:3b:00.1'
WARNING: CPU: 39 PID: 3658 at fs/proc/generic.c:715 remove_proc_entry+0x15c/0x170
...trimmed...
Call Trace:
 unregister_irq_proc+0xe3/0x100
 free_desc+0x29/0x70
 irq_free_descs+0x47/0x70
 mp_unmap_irq+0x58/0x60
 acpi_unregister_gsi_ioapic+0x2a/0x40
 acpi_pci_irq_disable+0x78/0xb0
 pci_disable_device+0xd1/0x100
 efx_pci_remove+0xa1/0x1e0 [sfc]
 pci_device_remove+0x38/0xa0
 __device_release_driver+0x177/0x230
 driver_detach+0xcb/0x110
 bus_remove_driver+0x58/0xd0
 pci_unregister_driver+0x2a/0xb0
 efx_exit_module+0x24/0xf40 [sfc]
 __do_sys_delete_module.constprop.0+0x171/0x280
 ? exit_to_user_mode_prepare+0x83/0x1d0
 do_syscall_64+0x3d/0x80
 entry_SYSCALL_64_after_hwframe+0x44/0xae
RIP: 0033:0x7f9f9385800b
...trimmed...

Signed-off-by: Íñigo Huguet <ihuguet@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/nic.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c
index d1e908846f5d..22fbb0ae77fb 100644
--- a/drivers/net/ethernet/sfc/nic.c
+++ b/drivers/net/ethernet/sfc/nic.c
@@ -90,6 +90,7 @@ int efx_nic_init_interrupt(struct efx_nic *efx)
 				  efx->pci_dev->irq);
 			goto fail1;
 		}
+		efx->irqs_hooked = true;
 		return 0;
 	}
 
-- 
cgit v1.2.3


From e29f011e8fc04b2cdc742a2b9bbfa1b62518381a Mon Sep 17 00:00:00 2001
From: Francesco Ruggeri <fruggeri@arista.com>
Date: Fri, 21 May 2021 13:21:14 -0700
Subject: ipv6: record frag_max_size in atomic fragments in input path

Commit dbd1759e6a9c ("ipv6: on reassembly, record frag_max_size")
filled the frag_max_size field in IP6CB in the input path.
The field should also be filled in case of atomic fragments.

Fixes: dbd1759e6a9c ('ipv6: on reassembly, record frag_max_size')
Signed-off-by: Francesco Ruggeri <fruggeri@arista.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/reassembly.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 47a0dc46cbdb..28e44782c94d 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -343,7 +343,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 	hdr = ipv6_hdr(skb);
 	fhdr = (struct frag_hdr *)skb_transport_header(skb);
 
-	if (!(fhdr->frag_off & htons(0xFFF9))) {
+	if (!(fhdr->frag_off & htons(IP6_OFFSET | IP6_MF))) {
 		/* It is not a fragmented frame */
 		skb->transport_header += sizeof(struct frag_hdr);
 		__IP6_INC_STATS(net,
@@ -351,6 +351,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 
 		IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
 		IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
+		IP6CB(skb)->frag_max_size = ntohs(hdr->payload_len) +
+					    sizeof(struct ipv6hdr);
 		return 1;
 	}
 
-- 
cgit v1.2.3


From f9f74dc218c3cfdf0b7f9a95ddae81a081bdb79d Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sat, 22 May 2021 17:41:43 -0700
Subject: mm/shuffle: fix section mismatch warning

clang sometimes decides not to inline shuffle_zone(), but it calls a
__meminit function.  Without the extra __meminit annotation we get this
warning:

  WARNING: modpost: vmlinux.o(.text+0x2a86d4): Section mismatch in reference from the function shuffle_zone() to the function .meminit.text:__shuffle_zone()
  The function shuffle_zone() references
  the function __meminit __shuffle_zone().
  This is often because shuffle_zone lacks a __meminit
  annotation or the annotation of __shuffle_zone is wrong.

shuffle_free_memory() did not show the same problem in my tests, but it
could happen in theory as well, so mark both as __meminit.

Link: https://lkml.kernel.org/r/20210514135952.2928094-1-arnd@kernel.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/shuffle.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/shuffle.h b/mm/shuffle.h
index 71b784f0b7c3..cec62984f7d3 100644
--- a/mm/shuffle.h
+++ b/mm/shuffle.h
@@ -10,7 +10,7 @@
 DECLARE_STATIC_KEY_FALSE(page_alloc_shuffle_key);
 extern void __shuffle_free_memory(pg_data_t *pgdat);
 extern bool shuffle_pick_tail(void);
-static inline void shuffle_free_memory(pg_data_t *pgdat)
+static inline void __meminit shuffle_free_memory(pg_data_t *pgdat)
 {
 	if (!static_branch_unlikely(&page_alloc_shuffle_key))
 		return;
@@ -18,7 +18,7 @@ static inline void shuffle_free_memory(pg_data_t *pgdat)
 }
 
 extern void __shuffle_zone(struct zone *z);
-static inline void shuffle_zone(struct zone *z)
+static inline void __meminit shuffle_zone(struct zone *z)
 {
 	if (!static_branch_unlikely(&page_alloc_shuffle_key))
 		return;
-- 
cgit v1.2.3


From f10628d2f613195132532e0fbda439eeed8d12a2 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Sat, 22 May 2021 17:41:46 -0700
Subject: Revert "mm/gup: check page posion status for coredump."

While reviewing [1] I came across commit d3378e86d182 ("mm/gup: check
page posion status for coredump.") and noticed that this patch is broken
in two ways.  First it doesn't really prevent hwpoison pages from being
dumped because hwpoison pages can be marked asynchornously at any time
after the check.  Secondly, and more importantly, the patch introduces a
ref count leak because get_dump_page takes a reference on the page which
is not released.

It also seems that the patch was merged incorrectly because there were
follow up changes not included as well as discussions on how to address
the underlying problem [2]

Therefore revert the original patch.

Link: http://lkml.kernel.org/r/20210429122519.15183-4-david@redhat.com [1]
Link: http://lkml.kernel.org/r/57ac524c-b49a-99ec-c1e4-ef5027bfb61b@redhat.com [2]
Link: https://lkml.kernel.org/r/20210505135407.31590-1-mhocko@kernel.org
Fixes: d3378e86d182 ("mm/gup: check page posion status for coredump.")
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: Aili Yao <yaoaili@kingsoft.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/gup.c      |  4 ----
 mm/internal.h | 20 --------------------
 2 files changed, 24 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index 0697134b6a12..3ded6a5f26b2 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1593,10 +1593,6 @@ struct page *get_dump_page(unsigned long addr)
 				      FOLL_FORCE | FOLL_DUMP | FOLL_GET);
 	if (locked)
 		mmap_read_unlock(mm);
-
-	if (ret == 1 && is_page_poisoned(page))
-		return NULL;
-
 	return (ret == 1) ? page : NULL;
 }
 #endif /* CONFIG_ELF_CORE */
diff --git a/mm/internal.h b/mm/internal.h
index 54bd0dc2c23c..2f1182948aa6 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -96,26 +96,6 @@ static inline void set_page_refcounted(struct page *page)
 	set_page_count(page, 1);
 }
 
-/*
- * When kernel touch the user page, the user page may be have been marked
- * poison but still mapped in user space, if without this page, the kernel
- * can guarantee the data integrity and operation success, the kernel is
- * better to check the posion status and avoid touching it, be good not to
- * panic, coredump for process fatal signal is a sample case matching this
- * scenario. Or if kernel can't guarantee the data integrity, it's better
- * not to call this function, let kernel touch the poison page and get to
- * panic.
- */
-static inline bool is_page_poisoned(struct page *page)
-{
-	if (PageHWPoison(page))
-		return true;
-	else if (PageHuge(page) && PageHWPoison(compound_head(page)))
-		return true;
-
-	return false;
-}
-
 extern unsigned long highest_memmap_pfn;
 
 /*
-- 
cgit v1.2.3


From a11ddb37bf367e6b5239b95ca759e5389bb46048 Mon Sep 17 00:00:00 2001
From: Varad Gautam <varad.gautam@suse.com>
Date: Sat, 22 May 2021 17:41:49 -0700
Subject: ipc/mqueue, msg, sem: avoid relying on a stack reference past its
 expiry

do_mq_timedreceive calls wq_sleep with a stack local address.  The
sender (do_mq_timedsend) uses this address to later call pipelined_send.

This leads to a very hard to trigger race where a do_mq_timedreceive
call might return and leave do_mq_timedsend to rely on an invalid
address, causing the following crash:

  RIP: 0010:wake_q_add_safe+0x13/0x60
  Call Trace:
   __x64_sys_mq_timedsend+0x2a9/0x490
   do_syscall_64+0x80/0x680
   entry_SYSCALL_64_after_hwframe+0x44/0xa9
  RIP: 0033:0x7f5928e40343

The race occurs as:

1. do_mq_timedreceive calls wq_sleep with the address of `struct
   ext_wait_queue` on function stack (aliased as `ewq_addr` here) - it
   holds a valid `struct ext_wait_queue *` as long as the stack has not
   been overwritten.

2. `ewq_addr` gets added to info->e_wait_q[RECV].list in wq_add, and
   do_mq_timedsend receives it via wq_get_first_waiter(info, RECV) to call
   __pipelined_op.

3. Sender calls __pipelined_op::smp_store_release(&this->state,
   STATE_READY).  Here is where the race window begins.  (`this` is
   `ewq_addr`.)

4. If the receiver wakes up now in do_mq_timedreceive::wq_sleep, it
   will see `state == STATE_READY` and break.

5. do_mq_timedreceive returns, and `ewq_addr` is no longer guaranteed
   to be a `struct ext_wait_queue *` since it was on do_mq_timedreceive's
   stack.  (Although the address may not get overwritten until another
   function happens to touch it, which means it can persist around for an
   indefinite time.)

6. do_mq_timedsend::__pipelined_op() still believes `ewq_addr` is a
   `struct ext_wait_queue *`, and uses it to find a task_struct to pass to
   the wake_q_add_safe call.  In the lucky case where nothing has
   overwritten `ewq_addr` yet, `ewq_addr->task` is the right task_struct.
   In the unlucky case, __pipelined_op::wake_q_add_safe gets handed a
   bogus address as the receiver's task_struct causing the crash.

do_mq_timedsend::__pipelined_op() should not dereference `this` after
setting STATE_READY, as the receiver counterpart is now free to return.
Change __pipelined_op to call wake_q_add_safe on the receiver's
task_struct returned by get_task_struct, instead of dereferencing `this`
which sits on the receiver's stack.

As Manfred pointed out, the race potentially also exists in
ipc/msg.c::expunge_all and ipc/sem.c::wake_up_sem_queue_prepare.  Fix
those in the same way.

Link: https://lkml.kernel.org/r/20210510102950.12551-1-varad.gautam@suse.com
Fixes: c5b2cbdbdac563 ("ipc/mqueue.c: update/document memory barriers")
Fixes: 8116b54e7e23ef ("ipc/sem.c: document and update memory barriers")
Fixes: 0d97a82ba830d8 ("ipc/msg.c: update and document memory barriers")
Signed-off-by: Varad Gautam <varad.gautam@suse.com>
Reported-by: Matthias von Faber <matthias.vonfaber@aox-tech.de>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Acked-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Christian Brauner <christian.brauner@ubuntu.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/mqueue.c | 6 ++++--
 ipc/msg.c    | 6 ++++--
 ipc/sem.c    | 6 ++++--
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 8031464ed4ae..4e4e61111500 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -1004,12 +1004,14 @@ static inline void __pipelined_op(struct wake_q_head *wake_q,
 				  struct mqueue_inode_info *info,
 				  struct ext_wait_queue *this)
 {
+	struct task_struct *task;
+
 	list_del(&this->list);
-	get_task_struct(this->task);
+	task = get_task_struct(this->task);
 
 	/* see MQ_BARRIER for purpose/pairing */
 	smp_store_release(&this->state, STATE_READY);
-	wake_q_add_safe(wake_q, this->task);
+	wake_q_add_safe(wake_q, task);
 }
 
 /* pipelined_send() - send a message directly to the task waiting in
diff --git a/ipc/msg.c b/ipc/msg.c
index acd1bc7af55a..6e6c8e0c9380 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -251,11 +251,13 @@ static void expunge_all(struct msg_queue *msq, int res,
 	struct msg_receiver *msr, *t;
 
 	list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
-		get_task_struct(msr->r_tsk);
+		struct task_struct *r_tsk;
+
+		r_tsk = get_task_struct(msr->r_tsk);
 
 		/* see MSG_BARRIER for purpose/pairing */
 		smp_store_release(&msr->r_msg, ERR_PTR(res));
-		wake_q_add_safe(wake_q, msr->r_tsk);
+		wake_q_add_safe(wake_q, r_tsk);
 	}
 }
 
diff --git a/ipc/sem.c b/ipc/sem.c
index e0ec239680cb..bf534c74293e 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -784,12 +784,14 @@ would_block:
 static inline void wake_up_sem_queue_prepare(struct sem_queue *q, int error,
 					     struct wake_q_head *wake_q)
 {
-	get_task_struct(q->sleeper);
+	struct task_struct *sleeper;
+
+	sleeper = get_task_struct(q->sleeper);
 
 	/* see SEM_BARRIER_2 for purpose/pairing */
 	smp_store_release(&q->status, error);
 
-	wake_q_add_safe(wake_q, q->sleeper);
+	wake_q_add_safe(wake_q, sleeper);
 }
 
 static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
-- 
cgit v1.2.3


From 4d1cd3b2c5c1c32826454de3a18c6183238d47ed Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Sat, 22 May 2021 17:41:53 -0700
Subject: tools/testing/selftests/exec: fix link error

Fix the link error by adding '-static':

  gcc -Wall  -Wl,-z,max-page-size=0x1000 -pie load_address.c -o /home/yang/linux/tools/testing/selftests/exec/load_address_4096
  /usr/bin/ld: /tmp/ccopEGun.o: relocation R_AARCH64_ADR_PREL_PG_HI21 against symbol `stderr@@GLIBC_2.17' which may bind externally can not be used when making a shared object; recompile with -fPIC
  /usr/bin/ld: /tmp/ccopEGun.o(.text+0x158): unresolvable R_AARCH64_ADR_PREL_PG_HI21 relocation against symbol `stderr@@GLIBC_2.17'
  /usr/bin/ld: final link failed: bad value
  collect2: error: ld returned 1 exit status
  make: *** [Makefile:25: tools/testing/selftests/exec/load_address_4096] Error 1

Link: https://lkml.kernel.org/r/20210514092422.2367367-1-yangyingliang@huawei.com
Fixes: 206e22f01941 ("tools/testing/selftests: add self-test for verifying load alignment")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Cc: Chris Kennelly <ckennelly@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/exec/Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index cf69b2fcce59..dd61118df66e 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -28,8 +28,8 @@ $(OUTPUT)/execveat.denatured: $(OUTPUT)/execveat
 	cp $< $@
 	chmod -x $@
 $(OUTPUT)/load_address_4096: load_address.c
-	$(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -pie $< -o $@
+	$(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -pie -static $< -o $@
 $(OUTPUT)/load_address_2097152: load_address.c
-	$(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -pie $< -o $@
+	$(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -pie -static $< -o $@
 $(OUTPUT)/load_address_16777216: load_address.c
-	$(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -pie $< -o $@
+	$(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -pie -static $< -o $@
-- 
cgit v1.2.3


From f70b00496f2a0669fdb19a783e613bdbdedcf901 Mon Sep 17 00:00:00 2001
From: Alexander Potapenko <glider@google.com>
Date: Sat, 22 May 2021 17:41:56 -0700
Subject: kasan: slab: always reset the tag in get_freepointer_safe()

With CONFIG_DEBUG_PAGEALLOC enabled, the kernel should also untag the
object pointer, as done in get_freepointer().

Failing to do so reportedly leads to SLUB freelist corruptions that
manifest as boot-time crashes.

Link: https://lkml.kernel.org/r/20210514072228.534418-1-glider@google.com
Signed-off-by: Alexander Potapenko <glider@google.com>
Cc: Marco Elver <elver@google.com>
Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Elliot Berman <eberman@codeaurora.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/slub.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/slub.c b/mm/slub.c
index 438fa8d4c970..3f96e099817a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -301,6 +301,7 @@ static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
 	if (!debug_pagealloc_enabled_static())
 		return get_freepointer(s, object);
 
+	object = kasan_reset_tag(object);
 	freepointer_addr = (unsigned long)object + s->offset;
 	copy_from_kernel_nofault(&p, (void **)freepointer_addr, sizeof(p));
 	return freelist_ptr(s, p, freepointer_addr);
-- 
cgit v1.2.3


From 0f90b88dbcd1143e0f408502eba0af97429c502a Mon Sep 17 00:00:00 2001
From: Petr Mladek <pmladek@suse.com>
Date: Sat, 22 May 2021 17:41:59 -0700
Subject: watchdog: reliable handling of timestamps

Commit 9bf3bc949f8a ("watchdog: cleanup handling of false positives")
tried to handle a virtual host stopped by the host a more
straightforward and cleaner way.

But it introduced a risk of false softlockup reports.  The virtual host
might be stopped at any time, for example between
kvm_check_and_clear_guest_paused() and is_softlockup().  As a result,
is_softlockup() might read the updated jiffies and detects a softlockup.

A solution might be to put back kvm_check_and_clear_guest_paused() after
is_softlockup() and detect it.  But it would put back the cycle that
complicates the logic.

In fact, the handling of all the timestamps is not reliable.  The code
does not guarantee when and how many times the timestamps are read.  For
example, "period_ts" might be touched anytime also from NMI and re-read in
is_softlockup().  It works just by chance.

Fix all the problems by making the code even more explicit.

1. Make sure that "now" and "period_ts" timestamps are read only once.
   They might be changed at anytime by NMI or when the virtual guest is
   stopped by the host.  Note that "now" timestamp does this implicitly
   because "jiffies" is marked volatile.

2. "now" time must be read first.  The state of "period_ts" will
   decide whether it will be used or the period will get restarted.

3. kvm_check_and_clear_guest_paused() must be called before reading
   "period_ts".  It touches the variable when the guest was stopped.

As a result, "now" timestamp is used only when the watchdog was not
touched and the guest not stopped in the meantime.  "period_ts" is
restarted in all other situations.

Link: https://lkml.kernel.org/r/YKT55gw+RZfyoFf7@alley
Fixes: 9bf3bc949f8aeefeacea4b ("watchdog: cleanup handling of false positives")
Signed-off-by: Petr Mladek <pmladek@suse.com>
Reported-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/watchdog.c | 34 ++++++++++++++++++++--------------
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 7c397907d0e9..92d3bcc5a5e0 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -302,10 +302,10 @@ void touch_softlockup_watchdog_sync(void)
 	__this_cpu_write(watchdog_report_ts, SOFTLOCKUP_DELAY_REPORT);
 }
 
-static int is_softlockup(unsigned long touch_ts, unsigned long period_ts)
+static int is_softlockup(unsigned long touch_ts,
+			 unsigned long period_ts,
+			 unsigned long now)
 {
-	unsigned long now = get_timestamp();
-
 	if ((watchdog_enabled & SOFT_WATCHDOG_ENABLED) && watchdog_thresh){
 		/* Warn about unreasonable delays. */
 		if (time_after(now, period_ts + get_softlockup_thresh()))
@@ -353,8 +353,7 @@ static int softlockup_fn(void *data)
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 {
-	unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
-	unsigned long period_ts = __this_cpu_read(watchdog_report_ts);
+	unsigned long touch_ts, period_ts, now;
 	struct pt_regs *regs = get_irq_regs();
 	int duration;
 	int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
@@ -376,12 +375,23 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	/* .. and repeat */
 	hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
 
+	/*
+	 * Read the current timestamp first. It might become invalid anytime
+	 * when a virtual machine is stopped by the host or when the watchog
+	 * is touched from NMI.
+	 */
+	now = get_timestamp();
 	/*
 	 * If a virtual machine is stopped by the host it can look to
-	 * the watchdog like a soft lockup. Check to see if the host
-	 * stopped the vm before we process the timestamps.
+	 * the watchdog like a soft lockup. This function touches the watchdog.
 	 */
 	kvm_check_and_clear_guest_paused();
+	/*
+	 * The stored timestamp is comparable with @now only when not touched.
+	 * It might get touched anytime from NMI. Make sure that is_softlockup()
+	 * uses the same (valid) value.
+	 */
+	period_ts = READ_ONCE(*this_cpu_ptr(&watchdog_report_ts));
 
 	/* Reset the interval when touched by known problematic code. */
 	if (period_ts == SOFTLOCKUP_DELAY_REPORT) {
@@ -398,13 +408,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 		return HRTIMER_RESTART;
 	}
 
-	/* check for a softlockup
-	 * This is done by making sure a high priority task is
-	 * being scheduled.  The task touches the watchdog to
-	 * indicate it is getting cpu time.  If it hasn't then
-	 * this is a good indication some task is hogging the cpu
-	 */
-	duration = is_softlockup(touch_ts, period_ts);
+	/* Check for a softlockup. */
+	touch_ts = __this_cpu_read(watchdog_touch_ts);
+	duration = is_softlockup(touch_ts, period_ts, now);
 	if (unlikely(duration)) {
 		/*
 		 * Prevent multiple soft-lockup reports if one cpu is already
-- 
cgit v1.2.3


From f747e6667ebb2ffb8133486c9cd19800d72b0d98 Mon Sep 17 00:00:00 2001
From: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Date: Sat, 22 May 2021 17:42:02 -0700
Subject: linux/bits.h: fix compilation error with GENMASK

GENMASK() has an input check which uses __builtin_choose_expr() to
enable a compile time sanity check of its inputs if they are known at
compile time.

However, it turns out that __builtin_constant_p() does not always return
a compile time constant [0].  It was thought this problem was fixed with
gcc 4.9 [1], but apparently this is not the case [2].

Switch to use __is_constexpr() instead which always returns a compile time
constant, regardless of its inputs.

Link: https://lore.kernel.org/lkml/42b4342b-aefc-a16a-0d43-9f9c0d63ba7a@rasmusvillemoes.dk [0]
Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=19449 [1]
Link: https://lore.kernel.org/lkml/1ac7bbc2-45d9-26ed-0b33-bf382b8d858b@I-love.SAKURA.ne.jp [2]
Link: https://lkml.kernel.org/r/20210511203716.117010-1-rikard.falkeborn@gmail.com
Signed-off-by: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Yury Norov <yury.norov@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bits.h        |  2 +-
 include/linux/const.h       |  8 ++++++++
 include/linux/minmax.h      | 10 ++--------
 tools/include/linux/bits.h  |  2 +-
 tools/include/linux/const.h |  8 ++++++++
 5 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/include/linux/bits.h b/include/linux/bits.h
index 7f475d59a097..87d112650dfb 100644
--- a/include/linux/bits.h
+++ b/include/linux/bits.h
@@ -22,7 +22,7 @@
 #include <linux/build_bug.h>
 #define GENMASK_INPUT_CHECK(h, l) \
 	(BUILD_BUG_ON_ZERO(__builtin_choose_expr( \
-		__builtin_constant_p((l) > (h)), (l) > (h), 0)))
+		__is_constexpr((l) > (h)), (l) > (h), 0)))
 #else
 /*
  * BUILD_BUG_ON_ZERO is not available in h files included from asm files,
diff --git a/include/linux/const.h b/include/linux/const.h
index 81b8aae5a855..435ddd72d2c4 100644
--- a/include/linux/const.h
+++ b/include/linux/const.h
@@ -3,4 +3,12 @@
 
 #include <vdso/const.h>
 
+/*
+ * This returns a constant expression while determining if an argument is
+ * a constant expression, most importantly without evaluating the argument.
+ * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de>
+ */
+#define __is_constexpr(x) \
+	(sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8)))
+
 #endif /* _LINUX_CONST_H */
diff --git a/include/linux/minmax.h b/include/linux/minmax.h
index c0f57b0c64d9..5433c08fcc68 100644
--- a/include/linux/minmax.h
+++ b/include/linux/minmax.h
@@ -2,6 +2,8 @@
 #ifndef _LINUX_MINMAX_H
 #define _LINUX_MINMAX_H
 
+#include <linux/const.h>
+
 /*
  * min()/max()/clamp() macros must accomplish three things:
  *
@@ -17,14 +19,6 @@
 #define __typecheck(x, y) \
 	(!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
 
-/*
- * This returns a constant expression while determining if an argument is
- * a constant expression, most importantly without evaluating the argument.
- * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de>
- */
-#define __is_constexpr(x) \
-	(sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8)))
-
 #define __no_side_effects(x, y) \
 		(__is_constexpr(x) && __is_constexpr(y))
 
diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h
index 7f475d59a097..87d112650dfb 100644
--- a/tools/include/linux/bits.h
+++ b/tools/include/linux/bits.h
@@ -22,7 +22,7 @@
 #include <linux/build_bug.h>
 #define GENMASK_INPUT_CHECK(h, l) \
 	(BUILD_BUG_ON_ZERO(__builtin_choose_expr( \
-		__builtin_constant_p((l) > (h)), (l) > (h), 0)))
+		__is_constexpr((l) > (h)), (l) > (h), 0)))
 #else
 /*
  * BUILD_BUG_ON_ZERO is not available in h files included from asm files,
diff --git a/tools/include/linux/const.h b/tools/include/linux/const.h
index 81b8aae5a855..435ddd72d2c4 100644
--- a/tools/include/linux/const.h
+++ b/tools/include/linux/const.h
@@ -3,4 +3,12 @@
 
 #include <vdso/const.h>
 
+/*
+ * This returns a constant expression while determining if an argument is
+ * a constant expression, most importantly without evaluating the argument.
+ * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de>
+ */
+#define __is_constexpr(x) \
+	(sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8)))
+
 #endif /* _LINUX_CONST_H */
-- 
cgit v1.2.3


From 43b2ec977ce33b53e2be30999824b584e2be248a Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 22 May 2021 17:42:05 -0700
Subject: proc: remove Alexey from MAINTAINERS

People Cc me and I don't have time.

Link: https://lkml.kernel.org/r/YKarMxHJBIhMHQIh@localhost.localdomain
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index def1640cc294..81e1edeceae4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14735,7 +14735,6 @@ W:	https://wireless.wiki.kernel.org/en/users/Drivers/p54
 F:	drivers/net/wireless/intersil/prism54/
 
 PROC FILESYSTEM
-R:	Alexey Dobriyan <adobriyan@gmail.com>
 L:	linux-kernel@vger.kernel.org
 L:	linux-fsdevel@vger.kernel.org
 S:	Maintained
-- 
cgit v1.2.3


From 1b6d63938a9d868df01d5bc6e2da212133121b8d Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Sat, 22 May 2021 17:42:08 -0700
Subject: lib: kunit: suppress a compilation warning of frame size

lib/bitfield_kunit.c: In function `test_bitfields_constants':
lib/bitfield_kunit.c:93:1: warning: the frame size of 7456 bytes is larger than 2048 bytes [-Wframe-larger-than=]
 }
 ^

As the description of BITFIELD_KUNIT in lib/Kconfig.debug, it "Only useful
for kernel devs running the KUnit test harness, and not intended for
inclusion into a production build".  Therefore, it is not worth modifying
variable 'test_bitfields_constants' to clear this warning.  Just suppress
it.

Link: https://lkml.kernel.org/r/20210518094533.7652-1-thunder.leizhen@huawei.com
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Cc: Shuah Khan <skhan@linuxfoundation.org>
Cc: Vitor Massaru Iha <vitor@massaru.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/Makefile b/lib/Makefile
index e11cfc18b6c0..2cc359ec1fdd 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -348,6 +348,7 @@ obj-$(CONFIG_OBJAGG) += objagg.o
 obj-$(CONFIG_PLDMFW) += pldmfw/
 
 # KUnit tests
+CFLAGS_bitfield_kunit.o := $(call cc-option,-Wframe-larger-than=10240)
 obj-$(CONFIG_BITFIELD_KUNIT) += bitfield_kunit.o
 obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o
 obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o
-- 
cgit v1.2.3


From e32905e57358fdfb82f9de024534f205b3af7dac Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Sat, 22 May 2021 17:42:11 -0700
Subject: userfaultfd: hugetlbfs: fix new flag usage in error path

In commit d6995da31122 ("hugetlb: use page.private for hugetlb specific
page flags") the use of PagePrivate to indicate a reservation count
should be restored at free time was changed to the hugetlb specific flag
HPageRestoreReserve.  Changes to a userfaultfd error path as well as a
VM_BUG_ON() in remove_inode_hugepages() were overlooked.

Users could see incorrect hugetlb reserve counts if they experience an
error with a UFFDIO_COPY operation.  Specifically, this would be the
result of an unlikely copy_huge_page_from_user error.  There is not an
increased chance of hitting the VM_BUG_ON.

Link: https://lkml.kernel.org/r/20210521233952.236434-1-mike.kravetz@oracle.com
Fixes: d6995da31122 ("hugetlb: use page.private for hugetlb specific page flags")
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Mina Almasry <almasry.mina@google.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hugetlbfs/inode.c |  2 +-
 mm/userfaultfd.c     | 28 ++++++++++++++--------------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 9d9e0097c1d3..55efd3dd04f6 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -529,7 +529,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
 			 * the subpool and global reserve usage count can need
 			 * to be adjusted.
 			 */
-			VM_BUG_ON(PagePrivate(page));
+			VM_BUG_ON(HPageRestoreReserve(page));
 			remove_huge_page(page);
 			freed++;
 			if (!truncate_op) {
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index e14b3820c6a8..63a73e164d55 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -360,38 +360,38 @@ out:
 		 * If a reservation for the page existed in the reservation
 		 * map of a private mapping, the map was modified to indicate
 		 * the reservation was consumed when the page was allocated.
-		 * We clear the PagePrivate flag now so that the global
+		 * We clear the HPageRestoreReserve flag now so that the global
 		 * reserve count will not be incremented in free_huge_page.
 		 * The reservation map will still indicate the reservation
 		 * was consumed and possibly prevent later page allocation.
 		 * This is better than leaking a global reservation.  If no
-		 * reservation existed, it is still safe to clear PagePrivate
-		 * as no adjustments to reservation counts were made during
-		 * allocation.
+		 * reservation existed, it is still safe to clear
+		 * HPageRestoreReserve as no adjustments to reservation counts
+		 * were made during allocation.
 		 *
 		 * The reservation map for shared mappings indicates which
 		 * pages have reservations.  When a huge page is allocated
 		 * for an address with a reservation, no change is made to
-		 * the reserve map.  In this case PagePrivate will be set
-		 * to indicate that the global reservation count should be
+		 * the reserve map.  In this case HPageRestoreReserve will be
+		 * set to indicate that the global reservation count should be
 		 * incremented when the page is freed.  This is the desired
 		 * behavior.  However, when a huge page is allocated for an
 		 * address without a reservation a reservation entry is added
-		 * to the reservation map, and PagePrivate will not be set.
-		 * When the page is freed, the global reserve count will NOT
-		 * be incremented and it will appear as though we have leaked
-		 * reserved page.  In this case, set PagePrivate so that the
-		 * global reserve count will be incremented to match the
-		 * reservation map entry which was created.
+		 * to the reservation map, and HPageRestoreReserve will not be
+		 * set. When the page is freed, the global reserve count will
+		 * NOT be incremented and it will appear as though we have
+		 * leaked reserved page.  In this case, set HPageRestoreReserve
+		 * so that the global reserve count will be incremented to
+		 * match the reservation map entry which was created.
 		 *
 		 * Note that vm_alloc_shared is based on the flags of the vma
 		 * for which the page was originally allocated.  dst_vma could
 		 * be different or NULL on error.
 		 */
 		if (vm_alloc_shared)
-			SetPagePrivate(page);
+			SetHPageRestoreReserve(page);
 		else
-			ClearPagePrivate(page);
+			ClearHPageRestoreReserve(page);
 		put_page(page);
 	}
 	BUG_ON(copied < 0);
-- 
cgit v1.2.3


From c4681547bcce777daf576925a966ffa824edd09d Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 23 May 2021 11:42:48 -1000
Subject: Linux 5.13-rc3

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 0ed7e061c8e9..e4468353425a 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 13
 SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION = -rc3
 NAME = Frozen Wasteland
 
 # *DOCUMENTATION*
-- 
cgit v1.2.3


From 1e69abf98921fa27e2064970b614502d85230f9f Mon Sep 17 00:00:00 2001
From: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Date: Fri, 21 May 2021 17:46:54 -0700
Subject: MAINTAINERS: Add entries for CBS, ETF and taprio qdiscs

Add Vinicius Costa Gomes as maintainer for these qdiscs.

These qdiscs are all TSN (Time Sensitive Networking) related.

Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Acked-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 251111e5da53..2cc1cb72bc92 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4138,6 +4138,14 @@ S:	Odd Fixes
 F:	Documentation/devicetree/bindings/arm/cavium-thunder2.txt
 F:	arch/arm64/boot/dts/cavium/thunder2-99xx*
 
+CBS/ETF/TAPRIO QDISCS
+M:	Vinicius Costa Gomes <vinicius.gomes@intel.com>
+S:	Maintained
+L:	netdev@vger.kernel.org
+F:	net/sched/sch_cbs.c
+F:	net/sched/sch_etf.c
+F:	net/sched/sch_taprio.c
+
 CC2520 IEEE-802.15.4 RADIO DRIVER
 M:	Varka Bhadram <varkabhadram@gmail.com>
 L:	linux-wpan@vger.kernel.org
-- 
cgit v1.2.3


From ad79fd2c42f7626bdf6935cd72134c2a5a59ff2d Mon Sep 17 00:00:00 2001
From: Stefan Roese <sr@denx.de>
Date: Sat, 22 May 2021 09:56:30 +0200
Subject: net: ethernet: mtk_eth_soc: Fix packet statistics support for
 MT7628/88

The MT7628/88 SoC(s) have other (limited) packet counter registers than
currently supported in the mtk_eth_soc driver. This patch adds support
for reading these registers, so that the packet statistics are correctly
updated.

Additionally the defines for the non-MT7628 variant packet counter
registers are added and used in this patch instead of using hard coded
values.

Signed-off-by: Stefan Roese <sr@denx.de>
Fixes: 296c9120752b ("net: ethernet: mediatek: Add MT7628/88 SoC support")
Cc: Felix Fietkau <nbd@nbd.name>
Cc: John Crispin <john@phrozen.org>
Cc: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Cc: Reto Schneider <code@reto-schneider.ch>
Cc: Reto Schneider <reto.schneider@husqvarnagroup.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 67 +++++++++++++++++++----------
 drivers/net/ethernet/mediatek/mtk_eth_soc.h | 24 ++++++++++-
 2 files changed, 66 insertions(+), 25 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index d6cc06ee0caa..64adfd24e134 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -681,32 +681,53 @@ static int mtk_set_mac_address(struct net_device *dev, void *p)
 void mtk_stats_update_mac(struct mtk_mac *mac)
 {
 	struct mtk_hw_stats *hw_stats = mac->hw_stats;
-	unsigned int base = MTK_GDM1_TX_GBCNT;
-	u64 stats;
-
-	base += hw_stats->reg_offset;
+	struct mtk_eth *eth = mac->hw;
 
 	u64_stats_update_begin(&hw_stats->syncp);
 
-	hw_stats->rx_bytes += mtk_r32(mac->hw, base);
-	stats =  mtk_r32(mac->hw, base + 0x04);
-	if (stats)
-		hw_stats->rx_bytes += (stats << 32);
-	hw_stats->rx_packets += mtk_r32(mac->hw, base + 0x08);
-	hw_stats->rx_overflow += mtk_r32(mac->hw, base + 0x10);
-	hw_stats->rx_fcs_errors += mtk_r32(mac->hw, base + 0x14);
-	hw_stats->rx_short_errors += mtk_r32(mac->hw, base + 0x18);
-	hw_stats->rx_long_errors += mtk_r32(mac->hw, base + 0x1c);
-	hw_stats->rx_checksum_errors += mtk_r32(mac->hw, base + 0x20);
-	hw_stats->rx_flow_control_packets +=
-					mtk_r32(mac->hw, base + 0x24);
-	hw_stats->tx_skip += mtk_r32(mac->hw, base + 0x28);
-	hw_stats->tx_collisions += mtk_r32(mac->hw, base + 0x2c);
-	hw_stats->tx_bytes += mtk_r32(mac->hw, base + 0x30);
-	stats =  mtk_r32(mac->hw, base + 0x34);
-	if (stats)
-		hw_stats->tx_bytes += (stats << 32);
-	hw_stats->tx_packets += mtk_r32(mac->hw, base + 0x38);
+	if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
+		hw_stats->tx_packets += mtk_r32(mac->hw, MT7628_SDM_TPCNT);
+		hw_stats->tx_bytes += mtk_r32(mac->hw, MT7628_SDM_TBCNT);
+		hw_stats->rx_packets += mtk_r32(mac->hw, MT7628_SDM_RPCNT);
+		hw_stats->rx_bytes += mtk_r32(mac->hw, MT7628_SDM_RBCNT);
+		hw_stats->rx_checksum_errors +=
+			mtk_r32(mac->hw, MT7628_SDM_CS_ERR);
+	} else {
+		unsigned int offs = hw_stats->reg_offset;
+		u64 stats;
+
+		hw_stats->rx_bytes += mtk_r32(mac->hw,
+					      MTK_GDM1_RX_GBCNT_L + offs);
+		stats = mtk_r32(mac->hw, MTK_GDM1_RX_GBCNT_H + offs);
+		if (stats)
+			hw_stats->rx_bytes += (stats << 32);
+		hw_stats->rx_packets +=
+			mtk_r32(mac->hw, MTK_GDM1_RX_GPCNT + offs);
+		hw_stats->rx_overflow +=
+			mtk_r32(mac->hw, MTK_GDM1_RX_OERCNT + offs);
+		hw_stats->rx_fcs_errors +=
+			mtk_r32(mac->hw, MTK_GDM1_RX_FERCNT + offs);
+		hw_stats->rx_short_errors +=
+			mtk_r32(mac->hw, MTK_GDM1_RX_SERCNT + offs);
+		hw_stats->rx_long_errors +=
+			mtk_r32(mac->hw, MTK_GDM1_RX_LENCNT + offs);
+		hw_stats->rx_checksum_errors +=
+			mtk_r32(mac->hw, MTK_GDM1_RX_CERCNT + offs);
+		hw_stats->rx_flow_control_packets +=
+			mtk_r32(mac->hw, MTK_GDM1_RX_FCCNT + offs);
+		hw_stats->tx_skip +=
+			mtk_r32(mac->hw, MTK_GDM1_TX_SKIPCNT + offs);
+		hw_stats->tx_collisions +=
+			mtk_r32(mac->hw, MTK_GDM1_TX_COLCNT + offs);
+		hw_stats->tx_bytes +=
+			mtk_r32(mac->hw, MTK_GDM1_TX_GBCNT_L + offs);
+		stats =  mtk_r32(mac->hw, MTK_GDM1_TX_GBCNT_H + offs);
+		if (stats)
+			hw_stats->tx_bytes += (stats << 32);
+		hw_stats->tx_packets +=
+			mtk_r32(mac->hw, MTK_GDM1_TX_GPCNT + offs);
+	}
+
 	u64_stats_update_end(&hw_stats->syncp);
 }
 
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 11331b44ba07..5ef70dd8b49c 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -278,8 +278,21 @@
 /* QDMA FQ Free Page Buffer Length Register */
 #define MTK_QDMA_FQ_BLEN	0x1B2C
 
-/* GMA1 Received Good Byte Count Register */
-#define MTK_GDM1_TX_GBCNT	0x2400
+/* GMA1 counter / statics register */
+#define MTK_GDM1_RX_GBCNT_L	0x2400
+#define MTK_GDM1_RX_GBCNT_H	0x2404
+#define MTK_GDM1_RX_GPCNT	0x2408
+#define MTK_GDM1_RX_OERCNT	0x2410
+#define MTK_GDM1_RX_FERCNT	0x2414
+#define MTK_GDM1_RX_SERCNT	0x2418
+#define MTK_GDM1_RX_LENCNT	0x241c
+#define MTK_GDM1_RX_CERCNT	0x2420
+#define MTK_GDM1_RX_FCCNT	0x2424
+#define MTK_GDM1_TX_SKIPCNT	0x2428
+#define MTK_GDM1_TX_COLCNT	0x242c
+#define MTK_GDM1_TX_GBCNT_L	0x2430
+#define MTK_GDM1_TX_GBCNT_H	0x2434
+#define MTK_GDM1_TX_GPCNT	0x2438
 #define MTK_STAT_OFFSET		0x40
 
 /* QDMA descriptor txd4 */
@@ -502,6 +515,13 @@
 #define MT7628_SDM_MAC_ADRL	(MT7628_SDM_OFFSET + 0x0c)
 #define MT7628_SDM_MAC_ADRH	(MT7628_SDM_OFFSET + 0x10)
 
+/* Counter / stat register */
+#define MT7628_SDM_TPCNT	(MT7628_SDM_OFFSET + 0x100)
+#define MT7628_SDM_TBCNT	(MT7628_SDM_OFFSET + 0x104)
+#define MT7628_SDM_RPCNT	(MT7628_SDM_OFFSET + 0x108)
+#define MT7628_SDM_RBCNT	(MT7628_SDM_OFFSET + 0x10c)
+#define MT7628_SDM_CS_ERR	(MT7628_SDM_OFFSET + 0x110)
+
 struct mtk_rx_dma {
 	unsigned int rxd1;
 	unsigned int rxd2;
-- 
cgit v1.2.3


From 5eff1461a6dec84f04fafa9128548bad51d96147 Mon Sep 17 00:00:00 2001
From: Zong Li <zong.li@sifive.com>
Date: Sat, 22 May 2021 17:16:11 +0800
Subject: net: macb: ensure the device is available before accessing GEMGXL
 control registers

If runtime power menagement is enabled, the gigabit ethernet PLL would
be disabled after macb_probe(). During this period of time, the system
would hang up if we try to access GEMGXL control registers.

We can't put runtime_pm_get/runtime_pm_put/ there due to the issue of
sleep inside atomic section (7fa2955ff70ce453 ("sh_eth: Fix sleeping
function called from invalid context"). Add netif_running checking to
ensure the device is available before accessing GEMGXL device.

Changed in v2:
 - Use netif_running instead of its own flag

Signed-off-by: Zong Li <zong.li@sifive.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cadence/macb_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 6bc7d41d519b..a0c7b1167dbb 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -2867,6 +2867,9 @@ static struct net_device_stats *gem_get_stats(struct macb *bp)
 	struct gem_stats *hwstat = &bp->hw_stats.gem;
 	struct net_device_stats *nstat = &bp->dev->stats;
 
+	if (!netif_running(bp->dev))
+		return nstat;
+
 	gem_update_stats(bp);
 
 	nstat->rx_errors = (hwstat->rx_frame_check_sequence_errors +
-- 
cgit v1.2.3


From 3a62fed2fd7b6fea96d720e779cafc30dfb3a22e Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Sat, 22 May 2021 15:14:45 +0200
Subject: net/sched: fq_pie: re-factor fix for fq_pie endless loop

the patch that fixed an endless loop in_fq_pie_init() was not considering
that 65535 is a valid class id. The correct bugfix for this infinite loop
is to change 'idx' to become an u32, like Colin proposed in the past [1].

Fix this as follows:
 - restore 65536 as maximum possible values of 'flows_cnt'
 - use u32 'idx' when iterating on 'q->flows'
 - fix the TDC selftest

This reverts commit bb2f930d6dd708469a587dc9ed1efe1ef969c0bf.

[1] https://lore.kernel.org/netdev/20210407163808.499027-1-colin.king@canonical.com/

CC: Colin Ian King <colin.king@canonical.com>
CC: stable@vger.kernel.org
Fixes: bb2f930d6dd7 ("net/sched: fix infinite loop in sch_fq_pie")
Fixes: ec97ecf1ebe4 ("net: sched: add Flow Queue PIE packet scheduler")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_fq_pie.c                                         | 10 +++++-----
 tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json |  8 ++++----
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index 949163fe68af..266c7c1869d9 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -297,9 +297,9 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt,
 			goto flow_error;
 		}
 		q->flows_cnt = nla_get_u32(tb[TCA_FQ_PIE_FLOWS]);
-		if (!q->flows_cnt || q->flows_cnt >= 65536) {
+		if (!q->flows_cnt || q->flows_cnt > 65536) {
 			NL_SET_ERR_MSG_MOD(extack,
-					   "Number of flows must range in [1..65535]");
+					   "Number of flows must range in [1..65536]");
 			goto flow_error;
 		}
 	}
@@ -367,7 +367,7 @@ static void fq_pie_timer(struct timer_list *t)
 	struct fq_pie_sched_data *q = from_timer(q, t, adapt_timer);
 	struct Qdisc *sch = q->sch;
 	spinlock_t *root_lock; /* to lock qdisc for probability calculations */
-	u16 idx;
+	u32 idx;
 
 	root_lock = qdisc_lock(qdisc_root_sleeping(sch));
 	spin_lock(root_lock);
@@ -388,7 +388,7 @@ static int fq_pie_init(struct Qdisc *sch, struct nlattr *opt,
 {
 	struct fq_pie_sched_data *q = qdisc_priv(sch);
 	int err;
-	u16 idx;
+	u32 idx;
 
 	pie_params_init(&q->p_params);
 	sch->limit = 10 * 1024;
@@ -500,7 +500,7 @@ static int fq_pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 static void fq_pie_reset(struct Qdisc *sch)
 {
 	struct fq_pie_sched_data *q = qdisc_priv(sch);
-	u16 idx;
+	u32 idx;
 
 	INIT_LIST_HEAD(&q->new_flows);
 	INIT_LIST_HEAD(&q->old_flows);
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json
index 1cda2e11b3ad..773c5027553d 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json
@@ -9,11 +9,11 @@
         "setup": [
             "$IP link add dev $DUMMY type dummy || /bin/true"
         ],
-        "cmdUnderTest": "$TC qdisc add dev $DUMMY root fq_pie flows 65536",
-        "expExitCode": "2",
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq_pie flows 65536",
+        "expExitCode": "0",
         "verifyCmd": "$TC qdisc show dev $DUMMY",
-        "matchPattern": "qdisc",
-        "matchCount": "0",
+        "matchPattern": "qdisc fq_pie 1: root refcnt 2 limit 10240p flows 65536",
+        "matchCount": "1",
         "teardown": [
             "$IP link del dev $DUMMY"
         ]
-- 
cgit v1.2.3


From e70f7a11876a1a788ceadf75e9e5f7af2c868680 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Sat, 22 May 2021 15:15:13 +0200
Subject: net/sched: fq_pie: fix OOB access in the traffic path

the following script:

  # tc qdisc add dev eth0 handle 0x1 root fq_pie flows 2
  # tc qdisc add dev eth0 clsact
  # tc filter add dev eth0 egress matchall action skbedit priority 0x10002
  # ping 192.0.2.2 -I eth0 -c2 -w1 -q

produces the following splat:

 BUG: KASAN: slab-out-of-bounds in fq_pie_qdisc_enqueue+0x1314/0x19d0 [sch_fq_pie]
 Read of size 4 at addr ffff888171306924 by task ping/942

 CPU: 3 PID: 942 Comm: ping Not tainted 5.12.0+ #441
 Hardware name: Red Hat KVM, BIOS 1.11.1-4.module+el8.1.0+4066+0f1aadab 04/01/2014
 Call Trace:
  dump_stack+0x92/0xc1
  print_address_description.constprop.7+0x1a/0x150
  kasan_report.cold.13+0x7f/0x111
  fq_pie_qdisc_enqueue+0x1314/0x19d0 [sch_fq_pie]
  __dev_queue_xmit+0x1034/0x2b10
  ip_finish_output2+0xc62/0x2120
  __ip_finish_output+0x553/0xea0
  ip_output+0x1ca/0x4d0
  ip_send_skb+0x37/0xa0
  raw_sendmsg+0x1c4b/0x2d00
  sock_sendmsg+0xdb/0x110
  __sys_sendto+0x1d7/0x2b0
  __x64_sys_sendto+0xdd/0x1b0
  do_syscall_64+0x3c/0x80
  entry_SYSCALL_64_after_hwframe+0x44/0xae
 RIP: 0033:0x7fe69735c3eb
 Code: 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 f3 0f 1e fa 48 8d 05 75 42 2c 00 41 89 ca 8b 00 85 c0 75 14 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 75 c3 0f 1f 40 00 41 57 4d 89 c7 41 56 41 89
 RSP: 002b:00007fff06d7fb38 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
 RAX: ffffffffffffffda RBX: 000055e961413700 RCX: 00007fe69735c3eb
 RDX: 0000000000000040 RSI: 000055e961413700 RDI: 0000000000000003
 RBP: 0000000000000040 R08: 000055e961410500 R09: 0000000000000010
 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fff06d81260
 R13: 00007fff06d7fb40 R14: 00007fff06d7fc30 R15: 000055e96140f0a0

 Allocated by task 917:
  kasan_save_stack+0x19/0x40
  __kasan_kmalloc+0x7f/0xa0
  __kmalloc_node+0x139/0x280
  fq_pie_init+0x555/0x8e8 [sch_fq_pie]
  qdisc_create+0x407/0x11b0
  tc_modify_qdisc+0x3c2/0x17e0
  rtnetlink_rcv_msg+0x346/0x8e0
  netlink_rcv_skb+0x120/0x380
  netlink_unicast+0x439/0x630
  netlink_sendmsg+0x719/0xbf0
  sock_sendmsg+0xe2/0x110
  ____sys_sendmsg+0x5ba/0x890
  ___sys_sendmsg+0xe9/0x160
  __sys_sendmsg+0xd3/0x170
  do_syscall_64+0x3c/0x80
  entry_SYSCALL_64_after_hwframe+0x44/0xae

 The buggy address belongs to the object at ffff888171306800
  which belongs to the cache kmalloc-256 of size 256
 The buggy address is located 36 bytes to the right of
  256-byte region [ffff888171306800, ffff888171306900)
 The buggy address belongs to the page:
 page:00000000bcfb624e refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x171306
 head:00000000bcfb624e order:1 compound_mapcount:0
 flags: 0x17ffffc0010200(slab|head|node=0|zone=2|lastcpupid=0x1fffff)
 raw: 0017ffffc0010200 dead000000000100 dead000000000122 ffff888100042b40
 raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000
 page dumped because: kasan: bad access detected

 Memory state around the buggy address:
  ffff888171306800: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  ffff888171306880: 00 00 00 00 00 00 00 00 00 00 00 00 fc fc fc fc
 >ffff888171306900: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
                                ^
  ffff888171306980: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
  ffff888171306a00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb

fix fq_pie traffic path to avoid selecting 'q->flows + q->flows_cnt' as a
valid flow: it's an address beyond the allocated memory.

Fixes: ec97ecf1ebe4 ("net: sched: add Flow Queue PIE packet scheduler")
CC: stable@vger.kernel.org
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_fq_pie.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index 266c7c1869d9..cac684952edc 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -138,8 +138,15 @@ static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 
 	/* Classifies packet into corresponding flow */
 	idx = fq_pie_classify(skb, sch, &ret);
-	sel_flow = &q->flows[idx];
+	if (idx == 0) {
+		if (ret & __NET_XMIT_BYPASS)
+			qdisc_qstats_drop(sch);
+		__qdisc_drop(skb, to_free);
+		return ret;
+	}
+	idx--;
 
+	sel_flow = &q->flows[idx];
 	/* Checks whether adding a new packet would exceed memory limit */
 	get_pie_cb(skb)->mem_usage = skb->truesize;
 	memory_limited = q->memory_usage > q->memory_limit + skb->truesize;
-- 
cgit v1.2.3


From 474a2ddaa192777522a7499784f1d60691cd831a Mon Sep 17 00:00:00 2001
From: DENG Qingfang <dqfext@gmail.com>
Date: Sun, 23 May 2021 22:51:54 +0800
Subject: net: dsa: mt7530: fix VLAN traffic leaks

PCR_MATRIX field was set to all 1's when VLAN filtering is enabled, but
was not reset when it is disabled, which may cause traffic leaks:

	ip link add br0 type bridge vlan_filtering 1
	ip link add br1 type bridge vlan_filtering 1
	ip link set swp0 master br0
	ip link set swp1 master br1
	ip link set br0 type bridge vlan_filtering 0
	ip link set br1 type bridge vlan_filtering 0
	# traffic in br0 and br1 will start leaking to each other

As port_bridge_{add,del} have set up PCR_MATRIX properly, remove the
PCR_MATRIX write from mt7530_port_set_vlan_aware.

Fixes: 83163f7dca56 ("net: dsa: mediatek: add VLAN support for MT7530")
Signed-off-by: DENG Qingfang <dqfext@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mt7530.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 96f7c9eede35..9b90f3d3a8f5 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -1262,14 +1262,6 @@ mt7530_port_set_vlan_aware(struct dsa_switch *ds, int port)
 {
 	struct mt7530_priv *priv = ds->priv;
 
-	/* The real fabric path would be decided on the membership in the
-	 * entry of VLAN table. PCR_MATRIX set up here with ALL_MEMBERS
-	 * means potential VLAN can be consisting of certain subset of all
-	 * ports.
-	 */
-	mt7530_rmw(priv, MT7530_PCR_P(port),
-		   PCR_MATRIX_MASK, PCR_MATRIX(MT7530_ALL_MEMBERS));
-
 	/* Trapped into security mode allows packet forwarding through VLAN
 	 * table lookup. CPU port is set to fallback mode to let untagged
 	 * frames pass through.
-- 
cgit v1.2.3


From 4dd649d130c634415c26df771e09e373f77fc688 Mon Sep 17 00:00:00 2001
From: Aditya Srivastava <yashsri421@gmail.com>
Date: Mon, 24 May 2021 02:39:09 +0530
Subject: NFC: nfcmrvl: fix kernel-doc syntax in file headers

The opening comment mark '/**' is used for highlighting the beginning of
kernel-doc comments.
The header for drivers/nfc/nfcmrvl follows this syntax, but the content
inside does not comply with kernel-doc.

This line was probably not meant for kernel-doc parsing, but is parsed
due to the presence of kernel-doc like comment syntax(i.e, '/**'), which
causes unexpected warnings from kernel-doc.
For e.g., running scripts/kernel-doc -none on drivers/nfc/nfcmrvl/spi.c
causes warning:
warning: expecting prototype for Marvell NFC(). Prototype was for SPI_WAIT_HANDSHAKE() instead

Provide a simple fix by replacing such occurrences with general comment
format, i.e. '/*', to prevent kernel-doc from parsing it.

Signed-off-by: Aditya Srivastava <yashsri421@gmail.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nfc/nfcmrvl/fw_dnld.h | 2 +-
 drivers/nfc/nfcmrvl/i2c.c     | 2 +-
 drivers/nfc/nfcmrvl/nfcmrvl.h | 2 +-
 drivers/nfc/nfcmrvl/spi.c     | 2 +-
 drivers/nfc/nfcmrvl/uart.c    | 2 +-
 drivers/nfc/nfcmrvl/usb.c     | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/nfc/nfcmrvl/fw_dnld.h b/drivers/nfc/nfcmrvl/fw_dnld.h
index ee4a339c05fd..058ce77b3cbc 100644
--- a/drivers/nfc/nfcmrvl/fw_dnld.h
+++ b/drivers/nfc/nfcmrvl/fw_dnld.h
@@ -1,4 +1,4 @@
-/**
+/*
  * Marvell NFC driver: Firmware downloader
  *
  * Copyright (C) 2015, Marvell International Ltd.
diff --git a/drivers/nfc/nfcmrvl/i2c.c b/drivers/nfc/nfcmrvl/i2c.c
index 18cd96284b77..c5420616b7bc 100644
--- a/drivers/nfc/nfcmrvl/i2c.c
+++ b/drivers/nfc/nfcmrvl/i2c.c
@@ -1,4 +1,4 @@
-/**
+/*
  * Marvell NFC-over-I2C driver: I2C interface related functions
  *
  * Copyright (C) 2015, Marvell International Ltd.
diff --git a/drivers/nfc/nfcmrvl/nfcmrvl.h b/drivers/nfc/nfcmrvl/nfcmrvl.h
index de68ff45e49a..e84ee18c73ae 100644
--- a/drivers/nfc/nfcmrvl/nfcmrvl.h
+++ b/drivers/nfc/nfcmrvl/nfcmrvl.h
@@ -1,4 +1,4 @@
-/**
+/*
  * Marvell NFC driver
  *
  * Copyright (C) 2014-2015, Marvell International Ltd.
diff --git a/drivers/nfc/nfcmrvl/spi.c b/drivers/nfc/nfcmrvl/spi.c
index 8e0ddb434770..dec0d3eb3648 100644
--- a/drivers/nfc/nfcmrvl/spi.c
+++ b/drivers/nfc/nfcmrvl/spi.c
@@ -1,4 +1,4 @@
-/**
+/*
  * Marvell NFC-over-SPI driver: SPI interface related functions
  *
  * Copyright (C) 2015, Marvell International Ltd.
diff --git a/drivers/nfc/nfcmrvl/uart.c b/drivers/nfc/nfcmrvl/uart.c
index e5a622ce4b95..7194dd7ef0f1 100644
--- a/drivers/nfc/nfcmrvl/uart.c
+++ b/drivers/nfc/nfcmrvl/uart.c
@@ -1,4 +1,4 @@
-/**
+/*
  * Marvell NFC-over-UART driver
  *
  * Copyright (C) 2015, Marvell International Ltd.
diff --git a/drivers/nfc/nfcmrvl/usb.c b/drivers/nfc/nfcmrvl/usb.c
index 888e298f610b..bcd563cb556c 100644
--- a/drivers/nfc/nfcmrvl/usb.c
+++ b/drivers/nfc/nfcmrvl/usb.c
@@ -1,4 +1,4 @@
-/**
+/*
  * Marvell NFC-over-USB driver: USB interface related functions
  *
  * Copyright (C) 2014, Marvell International Ltd.
-- 
cgit v1.2.3


From 08b2b6fdf6b26032f025084ce2893924a0cdb4a2 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Mon, 24 May 2021 16:29:43 +0800
Subject: cgroup: fix spelling mistakes

Fix some spelling mistakes in comments:
hierarhcy ==> hierarchy
automtically ==> automatically
overriden ==> overridden
In absense of .. or ==> In absence of .. and
assocaited ==> associated
taget ==> target
initate ==> initiate
succeded ==> succeeded
curremt ==> current
udpated ==> updated

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup-defs.h | 6 +++---
 include/linux/cgroup.h      | 2 +-
 kernel/cgroup/cgroup-v1.c   | 2 +-
 kernel/cgroup/cgroup.c      | 8 ++++----
 kernel/cgroup/cpuset.c      | 2 +-
 kernel/cgroup/rdma.c        | 2 +-
 kernel/cgroup/rstat.c       | 2 +-
 7 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 559ee05f86b2..fb8f6d2cd104 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -232,7 +232,7 @@ struct css_set {
 	struct list_head task_iters;
 
 	/*
-	 * On the default hierarhcy, ->subsys[ssid] may point to a css
+	 * On the default hierarchy, ->subsys[ssid] may point to a css
 	 * attached to an ancestor instead of the cgroup this css_set is
 	 * associated with.  The following node is anchored at
 	 * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to
@@ -668,7 +668,7 @@ struct cgroup_subsys {
 	 */
 	bool threaded:1;
 
-	/* the following two fields are initialized automtically during boot */
+	/* the following two fields are initialized automatically during boot */
 	int id;
 	const char *name;
 
@@ -757,7 +757,7 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {}
  * sock_cgroup_data overloads (prioidx, classid) and the cgroup pointer.
  * On boot, sock_cgroup_data records the cgroup that the sock was created
  * in so that cgroup2 matches can be made; however, once either net_prio or
- * net_cls starts being used, the area is overriden to carry prioidx and/or
+ * net_cls starts being used, the area is overridden to carry prioidx and/or
  * classid.  The two modes are distinguished by whether the lowest bit is
  * set.  Clear bit indicates cgroup pointer while set bit prioidx and
  * classid.
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 4f2f79de083e..6bc9c76680b2 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -32,7 +32,7 @@ struct kernel_clone_args;
 #ifdef CONFIG_CGROUPS
 
 /*
- * All weight knobs on the default hierarhcy should use the following min,
+ * All weight knobs on the default hierarchy should use the following min,
  * default and max values.  The default value is the logarithmic center of
  * MIN and MAX and allows 100x to be expressed in both directions.
  */
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 391aa570369b..8190b6bfc978 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -1001,7 +1001,7 @@ static int check_cgroupfs_options(struct fs_context *fc)
 	ctx->subsys_mask &= enabled;
 
 	/*
-	 * In absense of 'none', 'name=' or subsystem name options,
+	 * In absence of 'none', 'name=' and subsystem name options,
 	 * let's default to 'all'.
 	 */
 	if (!ctx->subsys_mask && !ctx->none && !ctx->name)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index e7a9a2998245..21ecc6ee6a6d 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -468,7 +468,7 @@ static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
  * @cgrp: the cgroup of interest
  * @ss: the subsystem of interest
  *
- * Find and get @cgrp's css assocaited with @ss.  If the css doesn't exist
+ * Find and get @cgrp's css associated with @ss.  If the css doesn't exist
  * or is offline, %NULL is returned.
  */
 static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
@@ -1633,7 +1633,7 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
 
 /**
  * css_clear_dir - remove subsys files in a cgroup directory
- * @css: taget css
+ * @css: target css
  */
 static void css_clear_dir(struct cgroup_subsys_state *css)
 {
@@ -5350,7 +5350,7 @@ out_unlock:
 /*
  * This is called when the refcnt of a css is confirmed to be killed.
  * css_tryget_online() is now guaranteed to fail.  Tell the subsystem to
- * initate destruction and put the css ref from kill_css().
+ * initiate destruction and put the css ref from kill_css().
  */
 static void css_killed_work_fn(struct work_struct *work)
 {
@@ -6052,7 +6052,7 @@ out_revert:
  * @kargs: the arguments passed to create the child process
  *
  * This calls the cancel_fork() callbacks if a fork failed *after*
- * cgroup_can_fork() succeded and cleans up references we took to
+ * cgroup_can_fork() succeeded and cleans up references we took to
  * prepare a new css_set for the child process in cgroup_can_fork().
  */
 void cgroup_cancel_fork(struct task_struct *child,
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index a945504c0ae7..adb5190c4429 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -3376,7 +3376,7 @@ nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
 }
 
 /**
- * cpuset_nodemask_valid_mems_allowed - check nodemask vs. curremt mems_allowed
+ * cpuset_nodemask_valid_mems_allowed - check nodemask vs. current mems_allowed
  * @nodemask: the nodemask to be checked
  *
  * Are any of the nodes in the nodemask allowed in current->mems_allowed?
diff --git a/kernel/cgroup/rdma.c b/kernel/cgroup/rdma.c
index ae042c347c64..3135406608c7 100644
--- a/kernel/cgroup/rdma.c
+++ b/kernel/cgroup/rdma.c
@@ -244,7 +244,7 @@ EXPORT_SYMBOL(rdmacg_uncharge);
  * This function follows charging resource in hierarchical way.
  * It will fail if the charge would cause the new value to exceed the
  * hierarchical limit.
- * Returns 0 if the charge succeded, otherwise -EAGAIN, -ENOMEM or -EINVAL.
+ * Returns 0 if the charge succeeded, otherwise -EAGAIN, -ENOMEM or -EINVAL.
  * Returns pointer to rdmacg for this resource when charging is successful.
  *
  * Charger needs to account resources on two criteria.
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index 3a3fd2993a65..cee265cb535c 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -75,7 +75,7 @@ void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
  * @root: root of the tree to traversal
  * @cpu: target cpu
  *
- * Walks the udpated rstat_cpu tree on @cpu from @root.  %NULL @pos starts
+ * Walks the updated rstat_cpu tree on @cpu from @root.  %NULL @pos starts
  * the traversal and %NULL return indicates the end.  During traversal,
  * each returned cgroup is unlinked from the tree.  Must be called with the
  * matching cgroup_rstat_cpu_lock held.
-- 
cgit v1.2.3


From 9b76eade16423ef06829cccfe3e100cfce31afcd Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Sun, 23 May 2021 14:38:53 +0000
Subject: sch_dsmark: fix a NULL deref in qdisc_reset()

If Qdisc_ops->init() is failed, Qdisc_ops->reset() would be called.
When dsmark_init(Qdisc_ops->init()) is failed, it possibly doesn't
initialize dsmark_qdisc_data->q. But dsmark_reset(Qdisc_ops->reset())
uses dsmark_qdisc_data->q pointer wihtout any null checking.
So, panic would occur.

Test commands:
    sysctl net.core.default_qdisc=dsmark -w
    ip link add dummy0 type dummy
    ip link add vw0 link dummy0 type virt_wifi
    ip link set vw0 up

Splat looks like:
KASAN: null-ptr-deref in range [0x0000000000000018-0x000000000000001f]
CPU: 3 PID: 684 Comm: ip Not tainted 5.12.0+ #910
RIP: 0010:qdisc_reset+0x2b/0x680
Code: 1f 44 00 00 48 b8 00 00 00 00 00 fc ff df 41 57 41 56 41 55 41 54
55 48 89 fd 48 83 c7 18 53 48 89 fa 48 c1 ea 03 48 83 ec 20 <80> 3c 02
00 0f 85 09 06 00 00 4c 8b 65 18 0f 1f 44 00 00 65 8b 1d
RSP: 0018:ffff88800fda6bf8 EFLAGS: 00010282
RAX: dffffc0000000000 RBX: ffff8880050ed800 RCX: 0000000000000000
RDX: 0000000000000003 RSI: ffffffff99e34100 RDI: 0000000000000018
RBP: 0000000000000000 R08: fffffbfff346b553 R09: fffffbfff346b553
R10: 0000000000000001 R11: fffffbfff346b552 R12: ffffffffc0824940
R13: ffff888109e83800 R14: 00000000ffffffff R15: ffffffffc08249e0
FS:  00007f5042287680(0000) GS:ffff888119800000(0000)
knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000055ae1f4dbd90 CR3: 0000000006760002 CR4: 00000000003706e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 ? rcu_read_lock_bh_held+0xa0/0xa0
 dsmark_reset+0x3d/0xf0 [sch_dsmark]
 qdisc_reset+0xa9/0x680
 qdisc_destroy+0x84/0x370
 qdisc_create_dflt+0x1fe/0x380
 attach_one_default_qdisc.constprop.41+0xa4/0x180
 dev_activate+0x4d5/0x8c0
 ? __dev_open+0x268/0x390
 __dev_open+0x270/0x390

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_dsmark.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index cd2748e2d4a2..d320bcfb2da2 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -407,7 +407,8 @@ static void dsmark_reset(struct Qdisc *sch)
 	struct dsmark_qdisc_data *p = qdisc_priv(sch);
 
 	pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
-	qdisc_reset(p->q);
+	if (p->q)
+		qdisc_reset(p->q);
 	sch->qstats.backlog = 0;
 	sch->q.qlen = 0;
 }
-- 
cgit v1.2.3


From 1a44fb38cc65bc30bac490291412aa1940659fe1 Mon Sep 17 00:00:00 2001
From: Hayes Wang <hayeswang@realtek.com>
Date: Mon, 24 May 2021 14:49:42 +0800
Subject: r8152: check the informaton of the device

Verify some fields of the USB descriptor to make sure the driver
could be used by the device.

Besides, remove the check of endpoint number in rtl8152_probe().
usb_find_common_endpoints() includes it.

BugLink: https://syzkaller.appspot.com/bug?id=912c9c373656996801b4de61f1e3cb326fe940aa
Reported-by: syzbot+95afd23673f5dd295c57@syzkaller.appspotmail.com
Fixes: c2198943e33b ("r8152: search the configuration of vendor mode")
Signed-off-by: Hayes Wang <hayeswang@realtek.com>
Reviewed-by: Johan Hovold <johan@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/r8152.c | 42 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 37 insertions(+), 5 deletions(-)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 136ea06540ff..f6abb2fbf972 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -8107,6 +8107,37 @@ static void r8156b_init(struct r8152 *tp)
 	tp->coalesce = 15000;	/* 15 us */
 }
 
+static bool rtl_check_vendor_ok(struct usb_interface *intf)
+{
+	struct usb_host_interface *alt = intf->cur_altsetting;
+	struct usb_endpoint_descriptor *in, *out, *intr;
+
+	if (usb_find_common_endpoints(alt, &in, &out, &intr, NULL) < 0) {
+		dev_err(&intf->dev, "Expected endpoints are not found\n");
+		return false;
+	}
+
+	/* Check Rx endpoint address */
+	if (usb_endpoint_num(in) != 1) {
+		dev_err(&intf->dev, "Invalid Rx endpoint address\n");
+		return false;
+	}
+
+	/* Check Tx endpoint address */
+	if (usb_endpoint_num(out) != 2) {
+		dev_err(&intf->dev, "Invalid Tx endpoint address\n");
+		return false;
+	}
+
+	/* Check interrupt endpoint address */
+	if (usb_endpoint_num(intr) != 3) {
+		dev_err(&intf->dev, "Invalid interrupt endpoint address\n");
+		return false;
+	}
+
+	return true;
+}
+
 static bool rtl_vendor_mode(struct usb_interface *intf)
 {
 	struct usb_host_interface *alt = intf->cur_altsetting;
@@ -8115,12 +8146,15 @@ static bool rtl_vendor_mode(struct usb_interface *intf)
 	int i, num_configs;
 
 	if (alt->desc.bInterfaceClass == USB_CLASS_VENDOR_SPEC)
-		return true;
+		return rtl_check_vendor_ok(intf);
 
 	/* The vendor mode is not always config #1, so to find it out. */
 	udev = interface_to_usbdev(intf);
 	c = udev->config;
 	num_configs = udev->descriptor.bNumConfigurations;
+	if (num_configs < 2)
+		return false;
+
 	for (i = 0; i < num_configs; (i++, c++)) {
 		struct usb_interface_descriptor	*desc = NULL;
 
@@ -8135,7 +8169,8 @@ static bool rtl_vendor_mode(struct usb_interface *intf)
 		}
 	}
 
-	WARN_ON_ONCE(i == num_configs);
+	if (i == num_configs)
+		dev_err(&intf->dev, "Unexpected Device\n");
 
 	return false;
 }
@@ -9381,9 +9416,6 @@ static int rtl8152_probe(struct usb_interface *intf,
 	if (!rtl_vendor_mode(intf))
 		return -ENODEV;
 
-	if (intf->cur_altsetting->desc.bNumEndpoints < 3)
-		return -ENODEV;
-
 	usb_reset_device(udev);
 	netdev = alloc_etherdev(sizeof(struct r8152));
 	if (!netdev) {
-- 
cgit v1.2.3


From 1a6e9a9c68c1f183872e4bcc947382111c2e04eb Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 24 May 2021 11:25:11 +0200
Subject: net: hso: fix control-request directions

The direction of the pipe argument must match the request-type direction
bit or control requests may fail depending on the host-controller-driver
implementation.

Fix the tiocmset and rfkill requests which erroneously used
usb_rcvctrlpipe().

Fixes: 72dc1c096c70 ("HSO: add option hso driver")
Cc: stable@vger.kernel.org      # 2.6.27
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/hso.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index b48b2a25210c..5c779cc0ea11 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -1689,7 +1689,7 @@ static int hso_serial_tiocmset(struct tty_struct *tty,
 	spin_unlock_irqrestore(&serial->serial_lock, flags);
 
 	return usb_control_msg(serial->parent->usb,
-			       usb_rcvctrlpipe(serial->parent->usb, 0), 0x22,
+			       usb_sndctrlpipe(serial->parent->usb, 0), 0x22,
 			       0x21, val, if_num, NULL, 0,
 			       USB_CTRL_SET_TIMEOUT);
 }
@@ -2436,7 +2436,7 @@ static int hso_rfkill_set_block(void *data, bool blocked)
 	if (hso_dev->usb_gone)
 		rv = 0;
 	else
-		rv = usb_control_msg(hso_dev->usb, usb_rcvctrlpipe(hso_dev->usb, 0),
+		rv = usb_control_msg(hso_dev->usb, usb_sndctrlpipe(hso_dev->usb, 0),
 				       enabled ? 0x82 : 0x81, 0x40, 0, 0, NULL, 0,
 				       USB_CTRL_SET_TIMEOUT);
 	mutex_unlock(&hso_dev->mutex);
-- 
cgit v1.2.3


From ba61cf167cb77e54c1ec5adb7aa49a22ab3c9b28 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 24 May 2021 12:25:22 +0300
Subject: net: dsa: sja1105: fix VL lookup command packing for P/Q/R/S

At the beginning of the sja1105_dynamic_config.c file there is a diagram
of the dynamic config interface layout:

 packed_buf

 |
 V
 +-----------------------------------------+------------------+
 |              ENTRY BUFFER               |  COMMAND BUFFER  |
 +-----------------------------------------+------------------+

 <----------------------- packed_size ------------------------>

So in order to pack/unpack the command bits into the buffer,
sja1105_vl_lookup_cmd_packing must first advance the buffer pointer by
the length of the entry. This is similar to what the other *cmd_packing
functions do.

This bug exists because the command packing function for P/Q/R/S was
copied from the E/T generation, and on E/T, the command was actually
embedded within the entry buffer itself.

Fixes: 94f94d4acfb2 ("net: dsa: sja1105: add static tables for virtual links")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_dynamic_config.c | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
index b777d3f37573..12cd04b56803 100644
--- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
@@ -167,9 +167,10 @@ enum sja1105_hostcmd {
 	SJA1105_HOSTCMD_INVALIDATE = 4,
 };
 
+/* Command and entry overlap */
 static void
-sja1105_vl_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
-			      enum packing_op op)
+sja1105et_vl_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
+				enum packing_op op)
 {
 	const int size = SJA1105_SIZE_DYN_CMD;
 
@@ -179,6 +180,20 @@ sja1105_vl_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
 	sja1105_packing(buf, &cmd->index,    9,  0, size, op);
 }
 
+/* Command and entry are separate */
+static void
+sja1105pqrs_vl_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
+				  enum packing_op op)
+{
+	u8 *p = buf + SJA1105_SIZE_VL_LOOKUP_ENTRY;
+	const int size = SJA1105_SIZE_DYN_CMD;
+
+	sja1105_packing(p, &cmd->valid,   31, 31, size, op);
+	sja1105_packing(p, &cmd->errors,  30, 30, size, op);
+	sja1105_packing(p, &cmd->rdwrset, 29, 29, size, op);
+	sja1105_packing(p, &cmd->index,    9,  0, size, op);
+}
+
 static size_t sja1105et_vl_lookup_entry_packing(void *buf, void *entry_ptr,
 						enum packing_op op)
 {
@@ -641,7 +656,7 @@ static size_t sja1105pqrs_cbs_entry_packing(void *buf, void *entry_ptr,
 const struct sja1105_dynamic_table_ops sja1105et_dyn_ops[BLK_IDX_MAX_DYN] = {
 	[BLK_IDX_VL_LOOKUP] = {
 		.entry_packing = sja1105et_vl_lookup_entry_packing,
-		.cmd_packing = sja1105_vl_lookup_cmd_packing,
+		.cmd_packing = sja1105et_vl_lookup_cmd_packing,
 		.access = OP_WRITE,
 		.max_entry_count = SJA1105_MAX_VL_LOOKUP_COUNT,
 		.packed_size = SJA1105ET_SIZE_VL_LOOKUP_DYN_CMD,
@@ -725,7 +740,7 @@ const struct sja1105_dynamic_table_ops sja1105et_dyn_ops[BLK_IDX_MAX_DYN] = {
 const struct sja1105_dynamic_table_ops sja1105pqrs_dyn_ops[BLK_IDX_MAX_DYN] = {
 	[BLK_IDX_VL_LOOKUP] = {
 		.entry_packing = sja1105_vl_lookup_entry_packing,
-		.cmd_packing = sja1105_vl_lookup_cmd_packing,
+		.cmd_packing = sja1105pqrs_vl_lookup_cmd_packing,
 		.access = (OP_READ | OP_WRITE),
 		.max_entry_count = SJA1105_MAX_VL_LOOKUP_COUNT,
 		.packed_size = SJA1105PQRS_SIZE_VL_LOOKUP_DYN_CMD,
-- 
cgit v1.2.3


From dc596e3fe63f88e3d1e509f64e7f761cd4135538 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 24 May 2021 12:25:23 +0300
Subject: net: dsa: sja1105: call dsa_unregister_switch when allocating memory
 fails

Unlike other drivers which pretty much end their .probe() execution with
dsa_register_switch(), the sja1105 does some extra stuff. When that
fails with -ENOMEM, the driver is quick to return that, forgetting to
call dsa_unregister_switch(). Not critical, but a bug nonetheless.

Fixes: 4d7525085a9b ("net: dsa: sja1105: offload the Credit-Based Shaper qdisc")
Fixes: a68578c20a96 ("net: dsa: Make deferred_xmit private to sja1105")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 405024b637d6..2248152b4836 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -3646,8 +3646,10 @@ static int sja1105_probe(struct spi_device *spi)
 		priv->cbs = devm_kcalloc(dev, priv->info->num_cbs_shapers,
 					 sizeof(struct sja1105_cbs_entry),
 					 GFP_KERNEL);
-		if (!priv->cbs)
-			return -ENOMEM;
+		if (!priv->cbs) {
+			rc = -ENOMEM;
+			goto out_unregister_switch;
+		}
 	}
 
 	/* Connections between dsa_port and sja1105_port */
@@ -3672,7 +3674,7 @@ static int sja1105_probe(struct spi_device *spi)
 			dev_err(ds->dev,
 				"failed to create deferred xmit thread: %d\n",
 				rc);
-			goto out;
+			goto out_destroy_workers;
 		}
 		skb_queue_head_init(&sp->xmit_queue);
 		sp->xmit_tpid = ETH_P_SJA1105;
@@ -3682,7 +3684,8 @@ static int sja1105_probe(struct spi_device *spi)
 	}
 
 	return 0;
-out:
+
+out_destroy_workers:
 	while (port-- > 0) {
 		struct sja1105_port *sp = &priv->ports[port];
 
@@ -3691,6 +3694,10 @@ out:
 
 		kthread_destroy_worker(sp->xmit_worker);
 	}
+
+out_unregister_switch:
+	dsa_unregister_switch(ds);
+
 	return rc;
 }
 
-- 
cgit v1.2.3


From cec279a898a3b004411682f212215ccaea1cd0fb Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 24 May 2021 12:25:24 +0300
Subject: net: dsa: sja1105: add error handling in sja1105_setup()

If any of sja1105_static_config_load(), sja1105_clocking_setup() or
sja1105_devlink_setup() fails, we can't just return in the middle of
sja1105_setup() or memory will leak. Add a cleanup path.

Fixes: 0a7bdbc23d8a ("net: dsa: sja1105: move devlink param code to sja1105_devlink.c")
Fixes: 8aa9ebccae87 ("net: dsa: Introduce driver for NXP SJA1105 5-port L2 switch")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 2248152b4836..c7a1be8bbddf 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -2976,13 +2976,13 @@ static int sja1105_setup(struct dsa_switch *ds)
 	rc = sja1105_static_config_load(priv, ports);
 	if (rc < 0) {
 		dev_err(ds->dev, "Failed to load static config: %d\n", rc);
-		return rc;
+		goto out_ptp_clock_unregister;
 	}
 	/* Configure the CGU (PHY link modes and speeds) */
 	rc = sja1105_clocking_setup(priv);
 	if (rc < 0) {
 		dev_err(ds->dev, "Failed to configure MII clocking: %d\n", rc);
-		return rc;
+		goto out_static_config_free;
 	}
 	/* On SJA1105, VLAN filtering per se is always enabled in hardware.
 	 * The only thing we can do to disable it is lie about what the 802.1Q
@@ -3003,7 +3003,7 @@ static int sja1105_setup(struct dsa_switch *ds)
 
 	rc = sja1105_devlink_setup(ds);
 	if (rc < 0)
-		return rc;
+		goto out_static_config_free;
 
 	/* The DSA/switchdev model brings up switch ports in standalone mode by
 	 * default, and that means vlan_filtering is 0 since they're not under
@@ -3012,6 +3012,17 @@ static int sja1105_setup(struct dsa_switch *ds)
 	rtnl_lock();
 	rc = sja1105_setup_8021q_tagging(ds, true);
 	rtnl_unlock();
+	if (rc)
+		goto out_devlink_teardown;
+
+	return 0;
+
+out_devlink_teardown:
+	sja1105_devlink_teardown(ds);
+out_ptp_clock_unregister:
+	sja1105_ptp_clock_unregister(ds);
+out_static_config_free:
+	sja1105_static_config_free(&priv->static_config);
 
 	return rc;
 }
-- 
cgit v1.2.3


From 6729188d2646709941903052e4b78e1d82c239b9 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 24 May 2021 12:25:25 +0300
Subject: net: dsa: sja1105: error out on unsupported PHY mode

The driver continues probing when a port is configured for an
unsupported PHY interface type, instead it should stop.

Fixes: 8aa9ebccae87 ("net: dsa: Introduce driver for NXP SJA1105 5-port L2 switch")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index c7a1be8bbddf..7f7e0424a442 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -207,6 +207,7 @@ static int sja1105_init_mii_settings(struct sja1105_private *priv,
 		default:
 			dev_err(dev, "Unsupported PHY mode %s!\n",
 				phy_modes(ports[i].phy_mode));
+			return -EINVAL;
 		}
 
 		/* Even though the SerDes port is able to drive SGMII autoneg
-- 
cgit v1.2.3


From ed040abca4c1db72dfd3b8483b6ed6bfb7c2571e Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 24 May 2021 12:25:26 +0300
Subject: net: dsa: sja1105: use 4095 as the private VLAN for untagged traffic

One thing became visible when writing the blamed commit, and that was
that STP and PTP frames injected by net/dsa/tag_sja1105.c using the
deferred xmit mechanism are always classified to the pvid of the CPU
port, regardless of whatever VLAN there might be in these packets.

So a decision needed to be taken regarding the mechanism through which
we should ensure that delivery of STP and PTP traffic is possible when
we are in a VLAN awareness mode that involves tag_8021q. This is because
tag_8021q is not concerned with managing the pvid of the CPU port, since
as far as tag_8021q is concerned, no traffic should be sent as untagged
from the CPU port. So we end up not actually having a pvid on the CPU
port if we only listen to tag_8021q, and unless we do something about it.

The decision taken at the time was to keep VLAN 1 in the list of
priv->dsa_8021q_vlans, and make it a pvid of the CPU port. This ensures
that STP and PTP frames can always be sent to the outside world.

However there is a problem. If we do the following while we are in
the best_effort_vlan_filtering=true mode:

ip link add br0 type bridge vlan_filtering 1
ip link set swp2 master br0
bridge vlan del dev swp2 vid 1

Then untagged and pvid-tagged frames should be dropped. But we observe
that they aren't, and this is because of the precaution we took that VID
1 is always installed on all ports.

So clearly VLAN 1 is not good for this purpose. What about VLAN 0?
Well, VLAN 0 is managed by the 8021q module, and that module wants to
ensure that 802.1p tagged frames are always received by a port, and are
always transmitted as VLAN-tagged (with VLAN ID 0). Whereas we want our
STP and PTP frames to be untagged if the stack sent them as untagged -
we don't want the driver to just decide out of the blue that it adds
VID 0 to some packets.

So what to do?

Well, there is one other VLAN that is reserved, and that is 4095:
$ ip link add link swp2 name swp2.4095 type vlan id 4095
Error: 8021q: Invalid VLAN id.
$ bridge vlan add dev swp2 vid 4095
Error: bridge: Vlan id is invalid.

After we made this change, VLAN 1 is indeed forwarded and/or dropped
according to the bridge VLAN table, there are no further alterations
done by the sja1105 driver.

Fixes: ec5ae61076d0 ("net: dsa: sja1105: save/restore VLANs using a delta commit method")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 7f7e0424a442..dffa7dd83877 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -26,6 +26,7 @@
 #include "sja1105_tas.h"
 
 #define SJA1105_UNKNOWN_MULTICAST	0x010000000000ull
+#define SJA1105_DEFAULT_VLAN		(VLAN_N_VID - 1)
 
 static const struct dsa_switch_ops sja1105_switch_ops;
 
@@ -322,6 +323,13 @@ static int sja1105_init_l2_lookup_params(struct sja1105_private *priv)
 	return 0;
 }
 
+/* Set up a default VLAN for untagged traffic injected from the CPU
+ * using management routes (e.g. STP, PTP) as opposed to tag_8021q.
+ * All DT-defined ports are members of this VLAN, and there are no
+ * restrictions on forwarding (since the CPU selects the destination).
+ * Frames from this VLAN will always be transmitted as untagged, and
+ * neither the bridge nor the 8021q module cannot create this VLAN ID.
+ */
 static int sja1105_init_static_vlan(struct sja1105_private *priv)
 {
 	struct sja1105_table *table;
@@ -331,17 +339,13 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv)
 		.vmemb_port = 0,
 		.vlan_bc = 0,
 		.tag_port = 0,
-		.vlanid = 1,
+		.vlanid = SJA1105_DEFAULT_VLAN,
 	};
 	struct dsa_switch *ds = priv->ds;
 	int port;
 
 	table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP];
 
-	/* The static VLAN table will only contain the initial pvid of 1.
-	 * All other VLANs are to be configured through dynamic entries,
-	 * and kept in the static configuration table as backing memory.
-	 */
 	if (table->entry_count) {
 		kfree(table->entries);
 		table->entry_count = 0;
@@ -354,9 +358,6 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv)
 
 	table->entry_count = 1;
 
-	/* VLAN 1: all DT-defined ports are members; no restrictions on
-	 * forwarding; always transmit as untagged.
-	 */
 	for (port = 0; port < ds->num_ports; port++) {
 		struct sja1105_bridge_vlan *v;
 
@@ -367,15 +368,12 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv)
 		pvid.vlan_bc |= BIT(port);
 		pvid.tag_port &= ~BIT(port);
 
-		/* Let traffic that don't need dsa_8021q (e.g. STP, PTP) be
-		 * transmitted as untagged.
-		 */
 		v = kzalloc(sizeof(*v), GFP_KERNEL);
 		if (!v)
 			return -ENOMEM;
 
 		v->port = port;
-		v->vid = 1;
+		v->vid = SJA1105_DEFAULT_VLAN;
 		v->untagged = true;
 		if (dsa_is_cpu_port(ds, port))
 			v->pvid = true;
-- 
cgit v1.2.3


From b38e659de966a122fe2cb178c1e39c9bea06bc62 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 24 May 2021 12:25:27 +0300
Subject: net: dsa: sja1105: update existing VLANs from the bridge VLAN list

When running this sequence of operations:

ip link add br0 type bridge vlan_filtering 1
ip link set swp4 master br0
bridge vlan add dev swp4 vid 1

We observe the traffic sent on swp4 is still untagged, even though the
bridge has overwritten the existing VLAN entry:

port    vlan ids
swp4     1 PVID

br0      1 PVID Egress Untagged

This happens because we didn't consider that the 'bridge vlan add'
command just overwrites VLANs like it's nothing. We treat the 'vid 1
pvid untagged' and the 'vid 1' as two separate VLANs, and the first
still has precedence when calling sja1105_build_vlan_table. Obviously
there is a disagreement regarding semantics, and we end up doing
something unexpected from the PoV of the bridge.

Let's actually consider an "existing VLAN" to be one which is on the
same port, and has the same VLAN ID, as one we already have, and update
it if it has different flags than we do.

The first blamed commit is the one introducing the bug, the second one
is the latest on top of which the bugfix still applies.

Fixes: ec5ae61076d0 ("net: dsa: sja1105: save/restore VLANs using a delta commit method")
Fixes: 5899ee367ab3 ("net: dsa: tag_8021q: add a context structure")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index dffa7dd83877..b88d9ef45a1f 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -2816,11 +2816,22 @@ static int sja1105_vlan_add_one(struct dsa_switch *ds, int port, u16 vid,
 	bool pvid = flags & BRIDGE_VLAN_INFO_PVID;
 	struct sja1105_bridge_vlan *v;
 
-	list_for_each_entry(v, vlan_list, list)
-		if (v->port == port && v->vid == vid &&
-		    v->untagged == untagged && v->pvid == pvid)
+	list_for_each_entry(v, vlan_list, list) {
+		if (v->port == port && v->vid == vid) {
 			/* Already added */
-			return 0;
+			if (v->untagged == untagged && v->pvid == pvid)
+				/* Nothing changed */
+				return 0;
+
+			/* It's the same VLAN, but some of the flags changed
+			 * and the user did not bother to delete it first.
+			 * Update it and trigger sja1105_build_vlan_table.
+			 */
+			v->untagged = untagged;
+			v->pvid = pvid;
+			return 1;
+		}
+	}
 
 	v = kzalloc(sizeof(*v), GFP_KERNEL);
 	if (!v) {
-- 
cgit v1.2.3


From a4dd4fc6105e54393d637450a11d4cddb5fabc4f Mon Sep 17 00:00:00 2001
From: Saubhik Mukherjee <saubhik.mukherjee@gmail.com>
Date: Mon, 24 May 2021 19:07:12 +0530
Subject: net: appletalk: cops: Fix data race in cops_probe1

In cops_probe1(), there is a write to dev->base_addr after requesting an
interrupt line and registering the interrupt handler cops_interrupt().
The handler might be called in parallel to handle an interrupt.
cops_interrupt() tries to read dev->base_addr leading to a potential
data race. So write to dev->base_addr before calling request_irq().

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Saubhik Mukherjee <saubhik.mukherjee@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/appletalk/cops.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c
index ba8e70a8e312..6b12ce822e51 100644
--- a/drivers/net/appletalk/cops.c
+++ b/drivers/net/appletalk/cops.c
@@ -327,6 +327,8 @@ static int __init cops_probe1(struct net_device *dev, int ioaddr)
 			break;
 	}
 
+	dev->base_addr = ioaddr;
+
 	/* Reserve any actual interrupt. */
 	if (dev->irq) {
 		retval = request_irq(dev->irq, cops_interrupt, 0, dev->name, dev);
@@ -334,8 +336,6 @@ static int __init cops_probe1(struct net_device *dev, int ioaddr)
 			goto err_out;
 	}
 
-	dev->base_addr = ioaddr;
-
         lp = netdev_priv(dev);
         spin_lock_init(&lp->lock);
 
-- 
cgit v1.2.3


From 48b491a5cc74333c4a6a82fe21cea42c055a3b0b Mon Sep 17 00:00:00 2001
From: George McCollister <george.mccollister@gmail.com>
Date: Mon, 24 May 2021 13:50:54 -0500
Subject: net: hsr: fix mac_len checks

Commit 2e9f60932a2c ("net: hsr: check skb can contain struct hsr_ethhdr
in fill_frame_info") added the following which resulted in -EINVAL
always being returned:
	if (skb->mac_len < sizeof(struct hsr_ethhdr))
		return -EINVAL;

mac_len was not being set correctly so this check completely broke
HSR/PRP since it was always 14, not 20.

Set mac_len correctly and modify the mac_len checks to test in the
correct places since sometimes it is legitimately 14.

Fixes: 2e9f60932a2c ("net: hsr: check skb can contain struct hsr_ethhdr in fill_frame_info")
Signed-off-by: George McCollister <george.mccollister@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/hsr/hsr_device.c  |  2 ++
 net/hsr/hsr_forward.c | 30 +++++++++++++++++++++---------
 net/hsr/hsr_forward.h |  8 ++++----
 net/hsr/hsr_main.h    |  4 ++--
 net/hsr/hsr_slave.c   | 11 +++++------
 5 files changed, 34 insertions(+), 21 deletions(-)

diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index bfcdc75fc01e..26c32407f029 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -218,6 +218,7 @@ static netdev_tx_t hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (master) {
 		skb->dev = master->dev;
 		skb_reset_mac_header(skb);
+		skb_reset_mac_len(skb);
 		hsr_forward_skb(skb, master);
 	} else {
 		atomic_long_inc(&dev->tx_dropped);
@@ -259,6 +260,7 @@ static struct sk_buff *hsr_init_skb(struct hsr_port *master)
 		goto out;
 
 	skb_reset_mac_header(skb);
+	skb_reset_mac_len(skb);
 	skb_reset_network_header(skb);
 	skb_reset_transport_header(skb);
 
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index 6852e9bccf5b..ceb8afb2a62f 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -474,8 +474,8 @@ static void handle_std_frame(struct sk_buff *skb,
 	}
 }
 
-void hsr_fill_frame_info(__be16 proto, struct sk_buff *skb,
-			 struct hsr_frame_info *frame)
+int hsr_fill_frame_info(__be16 proto, struct sk_buff *skb,
+			struct hsr_frame_info *frame)
 {
 	struct hsr_port *port = frame->port_rcv;
 	struct hsr_priv *hsr = port->hsr;
@@ -483,20 +483,26 @@ void hsr_fill_frame_info(__be16 proto, struct sk_buff *skb,
 	/* HSRv0 supervisory frames double as a tag so treat them as tagged. */
 	if ((!hsr->prot_version && proto == htons(ETH_P_PRP)) ||
 	    proto == htons(ETH_P_HSR)) {
+		/* Check if skb contains hsr_ethhdr */
+		if (skb->mac_len < sizeof(struct hsr_ethhdr))
+			return -EINVAL;
+
 		/* HSR tagged frame :- Data or Supervision */
 		frame->skb_std = NULL;
 		frame->skb_prp = NULL;
 		frame->skb_hsr = skb;
 		frame->sequence_nr = hsr_get_skb_sequence_nr(skb);
-		return;
+		return 0;
 	}
 
 	/* Standard frame or PRP from master port */
 	handle_std_frame(skb, frame);
+
+	return 0;
 }
 
-void prp_fill_frame_info(__be16 proto, struct sk_buff *skb,
-			 struct hsr_frame_info *frame)
+int prp_fill_frame_info(__be16 proto, struct sk_buff *skb,
+			struct hsr_frame_info *frame)
 {
 	/* Supervision frame */
 	struct prp_rct *rct = skb_get_PRP_rct(skb);
@@ -507,9 +513,11 @@ void prp_fill_frame_info(__be16 proto, struct sk_buff *skb,
 		frame->skb_std = NULL;
 		frame->skb_prp = skb;
 		frame->sequence_nr = prp_get_skb_sequence_nr(rct);
-		return;
+		return 0;
 	}
 	handle_std_frame(skb, frame);
+
+	return 0;
 }
 
 static int fill_frame_info(struct hsr_frame_info *frame,
@@ -519,9 +527,10 @@ static int fill_frame_info(struct hsr_frame_info *frame,
 	struct hsr_vlan_ethhdr *vlan_hdr;
 	struct ethhdr *ethhdr;
 	__be16 proto;
+	int ret;
 
-	/* Check if skb contains hsr_ethhdr */
-	if (skb->mac_len < sizeof(struct hsr_ethhdr))
+	/* Check if skb contains ethhdr */
+	if (skb->mac_len < sizeof(struct ethhdr))
 		return -EINVAL;
 
 	memset(frame, 0, sizeof(*frame));
@@ -548,7 +557,10 @@ static int fill_frame_info(struct hsr_frame_info *frame,
 
 	frame->is_from_san = false;
 	frame->port_rcv = port;
-	hsr->proto_ops->fill_frame_info(proto, skb, frame);
+	ret = hsr->proto_ops->fill_frame_info(proto, skb, frame);
+	if (ret)
+		return ret;
+
 	check_local_dest(port->hsr, skb, frame);
 
 	return 0;
diff --git a/net/hsr/hsr_forward.h b/net/hsr/hsr_forward.h
index b6acaafa83fc..206636750b30 100644
--- a/net/hsr/hsr_forward.h
+++ b/net/hsr/hsr_forward.h
@@ -24,8 +24,8 @@ struct sk_buff *prp_get_untagged_frame(struct hsr_frame_info *frame,
 				       struct hsr_port *port);
 bool prp_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port);
 bool hsr_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port);
-void prp_fill_frame_info(__be16 proto, struct sk_buff *skb,
-			 struct hsr_frame_info *frame);
-void hsr_fill_frame_info(__be16 proto, struct sk_buff *skb,
-			 struct hsr_frame_info *frame);
+int prp_fill_frame_info(__be16 proto, struct sk_buff *skb,
+			struct hsr_frame_info *frame);
+int hsr_fill_frame_info(__be16 proto, struct sk_buff *skb,
+			struct hsr_frame_info *frame);
 #endif /* __HSR_FORWARD_H */
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index 8f264672b70b..53d1f7a82463 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -186,8 +186,8 @@ struct hsr_proto_ops {
 					       struct hsr_port *port);
 	struct sk_buff * (*create_tagged_frame)(struct hsr_frame_info *frame,
 						struct hsr_port *port);
-	void (*fill_frame_info)(__be16 proto, struct sk_buff *skb,
-				struct hsr_frame_info *frame);
+	int (*fill_frame_info)(__be16 proto, struct sk_buff *skb,
+			       struct hsr_frame_info *frame);
 	bool (*invalid_dan_ingress_frame)(__be16 protocol);
 	void (*update_san_info)(struct hsr_node *node, bool is_sup);
 };
diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c
index c5227d42faf5..b70e6bbf6021 100644
--- a/net/hsr/hsr_slave.c
+++ b/net/hsr/hsr_slave.c
@@ -60,12 +60,11 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb)
 		goto finish_pass;
 
 	skb_push(skb, ETH_HLEN);
-
-	if (skb_mac_header(skb) != skb->data) {
-		WARN_ONCE(1, "%s:%d: Malformed frame at source port %s)\n",
-			  __func__, __LINE__, port->dev->name);
-		goto finish_consume;
-	}
+	skb_reset_mac_header(skb);
+	if ((!hsr->prot_version && protocol == htons(ETH_P_PRP)) ||
+	    protocol == htons(ETH_P_HSR))
+		skb_set_network_header(skb, ETH_HLEN + HSR_HLEN);
+	skb_reset_mac_len(skb);
 
 	hsr_forward_skb(skb, port);
 
-- 
cgit v1.2.3


From 46a8b29c6306d8bbfd92b614ef65a47c900d8e70 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Mon, 24 May 2021 23:02:08 +0300
Subject: net: usb: fix memory leak in smsc75xx_bind

Syzbot reported memory leak in smsc75xx_bind().
The problem was is non-freed memory in case of
errors after memory allocation.

backtrace:
  [<ffffffff84245b62>] kmalloc include/linux/slab.h:556 [inline]
  [<ffffffff84245b62>] kzalloc include/linux/slab.h:686 [inline]
  [<ffffffff84245b62>] smsc75xx_bind+0x7a/0x334 drivers/net/usb/smsc75xx.c:1460
  [<ffffffff82b5b2e6>] usbnet_probe+0x3b6/0xc30 drivers/net/usb/usbnet.c:1728

Fixes: d0cad871703b ("smsc75xx: SMSC LAN75xx USB gigabit ethernet adapter driver")
Cc: stable@kernel.vger.org
Reported-and-tested-by: syzbot+b558506ba8165425fee2@syzkaller.appspotmail.com
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/smsc75xx.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
index f8cdabb9ef5a..b286993da67c 100644
--- a/drivers/net/usb/smsc75xx.c
+++ b/drivers/net/usb/smsc75xx.c
@@ -1483,7 +1483,7 @@ static int smsc75xx_bind(struct usbnet *dev, struct usb_interface *intf)
 	ret = smsc75xx_wait_ready(dev, 0);
 	if (ret < 0) {
 		netdev_warn(dev->net, "device not ready in smsc75xx_bind\n");
-		return ret;
+		goto err;
 	}
 
 	smsc75xx_init_mac_address(dev);
@@ -1492,7 +1492,7 @@ static int smsc75xx_bind(struct usbnet *dev, struct usb_interface *intf)
 	ret = smsc75xx_reset(dev);
 	if (ret < 0) {
 		netdev_warn(dev->net, "smsc75xx_reset error %d\n", ret);
-		return ret;
+		goto err;
 	}
 
 	dev->net->netdev_ops = &smsc75xx_netdev_ops;
@@ -1502,6 +1502,10 @@ static int smsc75xx_bind(struct usbnet *dev, struct usb_interface *intf)
 	dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len;
 	dev->net->max_mtu = MAX_SINGLE_PACKET_SIZE;
 	return 0;
+
+err:
+	kfree(pdata);
+	return ret;
 }
 
 static void smsc75xx_unbind(struct usbnet *dev, struct usb_interface *intf)
-- 
cgit v1.2.3


From 8c42a49738f16af0061f9ae5c2f5a955f268d9e3 Mon Sep 17 00:00:00 2001
From: George McCollister <george.mccollister@gmail.com>
Date: Mon, 24 May 2021 15:29:53 -0500
Subject: net: dsa: microchip: enable phy errata workaround on 9567

Also enable phy errata workaround on 9567 since has the same errata as
the 9477 according to the manufacture's documentation.

Signed-off-by: George McCollister <george.mccollister@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/microchip/ksz9477.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c
index 55e5d479acce..854e25f43fa7 100644
--- a/drivers/net/dsa/microchip/ksz9477.c
+++ b/drivers/net/dsa/microchip/ksz9477.c
@@ -1530,6 +1530,7 @@ static const struct ksz_chip_data ksz9477_switch_chips[] = {
 		.num_statics = 16,
 		.cpu_ports = 0x7F,	/* can be configured as cpu port */
 		.port_cnt = 7,		/* total physical port count */
+		.phy_errata_9477 = true,
 	},
 };
 
-- 
cgit v1.2.3


From 19dee613816d5065ad09f2ccc20b35d23dca9f28 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 13 May 2021 11:03:32 +0100
Subject: netfs: Pass flags through to grab_cache_page_write_begin()

In netfs_write_begin(), pass the AOP flags through to
grab_cache_page_write_begin() so that a request to use GFP_NOFS is
honoured.

Fixes: e1b1240c1ff5 ("netfs: Add write_begin helper")
Reported-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
cc: linux-mm@kvack.org
cc: linux-cachefs@redhat.com
cc: linux-afs@lists.infradead.org
cc: linux-nfs@vger.kernel.org
cc: linux-cifs@vger.kernel.org
cc: ceph-devel@vger.kernel.org
cc: v9fs-developer@lists.sourceforge.net
cc: linux-fsdevel@vger.kernel.org
Link: https://lore.kernel.org/r/162090295383.3165945.13595101698295243662.stgit@warthog.procyon.org.uk # v1
---
 fs/netfs/read_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c
index 193841d03de0..725614625ed4 100644
--- a/fs/netfs/read_helper.c
+++ b/fs/netfs/read_helper.c
@@ -1068,7 +1068,7 @@ int netfs_write_begin(struct file *file, struct address_space *mapping,
 	DEFINE_READAHEAD(ractl, file, NULL, mapping, index);
 
 retry:
-	page = grab_cache_page_write_begin(mapping, index, 0);
+	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
 		return -ENOMEM;
 
-- 
cgit v1.2.3


From b71c791254ff5e78a124c8949585dccd9e225e06 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 13 May 2021 11:40:27 +0100
Subject: netfs: Make CONFIG_NETFS_SUPPORT auto-selected rather than manual

Make the netfs helper library selected automatically by the things that use
it rather than being manually configured, even though it's required[1].

Fixes: 3a5829fefd3b ("netfs: Make a netfs helper module")
Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-mm@kvack.org
cc: linux-cachefs@redhat.com
cc: linux-afs@lists.infradead.org
cc: linux-nfs@vger.kernel.org
cc: linux-cifs@vger.kernel.org
cc: ceph-devel@vger.kernel.org
cc: v9fs-developer@lists.sourceforge.net
cc: linux-fsdevel@vger.kernel.org
Link: https://lore.kernel.org/r/CAMuHMdXJZ7iNQE964CdBOU=vRKVMFzo=YF_eiwsGgqzuvZ+TuA@mail.gmail.com [1]
Link: https://lore.kernel.org/r/162090298141.3166007.2971118149366779916.stgit@warthog.procyon.org.uk # v1
---
 fs/netfs/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/netfs/Kconfig b/fs/netfs/Kconfig
index 578112713703..b4db21022cb4 100644
--- a/fs/netfs/Kconfig
+++ b/fs/netfs/Kconfig
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 
 config NETFS_SUPPORT
-	tristate "Support for network filesystem high-level I/O"
+	tristate
 	help
 	  This option enables support for network filesystems, including
 	  helpers for high-level buffered I/O, abstracting out read
-- 
cgit v1.2.3


From a8deba8547e39f26440101164a3bbc2899c5b305 Mon Sep 17 00:00:00 2001
From: Liu Jian <liujian56@huawei.com>
Date: Tue, 25 May 2021 09:41:39 +0800
Subject: bpftool: Add sock_release help info for cgroup attach/prog load
 command

The help information was not added at the time when the function got added.
Fix this and add the missing information to its cli, documentation and bash
completion.

Fixes: db94cc0b4805 ("bpftool: Add support for BPF_CGROUP_INET_SOCK_RELEASE")
Signed-off-by: Liu Jian <liujian56@huawei.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20210525014139.323859-1-liujian56@huawei.com
---
 tools/bpf/bpftool/Documentation/bpftool-cgroup.rst | 4 +++-
 tools/bpf/bpftool/Documentation/bpftool-prog.rst   | 2 +-
 tools/bpf/bpftool/bash-completion/bpftool          | 6 +++---
 tools/bpf/bpftool/cgroup.c                         | 3 ++-
 tools/bpf/bpftool/prog.c                           | 2 +-
 5 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index 790944c35602..baee8591ac76 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -30,7 +30,8 @@ CGROUP COMMANDS
 |	*ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** |
 |		**bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** |
 |               **getpeername4** | **getpeername6** | **getsockname4** | **getsockname6** | **sendmsg4** |
-|               **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** }
+|               **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** |
+|               **sock_release** }
 |	*ATTACH_FLAGS* := { **multi** | **override** }
 
 DESCRIPTION
@@ -106,6 +107,7 @@ DESCRIPTION
 		  **getpeername6** call to getpeername(2) for an inet6 socket (since 5.8);
 		  **getsockname4** call to getsockname(2) for an inet4 socket (since 5.8);
 		  **getsockname6** call to getsockname(2) for an inet6 socket (since 5.8).
+		  **sock_release** closing an userspace inet socket (since 5.9).
 
 	**bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
 		  Detach *PROG* from the cgroup *CGROUP* and attach type
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 358c7309d419..fe1b38e7e887 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -44,7 +44,7 @@ PROG COMMANDS
 |		**cgroup/connect4** | **cgroup/connect6** | **cgroup/getpeername4** | **cgroup/getpeername6** |
 |               **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** |
 |		**cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** |
-|		**cgroup/getsockopt** | **cgroup/setsockopt** |
+|		**cgroup/getsockopt** | **cgroup/setsockopt** | **cgroup/sock_release** |
 |		**struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup**
 |	}
 |       *ATTACH_TYPE* := {
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index d67518bcbd44..cc33c5824a2f 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -478,7 +478,7 @@ _bpftool()
                                 cgroup/recvmsg4 cgroup/recvmsg6 \
                                 cgroup/post_bind4 cgroup/post_bind6 \
                                 cgroup/sysctl cgroup/getsockopt \
-                                cgroup/setsockopt struct_ops \
+                                cgroup/setsockopt cgroup/sock_release struct_ops \
                                 fentry fexit freplace sk_lookup" -- \
                                                    "$cur" ) )
                             return 0
@@ -1021,7 +1021,7 @@ _bpftool()
                         device bind4 bind6 post_bind4 post_bind6 connect4 connect6 \
                         getpeername4 getpeername6 getsockname4 getsockname6 \
                         sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl getsockopt \
-                        setsockopt'
+                        setsockopt sock_release'
                     local ATTACH_FLAGS='multi override'
                     local PROG_TYPE='id pinned tag name'
                     case $prev in
@@ -1032,7 +1032,7 @@ _bpftool()
                         ingress|egress|sock_create|sock_ops|device|bind4|bind6|\
                         post_bind4|post_bind6|connect4|connect6|getpeername4|\
                         getpeername6|getsockname4|getsockname6|sendmsg4|sendmsg6|\
-                        recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt)
+                        recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt|sock_release)
                             COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \
                                 "$cur" ) )
                             return 0
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index d901cc1b904a..6e53b1d393f4 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -28,7 +28,8 @@
 	"                        connect6 | getpeername4 | getpeername6 |\n"   \
 	"                        getsockname4 | getsockname6 | sendmsg4 |\n"   \
 	"                        sendmsg6 | recvmsg4 | recvmsg6 |\n"           \
-	"                        sysctl | getsockopt | setsockopt }"
+	"                        sysctl | getsockopt | setsockopt |\n"	       \
+	"                        sock_release }"
 
 static unsigned int query_flags;
 
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 3f067d2d7584..da4846c9856a 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -2138,7 +2138,7 @@ static int do_help(int argc, char **argv)
 		"                 cgroup/getpeername4 | cgroup/getpeername6 |\n"
 		"                 cgroup/getsockname4 | cgroup/getsockname6 | cgroup/sendmsg4 |\n"
 		"                 cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n"
-		"                 cgroup/getsockopt | cgroup/setsockopt |\n"
+		"                 cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n"
 		"                 struct_ops | fentry | fexit | freplace | sk_lookup }\n"
 		"       ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
 		"                        flow_dissector }\n"
-- 
cgit v1.2.3


From 6fd5fb63820a9a1146aba0bba2fdbc1db4b903e7 Mon Sep 17 00:00:00 2001
From: Jussi Maki <joamaki@gmail.com>
Date: Tue, 25 May 2021 10:29:55 +0000
Subject: selftests/bpf: Add test for l3 use of bpf_redirect_peer

Add a test case for using bpf_skb_change_head() in combination with
bpf_redirect_peer() to redirect a packet from a L3 device to veth and back.

The test uses a BPF program that adds L2 headers to the packet coming
from a L3 device and then calls bpf_redirect_peer() to redirect the packet
to a veth device. The test fails as skb->mac_len is not set properly and
thus the ethernet headers are not properly skb_pull'd in cls_bpf_classify(),
causing tcp_v4_rcv() to point the TCP header into middle of the IP header.

Signed-off-by: Jussi Maki <joamaki@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210525102955.2811090-1-joamaki@gmail.com
---
 .../testing/selftests/bpf/prog_tests/tc_redirect.c | 552 ++++++++++++++-------
 tools/testing/selftests/bpf/progs/test_tc_peer.c   |  31 ++
 2 files changed, 405 insertions(+), 178 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index 95ef9fcd31d8..5703c918812b 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -11,14 +11,17 @@
  */
 
 #define _GNU_SOURCE
-#include <fcntl.h>
+
+#include <arpa/inet.h>
 #include <linux/limits.h>
 #include <linux/sysctl.h>
+#include <linux/if_tun.h>
+#include <linux/if.h>
 #include <sched.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <sys/stat.h>
-#include <sys/types.h>
+#include <sys/mount.h>
 
 #include "test_progs.h"
 #include "network_helpers.h"
@@ -32,18 +35,25 @@
 
 #define IP4_SRC "172.16.1.100"
 #define IP4_DST "172.16.2.100"
+#define IP4_TUN_SRC "172.17.1.100"
+#define IP4_TUN_FWD "172.17.1.200"
 #define IP4_PORT 9004
 
-#define IP6_SRC "::1:dead:beef:cafe"
-#define IP6_DST "::2:dead:beef:cafe"
+#define IP6_SRC "0::1:dead:beef:cafe"
+#define IP6_DST "0::2:dead:beef:cafe"
+#define IP6_TUN_SRC "1::1:dead:beef:cafe"
+#define IP6_TUN_FWD "1::2:dead:beef:cafe"
 #define IP6_PORT 9006
 
 #define IP4_SLL "169.254.0.1"
 #define IP4_DLL "169.254.0.2"
 #define IP4_NET "169.254.0.0"
 
+#define MAC_DST_FWD "00:11:22:33:44:55"
+#define MAC_DST "00:22:33:44:55:66"
+
 #define IFADDR_STR_LEN 18
-#define PING_ARGS "-c 3 -w 10 -q"
+#define PING_ARGS "-i 0.2 -c 3 -w 10 -q"
 
 #define SRC_PROG_PIN_FILE "/sys/fs/bpf/test_tc_src"
 #define DST_PROG_PIN_FILE "/sys/fs/bpf/test_tc_dst"
@@ -51,120 +61,104 @@
 
 #define TIMEOUT_MILLIS 10000
 
-#define MAX_PROC_MODS 128
-#define MAX_PROC_VALUE_LEN 16
-
 #define log_err(MSG, ...) \
 	fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
 		__FILE__, __LINE__, strerror(errno), ##__VA_ARGS__)
 
-struct proc_mod {
-	char path[PATH_MAX];
-	char oldval[MAX_PROC_VALUE_LEN];
-	int oldlen;
-};
-
 static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL};
-static int root_netns_fd = -1;
-static int num_proc_mods;
-static struct proc_mod proc_mods[MAX_PROC_MODS];
 
-/**
- * modify_proc() - Modify entry in /proc
- *
- * Modifies an entry in /proc and saves the original value for later
- * restoration with restore_proc().
- */
-static int modify_proc(const char *path, const char *newval)
+static int write_file(const char *path, const char *newval)
 {
-	struct proc_mod *mod;
 	FILE *f;
 
-	if (num_proc_mods + 1 > MAX_PROC_MODS)
-		return -1;
-
 	f = fopen(path, "r+");
 	if (!f)
 		return -1;
-
-	mod = &proc_mods[num_proc_mods];
-	num_proc_mods++;
-
-	strncpy(mod->path, path, PATH_MAX);
-
-	if (!fread(mod->oldval, 1, MAX_PROC_VALUE_LEN, f)) {
-		log_err("reading from %s failed", path);
-		goto fail;
-	}
-	rewind(f);
 	if (fwrite(newval, strlen(newval), 1, f) != 1) {
 		log_err("writing to %s failed", path);
-		goto fail;
+		fclose(f);
+		return -1;
 	}
-
 	fclose(f);
 	return 0;
-
-fail:
-	fclose(f);
-	num_proc_mods--;
-	return -1;
 }
 
-/**
- * restore_proc() - Restore all /proc modifications
- */
-static void restore_proc(void)
+struct nstoken {
+	int orig_netns_fd;
+};
+
+static int setns_by_fd(int nsfd)
 {
-	int i;
+	int err;
 
-	for (i = 0; i < num_proc_mods; i++) {
-		struct proc_mod *mod = &proc_mods[i];
-		FILE *f;
+	err = setns(nsfd, CLONE_NEWNET);
+	close(nsfd);
 
-		f = fopen(mod->path, "w");
-		if (!f) {
-			log_err("fopen of %s failed", mod->path);
-			continue;
-		}
+	if (!ASSERT_OK(err, "setns"))
+		return err;
 
-		if (fwrite(mod->oldval, mod->oldlen, 1, f) != 1)
-			log_err("fwrite to %s failed", mod->path);
+	/* Switch /sys to the new namespace so that e.g. /sys/class/net
+	 * reflects the devices in the new namespace.
+	 */
+	err = unshare(CLONE_NEWNS);
+	if (!ASSERT_OK(err, "unshare"))
+		return err;
 
-		fclose(f);
-	}
-	num_proc_mods = 0;
+	err = umount2("/sys", MNT_DETACH);
+	if (!ASSERT_OK(err, "umount2 /sys"))
+		return err;
+
+	err = mount("sysfs", "/sys", "sysfs", 0, NULL);
+	if (!ASSERT_OK(err, "mount /sys"))
+		return err;
+
+	err = mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL);
+	if (!ASSERT_OK(err, "mount /sys/fs/bpf"))
+		return err;
+
+	return 0;
 }
 
 /**
- * setns_by_name() - Set networks namespace by name
+ * open_netns() - Switch to specified network namespace by name.
+ *
+ * Returns token with which to restore the original namespace
+ * using close_netns().
  */
-static int setns_by_name(const char *name)
+static struct nstoken *open_netns(const char *name)
 {
 	int nsfd;
 	char nspath[PATH_MAX];
 	int err;
+	struct nstoken *token;
+
+	token = malloc(sizeof(struct nstoken));
+	if (!ASSERT_OK_PTR(token, "malloc token"))
+		return NULL;
+
+	token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+	if (!ASSERT_GE(token->orig_netns_fd, 0, "open /proc/self/ns/net"))
+		goto fail;
 
 	snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
 	nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
-	if (nsfd < 0)
-		return nsfd;
+	if (!ASSERT_GE(nsfd, 0, "open netns fd"))
+		goto fail;
 
-	err = setns(nsfd, CLONE_NEWNET);
-	close(nsfd);
+	err = setns_by_fd(nsfd);
+	if (!ASSERT_OK(err, "setns_by_fd"))
+		goto fail;
 
-	return err;
+	return token;
+fail:
+	free(token);
+	return NULL;
 }
 
-/**
- * setns_root() - Set network namespace to original (root) namespace
- *
- * Not expected to ever fail, so error not returned, but failure logged
- * and test marked as failed.
- */
-static void setns_root(void)
+static void close_netns(struct nstoken *token)
 {
-	ASSERT_OK(setns(root_netns_fd, CLONE_NEWNET), "setns root");
+	ASSERT_OK(setns_by_fd(token->orig_netns_fd), "setns_by_fd");
+	free(token);
 }
 
 static int netns_setup_namespaces(const char *verb)
@@ -237,15 +231,17 @@ static int get_ifindex(const char *name)
 
 static int netns_setup_links_and_routes(struct netns_setup_result *result)
 {
+	struct nstoken *nstoken = NULL;
 	char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {};
-	char veth_dst_fwd_addr[IFADDR_STR_LEN+1] = {};
 
 	SYS("ip link add veth_src type veth peer name veth_src_fwd");
 	SYS("ip link add veth_dst type veth peer name veth_dst_fwd");
+
+	SYS("ip link set veth_dst_fwd address " MAC_DST_FWD);
+	SYS("ip link set veth_dst address " MAC_DST);
+
 	if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
 		goto fail;
-	if (get_ifaddr("veth_dst_fwd", veth_dst_fwd_addr))
-		goto fail;
 
 	result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd");
 	if (result->ifindex_veth_src_fwd < 0)
@@ -260,7 +256,8 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
 	SYS("ip link set veth_dst netns " NS_DST);
 
 	/** setup in 'src' namespace */
-	if (!ASSERT_OK(setns_by_name(NS_SRC), "setns src"))
+	nstoken = open_netns(NS_SRC);
+	if (!ASSERT_OK_PTR(nstoken, "setns src"))
 		goto fail;
 
 	SYS("ip addr add " IP4_SRC "/32 dev veth_src");
@@ -276,8 +273,11 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
 	SYS("ip neigh add " IP6_DST " dev veth_src lladdr %s",
 	    veth_src_fwd_addr);
 
+	close_netns(nstoken);
+
 	/** setup in 'fwd' namespace */
-	if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
+	nstoken = open_netns(NS_FWD);
+	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
 		goto fail;
 
 	/* The fwd netns automatically gets a v6 LL address / routes, but also
@@ -294,8 +294,11 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
 	SYS("ip route add " IP4_DST "/32 dev veth_dst_fwd scope global");
 	SYS("ip route add " IP6_DST "/128 dev veth_dst_fwd scope global");
 
+	close_netns(nstoken);
+
 	/** setup in 'dst' namespace */
-	if (!ASSERT_OK(setns_by_name(NS_DST), "setns dst"))
+	nstoken = open_netns(NS_DST);
+	if (!ASSERT_OK_PTR(nstoken, "setns dst"))
 		goto fail;
 
 	SYS("ip addr add " IP4_DST "/32 dev veth_dst");
@@ -306,23 +309,20 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
 	SYS("ip route add " IP4_NET "/16 dev veth_dst scope global");
 	SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global");
 
-	SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr %s",
-	    veth_dst_fwd_addr);
-	SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr %s",
-	    veth_dst_fwd_addr);
+	SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+	SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+
+	close_netns(nstoken);
 
-	setns_root();
 	return 0;
 fail:
-	setns_root();
+	if (nstoken)
+		close_netns(nstoken);
 	return -1;
 }
 
 static int netns_load_bpf(void)
 {
-	if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
-		return -1;
-
 	SYS("tc qdisc add dev veth_src_fwd clsact");
 	SYS("tc filter add dev veth_src_fwd ingress bpf da object-pinned "
 	    SRC_PROG_PIN_FILE);
@@ -335,42 +335,29 @@ static int netns_load_bpf(void)
 	SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
 	    CHK_PROG_PIN_FILE);
 
-	setns_root();
-	return -1;
-fail:
-	setns_root();
-	return -1;
-}
-
-static int netns_unload_bpf(void)
-{
-	if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
-		goto fail;
-	SYS("tc qdisc delete dev veth_src_fwd clsact");
-	SYS("tc qdisc delete dev veth_dst_fwd clsact");
-
-	setns_root();
 	return 0;
 fail:
-	setns_root();
 	return -1;
 }
 
-
 static void test_tcp(int family, const char *addr, __u16 port)
 {
 	int listen_fd = -1, accept_fd = -1, client_fd = -1;
 	char buf[] = "testing testing";
 	int n;
+	struct nstoken *nstoken;
 
-	if (!ASSERT_OK(setns_by_name(NS_DST), "setns dst"))
+	nstoken = open_netns(NS_DST);
+	if (!ASSERT_OK_PTR(nstoken, "setns dst"))
 		return;
 
 	listen_fd = start_server(family, SOCK_STREAM, addr, port, 0);
 	if (!ASSERT_GE(listen_fd, 0, "listen"))
 		goto done;
 
-	if (!ASSERT_OK(setns_by_name(NS_SRC), "setns src"))
+	close_netns(nstoken);
+	nstoken = open_netns(NS_SRC);
+	if (!ASSERT_OK_PTR(nstoken, "setns src"))
 		goto done;
 
 	client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS);
@@ -392,7 +379,8 @@ static void test_tcp(int family, const char *addr, __u16 port)
 	ASSERT_EQ(n, sizeof(buf), "recv from server");
 
 done:
-	setns_root();
+	if (nstoken)
+		close_netns(nstoken);
 	if (listen_fd >= 0)
 		close(listen_fd);
 	if (accept_fd >= 0)
@@ -405,7 +393,7 @@ static int test_ping(int family, const char *addr)
 {
 	const char *ping = family == AF_INET6 ? "ping6" : "ping";
 
-	SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s", ping, addr);
+	SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping, addr);
 	return 0;
 fail:
 	return -1;
@@ -419,19 +407,37 @@ static void test_connectivity(void)
 	test_ping(AF_INET6, IP6_DST);
 }
 
+static int set_forwarding(bool enable)
+{
+	int err;
+
+	err = write_file("/proc/sys/net/ipv4/ip_forward", enable ? "1" : "0");
+	if (!ASSERT_OK(err, "set ipv4.ip_forward=0"))
+		return err;
+
+	err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", enable ? "1" : "0");
+	if (!ASSERT_OK(err, "set ipv6.forwarding=0"))
+		return err;
+
+	return 0;
+}
+
 static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
 {
-	struct test_tc_neigh_fib *skel;
+	struct nstoken *nstoken = NULL;
+	struct test_tc_neigh_fib *skel = NULL;
 	int err;
 
+	nstoken = open_netns(NS_FWD);
+	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+		return;
+
 	skel = test_tc_neigh_fib__open();
 	if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open"))
-		return;
+		goto done;
 
-	if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load")) {
-		test_tc_neigh_fib__destroy(skel);
-		return;
-	}
+	if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load"))
+		goto done;
 
 	err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
 	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
@@ -449,46 +455,37 @@ static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
 		goto done;
 
 	/* bpf_fib_lookup() checks if forwarding is enabled */
-	if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
+	if (!ASSERT_OK(set_forwarding(true), "enable forwarding"))
 		goto done;
 
-	err = modify_proc("/proc/sys/net/ipv4/ip_forward", "1");
-	if (!ASSERT_OK(err, "set ipv4.ip_forward"))
-		goto done;
-
-	err = modify_proc("/proc/sys/net/ipv6/conf/all/forwarding", "1");
-	if (!ASSERT_OK(err, "set ipv6.forwarding"))
-		goto done;
-	setns_root();
-
 	test_connectivity();
+
 done:
-	bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
-	bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
-	bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
-	test_tc_neigh_fib__destroy(skel);
-	netns_unload_bpf();
-	setns_root();
-	restore_proc();
+	if (skel)
+		test_tc_neigh_fib__destroy(skel);
+	close_netns(nstoken);
 }
 
 static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
 {
-	struct test_tc_neigh *skel;
+	struct nstoken *nstoken = NULL;
+	struct test_tc_neigh *skel = NULL;
 	int err;
 
+	nstoken = open_netns(NS_FWD);
+	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+		return;
+
 	skel = test_tc_neigh__open();
 	if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
-		return;
+		goto done;
 
 	skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
 	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
 
 	err = test_tc_neigh__load(skel);
-	if (!ASSERT_OK(err, "test_tc_neigh__load")) {
-		test_tc_neigh__destroy(skel);
-		return;
-	}
+	if (!ASSERT_OK(err, "test_tc_neigh__load"))
+		goto done;
 
 	err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
 	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
@@ -505,34 +502,37 @@ static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
 	if (netns_load_bpf())
 		goto done;
 
+	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+		goto done;
+
 	test_connectivity();
 
 done:
-	bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
-	bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
-	bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
-	test_tc_neigh__destroy(skel);
-	netns_unload_bpf();
-	setns_root();
+	if (skel)
+		test_tc_neigh__destroy(skel);
+	close_netns(nstoken);
 }
 
 static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
 {
+	struct nstoken *nstoken;
 	struct test_tc_peer *skel;
 	int err;
 
+	nstoken = open_netns(NS_FWD);
+	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+		return;
+
 	skel = test_tc_peer__open();
 	if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
-		return;
+		goto done;
 
 	skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
 	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
 
 	err = test_tc_peer__load(skel);
-	if (!ASSERT_OK(err, "test_tc_peer__load")) {
-		test_tc_peer__destroy(skel);
-		return;
-	}
+	if (!ASSERT_OK(err, "test_tc_peer__load"))
+		goto done;
 
 	err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
 	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
@@ -549,41 +549,237 @@ static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
 	if (netns_load_bpf())
 		goto done;
 
+	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+		goto done;
+
 	test_connectivity();
 
 done:
-	bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
-	bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
-	bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
-	test_tc_peer__destroy(skel);
-	netns_unload_bpf();
-	setns_root();
+	if (skel)
+		test_tc_peer__destroy(skel);
+	close_netns(nstoken);
 }
 
-void test_tc_redirect(void)
+static int tun_open(char *name)
+{
+	struct ifreq ifr;
+	int fd, err;
+
+	fd = open("/dev/net/tun", O_RDWR);
+	if (!ASSERT_GE(fd, 0, "open /dev/net/tun"))
+		return -1;
+
+	memset(&ifr, 0, sizeof(ifr));
+
+	ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
+	if (*name)
+		strncpy(ifr.ifr_name, name, IFNAMSIZ);
+
+	err = ioctl(fd, TUNSETIFF, &ifr);
+	if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
+		goto fail;
+
+	SYS("ip link set dev %s up", name);
+
+	return fd;
+fail:
+	close(fd);
+	return -1;
+}
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+enum {
+	SRC_TO_TARGET = 0,
+	TARGET_TO_SRC = 1,
+};
+
+static int tun_relay_loop(int src_fd, int target_fd)
 {
-	struct netns_setup_result setup_result;
+	fd_set rfds, wfds;
 
-	root_netns_fd = open("/proc/self/ns/net", O_RDONLY);
-	if (!ASSERT_GE(root_netns_fd, 0, "open /proc/self/ns/net"))
+	FD_ZERO(&rfds);
+	FD_ZERO(&wfds);
+
+	for (;;) {
+		char buf[1500];
+		int direction, nread, nwrite;
+
+		FD_SET(src_fd, &rfds);
+		FD_SET(target_fd, &rfds);
+
+		if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) {
+			log_err("select failed");
+			return 1;
+		}
+
+		direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC;
+
+		nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf));
+		if (nread < 0) {
+			log_err("read failed");
+			return 1;
+		}
+
+		nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread);
+		if (nwrite != nread) {
+			log_err("write failed");
+			return 1;
+		}
+	}
+}
+
+static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
+{
+	struct test_tc_peer *skel = NULL;
+	struct nstoken *nstoken = NULL;
+	int err;
+	int tunnel_pid = -1;
+	int src_fd, target_fd;
+	int ifindex;
+
+	/* Start a L3 TUN/TAP tunnel between the src and dst namespaces.
+	 * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those
+	 * expose the L2 headers encapsulating the IP packet to BPF and hence
+	 * don't have skb in suitable state for this test. Alternative to TUN/TAP
+	 * would be e.g. Wireguard which would appear as a pure L3 device to BPF,
+	 * but that requires much more complicated setup.
+	 */
+	nstoken = open_netns(NS_SRC);
+	if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
 		return;
 
-	if (netns_setup_namespaces("add"))
-		goto done;
+	src_fd = tun_open("tun_src");
+	if (!ASSERT_GE(src_fd, 0, "tun_open tun_src"))
+		goto fail;
 
-	if (netns_setup_links_and_routes(&setup_result))
-		goto done;
+	close_netns(nstoken);
+
+	nstoken = open_netns(NS_FWD);
+	if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
+		goto fail;
 
-	if (test__start_subtest("tc_redirect_peer"))
-		test_tc_redirect_peer(&setup_result);
+	target_fd = tun_open("tun_fwd");
+	if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd"))
+		goto fail;
 
-	if (test__start_subtest("tc_redirect_neigh"))
-		test_tc_redirect_neigh(&setup_result);
+	tunnel_pid = fork();
+	if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop"))
+		goto fail;
 
-	if (test__start_subtest("tc_redirect_neigh_fib"))
-		test_tc_redirect_neigh_fib(&setup_result);
+	if (tunnel_pid == 0)
+		exit(tun_relay_loop(src_fd, target_fd));
 
-done:
-	close(root_netns_fd);
-	netns_setup_namespaces("delete");
+	skel = test_tc_peer__open();
+	if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
+		goto fail;
+
+	ifindex = get_ifindex("tun_fwd");
+	if (!ASSERT_GE(ifindex, 0, "get_ifindex tun_fwd"))
+		goto fail;
+
+	skel->rodata->IFINDEX_SRC = ifindex;
+	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+
+	err = test_tc_peer__load(skel);
+	if (!ASSERT_OK(err, "test_tc_peer__load"))
+		goto fail;
+
+	err = bpf_program__pin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE);
+	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
+		goto fail;
+
+	err = bpf_program__pin(skel->progs.tc_dst_l3, DST_PROG_PIN_FILE);
+	if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
+		goto fail;
+
+	err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
+	if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
+		goto fail;
+
+	/* Load "tc_src_l3" to the tun_fwd interface to redirect packets
+	 * towards dst, and "tc_dst" to redirect packets
+	 * and "tc_chk" on veth_dst_fwd to drop non-redirected packets.
+	 */
+	SYS("tc qdisc add dev tun_fwd clsact");
+	SYS("tc filter add dev tun_fwd ingress bpf da object-pinned "
+	    SRC_PROG_PIN_FILE);
+
+	SYS("tc qdisc add dev veth_dst_fwd clsact");
+	SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned "
+	    DST_PROG_PIN_FILE);
+	SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
+	    CHK_PROG_PIN_FILE);
+
+	/* Setup route and neigh tables */
+	SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
+	SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
+
+	SYS("ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
+	SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
+
+	SYS("ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global");
+	SYS("ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
+	    " dev tun_src scope global");
+	SYS("ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global");
+	SYS("ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global");
+	SYS("ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
+	    " dev tun_src scope global");
+	SYS("ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global");
+
+	SYS("ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+	SYS("ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+
+	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+		goto fail;
+
+	test_connectivity();
+
+fail:
+	if (tunnel_pid > 0) {
+		kill(tunnel_pid, SIGTERM);
+		waitpid(tunnel_pid, NULL, 0);
+	}
+	if (src_fd >= 0)
+		close(src_fd);
+	if (target_fd >= 0)
+		close(target_fd);
+	if (skel)
+		test_tc_peer__destroy(skel);
+	if (nstoken)
+		close_netns(nstoken);
+}
+
+#define RUN_TEST(name)                                                                      \
+	({                                                                                  \
+		struct netns_setup_result setup_result;                                     \
+		if (test__start_subtest(#name))                                             \
+			if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \
+				if (ASSERT_OK(netns_setup_links_and_routes(&setup_result),  \
+					      "setup links and routes"))                    \
+					test_ ## name(&setup_result);                       \
+				netns_setup_namespaces("delete");                           \
+			}                                                                   \
+	})
+
+static void *test_tc_redirect_run_tests(void *arg)
+{
+	RUN_TEST(tc_redirect_peer);
+	RUN_TEST(tc_redirect_peer_l3);
+	RUN_TEST(tc_redirect_neigh);
+	RUN_TEST(tc_redirect_neigh_fib);
+	return NULL;
+}
+
+void test_tc_redirect(void)
+{
+	pthread_t test_thread;
+	int err;
+
+	/* Run the tests in their own thread to isolate the namespace changes
+	 * so they do not affect the environment of other tests.
+	 * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
+	 */
+	err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL);
+	if (ASSERT_OK(err, "pthread_create"))
+		ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
 }
diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c
index ef264bced0e6..fe818cd5f010 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_peer.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c
@@ -5,12 +5,17 @@
 #include <linux/bpf.h>
 #include <linux/stddef.h>
 #include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
 
 #include <bpf/bpf_helpers.h>
 
 volatile const __u32 IFINDEX_SRC;
 volatile const __u32 IFINDEX_DST;
 
+static const __u8 src_mac[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55};
+static const __u8 dst_mac[] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66};
+
 SEC("classifier/chk_egress")
 int tc_chk(struct __sk_buff *skb)
 {
@@ -29,4 +34,30 @@ int tc_src(struct __sk_buff *skb)
 	return bpf_redirect_peer(IFINDEX_DST, 0);
 }
 
+SEC("classifier/dst_ingress_l3")
+int tc_dst_l3(struct __sk_buff *skb)
+{
+	return bpf_redirect(IFINDEX_SRC, 0);
+}
+
+SEC("classifier/src_ingress_l3")
+int tc_src_l3(struct __sk_buff *skb)
+{
+	__u16 proto = skb->protocol;
+
+	if (bpf_skb_change_head(skb, ETH_HLEN, 0) != 0)
+		return TC_ACT_SHOT;
+
+	if (bpf_skb_store_bytes(skb, 0, &src_mac, ETH_ALEN, 0) != 0)
+		return TC_ACT_SHOT;
+
+	if (bpf_skb_store_bytes(skb, ETH_ALEN, &dst_mac, ETH_ALEN, 0) != 0)
+		return TC_ACT_SHOT;
+
+	if (bpf_skb_store_bytes(skb, ETH_ALEN + ETH_ALEN, &proto, sizeof(__u16), 0) != 0)
+		return TC_ACT_SHOT;
+
+	return bpf_redirect_peer(IFINDEX_DST, 0);
+}
+
 char __license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From b2db6c35ba986ebe1ddd6b65f21a810346299d7f Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Tue, 25 May 2021 15:40:22 +0100
Subject: afs: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix multiple
warnings by explicitly adding multiple fallthrough pseudo-keywords in
places where the code is intended to fall through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeffrey Altman <jaltman@auristor.com>
cc: linux-afs@lists.infradead.org
cc: linux-hardening@vger.kernel.org
Link: https://lore.kernel.org/r/51150b54e0b0431a2c401cd54f2c4e7f50e94601.1605896059.git.gustavoars@kernel.org/ # v1
Link: https://lore.kernel.org/r/20210420211615.GA51432@embeddedor/ # v2
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/afs/cmservice.c | 5 +++++
 fs/afs/fsclient.c  | 4 ++++
 fs/afs/vlclient.c  | 1 +
 3 files changed, 10 insertions(+)

diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index a4e9e6e07e93..d3c6bb22c5f4 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -322,6 +322,8 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 			return ret;
 
 		call->unmarshall++;
+		fallthrough;
+
 	case 5:
 		break;
 	}
@@ -418,6 +420,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
 			r->node[loop] = ntohl(b[loop + 5]);
 
 		call->unmarshall++;
+		fallthrough;
 
 	case 2:
 		break;
@@ -530,6 +533,7 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
 			r->node[loop] = ntohl(b[loop + 5]);
 
 		call->unmarshall++;
+		fallthrough;
 
 	case 2:
 		break;
@@ -663,6 +667,7 @@ static int afs_deliver_yfs_cb_callback(struct afs_call *call)
 
 		afs_extract_to_tmp(call);
 		call->unmarshall++;
+		fallthrough;
 
 	case 3:
 		break;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 2f695a260442..dd3f45d906d2 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -388,6 +388,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
 		req->file_size = vp->scb.status.size;
 
 		call->unmarshall++;
+		fallthrough;
 
 	case 5:
 		break;
@@ -1408,6 +1409,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
 		_debug("motd '%s'", p);
 
 		call->unmarshall++;
+		fallthrough;
 
 	case 8:
 		break;
@@ -1845,6 +1847,7 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
 		xdr_decode_AFSVolSync(&bp, &op->volsync);
 
 		call->unmarshall++;
+		fallthrough;
 
 	case 6:
 		break;
@@ -1979,6 +1982,7 @@ static int afs_deliver_fs_fetch_acl(struct afs_call *call)
 		xdr_decode_AFSVolSync(&bp, &op->volsync);
 
 		call->unmarshall++;
+		fallthrough;
 
 	case 4:
 		break;
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index dc9327332f06..00fca3c66ba6 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -593,6 +593,7 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
 		if (ret < 0)
 			return ret;
 		call->unmarshall = 6;
+		fallthrough;
 
 	case 6:
 		break;
-- 
cgit v1.2.3


From 5c9d706f61336d9f7f285df64c734af778c70f39 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 25 May 2021 20:35:29 +0200
Subject: bpf: Fix BPF_LSM kconfig symbol dependency

Similarly as 6bdacdb48e94 ("bpf: Fix BPF_JIT kconfig symbol dependency") we
need to detangle the hard BPF_LSM dependency on NET. This was previously
implicit by its dependency on BPF_JIT which itself was dependent on NET (but
without any actual/real hard dependency code-wise). Given the latter was
lifted, so should be the former as BPF_LSMs could well exist on net-less
systems. This therefore also fixes a randconfig build error recently reported
by Randy:

  ld: kernel/bpf/bpf_lsm.o: in function `bpf_lsm_func_proto':
  bpf_lsm.c:(.text+0x1a0): undefined reference to `bpf_sk_storage_get_proto'
  ld: bpf_lsm.c:(.text+0x1b8): undefined reference to `bpf_sk_storage_delete_proto'
  [...]

Fixes: b24abcff918a ("bpf, kconfig: Add consolidated menu entry for bpf with core options")
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
---
 kernel/bpf/bpf_lsm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 5efb2b24012c..da471bf01b97 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -107,10 +107,12 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_inode_storage_get_proto;
 	case BPF_FUNC_inode_storage_delete:
 		return &bpf_inode_storage_delete_proto;
+#ifdef CONFIG_NET
 	case BPF_FUNC_sk_storage_get:
 		return &bpf_sk_storage_get_proto;
 	case BPF_FUNC_sk_storage_delete:
 		return &bpf_sk_storage_delete_proto;
+#endif /* CONFIG_NET */
 	case BPF_FUNC_spin_lock:
 		return &bpf_spin_lock_proto;
 	case BPF_FUNC_spin_unlock:
-- 
cgit v1.2.3


From 3d0220f6861d713213b015b582e9f21e5b28d2e0 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 21 May 2021 10:17:36 +0000
Subject: bpf: Wrap aux data inside bpf_sanitize_info container

Add a container structure struct bpf_sanitize_info which holds
the current aux info, and update call-sites to sanitize_ptr_alu()
to pass it in. This is needed for passing in additional state
later on.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Piotr Krysiuk <piotras@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 09849e43f035..98690f5367f9 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6486,15 +6486,19 @@ static bool sanitize_needed(u8 opcode)
 	return opcode == BPF_ADD || opcode == BPF_SUB;
 }
 
+struct bpf_sanitize_info {
+	struct bpf_insn_aux_data aux;
+};
+
 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
 			    struct bpf_insn *insn,
 			    const struct bpf_reg_state *ptr_reg,
 			    const struct bpf_reg_state *off_reg,
 			    struct bpf_reg_state *dst_reg,
-			    struct bpf_insn_aux_data *tmp_aux,
+			    struct bpf_sanitize_info *info,
 			    const bool commit_window)
 {
-	struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : tmp_aux;
+	struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
 	struct bpf_verifier_state *vstate = env->cur_state;
 	bool off_is_imm = tnum_is_const(off_reg->var_off);
 	bool off_is_neg = off_reg->smin_value < 0;
@@ -6523,8 +6527,8 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
 		/* In commit phase we narrow the masking window based on
 		 * the observed pointer move after the simulated operation.
 		 */
-		alu_state = tmp_aux->alu_state;
-		alu_limit = abs(tmp_aux->alu_limit - alu_limit);
+		alu_state = info->aux.alu_state;
+		alu_limit = abs(info->aux.alu_limit - alu_limit);
 	} else {
 		alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
 		alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
@@ -6685,7 +6689,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 	    smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
 	u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
 	    umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
-	struct bpf_insn_aux_data tmp_aux = {};
+	struct bpf_sanitize_info info = {};
 	u8 opcode = BPF_OP(insn->code);
 	u32 dst = insn->dst_reg;
 	int ret;
@@ -6754,7 +6758,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 
 	if (sanitize_needed(opcode)) {
 		ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
-				       &tmp_aux, false);
+				       &info, false);
 		if (ret < 0)
 			return sanitize_err(env, insn, ret, off_reg, dst_reg);
 	}
@@ -6895,7 +6899,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 		return -EACCES;
 	if (sanitize_needed(opcode)) {
 		ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
-				       &tmp_aux, true);
+				       &info, true);
 		if (ret < 0)
 			return sanitize_err(env, insn, ret, off_reg, dst_reg);
 	}
-- 
cgit v1.2.3


From bb01a1bba579b4b1c5566af24d95f1767859771e Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 21 May 2021 10:19:22 +0000
Subject: bpf: Fix mask direction swap upon off reg sign change

Masking direction as indicated via mask_to_left is considered to be
calculated once and then used to derive pointer limits. Thus, this
needs to be placed into bpf_sanitize_info instead so we can pass it
to sanitize_ptr_alu() call after the pointer move. Piotr noticed a
corner case where the off reg causes masking direction change which
then results in an incorrect final aux->alu_limit.

Fixes: 7fedb63a8307 ("bpf: Tighten speculative pointer arithmetic mask")
Reported-by: Piotr Krysiuk <piotras@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Piotr Krysiuk <piotras@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 98690f5367f9..8574cb60915a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6409,18 +6409,10 @@ enum {
 };
 
 static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
-			      const struct bpf_reg_state *off_reg,
-			      u32 *alu_limit, u8 opcode)
+			      u32 *alu_limit, bool mask_to_left)
 {
-	bool off_is_neg = off_reg->smin_value < 0;
-	bool mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
-			    (opcode == BPF_SUB && !off_is_neg);
 	u32 max = 0, ptr_limit = 0;
 
-	if (!tnum_is_const(off_reg->var_off) &&
-	    (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
-		return REASON_BOUNDS;
-
 	switch (ptr_reg->type) {
 	case PTR_TO_STACK:
 		/* Offset 0 is out-of-bounds, but acceptable start for the
@@ -6488,6 +6480,7 @@ static bool sanitize_needed(u8 opcode)
 
 struct bpf_sanitize_info {
 	struct bpf_insn_aux_data aux;
+	bool mask_to_left;
 };
 
 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
@@ -6519,7 +6512,16 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
 	if (vstate->speculative)
 		goto do_sim;
 
-	err = retrieve_ptr_limit(ptr_reg, off_reg, &alu_limit, opcode);
+	if (!commit_window) {
+		if (!tnum_is_const(off_reg->var_off) &&
+		    (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
+			return REASON_BOUNDS;
+
+		info->mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
+				     (opcode == BPF_SUB && !off_is_neg);
+	}
+
+	err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
 	if (err < 0)
 		return err;
 
-- 
cgit v1.2.3


From a7036191277f9fa68d92f2071ddc38c09b1e5ee5 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 4 May 2021 08:58:25 +0000
Subject: bpf: No need to simulate speculative domain for immediates

In 801c6058d14a ("bpf: Fix leakage of uninitialized bpf stack under
speculation") we replaced masking logic with direct loads of immediates
if the register is a known constant. Given in this case we do not apply
any masking, there is also no reason for the operation to be truncated
under the speculative domain.

Therefore, there is also zero reason for the verifier to branch-off and
simulate this case, it only needs to do it for unknown but bounded scalars.
As a side-effect, this also enables few test cases that were previously
rejected due to simulation under zero truncation.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Piotr Krysiuk <piotras@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8574cb60915a..94ba5163d4c5 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6545,8 +6545,12 @@ do_sim:
 	/* If we're in commit phase, we're done here given we already
 	 * pushed the truncated dst_reg into the speculative verification
 	 * stack.
+	 *
+	 * Also, when register is a known constant, we rewrite register-based
+	 * operation to immediate-based, and thus do not need masking (and as
+	 * a consequence, do not need to simulate the zero-truncation either).
 	 */
-	if (commit_window)
+	if (commit_window || off_is_imm)
 		return 0;
 
 	/* Simulate and find potential out-of-bounds access under
-- 
cgit v1.2.3


From 1bad6fd52be4ce12d207e2820ceb0f29ab31fc53 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 4 May 2021 08:58:25 +0000
Subject: bpf, selftests: Adjust few selftest result_unpriv outcomes

Given we don't need to simulate the speculative domain for registers with
immediates anymore since the verifier uses direct imm-based rewrites instead
of having to mask, we can also lift a few cases that were previously rejected.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/verifier/stack_ptr.c       | 2 --
 tools/testing/selftests/bpf/verifier/value_ptr_arith.c | 8 --------
 2 files changed, 10 deletions(-)

diff --git a/tools/testing/selftests/bpf/verifier/stack_ptr.c b/tools/testing/selftests/bpf/verifier/stack_ptr.c
index 07eaa04412ae..8ab94d65f3d5 100644
--- a/tools/testing/selftests/bpf/verifier/stack_ptr.c
+++ b/tools/testing/selftests/bpf/verifier/stack_ptr.c
@@ -295,8 +295,6 @@
 	BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
 	BPF_EXIT_INSN(),
 	},
-	.result_unpriv = REJECT,
-	.errstr_unpriv = "invalid write to stack R1 off=0 size=1",
 	.result = ACCEPT,
 	.retval = 42,
 },
diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
index e5913fd3b903..7ae2859d495c 100644
--- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
+++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
@@ -300,8 +300,6 @@
 	},
 	.fixup_map_array_48b = { 3 },
 	.result = ACCEPT,
-	.result_unpriv = REJECT,
-	.errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
 	.retval = 1,
 },
 {
@@ -371,8 +369,6 @@
 	},
 	.fixup_map_array_48b = { 3 },
 	.result = ACCEPT,
-	.result_unpriv = REJECT,
-	.errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
 	.retval = 1,
 },
 {
@@ -472,8 +468,6 @@
 	},
 	.fixup_map_array_48b = { 3 },
 	.result = ACCEPT,
-	.result_unpriv = REJECT,
-	.errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
 	.retval = 1,
 },
 {
@@ -766,8 +760,6 @@
 	},
 	.fixup_map_array_48b = { 3 },
 	.result = ACCEPT,
-	.result_unpriv = REJECT,
-	.errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
 	.retval = 1,
 },
 {
-- 
cgit v1.2.3


From bfb819ea20ce8bbeeba17e1a6418bf8bda91fc28 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 25 May 2021 12:37:35 -0700
Subject: proc: Check /proc/$pid/attr/ writes against file opener

Fix another "confused deputy" weakness[1]. Writes to /proc/$pid/attr/
files need to check the opener credentials, since these fds do not
transition state across execve(). Without this, it is possible to
trick another process (which may have different credentials) to write
to its own /proc/$pid/attr/ files, leading to unexpected and possibly
exploitable behaviors.

[1] https://www.kernel.org/doc/html/latest/security/credentials.html?highlight=confused#open-file-credentials

Fixes: 1da177e4c3f41 ("Linux-2.6.12-rc2")
Cc: stable@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3851bfcdba56..58bbf334265b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2703,6 +2703,10 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
 	void *page;
 	int rv;
 
+	/* A task may only write when it was the opener. */
+	if (file->f_cred != current_real_cred())
+		return -EPERM;
+
 	rcu_read_lock();
 	task = pid_task(proc_pid(inode), PIDTYPE_PID);
 	if (!task) {
-- 
cgit v1.2.3


From 297739bd73f6e49d80bac4bfd27f3598b798c0d4 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 24 May 2021 22:49:24 -0400
Subject: sctp: add the missing setting for asoc encap_port

This patch is to add the missing setting back for asoc encap_port.

Fixes: 8dba29603b5c ("sctp: add SCTP_REMOTE_UDP_ENCAPS_PORT sockopt")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 40f9f6c4a0a1..a79d193ff872 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4473,6 +4473,7 @@ static int sctp_setsockopt_encap_port(struct sock *sk,
 				    transports)
 			t->encap_port = encap_port;
 
+		asoc->encap_port = encap_port;
 		return 0;
 	}
 
-- 
cgit v1.2.3


From b2540cdce6e22ecf3de54daf5129cc37951348cc Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 24 May 2021 22:49:42 -0400
Subject: sctp: fix the proc_handler for sysctl encap_port

proc_dointvec() cannot do min and max check for setting a value
when extra1/extra2 is set, so change it to proc_dointvec_minmax()
for sysctl encap_port.

Fixes: e8a3001c2120 ("sctp: add encap_port for netns sock asoc and transport")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sysctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index e92df779af73..55871b277f47 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -307,7 +307,7 @@ static struct ctl_table sctp_net_table[] = {
 		.data		= &init_net.sctp.encap_port,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= &udp_port_max,
 	},
-- 
cgit v1.2.3


From c1cf1afd8b0f2f1b077df84e90497c07094406fc Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Tue, 25 May 2021 18:52:47 +0800
Subject: net: hns: Fix kernel-doc

Fix function name in hns_ethtool.c kernel-doc comment
to remove these warnings found by clang_w1.

drivers/net/ethernet/hisilicon/hns/hns_ethtool.c:202: warning: expecting
prototype for hns_nic_set_link_settings(). Prototype was for
hns_nic_set_link_ksettings() instead.
drivers/net/ethernet/hisilicon/hns/hns_ethtool.c:837: warning: expecting
prototype for get_ethtool_stats(). Prototype was for
hns_get_ethtool_stats() instead.
drivers/net/ethernet/hisilicon/hns/hns_ethtool.c:894: warning:
expecting prototype for get_strings(). Prototype was for
hns_get_strings() instead.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Fixes: 'commit 262b38cdb3e4 ("net: ethernet: hisilicon: hns: use phydev
from struct net_device")'
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index da48c05435ea..7e62dcff2426 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -192,7 +192,7 @@ static int hns_nic_get_link_ksettings(struct net_device *net_dev,
 }
 
 /**
- *hns_nic_set_link_settings - implement ethtool set link ksettings
+ *hns_nic_set_link_ksettings - implement ethtool set link ksettings
  *@net_dev: net_device
  *@cmd: ethtool_link_ksettings
  *retuen 0 - success , negative --fail
@@ -827,7 +827,7 @@ hns_get_channels(struct net_device *net_dev, struct ethtool_channels *ch)
 }
 
 /**
- * get_ethtool_stats - get detail statistics.
+ * hns_get_ethtool_stats - get detail statistics.
  * @netdev: net device
  * @stats: statistics info.
  * @data: statistics data.
@@ -885,7 +885,7 @@ static void hns_get_ethtool_stats(struct net_device *netdev,
 }
 
 /**
- * get_strings: Return a set of strings that describe the requested objects
+ * hns_get_strings: Return a set of strings that describe the requested objects
  * @netdev: net device
  * @stringset: string set ID.
  * @data: objects data.
-- 
cgit v1.2.3


From 9453d45ecb6c2199d72e73c993e9d98677a2801b Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@nvidia.com>
Date: Tue, 25 May 2021 16:21:52 +0300
Subject: net: zero-initialize tc skb extension on allocation

Function skb_ext_add() doesn't initialize created skb extension with any
value and leaves it up to the user. However, since extension of type
TC_SKB_EXT originally contained only single value tc_skb_ext->chain its
users used to just assign the chain value without setting whole extension
memory to zero first. This assumption changed when TC_SKB_EXT extension was
extended with additional fields but not all users were updated to
initialize the new fields which leads to use of uninitialized memory
afterwards. UBSAN log:

[  778.299821] UBSAN: invalid-load in net/openvswitch/flow.c:899:28
[  778.301495] load of value 107 is not a valid value for type '_Bool'
[  778.303215] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc7+ #2
[  778.304933] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
[  778.307901] Call Trace:
[  778.308680]  <IRQ>
[  778.309358]  dump_stack+0xbb/0x107
[  778.310307]  ubsan_epilogue+0x5/0x40
[  778.311167]  __ubsan_handle_load_invalid_value.cold+0x43/0x48
[  778.312454]  ? memset+0x20/0x40
[  778.313230]  ovs_flow_key_extract.cold+0xf/0x14 [openvswitch]
[  778.314532]  ovs_vport_receive+0x19e/0x2e0 [openvswitch]
[  778.315749]  ? ovs_vport_find_upcall_portid+0x330/0x330 [openvswitch]
[  778.317188]  ? create_prof_cpu_mask+0x20/0x20
[  778.318220]  ? arch_stack_walk+0x82/0xf0
[  778.319153]  ? secondary_startup_64_no_verify+0xb0/0xbb
[  778.320399]  ? stack_trace_save+0x91/0xc0
[  778.321362]  ? stack_trace_consume_entry+0x160/0x160
[  778.322517]  ? lock_release+0x52e/0x760
[  778.323444]  netdev_frame_hook+0x323/0x610 [openvswitch]
[  778.324668]  ? ovs_netdev_get_vport+0xe0/0xe0 [openvswitch]
[  778.325950]  __netif_receive_skb_core+0x771/0x2db0
[  778.327067]  ? lock_downgrade+0x6e0/0x6f0
[  778.328021]  ? lock_acquire+0x565/0x720
[  778.328940]  ? generic_xdp_tx+0x4f0/0x4f0
[  778.329902]  ? inet_gro_receive+0x2a7/0x10a0
[  778.330914]  ? lock_downgrade+0x6f0/0x6f0
[  778.331867]  ? udp4_gro_receive+0x4c4/0x13e0
[  778.332876]  ? lock_release+0x52e/0x760
[  778.333808]  ? dev_gro_receive+0xcc8/0x2380
[  778.334810]  ? lock_downgrade+0x6f0/0x6f0
[  778.335769]  __netif_receive_skb_list_core+0x295/0x820
[  778.336955]  ? process_backlog+0x780/0x780
[  778.337941]  ? mlx5e_rep_tc_netdevice_event_unregister+0x20/0x20 [mlx5_core]
[  778.339613]  ? seqcount_lockdep_reader_access.constprop.0+0xa7/0xc0
[  778.341033]  ? kvm_clock_get_cycles+0x14/0x20
[  778.342072]  netif_receive_skb_list_internal+0x5f5/0xcb0
[  778.343288]  ? __kasan_kmalloc+0x7a/0x90
[  778.344234]  ? mlx5e_handle_rx_cqe_mpwrq+0x9e0/0x9e0 [mlx5_core]
[  778.345676]  ? mlx5e_xmit_xdp_frame_mpwqe+0x14d0/0x14d0 [mlx5_core]
[  778.347140]  ? __netif_receive_skb_list_core+0x820/0x820
[  778.348351]  ? mlx5e_post_rx_mpwqes+0xa6/0x25d0 [mlx5_core]
[  778.349688]  ? napi_gro_flush+0x26c/0x3c0
[  778.350641]  napi_complete_done+0x188/0x6b0
[  778.351627]  mlx5e_napi_poll+0x373/0x1b80 [mlx5_core]
[  778.352853]  __napi_poll+0x9f/0x510
[  778.353704]  ? mlx5_flow_namespace_set_mode+0x260/0x260 [mlx5_core]
[  778.355158]  net_rx_action+0x34c/0xa40
[  778.356060]  ? napi_threaded_poll+0x3d0/0x3d0
[  778.357083]  ? sched_clock_cpu+0x18/0x190
[  778.358041]  ? __common_interrupt+0x8e/0x1a0
[  778.359045]  __do_softirq+0x1ce/0x984
[  778.359938]  __irq_exit_rcu+0x137/0x1d0
[  778.360865]  irq_exit_rcu+0xa/0x20
[  778.361708]  common_interrupt+0x80/0xa0
[  778.362640]  </IRQ>
[  778.363212]  asm_common_interrupt+0x1e/0x40
[  778.364204] RIP: 0010:native_safe_halt+0xe/0x10
[  778.365273] Code: 4f ff ff ff 4c 89 e7 e8 50 3f 40 fe e9 dc fe ff ff 48 89 df e8 43 3f 40 fe eb 90 cc e9 07 00 00 00 0f 00 2d 74 05 62 00 fb f4 <c3> 90 e9 07 00 00 00 0f 00 2d 64 05 62 00 f4 c3 cc cc 0f 1f 44 00
[  778.369355] RSP: 0018:ffffffff84407e48 EFLAGS: 00000246
[  778.370570] RAX: ffff88842de46a80 RBX: ffffffff84425840 RCX: ffffffff83418468
[  778.372143] RDX: 000000000026f1da RSI: 0000000000000004 RDI: ffffffff8343af5e
[  778.373722] RBP: fffffbfff0884b08 R08: 0000000000000000 R09: ffff88842de46bcb
[  778.375292] R10: ffffed1085bc8d79 R11: 0000000000000001 R12: 0000000000000000
[  778.376860] R13: ffffffff851124a0 R14: 0000000000000000 R15: dffffc0000000000
[  778.378491]  ? rcu_eqs_enter.constprop.0+0xb8/0xe0
[  778.379606]  ? default_idle_call+0x5e/0xe0
[  778.380578]  default_idle+0xa/0x10
[  778.381406]  default_idle_call+0x96/0xe0
[  778.382350]  do_idle+0x3d4/0x550
[  778.383153]  ? arch_cpu_idle_exit+0x40/0x40
[  778.384143]  cpu_startup_entry+0x19/0x20
[  778.385078]  start_kernel+0x3c7/0x3e5
[  778.385978]  secondary_startup_64_no_verify+0xb0/0xbb

Fix the issue by providing new function tc_skb_ext_alloc() that allocates
tc skb extension and initializes its memory to 0 before returning it to the
caller. Change all existing users to use new API instead of calling
skb_ext_add() directly.

Fixes: 038ebb1a713d ("net/sched: act_ct: fix miss set mru for ovs after defrag in act_ct")
Fixes: d29334c15d33 ("net/sched: act_api: fix miss set post_ct for ovs after do conntrack in act_ct")
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Acked-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c     |  2 +-
 include/net/pkt_cls.h                               | 11 +++++++++++
 net/sched/cls_api.c                                 |  2 +-
 4 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
index 6cdc52d50a48..311382261840 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -626,7 +626,7 @@ static bool mlx5e_restore_skb(struct sk_buff *skb, u32 chain, u32 reg_c1,
 		struct mlx5_eswitch *esw;
 		u32 zone_restore_id;
 
-		tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT);
+		tc_skb_ext = tc_skb_ext_alloc(skb);
 		if (!tc_skb_ext) {
 			WARN_ON(1);
 			return false;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index bccdb43a880b..2c776e7a7692 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -5090,7 +5090,7 @@ bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
 
 	if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
 		chain = mapped_obj.chain;
-		tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT);
+		tc_skb_ext = tc_skb_ext_alloc(skb);
 		if (WARN_ON(!tc_skb_ext))
 			return false;
 
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 255e4f4b521f..ec7823921bd2 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -709,6 +709,17 @@ tc_cls_common_offload_init(struct flow_cls_common_offload *cls_common,
 		cls_common->extack = extack;
 }
 
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+static inline struct tc_skb_ext *tc_skb_ext_alloc(struct sk_buff *skb)
+{
+	struct tc_skb_ext *tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT);
+
+	if (tc_skb_ext)
+		memset(tc_skb_ext, 0, sizeof(*tc_skb_ext));
+	return tc_skb_ext;
+}
+#endif
+
 enum tc_matchall_command {
 	TC_CLSMATCHALL_REPLACE,
 	TC_CLSMATCHALL_DESTROY,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 40fbea626dfd..279f9e2a2319 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1624,7 +1624,7 @@ int tcf_classify_ingress(struct sk_buff *skb,
 
 	/* If we missed on some chain */
 	if (ret == TC_ACT_UNSPEC && last_executed_chain) {
-		ext = skb_ext_add(skb, TC_SKB_EXT);
+		ext = tc_skb_ext_alloc(skb);
 		if (WARN_ON_ONCE(!ext))
 			return TC_ACT_SHOT;
 		ext->chain = last_executed_chain;
-- 
cgit v1.2.3


From 65161c35554f7135e6656b3df1ce2c500ca0bdcf Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Tue, 25 May 2021 19:00:12 +0800
Subject: bnx2x: Fix missing error code in bnx2x_iov_init_one()

Eliminate the follow smatch warning:

drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c:1227
bnx2x_iov_init_one() warn: missing error code 'err'.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index d21f085044cd..27943b0446c2 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -1223,8 +1223,10 @@ int bnx2x_iov_init_one(struct bnx2x *bp, int int_mode_param,
 		goto failed;
 
 	/* SR-IOV capability was enabled but there are no VFs*/
-	if (iov->total == 0)
+	if (iov->total == 0) {
+		err = -EINVAL;
 		goto failed;
+	}
 
 	iov->nr_virtfn = min_t(u16, iov->total, num_vfs_param);
 
-- 
cgit v1.2.3


From 17f9c1b63cdd4439523cfcdf5683e5070b911f24 Mon Sep 17 00:00:00 2001
From: Stefan Chulski <stefanc@marvell.com>
Date: Tue, 25 May 2021 19:04:41 +0300
Subject: net: mvpp2: add buffer header handling in RX

If Link Partner sends frames larger than RX buffer size, MAC mark it
as oversize but still would pass it to the Packet Processor.
In this scenario, Packet Processor scatter frame between multiple buffers,
but only a single buffer would be returned to the Buffer Manager pool and
it would not refill the poll.

Patch add handling of oversize error with buffer header handling, so all
buffers would be returned to the Buffer Manager pool.

Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit")
Reported-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Stefan Chulski <stefanc@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2/mvpp2.h      | 22 ++++++++++
 drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 54 ++++++++++++++++++++-----
 2 files changed, 67 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
index 8edba5ea90f0..4a61c90003b5 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
@@ -993,6 +993,14 @@ enum mvpp22_ptp_packet_format {
 
 #define MVPP2_DESC_DMA_MASK	DMA_BIT_MASK(40)
 
+/* Buffer header info bits */
+#define MVPP2_B_HDR_INFO_MC_ID_MASK	0xfff
+#define MVPP2_B_HDR_INFO_MC_ID(info)	((info) & MVPP2_B_HDR_INFO_MC_ID_MASK)
+#define MVPP2_B_HDR_INFO_LAST_OFFS	12
+#define MVPP2_B_HDR_INFO_LAST_MASK	BIT(12)
+#define MVPP2_B_HDR_INFO_IS_LAST(info) \
+	   (((info) & MVPP2_B_HDR_INFO_LAST_MASK) >> MVPP2_B_HDR_INFO_LAST_OFFS)
+
 struct mvpp2_tai;
 
 /* Definitions */
@@ -1002,6 +1010,20 @@ struct mvpp2_rss_table {
 	u32 indir[MVPP22_RSS_TABLE_ENTRIES];
 };
 
+struct mvpp2_buff_hdr {
+	__le32 next_phys_addr;
+	__le32 next_dma_addr;
+	__le16 byte_count;
+	__le16 info;
+	__le16 reserved1;	/* bm_qset (for future use, BM) */
+	u8 next_phys_addr_high;
+	u8 next_dma_addr_high;
+	__le16 reserved2;
+	__le16 reserved3;
+	__le16 reserved4;
+	__le16 reserved5;
+};
+
 /* Shared Packet Processor resources */
 struct mvpp2 {
 	/* Shared registers' base addresses */
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index ec706d614cac..d39c7639cdba 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -3839,6 +3839,35 @@ mvpp2_run_xdp(struct mvpp2_port *port, struct mvpp2_rx_queue *rxq,
 	return ret;
 }
 
+static void mvpp2_buff_hdr_pool_put(struct mvpp2_port *port, struct mvpp2_rx_desc *rx_desc,
+				    int pool, u32 rx_status)
+{
+	phys_addr_t phys_addr, phys_addr_next;
+	dma_addr_t dma_addr, dma_addr_next;
+	struct mvpp2_buff_hdr *buff_hdr;
+
+	phys_addr = mvpp2_rxdesc_dma_addr_get(port, rx_desc);
+	dma_addr = mvpp2_rxdesc_cookie_get(port, rx_desc);
+
+	do {
+		buff_hdr = (struct mvpp2_buff_hdr *)phys_to_virt(phys_addr);
+
+		phys_addr_next = le32_to_cpu(buff_hdr->next_phys_addr);
+		dma_addr_next = le32_to_cpu(buff_hdr->next_dma_addr);
+
+		if (port->priv->hw_version >= MVPP22) {
+			phys_addr_next |= ((u64)buff_hdr->next_phys_addr_high << 32);
+			dma_addr_next |= ((u64)buff_hdr->next_dma_addr_high << 32);
+		}
+
+		mvpp2_bm_pool_put(port, pool, dma_addr, phys_addr);
+
+		phys_addr = phys_addr_next;
+		dma_addr = dma_addr_next;
+
+	} while (!MVPP2_B_HDR_INFO_IS_LAST(le16_to_cpu(buff_hdr->info)));
+}
+
 /* Main rx processing */
 static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi,
 		    int rx_todo, struct mvpp2_rx_queue *rxq)
@@ -3885,14 +3914,6 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi,
 			MVPP2_RXD_BM_POOL_ID_OFFS;
 		bm_pool = &port->priv->bm_pools[pool];
 
-		/* In case of an error, release the requested buffer pointer
-		 * to the Buffer Manager. This request process is controlled
-		 * by the hardware, and the information about the buffer is
-		 * comprised by the RX descriptor.
-		 */
-		if (rx_status & MVPP2_RXD_ERR_SUMMARY)
-			goto err_drop_frame;
-
 		if (port->priv->percpu_pools) {
 			pp = port->priv->page_pool[pool];
 			dma_dir = page_pool_get_dma_dir(pp);
@@ -3904,6 +3925,18 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi,
 					rx_bytes + MVPP2_MH_SIZE,
 					dma_dir);
 
+		/* Buffer header not supported */
+		if (rx_status & MVPP2_RXD_BUF_HDR)
+			goto err_drop_frame;
+
+		/* In case of an error, release the requested buffer pointer
+		 * to the Buffer Manager. This request process is controlled
+		 * by the hardware, and the information about the buffer is
+		 * comprised by the RX descriptor.
+		 */
+		if (rx_status & MVPP2_RXD_ERR_SUMMARY)
+			goto err_drop_frame;
+
 		/* Prefetch header */
 		prefetch(data);
 
@@ -3985,7 +4018,10 @@ err_drop_frame:
 		dev->stats.rx_errors++;
 		mvpp2_rx_error(port, rx_desc);
 		/* Return the buffer to the pool */
-		mvpp2_bm_pool_put(port, pool, dma_addr, phys_addr);
+		if (rx_status & MVPP2_RXD_BUF_HDR)
+			mvpp2_buff_hdr_pool_put(port, rx_desc, pool, rx_status);
+		else
+			mvpp2_bm_pool_put(port, pool, dma_addr, phys_addr);
 	}
 
 	rcu_read_unlock();
-- 
cgit v1.2.3


From bab09fe2f65200a67209a360988bc24f3de4b95d Mon Sep 17 00:00:00 2001
From: Simon Horman <simon.horman@corigine.com>
Date: Tue, 25 May 2021 17:47:04 +0200
Subject: nfp: update maintainer and mailing list addresses

Some of Netronome's activities and people have moved over to Corigine,
including NFP driver maintenance and myself.

Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Louis Peens <louis.peens@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 2cc1cb72bc92..d34c0036bdcd 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12687,9 +12687,9 @@ F:	drivers/rtc/rtc-ntxec.c
 F:	include/linux/mfd/ntxec.h
 
 NETRONOME ETHERNET DRIVERS
-M:	Simon Horman <simon.horman@netronome.com>
+M:	Simon Horman <simon.horman@corigine.com>
 R:	Jakub Kicinski <kuba@kernel.org>
-L:	oss-drivers@netronome.com
+L:	oss-drivers@corigine.com
 S:	Maintained
 F:	drivers/net/ethernet/netronome/
 
-- 
cgit v1.2.3


From 20b5759f21cf53a0e03031bd3fe539e332b13568 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 25 May 2021 14:23:10 -0700
Subject: mptcp: avoid OOB access in setsockopt()

We can't use tcp_set_congestion_control() on an mptcp socket, as
such function can end-up accessing a tcp-specific field -
prior_ssthresh - causing an OOB access.

To allow propagating the correct ca algo on subflow, cache the ca
name at initialization time.

Additionally avoid overriding the user-selected CA (if any) at
clone time.

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/182
Fixes: aa1fbd94e5c7 ("mptcp: sockopt: add TCP_CONGESTION and TCP_INFO")
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 14 +++++++++++---
 net/mptcp/protocol.h |  1 +
 net/mptcp/sockopt.c  |  4 ++--
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 2d21a4793d9d..2bc199549a88 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2424,13 +2424,12 @@ static int __mptcp_init_sock(struct sock *sk)
 	timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
 	timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0);
 
-	tcp_assign_congestion_control(sk);
-
 	return 0;
 }
 
 static int mptcp_init_sock(struct sock *sk)
 {
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct net *net = sock_net(sk);
 	int ret;
 
@@ -2448,6 +2447,16 @@ static int mptcp_init_sock(struct sock *sk)
 	if (ret)
 		return ret;
 
+	/* fetch the ca name; do it outside __mptcp_init_sock(), so that clone will
+	 * propagate the correct value
+	 */
+	tcp_assign_congestion_control(sk);
+	strcpy(mptcp_sk(sk)->ca_name, icsk->icsk_ca_ops->name);
+
+	/* no need to keep a reference to the ops, the name will suffice */
+	tcp_cleanup_congestion_control(sk);
+	icsk->icsk_ca_ops = NULL;
+
 	sk_sockets_allocated_inc(sk);
 	sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
 	sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
@@ -2622,7 +2631,6 @@ static void __mptcp_destroy_sock(struct sock *sk)
 	sk_stream_kill_queues(sk);
 	xfrm_sk_free_policy(sk);
 
-	tcp_cleanup_congestion_control(sk);
 	sk_refcnt_debug_release(sk);
 	mptcp_dispose_initial_subflow(msk);
 	sock_put(sk);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index edc0128730df..165c8b40b384 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -258,6 +258,7 @@ struct mptcp_sock {
 	} rcvq_space;
 
 	u32 setsockopt_seq;
+	char		ca_name[TCP_CA_NAME_MAX];
 };
 
 #define mptcp_lock_sock(___sk, cb) do {					\
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 00d941b66c1e..a79798189599 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -547,7 +547,7 @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t
 	}
 
 	if (ret == 0)
-		tcp_set_congestion_control(sk, name, false, cap_net_admin);
+		strcpy(msk->ca_name, name);
 
 	release_sock(sk);
 	return ret;
@@ -705,7 +705,7 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
 	sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG));
 
 	if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops)
-		tcp_set_congestion_control(ssk, inet_csk(sk)->icsk_ca_ops->name, false, true);
+		tcp_set_congestion_control(ssk, msk->ca_name, false, true);
 }
 
 static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
-- 
cgit v1.2.3


From 3812ce895047afdb78dc750a236515416e0ccded Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 25 May 2021 14:23:11 -0700
Subject: mptcp: drop unconditional pr_warn on bad opt

This is a left-over of early day. A malicious peer can flood
the kernel logs with useless messages, just drop it.

Fixes: f296234c98a8 ("mptcp: Add handling of incoming MP_JOIN requests")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/options.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 99fc21406168..71c535f4e1ef 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -130,7 +130,6 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 			memcpy(mp_opt->hmac, ptr, MPTCPOPT_HMAC_LEN);
 			pr_debug("MP_JOIN hmac");
 		} else {
-			pr_warn("MP_JOIN bad option size");
 			mp_opt->mp_join = 0;
 		}
 		break;
-- 
cgit v1.2.3


From 3ed0a585bfadb6bd7080f11184adbc9edcce7dbc Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 25 May 2021 14:23:12 -0700
Subject: mptcp: avoid error message on infinite mapping

Another left-over. Avoid flooding dmesg with useless text,
we already have a MIB for that event.

Fixes: 648ef4b88673 ("mptcp: Implement MPTCP receive path")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/subflow.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index a5ede357cfbc..bde6be77ea73 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -867,7 +867,6 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
 
 	data_len = mpext->data_len;
 	if (data_len == 0) {
-		pr_err("Infinite mapping not handled");
 		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
 		return MAPPING_INVALID;
 	}
-- 
cgit v1.2.3


From d58300c3185b78ab910092488126b97f0abe3ae2 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Tue, 25 May 2021 14:23:13 -0700
Subject: mptcp: validate 'id' when stopping the ADD_ADDR retransmit timer

when Linux receives an echo-ed ADD_ADDR, it checks the IP address against
the list of "announced" addresses. In case of a positive match, the timer
that handles retransmissions is stopped regardless of the 'Address Id' in
the received packet: this behaviour does not comply with RFC8684 3.4.1.

Fix it by validating the 'Address Id' in received echo-ed ADD_ADDRs.
Tested using packetdrill, with the following captured output:

 unpatched kernel:

 Out <...> Flags [.], ack 1, win 256, options [mptcp add-addr v1 id 1 198.51.100.2 hmac 0xfd2e62517888fe29,mptcp dss ack 3007449509], length 0
 In  <...> Flags [.], ack 1, win 257, options [mptcp add-addr v1-echo id 1 1.2.3.4,mptcp dss ack 3013740213], length 0
 Out <...> Flags [.], ack 1, win 256, options [mptcp add-addr v1 id 1 198.51.100.2 hmac 0xfd2e62517888fe29,mptcp dss ack 3007449509], length 0
 In  <...> Flags [.], ack 1, win 257, options [mptcp add-addr v1-echo id 90 198.51.100.2,mptcp dss ack 3013740213], length 0
        ^^^ retransmission is stopped here, but 'Address Id' is 90

 patched kernel:

 Out <...> Flags [.], ack 1, win 256, options [mptcp add-addr v1 id 1 198.51.100.2 hmac 0x1cf372d59e05f4b8,mptcp dss ack 3007449509], length 0
 In  <...> Flags [.], ack 1, win 257, options [mptcp add-addr v1-echo id 1 1.2.3.4,mptcp dss ack 1672384568], length 0
 Out <...> Flags [.], ack 1, win 256, options [mptcp add-addr v1 id 1 198.51.100.2 hmac 0x1cf372d59e05f4b8,mptcp dss ack 3007449509], length 0
 In  <...> Flags [.], ack 1, win 257, options [mptcp add-addr v1-echo id 90 198.51.100.2,mptcp dss ack 1672384568], length 0
 Out <...> Flags [.], ack 1, win 256, options [mptcp add-addr v1 id 1 198.51.100.2 hmac 0x1cf372d59e05f4b8,mptcp dss ack 3007449509], length 0
 In  <...> Flags [.], ack 1, win 257, options [mptcp add-addr v1-echo id 1 198.51.100.2,mptcp dss ack 1672384568], length 0
        ^^^ retransmission is stopped here, only when both 'Address Id' and 'IP Address' match

Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/options.c    | 2 +-
 net/mptcp/pm_netlink.c | 8 ++++----
 net/mptcp/protocol.h   | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 71c535f4e1ef..6b825fb3fa83 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -1023,7 +1023,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
 			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR);
 		} else {
 			mptcp_pm_add_addr_echoed(msk, &mp_opt.addr);
-			mptcp_pm_del_add_timer(msk, &mp_opt.addr);
+			mptcp_pm_del_add_timer(msk, &mp_opt.addr, true);
 			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD);
 		}
 
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 6ba040897738..2469e06a3a9d 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -346,18 +346,18 @@ out:
 
 struct mptcp_pm_add_entry *
 mptcp_pm_del_add_timer(struct mptcp_sock *msk,
-		       struct mptcp_addr_info *addr)
+		       struct mptcp_addr_info *addr, bool check_id)
 {
 	struct mptcp_pm_add_entry *entry;
 	struct sock *sk = (struct sock *)msk;
 
 	spin_lock_bh(&msk->pm.lock);
 	entry = mptcp_lookup_anno_list_by_saddr(msk, addr);
-	if (entry)
+	if (entry && (!check_id || entry->addr.id == addr->id))
 		entry->retrans_times = ADD_ADDR_RETRANS_MAX;
 	spin_unlock_bh(&msk->pm.lock);
 
-	if (entry)
+	if (entry && (!check_id || entry->addr.id == addr->id))
 		sk_stop_timer_sync(sk, &entry->add_timer);
 
 	return entry;
@@ -1064,7 +1064,7 @@ static bool remove_anno_list_by_saddr(struct mptcp_sock *msk,
 {
 	struct mptcp_pm_add_entry *entry;
 
-	entry = mptcp_pm_del_add_timer(msk, addr);
+	entry = mptcp_pm_del_add_timer(msk, addr, false);
 	if (entry) {
 		list_del(&entry->list);
 		kfree(entry);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 165c8b40b384..0c6f99c67345 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -672,7 +672,7 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
 bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
 struct mptcp_pm_add_entry *
 mptcp_pm_del_add_timer(struct mptcp_sock *msk,
-		       struct mptcp_addr_info *addr);
+		       struct mptcp_addr_info *addr, bool check_id);
 struct mptcp_pm_add_entry *
 mptcp_lookup_anno_list_by_saddr(struct mptcp_sock *msk,
 				struct mptcp_addr_info *addr);
-- 
cgit v1.2.3


From 62f3415db237b8d2aa9a804ff84ce2efa87df179 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 26 May 2021 11:46:17 -0700
Subject: net: phy: Document phydev::dev_flags bits allocation

Document the phydev::dev_flags bit allocation to allow bits 15:0 to
define PHY driver specific behavior, bits 23:16 to be reserved for now,
and bits 31:24 to hold generic PHY driver flags.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Link: https://lore.kernel.org/r/20210526184617.3105012-1-f.fainelli@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 60d2b26026a2..852743f07e3e 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -496,6 +496,11 @@ struct macsec_ops;
  * @mac_managed_pm: Set true if MAC driver takes of suspending/resuming PHY
  * @state: State of the PHY for management purposes
  * @dev_flags: Device-specific flags used by the PHY driver.
+ *		Bits [15:0] are free to use by the PHY driver to communicate
+ *			    driver specific behavior.
+ *		Bits [23:16] are currently reserved for future use.
+ *		Bits [31:24] are reserved for defining generic
+ *			     PHY driver behavior.
  * @irq: IRQ number of the PHY's interrupt (-1 if none)
  * @phy_timer: The timer for handling the state machine
  * @phylink: Pointer to phylink instance for this PHY
-- 
cgit v1.2.3